Adopt Bibtex's alpha style for bibliographic references
rlouf committed Feb 22, 2023
1 parent 5697173 commit 6b76746
Showing 10 changed files with 87 additions and 81 deletions.
24 changes: 12 additions & 12 deletions blackjax/adaptation/step_size.py
@@ -77,7 +77,7 @@ def dual_averaging_adaptation(
the error at time t. We would like to find a procedure that adapts the
value of :math:`\\epsilon` such that :math:`h(x) =\\mathbb{E}\\left[H_t|\\epsilon\\right] = 0`
Following [Nesterov2009]_, the authors of [Hoffman2014]_ proposed the following update scheme. If
Following [Nes09]_, the authors of [HG14]_ proposed the following update scheme. If
we note :math:`x = \\log \\epsilon` we follow:
.. math:
@@ -88,21 +88,21 @@ def dual_averaging_adaptation(
:math:`h(\\overline{x}_t)` converges to 0, i.e. the Metropolis acceptance
rate converges to the desired rate.
See reference [Hoffman2014a]_ (section 3.2.1) for a detailed discussion.
See reference [HG14]_ (section 3.2.1) for a detailed discussion.
Parameters
----------
t0: float >= 0
Free parameter that stabilizes the initial iterations of the algorithm.
Large values may slow down convergence. Introduced in [Hoffman2014a]_ with a default
value of 10.
Large values may slow down convergence. Introduced in [HG14]_ with a
default value of 10.
gamma:
Controls the speed of convergence of the scheme. The authors of [Hoffman2014a]_ recommend
a value of 0.05.
Controls the speed of convergence of the scheme. The authors of
[HG14]_ recommend a value of 0.05.
kappa: float in ]0.5, 1]
Controls the weights of past steps in the current update. The scheme will
quickly forget earlier steps for a small value of `kappa`. Introduced
in [Hoffman2014a]_, with a recommended value of .75
quickly forget earlier steps for a small value of `kappa`. Introduced in
[HG14]_, with a recommended value of .75
target:
Target acceptance rate.
@@ -116,9 +116,9 @@ def dual_averaging_adaptation(
References
----------
.. [Nesterov2009] Nesterov, Yurii. "Primal-dual subgradient methods for convex
.. [Nes09] Nesterov, Yurii. "Primal-dual subgradient methods for convex
problems." Mathematical programming 120.1 (2009): 221-259.
.. [Hoffman2014a] Hoffman, Matthew D., and Andrew Gelman. "The No-U-Turn sampler:
.. [HG14] Hoffman, Matthew D., and Andrew Gelman. "The No-U-Turn sampler:
adaptively setting path lengths in Hamiltonian Monte Carlo." Journal
of Machine Learning Research 15.1 (2014): 1593-1623.
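For readers who want the update scheme spelled out, here is a minimal sketch of the dual averaging step from [HG14]_ (Algorithm 5). It is illustrative only: the variable names and the default target rate are assumptions, not the Blackjax implementation.

    import math

    def dual_averaging_step(m, h_bar, log_eps_bar, accept_prob, mu,
                            target=0.65, t0=10.0, gamma=0.05, kappa=0.75):
        # Running average of the acceptance error H_t = target - accept_prob.
        eta = 1.0 / (m + t0)
        h_bar = (1.0 - eta) * h_bar + eta * (target - accept_prob)
        # Noisy iterate for x = log(step_size), shrunk towards mu.
        log_eps = mu - (math.sqrt(m) / gamma) * h_bar
        # Averaged iterate; this is the value kept at the end of the warmup.
        weight = m ** (-kappa)
        log_eps_bar = weight * log_eps + (1.0 - weight) * log_eps_bar
        return h_bar, log_eps, log_eps_bar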
@@ -199,7 +199,7 @@ def find_reasonable_step_size(
value for the step size starting from any value, choosing a good first
value can speed up convergence. This heuristic doubles and halves the
step size until the acceptance probability of the HMC proposal crosses the
target value [Hoffman2014b]_.
target value [HG14]_.
Parameters
----------
@@ -226,7 +226,7 @@ def find_reasonable_step_size(
References
----------
.. [Hoffman2014b] Hoffman, Matthew D., and Andrew Gelman. "The No-U-Turn sampler:
.. [HG14] Hoffman, Matthew D., and Andrew Gelman. "The No-U-Turn sampler:
adaptively setting path lengths in Hamiltonian Monte Carlo." Journal
of Machine Learning Research 15.1 (2014): 1593-1623.
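The heuristic itself fits in a few lines. The sketch below assumes a generic `acceptance_prob(step_size)` callable; the real routine evaluates the acceptance probability of a single HMC transition at each trial value.

    def find_reasonable_step_size_sketch(acceptance_prob, step_size=1.0,
                                         target=0.65, max_iter=100):
        # A step size that is accepted too often is too small, so we double it;
        # otherwise we halve it.
        going_up = acceptance_prob(step_size) > target
        factor = 2.0 if going_up else 0.5
        for _ in range(max_iter):
            step_size *= factor
            # Stop as soon as the acceptance probability crosses the target value.
            if (acceptance_prob(step_size) <= target) == going_up:
                break
        return step_size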
6 changes: 4 additions & 2 deletions blackjax/kernels.py
@@ -442,7 +442,7 @@ def step_fn(rng_key: PRNGKey, state):


class mgrad_gaussian:
"""Implements the marginal sampler for latent Gaussian model of [1].
"""Implements the marginal sampler for latent Gaussian model of [TP18].
It uses a first-order approximation to the log_likelihood of a model with a Gaussian prior.
Interestingly, the only parameter that needs calibrating is the "step size" delta, and this calibration can be done very efficiently.
@@ -481,7 +481,9 @@ class mgrad_gaussian:
References
----------
[1]: Titsias, M.K. and Papaspiliopoulos, O. (2018), Auxiliary gradient-based sampling algorithms. J. R. Stat. Soc. B, 80: 749-767. https://doi.org/10.1111/rssb.12269
.. [TP18] Titsias, M.K. and Papaspiliopoulos, O. (2018),
Auxiliary gradient-based sampling algorithms. J. R. Stat. Soc. B, 80: 749-767. https://doi.org/10.1111/rssb.12269
"""

def __new__( # type: ignore[misc]
24 changes: 12 additions & 12 deletions blackjax/mcmc/integrators.py
@@ -49,15 +49,15 @@ def velocity_verlet(
) -> EuclideanIntegrator:
"""The velocity Verlet (or Verlet-Störmer) integrator.
The velocity Verlet is a two-stage palindromic integrator [BouRabee2018]_ of the form
The velocity Verlet is a two-stage palindromic integrator [BS18]_ of the form
(a1, b1, a2, b1, a1) with a1 = 0. It is numerically stable for values of
the step size that range between 0 and 2 (when the mass matrix is the
identity).
While the position (a1 = 0.5) and velocity Verlet are the most commonly used
in samplers, it is known in the numerical computation literature that the
value $a1 \approx 0.1932$ leads to a lower integration error
[McLachlan1995]_, [Schlick2010]_. The authors of [BouRabee2018]_ show that the
[McL95]_, [Sch10]_. The authors of [BS18]_ show that the
value $a1 \approx 0.21132$ leads to an even higher step acceptance rate, up
to 3 times higher than with the standard position verlet (p.22, Fig.4).
@@ -68,13 +68,13 @@
References
----------
.. [BouRabee2018] Bou-Rabee, Nawaf, and Jesús Marıa Sanz-Serna. "Geometric
.. [BS18] Bou-Rabee, Nawaf, and Jesús Marıa Sanz-Serna. "Geometric
integrators and the Hamiltonian Monte Carlo method." Acta Numerica 27
(2018): 113-206.
.. [McLachlan1995] McLachlan, Robert I. "On the numerical integration of ordinary
.. [McL95] McLachlan, Robert I. "On the numerical integration of ordinary
differential equations by symmetric composition methods." SIAM Journal on
Scientific Computing 16.1 (1995): 151-168.
.. [Schlick2010] Schlick, Tamar. Molecular modeling and simulation: an
.. [Sch10] Schlick, Tamar. Molecular modeling and simulation: an
interdisciplinary guide: Vol. 21. Springer
Science & Business Media, 2010.
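As a reminder of what such a palindromic scheme looks like in code, here is a generic velocity Verlet step written for plain arrays, under the convention H(q, p) = -log p(q) + K(p). It is a sketch, not the pytree-aware Blackjax integrator.

    def velocity_verlet_step(position, momentum, grad_logdensity, step_size, inv_mass=1.0):
        # Half "kick": advance the momentum with half a step of the potential force.
        momentum = momentum + 0.5 * step_size * grad_logdensity(position)
        # Full "drift": advance the position with a full step of the kinetic flow.
        position = position + step_size * inv_mass * momentum
        # Second half "kick" closes the palindrome.
        momentum = momentum + 0.5 * step_size * grad_logdensity(position)
        return position, momentum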
@@ -120,20 +120,20 @@ def mclachlan(
logdensity_fn: Callable,
kinetic_energy_fn: Callable,
) -> EuclideanIntegrator:
"""Two-stage palindromic symplectic integrator derived in [Blanes2014]_
"""Two-stage palindromic symplectic integrator derived in [BCS14]_
The integrator is of the form (b1, a1, b2, a1, b1). The choice of the parameters
determines both the bound on the integration error and the stability of the
method with respect to the value of `step_size`. The values used here are
the ones derived in [McLachlan1995]_; note that [Blanes2014]_ is more focused on stability
the ones derived in [McL95]_; note that [BCS14]_ is more focused on stability
and derives different values.
References
----------
.. [Blanes2014] Blanes, Sergio, Fernando Casas, and J. M. Sanz-Serna. "Numerical
.. [BCS14] Blanes, Sergio, Fernando Casas, and J. M. Sanz-Serna. "Numerical
integrators for the Hybrid Monte Carlo method." SIAM Journal on Scientific
Computing 36.4 (2014): A1556-A1580.
.. [McLachlan1995] McLachlan, Robert I. "On the numerical integration of ordinary
.. [McL95] McLachlan, Robert I. "On the numerical integration of ordinary
differential equations by symmetric composition methods." SIAM Journal on
Scientific Computing 16.1 (1995): 151-168.
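To make the (b1, a1, b2, a1, b1) notation concrete, here is a sketch of a generic two-stage step. The outer coefficient uses the value ≈ 0.1932 mentioned in the velocity Verlet docstring, with a1 = 0.5 and b2 = 1 - 2*b1; these values and the layout are assumptions for illustration, not copied from the Blackjax source.

    B1 = 0.1932          # outer two-stage coefficient (approximate)
    A1 = 0.5
    B2 = 1.0 - 2.0 * B1

    def two_stage_step(position, momentum, grad_logdensity, step_size, inv_mass=1.0):
        # Palindrome (b1, a1, b2, a1, b1): kick, drift, kick, drift, kick.
        momentum = momentum + B1 * step_size * grad_logdensity(position)
        position = position + A1 * step_size * inv_mass * momentum
        momentum = momentum + B2 * step_size * grad_logdensity(position)
        position = position + A1 * step_size * inv_mass * momentum
        momentum = momentum + B1 * step_size * grad_logdensity(position)
        return position, momentum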
@@ -194,17 +194,17 @@ def yoshida(
logdensity_fn: Callable,
kinetic_energy_fn: Callable,
) -> EuclideanIntegrator:
"""Three stages palindromic symplectic integrator derived in [Blanes2014]_
"""Three stages palindromic symplectic integrator derived in [BCS14]_
The integrator is of the form (b1, a1, b2, a2, b2, a1, b1). The choice of
the parameters determines both the bound on the integration error and the
stability of the method with respect to the value of `step_size`. The
values used here are the ones derived in [Blanes2014]_ which guarantees a stability
values used here are the ones derived in [BCS14]_ which guarantees a stability
interval length approximately equal to 4.67.
References
----------
.. [Blanes2014] Blanes, Sergio, Fernando Casas, and J. M. Sanz-Serna. "Numerical
.. [BCS14] Blanes, Sergio, Fernando Casas, and J. M. Sanz-Serna. "Numerical
integrators for the Hybrid Monte Carlo method." SIAM Journal on Scientific
Computing 36.4 (2014): A1556-A1580.
15 changes: 9 additions & 6 deletions blackjax/mcmc/metrics.py
@@ -17,20 +17,21 @@
position space in the Euclidean metric. It is defined by a positive definite
matrix :math:`M` with a fixed value so that the kinetic energy of the Hamiltonian
dynamic is independent of the position and only depends on the momentum
:math:`p` [1]_.
:math:`p` [Bet17]_.
For a Newtonian Hamiltonian dynamic, the kinetic energy is given by:
.. math::
K(p) = \frac{1}{2} p^T M^{-1} p
We can also generate a relativistic dynamic [2]_.
We can also generate a relativistic dynamic [LPH+17]_.
References
----------
.. [1] Betancourt, Michael, et al. "The geometric foundations of hamiltonian
.. [Bet17] Betancourt, Michael, et al. "The geometric foundations of hamiltonian
monte carlo." Bernoulli 23.4A (2017): 2257-2298.
.. [2] Lu, Xiaoyu, et al. "Relativistic monte carlo." Artificial Intelligence and Statistics. PMLR, 2017.
.. [LPH+17] Lu, Xiaoyu, et al. "Relativistic monte carlo."
Artificial Intelligence and Statistics. PMLR, 2017.
"""
from typing import Callable, Tuple
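A worked version of the kinetic energy formula above, as a small JAX sketch for a dense inverse mass matrix (illustrative only; the Blackjax metric factory also handles vector-valued, i.e. diagonal, mass matrices):

    import jax.numpy as jnp

    def euclidean_kinetic_energy(momentum, inverse_mass_matrix):
        # K(p) = 0.5 * p^T M^{-1} p, with M^{-1} supplied as a dense matrix.
        return 0.5 * jnp.dot(momentum, jnp.dot(inverse_mass_matrix, momentum))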
@@ -125,8 +126,10 @@ def is_turning(
momentum_sum
Sum of the momenta along the trajectory.
.. [Betancourt2013b]: Betancourt, Michael J. "Generalizing the no-U-turn sampler to Riemannian manifolds." arXiv preprint arXiv:1304.1920 (2013).
.. [Stan]: "NUTS misses U-turns, runs in circles until max treedepth", Stan Discourse Forum
References
----------
.. [Bet13] Betancourt, Michael J. "Generalizing the no-U-turn sampler to Riemannian manifolds." arXiv preprint arXiv:1304.1920 (2013).
.. [Sta19] "NUTS misses U-turns, runs in circles until max treedepth", Stan Discourse Forum
https://discourse.mc-stan.org/t/nuts-misses-u-turns-runs-in-circles-until-max-treedepth/9727/46
"""
32 changes: 16 additions & 16 deletions blackjax/mcmc/nuts.py
@@ -80,13 +80,13 @@ def kernel(
):
"""Build an iterative NUTS kernel.
This algorithm is an iteration on the original NUTS algorithm [Hoffman2014]_
This algorithm is an iteration on the original NUTS algorithm [HG14]_
with two major differences:
- We do not use slice sampling but multinomial sampling for the proposal
[Betancourt2017]_;
- The trajectory expansion is not recursive but iterative [Phan2019]_,
[Lao2020]_.
[Bet17]_;
- The trajectory expansion is not recursive but iterative [PPJ19]_,
[LSL+20]_.
The implementation can seem unusual for those familiar with similar
algorithms. Indeed, we do not conceptualize the trajectory construction as
@@ -114,18 +114,18 @@
References
----------
.. [Hoffman2014] Hoffman, Matthew D., and Andrew Gelman.
"The No-U-Turn sampler: adaptively setting path lengths in Hamiltonian Monte Carlo."
J. Mach. Learn. Res. 15.1 (2014): 1593-1623.
.. [Betancourt2017] Betancourt, Michael.
"A conceptual introduction to Hamiltonian Monte Carlo."
arXiv preprint arXiv:1701.02434 (2017).
.. [Phan2019] Phan, Du, Neeraj Pradhan, and Martin Jankowiak.
"Composable effects for flexible and accelerated probabilistic programming in NumPyro."
arXiv preprint arXiv:1912.11554 (2019).
.. [Lao2020] Lao, Junpeng, et al.
"tfp. mcmc: Modern markov chain monte carlo tools built for modern hardware."
arXiv preprint arXiv:2002.01184 (2020).
.. [HG14] Hoffman, Matthew D., and Andrew Gelman.
"The No-U-Turn sampler: adaptively setting path lengths in Hamiltonian Monte Carlo."
J. Mach. Learn. Res. 15.1 (2014): 1593-1623.
.. [Bet17] Betancourt, Michael.
"A conceptual introduction to Hamiltonian Monte Carlo."
arXiv preprint arXiv:1701.02434 (2017).
.. [PPJ19] Phan, Du, Neeraj Pradhan, and Martin Jankowiak.
"Composable effects for flexible and accelerated probabilistic programming in NumPyro."
arXiv preprint arXiv:1912.11554 (2019).
.. [LSL+20] Lao, Junpeng, et al.
"tfp. mcmc: Modern markov chain monte carlo tools built for modern hardware."
arXiv preprint arXiv:2002.01184 (2020).
"""

14 changes: 7 additions & 7 deletions blackjax/mcmc/proposal.py
@@ -160,9 +160,9 @@ def progressive_biased_sampling(rng_key, proposal, new_proposal):
References
----------
.. [Betancourt2017] Betancourt, Michael.
"A conceptual introduction to Hamiltonian Monte Carlo."
arXiv preprint arXiv:1701.02434 (2017).
.. [Bet17] Betancourt, Michael.
"A conceptual introduction to Hamiltonian Monte Carlo."
arXiv preprint arXiv:1701.02434 (2017).
"""
p_accept = jnp.clip(jnp.exp(new_proposal.weight - proposal.weight), a_max=1)
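To show how these log-weights turn into an accept/reject decision, a short sketch (the function and argument names are illustrative, not the module's actual helpers):

    import jax
    import jax.numpy as jnp

    def progressive_biased_sample(rng_key, weight, new_weight):
        # Accept the new proposal with probability min(1, exp(new_weight - weight)),
        # which biases sampling towards the most recently expanded part of the trajectory.
        p_accept = jnp.clip(jnp.exp(new_weight - weight), a_max=1.0)
        do_accept = jax.random.bernoulli(rng_key, p_accept)
        return do_accept, p_accept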
@@ -199,14 +199,14 @@ def nonreversible_slice_sampling(slice, proposal, new_proposal):
"""Slice sampling for non-reversible Metropolis-Hasting update.
Performs a non-reversible update of a uniform [0, 1] value
for Metropolis-Hastings accept/reject decisions [Neal2020]_, in addition
for Metropolis-Hastings accept/reject decisions [Nea20]_, in addition
to the accept/reject step of a current state and new proposal.
References
----------
.. [Neal2020] Neal, R. M. (2020).
"Non-reversibly updating a uniform [0, 1] value for Metropolis accept/reject decisions."
arXiv preprint arXiv:2001.11950.
.. [Nea20] Neal, R. M. (2020).
"Non-reversibly updating a uniform [0, 1] value for Metropolis accept/reject decisions."
arXiv preprint arXiv:2001.11950.
"""
delta_energy = new_proposal.weight
9 changes: 5 additions & 4 deletions blackjax/mcmc/trajectory.py
@@ -13,7 +13,7 @@
# limitations under the License.
"""Procedures to build trajectories for algorithms in the HMC family.
To propose a new state, algorithms in the HMC family generally proceed by [Betancourt2017]_:
To propose a new state, algorithms in the HMC family generally proceed by [Bet17]_:
1. Sampling a trajectory starting from the initial point;
2. Sampling a new state from this sampled trajectory.
@@ -38,7 +38,7 @@
References
----------
.. [Betancourt2017] Betancourt, Michael.
.. [Bet17] Betancourt, Michael.
"A conceptual introduction to Hamiltonian Monte Carlo."
arXiv preprint arXiv:1701.02434 (2017).
@@ -310,7 +310,7 @@ def dynamic_recursive_integration(
"""Integrate a trajectory and update the proposal recursively in Python
until the termination criterion is met.
This is the implementation of Algorithm 6 from [1]_ with multinomial sampling.
This is the implementation of Algorithm 6 from [HG14]_ with multinomial sampling.
The implementation here is mostly for validating the progressive implementation
to make sure the two are equivalent. The recursive implementation should not
be used for actual sampling as it cannot be jitted and is thus likely slow.
@@ -330,7 +330,8 @@
References
----------
.. [1] Hoffman, Matthew D., and Andrew Gelman. "The No-U-Turn sampler: adaptively setting path lengths in Hamiltonian Monte Carlo." J. Mach. Learn. Res. 15.1 (2014): 1593-1623.
.. [HG14] Hoffman, Matthew D., and Andrew Gelman. "The No-U-Turn sampler: adaptively setting path lengths in Hamiltonian Monte Carlo."
J. Mach. Learn. Res. 15.1 (2014): 1593-1623.
"""
_, generate_proposal = proposal_generator(kinetic_energy, divergence_threshold)
20 changes: 10 additions & 10 deletions blackjax/sgmcmc/csgld.py
@@ -2,14 +2,14 @@
References
----------
.. [0]: Deng, W., Lin, G., Liang, F. (2020).
A Contour Stochastic Gradient Langevin Dynamics Algorithm
for Simulations of Multi-modal Distributions.
In Neural Information Processing Systems (NeurIPS 2020).
.. [1]: Deng, W., Liang, S., Hao, B., Lin, G., Liang, F. (2022)
Interacting Contour Stochastic Gradient Langevin Dynamics
In International Conference on Learning Representations (ICLR)
.. [DLL20] Deng, W., Lin, G., Liang, F. (2020).
A Contour Stochastic Gradient Langevin Dynamics Algorithm
for Simulations of Multi-modal Distributions.
In Neural Information Processing Systems (NeurIPS 2020).
.. [DLH+22] Deng, W., Liang, S., Hao, B., Lin, G., Liang, F. (2022)
Interacting Contour Stochastic Gradient Langevin Dynamics
In International Conference on Learning Representations (ICLR)
"""
from typing import Callable, NamedTuple

@@ -122,12 +122,12 @@ def one_step(
References
----------
.. [Deng2020] Deng, W., Lin, G., Liang, F. (2020).
.. [DLL20] Deng, W., Lin, G., Liang, F. (2020).
A Contour Stochastic Gradient Langevin Dynamics Algorithm
for Simulations of Multi-modal Distributions.
In Neural Information Processing Systems (NeurIPS 2020).
.. [Deng2022] Deng, W., Liang, S., Hao, B., Lin, G., Liang, F. (2022)
.. [DLH+22] Deng, W., Liang, S., Hao, B., Lin, G., Liang, F. (2022)
Interacting Contour Stochastic Gradient Langevin Dynamics
In International Conference on Learning Representations (ICLR)
"""
12 changes: 6 additions & 6 deletions blackjax/sgmcmc/diffusions.py
@@ -24,11 +24,11 @@
def overdamped_langevin():
"""Euler solver for overdamped Langevin diffusion.
This algorithm was ported from [Coullon2022]_.
This algorithm was ported from [CN22]_.
References
----------
.. [Coullon2022] Coullon, J., & Nemeth, C. (2022). SGMCMCJax: a lightweight JAX
.. [CN22] Coullon, J., & Nemeth, C. (2022). SGMCMCJax: a lightweight JAX
library for stochastic gradient Markov chain Monte Carlo algorithms.
Journal of Open Source Software, 7(72), 4113.
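The Euler (Euler-Maruyama) update in question is short enough to spell out. The sketch below takes a pre-computed, possibly minibatch, gradient of the log-density and works on a single array; it is not the pytree-aware Blackjax implementation.

    import jax
    import jax.numpy as jnp

    def overdamped_langevin_step(rng_key, position, logdensity_grad, step_size):
        # x_{t+1} = x_t + h * grad log p(x_t) + sqrt(2 h) * N(0, I)
        noise = jax.random.normal(rng_key, position.shape)
        return position + step_size * logdensity_grad + jnp.sqrt(2.0 * step_size) * noise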
@@ -57,16 +57,16 @@ def one_step(


def sghmc(alpha: float = 0.01, beta: float = 0):
"""Solver for the diffusion equation of the SGHMC algorithm [Chen2014]_.
"""Solver for the diffusion equation of the SGHMC algorithm [CFG14]_.
This algorithm was ported from [Coullon2022]_.
This algorithm was ported from [CN22]_.
References
----------
.. [Chen2014] Chen, T., Fox, E., & Guestrin, C. (2014, June). Stochastic
.. [CFG14] Chen, T., Fox, E., & Guestrin, C. (2014, June). Stochastic
gradient hamiltonian monte carlo. In International conference on
machine learning (pp. 1683-1691). PMLR.
.. [Coullon2022] Coullon, J., & Nemeth, C. (2022). SGMCMCJax: a lightweight JAX
.. [CN22] Coullon, J., & Nemeth, C. (2022). SGMCMCJax: a lightweight JAX
library for stochastic gradient Markov chain Monte Carlo algorithms.
Journal of Open Source Software, 7(72), 4113.