"""Module for Maximum Likelihood Estimation."""
from pyhf import get_backend
from pyhf.exceptions import UnspecifiedPOI
__all__ = ["fit", "fixed_poi_fit", "twice_nll"]
def __dir__():
return __all__
[docs]
def twice_nll(pars, data, pdf):
r"""
Two times the negative log-likelihood of the model parameters, :math:`\left(\mu, \boldsymbol{\theta}\right)`, given the observed data.
It is used in the calculation of the test statistic, :math:`t_{\mu}`, as defined in Equation (8) in :xref:`arXiv:1007.1727`
.. math::
t_{\mu} = -2\ln\lambda\left(\mu\right)
where :math:`\lambda\left(\mu\right)` is the profile likelihood ratio as defined in Equation (7)
.. math::
\lambda\left(\mu\right) = \frac{L\left(\mu, \hat{\hat{\boldsymbol{\theta}}}\right)}{L\left(\hat{\mu}, \hat{\boldsymbol{\theta}}\right)}\,.
It serves as the objective function to minimize in :func:`~pyhf.infer.mle.fit`
and :func:`~pyhf.infer.mle.fixed_poi_fit`.
Example:
>>> import pyhf
>>> pyhf.set_backend("numpy")
>>> model = pyhf.simplemodels.uncorrelated_background(
... signal=[12.0, 11.0], bkg=[50.0, 52.0], bkg_uncertainty=[3.0, 7.0]
... )
>>> observations = [51, 48]
>>> data = pyhf.tensorlib.astensor(observations + model.config.auxdata)
>>> parameters = model.config.suggested_init() # nominal parameters
>>> twice_nll = pyhf.infer.mle.twice_nll(parameters, data, model)
>>> twice_nll
array([30.77525435])
>>> -2 * model.logpdf(parameters, data) == twice_nll
array([ True])
Args:
pars (:obj:`tensor`): The parameters of the HistFactory model
data (:obj:`tensor`): The data to be considered
pdf (~pyhf.pdf.Model): The statistical model adhering to the schema model.json
Returns:
Tensor: Two times the negative log-likelihood, :math:`-2\ln L\left(\mu, \boldsymbol{\theta}\right)`
"""
return -2 * pdf.logpdf(pars, data)
def _validate_fit_inputs(init_pars, par_bounds, fixed_params): # noqa: ARG001
for par_idx, (value, bound) in enumerate(zip(init_pars, par_bounds)):
if not (bound[0] <= value <= bound[1]):
msg = (
f"fit initialization parameter (index: {par_idx}, value: {value}) lies outside of its bounds: {bound}"
"\nTo correct this adjust the initialization parameter values in the model spec or those given"
"\nas arguments to pyhf.infer.fit. If this value is intended, adjust the range of the parameter"
"\nbounds."
)
raise ValueError(msg)
[docs]
def fit(data, pdf, init_pars=None, par_bounds=None, fixed_params=None, **kwargs):
r"""
Run a maximum likelihood fit.
This is done by minimizing the objective function :func:`~pyhf.infer.mle.twice_nll`
of the model parameters given the observed data.
This is used to produce the maximal likelihood :math:`L\left(\hat{\mu}, \hat{\boldsymbol{\theta}}\right)`
in the profile likelihood ratio in Equation (7) in :xref:`arXiv:1007.1727`
.. math::
\lambda\left(\mu\right) = \frac{L\left(\mu, \hat{\hat{\boldsymbol{\theta}}}\right)}{L\left(\hat{\mu}, \hat{\boldsymbol{\theta}}\right)}
.. note::
:func:`twice_nll` is the objective function given to the optimizer and
is returned evaluated at the best fit model parameters when the optional
kwarg ``return_fitted_val`` is ``True``.
Example:
>>> import pyhf
>>> import numpy as np
>>> pyhf.set_backend("numpy")
>>> model = pyhf.simplemodels.uncorrelated_background(
... signal=[12.0, 11.0], bkg=[50.0, 52.0], bkg_uncertainty=[3.0, 7.0]
... )
>>> observations = [51, 48]
>>> data = pyhf.tensorlib.astensor(observations + model.config.auxdata)
>>> bestfit_pars, twice_nll = pyhf.infer.mle.fit(data, model, return_fitted_val=True)
>>> np.isclose(bestfit_pars, [0. , 1.0030512 , 0.96266961])
array([ True, True, True])
>>> twice_nll
array(24.98393521)
>>> -2 * model.logpdf(bestfit_pars, data) == twice_nll
array([ True])
Args:
data (:obj:`tensor`): The data
pdf (~pyhf.pdf.Model): The statistical model adhering to the schema model.json
init_pars (:obj:`list` of :obj:`float`): The starting values of the model parameters for minimization.
par_bounds (:obj:`list` of :obj:`list`/:obj:`tuple`): The extrema of values the model parameters
are allowed to reach in the fit.
The shape should be ``(n, 2)`` for ``n`` model parameters.
fixed_params (:obj:`tuple` or :obj:`list` of :obj:`bool`): The flag to set a parameter
constant to its starting value during minimization.
kwargs: Keyword arguments passed through to the optimizer API
Returns:
See optimizer API
"""
_, opt = get_backend()
init_pars = init_pars or pdf.config.suggested_init()
par_bounds = par_bounds or pdf.config.suggested_bounds()
fixed_params = fixed_params or pdf.config.suggested_fixed()
_validate_fit_inputs(init_pars, par_bounds, fixed_params)
# get fixed vals from the model
fixed_vals = [
(index, init)
for index, (init, is_fixed) in enumerate(zip(init_pars, fixed_params))
if is_fixed
]
return opt.minimize(
twice_nll, data, pdf, init_pars, par_bounds, fixed_vals, **kwargs
)
[docs]
def fixed_poi_fit(
poi_val, data, pdf, init_pars=None, par_bounds=None, fixed_params=None, **kwargs
):
r"""
Run a maximum likelihood fit with the POI value fixed.
This is done by minimizing the objective function of :func:`~pyhf.infer.mle.twice_nll`
of the model parameters given the observed data, for a given fixed value of :math:`\mu`.
This is used to produce the constrained maximal likelihood for the given :math:`\mu`,
:math:`L\left(\mu, \hat{\hat{\boldsymbol{\theta}}}\right)`, in the profile
likelihood ratio in Equation (7) in :xref:`arXiv:1007.1727`
.. math::
\lambda\left(\mu\right) = \frac{L\left(\mu, \hat{\hat{\boldsymbol{\theta}}}\right)}{L\left(\hat{\mu}, \hat{\boldsymbol{\theta}}\right)}
.. note::
:func:`twice_nll` is the objective function given to the optimizer and
is returned evaluated at the best fit model parameters when the optional
kwarg ``return_fitted_val`` is ``True``.
Example:
>>> import pyhf
>>> import numpy as np
>>> pyhf.set_backend("numpy")
>>> model = pyhf.simplemodels.uncorrelated_background(
... signal=[12.0, 11.0], bkg=[50.0, 52.0], bkg_uncertainty=[3.0, 7.0]
... )
>>> observations = [51, 48]
>>> data = pyhf.tensorlib.astensor(observations + model.config.auxdata)
>>> test_poi = 1.0
>>> bestfit_pars, twice_nll = pyhf.infer.mle.fixed_poi_fit(
... test_poi, data, model, return_fitted_val=True
... )
>>> np.isclose(bestfit_pars, [1. , 0.97224597, 0.87553894])
array([ True, True, True])
>>> twice_nll
array(28.92218013)
>>> -2 * model.logpdf(bestfit_pars, data) == twice_nll
array([ True])
Args:
data: The data
pdf (~pyhf.pdf.Model): The statistical model adhering to the schema model.json
init_pars (:obj:`list` of :obj:`float`): The starting values of the model parameters for minimization.
par_bounds (:obj:`list` of :obj:`list`/:obj:`tuple`): The extrema of values the model parameters
are allowed to reach in the fit.
The shape should be ``(n, 2)`` for ``n`` model parameters.
fixed_params (:obj:`tuple` or :obj:`list` of :obj:`bool`): The flag to set a parameter
constant to its starting value during minimization.
kwargs: Keyword arguments passed through to the optimizer API
Returns:
See optimizer API
"""
if pdf.config.poi_index is None:
msg = "No POI is defined. A POI is required to fit with a fixed POI."
raise UnspecifiedPOI(msg)
init_pars = [*(init_pars or pdf.config.suggested_init())]
fixed_params = [*(fixed_params or pdf.config.suggested_fixed())]
init_pars[pdf.config.poi_index] = poi_val
fixed_params[pdf.config.poi_index] = True
return fit(data, pdf, init_pars, par_bounds, fixed_params, **kwargs)