"""
This module implements empirical likelihood regression that is forced through
the origin.
This is different than regression not forced through the origin because the
maximum empirical likelihood estimate is calculated with a vector of ones in
the exogenous matrix but restricts the intercept parameter to be 0. This
results in significantly more narrow confidence intervals and different
parameter estimates.
For notes on regression not forced through the origin, see empirical likelihood
methods in the OLSResults class.
General References
------------------
Owen, A.B. (2001). Empirical Likelihood. Chapman and Hall. p. 82.
"""
import numpy as np
from scipy.stats import chi2
from scipy import optimize
# When descriptive merged, this will be changed
from statsmodels.tools.tools import add_constant
from statsmodels.regression.linear_model import OLS, RegressionResults
[docs]class ELOriginRegress(object):
"""
Empirical Likelihood inference and estimation for linear regression
through the origin
Parameters
----------
endog: nx1 array
Array of response variables
exog: nxk array
Array of exogenous variables. Assumes no array of ones
Attributes
----------
endog : nx1 array
Array of response variables
exog : nxk array
Array of exogenous variables. Assumes no array of ones
nobs : float
Number of observations
nvar : float
Number of exogenous regressors
"""
[docs] def __init__(self, endog, exog):
self.endog = endog
self.exog = exog
self.nobs = float(self.exog.shape[0])
try:
self.nvar = float(exog.shape[1])
except IndexError:
self.nvar = 1.
[docs] def fit(self):
"""
Fits the model and provides regression results.
Returns
-------
Results: class
Empirical likelihood regression class
"""
exog_with = add_constant(self.exog, prepend=True)
restricted_model = OLS(self.endog, exog_with)
restricted_fit = restricted_model.fit()
restricted_el = restricted_fit.el_test(
np.array([0]), np.array([0]), ret_params=1)
params = np.squeeze(restricted_el[3])
beta_hat_llr = restricted_el[0]
llf = np.sum(np.log(restricted_el[2]))
return OriginResults(restricted_model, params, beta_hat_llr, llf)
[docs] def predict(self, params, exog=None):
if exog is None:
exog = self.exog
return np.dot(add_constant(exog, prepend=True), params)
[docs]class OriginResults(RegressionResults):
"""
A Results class for empirical likelihood regression through the origin
Parameters
----------
model : class
An OLS model with an intercept
params : 1darray
Fitted parameters
est_llr : float
The log likelihood ratio of the model with the intercept restricted to
0 at the maximum likelihood estimates of the parameters.
llr_restricted/llr_unrestricted
llf_el : float
The log likelihood of the fitted model with the intercept restricted to 0.
Attributes
----------
model : class
An OLS model with an intercept
params : 1darray
Fitted parameter
llr : float
The log likelihood ratio of the maximum empirical likelihood estimate
llf_el : float
The log likelihood of the fitted model with the intercept restricted to 0
Notes
-----
IMPORTANT. Since EL estimation does not drop the intercept parameter but
instead estimates the slope parameters conditional on the slope parameter
being 0, the first element for params will be the intercept, which is
restricted to 0.
IMPORTANT. This class inherits from RegressionResults but inference is
conducted via empirical likelihood. Therefore, any methods that
require an estimate of the covariance matrix will not function. Instead
use el_test and conf_int_el to conduct inference.
Examples
--------
>>> import statsmodels.api as sm
>>> import numpy as np
>>> data = sm.datasets.bc.load()
>>> model = sm.emplike.OriginRegress(data.endog, data.exog)
>>> fitted = model.fit()
>>> fitted.params
>>> array([ 0. , 0.00351813])
>>> # The 0 is the intercept term.
>>> fitted.el_test(np.array([.0034]), np.array([1]))
>>> (3.6696503297979302, 0.055411808127497755)
>>> fitted.conf_int_el(1)
>>> (0.0033971871114706867, 0.0036373150174892847
>>> fitted.conf_int()
>>> TypeError: unsupported operand type(s) for *: 'instancemethod' and 'float'
>>> # No covariance matrix so normal inference is not valid
"""
[docs] def __init__(self, model, params, est_llr, llf_el):
self.model = model
self.params = np.squeeze(params)
self.llr = est_llr
self.llf_el = llf_el
[docs] def el_test(self, b0_vals, param_nums, method='nm',
stochastic_exog=1, return_weights=0):
"""
Returns the llr and p-value for a hypothesized parameter value
for a regression that goes through the origin
Parameters
----------
b0_vals : 1darray
The hypothesized value to be tested
param_num : 1darray
Which parameters to test. Note this uses python
indexing but the '0' parameter refers to the intercept term,
which is assumed 0. Therefore, param_num should be > 0.
return_weights : bool
If true, returns the weights that optimize the likelihood
ratio at b0_vals. Default is False
method : string
Can either be 'nm' for Nelder-Mead or 'powell' for Powell. The
optimization method that optimizes over nuisance parameters.
Default is 'nm'
stochastic_exog : bool
When TRUE, the exogenous variables are assumed to be stochastic.
When the regressors are nonstochastic, moment conditions are
placed on the exogenous variables. Confidence intervals for
stochastic regressors are at least as large as non-stochastic
regressors. Default is TRUE
Returns
-------
res : tuple
pvalue and likelihood ratio
"""
b0_vals = np.hstack((0, b0_vals))
param_nums = np.hstack((0, param_nums))
test_res = self.model.fit().el_test(b0_vals, param_nums, method=method,
stochastic_exog=stochastic_exog,
return_weights=return_weights)
llr_test = test_res[0]
llr_res = llr_test - self.llr
pval = chi2.sf(llr_res, self.model.exog.shape[1] - 1)
if return_weights:
return llr_res, pval, test_res[2]
else:
return llr_res, pval
[docs] def conf_int_el(self, param_num, upper_bound=None,
lower_bound=None, sig=.05, method='nm',
stochastic_exog=1):
"""
Returns the confidence interval for a regression parameter when the
regression is forced through the origin
Parameters
----------
param_num : int
The parameter number to be tested. Note this uses python
indexing but the '0' parameter refers to the intercept term
upper_bound : float
The maximum value the upper confidence limit can be. The
closer this is to the confidence limit, the quicker the
computation. Default is .00001 confidence limit under normality
lower_bound : float
The minimum value the lower confidence limit can be.
Default is .00001 confidence limit under normality
sig : float, optional
The significance level. Default .05
method : str, optional
Algorithm to optimize of nuisance params. Can be 'nm' or
'powell'. Default is 'nm'.
Returns
-------
ci: tuple
The confidence interval for the parameter 'param_num'
"""
r0 = chi2.ppf(1 - sig, 1)
param_num = np.array([param_num])
if upper_bound is None:
upper_bound = (np.squeeze(self.model.fit().
conf_int(.0001)[param_num])[1])
if lower_bound is None:
lower_bound = (np.squeeze(self.model.fit().conf_int(.00001)
[param_num])[0])
f = lambda b0: self.el_test(np.array([b0]), param_num,
method=method,
stochastic_exog=stochastic_exog)[0] - r0
lowerl = optimize.brentq(f, lower_bound, self.params[param_num])
upperl = optimize.brentq(f, self.params[param_num], upper_bound)
return (lowerl, upperl)