# -*- coding: utf-8 -*-
"""
Created on Tue Dec 20 20:24:20 2011
Author: Josef Perktold
License: BSD-3
"""
from statsmodels.compat.python import range
import numpy as np
import statsmodels.base.model as base
from statsmodels.regression.linear_model import OLS, GLS, WLS, RegressionResults
[docs]def atleast_2dcols(x):
x = np.asarray(x)
if x.ndim == 1:
x = x[:,None]
return x
[docs]class GLSHet2(GLS):
'''WLS with heteroscedasticity that depends on explanatory variables
note: mixing GLS sigma and weights for heteroscedasticity might not make
sense
I think rewriting following the pattern of GLSAR is better
stopping criteria: improve in GLSAR also, e.g. change in rho
'''
[docs] def __init__(self, endog, exog, exog_var, sigma=None):
self.exog_var = atleast_2dcols(exog_var)
super(self.__class__, self).__init__(endog, exog, sigma=sigma)
[docs] def fit(self, lambd=1.):
#maybe iterate
#preliminary estimate
res_gls = GLS(self.endog, self.exog, sigma=self.sigma).fit()
res_resid = OLS(res_gls.resid**2, self.exog_var).fit()
#or log-link
#res_resid = OLS(np.log(res_gls.resid**2), self.exog_var).fit()
#here I could use whiten and current instance instead of delegating
#but this is easier
#see pattern of GLSAR, calls self.initialize and self.fit
res_wls = WLS(self.endog, self.exog, weights=1./res_resid.fittedvalues).fit()
res_wls._results.results_residual_regression = res_resid
return res_wls
[docs]class GLSHet(WLS):
"""
A regression model with an estimated heteroscedasticity.
A subclass of WLS, that additionally estimates the weight matrix as a
function of additional explanatory variables.
Parameters
----------
endog : array_like
exog : array_like
exog_var : array_like, 1d or 2d
regressors, explanatory variables for the variance
weights : array_like or None
If weights are given, then they are used in the first step estimation.
link : link function or None
If None, then the variance is assumed to be a linear combination of
the exog_var. If given, then ... not tested yet
*extra attributes*
history : dict
contains the parameter estimates in both regression for each iteration
result instance has
results_residual_regression : OLS result instance
result of heteroscedasticity estimation
except for fit_iterative all methods are inherited from WLS.
Notes
-----
GLSHet is considered to be experimental.
`fit` is just standard WLS fit for fixed weights
`fit_iterative` updates the estimate for weights, see its docstring
The two alternative for handling heteroscedasticity in the data are to
use heteroscedasticity robust standard errors or estimating the
heteroscedasticity
Estimating heteroscedasticity and using weighted least squares produces
smaller confidence intervals for the estimated parameters then the
heteroscedasticity robust standard errors if the heteroscedasticity is
correctly specified. If the heteroscedasticity is incorrectly specified
then the estimated covariance is inconsistent.
Stock and Watson for example argue in favor of using OLS with
heteroscedasticity robust standard errors instead of GLSHet sind we are
seldom sure enough about the correct specification (in economics).
GLSHet has asymptotically the same distribution as WLS if the true
weights are know. In both cases the asymptotic distribution of the
parameter estimates is the normal distribution.
The assumption of the model:
y = X*beta + u,
with E(u) = 0, E(X*u)=0, var(u_i) = z_i*gamma
or for vector of all observations Sigma = diag(Z*gamma)
where
y : endog (nobs)
X : exog (nobs, k_vars)
Z : exog_var (nobs, k_vars2)
beta, gamma estimated parameters
If a link is specified, then the heteroscedasticity is
var(u_i) = link.inverse(z_i*gamma), or
link(var(u_i)) = z_i*gamma
for example for log-linkg
var(u_i) = exp(z_i*gamma)
Usage : see example ....
TODO: test link option
"""
[docs] def __init__(self, endog, exog, exog_var=None, weights=None, link=None):
self.exog_var = atleast_2dcols(exog_var)
if weights is None:
weights = np.ones(endog.shape)
if link is not None:
self.link = link
self.linkinv = link.inverse #as defined in families.links
else:
self.link = lambda x: x #no transformation
self.linkinv = lambda x: x
super(self.__class__, self).__init__(endog, exog, weights=weights)
[docs] def iterative_fit(self, maxiter=3):
"""
Perform an iterative two-step procedure to estimate a WLS model.
The model is assumed to have heteroscedastic errors.
The variance is estimated by OLS regression of the link transformed
squared residuals on Z, i.e.::
link(sigma_i) = x_i*gamma.
Parameters
----------
maxiter : integer, optional
the number of iterations
Notes
-----
maxiter=1: returns the estimated based on given weights
maxiter=2: performs a second estimation with the updated weights,
this is 2-step estimation
maxiter>2: iteratively estimate and update the weights
TODO: possible extension stop iteration if change in parameter
estimates is smaller than x_tol
Repeated calls to fit_iterative, will do one redundant pinv_wexog
calculation. Calling fit_iterative(maxiter) ones does not do any
redundant recalculations (whitening or calculating pinv_wexog).
"""
import collections
self.history = collections.defaultdict(list) #not really necessary
res_resid = None #if maxiter < 2 no updating
for i in range(maxiter):
#pinv_wexog is cached
if hasattr(self, 'pinv_wexog'):
del self.pinv_wexog
#self.initialize()
#print 'wls self',
results = self.fit()
self.history['self_params'].append(results.params)
if not i == maxiter-1: #skip for last iteration, could break instead
#print 'ols',
self.results_old = results #for debugging
#estimate heteroscedasticity
res_resid = OLS(self.link(results.resid**2), self.exog_var).fit()
self.history['ols_params'].append(res_resid.params)
#update weights
self.weights = 1./self.linkinv(res_resid.fittedvalues)
self.weights /= self.weights.max() #not required
self.weights[self.weights < 1e-14] = 1e-14 #clip
#print 'in iter', i, self.weights.var() #debug, do weights change
self.initialize()
#note results is the wrapper, results._results is the results instance
results._results.results_residual_regression = res_resid
return results
if __name__ == '__main__':
pass