# -*- coding: utf-8 -*-
"""Panel data analysis for short T and large N

Created on Sat Dec 17 19:32:00 2011

Author: Josef Perktold
License: BSD-3


starting from scratch before looking at references again

just a stub to get the basic structure for group handling
target: outsource as much as possible for reuse

Notes
-----
This is the basic version using a loop over individuals, which will be more
widely applicable. Depending on the special case, faster implementations are
possible (sparse, Kronecker, ...).

The only two group-specific methods are get_within_cov and whiten.

"""
import numpy as np
from statsmodels.regression.linear_model import OLS, GLS
from statsmodels.tools.grouputils import Group, GroupSorted
#not used
class Unit(object):
    def __init__(self, endog, exog):
        self.endog = endog
        self.exog = exog


def sum_outer_product_loop(x, group_iter):
    '''sum outer product dot(x_i, x_i.T) over individuals

    loop version
    '''
    mom = 0
    for g in group_iter():
        x_g = x[g]
        #print('x_g.shape', x_g.shape)
        mom += np.outer(x_g, x_g)

    return mom


def sum_outer_product_balanced(x, n_groups):
    '''sum outer product dot(x_i, x_i.T) over individuals

    where x_i is (nobs_i, 1), and the result is (nobs_i, nobs_i)

    reshape-dot version, for x.ndim=1 only
    '''
    xrs = x.reshape(-1, n_groups, order='F')
    return np.dot(xrs, xrs.T)  #should be (nobs_i, nobs_i)

    #x.reshape(n_groups, nobs_i, k_vars) #, order='F')
    #... ? this is getting 3-dimensional dot, tensordot?
    #needs (n_groups, k_vars, k_vars) array with sum over groups
    #NOT
    #I only need this for x is 1d, i.e. residual
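

def _example_sum_outer_product():
    #Illustrative sketch, not part of the original module: for a balanced
    #panel with a 1-d array x stacked by individuals, the reshape-dot version
    #should agree with the loop version.  The helper name and the use of
    #GroupSorted here are assumptions for demonstration only.
    x = np.arange(6.)                #2 individuals with 3 periods each
    groups = np.repeat([0, 1], 3)    #stacked by individuals
    gs = GroupSorted(groups)
    mom_loop = sum_outer_product_loop(x, gs.group_iter)
    mom_reshape = sum_outer_product_balanced(x, gs.n_groups)
    assert np.allclose(mom_loop, mom_reshape)
    return mom_loop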


def whiten_individuals_loop(x, transform, group_iter):
    '''apply linear transform for each individual

    loop version
    '''
    #Note: figure out dimension of transformed variable
    #so we can pre-allocate
    x_new = []
    for g in group_iter():
        x_g = x[g]
        x_new.append(np.dot(transform, x_g))

    return np.concatenate(x_new) #np.vstack(x_new) #or np.array(x_new) #check shape
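

def _example_whiten_individuals():
    #Illustrative sketch, not part of the original module: whiten a stacked
    #1-d variable group by group with the upper Cholesky factor of the
    #inverse within covariance, mirroring what ShortPanelGLS.whiten does.
    #The helper name and the example covariance are assumptions.
    nobs_i, n_groups = 3, 2
    groups = np.repeat(np.arange(n_groups), nobs_i)
    gs = GroupSorted(groups)
    sigma_i = 0.5 * np.eye(nobs_i) + 0.5 * np.ones((nobs_i, nobs_i))
    cholsigmainv_i = np.linalg.cholesky(np.linalg.pinv(sigma_i)).T
    x = np.arange(n_groups * nobs_i, dtype=float)
    wx = whiten_individuals_loop(x, cholsigmainv_i, gs.group_iter)
    return wx   #same shape as x, whitened within each individual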


class ShortPanelGLS2(object):
    '''Short Panel with general intertemporal within correlation

    assumes data is stacked by individuals, panel is balanced and
    within correlation structure is identical across individuals

    It looks like this can just inherit GLS and overwrite whiten
    '''

    def __init__(self, endog, exog, group):
        self.endog = endog
        self.exog = exog
        self.group = GroupSorted(group)
        self.n_groups = self.group.n_groups
        #self.nobs_group =   #list for unbalanced?

    def fit_ols(self):
        self.res_pooled = OLS(self.endog, self.exog).fit()
        return self.res_pooled  #return or not

    def get_within_cov(self, resid):
        #central moment or not?
        mom = sum_outer_product_loop(resid, self.group.group_iter)
        return mom / self.n_groups   #df correction ?

    def whiten_groups(self, x, cholsigmainv_i):
        #from scipy import sparse #use sparse
        wx = whiten_individuals_loop(x, cholsigmainv_i, self.group.group_iter)
        return wx

    def fit(self):
        res_pooled = self.fit_ols() #get starting estimate
        sigma_i = self.get_within_cov(res_pooled.resid)
        self.cholsigmainv_i = np.linalg.cholesky(np.linalg.pinv(sigma_i)).T
        wendog = self.whiten_groups(self.endog, self.cholsigmainv_i)
        wexog = self.whiten_groups(self.exog, self.cholsigmainv_i)
        #print(wendog.shape, wexog.shape)
        self.res1 = OLS(wendog, wexog).fit()
        return self.res1
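

def _example_shortpanelgls2():
    #Illustrative usage sketch, not part of the original module: two-step
    #feasible GLS with ShortPanelGLS2 on a simulated balanced panel.
    #All data-generating choices below are assumptions for demonstration.
    np.random.seed(12345)
    n_groups, nobs_i, k_vars = 50, 5, 3
    groups = np.repeat(np.arange(n_groups), nobs_i)
    exog = np.column_stack((np.ones(n_groups * nobs_i),
                            np.random.randn(n_groups * nobs_i, k_vars - 1)))
    beta = np.ones(k_vars)
    endog = exog.dot(beta) + np.random.randn(n_groups * nobs_i)
    mod = ShortPanelGLS2(endog, exog, groups)
    res = mod.fit()    #OLS start, then one GLS step with estimated sigma_i
    return res.params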


class ShortPanelGLS(GLS):
    '''Short Panel with general intertemporal within correlation

    assumes data is stacked by individuals, panel is balanced and
    within correlation structure is identical across individuals

    It looks like this can just inherit GLS and overwrite whiten
    '''

    def __init__(self, endog, exog, group, sigma_i=None):
        self.group = GroupSorted(group)
        self.n_groups = self.group.n_groups
        #self.nobs_group =   #list for unbalanced?
        nobs_i = len(endog) // self.n_groups  #endog might later not be an ndarray

        #balanced only for now,
        #which is a requirement anyway in this case (full cov)
        #needs to change for parameterized sigma_i
        if sigma_i is None:
            sigma_i = np.eye(nobs_i)
        self.cholsigmainv_i = np.linalg.cholesky(np.linalg.pinv(sigma_i)).T

        #super is taking care of endog, exog and sigma
        super(ShortPanelGLS, self).__init__(endog, exog, sigma=None)

    def get_within_cov(self, resid):
        #central moment or not?
        mom = sum_outer_product_loop(resid, self.group.group_iter)
        return mom / self.n_groups   #df correction ?

    def whiten_groups(self, x, cholsigmainv_i):
        #from scipy import sparse #use sparse
        wx = whiten_individuals_loop(x, cholsigmainv_i, self.group.group_iter)
        return wx

    def _fit_ols(self):
        #used as starting estimate in old explicit version
        self.res_pooled = OLS(self.endog, self.exog).fit()
        return self.res_pooled  #return or not

    def _fit_old(self):
        #old explicit version
        res_pooled = self._fit_ols() #get starting estimate
        sigma_i = self.get_within_cov(res_pooled.resid)
        self.cholsigmainv_i = np.linalg.cholesky(np.linalg.pinv(sigma_i)).T
        wendog = self.whiten_groups(self.endog, self.cholsigmainv_i)
        wexog = self.whiten_groups(self.exog, self.cholsigmainv_i)
        self.res1 = OLS(wendog, wexog).fit()
        return self.res1

    def whiten(self, x):
        #whiten x by groups, will be applied to endog and exog
        wx = whiten_individuals_loop(x, self.cholsigmainv_i, self.group.group_iter)
        return wx

    #copied from GLSHet and adjusted (boiler plate?)
    def fit_iterative(self, maxiter=3):
        """
        Perform an iterative two-step procedure to estimate the GLS model.

        Parameters
        ----------
        maxiter : int, optional
            the number of iterations

        Notes
        -----
        maxiter=1: returns the estimate based on the given weights
        maxiter=2: performs a second estimation with the updated weights,
                   this is 2-step estimation
        maxiter>2: iteratively estimate and update the weights

        TODO: possible extension: stop the iteration if the change in the
        parameter estimates is smaller than x_tol

        Repeated calls to fit_iterative will do one redundant pinv_wexog
        calculation. Calling fit_iterative(maxiter) once does not do any
        redundant recalculations (whitening or calculating pinv_wexog).
        """
        #Note: in contrast to GLSHet, we don't have an auxiliary regression here
        #      might be needed if there is more structure in cov_i
        #because we only have the loop, we are not attaching the ols_pooled
        #initial estimate anymore, compared to the original version
        if maxiter < 1:
            raise ValueError('maxiter needs to be at least 1')

        import collections
        self.history = collections.defaultdict(list) #not really necessary

        for i in range(maxiter):
            #pinv_wexog is cached, delete it to force recalculation
            if hasattr(self, 'pinv_wexog'):
                del self.pinv_wexog

            #fit with current cov, GLS, i.e. OLS on whitened endog, exog
            results = self.fit()
            self.history['self_params'].append(results.params)

            if not i == maxiter - 1:  #skip for last iteration, could break instead
                #print('ols')
                self.results_old = results #store previous results for debugging

                #get cov from residuals of previous regression
                sigma_i = self.get_within_cov(results.resid)
                self.cholsigmainv_i = np.linalg.cholesky(np.linalg.pinv(sigma_i)).T

                #calculate new whitened endog and exog
                self.initialize()

        #note: results is the wrapper, results._results is the results instance
        #results._results.results_residual_regression = res_resid
        return results


if __name__ == '__main__':
    pass
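
    #Illustrative usage sketch, not part of the original module: simulate a
    #balanced panel with intertemporal within correlation and run the
    #iterative GLS estimator.  The data-generating choices are assumptions.
    np.random.seed(98765)
    n_groups, nobs_i, k_vars = 100, 4, 3
    nobs = n_groups * nobs_i
    groups = np.repeat(np.arange(n_groups), nobs_i)

    exog = np.column_stack((np.ones(nobs), np.random.randn(nobs, k_vars - 1)))
    beta = np.ones(k_vars)

    #equicorrelated errors within each individual
    corr = 0.5 * np.eye(nobs_i) + 0.5 * np.ones((nobs_i, nobs_i))
    chol = np.linalg.cholesky(corr)
    noise = np.random.randn(n_groups, nobs_i).dot(chol.T).ravel()
    endog = exog.dot(beta) + noise

    mod = ShortPanelGLS(endog, exog, groups)
    res = mod.fit_iterative(maxiter=2)   #2-step estimation
    print(res.params)

    #explicit two-step version for comparison
    res2 = ShortPanelGLS2(endog, exog, groups).fit()
    print(res2.params)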