# -*- coding: utf-8 -*-
"""Generate a random process with panel structure
Created on Sat Dec 17 22:15:27 2011
Author: Josef Perktold
Notes
-----
* written with unbalanced panels in mind, but not flexible enough yet
* need more shortcuts and options for balanced panel
* need to add random intercept or coefficients
* only one-way (repeated measures) so far
"""
import numpy as np
from . import correlation_structures as cs
class PanelSample(object):
    '''Data generating process for a balanced panel with within correlation.

    Allows various within-group correlation structures for the noise, but no
    random intercept yet.

    Parameters
    ----------
    nobs : int
        Total number of observations. It is rounded down to a multiple of
        ``n_groups`` so that every group has the same number of observations.
    k_vars : int
        Number of explanatory variables to create in exog, including constant.
    n_groups : int
        Number of groups in balanced sample.
    exog : None or ndarray
        Default is None, in which case an exog is created from the powers
        ``0, ..., k_vars - 1`` of a single base regressor.
    within : bool
        If True (default), then the created exog vary within a group. If
        False, then only variation across groups is used, i.e. the created
        exog are constant within each group. Ignored if ``exog`` is given.
        TODO: this option needs more work
    corr_structure : callable
        Called as ``corr_structure(nobs_i, *corr_args)`` where ``nobs_i`` is
        the number of observations per group; the result is used as the
        within-group correlation matrix of the noise. Default is np.eye.
    corr_args : tuple
        Additional positional arguments for ``corr_structure``.
    scale : float
        Scale of noise, standard deviation of normal distribution.
    seed : None or int
        If seed is given, then this is used to create the random numbers for
        the sample. If None, a seed is drawn from numpy's global random state
        and stored in ``self.seed``.

    Notes
    -----
    The behavior for panel robust covariance estimators seems to differ by
    a large amount by whether exog have mostly within group or across group
    variation. I do not understand why this should be the case from the theory,
    and this would warrant more investigation.

    This is just used in one example so far and needs more usage to see what
    will be useful to add.

    To use coefficients other than all ones, assign ``beta`` before the first
    call to ``generate_panel`` (or call ``get_y_true`` again afterwards).
    '''

    def __init__(self, nobs, k_vars, n_groups, exog=None, within=True,
                 corr_structure=np.eye, corr_args=(), scale=1, seed=None):
        nobs_i = nobs // n_groups
        nobs = nobs_i * n_groups  # make balanced

        self.nobs = nobs
        self.nobs_i = nobs_i
        self.n_groups = n_groups
        self.k_vars = k_vars
        self.corr_structure = corr_structure

        # group label for each observation; groups are contiguous runs of
        # nobs_i rows
        self.groups = np.repeat(np.arange(n_groups), nobs_i)
        # n_groups + 1 boundaries, so that consecutive pairs delimit the
        # row slice of each group
        self.group_indices = np.arange(n_groups + 1) * nobs_i

        if exog is None:
            exog = self._default_exog(nobs_i, n_groups, k_vars, within)
        self.exog = exog

        # filled lazily, see get_y_true; beta = None means all coefficients
        # are equal to 1
        self.y_true = None
        self.beta = None

        if seed is None:
            seed = np.random.randint(0, 999999)
        self.seed = seed
        self.random_state = np.random.RandomState(seed)

        # this makes overwriting difficult, move to method?
        self.std = scale * np.ones(nobs_i)
        corr = self.corr_structure(nobs_i, *corr_args)
        self.cov = cs.corr2cov(corr, self.std)
        self.group_means = np.zeros(n_groups)

    @staticmethod
    def _default_exog(nobs_i, n_groups, k_vars, within=True):
        '''Build the default polynomial design matrix for a balanced panel.

        Parameters
        ----------
        nobs_i : int
            Number of observations per group.
        n_groups : int
            Number of groups.
        k_vars : int
            Number of columns; column ``j`` is the base regressor to the
            power ``j``, so the first column is the constant.
        within : bool
            If True, the base regressor runs over an equally spaced grid on
            [0, 2] inside every group. If False, it takes one value per
            group, equally spaced on [-1, 1], and is constant inside a group.

        Returns
        -------
        exog : ndarray, shape (nobs_i * n_groups, k_vars)
        '''
        if within:
            base = np.tile(np.linspace(0, 2, nobs_i), n_groups)
        else:
            # One value per group, repeated for every observation of that
            # group. The grid must have n_groups points and be repeated
            # nobs_i times to line up with the contiguous group layout;
            # swapping the two only works if nobs_i == n_groups.
            base = np.repeat(np.linspace(-1, 1, n_groups), nobs_i)
        return base[:, None] ** np.arange(k_vars)

    def get_y_true(self):
        '''Compute the noise-free response and store it in ``self.y_true``.

        Uses ``exog.sum(1)`` if ``self.beta`` is None (all coefficients equal
        to 1), otherwise ``np.dot(exog, beta)``. Returns None.
        '''
        if self.beta is None:
            self.y_true = self.exog.sum(1)
        else:
            self.y_true = np.dot(self.exog, self.beta)

    def generate_panel(self, use_balanced=True):
        '''
        generate endog for a random panel dataset with within correlation

        Parameters
        ----------
        use_balanced : bool
            If True (default), the noise for all groups is drawn in a single
            vectorized call, which is much faster. If False, the noise is
            drawn group by group using ``self.group_indices``.

        Returns
        -------
        endog : ndarray
            ``self.y_true`` plus multivariate normal noise with covariance
            ``self.cov`` within each group and mean ``self.group_means[i]``
            in group ``i``.
        '''
        if self.y_true is None:
            self.get_y_true()

        nobs_i = self.nobs_i
        n_groups = self.n_groups

        if use_balanced:
            # one draw of shape (n_groups, nobs_i), flattened group by group
            noise = self.random_state.multivariate_normal(
                np.zeros(nobs_i), self.cov, size=n_groups).ravel()
            # shift each group's noise by its group mean
            noise += np.repeat(self.group_means, nobs_i)
        else:
            # NaN-initialised so that any row left unfilled is easy to spot
            noise = np.full(self.nobs, np.nan, dtype=np.float64)
            for ii in range(n_groups):
                idx, idxupp = self.group_indices[ii:ii + 2]
                mean_i = self.group_means[ii]
                noise[idx:idxupp] = self.random_state.multivariate_normal(
                    mean_i * np.ones(nobs_i), self.cov)

        endog = self.y_true + noise
        return endog
if __name__ == '__main__':
    # Intentionally empty: running this module as a script does nothing.
    pass