Source code for statsmodels.tools.catadd

from __future__ import print_function
import numpy as np
from statsmodels.compat.numpy import np_matrix_rank


def add_indep(x, varnames, dtype=None):
    '''Construct an array that keeps only linearly independent columns.

    Variables are added one at a time, in the order given. A variable is
    kept only if adding it increases the matrix rank of the columns kept
    so far; otherwise it is dropped.

    Parameters
    ----------
    x : iterable (list, tuple) or ndarray
        If `x` is a 2-D ndarray (or a subclass of it), each column is
        assumed to represent a variable with observations in rows.
        Otherwise each element of `x` is taken as one variable.
    varnames : iterable
        Names of the variables, paired with the variables in order.
        Pairing is done with ``zip``, so surplus entries on either side
        are silently ignored.
    dtype : data-type, optional
        dtype of the returned array. If None, the dtype of the first
        variable is used.

    Returns
    -------
    xout : ndarray
        Array with observations in rows and the retained variables in
        columns.
    varnames_new : list
        Names of the retained variables, in their original order.
    '''
    # TODO: this needs tests for subclasses
    if isinstance(x, np.ndarray) and x.ndim == 2:
        # iterate over variables (columns) instead of observations (rows)
        x = x.T

    nvars_orig = len(x)
    nobs = len(x[0])
    # Compare against None explicitly: non-structured np.dtype instances
    # have length 0 and would be falsy, so ``not dtype`` would discard a
    # dtype that the caller passed in.
    if dtype is None:
        dtype = np.asarray(x[0]).dtype
    xout = np.zeros((nobs, nvars_orig), dtype=dtype)

    count = 0
    rank_old = 0
    varnames_new = []
    for xi, ni in zip(x, varnames):
        # Tentatively place the candidate in the next free column; the
        # columns to its right are still zero and do not affect the rank.
        xout[:, count] = xi
        rank_new = np.linalg.matrix_rank(xout)
        if rank_new > rank_old:
            varnames_new.append(ni)
            rank_old = rank_new
            count += 1
        # A rejected candidate stays in column ``count`` until the next
        # candidate overwrites it, or it is cut off by the slice below.
    return xout[:, :count], varnames_new
if __name__ == '__main__':
    # Demo: build a design matrix containing redundant columns, reduce it
    # with add_indep and print the shape of the reduced matrix.
    levels3 = np.array([0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2])
    levels2 = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
    const = np.ones(len(levels2))

    # Multiplying by arange(k) makes the first column of each block all
    # zeros and the remaining columns scalar multiples of the factor.
    block3 = levels3[:, None] * np.arange(3)
    block2 = levels2[:, None] * np.arange(2)
    design = np.column_stack([const, block3, block2])

    names = ['const']
    names.extend('var1_%d' % k for k in np.arange(3))
    names.extend('var2_%d' % k for k in np.arange(2))

    reduced, kept_names = add_indep(design, names)
    print(reduced.shape)