Source code for statsmodels.sandbox.stats.contrast_tools

'''functions to work with contrasts for multiple tests

contrast matrices for comparing all pairs, all levels to reference level, ...
extension to 2-way groups in progress

TwoWay: class for bringing two-way analysis together and try out
various helper functions


Idea for second part
- get all transformation matrices to move in between different full rank
  parameterizations
- standardize to one parameterization to get all interesting effects.

- multivariate normal distribution
  - exploit or expand what we have in LikelihoodResults, cov_params, f_test,
    t_test, example: resols_dropf_full.cov_params(C2)
  - connect to new multiple comparison for contrast matrices, based on
    multivariate normal or t distribution (Hothorn, Bretz, Westfall)

'''



from __future__ import print_function
from statsmodels.compat.python import zip
import numpy as np

#next 3 functions copied from multicomp.py

[docs]def contrast_allpairs(nm): '''contrast or restriction matrix for all pairs of nm variables Parameters ---------- nm : int Returns ------- contr : ndarray, 2d, (nm*(nm-1)/2, nm) contrast matrix for all pairwise comparisons ''' contr = [] for i in range(nm): for j in range(i+1, nm): contr_row = np.zeros(nm) contr_row[i] = 1 contr_row[j] = -1 contr.append(contr_row) return np.array(contr)
[docs]def contrast_all_one(nm): '''contrast or restriction matrix for all against first comparison Parameters ---------- nm : int Returns ------- contr : ndarray, 2d, (nm-1, nm) contrast matrix for all against first comparisons ''' contr = np.column_stack((np.ones(nm-1), -np.eye(nm-1))) return contr
[docs]def contrast_diff_mean(nm): '''contrast or restriction matrix for all against mean comparison Parameters ---------- nm : int Returns ------- contr : ndarray, 2d, (nm-1, nm) contrast matrix for all against mean comparisons ''' return np.eye(nm) - np.ones((nm,nm))/nm
[docs]def signstr(x, noplus=False): if x in [-1,0,1]: if not noplus: return '+' if np.sign(x)>=0 else '-' else: return '' if np.sign(x)>=0 else '-' else: return str(x)
[docs]def contrast_labels(contrasts, names, reverse=False): if reverse: sl = slice(None, None, -1) else: sl = slice(None) labels = [''.join(['%s%s' % (signstr(c, noplus=True),v) for c,v in zip(row, names)[sl] if c != 0]) for row in contrasts] return labels
[docs]def contrast_product(names1, names2, intgroup1=None, intgroup2=None, pairs=False): '''build contrast matrices for products of two categorical variables this is an experimental script and should be converted to a class Parameters ---------- names1, names2 : lists of strings contains the list of level labels for each categorical variable intgroup1, intgroup2 : ndarrays TODO: this part not tested, finished yet categorical variable Notes ----- This creates a full rank matrix. It does not do all pairwise comparisons, parameterization is using contrast_all_one to get differences with first level. ? does contrast_all_pairs work as a plugin to get all pairs ? ''' n1 = len(names1) n2 = len(names2) names_prod = ['%s_%s' % (i,j) for i in names1 for j in names2] ee1 = np.zeros((1,n1)) ee1[0,0] = 1 if not pairs: dd = np.r_[ee1, -contrast_all_one(n1)] else: dd = np.r_[ee1, -contrast_allpairs(n1)] contrast_prod = np.kron(dd[1:], np.eye(n2)) names_contrast_prod0 = contrast_labels(contrast_prod, names_prod, reverse=True) names_contrast_prod = [''.join(['%s%s' % (signstr(c, noplus=True),v) for c,v in zip(row, names_prod)[::-1] if c != 0]) for row in contrast_prod] ee2 = np.zeros((1,n2)) ee2[0,0] = 1 #dd2 = np.r_[ee2, -contrast_all_one(n2)] if not pairs: dd2 = np.r_[ee2, -contrast_all_one(n2)] else: dd2 = np.r_[ee2, -contrast_allpairs(n2)] contrast_prod2 = np.kron(np.eye(n1), dd2[1:]) names_contrast_prod2 = [''.join(['%s%s' % (signstr(c, noplus=True),v) for c,v in zip(row, names_prod)[::-1] if c != 0]) for row in contrast_prod2] if (not intgroup1 is None) and (not intgroup1 is None): d1, _ = dummy_1d(intgroup1) d2, _ = dummy_1d(intgroup2) dummy = dummy_product(d1, d2) else: dummy = None return (names_prod, contrast_prod, names_contrast_prod, contrast_prod2, names_contrast_prod2, dummy)
[docs]def dummy_1d(x, varname=None): '''dummy variable for id integer groups Paramters --------- x : ndarray, 1d categorical variable, requires integers if varname is None varname : string name of the variable used in labels for category levels Returns ------- dummy : ndarray, 2d array of dummy variables, one column for each level of the category (full set) labels : list of strings labels for the columns, i.e. levels of each category Notes ----- use tools.categorical instead for more more options See Also -------- statsmodels.tools.categorical Examples -------- >>> x = np.array(['F', 'F', 'M', 'M', 'F', 'F', 'M', 'M', 'F', 'F', 'M', 'M'], dtype='|S1') >>> dummy_1d(x, varname='gender') (array([[1, 0], [1, 0], [0, 1], [0, 1], [1, 0], [1, 0], [0, 1], [0, 1], [1, 0], [1, 0], [0, 1], [0, 1]]), ['gender_F', 'gender_M']) ''' if varname is None: #assumes integer labels = ['level_%d' % i for i in range(x.max() + 1)] return (x[:,None]==np.arange(x.max()+1)).astype(int), labels else: grouplabels = np.unique(x) labels = [varname + '_%s' % str(i) for i in grouplabels] return (x[:,None]==grouplabels).astype(int), labels
[docs]def dummy_product(d1, d2, method='full'): '''dummy variable from product of two dummy variables Parameters ---------- d1, d2 : ndarray two dummy variables, assumes full set for methods 'drop-last' and 'drop-first' method : {'full', 'drop-last', 'drop-first'} 'full' returns the full product, encoding of intersection of categories. The drop methods provide a difference dummy encoding: (constant, main effects, interaction effects). The first or last columns of the dummy variable (i.e. levels) are dropped to get full rank dummy matrix. Returns ------- dummy : ndarray dummy variable for product, see method ''' if method == 'full': dd = (d1[:,:,None]*d2[:,None,:]).reshape(d1.shape[0],-1) elif method == 'drop-last': #same as SAS transreg d12rl = dummy_product(d1[:,:-1], d2[:,:-1]) dd = np.column_stack((np.ones(d1.shape[0], int), d1[:,:-1], d2[:,:-1],d12rl)) #Note: dtype int should preserve dtype of d1 and d2 elif method == 'drop-first': d12r = dummy_product(d1[:,1:], d2[:,1:]) dd = np.column_stack((np.ones(d1.shape[0], int), d1[:,1:], d2[:,1:],d12r)) else: raise ValueError('method not recognized') return dd
[docs]def dummy_limits(d): '''start and endpoints of groups in a sorted dummy variable array helper function for nested categories Examples -------- >>> d1 = np.array([[1, 0, 0], [1, 0, 0], [1, 0, 0], [1, 0, 0], [0, 1, 0], [0, 1, 0], [0, 1, 0], [0, 1, 0], [0, 0, 1], [0, 0, 1], [0, 0, 1], [0, 0, 1]]) >>> dummy_limits(d1) (array([0, 4, 8]), array([ 4, 8, 12])) get group slices from an array >>> [np.arange(d1.shape[0])[b:e] for b,e in zip(*dummy_limits(d1))] [array([0, 1, 2, 3]), array([4, 5, 6, 7]), array([ 8, 9, 10, 11])] >>> [np.arange(d1.shape[0])[b:e] for b,e in zip(*dummy_limits(d1))] [array([0, 1, 2, 3]), array([4, 5, 6, 7]), array([ 8, 9, 10, 11])] ''' nobs, nvars = d.shape start1, col1 = np.nonzero(np.diff(d,axis=0)==1) end1, col1_ = np.nonzero(np.diff(d,axis=0)==-1) cc = np.arange(nvars) #print(cc, np.r_[[0], col1], np.r_[col1_, [nvars-1]] if ((not (np.r_[[0], col1] == cc).all()) or (not (np.r_[col1_, [nvars-1]] == cc).all())): raise ValueError('dummy variable is not sorted') start = np.r_[[0], start1+1] end = np.r_[end1+1, [nobs]] return start, end
[docs]def dummy_nested(d1, d2, method='full'): '''unfinished and incomplete mainly copy past dummy_product dummy variable from product of two dummy variables Parameters ---------- d1, d2 : ndarray two dummy variables, d2 is assumed to be nested in d1 Assumes full set for methods 'drop-last' and 'drop-first'. method : {'full', 'drop-last', 'drop-first'} 'full' returns the full product, which in this case is d2. The drop methods provide an effects encoding: (constant, main effects, subgroup effects). The first or last columns of the dummy variable (i.e. levels) are dropped to get full rank encoding. Returns ------- dummy : ndarray dummy variable for product, see method ''' if method == 'full': return d2 start1, end1 = dummy_limits(d1) start2, end2 = dummy_limits(d2) first = np.in1d(start2, start1) last = np.in1d(end2, end1) equal = (first == last) col_dropf = ~first*~equal col_dropl = ~last*~equal if method == 'drop-last': d12rl = dummy_product(d1[:,:-1], d2[:,:-1]) dd = np.column_stack((np.ones(d1.shape[0], int), d1[:,:-1], d2[:,col_dropl])) #Note: dtype int should preserve dtype of d1 and d2 elif method == 'drop-first': d12r = dummy_product(d1[:,1:], d2[:,1:]) dd = np.column_stack((np.ones(d1.shape[0], int), d1[:,1:], d2[:,col_dropf])) else: raise ValueError('method not recognized') return dd, col_dropf, col_dropl
[docs]class DummyTransform(object): '''Conversion between full rank dummy encodings y = X b + u b = C a a = C^{-1} b y = X C a + u define Z = X C, then y = Z a + u contrasts: R_b b = r R_a a = R_b C a = r where R_a = R_b C Here C is the transform matrix, with dot_left and dot_right as the main methods, and the same for the inverse transform matrix, C^{-1} Note: - The class was mainly written to keep left and right straight. - No checking is done. - not sure yet if method names make sense '''
[docs] def __init__(self, d1, d2): '''C such that d1 C = d2, with d1 = X, d2 = Z should be (x, z) in arguments ? ''' self.transf_matrix = np.linalg.lstsq(d1, d2)[0] self.invtransf_matrix = np.linalg.lstsq(d2, d1)[0]
[docs] def dot_left(self, a): ''' b = C a ''' return np.dot(self.transf_matrix, a)
[docs] def dot_right(self, x): ''' z = x C ''' return np.dot(x, self.transf_matrix)
[docs] def inv_dot_left(self, b): ''' a = C^{-1} b ''' return np.dot(self.invtransf_matrix, b)
[docs] def inv_dot_right(self, z): ''' x = z C^{-1} ''' return np.dot(z, self.invtransf_matrix)
[docs]def groupmean_d(x, d): '''groupmeans using dummy variables Parameters ---------- x : array_like, ndim data array, tested for 1,2 and 3 dimensions d : ndarray, 1d dummy variable, needs to have the same length as x in axis 0. Returns ------- groupmeans : ndarray, ndim-1 means for each group along axis 0, the levels of the groups are the last axis Notes ----- This will be memory intensive if there are many levels in the categorical variable, i.e. many columns in the dummy variable. In this case it is recommended to use a more efficient version. ''' x = np.asarray(x) ## if x.ndim == 1: ## nvars = 1 ## else: nvars = x.ndim + 1 sli = [slice(None)] + [None]*(nvars-2) + [slice(None)] return (x[...,None] * d[sli]).sum(0)*1./d.sum(0)
[docs]class TwoWay(object): '''a wrapper class for two way anova type of analysis with OLS currently mainly to bring things together Notes ----- unclear: adding multiple test might assume block design or orthogonality This estimates the full dummy version with OLS. The drop first dummy representation can be recovered through the transform method. TODO: add more methods, tests, pairwise, multiple, marginal effects try out what can be added for userfriendly access. missing: ANOVA table '''
[docs] def __init__(self, endog, factor1, factor2, varnames=None): self.nobs = factor1.shape[0] if varnames is None: vname1 = 'a' vname2 = 'b' else: vname1, vname1 = varnames self.d1, self.d1_labels = d1, d1_labels = dummy_1d(factor1, vname1) self.d2, self.d2_labels = d2, d2_labels = dummy_1d(factor2, vname2) self.nlevel1 = nlevel1 = d1.shape[1] self.nlevel2 = nlevel2 = d2.shape[1] #get product dummies res = contrast_product(d1_labels, d2_labels) prodlab, C1, C1lab, C2, C2lab, _ = res self.prod_label, self.C1, self.C1_label, self.C2, self.C2_label, _ = res dp_full = dummy_product(d1, d2, method='full') dp_dropf = dummy_product(d1, d2, method='drop-first') self.transform = DummyTransform(dp_full, dp_dropf) #estimate the model self.nvars = dp_full.shape[1] self.exog = dp_full self.resols = sm.OLS(endog, dp_full).fit() self.params = self.resols.params #get transformed parameters, (constant, main, interaction effect) self.params_dropf = self.transform.inv_dot_left(self.params) self.start_interaction = 1 + (nlevel1 - 1) + (nlevel2 - 1) self.n_interaction = self.nvars - self.start_interaction
#convert to cached property
[docs] def r_nointer(self): '''contrast/restriction matrix for no interaction ''' nia = self.n_interaction R_nointer = np.hstack((np.zeros((nia, self.nvars-nia)), np.eye(nia))) #inter_direct = resols_full_dropf.tval[-nia:] R_nointer_transf = self.transform.inv_dot_right(R_nointer) self.R_nointer_transf = R_nointer_transf return R_nointer_transf
[docs] def ttest_interaction(self): '''ttests for no-interaction terms are zero ''' #use self.r_nointer instead nia = self.n_interaction R_nointer = np.hstack((np.zeros((nia, self.nvars-nia)), np.eye(nia))) #inter_direct = resols_full_dropf.tval[-nia:] R_nointer_transf = self.transform.inv_dot_right(R_nointer) self.R_nointer_transf = R_nointer_transf t_res = self.resols.t_test(R_nointer_transf) return t_res
[docs] def ftest_interaction(self): '''ttests for no-interaction terms are zero ''' R_nointer_transf = self.r_nointer() return self.resols.f_test(R_nointer_transf)
[docs] def ttest_conditional_effect(self, factorind): if factorind == 1: return self.resols.t_test(self.C1), self.C1_label else: return self.resols.t_test(self.C2), self.C2_label
[docs] def summary_coeff(self): from statsmodels.iolib import SimpleTable params_arr = self.params.reshape(self.nlevel1, self.nlevel2) stubs = self.d1_labels headers = self.d2_labels title = 'Estimated Coefficients by factors' table_fmt = dict( data_fmts = ["%#10.4g"]*self.nlevel2) return SimpleTable(params_arr, headers, stubs, title=title, txt_fmt=table_fmt)
#--------------- tests from numpy.testing import assert_equal #TODO: several tests still missing, several are in the example with print
[docs]class TestContrastTools(object):
[docs] def __init__(self): self.v1name = ['a0', 'a1', 'a2'] self.v2name = ['b0', 'b1'] self.d1 = np.array([[1, 0, 0], [1, 0, 0], [1, 0, 0], [1, 0, 0], [0, 1, 0], [0, 1, 0], [0, 1, 0], [0, 1, 0], [0, 0, 1], [0, 0, 1], [0, 0, 1], [0, 0, 1]])
[docs] def test_dummy_1d(self): x = np.array(['F', 'F', 'M', 'M', 'F', 'F', 'M', 'M', 'F', 'F', 'M', 'M'], dtype='|S1') d, labels = (np.array([[1, 0], [1, 0], [0, 1], [0, 1], [1, 0], [1, 0], [0, 1], [0, 1], [1, 0], [1, 0], [0, 1], [0, 1]]), ['gender_F', 'gender_M']) res_d, res_labels = dummy_1d(x, varname='gender') assert_equal(res_d, d) assert_equal(res_labels, labels)
[docs] def test_contrast_product(self): res_cp = contrast_product(self.v1name, self.v2name) res_t = [0]*6 res_t[0] = ['a0_b0', 'a0_b1', 'a1_b0', 'a1_b1', 'a2_b0', 'a2_b1'] res_t[1] = np.array([[-1., 0., 1., 0., 0., 0.], [ 0., -1., 0., 1., 0., 0.], [-1., 0., 0., 0., 1., 0.], [ 0., -1., 0., 0., 0., 1.]]) res_t[2] = ['a1_b0-a0_b0', 'a1_b1-a0_b1', 'a2_b0-a0_b0', 'a2_b1-a0_b1'] res_t[3] = np.array([[-1., 1., 0., 0., 0., 0.], [ 0., 0., -1., 1., 0., 0.], [ 0., 0., 0., 0., -1., 1.]]) res_t[4] = ['a0_b1-a0_b0', 'a1_b1-a1_b0', 'a2_b1-a2_b0'] for ii in range(5): np.testing.assert_equal(res_cp[ii], res_t[ii], err_msg=str(ii))
[docs] def test_dummy_limits(self): b,e = dummy_limits(self.d1) assert_equal(b, np.array([0, 4, 8])) assert_equal(e, np.array([ 4, 8, 12]))
if __name__ == '__main__': tt = TestContrastTools() tt.test_contrast_product() tt.test_dummy_1d() tt.test_dummy_limits() import statsmodels.api as sm examples = ['small', 'large', None][1] v1name = ['a0', 'a1', 'a2'] v2name = ['b0', 'b1'] res_cp = contrast_product(v1name, v2name) print(res_cp) y = np.arange(12) x1 = np.arange(12)//4 x2 = np.arange(12)//2%2 if 'small' in examples: d1, d1_labels = dummy_1d(x1) d2, d2_labels = dummy_1d(x2) if 'large' in examples: x1 = np.repeat(x1, 5, axis=0) x2 = np.repeat(x2, 5, axis=0) nobs = x1.shape[0] d1, d1_labels = dummy_1d(x1) d2, d2_labels = dummy_1d(x2) dd_full = dummy_product(d1, d2, method='full') dd_dropl = dummy_product(d1, d2, method='drop-last') dd_dropf = dummy_product(d1, d2, method='drop-first') #Note: full parameterization of dummies is orthogonal #np.eye(6)*10 in "large" example print((np.dot(dd_full.T, dd_full) == np.diag(dd_full.sum(0))).all()) #check that transforms work #generate 3 data sets with the 3 different parameterizations effect_size = [1., 0.01][1] noise_scale = [0.001, 0.1][0] noise = noise_scale * np.random.randn(nobs) beta = effect_size * np.arange(1,7) ydata_full = (dd_full * beta).sum(1) + noise ydata_dropl = (dd_dropl * beta).sum(1) + noise ydata_dropf = (dd_dropf * beta).sum(1) + noise resols_full_full = sm.OLS(ydata_full, dd_full).fit() resols_full_dropf = sm.OLS(ydata_full, dd_dropf).fit() params_f_f = resols_full_full.params params_f_df = resols_full_dropf.params resols_dropf_full = sm.OLS(ydata_dropf, dd_full).fit() resols_dropf_dropf = sm.OLS(ydata_dropf, dd_dropf).fit() params_df_f = resols_dropf_full.params params_df_df = resols_dropf_dropf.params tr_of = np.linalg.lstsq(dd_dropf, dd_full)[0] tr_fo = np.linalg.lstsq(dd_full, dd_dropf)[0] print(np.dot(tr_fo, params_df_df) - params_df_f) print(np.dot(tr_of, params_f_f) - params_f_df) transf_f_df = DummyTransform(dd_full, dd_dropf) print(np.max(np.abs((dd_full - transf_f_df.inv_dot_right(dd_dropf))))) print(np.max(np.abs((dd_dropf - transf_f_df.dot_right(dd_full))))) print(np.max(np.abs((params_df_df - transf_f_df.inv_dot_left(params_df_f))))) np.max(np.abs((params_f_df - transf_f_df.inv_dot_left(params_f_f)))) prodlab, C1, C1lab, C2, C2lab,_ = contrast_product(v1name, v2name) print('\ntvalues for no effect of factor 1') print('each test is conditional on a level of factor 2') print(C1lab) print(resols_dropf_full.t_test(C1).tvalue) print('\ntvalues for no effect of factor 2') print('each test is conditional on a level of factor 1') print(C2lab) print(resols_dropf_full.t_test(C2).tvalue) #covariance matrix of restrictions C2, note: orthogonal resols_dropf_full.cov_params(C2) #testing for no interaction effect R_noint = np.hstack((np.zeros((2,4)), np.eye(2))) inter_direct = resols_full_dropf.tvalues[-2:] inter_transf = resols_full_full.t_test(transf_f_df.inv_dot_right(R_noint)).tvalue print(np.max(np.abs((inter_direct - inter_transf)))) #now with class version tw = TwoWay(ydata_dropf, x1, x2) print(tw.ttest_interaction().tvalue) print(tw.ttest_interaction().pvalue) print(tw.ftest_interaction().fvalue) print(tw.ftest_interaction().pvalue) print(tw.ttest_conditional_effect(1)[0].tvalue) print(tw.ttest_conditional_effect(2)[0].tvalue) print(tw.summary_coeff()) ''' documentation for early examples while developing - some have changed already >>> y = np.arange(12) >>> y array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]) >>> x1 = np.arange(12)//4 >>> x1 array([0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2]) >>> x2 = np.arange(12)//2%2 >>> x2 array([0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1]) >>> d1 = dummy_1d(x1) >>> d1 array([[1, 0, 0], [1, 0, 0], [1, 0, 0], [1, 0, 0], [0, 1, 0], [0, 1, 0], [0, 1, 0], [0, 1, 0], [0, 0, 1], [0, 0, 1], [0, 0, 1], [0, 0, 1]]) >>> d2 = dummy_1d(x2) >>> d2 array([[1, 0], [1, 0], [0, 1], [0, 1], [1, 0], [1, 0], [0, 1], [0, 1], [1, 0], [1, 0], [0, 1], [0, 1]]) >>> d12 = dummy_product(d1, d2) >>> d12 array([[1, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0], [0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0, 0, 1, 0, 0], [0, 0, 0, 0, 1, 0], [0, 0, 0, 0, 1, 0], [0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 0, 1]]) >>> d12rl = dummy_product(d1[:,:-1], d2[:,:-1]) >>> np.column_stack((np.ones(d1.shape[0]), d1[:,:-1], d2[:,:-1],d12rl)) array([[ 1., 1., 0., 1., 1., 0.], [ 1., 1., 0., 1., 1., 0.], [ 1., 1., 0., 0., 0., 0.], [ 1., 1., 0., 0., 0., 0.], [ 1., 0., 1., 1., 0., 1.], [ 1., 0., 1., 1., 0., 1.], [ 1., 0., 1., 0., 0., 0.], [ 1., 0., 1., 0., 0., 0.], [ 1., 0., 0., 1., 0., 0.], [ 1., 0., 0., 1., 0., 0.], [ 1., 0., 0., 0., 0., 0.], [ 1., 0., 0., 0., 0., 0.]]) ''' #nprod = ['%s_%s' % (i,j) for i in ['a0', 'a1', 'a2'] for j in ['b0', 'b1']] #>>> [''.join(['%s%s' % (signstr(c),v) for c,v in zip(row, nprod) if c != 0]) # for row in np.kron(dd[1:], np.eye(2))] ''' >>> nprod = ['%s_%s' % (i,j) for i in ['a0', 'a1', 'a2'] for j in ['b0', 'b1']] >>> nprod ['a0_b0', 'a0_b1', 'a1_b0', 'a1_b1', 'a2_b0', 'a2_b1'] >>> [''.join(['%s%s' % (signstr(c),v) for c,v in zip(row, nprod) if c != 0]) for row in np.kron(dd[1:], np.eye(2))] ['-a0b0+a1b0', '-a0b1+a1b1', '-a0b0+a2b0', '-a0b1+a2b1'] >>> [''.join(['%s%s' % (signstr(c),v) for c,v in zip(row, nprod)[::-1] if c != 0]) for row in np.kron(dd[1:], np.eye(2))] ['+a1_b0-a0_b0', '+a1_b1-a0_b1', '+a2_b0-a0_b0', '+a2_b1-a0_b1'] >>> np.r_[[[1,0,0,0,0]],contrast_all_one(5)] array([[ 1., 0., 0., 0., 0.], [ 1., -1., 0., 0., 0.], [ 1., 0., -1., 0., 0.], [ 1., 0., 0., -1., 0.], [ 1., 0., 0., 0., -1.]]) >>> idxprod = [(i,j) for i in range(3) for j in range(2)] >>> idxprod [(0, 0), (0, 1), (1, 0), (1, 1), (2, 0), (2, 1)] >>> np.array(idxprod).reshape(2,3,2,order='F')[:,:,0] array([[0, 1, 2], [0, 1, 2]]) >>> np.array(idxprod).reshape(2,3,2,order='F')[:,:,1] array([[0, 0, 0], [1, 1, 1]]) >>> dd3_ = np.r_[[[0,0,0]],contrast_all_one(3)] pairwise contrasts and reparameterization dd = np.r_[[[1,0,0,0,0]],-contrast_all_one(5)] >>> dd array([[ 1., 0., 0., 0., 0.], [-1., 1., 0., 0., 0.], [-1., 0., 1., 0., 0.], [-1., 0., 0., 1., 0.], [-1., 0., 0., 0., 1.]]) >>> np.dot(dd.T, np.arange(5)) array([-10., 1., 2., 3., 4.]) >>> np.round(np.linalg.inv(dd.T)).astype(int) array([[1, 1, 1, 1, 1], [0, 1, 0, 0, 0], [0, 0, 1, 0, 0], [0, 0, 0, 1, 0], [0, 0, 0, 0, 1]]) >>> np.round(np.linalg.inv(dd)).astype(int) array([[1, 0, 0, 0, 0], [1, 1, 0, 0, 0], [1, 0, 1, 0, 0], [1, 0, 0, 1, 0], [1, 0, 0, 0, 1]]) >>> dd array([[ 1., 0., 0., 0., 0.], [-1., 1., 0., 0., 0.], [-1., 0., 1., 0., 0.], [-1., 0., 0., 1., 0.], [-1., 0., 0., 0., 1.]]) >>> ddinv=np.round(np.linalg.inv(dd.T)).astype(int) >>> np.dot(ddinv, np.arange(5)) array([10, 1, 2, 3, 4]) >>> np.dot(dd, np.arange(5)) array([ 0., 1., 2., 3., 4.]) >>> np.dot(dd, 5+np.arange(5)) array([ 5., 1., 2., 3., 4.]) >>> ddinv2 = np.round(np.linalg.inv(dd)).astype(int) >>> np.dot(ddinv2, np.arange(5)) array([0, 1, 2, 3, 4]) >>> np.dot(ddinv2, 5+np.arange(5)) array([ 5, 11, 12, 13, 14]) >>> np.dot(ddinv2, [5, 0, 0 , 1, 2]) array([5, 5, 5, 6, 7]) >>> np.dot(ddinv2, dd) array([[ 1., 0., 0., 0., 0.], [ 0., 1., 0., 0., 0.], [ 0., 0., 1., 0., 0.], [ 0., 0., 0., 1., 0.], [ 0., 0., 0., 0., 1.]]) >>> dd3 = -np.r_[[[1,0,0]],contrast_all_one(3)] >>> dd2 = -np.r_[[[1,0]],contrast_all_one(2)] >>> np.kron(np.eye(3), dd2) array([[-1., 0., 0., 0., 0., 0.], [-1., 1., 0., 0., 0., 0.], [ 0., 0., -1., 0., 0., 0.], [ 0., 0., -1., 1., 0., 0.], [ 0., 0., 0., 0., -1., 0.], [ 0., 0., 0., 0., -1., 1.]]) >>> dd2 array([[-1., 0.], [-1., 1.]]) >>> np.kron(np.eye(3), dd2[1:]) array([[-1., 1., 0., 0., 0., 0.], [ 0., 0., -1., 1., 0., 0.], [ 0., 0., 0., 0., -1., 1.]]) >>> np.kron(dd[1:], np.eye(2)) array([[-1., 0., 1., 0., 0., 0.], [ 0., -1., 0., 1., 0., 0.], [-1., 0., 0., 0., 1., 0.], [ 0., -1., 0., 0., 0., 1.]]) d_ = np.r_[[[1,0,0,0,0]],contrast_all_one(5)] >>> d_ array([[ 1., 0., 0., 0., 0.], [ 1., -1., 0., 0., 0.], [ 1., 0., -1., 0., 0.], [ 1., 0., 0., -1., 0.], [ 1., 0., 0., 0., -1.]]) >>> np.round(np.linalg.pinv(d_)).astype(int) array([[ 1, 0, 0, 0, 0], [ 1, -1, 0, 0, 0], [ 1, 0, -1, 0, 0], [ 1, 0, 0, -1, 0], [ 1, 0, 0, 0, -1]]) >>> np.linalg.inv(d_).astype(int) array([[ 1, 0, 0, 0, 0], [ 1, -1, 0, 0, 0], [ 1, 0, -1, 0, 0], [ 1, 0, 0, -1, 0], [ 1, 0, 0, 0, -1]]) group means >>> sli = [slice(None)] + [None]*(3-2) + [slice(None)] >>> (np.column_stack((y, x1, x2))[...,None] * d1[sli]).sum(0)*1./d1.sum(0) array([[ 1.5, 5.5, 9.5], [ 0. , 1. , 2. ], [ 0.5, 0.5, 0.5]]) >>> [(z[:,None] * d1).sum(0)*1./d1.sum(0) for z in np.column_stack((y, x1, x2)).T] [array([ 1.5, 5.5, 9.5]), array([ 0., 1., 2.]), array([ 0.5, 0.5, 0.5])] >>> '''