# Source code for statsmodels.sandbox.tests.test_formula

"""
Test functions for statsmodels.sandbox.formula
"""

import string

import numpy as np
import numpy.random as R
import numpy.linalg as L
from numpy.testing import *

import sys, nose

# Automatic conversion with 2to3 makes mistakes in formula: it changes
# "if type(self.name) is types.StringType" to "if type(self.name) is bytes".
# NOTE(review): presumably this is why an import failure on Python 3 is
# turned into a skip below rather than reported as an error -- confirm.
try:
    from statsmodels.sandbox import formula #, contrast #, utils
    from statsmodels.sandbox import contrast_old as contrast
except Exception:
    # ``Exception`` (not a bare ``except``) so that KeyboardInterrupt and
    # SystemExit are never converted into a SkipTest.
    if sys.version_info[0] >= 3:
        raise nose.SkipTest('No tests here')
    else:
        # On Python 2 an import failure is a real error: propagate it.
        raise

def setup():
    """Module-level fixture: skip every test in this module on Python 3.

    Raises
    ------
    nose.SkipTest
        When the interpreter's major version is 3 or higher.
    """
    major_version = sys.version_info[0]
    if major_version >= 3:
        raise nose.SkipTest('No tests here')
class TestTerm(TestCase):
    """Tests for ``formula.Term``: construction, ``str`` and the ``+`` / ``*`` operators."""

    def test_init(self):
        # Construction with only a name, and with a name, function and
        # termname, must not raise.
        def sqr(x):
            return x * x

        formula.Term("trivial")
        formula.Term("not_so_trivial", sqr, "sqr")
        # ``termname=0`` is expected to be rejected with ValueError.
        self.assertRaises(ValueError, formula.Term, "name", termname=0)

    def test_str(self):
        # Smoke test: ``str`` of a Term must not raise.
        t = formula.Term("name")
        str(t)

    def test_add(self):
        # Adding two Terms yields a Formula that reports both terms.
        t1 = formula.Term("t1")
        t2 = formula.Term("t2")
        f = t1 + t2
        self.assertTrue(isinstance(f, formula.Formula))
        self.assertTrue(f.hasterm(t1))
        self.assertTrue(f.hasterm(t2))

    def test_mul(self):
        # Multiplying two Terms yields a Formula.
        t1 = formula.Term("t1")
        t2 = formula.Term("t2")
        f = t1 * t2
        self.assertTrue(isinstance(f, formula.Formula))

        # Multiplying by a Term named "intercept", on either side, gives a
        # result whose string form equals that of ``Formula(t1)``.
        intercept = formula.Term("intercept")
        f = t1 * intercept
        self.assertEqual(str(f), str(formula.Formula(t1)))

        f = intercept * t1
        self.assertEqual(str(f), str(formula.Formula(t1)))
class TestFormula(TestCase):
    """Tests for ``formula.Formula`` and related terms (Factor, Quantitative, contrasts)."""

    def setUp(self):
        # Fixture: a 40x10 random matrix whose columns are bound to the
        # names 'A'..'J', one Term per column, and the sum of those terms.
        self.X = R.standard_normal((40,10))
        self.namespace = {}
        self.terms = []
        # ``string.ascii_uppercase`` is locale-independent and exists on both
        # Python 2 and 3, unlike the Python 2-only ``string.uppercase``.
        for i, name in enumerate(string.ascii_uppercase[:10]):
            self.namespace[name] = self.X[:,i]
            self.terms.append(formula.Term(name))

        self.formula = self.terms[0]
        for i in range(1, 10):
            self.formula += self.terms[i]
        self.formula.namespace = self.namespace

    def test_namespace(self):
        # Three namespaces binding 'X' and 'Y' to arrays of different lengths.
        space1 = {'X':np.arange(50), 'Y':np.arange(50)*2}
        space2 = {'X':np.arange(20), 'Y':np.arange(20)*2}
        space3 = {'X':np.arange(30), 'Y':np.arange(30)*2}
        X = formula.Term('X')
        Y = formula.Term('Y')

        X.namespace = space1
        assert_almost_equal(X(), np.arange(50))

        Y.namespace = space2
        assert_almost_equal(Y(), np.arange(20)*2)

        # Re-assigning the namespace of the sum changes what the sum
        # evaluates to, while X and Y keep evaluating in their own namespaces.
        f = X + Y

        f.namespace = space1
        self.assertEqual(f().shape, (2,50))
        assert_almost_equal(Y(), np.arange(20)*2)
        assert_almost_equal(X(), np.arange(50))

        f.namespace = space2
        self.assertEqual(f().shape, (2,20))
        assert_almost_equal(Y(), np.arange(20)*2)
        assert_almost_equal(X(), np.arange(50))

        f.namespace = space3
        self.assertEqual(f().shape, (2,30))
        assert_almost_equal(Y(), np.arange(20)*2)
        assert_almost_equal(X(), np.arange(50))

        # A power of X evaluates against X's data (length 50) until it is
        # given its own namespace.
        xx = X**2
        self.assertEqual(xx().shape, (50,))

        xx.namespace = space3
        self.assertEqual(xx().shape, (30,))

        # Same check for the product with ``formula.I``.
        xx = X * formula.I
        self.assertEqual(xx().shape, (50,))
        xx.namespace = space3
        self.assertEqual(xx().shape, (30,))

        xx = X * X
        self.assertEqual(xx.namespace, X.namespace)

        # X and Y hold different namespace objects here: the sum's namespace
        # is expected to be empty.
        xx = X + Y
        self.assertEqual(xx.namespace, {})

        # Still expected to be empty when Y's namespace is a separate dict
        # built from the same expressions as X's.
        Y.namespace = {'X':np.arange(50), 'Y':np.arange(50)*2}
        xx = X + Y
        self.assertEqual(xx.namespace, {})

        # Once both terms share the very same namespace object, the sum is
        # expected to carry it.
        Y.namespace = X.namespace
        xx = X+Y
        self.assertEqual(xx.namespace, Y.namespace)

    def test_termcolumns(self):
        t1 = formula.Term("A")
        t2 = formula.Term("B")
        f = t1 + t2 + t1 * t2

        def other(val):
            return np.array([3.2*val,4.342*val**2, 5.234*val**3])

        q = formula.Quantitative(['other%d' % i for i in range(1,4)], termname='other', func=t1, transform=other)
        f += q
        q.namespace = f.namespace = self.formula.namespace
        # Indexing the formula's output with ``termcolumns(q)`` must
        # reproduce q's own output.
        assert_almost_equal(q(), f()[f.termcolumns(q)])

    def test_str(self):
        # Smoke test: ``str`` of the fixture formula must not raise.
        str(self.formula)

    def test_call(self):
        # Calling the formula yields one row per term: (10, 40).
        x = self.formula()
        self.assertEqual(np.array(x).shape, (10, 40))

    def test_design(self):
        # ``design()`` has the transposed layout of the call: (40, 10).
        x = self.formula.design()
        self.assertEqual(x.shape, (40, 10))

    def test_product(self):
        prod = self.formula['A'] * self.formula['C']
        f = self.formula + prod
        f.namespace = self.namespace
        x = f.design()
        p = f['A*C']
        p.namespace = self.namespace
        col = f.termcolumns(prod, dict=False)
        # Both the design column for the product and the product term itself
        # must equal the elementwise product of columns 0 ('A') and 2 ('C').
        assert_almost_equal(np.squeeze(x[:,col]), self.X[:,0] * self.X[:,2])
        assert_almost_equal(np.squeeze(p()), self.X[:,0] * self.X[:,2])

    def test_intercept1(self):
        # NOTE(review): ``prod`` is never used below; kept because removing
        # the multiplication would change what this test executes.
        prod = self.terms[0] * self.terms[2]
        f = self.formula + formula.I
        icol = f.names().index('intercept')
        f.namespace = self.namespace
        # The row named 'intercept' must be all ones.
        assert_almost_equal(f()[icol], np.ones((40,)))

    def test_intercept3(self):
        # Multiplying a term by ``formula.I`` must leave its values unchanged.
        t = self.formula['A']
        t.namespace = self.namespace
        prod = t * formula.I
        prod.namespace = self.formula.namespace
        assert_almost_equal(np.squeeze(prod()), t())

    def test_contrast1(self):
        term = self.terms[0] + self.terms[2]
        c = contrast.Contrast(term, self.formula)
        # NOTE(review): ``col1`` and ``col2`` are never used in an assertion,
        # and ``col2`` looks up terms[1] although ``term`` is built from
        # terms[2] -- confirm whether a check is missing here.
        col1 = self.formula.termcolumns(self.terms[0], dict=False)
        col2 = self.formula.termcolumns(self.terms[1], dict=False)
        test = [[1] + [0]*9, [0]*2 + [1] + [0]*7]
        assert_almost_equal(c.matrix, test)

    def test_contrast2(self):
        # A term bound to an all-zero array, added to terms[2].
        dummy = formula.Term('zero')
        self.namespace['zero'] = np.zeros((40,), np.float64)
        term = dummy + self.terms[2]
        c = contrast.Contrast(term, self.formula)
        test = [0]*2 + [1] + [0]*7
        assert_almost_equal(c.matrix, test)

    def test_contrast3(self):
        X = self.formula.design()
        P = np.dot(X, L.pinv(X))

        dummy = formula.Term('noise')
        # 'noise' is random data multiplied by (I - X pinv(X)).
        resid = np.identity(40) - P
        self.namespace['noise'] = np.transpose(np.dot(resid, R.standard_normal((40,5))))
        terms = dummy + self.terms[2]
        terms.namespace = self.formula.namespace
        c = contrast.Contrast(terms, self.formula)
        self.assertEqual(c.matrix.shape, (10,))

    def test_power(self):
        # ``t**2`` must evaluate to the elementwise square of ``t``.
        t = self.terms[2]
        t2 = t**2
        t.namespace = t2.namespace = self.formula.namespace
        assert_almost_equal(t()**2, t2())

    def test_quantitative(self):
        # A Quantitative with ``transform=np.sin`` must equal ``np.sin(t())``.
        t = self.terms[2]
        sint = formula.Quantitative('t', func=t, transform=np.sin)
        t.namespace = sint.namespace = self.formula.namespace
        assert_almost_equal(np.sin(t()), sint())

    def test_factor1(self):
        f = ['a','b','c']*10
        fac = formula.Factor('ff', f)
        fac.namespace = {'ff':f}
        self.assertEqual(list(fac.values()), f)

    def test_factor2(self):
        # Three levels over 30 observations: output shape (3, 30).
        f = ['a','b','c']*10
        fac = formula.Factor('ff', f)
        fac.namespace = {'ff':f}
        self.assertEqual(fac().shape, (3,30))

    def test_factor3(self):
        # The main effect with a reference level has one row fewer: (2, 30).
        f = ['a','b','c']*10
        fac = formula.Factor('ff', f)
        fac.namespace = {'ff':f}
        m = fac.main_effect(reference=1)
        m.namespace = fac.namespace
        self.assertEqual(m().shape, (2,30))

    def test_factor4(self):
        f = ['a','b','c']*10
        fac = formula.Factor('ff', f)
        fac.namespace = {'ff':f}
        m = fac.main_effect(reference=2)
        m.namespace = fac.namespace
        # Expected value: the 3x30 indicator matrix for the repeating
        # a, b, c pattern, with the reference row (index 2) subtracted from
        # each of the other two rows.
        r = np.array([np.identity(3)]*10)
        r.shape = (30,3)
        r = r.T
        _m = np.array([r[0]-r[2],r[1]-r[2]])
        assert_almost_equal(_m, m())

    def test_factor5(self):
        # The factor evaluates to one indicator row per level, and indexing
        # by a level gives that level's indicator row.
        f = ['a','b','c']*3
        fac = formula.Factor('ff', f)
        fac.namespace = {'ff':f}
        assert_equal(fac(), [[1,0,0]*3,
                             [0,1,0]*3,
                             [0,0,1]*3])
        assert_equal(fac['a'], [1,0,0]*3)
        assert_equal(fac['b'], [0,1,0]*3)
        assert_equal(fac['c'], [0,0,1]*3)

    def test_ordinal_factor(self):
        # With ``ordinal=True`` the factor evaluates to the position of each
        # value in the given key list.
        f = ['a','b','c']*3
        fac = formula.Factor('ff', ['a','b','c'], ordinal=True)
        fac.namespace = {'ff':f}
        assert_equal(fac(), [0,1,2]*3)
        assert_equal(fac['a'], [1,0,0]*3)
        assert_equal(fac['b'], [0,1,0]*3)
        assert_equal(fac['c'], [0,0,1]*3)

    def test_ordinal_factor2(self):
        # Same as test_ordinal_factor, with the data in a different order
        # from the key list.
        f = ['b','c', 'a']*3
        fac = formula.Factor('ff', ['a','b','c'], ordinal=True)
        fac.namespace = {'ff':f}
        assert_equal(fac(), [1,2,0]*3)
        assert_equal(fac['a'], [0,0,1]*3)
        assert_equal(fac['b'], [1,0,0]*3)
        assert_equal(fac['c'], [0,1,0]*3)

    def test_contrast4(self):
        f = self.formula + self.terms[5] + self.terms[5]
        f.namespace = self.namespace
        estimable = False

        c = contrast.Contrast(self.terms[5], f)

        # NOTE(review): this assertion compares a local constant with itself
        # and never inspects ``c``; as written the test only checks that
        # building the Contrast does not raise.
        self.assertEqual(estimable, False)

    def test_interactions(self):
        # Default call on three terms: main effects plus pairwise products.
        f = formula.interactions([formula.Term(name) for name in ['a', 'b', 'c']])
        assert_equal(set(f.termnames()), set(['a', 'b', 'c', 'a*b', 'a*c', 'b*c']))

        # ``order=3``: everything up to and including three-way products.
        f = formula.interactions([formula.Term(name) for name in ['a', 'b', 'c', 'd']], order=3)
        assert_equal(set(f.termnames()), set(['a', 'b', 'c', 'd', 'a*b', 'a*c', 'a*d', 'b*c', 'b*d', 'c*d', 'a*b*c', 'a*c*d', 'a*b*d', 'b*c*d']))

        # ``order=[1,2,3]`` gives the same set of terms as ``order=3``.
        f = formula.interactions([formula.Term(name) for name in ['a', 'b', 'c', 'd']], order=[1,2,3])
        assert_equal(set(f.termnames()), set(['a', 'b', 'c', 'd', 'a*b', 'a*c', 'a*d', 'b*c', 'b*d', 'c*d', 'a*b*c', 'a*c*d', 'a*b*d', 'b*c*d']))

        # ``order=[3]``: only the three-way products.
        f = formula.interactions([formula.Term(name) for name in ['a', 'b', 'c', 'd']], order=[3])
        assert_equal(set(f.termnames()), set(['a*b*c', 'a*c*d', 'a*b*d', 'b*c*d']))

    def test_subtract(self):
        # Subtracting a term, a chain of terms, or a sum of terms removes
        # exactly those terms from the formula.
        f = formula.interactions([formula.Term(name) for name in ['a', 'b', 'c']])
        ff = f - f['a*b']
        assert_equal(set(ff.termnames()), set(['a', 'b', 'c', 'a*c', 'b*c']))

        ff = f - f['a*b'] - f['a*c']
        assert_equal(set(ff.termnames()), set(['a', 'b', 'c', 'b*c']))

        ff = f - (f['a*b'] + f['a*c'])
        assert_equal(set(ff.termnames()), set(['a', 'b', 'c', 'b*c']))