# Source code for statsmodels.sandbox.tests.test_formula
"""
Test functions for models.formula
"""
import string
import numpy as np
import numpy.random as R
import numpy.linalg as L
from numpy.testing import *
import sys, nose
#automatic conversion with 2to3 makes mistakes in formula, changes
#"if type(self.name) is types.StringType" to "if type(self.name) is bytes"
# Import the sandbox modules under test.  If the import fails on Python 3 the
# whole module is skipped (see the 2to3 note above); on Python 2 the original
# error is re-raised so that a genuine breakage stays visible.
try:
    from statsmodels.sandbox import formula #, contrast #, utils
    from statsmodels.sandbox import contrast_old as contrast
except Exception:
    # ``except Exception`` rather than a bare ``except`` so that
    # KeyboardInterrupt / SystemExit are never converted into a SkipTest.
    if sys.version_info[0] >= 3:
        raise nose.SkipTest('No tests here')
    else:
        raise
class TestTerm(TestCase):
    """Tests for constructing ``formula.Term`` objects and combining them."""

    def test_init(self):
        """Terms can be built from a name alone or with a function and
        termname; a non-string ``termname`` must raise ``ValueError``."""
        # The constructed terms are not inspected; the calls only have to
        # succeed without raising.
        formula.Term("trivial")
        sqr = lambda x: x*x
        formula.Term("not_so_trivial", sqr, "sqr")
        self.assertRaises(ValueError, formula.Term, "name", termname=0)

    def test_add(self):
        """Adding two terms yields a Formula that contains both of them."""
        t1 = formula.Term("t1")
        t2 = formula.Term("t2")
        f = t1 + t2
        self.assertTrue(isinstance(f, formula.Formula))
        self.assertTrue(f.hasterm(t1))
        self.assertTrue(f.hasterm(t2))

    def test_mul(self):
        """Multiplying terms yields a Formula; multiplying by a term named
        "intercept" (on either side) gives the same string form as the
        formula of the other term alone."""
        t1 = formula.Term("t1")
        t2 = formula.Term("t2")
        f = t1 * t2
        self.assertTrue(isinstance(f, formula.Formula))

        intercept = formula.Term("intercept")
        expected = str(formula.Formula(t1))

        f = t1 * intercept
        self.assertEqual(str(f), expected)

        f = intercept * t1
        self.assertEqual(str(f), expected)
class TestFormula(TestCase):
    """Tests for ``formula.Formula`` together with terms, factors,
    interactions and ``contrast.Contrast``.

    ``setUp`` builds ten terms named ``A`` .. ``J``, each bound through a
    shared namespace dict to one column of a random 40x10 array, and sums
    them into ``self.formula``.
    """

    def setUp(self):
        """Create the random data, one Term per column, and their sum."""
        self.X = R.standard_normal((40,10))
        self.namespace = {}
        self.terms = []
        for i in range(10):
            # ``string.uppercase`` does not exist on Python 3 (and is
            # locale-dependent on Python 2); ``ascii_uppercase`` is
            # available on both and always yields 'A'..'Z'.
            name = string.ascii_uppercase[i]
            self.namespace[name] = self.X[:,i]
            self.terms.append(formula.Term(name))

        self.formula = self.terms[0]
        for i in range(1, 10):
            self.formula += self.terms[i]
        self.formula.namespace = self.namespace

    def test_namespace(self):
        """Check how namespaces are resolved for terms and formulas.

        The assertions below require that evaluating a formula uses the
        formula's own namespace, while the individual terms keep evaluating
        against the namespace assigned to them directly.
        """
        space1 = {'X':np.arange(50), 'Y':np.arange(50)*2}
        space2 = {'X':np.arange(20), 'Y':np.arange(20)*2}
        space3 = {'X':np.arange(30), 'Y':np.arange(30)*2}
        X = formula.Term('X')
        Y = formula.Term('Y')

        X.namespace = space1
        assert_almost_equal(X(), np.arange(50))

        Y.namespace = space2
        assert_almost_equal(Y(), np.arange(20)*2)

        f = X + Y

        # Re-pointing the formula at a new namespace changes what f()
        # returns but must leave X() and Y() unchanged.
        f.namespace = space1
        self.assertEqual(f().shape, (2,50))
        assert_almost_equal(Y(), np.arange(20)*2)
        assert_almost_equal(X(), np.arange(50))

        f.namespace = space2
        self.assertEqual(f().shape, (2,20))
        assert_almost_equal(Y(), np.arange(20)*2)
        assert_almost_equal(X(), np.arange(50))

        f.namespace = space3
        self.assertEqual(f().shape, (2,30))
        assert_almost_equal(Y(), np.arange(20)*2)
        assert_almost_equal(X(), np.arange(50))

        # A power of X starts out evaluating like X (length 50) and follows
        # a namespace assigned to it afterwards.
        xx = X**2
        self.assertEqual(xx().shape, (50,))

        xx.namespace = space3
        self.assertEqual(xx().shape, (30,))

        # Same expectation for the product with the intercept term.
        xx = X * formula.I
        self.assertEqual(xx().shape, (50,))

        xx.namespace = space3
        self.assertEqual(xx().shape, (30,))

        xx = X * X
        self.assertEqual(xx.namespace, X.namespace)

        # X and Y are bound to different dict objects here, and the sum is
        # expected to have an empty namespace ...
        xx = X + Y
        self.assertEqual(xx.namespace, {})

        # ... also when Y's dict has the same contents as X's but is a
        # distinct object ...
        Y.namespace = {'X':np.arange(50), 'Y':np.arange(50)*2}
        xx = X + Y
        self.assertEqual(xx.namespace, {})

        # ... whereas sharing the very same dict carries over to the sum.
        Y.namespace = X.namespace
        xx = X+Y
        self.assertEqual(xx.namespace, Y.namespace)

    def test_termcolumns(self):
        """``termcolumns`` of a Quantitative term selects exactly the rows of
        the evaluated formula that the term itself evaluates to."""
        t1 = formula.Term("A")
        t2 = formula.Term("B")
        f = t1 + t2 + t1 * t2

        def other(val):
            return np.array([3.2*val,4.342*val**2, 5.234*val**3])

        q = formula.Quantitative(['other%d' % i for i in range(1,4)], termname='other', func=t1, transform=other)
        f += q
        q.namespace = f.namespace = self.formula.namespace
        assert_almost_equal(q(), f()[f.termcolumns(q)])

    def test_product(self):
        """The product term ``A*C`` evaluates to the elementwise product of
        the first and third data columns, both in the design matrix and when
        the term is called directly."""
        prod = self.formula['A'] * self.formula['C']
        f = self.formula + prod
        f.namespace = self.namespace
        x = f.design()
        p = f['A*C']
        p.namespace = self.namespace
        col = f.termcolumns(prod, dict=False)
        expected = self.X[:,0] * self.X[:,2]
        assert_almost_equal(np.squeeze(x[:,col]), expected)
        assert_almost_equal(np.squeeze(p()), expected)

    def test_intercept1(self):
        """Adding ``formula.I`` adds an 'intercept' entry that evaluates to
        a vector of ones."""
        f = self.formula + formula.I
        icol = f.names().index('intercept')
        f.namespace = self.namespace
        assert_almost_equal(f()[icol], np.ones((40,)))

    def test_intercept3(self):
        """Multiplying a term by ``formula.I`` leaves its values unchanged."""
        t = self.formula['A']
        t.namespace = self.namespace
        prod = t * formula.I
        prod.namespace = self.formula.namespace
        assert_almost_equal(np.squeeze(prod()), t())

    def test_contrast1(self):
        """The contrast for ``A + C`` has one indicator row per term, in the
        first and third positions of the ten-term formula."""
        term = self.terms[0] + self.terms[2]
        c = contrast.Contrast(term, self.formula)
        test = [[1] + [0]*9, [0]*2 + [1] + [0]*7]
        assert_almost_equal(c.matrix, test)

    def test_contrast2(self):
        """An all-zero dummy term plus ``C`` gives a contrast that is just
        the indicator for ``C``."""
        dummy = formula.Term('zero')
        self.namespace['zero'] = np.zeros((40,), np.float64)
        term = dummy + self.terms[2]
        c = contrast.Contrast(term, self.formula)
        test = [0]*2 + [1] + [0]*7
        assert_almost_equal(c.matrix, test)

    def test_contrast3(self):
        """A 'noise' term built from the residual projector of the design,
        combined with ``C``, gives a contrast matrix of shape ``(10,)``."""
        X = self.formula.design()
        P = np.dot(X, L.pinv(X))

        dummy = formula.Term('noise')
        # I - P applied to random data: five noise rows of length 40.
        resid = np.identity(40) - P
        self.namespace['noise'] = np.transpose(np.dot(resid, R.standard_normal((40,5))))
        terms = dummy + self.terms[2]
        terms.namespace = self.formula.namespace
        c = contrast.Contrast(terms, self.formula)
        self.assertEqual(c.matrix.shape, (10,))

    def test_power(self):
        """``t**2`` evaluates to the square of ``t``'s values."""
        t = self.terms[2]
        t2 = t**2
        t.namespace = t2.namespace = self.formula.namespace
        assert_almost_equal(t()**2, t2())

    def test_quantitative(self):
        """A Quantitative term applies its ``transform`` to the values of
        the wrapped term."""
        t = self.terms[2]
        sint = formula.Quantitative('t', func=t, transform=np.sin)
        t.namespace = sint.namespace = self.formula.namespace
        assert_almost_equal(np.sin(t()), sint())

    def test_factor1(self):
        """``Factor.values()`` returns the raw values from the namespace."""
        f = ['a','b','c']*10
        fac = formula.Factor('ff', f)
        fac.namespace = {'ff':f}
        self.assertEqual(list(fac.values()), f)

    def test_factor2(self):
        """Evaluating a three-level factor over 30 observations gives a
        ``(3, 30)`` array."""
        f = ['a','b','c']*10
        fac = formula.Factor('ff', f)
        fac.namespace = {'ff':f}
        self.assertEqual(fac().shape, (3,30))

    def test_factor3(self):
        """The main effect with a reference level has one row fewer than the
        factor has levels."""
        f = ['a','b','c']*10
        fac = formula.Factor('ff', f)
        fac.namespace = {'ff':f}
        m = fac.main_effect(reference=1)
        m.namespace = fac.namespace
        self.assertEqual(m().shape, (2,30))

    def test_factor4(self):
        """With ``reference=2`` the main effect equals the indicator rows of
        the first two levels minus the indicator row of the third."""
        f = ['a','b','c']*10
        fac = formula.Factor('ff', f)
        fac.namespace = {'ff':f}
        m = fac.main_effect(reference=2)
        m.namespace = fac.namespace

        # Ten stacked 3x3 identities, reshaped and transposed into a (3, 30)
        # array of level indicators for the repeating a, b, c pattern.
        r = np.array([np.identity(3)]*10)
        r.shape = (30,3)
        r = r.T
        _m = np.array([r[0]-r[2],r[1]-r[2]])
        assert_almost_equal(_m, m())

    def test_factor5(self):
        """A factor evaluates to one indicator row per level, and indexing
        by level name returns that level's indicator."""
        f = ['a','b','c']*3
        fac = formula.Factor('ff', f)
        fac.namespace = {'ff':f}
        assert_equal(fac(), [[1,0,0]*3,
                             [0,1,0]*3,
                             [0,0,1]*3])
        assert_equal(fac['a'], [1,0,0]*3)
        assert_equal(fac['b'], [0,1,0]*3)
        assert_equal(fac['c'], [0,0,1]*3)

    def test_ordinal_factor(self):
        """An ordinal factor evaluates to the integer position of each value
        in the level list; indexing by level still gives indicators."""
        f = ['a','b','c']*3
        fac = formula.Factor('ff', ['a','b','c'], ordinal=True)
        fac.namespace = {'ff':f}
        assert_equal(fac(), [0,1,2]*3)
        assert_equal(fac['a'], [1,0,0]*3)
        assert_equal(fac['b'], [0,1,0]*3)
        assert_equal(fac['c'], [0,0,1]*3)

    def test_ordinal_factor2(self):
        """Same as ``test_ordinal_factor`` but with the data in a different
        order than the declared levels."""
        f = ['b','c', 'a']*3
        fac = formula.Factor('ff', ['a','b','c'], ordinal=True)
        fac.namespace = {'ff':f}
        assert_equal(fac(), [1,2,0]*3)
        assert_equal(fac['a'], [0,0,1]*3)
        assert_equal(fac['b'], [1,0,0]*3)
        assert_equal(fac['c'], [0,1,0]*3)

    def test_contrast4(self):
        """Building a Contrast for a term that was added to the formula
        repeatedly must not raise."""
        f = self.formula + self.terms[5] + self.terms[5]
        f.namespace = self.namespace
        estimable = False

        c = contrast.Contrast(self.terms[5], f)

        # NOTE(review): ``estimable`` is a local constant, so this assertion
        # can never fail; the only effective check is that the Contrast
        # constructor above succeeds.  Confirm what was meant to be tested.
        self.assertEqual(estimable, False)

    def test_interactions(self):
        """``formula.interactions`` builds all products of the given terms
        up to ``order`` (default: pairs), or only the orders listed."""
        f = formula.interactions([formula.Term(name) for name in ['a', 'b', 'c']])
        assert_equal(set(f.termnames()),
                     set(['a', 'b', 'c', 'a*b', 'a*c', 'b*c']))

        f = formula.interactions([formula.Term(name) for name in ['a', 'b', 'c', 'd']], order=3)
        assert_equal(set(f.termnames()),
                     set(['a', 'b', 'c', 'd',
                          'a*b', 'a*c', 'a*d', 'b*c', 'b*d', 'c*d',
                          'a*b*c', 'a*c*d', 'a*b*d', 'b*c*d']))

        f = formula.interactions([formula.Term(name) for name in ['a', 'b', 'c', 'd']], order=[1,2,3])
        assert_equal(set(f.termnames()),
                     set(['a', 'b', 'c', 'd',
                          'a*b', 'a*c', 'a*d', 'b*c', 'b*d', 'c*d',
                          'a*b*c', 'a*c*d', 'a*b*d', 'b*c*d']))

        f = formula.interactions([formula.Term(name) for name in ['a', 'b', 'c', 'd']], order=[3])
        assert_equal(set(f.termnames()),
                     set(['a*b*c', 'a*c*d', 'a*b*d', 'b*c*d']))

    def test_subtract(self):
        """Subtracting a term, a chain of terms, or a sum of terms removes
        exactly those terms from the formula."""
        f = formula.interactions([formula.Term(name) for name in ['a', 'b', 'c']])

        ff = f - f['a*b']
        assert_equal(set(ff.termnames()), set(['a', 'b', 'c', 'a*c', 'b*c']))

        ff = f - f['a*b'] - f['a*c']
        assert_equal(set(ff.termnames()), set(['a', 'b', 'c', 'b*c']))

        ff = f - (f['a*b'] + f['a*c'])
        assert_equal(set(ff.termnames()), set(['a', 'b', 'c', 'b*c']))