Source code for statsmodels.sandbox.contrast_old

import copy

import numpy as np
from numpy.linalg import pinv
from statsmodels.sandbox import utils_old as utils

[docs]class ContrastResults(object):
    """
    Results from looking at a particular contrast of coefficients in
    a parametric model. The class does nothing, it is a container
    for the results from T and F contrasts.
    """

[docs]    def __init__(self, t=None, F=None, sd=None, effect=None, df_denom=None,
                 df_num=None):
        if F is not None:
            self.F = F
            self.df_denom = df_denom
            self.df_num = df_num
        else:
            self.t = t
            self.sd = sd
            self.effect = effect
            self.df_denom = df_denom

    def __array__(self):
        if hasattr(self, "F"):
            return self.F
        else:
            return self.t

    def __str__(self):
        if hasattr(self, 'F'):
            return '<F contrast: F=%s, df_denom=%d, df_num=%d>' % \
                   (repr(self.F), self.df_denom, self.df_num)
        else:
            return '<T contrast: effect=%s, sd=%s, t=%s, df_denom=%d>' % \
                   (repr(self.effect), repr(self.sd), repr(self.t), self.df_denom)


[docs]class Contrast(object):
    """
    This class is used to construct contrast matrices in regression models.
    They are specified by a (term, formula) pair.

    The term, T,  is a linear combination of columns of the design
    matrix D=formula(). The matrix attribute is
    a contrast matrix C so that

    colspan(dot(D, C)) = colspan(dot(D, dot(pinv(D), T)))

    where pinv(D) is the generalized inverse of D. Further, the matrix

    Tnew = dot(C, D)

    is full rank. The rank attribute is the rank of

    dot(D, dot(pinv(D), T))

    In a regression model, the contrast tests that E(dot(Tnew, Y)) = 0
    for each column of Tnew.

    """

[docs]    def __init__(self, term, formula, name=''):
        self.term = term
        self.formula = formula
        if name is '':
            self.name = str(term)
        else:
            self.name = name

    def __str__(self):
        return '<contrast:%s>' % \
               repr({'term':str(self.term), 'formula':str(self.formula)})

[docs]    def compute_matrix(self, *args, **kw):
        """
        Construct a contrast matrix C so that

        colspan(dot(D, C)) = colspan(dot(D, dot(pinv(D), T)))

        where pinv(D) is the generalized inverse of D=self.D=self.formula().

        If the design, self.D is already set,
        then evaldesign can be set to False.
        """

        t = copy.copy(self.term)
        t.namespace = self.formula.namespace
        T = np.transpose(np.array(t(*args, **kw)))

        if T.ndim == 1:
            T.shape = (T.shape[0], 1)

        self.T = utils.clean0(T)

        self.D = self.formula.design(*args, **kw)

        self._matrix = contrastfromcols(self.T, self.D)
        try:
            self.rank = self.matrix.shape[1]
        except:
            self.rank = 1

    def _get_matrix(self):
        """
        This will fail if the formula needs arguments to construct
        the design.
        """
        if not hasattr(self, "_matrix"):
            self.compute_matrix()
        return self._matrix
    matrix = property(_get_matrix)

[docs]def contrastfromcols(L, D, pseudo=None):
    """
    From an n x p design matrix D and a matrix L, tries
    to determine a p x q contrast matrix C which
    determines a contrast of full rank, i.e. the
    n x q matrix

    dot(transpose(C), pinv(D))

    is full rank.

    L must satisfy either L.shape[0] == n or L.shape[1] == p.

    If L.shape[0] == n, then L is thought of as representing
    columns in the column space of D.

    If L.shape[1] == p, then L is thought of as what is known
    as a contrast matrix. In this case, this function returns an estimable
    contrast corresponding to the dot(D, L.T)

    Note that this always produces a meaningful contrast, not always
    with the intended properties because q is always non-zero unless
    L is identically 0. That is, it produces a contrast that spans
    the column space of L (after projection onto the column space of D).

    """

    L = np.asarray(L)
    D = np.asarray(D)

    n, p = D.shape

    if L.shape[0] != n and L.shape[1] != p:
        raise ValueError('shape of L and D mismatched')

    if pseudo is None:
        pseudo = pinv(D)

    if L.shape[0] == n:
        C = np.dot(pseudo, L).T
    else:
        C = L
        C = np.dot(pseudo, np.dot(D, C.T)).T

    Lp = np.dot(D, C.T)

    if len(Lp.shape) == 1:
        Lp.shape = (n, 1)

    if utils.rank(Lp) != Lp.shape[1]:
        Lp = utils.fullrank(Lp)
        C = np.dot(pseudo, Lp).T

    return np.squeeze(C)