Source code for statsmodels.sandbox.archive.linalg_decomp_1

'''Recipes for more efficient work with linalg using classes


intended for use for multivariate normal and linear regression
calculations

x  is the data (nobs, nvars)
m  is the moment matrix (x'x) or a covariance matrix Sigma

examples:
x'sigma^{-1}x
z = Px  where P=Sigma^{-1/2}  or P=Sigma^{1/2}

Initially assume positive definite, then add spectral cutoff and
regularization of moment matrix, and extend to PCA

maybe extend to sparse if some examples work out
(transformation matrix P for random effect and for toeplitz)


Author: josef-pktd
Created on 2010-10-20
'''

from __future__ import print_function
from statsmodels.compat.python import get_function_name
import numpy as np
from scipy import linalg


#this has been copied from nitime a long time ago
#TODO: ceck whether class has changed in nitime
[docs]class OneTimeProperty(object):


    """A descriptor to make special properties that become normal attributes.

    This is meant to be used mostly by the auto_attr decorator in this module.
    Author: Fernando Perez, copied from nitime
    """
[docs]    def __init__(self,func):

        """Create a OneTimeProperty instance.

         Parameters
         ----------
           func : method

             The method that will be called the first time to compute a value.
             Afterwards, the method's name will be a standard attribute holding
             the value of this computation.
             """
        self.getter = func
        self.name = get_function_name(func)

    def __get__(self,obj,type=None):
        """This will be called on attribute access on the class or instance. """

        if obj is None:
            # Being called on the class, return the original function. This way,
            # introspection works on the class.
            #return func
            print('class access')
            return self.getter

        val = self.getter(obj)
        #print("** auto_attr - loading '%s'" % self.name  # dbg)
        setattr(obj, self.name, val)
        return val


[docs]class PlainMatrixArray(object):
    '''Class that defines linalg operation on an array

    simplest version as benchmark

    linear algebra recipes for multivariate normal and linear
    regression calculations

    '''
[docs]    def __init__(self, data=None, sym=None):
        if not data is None:
            if sym is None:
                self.x = np.asarray(data)
                self.m = np.dot(self.x.T, self.x)
            else:
                raise ValueError('data and sym cannot be both given')
        elif not sym is None:
            self.m = np.asarray(sym)
            self.x = np.eye(*self.m.shape) #default

        else:
            raise ValueError('either data or sym need to be given')

    @OneTimeProperty
[docs]    def minv(self):
        return np.linalg.inv(self.m)

    @OneTimeProperty
[docs]    def m_y(self, y):
        return np.dot(self.m, y)

[docs]    def minv_y(self, y):
        return np.dot(self.minv, y)

    @OneTimeProperty
[docs]    def mpinv(self):
        return linalg.pinv(self.m)

    @OneTimeProperty
[docs]    def xpinv(self):
        return linalg.pinv(self.x)

[docs]    def yt_m_y(self, y):
        return np.dot(y.T, np.dot(self.m, y))

[docs]    def yt_minv_y(self, y):
        return np.dot(y.T, np.dot(self.minv, y))

    #next two are redundant
[docs]    def y_m_yt(self, y):
        return np.dot(y, np.dot(self.m, y.T))

[docs]    def y_minv_yt(self, y):
        return np.dot(y, np.dot(self.minv, y.T))

    @OneTimeProperty
[docs]    def mdet(self):
        return linalg.det(self.m)

    @OneTimeProperty
[docs]    def mlogdet(self):
        return np.log(linalg.det(self.m))

    @OneTimeProperty
[docs]    def meigh(self):
        evals, evecs = linalg.eigh(self.m)
        sortind = np.argsort(evals)[::-1]
        return evals[sortind], evecs[:,sortind]

    @OneTimeProperty
[docs]    def mhalf(self):
        evals, evecs = self.meigh
        return np.dot(np.diag(evals**0.5), evecs.T)
        #return np.dot(evecs, np.dot(np.diag(evals**0.5), evecs.T))
        #return np.dot(evecs, 1./np.sqrt(evals) * evecs.T))

    @OneTimeProperty
[docs]    def minvhalf(self):
        evals, evecs = self.meigh
        return np.dot(evecs, 1./np.sqrt(evals) * evecs.T)



[docs]class SvdArray(PlainMatrixArray):
    '''Class that defines linalg operation on an array

    svd version, where svd is taken on original data array, if
    or when it matters

    no spectral cutoff in first version
    '''

[docs]    def __init__(self, data=None, sym=None):
        super(SvdArray, self).__init__(data=data, sym=sym)

        u, s, v = np.linalg.svd(self.x, full_matrices=1)
        self.u, self.s, self.v = u, s, v
        self.sdiag = linalg.diagsvd(s, *x.shape)
        self.sinvdiag = linalg.diagsvd(1./s, *x.shape)

    def _sdiagpow(self, p):
        return linalg.diagsvd(np.power(self.s, p), *x.shape)

    @OneTimeProperty
[docs]    def minv(self):
        sinvv = np.dot(self.sinvdiag, self.v)
        return np.dot(sinvv.T, sinvv)


    @OneTimeProperty
[docs]    def meigh(self):
        evecs = self.v.T
        evals = self.s**2
        return evals, evecs

    @OneTimeProperty
[docs]    def mdet(self):
        return self.meigh[0].prod()

    @OneTimeProperty
[docs]    def mlogdet(self):
        return np.log(self.meigh[0]).sum()

    @OneTimeProperty
[docs]    def mhalf(self):
        return np.dot(np.diag(self.s), self.v)

    @OneTimeProperty
[docs]    def xxthalf(self):
        return np.dot(self.u, self.sdiag)

    @OneTimeProperty
[docs]    def xxtinvhalf(self):
        return np.dot(self.u, self.sinvdiag)


[docs]class CholArray(PlainMatrixArray):
    '''Class that defines linalg operation on an array

    cholesky version, where svd is taken on original data array, if
    or when it matters

    plan: use cholesky factor and cholesky solve
    nothing implemented yet
    '''

[docs]    def __init__(self, data=None, sym=None):
        super(SvdArray, self).__init__(data=data, sym=sym)


[docs]    def yt_minv_y(self, y):
        '''xSigmainvx
        doesn't use stored cholesky yet
        '''
        return np.dot(x,linalg.cho_solve(linalg.cho_factor(self.m),x))
        #same as
        #lower = False   #if cholesky(sigma) is used, default is upper
        #np.dot(x,linalg.cho_solve((self.cholsigma, lower),x))



[docs]def testcompare(m1, m2):
    from numpy.testing import assert_almost_equal, assert_approx_equal
    decimal = 12

    #inv
    assert_almost_equal(m1.minv, m2.minv, decimal=decimal)

    #matrix half and invhalf
    #fix sign in test, should this be standardized
    s1 = np.sign(m1.mhalf.sum(1))[:,None]
    s2 = np.sign(m2.mhalf.sum(1))[:,None]
    scorr = s1/s2
    assert_almost_equal(m1.mhalf, m2.mhalf * scorr, decimal=decimal)
    assert_almost_equal(m1.minvhalf, m2.minvhalf, decimal=decimal)

    #eigenvalues, eigenvectors
    evals1, evecs1 = m1.meigh
    evals2, evecs2 = m2.meigh
    assert_almost_equal(evals1, evals2, decimal=decimal)
    #normalization can be different: evecs in columns
    s1 = np.sign(evecs1.sum(0))
    s2 = np.sign(evecs2.sum(0))
    scorr = s1/s2
    assert_almost_equal(evecs1, evecs2 * scorr, decimal=decimal)

    #determinant
    assert_approx_equal(m1.mdet, m2.mdet, significant=13)
    assert_approx_equal(m1.mlogdet, m2.mlogdet, significant=13)

####### helper function for interactive work
[docs]def tiny2zero(x, eps = 1e-15):
    '''replace abs values smaller than eps by zero, makes copy
    '''
    mask = np.abs(x.copy()) <  eps
    x[mask] = 0
    return x

[docs]def maxabs(x):
    return np.max(np.abs(x))


if __name__ == '__main__':


    n = 5
    y = np.arange(n)
    x = np.random.randn(100,n)
    autocov = 2*0.8**np.arange(n) +0.01 * np.random.randn(n)
    sigma = linalg.toeplitz(autocov)

    mat = PlainMatrixArray(sym=sigma)
    print(tiny2zero(mat.mhalf))
    mih = mat.minvhalf
    print(tiny2zero(mih)) #for nicer printing

    mat2 = PlainMatrixArray(data=x)
    print(maxabs(mat2.yt_minv_y(np.dot(x.T, x)) - mat2.m))
    print(tiny2zero(mat2.minv_y(mat2.m)))

    mat3 = SvdArray(data=x)
    print(mat3.meigh[0])
    print(mat2.meigh[0])

    testcompare(mat2, mat3)

    '''
    m = np.dot(x.T, x)

    u,s,v = np.linalg.svd(x, full_matrices=1)
    Sig = linalg.diagsvd(s,*x.shape)

    >>> np.max(np.abs(np.dot(u, np.dot(Sig, v)) - x))
    3.1086244689504383e-015
    >>> np.max(np.abs(np.dot(u.T, u) - np.eye(100)))
    3.3306690738754696e-016
    >>> np.max(np.abs(np.dot(v.T, v) - np.eye(5)))
    6.6613381477509392e-016
    >>> np.max(np.abs(np.dot(Sig.T, Sig) - np.diag(s**2)))
    5.6843418860808015e-014

    >>> evals,evecs = linalg.eigh(np.dot(x.T, x))
    >>> evals[::-1]
    array([ 123.36404464,  112.17036442,  102.04198468,   76.60832278,
             74.70484487])

    >>> s**2
    array([ 123.36404464,  112.17036442,  102.04198468,   76.60832278,
             74.70484487])

    >>> np.max(np.abs(np.dot(v.T, np.dot(np.diag(s**2), v)) - m))
    1.1368683772161603e-013

    >>> us = np.dot(u, Sig)
    >>> np.max(np.abs(np.dot(us, us.T) - np.dot(x, x.T)))
    1.0658141036401503e-014

    >>> sv = np.dot(Sig, v)
    >>> np.max(np.abs(np.dot(sv.T, sv) - np.dot(x.T, x)))
    1.1368683772161603e-013


    '''