Source code for statsmodels.sandbox.distributions.multivariate

'''Multivariate Distribution

Probability of a multivariate t distribution

Now also mvstnormcdf has tests against R mvtnorm

Still need non-central t, extra options, and convenience function for
location, scale version.

Author: Josef Perktold
License: BSD (3-clause)

Reference:
Genz and Bretz for formula

'''
from __future__ import print_function
import numpy as np
from scipy import integrate, stats, special
from scipy.stats import chi,chi2

from .extras import mvnormcdf, mvstdnormcdf, mvnormcdf

from numpy import exp as np_exp
from numpy import log as np_log
from scipy.special import gamma as sps_gamma
from scipy.special import gammaln as sps_gammaln

[docs]def chi2_pdf(self, x, df): '''pdf of chi-square distribution''' #from scipy.stats.distributions Px = x**(df/2.0-1)*np.exp(-x/2.0) Px /= special.gamma(df/2.0)* 2**(df/2.0) return Px
[docs]def chi_pdf(x, df): tmp = (df-1.)*np_log(x) + (-x*x*0.5) - (df*0.5-1)*np_log(2.0) \ - sps_gammaln(df*0.5) return np_exp(tmp)
#return x**(df-1.)*np_exp(-x*x*0.5)/(2.0)**(df*0.5-1)/sps_gamma(df*0.5)
[docs]def chi_logpdf(x, df): tmp = (df-1.)*np_log(x) + (-x*x*0.5) - (df*0.5-1)*np_log(2.0) \ - sps_gammaln(df*0.5) return tmp
[docs]def funbgh(s, a, b, R, df): sqrt_df = np.sqrt(df+0.5) ret = chi_logpdf(s,df) ret += np_log(mvstdnormcdf(s*a/sqrt_df, s*b/sqrt_df, R, maxpts=1000000, abseps=1e-6)) ret = np_exp(ret) return ret
[docs]def funbgh2(s, a, b, R, df): n = len(a) sqrt_df = np.sqrt(df) #np.power(s, df-1) * np_exp(-s*s*0.5) return np_exp((df-1)*np_log(s)-s*s*0.5) \ * mvstdnormcdf(s*a/sqrt_df, s*b/sqrt_df, R[np.tril_indices(n, -1)], maxpts=1000000, abseps=1e-4)
[docs]def bghfactor(df): return np.power(2.0, 1-df*0.5) / sps_gamma(df*0.5)
[docs]def mvstdtprob(a, b, R, df, ieps=1e-5, quadkwds=None, mvstkwds=None): '''probability of rectangular area of standard t distribution assumes mean is zero and R is correlation matrix Notes ----- This function does not calculate the estimate of the combined error between the underlying multivariate normal probability calculations and the integration. ''' kwds = dict(args=(a,b,R,df), epsabs=1e-4, epsrel=1e-2, limit=150) if not quadkwds is None: kwds.update(quadkwds) #print kwds res, err = integrate.quad(funbgh2, *chi.ppf([ieps,1-ieps], df), **kwds) prob = res * bghfactor(df) return prob
#written by Enzo Michelangeli, style changes by josef-pktd # Student's T random variable
[docs]def multivariate_t_rvs(m, S, df=np.inf, n=1): '''generate random variables of multivariate t distribution Parameters ---------- m : array_like mean of random variable, length determines dimension of random variable S : array_like square array of covariance matrix df : int or float degrees of freedom n : int number of observations, return random array will be (n, len(m)) Returns ------- rvs : ndarray, (n, len(m)) each row is an independent draw of a multivariate t distributed random variable ''' m = np.asarray(m) d = len(m) if df == np.inf: x = 1. else: x = np.random.chisquare(df, n)/df z = np.random.multivariate_normal(np.zeros(d),S,(n,)) return m + z/np.sqrt(x)[:,None] # same output format as random.multivariate_normal
if __name__ == '__main__': corr = np.asarray([[1.0, 0, 0.5],[0,1,0],[0.5,0,1]]) corr_indep = np.asarray([[1.0, 0, 0],[0,1,0],[0,0,1]]) corr_equal = np.asarray([[1.0, 0.5, 0.5],[0.5,1,0.5],[0.5,0.5,1]]) R = corr_equal a = np.array([-np.inf,-np.inf,-100.0]) a = np.array([-0.96,-0.96,-0.96]) b = np.array([0.0,0.0,0.0]) b = np.array([0.96,0.96, 0.96]) a[:] = -1 b[:] = 3 df = 10. sqrt_df = np.sqrt(df) print(mvstdnormcdf(a, b, corr, abseps=1e-6)) #print integrate.quad(funbgh, 0, np.inf, args=(a,b,R,df)) print((stats.t.cdf(b[0], df) - stats.t.cdf(a[0], df))**3) s = 1 print(mvstdnormcdf(s*a/sqrt_df, s*b/sqrt_df, R)) df=4 print(mvstdtprob(a, b, R, df)) S = np.array([[1.,.5],[.5,1.]]) print(multivariate_t_rvs([10.,20.], S, 2, 5)) nobs = 10000 rvst = multivariate_t_rvs([10.,20.], S, 2, nobs) print(np.sum((rvst<[10.,20.]).all(1),0) * 1. / nobs) print(mvstdtprob(-np.inf*np.ones(2), np.zeros(2), R[:2,:2], 2)) ''' > lower <- -1 > upper <- 3 > df <- 4 > corr <- diag(3) > delta <- rep(0, 3) > pmvt(lower=lower, upper=upper, delta=delta, df=df, corr=corr) [1] 0.5300413 attr(,"error") [1] 4.321136e-05 attr(,"msg") [1] "Normal Completion" > (pt(upper, df) - pt(lower, df))**3 [1] 0.4988254 '''