'''Multivariate Distribution
Probability of a multivariate t distribution
Now also mvstnormcdf has tests against R mvtnorm
Still need non-central t, extra options, and convenience function for
location, scale version.
Author: Josef Perktold
License: BSD (3-clause)
Reference:
Genz and Bretz for formula
'''
from __future__ import print_function
import numpy as np
from scipy import integrate, stats, special
from scipy.stats import chi,chi2
from .extras import mvnormcdf, mvstdnormcdf, mvnormcdf
from numpy import exp as np_exp
from numpy import log as np_log
from scipy.special import gamma as sps_gamma
from scipy.special import gammaln as sps_gammaln
[docs]def chi2_pdf(self, x, df):
'''pdf of chi-square distribution'''
#from scipy.stats.distributions
Px = x**(df/2.0-1)*np.exp(-x/2.0)
Px /= special.gamma(df/2.0)* 2**(df/2.0)
return Px
[docs]def chi_pdf(x, df):
tmp = (df-1.)*np_log(x) + (-x*x*0.5) - (df*0.5-1)*np_log(2.0) \
- sps_gammaln(df*0.5)
return np_exp(tmp)
#return x**(df-1.)*np_exp(-x*x*0.5)/(2.0)**(df*0.5-1)/sps_gamma(df*0.5)
[docs]def chi_logpdf(x, df):
tmp = (df-1.)*np_log(x) + (-x*x*0.5) - (df*0.5-1)*np_log(2.0) \
- sps_gammaln(df*0.5)
return tmp
[docs]def funbgh(s, a, b, R, df):
sqrt_df = np.sqrt(df+0.5)
ret = chi_logpdf(s,df)
ret += np_log(mvstdnormcdf(s*a/sqrt_df, s*b/sqrt_df, R,
maxpts=1000000, abseps=1e-6))
ret = np_exp(ret)
return ret
[docs]def funbgh2(s, a, b, R, df):
n = len(a)
sqrt_df = np.sqrt(df)
#np.power(s, df-1) * np_exp(-s*s*0.5)
return np_exp((df-1)*np_log(s)-s*s*0.5) \
* mvstdnormcdf(s*a/sqrt_df, s*b/sqrt_df, R[np.tril_indices(n, -1)],
maxpts=1000000, abseps=1e-4)
[docs]def bghfactor(df):
return np.power(2.0, 1-df*0.5) / sps_gamma(df*0.5)
[docs]def mvstdtprob(a, b, R, df, ieps=1e-5, quadkwds=None, mvstkwds=None):
'''probability of rectangular area of standard t distribution
assumes mean is zero and R is correlation matrix
Notes
-----
This function does not calculate the estimate of the combined error
between the underlying multivariate normal probability calculations
and the integration.
'''
kwds = dict(args=(a,b,R,df), epsabs=1e-4, epsrel=1e-2, limit=150)
if not quadkwds is None:
kwds.update(quadkwds)
#print kwds
res, err = integrate.quad(funbgh2, *chi.ppf([ieps,1-ieps], df),
**kwds)
prob = res * bghfactor(df)
return prob
#written by Enzo Michelangeli, style changes by josef-pktd
# Student's T random variable
[docs]def multivariate_t_rvs(m, S, df=np.inf, n=1):
'''generate random variables of multivariate t distribution
Parameters
----------
m : array_like
mean of random variable, length determines dimension of random variable
S : array_like
square array of covariance matrix
df : int or float
degrees of freedom
n : int
number of observations, return random array will be (n, len(m))
Returns
-------
rvs : ndarray, (n, len(m))
each row is an independent draw of a multivariate t distributed
random variable
'''
m = np.asarray(m)
d = len(m)
if df == np.inf:
x = 1.
else:
x = np.random.chisquare(df, n)/df
z = np.random.multivariate_normal(np.zeros(d),S,(n,))
return m + z/np.sqrt(x)[:,None] # same output format as random.multivariate_normal
if __name__ == '__main__':
corr = np.asarray([[1.0, 0, 0.5],[0,1,0],[0.5,0,1]])
corr_indep = np.asarray([[1.0, 0, 0],[0,1,0],[0,0,1]])
corr_equal = np.asarray([[1.0, 0.5, 0.5],[0.5,1,0.5],[0.5,0.5,1]])
R = corr_equal
a = np.array([-np.inf,-np.inf,-100.0])
a = np.array([-0.96,-0.96,-0.96])
b = np.array([0.0,0.0,0.0])
b = np.array([0.96,0.96, 0.96])
a[:] = -1
b[:] = 3
df = 10.
sqrt_df = np.sqrt(df)
print(mvstdnormcdf(a, b, corr, abseps=1e-6))
#print integrate.quad(funbgh, 0, np.inf, args=(a,b,R,df))
print((stats.t.cdf(b[0], df) - stats.t.cdf(a[0], df))**3)
s = 1
print(mvstdnormcdf(s*a/sqrt_df, s*b/sqrt_df, R))
df=4
print(mvstdtprob(a, b, R, df))
S = np.array([[1.,.5],[.5,1.]])
print(multivariate_t_rvs([10.,20.], S, 2, 5))
nobs = 10000
rvst = multivariate_t_rvs([10.,20.], S, 2, nobs)
print(np.sum((rvst<[10.,20.]).all(1),0) * 1. / nobs)
print(mvstdtprob(-np.inf*np.ones(2), np.zeros(2), R[:2,:2], 2))
'''
> lower <- -1
> upper <- 3
> df <- 4
> corr <- diag(3)
> delta <- rep(0, 3)
> pmvt(lower=lower, upper=upper, delta=delta, df=df, corr=corr)
[1] 0.5300413
attr(,"error")
[1] 4.321136e-05
attr(,"msg")
[1] "Normal Completion"
> (pt(upper, df) - pt(lower, df))**3
[1] 0.4988254
'''