'''Quantizing a continuous distribution in 2d
Author: josef-pktd
'''
from __future__ import print_function
from statsmodels.compat.python import range, lmap
import numpy as np
[docs]def prob_bv_rectangle(lower, upper, cdf):
'''helper function for probability of a rectangle in a bivariate distribution
Parameters
----------
lower : array_like
tuple of lower integration bounds
upper : array_like
tuple of upper integration bounds
cdf : callable
cdf(x,y), cumulative distribution function of bivariate distribution
how does this generalize to more than 2 variates ?
'''
probuu = cdf(*upper)
probul = cdf(upper[0], lower[1])
problu = cdf(lower[0], upper[1])
probll = cdf(*lower)
return probuu - probul - problu + probll
[docs]def prob_mv_grid(bins, cdf, axis=-1):
'''helper function for probability of a rectangle grid in a multivariate distribution
how does this generalize to more than 2 variates ?
bins : tuple
tuple of bin edges, currently it is assumed that they broadcast
correctly
'''
if not isinstance(bins, np.ndarray):
bins = lmap(np.asarray, bins)
n_dim = len(bins)
bins_ = []
#broadcast if binedges are 1d
if all(lmap(np.ndim, bins) == np.ones(n_dim)):
for d in range(n_dim):
sl = [None]*n_dim
sl[d] = slice(None)
bins_.append(bins[d][sl])
else: #assume it is already correctly broadcasted
n_dim = bins.shape[0]
bins_ = bins
print(len(bins))
cdf_values = cdf(bins_)
probs = cdf_values.copy()
for d in range(n_dim):
probs = np.diff(probs, axis=d)
return probs
[docs]def prob_quantize_cdf(binsx, binsy, cdf):
'''quantize a continuous distribution given by a cdf
Parameters
----------
binsx : array_like, 1d
binedges
'''
binsx = np.asarray(binsx)
binsy = np.asarray(binsy)
nx = len(binsx) - 1
ny = len(binsy) - 1
probs = np.nan * np.ones((nx, ny)) #np.empty(nx,ny)
cdf_values = cdf(binsx[:,None], binsy)
cdf_func = lambda x, y: cdf_values[x,y]
for xind in range(1, nx+1):
for yind in range(1, ny+1):
upper = (xind, yind)
lower = (xind-1, yind-1)
#print upper,lower,
probs[xind-1,yind-1] = prob_bv_rectangle(lower, upper, cdf_func)
assert not np.isnan(probs).any()
return probs
[docs]def prob_quantize_cdf_old(binsx, binsy, cdf):
'''quantize a continuous distribution given by a cdf
old version without precomputing cdf values
Parameters
----------
binsx : array_like, 1d
binedges
'''
binsx = np.asarray(binsx)
binsy = np.asarray(binsy)
nx = len(binsx) - 1
ny = len(binsy) - 1
probs = np.nan * np.ones((nx, ny)) #np.empty(nx,ny)
for xind in range(1, nx+1):
for yind in range(1, ny+1):
upper = (binsx[xind], binsy[yind])
lower = (binsx[xind-1], binsy[yind-1])
#print upper,lower,
probs[xind-1,yind-1] = prob_bv_rectangle(lower, upper, cdf)
assert not np.isnan(probs).any()
return probs
if __name__ == '__main__':
from numpy.testing import assert_almost_equal
unif_2d = lambda x,y: x*y
assert_almost_equal(prob_bv_rectangle([0,0], [1,0.5], unif_2d), 0.5, 14)
assert_almost_equal(prob_bv_rectangle([0,0], [0.5,0.5], unif_2d), 0.25, 14)
arr1b = np.array([[ 0.05, 0.05, 0.05, 0.05],
[ 0.05, 0.05, 0.05, 0.05],
[ 0.05, 0.05, 0.05, 0.05],
[ 0.05, 0.05, 0.05, 0.05],
[ 0.05, 0.05, 0.05, 0.05]])
arr1a = prob_quantize_cdf(np.linspace(0,1,6), np.linspace(0,1,5), unif_2d)
assert_almost_equal(arr1a, arr1b, 14)
arr2b = np.array([[ 0.25],
[ 0.25],
[ 0.25],
[ 0.25]])
arr2a = prob_quantize_cdf(np.linspace(0,1,5), np.linspace(0,1,2), unif_2d)
assert_almost_equal(arr2a, arr2b, 14)
arr3b = np.array([[ 0.25, 0.25, 0.25, 0.25]])
arr3a = prob_quantize_cdf(np.linspace(0,1,2), np.linspace(0,1,5), unif_2d)
assert_almost_equal(arr3a, arr3b, 14)