Source code for statsmodels.sandbox.distributions.quantize

'''Quantizing a continuous distribution in 2d

Author: josef-pktd
'''
from __future__ import print_function
from statsmodels.compat.python import range, lmap
import numpy as np

[docs]def prob_bv_rectangle(lower, upper, cdf): '''helper function for probability of a rectangle in a bivariate distribution Parameters ---------- lower : array_like tuple of lower integration bounds upper : array_like tuple of upper integration bounds cdf : callable cdf(x,y), cumulative distribution function of bivariate distribution how does this generalize to more than 2 variates ? ''' probuu = cdf(*upper) probul = cdf(upper[0], lower[1]) problu = cdf(lower[0], upper[1]) probll = cdf(*lower) return probuu - probul - problu + probll
[docs]def prob_mv_grid(bins, cdf, axis=-1): '''helper function for probability of a rectangle grid in a multivariate distribution how does this generalize to more than 2 variates ? bins : tuple tuple of bin edges, currently it is assumed that they broadcast correctly ''' if not isinstance(bins, np.ndarray): bins = lmap(np.asarray, bins) n_dim = len(bins) bins_ = [] #broadcast if binedges are 1d if all(lmap(np.ndim, bins) == np.ones(n_dim)): for d in range(n_dim): sl = [None]*n_dim sl[d] = slice(None) bins_.append(bins[d][sl]) else: #assume it is already correctly broadcasted n_dim = bins.shape[0] bins_ = bins print(len(bins)) cdf_values = cdf(bins_) probs = cdf_values.copy() for d in range(n_dim): probs = np.diff(probs, axis=d) return probs
[docs]def prob_quantize_cdf(binsx, binsy, cdf): '''quantize a continuous distribution given by a cdf Parameters ---------- binsx : array_like, 1d binedges ''' binsx = np.asarray(binsx) binsy = np.asarray(binsy) nx = len(binsx) - 1 ny = len(binsy) - 1 probs = np.nan * np.ones((nx, ny)) #np.empty(nx,ny) cdf_values = cdf(binsx[:,None], binsy) cdf_func = lambda x, y: cdf_values[x,y] for xind in range(1, nx+1): for yind in range(1, ny+1): upper = (xind, yind) lower = (xind-1, yind-1) #print upper,lower, probs[xind-1,yind-1] = prob_bv_rectangle(lower, upper, cdf_func) assert not np.isnan(probs).any() return probs
[docs]def prob_quantize_cdf_old(binsx, binsy, cdf): '''quantize a continuous distribution given by a cdf old version without precomputing cdf values Parameters ---------- binsx : array_like, 1d binedges ''' binsx = np.asarray(binsx) binsy = np.asarray(binsy) nx = len(binsx) - 1 ny = len(binsy) - 1 probs = np.nan * np.ones((nx, ny)) #np.empty(nx,ny) for xind in range(1, nx+1): for yind in range(1, ny+1): upper = (binsx[xind], binsy[yind]) lower = (binsx[xind-1], binsy[yind-1]) #print upper,lower, probs[xind-1,yind-1] = prob_bv_rectangle(lower, upper, cdf) assert not np.isnan(probs).any() return probs
if __name__ == '__main__': from numpy.testing import assert_almost_equal unif_2d = lambda x,y: x*y assert_almost_equal(prob_bv_rectangle([0,0], [1,0.5], unif_2d), 0.5, 14) assert_almost_equal(prob_bv_rectangle([0,0], [0.5,0.5], unif_2d), 0.25, 14) arr1b = np.array([[ 0.05, 0.05, 0.05, 0.05], [ 0.05, 0.05, 0.05, 0.05], [ 0.05, 0.05, 0.05, 0.05], [ 0.05, 0.05, 0.05, 0.05], [ 0.05, 0.05, 0.05, 0.05]]) arr1a = prob_quantize_cdf(np.linspace(0,1,6), np.linspace(0,1,5), unif_2d) assert_almost_equal(arr1a, arr1b, 14) arr2b = np.array([[ 0.25], [ 0.25], [ 0.25], [ 0.25]]) arr2a = prob_quantize_cdf(np.linspace(0,1,5), np.linspace(0,1,2), unif_2d) assert_almost_equal(arr2a, arr2b, 14) arr3b = np.array([[ 0.25, 0.25, 0.25, 0.25]]) arr3a = prob_quantize_cdf(np.linspace(0,1,2), np.linspace(0,1,5), unif_2d) assert_almost_equal(arr3a, arr3b, 14)