import numpy as np
from numpy import (dot, eye, diag_indices, zeros, column_stack, ones, diag,
asarray, r_)
from numpy.linalg import inv, solve
#from scipy.linalg import block_diag
from scipy import linalg
#def denton(indicator, benchmark, freq="aq", **kwarg):
# """
# Denton's method to convert low-frequency to high frequency data.
#
# Parameters
# ----------
# benchmark : array-like
# The higher frequency benchmark. A 1d or 2d data series in columns.
# If 2d, then M series are assumed.
# indicator
# A low-frequency indicator series. It is assumed that there are no
# pre-sample indicators. Ie., the first indicators line up with
# the first benchmark.
# freq : str {"aq","qm", "other"}
# "aq" - Benchmarking an annual series to quarterly.
# "mq" - Benchmarking a quarterly series to monthly.
# "other" - Custom stride. A kwarg, k, must be supplied.
# kwargs :
# k : int
# The number of high-frequency observations that sum to make an
# aggregate low-frequency observation. `k` is used with
# `freq` == "other".
# Returns
# -------
# benchmarked series : array
#
# Notes
# -----
# Denton's method minimizes the distance given by the penalty function, in
# a least squares sense, between the unknown benchmarked series and the
# indicator series subject to the condition that the sum of the benchmarked
# series is equal to the benchmark.
#
#
# References
# ----------
# Bloem, A.M, Dippelsman, R.J. and Maehle, N.O. 2001 Quarterly National
# Accounts Manual--Concepts, Data Sources, and Compilation. IMF.
# http://www.imf.org/external/pubs/ft/qna/2000/Textbook/index.htm
# Denton, F.T. 1971. "Adjustment of monthly or quarterly series to annual
# totals: an approach based on quadratic minimization." Journal of the
# American Statistical Association. 99-102.
#
# """
# # check arrays and make 2d
# indicator = np.asarray(indicator)
# if indicator.ndim == 1:
# indicator = indicator[:,None]
# benchmark = np.asarray(benchmark)
# if benchmark.ndim == 1:
# benchmark = benchmark[:,None]
#
# # get dimensions
# N = len(indicator) # total number of high-freq
# m = len(benchmark) # total number of low-freq
#
# # number of low-freq observations for aggregate measure
# # 4 for annual to quarter and 3 for quarter to monthly
# if freq == "aq":
# k = 4
# elif freq == "qm":
# k = 3
# elif freq == "other":
# k = kwargs.get("k")
# if not k:
# raise ValueError("k must be supplied with freq=\"other\"")
# else:
# raise ValueError("freq %s not understood" % freq)
#
# n = k*m # number of indicator series with a benchmark for back-series
# # if k*m != n, then we are going to extrapolate q observations
#
# B = block_diag(*(np.ones((k,1)),)*m)
#
# r = benchmark - B.T.dot(indicator)
#TODO: take code in the string at the end and implement Denton's original
# method with a few of the penalty functions.
[docs]def dentonm(indicator, benchmark, freq="aq", **kwargs):
"""
Modified Denton's method to convert low-frequency to high-frequency data.
Uses proportionate first-differences as the penalty function. See notes.
Parameters
----------
indicator
A low-frequency indicator series. It is assumed that there are no
pre-sample indicators. Ie., the first indicators line up with
the first benchmark.
benchmark : array-like
The higher frequency benchmark. A 1d or 2d data series in columns.
If 2d, then M series are assumed.
freq : str {"aq","qm", "other"}
"aq" - Benchmarking an annual series to quarterly.
"mq" - Benchmarking a quarterly series to monthly.
"other" - Custom stride. A kwarg, k, must be supplied.
kwargs :
k : int
The number of high-frequency observations that sum to make an
aggregate low-frequency observation. `k` is used with
`freq` == "other".
Returns
-------
benchmarked series : array
Examples
--------
>>> indicator = [50,100,150,100] * 5
>>> benchmark = [500,400,300,400,500]
>>> benchmarked = dentonm(indicator, benchmark, freq="aq")
Notes
-----
Denton's method minimizes the distance given by the penalty function, in
a least squares sense, between the unknown benchmarked series and the
indicator series subject to the condition that the sum of the benchmarked
series is equal to the benchmark. The modification allows that the first
value not be pre-determined as is the case with Denton's original method.
If the there is no benchmark provided for the last few indicator
observations, then extrapolation is performed using the last
benchmark-indicator ratio of the previous period.
Minimizes sum((X[t]/I[t] - X[t-1]/I[t-1])**2)
s.t.
sum(X) = A, for each period. Where X is the benchmarked series, I is
the indicator, and A is the benchmark.
References
----------
Bloem, A.M, Dippelsman, R.J. and Maehle, N.O. 2001 Quarterly National
Accounts Manual--Concepts, Data Sources, and Compilation. IMF.
http://www.imf.org/external/pubs/ft/qna/2000/Textbook/index.htm
Cholette, P. 1988. "Benchmarking systems of socio-economic time series."
Statistics Canada, Time Series Research and Analysis Division,
Working Paper No TSRA-88-017E.
Denton, F.T. 1971. "Adjustment of monthly or quarterly series to annual
totals: an approach based on quadratic minimization." Journal of the
American Statistical Association. 99-102.
"""
# penalty : str
# Penalty function. Can be "D1", "D2", "D3", "D4", "D5".
# X is the benchmarked series and I is the indicator.
# D1 - sum((X[t] - X[t-1]) - (I[t] - I[ti-1])**2)
# D2 - sum((ln(X[t]/X[t-1]) - ln(I[t]/I[t-1]))**2)
# D3 - sum((X[t]/X[t-1] / I[t]/I[t-1])**2)
# D4 - sum((X[t]/I[t] - X[t-1]/I[t-1])**2)
# D5 - sum((X[t]/I[t] / X[t-1]/I[t-1] - 1)**2)
#NOTE: only D4 is the only one implemented, see IMF chapter 6.
# check arrays and make 2d
indicator = asarray(indicator)
if indicator.ndim == 1:
indicator = indicator[:,None]
benchmark = asarray(benchmark)
if benchmark.ndim == 1:
benchmark = benchmark[:,None]
# get dimensions
N = len(indicator) # total number of high-freq
m = len(benchmark) # total number of low-freq
# number of low-freq observations for aggregate measure
# 4 for annual to quarter and 3 for quarter to monthly
if freq == "aq":
k = 4
elif freq == "qm":
k = 3
elif freq == "other":
k = kwargs.get("k")
if not k:
raise ValueError("k must be supplied with freq=\"other\"")
else:
raise ValueError("freq %s not understood" % freq)
n = k*m # number of indicator series with a benchmark for back-series
# if k*m != n, then we are going to extrapolate q observations
if N > n:
q = N - n
else:
q = 0
# make the aggregator matrix
#B = block_diag(*(ones((k,1)),)*m)
B = np.kron(np.eye(m), ones((k,1)))
# following the IMF paper, we can do
Zinv = diag(1./indicator.squeeze()[:n])
# this is D in Denton's notation (not using initial value correction)
# D = eye(n)
# make off-diagonal = -1
# D[((np.diag_indices(n)[0])[:-1]+1,(np.diag_indices(n)[1])[:-1])] = -1
# account for starting conditions
# H = D[1:,:]
# HTH = dot(H.T,H)
# just make HTH
HTH = eye(n)
diag_idx0, diag_idx1 = diag_indices(n)
HTH[diag_idx0[1:-1], diag_idx1[1:-1]] += 1
HTH[diag_idx0[:-1]+1, diag_idx1[:-1]] = -1
HTH[diag_idx0[:-1], diag_idx1[:-1]+1] = -1
W = dot(dot(Zinv,HTH),Zinv)
# make partitioned matrices
#TODO: break this out so that we can simplify the linalg?
I = zeros((n+m,n+m))
I[:n,:n] = W
I[:n,n:] = B
I[n:,:n] = B.T
A = zeros((m+n,1)) # zero first-order constraints
A[-m:] = benchmark # adding up constraints
X = solve(I,A)
X = X[:-m] # drop the lagrange multipliers
# handle extrapolation
if q > 0:
# get last Benchmark-Indicator ratio
bi = X[n-1]/indicator[n-1]
extrapolated = bi * indicator[n:]
X = r_[X,extrapolated]
return X.squeeze()
if __name__ == "__main__":
import numpy as np
#these will be the tests
# from IMF paper
# quarterly data
indicator = np.array([98.2, 100.8, 102.2, 100.8, 99.0, 101.6,
102.7, 101.5, 100.5, 103.0, 103.5, 101.5])
# two annual observations
benchmark = np.array([4000.,4161.4])
x_imf = dentonm(indicator, benchmark, freq="aq")
imf_stata = np.array([969.8, 998.4, 1018.3, 1013.4, 1007.2, 1042.9,
1060.3, 1051.0, 1040.6, 1066.5, 1071.7, 1051.0])
np.testing.assert_almost_equal(imf_stata, x_imf, 1)
# Denton example
zQ = np.array([50,100,150,100] * 5)
Y = np.array([500,400,300,400,500])
x_denton = dentonm(zQ, Y, freq="aq")
x_stata = np.array([64.334796,127.80616,187.82379,120.03526,56.563894,
105.97568,147.50144,89.958987,40.547201,74.445963,
108.34473,76.66211,42.763347,94.14664,153.41596,
109.67405,58.290761,122.62556,190.41409,128.66959])
"""
# Examples from the Denton 1971 paper
k = 4
m = 5
n = m*k
zQ = [50,100,150,100] * m
Y = [500,400,300,400,500]
A = np.eye(n)
B = block_diag(*(np.ones((k,1)),)*m)
r = Y - B.T.dot(zQ)
#Ainv = inv(A)
Ainv = A # shortcut for identity
C = Ainv.dot(B).dot(inv(B.T.dot(Ainv).dot(B)))
x = zQ + C.dot(r)
# minimize first difference d(x-z)
R = linalg.tri(n, dtype=float) # R is tril so actually R.T in paper
Ainv = R.dot(R.T)
C = Ainv.dot(B).dot(inv(B.T.dot(Ainv).dot(B)))
x1 = zQ + C.dot(r)
# minimize the second difference d**2(x-z)
Ainv = R.dot(Ainv).dot(R.T)
C = Ainv.dot(B).dot(inv(B.T.dot(Ainv).dot(B)))
x12 = zQ + C.dot(r)
# # do it proportionately (x-z)/z
Z = np.diag(zQ)
Ainv = np.eye(n)
C = Z.dot(Ainv).dot(Z).dot(B).dot(inv(B.T.dot(Z).dot(Ainv).dot(Z).dot(B)))
x11 = zQ + C.dot(r)
# do it proportionately with differencing d((x-z)/z)
Ainv = R.dot(R.T)
C = Z.dot(Ainv).dot(Z).dot(B).dot(inv(B.T.dot(Z).dot(Ainv).dot(Z).dot(B)))
x111 = zQ + C.dot(r)
x_stata = np.array([64.334796,127.80616,187.82379,120.03526,56.563894,
105.97568,147.50144,89.958987,40.547201,74.445963,
108.34473,76.66211,42.763347,94.14664,153.41596,
109.67405,58.290761,122.62556,190.41409,128.66959])
"""