# Source code for statsmodels.sandbox.tsa.fftarma

# -*- coding: utf-8 -*-
"""
Created on Mon Dec 14 19:53:25 2009

Author: josef-pktd

generate arma sample using fft with all the lfilter it looks slow
to get the ma representation first

apply arma filter (in ar representation) to time series to get white noise
but seems slow to be useful for fast estimation for nobs=10000

change/check: instead of using marep, use fft-transform of ar and ma
    separately, use ratio check theory is correct and example works
    DONE : feels much faster than lfilter
    -> use for estimation of ARMA
    -> use pade (scipy.misc) approximation to get starting polynomial
       from autocorrelation (is autocorrelation of AR(p) related to marep?)
       check if pade is fast, not for larger arrays ?
       maybe pade doesn't do the right thing for this, not tried yet
       scipy.pade([ 1.    ,  0.6,  0.25, 0.125, 0.0625, 0.1],2)
       raises LinAlgError: singular matrix
       also doesn't have roots inside unit circle ??
    -> even without initialization, it might be fast for estimation
    -> how do I enforce stationarity and invertibility,
       need helper function

get function drop imag if close to zero from numpy/scipy source, where?

"""

from __future__ import print_function
import numpy as np
import numpy.fft as fft
#import scipy.fftpack as fft
from scipy import signal
#from try_var_convolve import maxabs
from statsmodels.sandbox.archive.linalg_decomp_1 import OneTimeProperty
from statsmodels.tsa.arima_process import ArmaProcess


#trying to convert old experiments to a class


class ArmaFft(ArmaProcess):
    '''fft tools for arma processes

    This class contains several methods that are providing the same or similar
    returns to try out and test different implementations.

    Notes
    -----
    TODO:
    check whether we don't want to fix maxlags, and create new instance if
    maxlag changes. usage for different lengths of timeseries ?
    or fix frequency and length for fft

    check default frequencies w, terminology norw  n_or_w

    some ffts are currently done without padding with zeros

    returns for spectral density methods needs checking, is it always the power
    spectrum hw*hw.conj()

    normalization of the power spectrum, spectral density: not checked yet, for
    example no variance of underlying process is used

    '''

[docs]    def __init__(self, ar, ma, n):
        #duplicates now that are subclassing ArmaProcess
        super(ArmaFft, self).__init__(ar, ma)

        self.ar = np.asarray(ar)
        self.ma = np.asarray(ma)
        self.nobs = n
        #could make the polynomials into cached attributes
        self.arpoly = np.polynomial.Polynomial(ar)
        self.mapoly = np.polynomial.Polynomial(ma)
        self.nar = len(ar)  #1d only currently
        self.nma = len(ma)

[docs]    def padarr(self, arr, maxlag, atend=True):
        '''pad 1d array with zeros at end to have length maxlag
        function that is a method, no self used

        Parameters
        ----------
        arr : array_like, 1d
            array that will be padded with zeros
        maxlag : int
            length of array after padding
        atend : boolean
            If True (default), then the zeros are added to the end, otherwise
            to the front of the array

        Returns
        -------
        arrp : ndarray
            zero-padded array

        Notes
        -----
        This is mainly written to extend coefficient arrays for the lag-polynomials.
        It returns a copy.

        '''
        if atend:
            return np.r_[arr, np.zeros(maxlag-len(arr))]
        else:
            return np.r_[np.zeros(maxlag-len(arr)), arr]


[docs]    def pad(self, maxlag):
        '''construct AR and MA polynomials that are zero-padded to a common length

        Parameters
        ----------
        maxlag : int
            new length of lag-polynomials

        Returns
        -------
        ar : ndarray
            extended AR polynomial coefficients
        ma : ndarray
            extended AR polynomial coefficients

        '''
        arpad = np.r_[self.ar, np.zeros(maxlag-self.nar)]
        mapad = np.r_[self.ma, np.zeros(maxlag-self.nma)]
        return arpad, mapad

[docs]    def fftar(self, n=None):
        '''Fourier transform of AR polynomial, zero-padded at end to n

        Parameters
        ----------
        n : int
            length of array after zero-padding

        Returns
        -------
        fftar : ndarray
            fft of zero-padded ar polynomial
        '''
        if n is None:
            n = len(self.ar)
        return fft.fft(self.padarr(self.ar, n))

[docs]    def fftma(self, n):
        '''Fourier transform of MA polynomial, zero-padded at end to n

        Parameters
        ----------
        n : int
            length of array after zero-padding

        Returns
        -------
        fftar : ndarray
            fft of zero-padded ar polynomial
        '''
        if n is None:
            n = len(self.ar)
        return fft.fft(self.padarr(self.ma, n))

    #@OneTimeProperty  # not while still debugging things
[docs]    def fftarma(self, n=None):
        '''Fourier transform of ARMA polynomial, zero-padded at end to n

        The Fourier transform of the ARMA process is calculated as the ratio
        of the fft of the MA polynomial divided by the fft of the AR polynomial.

        Parameters
        ----------
        n : int
            length of array after zero-padding

        Returns
        -------
        fftarma : ndarray
            fft of zero-padded arma polynomial
        '''
        if n is None:
            n = self.nobs
        return (self.fftma(n) / self.fftar(n))

[docs]    def spd(self, npos):
        '''raw spectral density, returns Fourier transform

        n is number of points in positive spectrum, the actual number of points
        is twice as large. different from other spd methods with fft
        '''
        n = npos
        w = fft.fftfreq(2*n) * 2 * np.pi
        hw = self.fftarma(2*n)  #not sure, need to check normalization
        #return (hw*hw.conj()).real[n//2-1:]  * 0.5 / np.pi #doesn't show in plot
        return (hw*hw.conj()).real * 0.5 / np.pi, w

[docs]    def spdshift(self, n):
        '''power spectral density using fftshift

        currently returns two-sided according to fft frequencies, use first half
        '''
        #size = s1+s2-1
        mapadded = self.padarr(self.ma, n)
        arpadded = self.padarr(self.ar, n)
        hw = fft.fft(fft.fftshift(mapadded)) / fft.fft(fft.fftshift(arpadded))
        #return np.abs(spd)[n//2-1:]
        w = fft.fftfreq(n) * 2 * np.pi
        wslice = slice(n//2-1, None, None)
        #return (hw*hw.conj()).real[wslice], w[wslice]
        return (hw*hw.conj()).real, w

[docs]    def spddirect(self, n):
        '''power spectral density using padding to length n done by fft

        currently returns two-sided according to fft frequencies, use first half
        '''
        #size = s1+s2-1
        #abs looks wrong
        hw = fft.fft(self.ma, n) / fft.fft(self.ar, n)
        w = fft.fftfreq(n) * 2 * np.pi
        wslice = slice(None, n//2, None)
        #return (np.abs(hw)**2)[wslice], w[wslice]
        return (np.abs(hw)**2) * 0.5/np.pi, w

    def _spddirect2(self, n):
        '''this looks bad, maybe with an fftshift
        '''
        #size = s1+s2-1
        hw = (fft.fft(np.r_[self.ma[::-1],self.ma], n)
                / fft.fft(np.r_[self.ar[::-1],self.ar], n))
        return (hw*hw.conj()) #.real[n//2-1:]

[docs]    def spdroots(self, w):
        '''spectral density for frequency using polynomial roots

        builds two arrays (number of roots, number of frequencies)
        '''
        return self.spdroots_(self.arroots, self.maroots, w)

[docs]    def spdroots_(self, arroots, maroots, w):
        '''spectral density for frequency using polynomial roots

        builds two arrays (number of roots, number of frequencies)

        Parameters
        ----------
        arroots : ndarray
            roots of ar (denominator) lag-polynomial
        maroots : ndarray
            roots of ma (numerator) lag-polynomial
        w : array_like
            frequencies for which spd is calculated

        Notes
        -----
        this should go into a function
        '''
        w = np.atleast_2d(w).T
        cosw = np.cos(w)
        #Greene 5th edt. p626, section 20.2.7.a.
        maroots = 1./maroots
        arroots = 1./arroots
        num = 1 + maroots**2 - 2* maroots * cosw
        den = 1 + arroots**2 - 2* arroots * cosw
        #print 'num.shape, den.shape', num.shape, den.shape
        hw = 0.5 / np.pi * num.prod(-1) / den.prod(-1) #or use expsumlog
        return np.squeeze(hw), w.squeeze()

[docs]    def spdpoly(self, w, nma=50):
        '''spectral density from MA polynomial representation for ARMA process

        References
        ----------
        Cochrane, section 8.3.3
        '''
        mpoly = np.polynomial.Polynomial(self.arma2ma(nma))
        hw = mpoly(np.exp(1j * w))
        spd = np.real_if_close(hw * hw.conj() * 0.5/np.pi)
        return spd, w

[docs]    def filter(self, x):
        '''
        filter a timeseries with the ARMA filter

        padding with zero is missing, in example I needed the padding to get
        initial conditions identical to direct filter

        Initial filtered observations differ from filter2 and signal.lfilter, but
        at end they are the same.

        See Also
        --------
        tsa.filters.fftconvolve

        '''
        n = x.shape[0]
        if n == self.fftarma:
            fftarma = self.fftarma
        else:
            fftarma = self.fftma(n) / self.fftar(n)
        tmpfft = fftarma * fft.fft(x)
        return fft.ifft(tmpfft)

[docs]    def filter2(self, x, pad=0):
        '''filter a time series using fftconvolve3 with ARMA filter

        padding of x currently works only if x is 1d
        in example it produces same observations at beginning as lfilter even
        without padding.

        TODO: this returns 1 additional observation at the end
        '''
        from statsmodels.tsa.filters import fftconvolve3
        if not pad:
            pass
        elif pad == 'auto':
            #just guessing how much padding
            x = self.padarr(x, x.shape[0] + 2*(self.nma+self.nar), atend=False)
        else:
            x = self.padarr(x, x.shape[0] + int(pad), atend=False)

        return fftconvolve3(x, self.ma, self.ar)


[docs]    def acf2spdfreq(self, acovf, nfreq=100, w=None):
        '''
        not really a method
        just for comparison, not efficient for large n or long acf

        this is also similarly use in tsa.stattools.periodogram with window
        '''
        if w is None:
            w = np.linspace(0, np.pi, nfreq)[:, None]
        nac = len(acovf)
        hw = 0.5 / np.pi * (acovf[0] +
                            2 * (acovf[1:] * np.cos(w*np.arange(1,nac))).sum(1))
        return hw

[docs]    def invpowerspd(self, n):
        '''autocovariance from spectral density

        scaling is correct, but n needs to be large for numerical accuracy
        maybe padding with zero in fft would be faster
        without slicing it returns 2-sided autocovariance with fftshift

        >>> ArmaFft([1, -0.5], [1., 0.4], 40).invpowerspd(2**8)[:10]
        array([ 2.08    ,  1.44    ,  0.72    ,  0.36    ,  0.18    ,  0.09    ,
                0.045   ,  0.0225  ,  0.01125 ,  0.005625])
        >>> ArmaFft([1, -0.5], [1., 0.4], 40).acovf(10)
        array([ 2.08    ,  1.44    ,  0.72    ,  0.36    ,  0.18    ,  0.09    ,
                0.045   ,  0.0225  ,  0.01125 ,  0.005625])
        '''
        hw = self.fftarma(n)
        return np.real_if_close(fft.ifft(hw*hw.conj()), tol=200)[:n]

[docs]    def spdmapoly(self, w, twosided=False):
        '''ma only, need division for ar, use LagPolynomial
        '''
        if w is None:
            w = np.linspace(0, np.pi, nfreq)
        return 0.5 / np.pi * self.mapoly(np.exp(w*1j))


[docs]    def plot4(self, fig=None, nobs=100, nacf=20, nfreq=100):
        rvs = self.generate_sample(nsample=100, burnin=500)
        acf = self.acf(nacf)[:nacf]  #TODO: check return length
        pacf = self.pacf(nacf)
        w = np.linspace(0, np.pi, nfreq)
        spdr, wr = self.spdroots(w)

        if fig is None:
            import matplotlib.pyplot as plt
            fig = plt.figure()
        ax = fig.add_subplot(2,2,1)
        ax.plot(rvs)
        ax.set_title('Random Sample \nar=%s, ma=%s' % (self.ar, self.ma))

        ax = fig.add_subplot(2,2,2)
        ax.plot(acf)
        ax.set_title('Autocorrelation \nar=%s, ma=%rs' % (self.ar, self.ma))

        ax = fig.add_subplot(2,2,3)
        ax.plot(wr, spdr)
        ax.set_title('Power Spectrum \nar=%s, ma=%s' % (self.ar, self.ma))

        ax = fig.add_subplot(2,2,4)
        ax.plot(pacf)
        ax.set_title('Partial Autocorrelation \nar=%s, ma=%s' % (self.ar, self.ma))

        return fig







def spdar1(ar, w):
    '''spectral density of an AR(1) process

    Parameters
    ----------
    ar : float or array_like
        If ``ar`` is a scalar, it is used directly as the autoregressive
        coefficient. Otherwise the negative of its second element is used.
    w : float or ndarray
        frequencies

    Returns
    -------
    spd : float or ndarray
        ``0.5 / pi / (1 + rho**2 - 2 * rho * cos(w))``
    '''
    rho = ar if np.ndim(ar) == 0 else -ar[1]
    denom = 1 + rho*rho - 2 * rho * np.cos(w)
    return 0.5 / np.pi / denom

if __name__ == '__main__':
    # Experimental script: compares several ways of filtering with and of
    # computing the spectral density of an ARMA process and plots the results.
    def maxabs(x,y):
        return np.max(np.abs(x-y))
    nobs = 200  #10000
    ar = [1, 0.0]
    ma = [1, 0.0]
    ar2 = np.zeros(nobs)
    ar2[:2] = [1, -0.9]



    # unit impulse used to get impulse responses from lfilter
    uni = np.zeros(nobs)
    uni[0]=1.
    #arrep = signal.lfilter(ma, ar, ar2)
    #marep = signal.lfilter([1],arrep, uni)
    # same faster:
    arcomb = np.convolve(ar, ar2, mode='same')
    marep = signal.lfilter(ma,arcomb, uni) #[len(ma):]
    print(marep[:10])
    mafr = fft.fft(marep)

    rvs = np.random.normal(size=nobs)
    datafr = fft.fft(rvs)
    y = fft.ifft(mafr*datafr)
    print(np.corrcoef(np.c_[y[2:], y[1:-1], y[:-2]],rowvar=0))

    arrep = signal.lfilter([1],marep, uni)
    print(arrep[:20])  # roundtrip to ar
    arfr = fft.fft(arrep)
    yfr = fft.fft(y)
    x = fft.ifft(arfr*yfr).real  #imag part is e-15
    # the next two are equal, roundtrip works
    print(x[:5])
    print(rvs[:5])
    print(np.corrcoef(np.c_[x[2:], x[1:-1], x[:-2]],rowvar=0))


    # ARMA filter using fft with ratio of fft of ma/ar lag polynomial
    # seems much faster than using lfilter

    #padding, note arcomb is already full length
    arcombp = np.zeros(nobs)
    arcombp[:len(arcomb)] = arcomb
    map_ = np.zeros(nobs)    #rename: map was shadowing builtin
    map_[:len(ma)] = ma
    ar0fr = fft.fft(arcombp)
    ma0fr = fft.fft(map_)
    y2 = fft.ifft(ma0fr/ar0fr*datafr)
    #the next two are (almost) equal in real part, almost zero but different in imag
    print(y2[:10])
    print(y[:10])
    print(maxabs(y, y2))  # from chfdiscrete
    #1.1282071239631782e-014

    ar = [1, -0.4]
    ma = [1, 0.2]

    arma1 = ArmaFft([1, -0.5,0,0,0,0, -0.7, 0.3], [1, 0.8], nobs)

    nfreq = nobs
    w = np.linspace(0, np.pi, nfreq)
    w2 = np.linspace(0, 2*np.pi, nfreq)

    import matplotlib.pyplot as plt
    plt.close('all')

    plt.figure()
    spd1, w1 = arma1.spd(2**10)
    print(spd1.shape)
    _ = plt.plot(spd1)
    plt.title('spd fft complex')

    plt.figure()
    spd2, w2 = arma1.spdshift(2**10)
    print(spd2.shape)
    _ = plt.plot(w2, spd2)
    plt.title('spd fft shift')

    plt.figure()
    spd3, w3 = arma1.spddirect(2**10)
    print(spd3.shape)
    _ = plt.plot(w3, spd3)
    plt.title('spd fft direct')

    plt.figure()
    spd3b = arma1._spddirect2(2**10)
    print(spd3b.shape)
    _ = plt.plot(spd3b)
    plt.title('spd fft direct mirrored')

    plt.figure()
    spdr, wr = arma1.spdroots(w)
    print(spdr.shape)
    plt.plot(w, spdr)
    plt.title('spd from roots')

    plt.figure()
    spdar1_ = spdar1(arma1.ar, w)
    print(spdar1_.shape)
    _ = plt.plot(w, spdar1_)
    plt.title('spd ar1')


    plt.figure()
    wper, spdper = arma1.periodogram(nfreq)
    print(spdper.shape)
    _ = plt.plot(w, spdper)
    plt.title('periodogram')

    startup = 1000
    rvs = arma1.generate_sample(startup+10000)[startup:]
    import matplotlib.mlab as mlb
    plt.figure()
    # estimate the psd of the sample generated above; ``x`` from the
    # roundtrip check further up is white noise and not the ARMA sample
    sdm, wm = mlb.psd(rvs)
    print('sdm.shape', sdm.shape)
    sdm = sdm.ravel()
    plt.plot(wm, sdm)
    plt.title('matplotlib')

    # nitime is an optional extra; without it only this comparison is skipped
    try:
        from nitime.algorithms import LD_AR_est
    except ImportError:
        LD_AR_est = None
        print('nitime is not available, skipping nitime spectral estimate')
    if LD_AR_est is not None:
        #yule_AR_est(s, order, Nfreqs)
        wnt, spdnt = LD_AR_est(rvs, 10, 512)
        plt.figure()
        print('spdnt.shape', spdnt.shape)
        _ = plt.plot(spdnt.ravel())
        print(spdnt[:10])
        plt.title('nitime')

    fig = plt.figure()
    arma1.plot4(fig)

    #plt.show()