Source code for statsmodels.tsa.seasonal

"""
Seasonal Decomposition by Moving Averages
"""
from statsmodels.compat.python import lmap, range, iteritems
import numpy as np
from pandas.core.nanops import nanmean as pd_nanmean
from .filters._utils import _maybe_get_pandas_wrapper_freq
from .filters.filtertools import convolution_filter
from statsmodels.tsa.tsatools import freq_to_period


[docs]def seasonal_mean(x, freq): """ Return means for each period in x. freq is an int that gives the number of periods per cycle. E.g., 12 for monthly. NaNs are ignored in the mean. """ return np.array([pd_nanmean(x[i::freq]) for i in range(freq)])
[docs]def seasonal_decompose(x, model="additive", filt=None, freq=None): """ Parameters ---------- x : array-like Time series model : str {"additive", "multiplicative"} Type of seasonal component. Abbreviations are accepted. filt : array-like The filter coefficients for filtering out the seasonal component. The default is a symmetric moving average. freq : int, optional Frequency of the series. Must be used if x is not a pandas object with a timeseries index. Returns ------- results : obj A object with seasonal, trend, and resid attributes. Notes ----- This is a naive decomposition. More sophisticated methods should be preferred. The additive model is Y[t] = T[t] + S[t] + e[t] The multiplicative model is Y[t] = T[t] * S[t] * e[t] The seasonal component is first removed by applying a convolution filter to the data. The average of this smoothed series for each period is the returned seasonal component. See Also -------- statsmodels.tsa.filters.convolution_filter """ _pandas_wrapper, pfreq = _maybe_get_pandas_wrapper_freq(x) x = np.asanyarray(x).squeeze() nobs = len(x) if not np.all(np.isfinite(x)): raise ValueError("This function does not handle missing values") if model.startswith('m'): if np.any(x <= 0): raise ValueError("Multiplicative seasonality is not appropriate " "for zero and negative values") if pfreq is not None: pfreq = freq_to_period(pfreq) if freq and pfreq != freq: raise ValueError("Inferred frequency of index and frequency " "don't match. This function does not re-sample") else: freq = pfreq elif freq is None: raise ValueError("You must specify a freq or x must be a " "pandas object with a timeseries index") if filt is None: if freq % 2 == 0: # split weights at ends filt = np.array([.5] + [1] * (freq - 1) + [.5]) / freq else: filt = np.repeat(1./freq, freq) trend = convolution_filter(x, filt) # nan pad for conformability - convolve doesn't do it if model.startswith('m'): detrended = x / trend else: detrended = x - trend period_averages = seasonal_mean(detrended, freq) if model.startswith('m'): period_averages /= np.mean(period_averages) else: period_averages -= np.mean(period_averages) seasonal = np.tile(period_averages, nobs // freq + 1)[:nobs] if model.startswith('m'): resid = x / seasonal / trend else: resid = detrended - seasonal results = lmap(_pandas_wrapper, [seasonal, trend, resid, x]) return DecomposeResult(seasonal=results[0], trend=results[1], resid=results[2], observed=results[3])
[docs]class DecomposeResult(object):
[docs] def __init__(self, **kwargs): for key, value in iteritems(kwargs): setattr(self, key, value) self.nobs = len(self.observed)
[docs] def plot(self): from statsmodels.graphics.utils import _import_mpl plt = _import_mpl() fig, axes = plt.subplots(4, 1, sharex=True) if hasattr(self.observed, 'plot'): # got pandas use it self.observed.plot(ax=axes[0], legend=False) axes[0].set_ylabel('Observed') self.trend.plot(ax=axes[1], legend=False) axes[1].set_ylabel('Trend') self.seasonal.plot(ax=axes[2], legend=False) axes[2].set_ylabel('Seasonal') self.resid.plot(ax=axes[3], legend=False) axes[3].set_ylabel('Residual') else: axes[0].plot(self.observed) axes[0].set_ylabel('Observed') axes[1].plot(self.trend) axes[1].set_ylabel('Trend') axes[2].plot(self.seasonal) axes[2].set_ylabel('Seasonal') axes[3].plot(self.resid) axes[3].set_ylabel('Residual') axes[3].set_xlabel('Time') axes[3].set_xlim(0, self.nobs) fig.tight_layout() return fig
if __name__ == "__main__": x = np.array([-50, 175, 149, 214, 247, 237, 225, 329, 729, 809, 530, 489, 540, 457, 195, 176, 337, 239, 128, 102, 232, 429, 3, 98, 43, -141, -77, -13, 125, 361, -45, 184]) results = seasonal_decompose(x, freq=4) from pandas import DataFrame, DatetimeIndex data = DataFrame(x, DatetimeIndex(start='1/1/1951', periods=len(x), freq='Q')) res = seasonal_decompose(data)