Source code for statsmodels.tsa.base.tsa_model

from statsmodels.compat.python import lrange
import statsmodels.base.model as base
from statsmodels.base import data
import statsmodels.base.wrapper as wrap
from statsmodels.tsa.base import datetools
from numpy import arange, asarray
from pandas import Index
from pandas import datetools as pandas_datetools
import datetime

# Module-level alias for the frequency lookup defined in
# statsmodels.tsa.base.datetools. In this module it is indexed with a
# frequency value (``_freq_to_pandas[freq]``) both to validate ``freq`` and to
# obtain the frequency object handed to pandas when building prediction dates.
_freq_to_pandas = datetools._freq_to_pandas

# Docstring template shared by time-series models. It is filled in with the
# ``%`` operator using the keys ``model``, ``params``, ``extra_params`` and
# ``extra_sections``; the ``dates`` and ``freq`` parameter descriptions are
# fixed text common to every model that uses the template.
_tsa_doc = """
    %(model)s

    Parameters
    ----------
    %(params)s
    dates : array-like of datetime, optional
        An array-like object of datetime objects. If a pandas object is given
        for endog or exog, it is assumed to have a DateIndex.
    freq : str, optional
        The frequency of the time-series. A Pandas offset or 'B', 'D', 'W',
        'M', 'A', or 'Q'. This is optional if dates are given.
    %(extra_params)s
    %(extra_sections)s
"""

# One-line summary substituted for ``%(model)s`` in the base class docstring.
_model_doc = "Timeseries model base class"

# Parameter documentation fragments reused from statsmodels.base.model; they
# are substituted for ``%(params)s`` and ``%(extra_params)s`` respectively.
_generic_params = base._model_params_doc
_missing_param_doc = base._missing_param_doc

class TimeSeriesModel(base.LikelihoodModel):
    # Base class for time-series models. On top of the parent model it
    # attaches ``dates`` and ``freq`` to ``self.data`` and provides helpers
    # that translate prediction start/end points (strings or integers) into
    # integer positions plus a number of out-of-sample steps.
    #
    # The class docstring is generated from the shared template, so no
    # literal docstring is placed here.
    __doc__ = _tsa_doc % {"model" : _model_doc, "params" : _generic_params,
                          "extra_params" : _missing_param_doc,
                          "extra_sections" : ""}

    def __init__(self, endog, exog=None, dates=None, freq=None,
                 missing='none'):
        """
        Initialize the parent model, then attach dates and frequency.

        See the class docstring for the meaning of the parameters.
        """
        super(TimeSeriesModel, self).__init__(endog, exog, missing=missing)
        self._init_dates(dates, freq)

    def _init_dates(self, dates, freq):
        """
        Validate `dates` and `freq` and store them on ``self.data``.

        If `dates` is None, ``self.data.row_labels`` is used instead. When
        dates are available and `freq` is not given, the frequency is
        inferred from the dates.

        Raises
        ------
        ValueError
            If the data is a pandas object whose index does not contain
            dates, if frequency inference fails, or if `freq` is not a key
            understood by ``_freq_to_pandas``.
        """
        if dates is None:
            dates = self.data.row_labels

        if dates is not None:
            if (not datetools._is_datetime_index(dates) and
                    isinstance(self.data, data.PandasData)):
                raise ValueError("Given a pandas object and the index does "
                                 "not contain dates")
            if not freq:
                try:
                    freq = datetools._infer_freq(dates)
                # Exception rather than a bare except, so KeyboardInterrupt
                # and SystemExit are not converted into a ValueError.
                except Exception:
                    raise ValueError("Frequency inference failed. Use `freq` "
                                     "keyword.")
            dates = Index(dates)
        self.data.dates = dates
        if freq:
            try:  # NOTE: Can drop this once we move to pandas >= 0.8.x
                _freq_to_pandas[freq]
            except Exception:
                raise ValueError("freq %s not understood" % freq)
        self.data.freq = freq

    def _get_exog_names(self):
        """Return the exogenous variable names stored on ``self.data``."""
        return self.data.xnames

    def _set_exog_names(self, vals):
        """Store `vals` as the exogenous names, wrapping a non-list in a list."""
        if not isinstance(vals, list):
            vals = [vals]
        self.data.xnames = vals

    # overwrite with writable property for (V)AR models
    exog_names = property(_get_exog_names, _set_exog_names)

    def _get_dates_loc(self, dates, date):
        """
        Return the location of `date` within the index `dates`.

        Uses ``dates.indexMap`` when the index has that attribute and
        ``dates.get_loc`` otherwise. If ``get_loc`` returns an object with a
        length, it is treated as a boolean mask and reduced to the integer
        position of its single True entry.
        """
        if hasattr(dates, 'indexMap'):  # 0.7.x
            date = dates.indexMap[date]
        else:
            date = dates.get_loc(date)
            try:  # pandas 0.8.0 returns a boolean array
                len(date)
                from numpy import where
                date = where(date)[0].item()
            except TypeError:  # this is expected behavior
                pass
        return date

    def _str_to_date(self, date):
        """
        Takes a string and returns a datetime object
        """
        return datetools.date_parser(date)

    def _set_predict_start_date(self, start):
        """
        Record the date that corresponds to integer position `start`.

        Does nothing when the model has no dates. A `start` equal to
        ``len(dates)`` refers to the first period after the sample, whose
        date is computed from the last date and ``self.data.freq``.

        Raises
        ------
        ValueError
            If `start` is greater than ``len(dates)``.
        """
        dates = self.data.dates
        if dates is None:
            return
        ndates = len(dates)
        if start > ndates:
            raise ValueError("Start must be <= len(endog)")
        if start == ndates:
            self.data.predict_start = datetools._date_from_idx(
                dates[-1], 1, self.data.freq)
        else:
            # start < len(dates); the original trailing ``else: raise`` was
            # unreachable after the two checks above.
            self.data.predict_start = dates[start]

    def _get_predict_start(self, start):
        """
        Returns the index of the given start date. Subclasses should define
        default behavior for start = None. That isn't handled here.

        Start can be a string or an integer if self.data.dates is None.

        Raises
        ------
        ValueError
            If `start` is a string and the model has no dates, or the parsed
            date is not in the dates index.
        """
        dates = self.data.dates
        if isinstance(start, str):
            if dates is None:
                raise ValueError("Got a string for start and dates is None")
            dtstart = self._str_to_date(start)
            self.data.predict_start = dtstart
            try:
                start = self._get_dates_loc(dates, dtstart)
            except KeyError:
                raise ValueError("Start must be in dates. Got %s | %s" %
                                 (str(start), str(dtstart)))

        self._set_predict_start_date(start)
        return start

    def _get_predict_end(self, end):
        """
        See _get_predict_start for more information. Subclasses do not
        need to define anything for this.

        Parameters
        ----------
        end : str, int or None
            End of the prediction. None means the last in-sample
            observation.

        Returns
        -------
        end : int
            Position of the last in-sample observation to use.
        out_of_sample : int
            Number of periods past the end of the sample (0 if none was
            computed).

        Raises
        ------
        ValueError
            If `end` is a string and the model has no dates; if the parsed
            date is not in the index, is not after the last date and there
            is no frequency; or if `end` is neither a string nor an int and
            there is no frequency.
        """
        out_of_sample = 0  # will be overwritten if needed
        if end is None:  # use data for ARIMA - endog changes
            end = len(self.data.endog) - 1

        dates = self.data.dates
        freq = self.data.freq

        if isinstance(end, str):
            if dates is None:
                raise ValueError("Got a string for end and dates is None")
            # Parse outside the try block so that ``dtend`` is always bound
            # when the KeyError handler below needs it.
            dtend = self._str_to_date(end)
            self.data.predict_end = dtend
            try:
                end = self._get_dates_loc(dates, dtend)
            except KeyError:  # end is greater than dates[-1]...probably
                if dtend > dates[-1]:
                    end = len(self.data.endog) - 1
                    out_of_sample = datetools._idx_from_dates(dates[-1],
                                                              dtend, freq)
                else:
                    if freq is None:
                        raise ValueError("There is no frequency for these "
                                         "dates and date %s is not in dates "
                                         "index. Try giving a date that is in "
                                         "the dates index or use an integer."
                                         % dtend)
                    else:  # pragma: no cover
                        raise  # should never get here
            self._make_predict_dates()  # attaches self.data.predict_dates

        elif isinstance(end, int) and dates is not None:
            try:
                self.data.predict_end = dates[end]
            except IndexError:
                nobs = len(self.data.endog) - 1  # as an index
                out_of_sample = end - nobs
                end = nobs
                if freq is not None:
                    self.data.predict_end = datetools._date_from_idx(
                        dates[-1], out_of_sample, freq)
                elif out_of_sample <= 0:
                    # have no frequency but are in sample
                    # TODO: what error to catch here to make sure dates is
                    # on the index?
                    self.data.predict_end = self._get_dates_loc(dates, end)
                else:
                    # No frequency and out of sample: fall back to integer
                    # positions for both ends of the prediction range.
                    self.data.predict_end = end + out_of_sample
                    self.data.predict_start = self._get_dates_loc(
                        dates, self.data.predict_start)

            self._make_predict_dates()

        elif isinstance(end, int):
            nobs = len(self.data.endog) - 1  # is an index
            if end > nobs:
                out_of_sample = end - nobs
                end = nobs

        elif freq is None:  # should have a date with freq = None
            raise ValueError("When freq is None, you must give an integer "
                             "index for end.")

        return end, out_of_sample

    def _make_predict_dates(self):
        """
        Build ``self.data.predict_dates`` from the stored start and end.

        Reads ``predict_start``, ``predict_end`` and ``freq`` from
        ``self.data``. With a frequency, a date index from start to end is
        generated; without one, integer start/end give an integer index, and
        otherwise the matching slice of the existing dates is used.
        """
        # Named ``model_data`` so the module-level ``data`` import is not
        # shadowed inside this method.
        model_data = self.data
        dtstart = model_data.predict_start
        dtend = model_data.predict_end
        freq = model_data.freq

        if freq is not None:
            pandas_freq = _freq_to_pandas[freq]
            try:
                from pandas import DatetimeIndex
                dates = DatetimeIndex(start=dtstart, end=dtend,
                                      freq=pandas_freq)
            except ImportError:
                # Fallback for pandas versions without DatetimeIndex.
                from pandas import DateRange
                dates = DateRange(dtstart, dtend, offset=pandas_freq).values
        elif isinstance(dtstart, int) and isinstance(dtend, int):
            # No frequency, integer bounds: plain integer index, inclusive
            # of ``dtend``.
            dates = Index(lrange(dtstart, dtend + 1))
        else:
            # if freq is None and dtstart and dtend aren't integers, we're
            # in sample
            dates = model_data.dates
            start = self._get_dates_loc(dates, dtstart)
            end = self._get_dates_loc(dates, dtend)
            dates = dates[start:end + 1]  # is this index inclusive?
        model_data.predict_dates = dates
class TimeSeriesModelResults(base.LikelihoodModelResults):
    """
    Results class for time-series models.

    Exposes the model's data container as ``self.data`` and otherwise
    defers to the parent results class.
    """

    def __init__(self, model, params, normalized_cov_params, scale=1.):
        # Attach the model's data container before the parent initializer
        # runs, in the same order as the original implementation.
        self.data = model.data
        super(TimeSeriesModelResults, self).__init__(
            model,
            params,
            normalized_cov_params,
            scale,
        )
class TimeSeriesResultsWrapper(wrap.ResultsWrapper):
    """
    Results wrapper for time-series results.

    Declares no extra wrapped attributes and maps the ``predict`` method to
    ``'dates'``; both tables are combined with those of
    ``base.LikelihoodResultsWrapper`` through ``wrap.union_dicts``.
    """
    # No additional wrapped attributes beyond the likelihood wrapper's.
    _attrs = dict()
    _wrap_attrs = wrap.union_dicts(
        base.LikelihoodResultsWrapper._wrap_attrs,
        _attrs,
    )
    # ``predict`` is the only additional wrapped method.
    _methods = dict(predict='dates')
    _wrap_methods = wrap.union_dicts(
        base.LikelihoodResultsWrapper._wrap_methods,
        _methods,
    )
# Register the wrapper class for the time-series results class.
wrap.populate_wrapper(TimeSeriesResultsWrapper, TimeSeriesModelResults)


if __name__ == "__main__":
    # Small manual demo: build a DataFrame from the macrodata set, indexed by
    # "year:quarter" labels, and construct a model from it.
    import statsmodels.api as sm
    import datetime
    import pandas

    data = sm.datasets.macrodata.load()

    #make a DataFrame
    #TODO: attach a DataFrame to some of the datasets, for quicker use
    dates = ["%s:%s" % (int(x[0]), int(x[1]))
             for x in data.data[['year','quarter']]]

    df = pandas.DataFrame(
        data.data[['realgdp','realinv','realcons']],
        index=dates,
    )
    ex_mod = TimeSeriesModel(df)

    #ts_series = pandas.TimeSeries()