"""Nile River Flows."""
__docformat__ = 'restructuredtext'
COPYRIGHT = """This is public domain."""
TITLE = """Nile River flows at Ashwan 1871-1970"""
SOURCE = """
This data is first analyzed in:
Cobb, G. W. 1978. "The Problem of the Nile: Conditional Solution to a
Changepoint Problem." *Biometrika*. 65.2, 243-51.
"""
DESCRSHORT = """This dataset contains measurements on the annual flow of
the Nile as measured at Ashwan for 100 years from 1871-1970."""
DESCRLONG = DESCRSHORT + " There is an apparent changepoint near 1898."
#suggested notes
NOTE = """::
Number of observations: 100
Number of variables: 2
Variable name definitions:
year - the year of the observations
volumne - the discharge at Aswan in 10^8, m^3
"""
from numpy import recfromtxt, array
from pandas import Series, DataFrame
from statsmodels.datasets.utils import Dataset
from os.path import dirname, abspath
[docs]def load():
"""
Load the Nile data and return a Dataset class instance.
Returns
-------
Dataset instance:
See DATASET_PROPOSAL.txt for more information.
"""
data = _get_data()
names = list(data.dtype.names)
endog_name = 'volume'
endog = array(data[endog_name], dtype=float)
dataset = Dataset(data=data, names=[endog_name], endog=endog,
endog_name=endog_name)
return dataset
[docs]def load_pandas():
data = DataFrame(_get_data())
# TODO: time series
endog = Series(data['volume'], index=data['year'].astype(int))
dataset = Dataset(data=data, names=list(data.columns),
endog=endog, endog_name='volume')
return dataset
def _get_data():
filepath = dirname(abspath(__file__))
data = recfromtxt(open(filepath + '/nile.csv', 'rb'), delimiter=",",
names=True, dtype=float)
return data