"""Breast Cancer Data"""
__docformat__ = 'restructuredtext'
COPYRIGHT = """???"""
TITLE = """Breast Cancer Data"""
SOURCE = """
This is the breast cancer data used in Owen's empirical likelihood. It is taken from
Rice, J.A. Mathematical Statistics and Data Analysis.
http://www.cengage.com/statistics/discipline_content/dataLibrary.html
"""
DESCRSHORT = """Breast Cancer and county population"""
DESCRLONG = """The number of breast cancer observances in various counties"""
#suggested notes
NOTE = """::
Number of observations: 301
Number of variables: 2
Variable name definitions:
cancer - The number of breast cancer observances
population - The population of the county
"""
import numpy as np
from statsmodels.datasets import utils as du
from os.path import dirname, abspath
[docs]def load():
"""
Load the data and return a Dataset class instance.
Returns
-------
Dataset instance:
See DATASET_PROPOSAL.txt for more information.
"""
data = _get_data()
##### SET THE INDICES #####
#NOTE: None for exog_idx is the complement of endog_idx
return du.process_recarray(data, endog_idx=0, exog_idx=None, dtype=float)
[docs]def load_pandas():
data = _get_data()
##### SET THE INDICES #####
#NOTE: None for exog_idx is the complement of endog_idx
return du.process_recarray_pandas(data, endog_idx=0, exog_idx=None,
dtype=float)
def _get_data():
filepath = dirname(abspath(__file__))
##### EDIT THE FOLLOWING TO POINT TO DatasetName.csv #####
data = np.recfromtxt(open(filepath + '/cancer.csv', 'rb'),
delimiter=",", names = True, dtype=float)
return data