"""Heart Transplant Data, Miller 1976"""
__docformat__ = 'restructuredtext'
COPYRIGHT = """???"""
TITLE = """Transplant Survival Data"""
SOURCE = """ Miller, R. (1976). Least squares regression with censored dara. Biometrica, 63 (3). 449-464.
"""
DESCRSHORT = """Survival times after receiving a heart transplant"""
DESCRLONG = """This data contains the survival time after receiving a heart transplant, the age of the patient and whether or not the survival time was censored.
"""
NOTE = """::
Number of Observations - 69
Number of Variables - 3
Variable name definitions::
death - Days after surgery until death
age - age at the time of surgery
censored - indicates if an observation is censored. 1 is uncensored
"""
import numpy as np
from statsmodels.datasets import utils as du
from os.path import dirname, abspath
[docs]def load():
"""
Load the data and return a Dataset class instance.
Returns
-------
Dataset instance:
See DATASET_PROPOSAL.txt for more information.
"""
data = _get_data()
##### SET THE INDICES #####
#NOTE: None for exog_idx is the complement of endog_idx
dset = du.process_recarray(data, endog_idx=0, exog_idx=None, dtype=float)
dset.censors = dset.exog[:,0]
dset.exog = dset.exog[:,1]
return dset
[docs]def load_pandas():
data = _get_data()
##### SET THE INDICES #####
#NOTE: None for exog_idx is the complement of endog_idx
return du.process_recarray_pandas(data, endog_idx=0, exog_idx=None,
dtype=float)
def _get_data():
filepath = dirname(abspath(__file__))
##### EDIT THE FOLLOWING TO POINT TO DatasetName.csv #####
data = np.recfromtxt(open(filepath + '/heart.csv', 'rb'),
delimiter=",", names = True, dtype=float)
return data