Source code for statsmodels.datasets.anes96.data

"""American National Election Survey 1996"""

__docformat__ = 'restructuredtext'

COPYRIGHT   = """This is public domain."""
TITLE       = __doc__
SOURCE      = """
http://www.electionstudies.org/

The American National Election Studies.
"""

DESCRSHORT  = """This data is a subset of the American National Election Studies of 1996."""

DESCRLONG   = DESCRSHORT

NOTE        = """::

    Number of observations - 944
    Number of variables - 10

    Variables name definitions::

            popul - Census place population in 1000s
            TVnews - Number of times per week that respondent watches TV news.
            PID - Party identification of respondent.
                0 - Strong Democrat
                1 - Weak Democrat
                2 - Independent-Democrat
                3 - Independent-Indpendent
                4 - Independent-Republican
                5 - Weak Republican
                6 - Strong Republican
            age : Age of respondent.
            educ - Education level of respondent
                1 - 1-8 grades
                2 - Some high school
                3 - High school graduate
                4 - Some college
                5 - College degree
                6 - Master's degree
                7 - PhD
            income - Income of household
                1  - None or less than $2,999
                2  - $3,000-$4,999
                3  - $5,000-$6,999
                4  - $7,000-$8,999
                5  - $9,000-$9,999
                6  - $10,000-$10,999
                7  - $11,000-$11,999
                8  - $12,000-$12,999
                9  - $13,000-$13,999
                10 - $14,000-$14.999
                11 - $15,000-$16,999
                12 - $17,000-$19,999
                13 - $20,000-$21,999
                14 - $22,000-$24,999
                15 - $25,000-$29,999
                16 - $30,000-$34,999
                17 - $35,000-$39,999
                18 - $40,000-$44,999
                19 - $45,000-$49,999
                20 - $50,000-$59,999
                21 - $60,000-$74,999
                22 - $75,000-89,999
                23 - $90,000-$104,999
                24 - $105,000 and over
            vote - Expected vote
                0 - Clinton
                1 - Dole
            The following 3 variables all take the values:
                1 - Extremely liberal
                2 - Liberal
                3 - Slightly liberal
                4 - Moderate
                5 - Slightly conservative
                6 - Conservative
                7 - Extremely Conservative
            selfLR - Respondent's self-reported political leanings from "Left"
                to "Right".
            ClinLR - Respondents impression of Bill Clinton's political
                leanings from "Left" to "Right".
            DoleLR  - Respondents impression of Bob Dole's political leanings
                from "Left" to "Right".
            logpopul - log(popul + .1)
"""

from numpy import recfromtxt, column_stack, array, log
import numpy.lib.recfunctions as nprf
from statsmodels.datasets import utils as du
from os.path import dirname, abspath

[docs]def load(): """Load the anes96 data and returns a Dataset class. Returns ------- Dataset instance: See DATASET_PROPOSAL.txt for more information. """ data = _get_data() return du.process_recarray(data, endog_idx=5, exog_idx=[10,2,6,7,8], dtype=float)
[docs]def load_pandas(): """Load the anes96 data and returns a Dataset class. Returns ------- Dataset instance: See DATASET_PROPOSAL.txt for more information. """ data = _get_data() return du.process_recarray_pandas(data, endog_idx=5, exog_idx=[10,2,6,7,8], dtype=float)
def _get_data(): filepath = dirname(abspath(__file__)) data = recfromtxt(open(filepath + '/anes96.csv',"rb"), delimiter="\t", names = True, dtype=float) logpopul = log(data['popul'] + .1) data = nprf.append_fields(data, 'logpopul', logpopul, usemask=False, asrecarray=True) return data