# Source code for pandas.core.categorical

# pylint: disable=E1101,W0232

import numpy as np
from warnings import warn
import types

from pandas import compat, lib
from pandas.compat import u

from pandas.types.generic import ABCSeries, ABCIndexClass, ABCCategoricalIndex
from pandas.types.missing import isnull, notnull
from pandas.types.cast import (_possibly_infer_to_datetimelike,
                               _coerce_indexer_dtype)
from pandas.types.dtypes import CategoricalDtype
from pandas.types.common import (_ensure_int64,
                                 _ensure_object,
                                 _ensure_platform_int,
                                 is_dtype_equal,
                                 is_datetimelike,
                                 is_categorical_dtype,
                                 is_integer_dtype, is_bool,
                                 is_list_like, is_sequence,
                                 is_scalar)
from pandas.core.common import is_null_slice

from pandas.core.algorithms import factorize, take_1d
from pandas.core.base import (PandasObject, PandasDelegate,
                              NoNewAttributesMixin, _shared_docs)
import pandas.core.common as com
from pandas.core.missing import interpolate_2d
from pandas.compat.numpy import function as nv
from pandas.util.decorators import (Appender, cache_readonly,
                                    deprecate_kwarg, Substitution)

from pandas.util.terminal import get_terminal_size
from pandas.core.config import get_option


def _cat_compare_op(op):
    def f(self, other):
        # On python2, you can usually compare any type to any type, and
        # Categoricals can be seen as a custom type, but having different
        # results depending whether categories are the same or not is kind of
        # insane, so be a bit stricter here and use the python3 idea of
        # comparing only things of equal type.
        if not self.ordered:
            if op in ['__lt__', '__gt__', '__le__', '__ge__']:
                raise TypeError("Unordered Categoricals can only compare "
                                "equality or not")
        if isinstance(other, Categorical):
            # Two Categoricals can only be be compared if the categories are
            # the same
            if ((len(self.categories) != len(other.categories)) or
                    not ((self.categories == other.categories).all())):
                raise TypeError("Categoricals can only be compared if "
                                "'categories' are the same")
            if not (self.ordered == other.ordered):
                raise TypeError("Categoricals can only be compared if "
                                "'ordered' is the same")
            na_mask = (self._codes == -1) | (other._codes == -1)
            f = getattr(self._codes, op)
            ret = f(other._codes)
            if na_mask.any():
                # In other series, the leads to False, so do that here too
                ret[na_mask] = False
            return ret

        # Numpy-1.9 and earlier may convert a scalar to a zerodim array during
        # comparison operation when second arg has higher priority, e.g.
        #
        #     cat[0] < cat
        #
        # With cat[0], for example, being ``np.int64(1)`` by the time it gets
        # into this function would become ``np.array(1)``.
        other = lib.item_from_zerodim(other)
        if is_scalar(other):
            if other in self.categories:
                i = self.categories.get_loc(other)
                return getattr(self._codes, op)(i)
            else:
                if op == '__eq__':
                    return np.repeat(False, len(self))
                elif op == '__ne__':
                    return np.repeat(True, len(self))
                else:
                    msg = ("Cannot compare a Categorical for op {op} with a "
                           "scalar, which is not a category.")
                    raise TypeError(msg.format(op=op))
        else:

            # allow categorical vs object dtype array comparisons for equality
            # these are only positional comparisons
            if op in ['__eq__', '__ne__']:
                return getattr(np.array(self), op)(np.array(other))

            msg = ("Cannot compare a Categorical for op {op} with type {typ}."
                   "\nIf you want to compare values, use 'np.asarray(cat) "
                   "<op> other'.")
            raise TypeError(msg.format(op=op, typ=type(other)))

    f.__name__ = op

    return f


def maybe_to_categorical(array):
    """
    Unwrap a Series or CategoricalIndex to its underlying ``_values``.

    Any other input is handed back unchanged.
    """
    is_wrapper = isinstance(array, (ABCSeries, ABCCategoricalIndex))
    return array._values if is_wrapper else array


_codes_doc = """The category codes of this categorical.

Level codes are an array of integers which are the positions of the real
values in the categories array.

There is no setter; use the other categorical methods and the normal item
setter to change values in the categorical.
"""

_categories_doc = """The categories of this categorical.

Setting assigns new values to each category (effectively a rename of
each individual category).

The assigned value has to be a list-like object. All items must be unique and
the number of items in the new categories must be the same as the number of
items in the old categories.

Assigning to `categories` is an inplace operation!

Raises
------
ValueError
    If the new categories do not validate as categories or if the number of new
    categories is unequal to the number of old categories

See also
--------
rename_categories
reorder_categories
add_categories
remove_categories
remove_unused_categories
set_categories
"""


class Categorical(PandasObject):
    """
    Represents a categorical variable in classic R / S-plus fashion

    `Categoricals` can take on only a limited, and usually fixed, number
    of possible values (`categories`). In contrast to statistical categorical
    variables, a `Categorical` might have an order, but numerical operations
    (additions, divisions, ...) are not possible.

    All values of the `Categorical` are either in `categories` or `np.nan`.
    Assigning values outside of `categories` will raise a `ValueError`. Order
    is defined by the order of the `categories`, not lexical order of the
    values.

    Parameters
    ----------
    values : list-like
        The values of the categorical. If categories are given, values not in
        categories will be replaced with NaN.
    categories : Index-like (unique), optional
        The unique categories for this categorical. If not given, the
        categories are assumed to be the unique values of values.
    ordered : boolean, (default False)
        Whether or not this categorical is treated as an ordered categorical.
        If not given, the resulting categorical will not be ordered.

    Attributes
    ----------
    categories : Index
        The categories of this categorical
    codes : ndarray
        The codes (integer positions, which point to the categories) of this
        categorical, read only.
    ordered : boolean
        Whether or not this Categorical is ordered.

    Raises
    ------
    ValueError
        If the categories do not validate.
    TypeError
        If an explicit ``ordered=True`` is given but no `categories` and the
        `values` are not sortable.


    Examples
    --------
    >>> from pandas import Categorical
    >>> Categorical([1, 2, 3, 1, 2, 3])
    [1, 2, 3, 1, 2, 3]
    Categories (3, int64): [1 < 2 < 3]

    >>> Categorical(['a', 'b', 'c', 'a', 'b', 'c'])
    [a, b, c, a, b, c]
    Categories (3, object): [a < b < c]

    >>> a = Categorical(['a','b','c','a','b','c'], ['c', 'b', 'a'],
    ...                 ordered=True)
    >>> a.min()
    'c'
    """
    dtype = CategoricalDtype()
    """The dtype (always "category")"""
    """Whether or not this Categorical is ordered.

    Only ordered `Categoricals` can be sorted (according to the order
    of the categories) and have a min and max value.

    See also
    --------
    Categorical.sort
    Categorical.order
    Categorical.min
    Categorical.max
    """

    # For comparisons, so that numpy uses our implementation of the compare
    # ops, which raise
    __array_priority__ = 1000
    _typ = 'categorical'

    def __init__(self, values, categories=None, ordered=False,
                 name=None, fastpath=False):
        """
        Populate ``_codes``, ``_categories`` and ``_ordered`` from `values`.

        Parameters
        ----------
        values : list-like
            The values to encode. With ``fastpath=True`` they are instead
            treated as ready-made codes and only passed through
            ``_coerce_indexer_dtype``.
        categories : Index-like, optional
            If omitted, the categories are obtained by factorizing `values`
            (sorted when the values are sortable). If given, they are
            validated and `values` are looked up in them.
        ordered : boolean or None, default False
            Ordered flag. ``None`` inherits the flag from `values` when
            `values` is categorical, otherwise it is treated as False.
        name : object, optional
            No longer used; a ``UserWarning`` is emitted when it is not None.
        fastpath : boolean, default False
            Skip sanitizing and factorizing; categories are only fully
            validated when they are not already an Index.

        Raises
        ------
        TypeError
            If no `categories` are given, the values cannot be sorted and
            `ordered` is truthy.
        NotImplementedError
            If factorizing `values` raises ``ValueError`` (reported as
            "> 1 ndim" input).
        """

        if fastpath:
            # fast path: `values` already are codes, `ordered` is stored as
            # given without validation
            self._codes = _coerce_indexer_dtype(values, categories)
            self._categories = self._validate_categories(
                categories, fastpath=isinstance(categories, ABCIndexClass))
            self._ordered = ordered
            return

        if name is not None:
            msg = ("the 'name' keyword is removed, use 'name' with consumers "
                   "of the categorical instead (e.g. 'Series(cat, "
                   "name=\"something\")'")
            warn(msg, UserWarning, stacklevel=2)

        # sanitize input
        if is_categorical_dtype(values):

            # we are either a Series or a CategoricalIndex
            if isinstance(values, (ABCSeries, ABCCategoricalIndex)):
                values = values._values

            # inherit whatever was not given explicitly from the existing
            # categorical, then work on its plain array representation
            if ordered is None:
                ordered = values.ordered
            if categories is None:
                categories = values.categories
            values = values.__array__()

        elif isinstance(values, (ABCIndexClass, ABCSeries)):
            # Index / Series input is used as is
            pass

        else:

            # on numpy < 1.6 datetimelike get inferred to all i8 by
            # _sanitize_array which is fine, but since factorize does this
            # correctly no need here this is an issue because _sanitize_array
            # also coerces np.nan to a string under certain versions of numpy
            # as well
            values = _possibly_infer_to_datetimelike(values,
                                                     convert_dates=True)
            if not isinstance(values, np.ndarray):
                values = _convert_to_list_like(values)
                # local import (presumably to avoid a circular import between
                # this module and pandas.core.series -- TODO confirm)
                from pandas.core.series import _sanitize_array
                # On list with NaNs, int values will be converted to float. Use
                # "object" dtype to prevent this. In the end objects will be
                # cast to int/... in the category assignment step.
                dtype = 'object' if isnull(values).any() else None
                values = _sanitize_array(values, None, dtype=dtype)

        if categories is None:
            # no categories given: derive them (and the codes) from the values
            try:
                codes, categories = factorize(values, sort=True)
            except TypeError:
                # values are not sortable: fall back to order of appearance
                codes, categories = factorize(values, sort=False)
                if ordered:
                    # raise, as we don't have a sortable data structure and so
                    # the user should give us one by specifying categories
                    raise TypeError("'values' is not ordered, please "
                                    "explicitly specify the categories order "
                                    "by passing in a categories argument.")
            except ValueError:

                # FIXME
                raise NotImplementedError("> 1 ndim Categorical are not "
                                          "supported at this time")

            categories = self._validate_categories(categories)

        else:
            # there were two ways if categories are present
            # - the old one, where each value is a int pointer to the levels
            #   array -> not anymore possible, but code outside of pandas could
            #   call us like that, so make some checks
            # - the new one, where each value is also in the categories array
            #   (or np.nan)

            # make sure that we always have the same type here, no matter what
            # we get passed in
            categories = self._validate_categories(categories)
            codes = _get_codes_for_values(values, categories)

            # TODO: check for old style usage. These warnings should be removed
            # after 0.18/ in 2016
            if is_integer_dtype(values) and not is_integer_dtype(categories):
                warn("Values and categories have different dtypes. Did you "
                     "mean to use\n'Categorical.from_codes(codes, "
                     "categories)'?", RuntimeWarning, stacklevel=2)

            if (len(values) and is_integer_dtype(values) and
                    (codes == -1).all()):
                warn("None of the categories were found in values. Did you "
                     "mean to use\n'Categorical.from_codes(codes, "
                     "categories)'?", RuntimeWarning, stacklevel=2)

        # `ordered or False` maps None onto False before validation
        self.set_ordered(ordered or False, inplace=True)
        self._categories = categories
        self._codes = _coerce_indexer_dtype(codes, categories)

    def copy(self):
        """
        Return a new Categorical built from a copy of the codes.

        The categories and the ordered flag are passed on unchanged.
        """
        duplicated_codes = self._codes.copy()
        return Categorical(values=duplicated_codes,
                           categories=self.categories,
                           ordered=self.ordered,
                           fastpath=True)

    def astype(self, dtype, copy=True):
        """
        Convert this Categorical to another dtype.

        Parameters
        ----------
        dtype : numpy dtype or pandas type
        copy : bool, default True
            For a categorical `dtype`, ``copy=True`` returns ``self.copy()``
            and anything else returns this very object. For all other
            dtypes the flag is forwarded to ``np.array``.

            .. versionadded:: 0.19.0

        """
        if not is_categorical_dtype(dtype):
            return np.array(self, dtype=dtype, copy=copy)
        # categorical -> categorical: only an explicit True triggers a copy
        return self.copy() if copy is True else self

    @cache_readonly
    def ndim(self):
        """Number of dimensions of the Categorical, taken from the codes
        array (``self._codes.ndim``)."""
        return self._codes.ndim

    @cache_readonly
    def size(self):
        """ Number of elements, i.e. ``len(self)`` """
        return len(self)

    @cache_readonly
    def itemsize(self):
        """ Item size of a single category, as reported by the categories
        (``self.categories.itemsize``) """
        return self.categories.itemsize

    def reshape(self, new_shape, *args, **kwargs):
        """
        DEPRECATED: calling this method will raise an error in a
        future release.

        ndarray-compatible no-op: a `Categorical` cannot be reshaped, so
        `self` is returned after the arguments have been checked.

        Parameters
        ----------
        new_shape : int or tuple of ints
            The requested shape; see `np.reshape` for details. It is only
            validated, never applied.
        """
        deprecation_msg = ("reshape is deprecated and will raise "
                           "in a subsequent release")
        warn(deprecation_msg, FutureWarning, stacklevel=2)

        nv.validate_reshape(args, kwargs)

        # The result is thrown away on purpose: the call is made only so
        # that an invalid 'new_shape' still raises.
        np.reshape(self.codes, new_shape)

        return self

    @property
    def base(self):
        """ ndarray compat: always None, a Categorical is its own object """
        return None

    @classmethod
    def from_array(cls, data, **kwargs):
        """
        Make a Categorical type from a single array-like object.

        For internal compatibility with numpy arrays.

        Parameters
        ----------
        data : array-like
            Can be an Index or array-like. The categories are assumed to be
            the unique values of `data`.
        **kwargs
            Forwarded unchanged to the `Categorical` constructor.

        Returns
        -------
        Categorical
        """
        # NOTE: `cls` is not used; the result is always a plain Categorical
        return Categorical(data, **kwargs)

    @classmethod
    def from_codes(cls, codes, categories, ordered=False, name=None):
        """
        Make a Categorical type from codes and categories arrays.

        This constructor is useful if you already have codes and categories and
        so do not need the (computation intensive) factorization step, which is
        usually done on the constructor.

        If your data does not follow this convention, please use the normal
        constructor.

        Parameters
        ----------
        codes : array-like, integers
            An integer array, where each integer points to a category in
            categories or -1 for NaN
        categories : index-like
            The categories for the categorical. Items need to be unique.
        ordered : boolean, (default False)
            Whether or not this categorical is treated as an ordered
            categorical. If not given, the resulting categorical will be
            unordered.
        name : object, optional
            No longer used; a ``UserWarning`` is emitted when it is not None.

        Returns
        -------
        Categorical

        Raises
        ------
        ValueError
            If `codes` cannot be converted to an int64 array, if any code is
            smaller than -1 or not smaller than ``len(categories)``, or if
            `categories` do not validate.
        """
        if name is not None:
            msg = ("the 'name' keyword is removed, use 'name' with consumers "
                   "of the categorical instead (e.g. 'Series(cat, "
                   "name=\"something\")'")
            warn(msg, UserWarning, stacklevel=2)

        try:
            codes = np.asarray(codes, np.int64)
        except Exception:
            # Conversion failures are reported uniformly as ValueError.
            # `Exception` (not a bare except) so that KeyboardInterrupt and
            # SystemExit are not swallowed.
            raise ValueError(
                "codes need to be convertible to an arrays of integers")

        categories = cls._validate_categories(categories)

        # -1 is the NaN sentinel; every other code must index into categories
        if len(codes) and (codes.max() >= len(categories) or codes.min() < -1):
            raise ValueError("codes need to be between -1 and "
                             "len(categories)-1")

        return Categorical(codes, categories=categories, ordered=ordered,
                           fastpath=True)

    _codes = None

    def _get_codes(self):
        """ Return a read-only view onto the codes.

        Returns
        -------
        codes : integer array view
            A view of ``self._codes`` with its ``writeable`` flag cleared.
        """
        read_only = self._codes.view()
        read_only.flags.writeable = False
        return read_only

    def _set_codes(self, codes):
        """
        Setter that always refuses: codes are not settable by the user.

        Raises
        ------
        ValueError
            Unconditionally.
        """
        raise ValueError("cannot set Categorical codes directly")

    codes = property(fget=_get_codes, fset=_set_codes, doc=_codes_doc)

    def _get_labels(self):
        """
        Deprecated alias for the codes.

        Emits a ``FutureWarning`` and returns ``self.codes``; use `.codes`
        instead.
        """
        deprecation_msg = "'labels' is deprecated. Use 'codes' instead"
        warn(deprecation_msg, FutureWarning, stacklevel=2)
        return self.codes

    labels = property(fget=_get_labels, fset=_set_codes)

    _categories = None

    @classmethod
    def _validate_categories(cls, categories, fastpath=False):
        """
        Convert `categories` to an Index and check that it is usable.

        Parameters
        ----------
        categories : Index-like
            Anything that is not already an Index is wrapped in one.
        fastpath : boolean (default: False)
           Don't perform validation of the categories for uniqueness or nulls

        Returns
        -------
        categories : Index

        Raises
        ------
        ValueError
            If the categories are not unique (only checked without fastpath).
        """
        if not isinstance(categories, ABCIndexClass):
            index_dtype = None
            if not hasattr(categories, "dtype"):
                categories = _convert_to_list_like(categories)
                # With NaNs present, ints would silently become floats.
                # Detect that by comparing the inferred dtypes with and
                # without the NaNs and force "object" when they differ.
                if isnull(categories).any():
                    non_null_items = [item for item in categories
                                      if notnull(item)]
                    dtype_without_na = np.array(non_null_items).dtype
                    dtype_with_na = np.array(categories).dtype
                    if dtype_with_na != dtype_without_na:
                        index_dtype = "object"

            from pandas import Index
            categories = Index(categories, dtype=index_dtype)

        if fastpath:
            return categories

        # NaNs inside the categories themselves are deprecated
        if categories.hasnans:
            # NaNs in cats deprecated in 0.17,
            # remove in 0.18 or 0.19 GH 10748
            msg = ('\nSetting NaNs in `categories` is deprecated and '
                   'will be removed in a future version of pandas.')
            warn(msg, FutureWarning, stacklevel=3)

        # duplicates are never allowed
        if not categories.is_unique:
            raise ValueError('Categorical categories must be unique')

        return categories

    def _set_categories(self, categories, fastpath=False):
        """ Replace the categories (setter of the `categories` property).

        Parameters
        ----------
        categories : Index-like
            The new categories.
        fastpath : boolean (default: False)
           Don't perform validation of the categories for uniqueness or nulls

        Raises
        ------
        ValueError
            If, without fastpath, the number of new categories differs from
            the number of existing ones.
        """
        validated = self._validate_categories(categories, fastpath=fastpath)

        if not fastpath and self._categories is not None:
            # a plain assignment is a rename, so the count must not change
            if len(validated) != len(self._categories):
                raise ValueError("new categories need to have the same "
                                 "number of items than the old categories!")

        self._categories = validated

    def _get_categories(self):
        """ Return the categories (getter of the `categories` property) """
        # categories is an Index, which is immutable -> no need to copy
        return self._categories

    categories = property(fget=_get_categories, fset=_set_categories,
                          doc=_categories_doc)

    _ordered = None

    def set_ordered(self, value, inplace=False):
        """
        Set the ordered flag to `value`.

        Parameters
        ----------
        value : boolean to set whether this categorical is ordered (True) or
           not (False)
        inplace : boolean (default: False)
           Whether or not to set the ordered attribute inplace or return a copy
           of this categorical with ordered set to the value

        Returns
        -------
        cat : Categorical with the flag set, or None if inplace.

        Raises
        ------
        TypeError
            If `value` is not a boolean.
        """
        if not is_bool(value):
            raise TypeError("ordered must be a boolean value")

        if inplace:
            self._ordered = value
            return None

        result = self.copy()
        result._ordered = value
        return result

    def as_ordered(self, inplace=False):
        """
        Mark the Categorical as ordered.

        Parameters
        ----------
        inplace : boolean (default: False)
           Whether or not to set the ordered attribute inplace or return a copy
           of this categorical with ordered set to True

        Returns
        -------
        cat : ordered Categorical, or None if inplace.
        """
        return self.set_ordered(value=True, inplace=inplace)

    def as_unordered(self, inplace=False):
        """
        Mark the Categorical as unordered.

        Parameters
        ----------
        inplace : boolean (default: False)
           Whether or not to set the ordered attribute inplace or return a copy
           of this categorical with ordered set to False

        Returns
        -------
        cat : unordered Categorical, or None if inplace.
        """
        return self.set_ordered(value=False, inplace=inplace)

    def _get_ordered(self):
        """ Return the ordered flag (getter of the read-only `ordered`
        property) """
        return self._ordered

    ordered = property(fget=_get_ordered)

    def set_categories(self, new_categories, ordered=None, rename=False,
                       inplace=False):
        """ Sets the categories to the specified new_categories.

        `new_categories` can include new categories (which will result in
        unused categories) or remove old categories (which results in values
        set to NaN). If `rename==True`, the categories will simply be renamed
        (less or more items than in old categories will result in values set to
        NaN or in unused categories respectively).

        This method can be used to perform more than one action of adding,
        removing, and reordering simultaneously and is therefore faster than
        performing the individual steps via the more specialised methods.

        On the other hand this method does not do checks (e.g., whether the
        old categories are included in the new categories on a reorder), which
        can result in surprising changes, for example when using special string
        dtypes on python3, which does not consider a S1 string equal to a
        single char python string.

        Raises
        ------
        ValueError
            If new_categories does not validate as categories

        Parameters
        ----------
        new_categories : Index-like
           The categories in new order.
        ordered : boolean, optional
           Whether or not the categorical is treated as an ordered
           categorical. If not given (None), do not change the ordered
           information.
        rename : boolean (default: False)
           Whether or not the new_categories should be considered as a rename
           of the old categories or as reordered categories.
        inplace : boolean (default: False)
           Whether or not to reorder the categories inplace or return a copy of
           this categorical with reordered categories.

        Returns
        -------
        cat : Categorical with reordered categories or None if inplace.

        See also
        --------
        rename_categories
        reorder_categories
        add_categories
        remove_categories
        remove_unused_categories
        """
        new_categories = self._validate_categories(new_categories)
        cat = self if inplace else self.copy()
        if rename:
            if (cat._categories is not None and
                    len(new_categories) < len(cat._categories)):
                # Codes that point past the end of the shortened categories
                # become -1/NaN. This must be done on `cat`: touching
                # `self._codes` would modify the original on a non-inplace
                # call and leave the returned copy with out-of-range codes.
                cat._codes[cat._codes >= len(new_categories)] = -1
        else:
            # not a rename: look the current values up in the new categories
            values = cat.__array__()
            cat._codes = _get_codes_for_values(values, new_categories)
        cat._categories = new_categories

        if ordered is None:
            # keep the existing flag
            ordered = self.ordered
        cat.set_ordered(ordered, inplace=True)

        if not inplace:
            return cat

    def rename_categories(self, new_categories, inplace=False):
        """ Renames categories.

        `new_categories` is assigned through the `categories` property, so
        it has to be list-like with unique items and exactly as many items
        as there are categories now.

        Raises
        ------
        ValueError
            If the new categories do not have the same number of items than the
            current categories or do not validate as categories

        Parameters
        ----------
        new_categories : Index-like
           The renamed categories.
        inplace : boolean (default: False)
           Whether or not to rename the categories inplace or return a copy of
           this categorical with renamed categories.

        Returns
        -------
        cat : Categorical with renamed categories or None if inplace.

        See also
        --------
        reorder_categories
        add_categories
        remove_categories
        remove_unused_categories
        set_categories
        """
        if inplace:
            self.categories = new_categories
            return None

        renamed = self.copy()
        renamed.categories = new_categories
        return renamed

    def reorder_categories(self, new_categories, ordered=None, inplace=False):
        """ Reorders categories as specified in new_categories.

        `new_categories` must consist of exactly the existing category
        items: nothing missing and nothing new.

        Raises
        ------
        ValueError
            If the new categories do not contain all old category items or any
            new ones

        Parameters
        ----------
        new_categories : Index-like
           The categories in new order.
        ordered : boolean, optional
           Whether or not the categorical is treated as an ordered categorical.
           If not given, do not change the ordered information.
        inplace : boolean (default: False)
           Whether or not to reorder the categories inplace or return a copy of
           this categorical with reordered categories.

        Returns
        -------
        cat : Categorical with reordered categories or None if inplace.

        See also
        --------
        rename_categories
        add_categories
        remove_categories
        remove_unused_categories
        set_categories
        """
        current_items = set(self._categories)
        requested_items = set(new_categories)
        if current_items != requested_items:
            raise ValueError("items in new_categories are not the same as in "
                             "old categories")

        return self.set_categories(new_categories, ordered=ordered,
                                   inplace=inplace)

    def add_categories(self, new_categories, inplace=False):
        """ Add new categories.

        The additions are appended after the existing categories (i.e. at
        the last/highest place) and are unused right after this call.

        Raises
        ------
        ValueError
            If the new categories include old categories or do not validate as
            categories

        Parameters
        ----------
        new_categories : category or list-like of category
           The new categories to be included.
        inplace : boolean (default: False)
           Whether or not to add the categories inplace or return a copy of
           this categorical with added categories.

        Returns
        -------
        cat : Categorical with new categories added or None if inplace.

        See also
        --------
        rename_categories
        reorder_categories
        remove_categories
        remove_unused_categories
        set_categories
        """
        # a single category is treated like a one-element list
        if not is_list_like(new_categories):
            new_categories = [new_categories]

        overlap = set(new_categories) & set(self._categories)
        if overlap:
            raise ValueError(
                "new categories must not include old categories: %s" %
                str(overlap))

        combined = list(self._categories) + list(new_categories)
        target = self if inplace else self.copy()
        target._categories = self._validate_categories(combined)
        # more categories may need a wider integer dtype for the codes
        target._codes = _coerce_indexer_dtype(target._codes, combined)

        if inplace:
            return None
        return target

    def remove_categories(self, removals, inplace=False):
        """ Removes the specified categories.

        Every item of `removals` has to be an existing category. Values that
        belonged to a removed category become NaN.

        Raises
        ------
        ValueError
            If the removals are not contained in the categories

        Parameters
        ----------
        removals : category or list of categories
           The categories which should be removed.
        inplace : boolean (default: False)
           Whether or not to remove the categories inplace or return a copy of
           this categorical with removed categories.

        Returns
        -------
        cat : Categorical with removed categories or None if inplace.

        See also
        --------
        rename_categories
        reorder_categories
        add_categories
        remove_unused_categories
        set_categories
        """
        # a single category is treated like a one-element list
        if not is_list_like(removals):
            removals = [removals]

        to_remove = set(removals)
        unknown = to_remove - set(self._categories)
        remaining = [item for item in self._categories
                     if item not in to_remove]

        # GH 10156: when a null is among the removals, nulls are neither
        # reported as unknown nor kept as a category
        if any(isnull(removals)):
            unknown = [item for item in unknown if notnull(item)]
            remaining = [item for item in remaining if notnull(item)]

        if unknown:
            raise ValueError("removals must all be in old categories: %s" %
                             str(unknown))

        return self.set_categories(remaining, ordered=self.ordered,
                                   rename=False, inplace=inplace)

    def remove_unused_categories(self, inplace=False):
        """ Removes categories which are not used.

        Parameters
        ----------
        inplace : boolean (default: False)
           Whether or not to drop unused categories inplace or return a copy of
           this categorical with unused categories dropped.

        Returns
        -------
        cat : Categorical with unused categories dropped or None if inplace.

        See also
        --------
        rename_categories
        reorder_categories
        add_categories
        remove_categories
        set_categories
        """
        cat = self if inplace else self.copy()
        # idx: the sorted distinct codes in use; inv: for every element, the
        # position of its code within idx (i.e. the new code)
        idx, inv = np.unique(cat._codes, return_inverse=True)

        if idx.size != 0 and idx[0] == -1:  # na sentinel
            # -1 sorts first: drop it from the used codes and shift the new
            # codes down so that missing values map back onto -1
            idx, inv = idx[1:], inv - 1

        cat._categories = cat.categories.take(idx)
        # Size the codes dtype against the trimmed categories of the result;
        # `self._categories` would still be the untrimmed ones on a
        # non-inplace call.
        cat._codes = _coerce_indexer_dtype(inv, cat._categories)

        if not inplace:
            return cat

    def map(self, mapper):
        """
        Apply mapper function to its categories (not codes).

        Parameters
        ----------
        mapper : callable
            Function to be applied. When all categories are mapped
            to different categories, the result will be Categorical which has
            the same order property as the original. Otherwise, the result will
            be np.ndarray.

        Returns
        -------
        applied : Categorical or np.ndarray.
            In the np.ndarray case missing values (code -1) are returned
            as NaN.
        """
        new_categories = self.categories.map(mapper)
        try:
            return Categorical.from_codes(self._codes.copy(),
                                          categories=new_categories,
                                          ordered=self.ordered)
        except ValueError:
            # The mapped categories cannot form a Categorical. Expand them by
            # code with take_1d (the helper __array__ uses) so that the -1
            # sentinel becomes a missing value; a plain np.take would wrap
            # around and return the last mapped category instead.
            return take_1d(np.asarray(new_categories), self._codes)

    # Rich comparison operators: each one is produced by the module-level
    # ``_cat_compare_op`` factory from the name of the operator it implements.
    __eq__ = _cat_compare_op('__eq__')
    __ne__ = _cat_compare_op('__ne__')
    __lt__ = _cat_compare_op('__lt__')
    __gt__ = _cat_compare_op('__gt__')
    __le__ = _cat_compare_op('__le__')
    __ge__ = _cat_compare_op('__ge__')

    # for Series/ndarray like compat
    @property
    def shape(self):
        """ Shape of the Categorical, as a one-element tuple.

        For internal compatibility with numpy arrays.

        Returns
        -------
        shape : tuple
            ``(number of values,)``
        """
        n_values = len(self._codes)
        return (n_values,)

    def shift(self, periods):
        """
        Shift Categorical by desired number of periods.

        Positions vacated by the shift are filled with the missing-value
        code (-1).

        Parameters
        ----------
        periods : int
            Number of periods to move, can be positive or negative

        Returns
        -------
        shifted : Categorical
        """
        # a Categorical always has ndim == 1, so there is no axis argument
        # to support here.
        shifted = self.codes
        if shifted.ndim > 1:
            raise NotImplementedError("Categorical with ndim > 1.")
        if np.prod(shifted.shape) and (periods != 0):
            shifted = np.roll(shifted, _ensure_platform_int(periods), axis=0)
            # np.roll wraps values around; blank out the wrapped-around part
            if periods > 0:
                vacated = slice(None, periods)
            else:
                vacated = slice(periods, None)
            shifted[vacated] = -1

        return Categorical.from_codes(shifted, categories=self.categories,
                                      ordered=self.ordered)

[docs]    def __array__(self, dtype=None):
        """
        The numpy array interface.

        Returns
        -------
        values : numpy array
            A numpy array of either the specified dtype or,
            if dtype==None (default), the same dtype as
            categorical.categories.dtype
        """
        ret = take_1d(self.categories.values, self._codes)
        if dtype and not is_dtype_equal(dtype, self.categories.dtype):
            return np.asarray(ret, dtype)
        return ret

    def __setstate__(self, state):
        """Necessary for making this object picklable"""
        if not isinstance(state, dict):
            raise Exception('invalid pickle state')

        # Provide compatibility with pre-0.15.0 Categoricals.
        if '_categories' not in state and '_levels' in state:
            state['_categories'] = self._validate_categories(state.pop(
                '_levels'))
        if '_codes' not in state and 'labels' in state:
            state['_codes'] = _coerce_indexer_dtype(state.pop('labels'),
                                                    state['_categories'])

        # 0.16.0 ordered change
        if '_ordered' not in state:

            # >=15.0 < 0.16.0
            if 'ordered' in state:
                state['_ordered'] = state.pop('ordered')
            else:
                state['_ordered'] = False

        for k, v in compat.iteritems(state):
            setattr(self, k, v)

    @property
    def T(self):
        """ Return this Categorical itself; no new object is created. """
        return self

    @property
    def nbytes(self):
        """ Bytes used by the codes plus bytes used by the category values. """
        codes_bytes = self._codes.nbytes
        categories_bytes = self._categories.values.nbytes
        return codes_bytes + categories_bytes

    def memory_usage(self, deep=False):
        """
        Memory usage of my values

        The result is the size of the codes plus whatever the categories
        report for their own ``memory_usage``.

        Parameters
        ----------
        deep : bool
            Introspect the data deeply, interrogate
            `object` dtypes for system-level memory consumption

        Returns
        -------
        bytes used

        Notes
        -----
        Memory usage does not include memory consumed by elements that
        are not components of the array if deep=False

        See Also
        --------
        numpy.ndarray.nbytes
        """
        categories_usage = self._categories.memory_usage(deep=deep)
        return self._codes.nbytes + categories_usage

    @Substitution(klass='Categorical', value='v')
    @Appender(_shared_docs['searchsorted'])
    def searchsorted(self, v, side='left', sorter=None):
        # No docstring here on purpose: the documentation is attached by the
        # decorators applied to this method.
        if not self.ordered:
            raise ValueError("Categorical not ordered\nyou can use "
                             ".as_ordered() to change the Categorical to an "
                             "ordered one")

        from pandas.core.series import Series
        # where the requested value(s) fall among the category values;
        # ``side`` is applied in this step only
        needles = Series(v).values
        values_as_codes = self.categories.values.searchsorted(needles,
                                                              side=side)

        # locate those positions within the codes of this Categorical
        return self.codes.searchsorted(values_as_codes, sorter=sorter)

    def isnull(self):
        """
        Detect missing values

        Both missing values (-1 in .codes) and NA as a category are detected.

        Returns
        -------
        a boolean array of whether my values are null

        See also
        --------
        pandas.isnull : pandas version
        Categorical.notnull : boolean inverse of Categorical.isnull
        """
        missing = self._codes == -1

        # Only string/object and float categories can hold np.nan, so any
        # other dtype is done after the sentinel check.
        if self.categories.dtype.kind not in ['S', 'O', 'f']:
            return missing
        if np.nan not in self.categories:
            return missing

        # we only have one NA in categories
        nan_pos = np.where(isnull(self.categories))[0]
        return np.logical_or(missing, self._codes == nan_pos)

    def notnull(self):
        """
        Reverse of isnull

        Both missing values (-1 in .codes) and NA as a category are detected as
        null.

        Returns
        -------
        a boolean array of whether my values are not null

        See also
        --------
        pandas.notnull : pandas version
        Categorical.isnull : boolean inverse of Categorical.notnull
        """
        null_mask = self.isnull()
        return ~null_mask

    def put(self, *args, **kwargs):
        """
        Replace specific elements in the Categorical with given values.

        Not implemented: every call raises NotImplementedError.
        """
        message = ("'put' is not yet implemented "
                   "for Categorical")
        raise NotImplementedError(message)

    def dropna(self):
        """
        Return the Categorical without null values.

        Both missing values (-1 in .codes) and NA as a category are detected.
        NA is removed from the categories if present.

        Returns
        -------
        valid : Categorical
        """
        valid = self[self.notnull()]
        if not isnull(valid.categories).any():
            return valid
        # NaN is still listed as a category: drop it from the categories too
        return valid.remove_categories([np.nan])

    def value_counts(self, dropna=True):
        """
        Returns a Series containing counts of each category.

        Every category will have an entry, even those with a count of 0.

        Parameters
        ----------
        dropna : boolean, default True
            Don't include counts of NaN, even if NaN is a category.

        Returns
        -------
        counts : Series
        """
        from numpy import bincount
        from pandas.types.missing import isnull
        from pandas.core.series import Series
        from pandas.core.index import CategoricalIndex

        # when NaN itself is a category and must not be counted, count on a
        # version of the data without that category
        if dropna and isnull(self.categories).any():
            obj = self.remove_categories([np.nan])
        else:
            obj = self
        codes = obj._codes
        categories = obj.categories
        n_categories = len(categories)
        valid = 0 <= codes
        positions = np.arange(n_categories)
        no_missing = valid.all()

        if dropna or no_missing:
            observed = codes if no_missing else codes[valid]
            counts = bincount(observed, minlength=n_categories or None)
        else:
            # count the missing values in one extra bin after the last
            # category, and label that bin with the code -1
            counts = bincount(np.where(valid, codes, n_categories))
            positions = np.append(positions, -1)

        labels = Categorical(positions, categories=categories,
                             ordered=obj.ordered, fastpath=True)

        return Series(counts, index=CategoricalIndex(labels), dtype='int64')

    def get_values(self):
        """ Return the values.

        For internal compatibility with pandas formatting.

        Returns
        -------
        values : numpy array
            A numpy array of the same dtype as categorical.categories.dtype or
            Index if datetime / periods
        """
        if not is_datetimelike(self.categories):
            return np.array(self)
        # datetime and period categories: return an Index to keep metadata
        return self.categories.take(self._codes, fill_value=np.nan)

    def check_for_ordered(self, op):
        """ Raise TypeError, naming operation `op`, unless we are ordered. """
        if self.ordered:
            return
        message = ("Categorical is not ordered for operation {op}\n"
                   "you can use .as_ordered() to change the "
                   "Categorical to an ordered one\n")
        raise TypeError(message.format(op=op))

    def argsort(self, ascending=True, *args, **kwargs):
        """
        Returns the indices that would sort the Categorical instance if
        'sort_values' was called. This function is implemented to provide
        compatibility with numpy ndarray objects.

        While an ordering is applied to the category values, arg-sorting
        in this context refers more to organizing and grouping together
        based on matching category values. Thus, this function can be
        called on an unordered Categorical instance unlike the functions
        'Categorical.min' and 'Categorical.max'.

        Parameters
        ----------
        ascending : boolean, default True
            When False, the ascending result is reversed.

        Returns
        -------
        argsorted : numpy array

        See also
        --------
        numpy.ndarray.argsort
        """
        ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs)
        # the sort is done on (a copy of) the integer codes
        indices = np.argsort(self._codes.copy(), **kwargs)
        return indices if ascending else indices[::-1]

    def sort_values(self, inplace=False, ascending=True, na_position='last'):
        """ Sorts the Categorical by category value returning a new
        Categorical by default.

        While an ordering is applied to the category values, sorting in this
        context refers more to organizing and grouping together based on
        matching category values. Thus, this function can be called on an
        unordered Categorical instance unlike the functions 'Categorical.min'
        and 'Categorical.max'.

        Parameters
        ----------
        inplace : boolean, default False
            Do operation in place.
        ascending : boolean, default True
            Order ascending. Passing False orders descending. The
            ordering parameter provides the method by which the
            category values are organized.
        na_position : {'first', 'last'} (optional, default='last')
            'first' puts NaNs at the beginning
            'last' puts NaNs at the end

        Returns
        -------
        y : Categorical or None

        Raises
        ------
        ValueError
            If `na_position` is neither 'first' nor 'last'.

        See Also
        --------
        Categorical.sort

        Examples
        --------
        >>> c = pd.Categorical([1, 2, 2, 1, 5])
        >>> c
        [1, 2, 2, 1, 5]
        Categories (3, int64): [1, 2, 5]
        >>> c.sort_values()
        [1, 1, 2, 2, 5]
        Categories (3, int64): [1, 2, 5]
        >>> c.sort_values(ascending=False)
        [5, 2, 2, 1, 1]
        Categories (3, int64): [1, 2, 5]

        Inplace sorting can be done as well:

        >>> c.sort_values(inplace=True)
        >>> c
        [1, 1, 2, 2, 5]
        Categories (3, int64): [1, 2, 5]

        'sort_values' behaviour with NaNs. Note that 'na_position'
        is independent of the 'ascending' parameter:

        >>> c = pd.Categorical([np.nan, 2, 2, np.nan, 5])
        >>> c
        [NaN, 2.0, 2.0, NaN, 5.0]
        Categories (2, int64): [2, 5]
        >>> c.sort_values()
        [2.0, 2.0, 5.0, NaN, NaN]
        Categories (2, int64): [2, 5]
        >>> c.sort_values(ascending=False)
        [5.0, 2.0, 2.0, NaN, NaN]
        Categories (2, int64): [2, 5]
        >>> c.sort_values(na_position='first')
        [NaN, NaN, 2.0, 2.0, 5.0]
        Categories (2, int64): [2, 5]
        >>> c.sort_values(ascending=False, na_position='first')
        [NaN, NaN, 5.0, 2.0, 2.0]
        Categories (2, int64): [2, 5]
        """
        if na_position not in ['last', 'first']:
            raise ValueError('invalid na_position: {!r}'.format(na_position))

        ordered_codes = np.sort(self._codes)
        if not ascending:
            ordered_codes = ordered_codes[::-1]

        # NaN handling: missing values carry the code -1 and are moved to
        # the requested end, whatever the sort direction
        is_na = (ordered_codes == -1)
        if is_na.any():
            n_missing = len(ordered_codes[is_na])
            present = ordered_codes[~is_na]
            relocated = ordered_codes.copy()
            if na_position == "first":
                relocated[:n_missing] = -1
                relocated[n_missing:] = present
            else:
                # na_position was validated above, so this is "last"
                boundary = len(ordered_codes) - n_missing
                relocated[:boundary] = present
                relocated[boundary:] = -1
            ordered_codes = relocated

        if inplace:
            self._codes = ordered_codes
            return
        return Categorical(values=ordered_codes, categories=self.categories,
                           ordered=self.ordered, fastpath=True)

    def order(self, inplace=False, ascending=True, na_position='last'):
        """
        DEPRECATED: use :meth:`Categorical.sort_values`. That function
        is entirely equivalent to this one.

        Emits a FutureWarning and forwards all arguments to `sort_values`.

        See Also
        --------
        Categorical.sort_values
        """
        warn("order is deprecated, use sort_values(...)", FutureWarning,
             stacklevel=2)
        forwarded = dict(inplace=inplace, ascending=ascending,
                         na_position=na_position)
        return self.sort_values(**forwarded)

    def sort(self, inplace=True, ascending=True, na_position='last', **kwargs):
        """
        DEPRECATED: use :meth:`Categorical.sort_values`. That function
        is just like this one, except that a new Categorical is returned
        by default, so make sure to pass in 'inplace=True' to get
        inplace sorting.

        Emits a FutureWarning, validates the extra keyword arguments and
        forwards the remaining arguments to `sort_values`.

        See Also
        --------
        Categorical.sort_values
        """
        warn("sort is deprecated, use sort_values(...)", FutureWarning,
             stacklevel=2)
        nv.validate_sort(tuple(), kwargs)
        forwarded = dict(inplace=inplace, ascending=ascending,
                         na_position=na_position)
        return self.sort_values(**forwarded)

    def ravel(self, order='C'):
        """ Return a flattened (numpy) array.

        For internal compatibility with numpy arrays.

        Parameters
        ----------
        order : str, default 'C'
            Accepted for signature compatibility; the body does not use it.

        Returns
        -------
        raveled : numpy array
        """
        return np.array(self)

    def view(self):
        """Return a view of myself.

        For internal compatibility with numpy arrays. No new object is
        created, so changes made through the result are changes to this
        Categorical.

        Returns
        -------
        view : Categorical
           Returns `self`!
        """
        return self

    def to_dense(self):
        """Return my 'dense' representation

        For internal compatibility with numpy arrays. The conversion is
        done by passing this Categorical to ``np.asarray``.

        Returns
        -------
        dense : array
        """
        return np.asarray(self)

    @deprecate_kwarg(old_arg_name='fill_value', new_arg_name='value')
    def fillna(self, value=None, method=None, limit=None):
        """ Fill NA/NaN values using the specified method.

        Parameters
        ----------
        method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
            Method to use for filling holes in reindexed Series
            pad / ffill: propagate last valid observation forward to next valid
            backfill / bfill: use NEXT valid observation to fill gap
        value : scalar
            Value to use to fill holes (e.g. 0). It must be one of the
            categories; a null value (the default) leaves the holes
            unfilled.
        limit : int, default None
            (Not implemented yet for Categorical!)
            If method is specified, this is the maximum number of consecutive
            NaN values to forward/backward fill. In other words, if there is
            a gap with more than this number of consecutive NaNs, it will only
            be partially filled. If method is not specified, this is the
            maximum number of entries along the entire axis where NaNs will be
            filled.

        Returns
        -------
        filled : Categorical with NA/NaN filled

        Raises
        ------
        NotImplementedError
            If `limit` is given.
        ValueError
            If `value` is not null and is not one of the categories.
        """

        if value is None:
            value = np.nan
        if limit is not None:
            raise NotImplementedError("specifying a limit for fillna has not "
                                      "been implemented yet")

        values = self._codes

        # Make sure that we also get NA in categories
        nan_is_category = False
        if self.categories.dtype.kind in ['S', 'O', 'f']:
            if np.nan in self.categories:
                nan_is_category = True
                values = values.copy()
                nan_pos = np.where(isnull(self.categories))[0]
                # we only have one NA in categories
                values[values == nan_pos] = -1

        # pad / bfill
        if method is not None:

            values = self.to_dense().reshape(-1, len(self))
            values = interpolate_2d(values, method, 0, None,
                                    value).astype(self.categories.dtype)[0]
            values = _get_codes_for_values(values, self.categories)

        else:

            if not isnull(value) and value not in self.categories:
                raise ValueError("fill value must be in categories")

            mask = values == -1
            # A null fill value can only be looked up when NaN is itself a
            # category; otherwise there is nothing to fill with and the
            # holes stay as they are (get_loc would raise KeyError).
            if mask.any() and (nan_is_category or not isnull(value)):
                values = values.copy()
                values[mask] = self.categories.get_loc(value)

        return Categorical(values, categories=self.categories,
                           ordered=self.ordered, fastpath=True)

    def take_nd(self, indexer, allow_fill=True, fill_value=None):
        """ Take the codes by the indexer, fill with the fill_value.

        For internal compatibility with numpy arrays. The codes are always
        taken with filling enabled and -1 as the fill code, whatever
        `allow_fill` is.
        """

        # filling must always be None/nan here
        # but is passed thru internally
        assert isnull(fill_value)

        taken = take_1d(self._codes, indexer, allow_fill=True, fill_value=-1)
        return Categorical(taken, categories=self.categories,
                           ordered=self.ordered, fastpath=True)

    # ``take`` is the same function object as ``take_nd``
    take = take_nd

    def _slice(self, slicer):
        """ Return a slice of myself.

        For internal compatibility with numpy arrays.

        Raises
        ------
        AssertionError
            If a 2-tuple is given whose first element is not a null slice.
        """

        # only 1 dimensional slicing is allowed, but in a 2-d case we can
        # be passed (slice(None), ....)
        if isinstance(slicer, tuple) and len(slicer) == 2:
            first, second = slicer[0], slicer[1]
            if not is_null_slice(first):
                raise AssertionError("invalid slicing for a 1-ndim "
                                     "categorical")
            slicer = second

        sliced_codes = self._codes[slicer]
        return Categorical(values=sliced_codes, categories=self.categories,
                           ordered=self.ordered, fastpath=True)

    def __len__(self):
        """The length of this Categorical, i.e. the number of codes."""
        return len(self._codes)

    def __iter__(self):
        """Returns an Iterator over the values of this Categorical.

        The values iterated over are those returned by `get_values`.
        """
        return iter(self.get_values())

    def _tidy_repr(self, max_vals=10, footer=True):
        """ a short repr showing only the first and last values (max_vals in
        total) and, unless disabled, the footer
        """
        n_head = max_vals // 2
        n_tail = max_vals - n_head
        head_repr = self[:n_head]._get_repr(length=False, footer=False)
        tail_repr = self[-n_tail:]._get_repr(length=False, footer=False)

        # drop the last character of the head and the first of the tail so
        # both pieces join into a single bracketed list
        text = '%s, ..., %s' % (head_repr[:-1], tail_repr[1:])
        if footer:
            text = '%s\n%s' % (text, self._repr_footer())

        return compat.text_type(text)

    def _repr_categories(self):
        """ return the formatted categories as a list of strings; when there
        are more than ``display.max_categories`` of them, only the first
        and last ones are kept, separated by "..." """
        max_categories = get_option("display.max_categories")
        if max_categories == 0:
            max_categories = 10
        from pandas.formats import format as fmt
        categories = self.categories
        if len(categories) > max_categories:
            half = max_categories // 2
            leading = fmt.format_array(categories[:half], None)
            trailing = fmt.format_array(categories[-half:], None)
            formatted = leading + ["..."] + trailing
        else:
            formatted = fmt.format_array(categories, None)

        # Strip all leading spaces, which format_array adds for columns...
        return [entry.strip() for entry in formatted]

    def _repr_categories_info(self):
        """ Returns a string representation of the footer.

        The result has the form ``Categories (<count>, <dtype>): [...]``,
        with the categories joined by " < " when ordered and ", " otherwise,
        and wrapped onto continuation lines when a maximum width applies.
        """

        category_strs = self._repr_categories()
        # prefer the categories' own dtype_str, else the str of their dtype
        dtype = getattr(self.categories, 'dtype_str',
                        str(self.categories.dtype))

        levheader = "Categories (%d, %s): " % (len(self.categories), dtype)
        width, height = get_terminal_size()
        # the display.width option wins; the terminal width is the fallback
        max_width = get_option("display.width") or width
        if com.in_ipython_frontend():
            # 0 = no breaks
            max_width = 0
        levstring = ""
        start = True
        cur_col_len = len(levheader)  # header
        sep_len, sep = (3, " < ") if self.ordered else (2, ", ")
        linesep = sep.rstrip() + "\n"  # remove whitespace
        for val in category_strs:
            if max_width != 0 and cur_col_len + sep_len + len(val) > max_width:
                # start a continuation line, indented to sit under the
                # opening bracket
                levstring += linesep + (" " * (len(levheader) + 1))
                cur_col_len = len(levheader) + 1  # header + a whitespace
            elif not start:
                levstring += sep
                # NOTE: the running width is advanced only in this branch,
                # and by len(val) only
                cur_col_len += len(val)
            levstring += val
            start = False
        # replace to simple save space by
        return levheader + "[" + levstring.replace(" < ... < ", " ... ") + "]"

    def _repr_footer(self):
        """ Return the footer: a ``Length: <n>`` line followed by the
        categories info line. """

        return u('Length: %d\n%s') % (len(self), self._repr_categories_info())

    def _get_repr(self, length=True, na_rep='NaN', footer=True):
        """ Render this Categorical as text with a CategoricalFormatter
        configured from the given options. """
        from pandas.formats import format as fmt
        options = dict(length=length, na_rep=na_rep, footer=footer)
        rendered = fmt.CategoricalFormatter(self, **options).to_string()
        return compat.text_type(rendered)

    def __unicode__(self):
        """ Unicode representation. """
        _maxlen = 10
        if len(self._codes) > _maxlen:
            result = self._tidy_repr(_maxlen)
        elif len(self._codes) > 0:
            result = self._get_repr(length=len(self) > _maxlen)
        else:
            result = ('[], %s' %
                      self._get_repr(length=False,
                                     footer=True, ).replace("\n", ", "))

        return result

    def _maybe_coerce_indexer(self, indexer):
        """ return an indexer coerced to the codes dtype """
        if isinstance(indexer, np.ndarray) and indexer.dtype.kind == 'i':
            indexer = indexer.astype(self._codes.dtype)
        return indexer

    def __getitem__(self, key):
        """ Return an item. """
        if isinstance(key, (int, np.integer)):
            i = self._codes[key]
            if i == -1:
                return np.nan
            else:
                return self.categories[i]
        else:
            return Categorical(values=self._codes[key],
                               categories=self.categories,
                               ordered=self.ordered,
                               fastpath=True)

    def __setitem__(self, key, value):
        """ Item assignment.

        The assigned value(s) are translated into category codes and
        written into the codes at `key`.

        Parameters
        ----------
        key : int, slice, tuple or array-like
            Position(s) to assign to. A tuple must have one element, or two
            elements of which the first is a null slice.
        value : scalar, list-like or Categorical
            Value(s) to assign; each must be an existing category or null.

        Raises
        ------
        ValueError
            If (one or more) value is not in categories or if an assigned
            `Categorical` does not have the same categories
        AssertionError
            If `key` is a tuple that does not describe 1-dimensional slicing
        """

        # require identical categories set
        if isinstance(value, Categorical):
            if not value.categories.equals(self.categories):
                raise ValueError("Cannot set a Categorical with another, "
                                 "without identical categories")

        # work on a list-like from here on, even for a scalar value
        rvalue = value if is_list_like(value) else [value]

        from pandas import Index
        # the assigned values that are not among the current categories
        to_add = Index(rvalue).difference(self.categories)

        # no assignments of values not in categories, but it's always ok to set
        # something to np.nan
        if len(to_add) and not isnull(to_add).all():
            raise ValueError("Cannot setitem on a Categorical with a new "
                             "category, set the categories first")

        # set by position
        if isinstance(key, (int, np.integer)):
            pass

        # tuple of indexers (dataframe)
        elif isinstance(key, tuple):
            # only allow 1 dimensional slicing, but can
            # in a 2-d case be passed (slice(None),....)
            if len(key) == 2:
                if not is_null_slice(key[0]):
                    raise AssertionError("invalid slicing for a 1-ndim "
                                         "categorical")
                key = key[1]
            elif len(key) == 1:
                key = key[0]
            else:
                raise AssertionError("invalid slicing for a 1-ndim "
                                     "categorical")

        # slicing in Series or Categorical
        elif isinstance(key, slice):
            pass

        # Array of True/False in Series or Categorical
        else:
            # There is a bug in numpy, which does not accept a Series as a
            # indexer
            # https://github.com/pydata/pandas/issues/6168
            # https://github.com/numpy/numpy/issues/4240 -> fixed in numpy 1.9
            # FIXME: remove when numpy 1.9 is the lowest numpy version pandas
            # accepts...
            key = np.asarray(key)

        # codes of the assigned values (-1 where a value is not a category)
        lindexer = self.categories.get_indexer(rvalue)

        # FIXME: the following can be removed after GH7820 is fixed:
        # https://github.com/pydata/pandas/issues/7820
        # float categories do currently return -1 for np.nan, even if np.nan is
        # included in the index -> "repair" this here
        if isnull(rvalue).any() and isnull(self.categories).any():
            nan_pos = np.where(isnull(self.categories))[0]
            lindexer[lindexer == -1] = nan_pos

        # match the dtype of the codes before writing into them
        lindexer = self._maybe_coerce_indexer(lindexer)
        self._codes[key] = lindexer

    # reduction ops #
    def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
                filter_type=None, **kwds):
        """ perform the reduction type operation """
        func = getattr(self, name, None)
        if func is None:
            raise TypeError("Categorical cannot perform the operation "
                            "{op}".format(op=name))
        return func(numeric_only=numeric_only, **kwds)

    def min(self, numeric_only=None, **kwargs):
        """ The minimum value of the object.

        Only ordered `Categoricals` have a minimum!

        Parameters
        ----------
        numeric_only : boolean, default None
            When true, missing values (code -1) are left out before the
            minimum is taken.

        Raises
        ------
        TypeError
            If the `Categorical` is not `ordered`.

        Returns
        -------
        min : the minimum of this `Categorical`
        """
        self.check_for_ordered('min')
        codes = self._codes
        if numeric_only:
            codes = codes[codes != -1]
        lowest = codes.min(**kwargs)
        # a lowest code of -1 means a missing value took part
        return np.nan if lowest == -1 else self.categories[lowest]

    def max(self, numeric_only=None, **kwargs):
        """ The maximum value of the object.

        Only ordered `Categoricals` have a maximum!

        Parameters
        ----------
        numeric_only : boolean, default None
            When true, missing values (code -1) are left out before the
            maximum is taken.

        Raises
        ------
        TypeError
            If the `Categorical` is not `ordered`.

        Returns
        -------
        max : the maximum of this `Categorical`
        """
        self.check_for_ordered('max')
        codes = self._codes
        if numeric_only:
            codes = codes[codes != -1]
        highest = codes.max(**kwargs)
        # a highest code of -1 means only missing values were present
        return np.nan if highest == -1 else self.categories[highest]

    def mode(self):
        """
        Returns the mode(s) of the Categorical.

        Empty if nothing occurs at least 2 times.  Always returns `Categorical`
        even if only one value.

        Returns
        -------
        modes : `Categorical` (sorted)
        """

        import pandas.hashtable as htable
        # missing values (code -1) do not take part
        observed = self._codes[self._codes != -1]
        modal_codes = sorted(htable.mode_int64(_ensure_int64(observed)))
        return Categorical(values=modal_codes, categories=self.categories,
                           ordered=self.ordered, fastpath=True)

    def unique(self):
        """
        Return the ``Categorical`` which ``categories`` and ``codes`` are
        unique. Unused categories are NOT returned.

        - unordered category: values and categories are sorted by appearance
          order.
        - ordered category: values are sorted by appearance order, categories
          keeps existing order.

        Returns
        -------
        unique values : ``Categorical``
        """

        from pandas.core.nanops import unique1d
        # unlike np.unique, unique1d does not sort
        distinct_codes = unique1d(self.codes)
        result = self.copy()
        # the codes keep the nan sentinel ...
        result._codes = distinct_codes
        # ... but it must not be used to look up a category
        used_codes = distinct_codes[distinct_codes != -1]
        if self.ordered:
            used_codes = sorted(used_codes)
        kept_categories = result.categories.take(used_codes)
        return result.set_categories(kept_categories)

    def equals(self, other):
        """
        Returns True if categorical arrays are equal.

        The dtypes are compared first (see `is_dtype_equal`); the codes are
        only compared when the dtypes match.

        Parameters
        ----------
        other : `Categorical`

        Returns
        -------
        are_equal : boolean
        """
        same_dtype = self.is_dtype_equal(other)
        if not same_dtype:
            return same_dtype
        return np.array_equal(self._codes, other._codes)

    def is_dtype_equal(self, other):
        """
        Returns True if categoricals are the same dtype
          same categories, and same ordered

        An `other` that lacks the needed attributes compares as not equal.

        Parameters
        ----------
        other : Categorical

        Returns
        -------
        are_equal : boolean
        """

        try:
            same_categories = self.categories.equals(other.categories)
            return same_categories and self.ordered == other.ordered
        except (AttributeError, TypeError):
            return False

    def describe(self):
        """ Describes this Categorical

        Returns
        -------
        description: `DataFrame`
            A dataframe with frequency and counts by category.
        """
        # missing values are counted too (dropna=False)
        counts = self.value_counts(dropna=False)
        total = float(counts.sum())
        freqs = counts / total

        from pandas.tools.merge import concat
        description = concat([counts, freqs], axis=1)
        description.columns = ['counts', 'freqs']
        description.index.name = 'categories'

        return description

    def repeat(self, repeats, *args, **kwargs):
        """
        Repeat elements of a Categorical.

        The codes are repeated; categories and ordering are carried over.

        See also
        --------
        numpy.ndarray.repeat

        """
        nv.validate_repeat(args, kwargs)
        repeated_codes = self._codes.repeat(repeats)
        return Categorical(values=repeated_codes, categories=self.categories,
                           ordered=self.ordered, fastpath=True)

# The Series.cat accessor


class CategoricalAccessor(PandasDelegate, NoNewAttributesMixin):
    """
    Accessor object for categorical properties of the Series values.

    Be aware that assigning to `categories` is an inplace operation, while
    all methods return new categorical data per default (but can be called
    with `inplace=True`).

    The accessor stores the wrapped categorical as ``categorical`` and the
    index it was created with as ``index``; results of delegated methods
    and ``codes`` are returned as a Series carrying that index.

    Examples
    --------
    >>> s.cat.categories
    >>> s.cat.categories = list('abc')
    >>> s.cat.rename_categories(list('cab'))
    >>> s.cat.reorder_categories(list('cab'))
    >>> s.cat.add_categories(['d','e'])
    >>> s.cat.remove_categories(['d'])
    >>> s.cat.remove_unused_categories()
    >>> s.cat.set_categories(list('abcde'))
    >>> s.cat.as_ordered()
    >>> s.cat.as_unordered()

    """

    def __init__(self, values, index):
        self.categorical = values
        self.index = index
        # NOTE(review): presumably locks the instance against new
        # attributes (see NoNewAttributesMixin) -- confirm.
        self._freeze()

    def _delegate_property_get(self, name):
        # read the property straight from the wrapped categorical
        target = self.categorical
        return getattr(target, name)

    def _delegate_property_set(self, name, new_values):
        # write the property straight onto the wrapped categorical
        target = self.categorical
        return setattr(target, name, new_values)

    @property
    def codes(self):
        """The codes of the wrapped categorical as a Series on ``index``."""
        from pandas import Series
        raw_codes = self.categorical.codes
        return Series(raw_codes, index=self.index)

    def _delegate_method(self, name, *args, **kwargs):
        # Call ``name`` on the wrapped categorical; a non-None result is
        # wrapped in a Series, a None result is passed on as None.
        from pandas import Series
        bound_method = getattr(self.categorical, name)
        outcome = bound_method(*args, **kwargs)
        if outcome is None:
            return None
        return Series(outcome, index=self.index)


# Register the ``Categorical`` attributes that ``CategoricalAccessor``
# exposes: ``categories`` and ``ordered`` with ``typ='property'`` ...
CategoricalAccessor._add_delegate_accessors(delegate=Categorical,
                                            accessors=["categories",
                                                       "ordered"],
                                            typ='property')
# ... and the category-manipulation methods with ``typ='method'``.
# NOTE(review): presumably the generated attributes are routed through the
# ``_delegate_property_get`` / ``_delegate_property_set`` /
# ``_delegate_method`` hooks of the accessor -- confirm in PandasDelegate.
CategoricalAccessor._add_delegate_accessors(delegate=Categorical, accessors=[
    "rename_categories", "reorder_categories", "add_categories",
    "remove_categories", "remove_unused_categories", "set_categories",
    "as_ordered", "as_unordered"], typ='method')

# utility routines


def _get_codes_for_values(values, categories):
    """
    Turn ``values`` into codes given the specified ``categories``.

    A hash table is filled with the locations of the categories and every
    value is then looked up in it. When the dtypes of ``values`` and
    ``categories`` differ, both are first passed through
    ``_ensure_object`` so they are hashed the same way.

    Parameters
    ----------
    values : array-like with a ``dtype`` attribute
    categories : array-like with a ``dtype`` attribute

    Returns
    -------
    codes : result of ``_coerce_indexer_dtype`` applied to the looked-up
        locations
        NOTE(review): presumably an integer array with one position per
        value -- confirm how values missing from ``categories`` are coded.
    """
    from pandas.core.algorithms import _get_data_algo, _hashtables

    same_dtype = is_dtype_equal(values.dtype, categories.dtype)
    if not same_dtype:
        values = _ensure_object(values)
        categories = _ensure_object(categories)

    # the hash table class is picked from the values; the vector class
    # returned alongside it is not needed here
    (table_cls, _), lookup_vals = _get_data_algo(values, _hashtables)
    (_, _), category_vals = _get_data_algo(categories, _hashtables)

    table = table_cls(len(category_vals))
    table.map_locations(category_vals)
    locations = table.lookup(lookup_vals)
    return _coerce_indexer_dtype(locations, category_vals)


def _convert_to_list_like(list_like):
    if hasattr(list_like, "dtype"):
        return list_like
    if isinstance(list_like, list):
        return list_like
    if (is_sequence(list_like) or isinstance(list_like, tuple) or
            isinstance(list_like, types.GeneratorType)):
        return list(list_like)
    elif is_scalar(list_like):
        return [list_like]
    else:
        # is this reached?
        return [list_like]