Source code for pandas.indexes.base

import datetime
import warnings
import operator

import numpy as np
import pandas.tslib as tslib
import pandas.lib as lib
import pandas.algos as _algos
import pandas.index as _index
from pandas.lib import Timestamp, Timedelta, is_datetime_array

from pandas.compat import range, u
from pandas.compat.numpy import function as nv
from pandas import compat


from pandas.types.generic import ABCSeries, ABCMultiIndex, ABCPeriodIndex
from pandas.types.missing import isnull, array_equivalent
from pandas.types.common import (_ensure_int64, _ensure_object,
                                 _ensure_platform_int,
                                 is_datetimetz,
                                 is_integer,
                                 is_float,
                                 is_dtype_equal,
                                 is_object_dtype,
                                 is_categorical_dtype,
                                 is_bool_dtype,
                                 is_integer_dtype, is_float_dtype,
                                 needs_i8_conversion,
                                 is_iterator, is_list_like,
                                 is_scalar)
from pandas.types.cast import _coerce_indexer_dtype
from pandas.core.common import (is_bool_indexer,
                                _values_from_object,
                                _asarray_tuplesafe)

from pandas.core.base import (PandasObject, FrozenList, FrozenNDArray,
                              IndexOpsMixin)
import pandas.core.base as base
from pandas.util.decorators import (Appender, Substitution, cache_readonly,
                                    deprecate, deprecate_kwarg)
import pandas.core.common as com
import pandas.types.concat as _concat
import pandas.core.missing as missing
import pandas.core.algorithms as algos
from pandas.formats.printing import pprint_thing
from pandas.core.ops import _comp_method_OBJECT_ARRAY
from pandas.core.strings import StringAccessorMixin

from pandas.core.config import get_option

# simplify
default_pprint = lambda x, max_seq_items=None: \
    pprint_thing(x, escape_chars=('\t', '\r', '\n'), quote_strings=True,
                 max_seq_items=max_seq_items)

__all__ = ['Index']

_unsortable_types = frozenset(('mixed', 'mixed-integer'))

_index_doc_kwargs = dict(klass='Index', inplace='', duplicated='np.array')
_index_shared_docs = dict()


def _try_get_item(x):
    try:
        return x.item()
    except AttributeError:
        return x


class InvalidIndexError(Exception):
    pass


_o_dtype = np.dtype(object)
_Identity = object


def _new_Index(cls, d):
    """ This is called upon unpickling, rather than the default which doesn't
    have arguments and breaks __new__
    """
    return cls.__new__(cls, **d)


class Index(IndexOpsMixin, StringAccessorMixin, PandasObject):
    """
    Immutable ndarray implementing an ordered, sliceable set. The basic object
    storing axis labels for all pandas objects.

    Parameters
    ----------
    data : array-like (1-dimensional)
    dtype : NumPy dtype (default: object)
    copy : bool
        Make a copy of input ndarray
    name : object
        Name to be stored in the index
    tupleize_cols : bool (default: True)
        When True, attempt to create a MultiIndex if possible

    Notes
    -----
    An Index instance can **only** contain hashable objects
    """
    # To hand over control to subclasses
    _join_precedence = 1

    # Cython methods
    _groupby = _algos.groupby_object
    _arrmap = _algos.arrmap_object
    _left_indexer_unique = _algos.left_join_indexer_unique_object
    _left_indexer = _algos.left_join_indexer_object
    _inner_indexer = _algos.inner_join_indexer_object
    _outer_indexer = _algos.outer_join_indexer_object
    _box_scalars = False

    _typ = 'index'
    _data = None
    _id = None
    name = None
    asi8 = None
    _comparables = ['name']
    _attributes = ['name']
    _allow_index_ops = True
    _allow_datetime_index_ops = False
    _allow_period_index_ops = False
    _is_numeric_dtype = False
    _can_hold_na = True

    # prioritize current class for _shallow_copy_with_infer,
    # used to infer integers as datetime-likes
    _infer_as_myclass = False

    _engine_type = _index.ObjectEngine

    def __new__(cls, data=None, dtype=None, copy=False, name=None,
                fastpath=False, tupleize_cols=True, **kwargs):

        if name is None and hasattr(data, 'name'):
            name = data.name

        if fastpath:
            return cls._simple_new(data, name)

        from .range import RangeIndex

        # range
        if isinstance(data, RangeIndex):
            return RangeIndex(start=data, copy=copy, dtype=dtype, name=name)
        elif isinstance(data, range):
            return RangeIndex.from_range(data, copy=copy, dtype=dtype,
                                         name=name)

        # categorical
        if is_categorical_dtype(data) or is_categorical_dtype(dtype):
            from .category import CategoricalIndex
            return CategoricalIndex(data, copy=copy, name=name, **kwargs)

        # index-like
        elif isinstance(data, (np.ndarray, Index, ABCSeries)):

            if (issubclass(data.dtype.type, np.datetime64) or
                    is_datetimetz(data)):
                from pandas.tseries.index import DatetimeIndex
                result = DatetimeIndex(data, copy=copy, name=name, **kwargs)
                if dtype is not None and _o_dtype == dtype:
                    return Index(result.to_pydatetime(), dtype=_o_dtype)
                else:
                    return result

            elif issubclass(data.dtype.type, np.timedelta64):
                from pandas.tseries.tdi import TimedeltaIndex
                result = TimedeltaIndex(data, copy=copy, name=name, **kwargs)
                if dtype is not None and _o_dtype == dtype:
                    return Index(result.to_pytimedelta(), dtype=_o_dtype)
                else:
                    return result

            if dtype is not None:
                try:

                    # we need to avoid having numpy coerce
                    # things that look like ints/floats to ints unless
                    # they are actually ints, e.g. '0' and 0.0
                    # should not be coerced
                    # GH 11836
                    if is_integer_dtype(dtype):
                        inferred = lib.infer_dtype(data)
                        if inferred == 'integer':
                            data = np.array(data, copy=copy, dtype=dtype)
                        elif inferred in ['floating', 'mixed-integer-float']:

                            # if we are actually all equal to integers
                            # then coerce to integer
                            from .numeric import Int64Index, Float64Index
                            try:
                                res = data.astype('i8')
                                if (res == data).all():
                                    return Int64Index(res, copy=copy,
                                                      name=name)
                            except (TypeError, ValueError):
                                pass

                            # return an actual float index
                            return Float64Index(data, copy=copy, dtype=dtype,
                                                name=name)

                        elif inferred == 'string':
                            pass
                        else:
                            data = data.astype(dtype)
                    elif is_float_dtype(dtype):
                        inferred = lib.infer_dtype(data)
                        if inferred == 'string':
                            pass
                        else:
                            data = data.astype(dtype)
                    else:
                        data = np.array(data, dtype=dtype, copy=copy)

                except (TypeError, ValueError):
                    pass

            # maybe coerce to a sub-class
            from pandas.tseries.period import (PeriodIndex,
                                               IncompatibleFrequency)
            if isinstance(data, PeriodIndex):
                return PeriodIndex(data, copy=copy, name=name, **kwargs)
            if issubclass(data.dtype.type, np.integer):
                from .numeric import Int64Index
                return Int64Index(data, copy=copy, dtype=dtype, name=name)
            elif issubclass(data.dtype.type, np.floating):
                from .numeric import Float64Index
                return Float64Index(data, copy=copy, dtype=dtype, name=name)
            elif issubclass(data.dtype.type, np.bool) or is_bool_dtype(data):
                subarr = data.astype('object')
            else:
                subarr = _asarray_tuplesafe(data, dtype=object)

            # _asarray_tuplesafe does not always copy underlying data,
            # so need to make sure that this happens
            if copy:
                subarr = subarr.copy()

            if dtype is None:
                inferred = lib.infer_dtype(subarr)
                if inferred == 'integer':
                    from .numeric import Int64Index
                    return Int64Index(subarr.astype('i8'), copy=copy,
                                      name=name)
                elif inferred in ['floating', 'mixed-integer-float']:
                    from .numeric import Float64Index
                    return Float64Index(subarr, copy=copy, name=name)
                elif inferred == 'boolean':
                    # don't support boolean explicitly ATM
                    pass
                elif inferred != 'string':
                    if inferred.startswith('datetime'):
                        if (lib.is_datetime_with_singletz_array(subarr) or
                                'tz' in kwargs):
                            # only when subarr has the same tz
                            from pandas.tseries.index import DatetimeIndex
                            try:
                                return DatetimeIndex(subarr, copy=copy,
                                                     name=name, **kwargs)
                            except tslib.OutOfBoundsDatetime:
                                pass

                    elif inferred.startswith('timedelta'):
                        from pandas.tseries.tdi import TimedeltaIndex
                        return TimedeltaIndex(subarr, copy=copy, name=name,
                                              **kwargs)
                    elif inferred == 'period':
                        try:
                            return PeriodIndex(subarr, name=name, **kwargs)
                        except IncompatibleFrequency:
                            pass
            return cls._simple_new(subarr, name)

        elif hasattr(data, '__array__'):
            return Index(np.asarray(data), dtype=dtype, copy=copy, name=name,
                         **kwargs)
        elif data is None or is_scalar(data):
            cls._scalar_data_error(data)
        else:
            if (tupleize_cols and isinstance(data, list) and data and
                    isinstance(data[0], tuple)):

                # we must be all tuples, otherwise don't construct
                # 10697
                if all(isinstance(e, tuple) for e in data):
                    try:
                        # must be orderable in py3
                        if compat.PY3:
                            sorted(data)
                        from .multi import MultiIndex
                        return MultiIndex.from_tuples(
                            data, names=name or kwargs.get('names'))
                    except (TypeError, KeyError):
                        # python2 - MultiIndex fails on mixed types
                        pass

            # other iterable of some kind
            subarr = _asarray_tuplesafe(data, dtype=object)
            return Index(subarr, dtype=dtype, copy=copy, name=name, **kwargs)

    """
    NOTE for new Index creation:

    - _simple_new: It returns new Index with the same type as the caller.
      All metadata (such as name) must be provided by caller's responsibility.
      Using _shallow_copy is recommended because it fills these metadata
      otherwise specified.

    - _shallow_copy: It returns new Index with the same type (using
      _simple_new), but fills caller's metadata otherwise specified. Passed
      kwargs will overwrite corresponding metadata.

    - _shallow_copy_with_infer: It returns new Index inferring its type
      from passed values. It fills caller's metadata otherwise specified as
      the same as _shallow_copy.

    See each method's docstring.
    """

    @classmethod
    def _simple_new(cls, values, name=None, dtype=None, **kwargs):
        """
        we require that we have a dtype compat for the values
        if we are passed a non-dtype compat, then coerce using the constructor

        Must be careful not to recurse.
        """
        if not hasattr(values, 'dtype'):
            if values is None and dtype is not None:
                values = np.empty(0, dtype=dtype)
            else:
                values = np.array(values, copy=False)
                if is_object_dtype(values):
                    values = cls(values, name=name, dtype=dtype,
                                 **kwargs)._values

        result = object.__new__(cls)
        result._data = values
        result.name = name
        for k, v in compat.iteritems(kwargs):
            setattr(result, k, v)
        return result._reset_identity()

    _index_shared_docs['_shallow_copy'] = """
        create a new Index with the same class as the caller, don't copy the
        data, use the same object attributes with passed in attributes taking
        precedence

        *this is an internal non-public method*

        Parameters
        ----------
        values : the values to create the new Index, optional
        kwargs : updates the default attributes for this Index
        """

    @Appender(_index_shared_docs['_shallow_copy'])
    def _shallow_copy(self, values=None, **kwargs):
        if values is None:
            values = self.values
        attributes = self._get_attributes_dict()
        attributes.update(kwargs)
        return self._simple_new(values, **attributes)

    def _shallow_copy_with_infer(self, values=None, **kwargs):
        """
        create a new Index inferring the class with passed value, don't copy
        the data, use the same object attributes with passed in attributes
        taking precedence

        *this is an internal non-public method*

        Parameters
        ----------
        values : the values to create the new Index, optional
        kwargs : updates the default attributes for this Index
        """
        if values is None:
            values = self.values
        attributes = self._get_attributes_dict()
        attributes.update(kwargs)
        attributes['copy'] = False
        if self._infer_as_myclass:
            try:
                return self._constructor(values, **attributes)
            except (TypeError, ValueError):
                pass
        return Index(values, **attributes)

    def _deepcopy_if_needed(self, orig, copy=False):
        """
        .. versionadded:: 0.19.0

        Make a copy of self if data coincides (in memory) with orig.
        Subclasses should override this if self._base is not an ndarray.

        Parameters
        ----------
        orig : ndarray
            other ndarray to compare self._data against
        copy : boolean, default False
            when False, do not run any check, just return self

        Returns
        -------
        A copy of self if needed, otherwise self : Index
        """
        if copy:
            # Retrieve the "base objects", i.e. the original memory
            # allocations
            orig = orig if orig.base is None else orig.base
            new = self._data if self._data.base is None else self._data.base
            if orig is new:
                return self.copy(deep=True)

        return self

    def _update_inplace(self, result, **kwargs):
        # guard when called from IndexOpsMixin
        raise TypeError("Index can't be updated inplace")
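    # Editor's usage sketch (illustration only, not part of the pandas
    # source): the three creation helpers above differ in how much metadata
    # they fill and whether the result type is re-inferred.
    #
    #   idx = Index([1, 2, 3], name='x')
    #   idx._shallow_copy([4, 5, 6])         # same class, keeps name='x'
    #   idx._shallow_copy_with_infer(['a'])  # class re-inferred from values
    #                                        # -> object Index, still named 'x'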
    def is_(self, other):
        """
        More flexible, faster check like ``is`` but that works through views

        Note: this is *not* the same as ``Index.identical()``, which checks
        that metadata is also the same.

        Parameters
        ----------
        other : object
            other object to compare against.

        Returns
        -------
        True if both have same underlying data, False otherwise : bool
        """
        # use something other than None to be clearer
        return self._id is getattr(
            other, '_id', Ellipsis) and self._id is not None
    def _reset_identity(self):
        """Initializes or resets ``_id`` attribute with new object"""
        self._id = _Identity()
        return self

    # ndarray compat
    def __len__(self):
        """
        return the length of the Index
        """
        return len(self._data)

    def __array__(self, dtype=None):
        """ the array interface, return my values """
        return self._data.view(np.ndarray)

    def __array_wrap__(self, result, context=None):
        """
        Gets called after a ufunc
        """
        if is_bool_dtype(result):
            return result

        attrs = self._get_attributes_dict()
        attrs = self._maybe_update_attributes(attrs)
        return Index(result, **attrs)

    @cache_readonly
    def dtype(self):
        """ return the dtype object of the underlying data """
        return self._data.dtype

    @cache_readonly
    def dtype_str(self):
        """ return the dtype str of the underlying data """
        return str(self.dtype)

    @property
    def values(self):
        """ return the underlying data as an ndarray """
        return self._data.view(np.ndarray)
    def get_values(self):
        """ return the underlying data as an ndarray """
        return self.values
    # ops compat
    def tolist(self):
        """
        return a list of the Index values
        """
        return list(self.values)
    def repeat(self, n, *args, **kwargs):
        """
        Repeat elements of an Index. Refer to `numpy.ndarray.repeat`
        for more information about the `n` argument.

        See also
        --------
        numpy.ndarray.repeat
        """
        nv.validate_repeat(args, kwargs)
        return self._shallow_copy(self._values.repeat(n))
    def where(self, cond, other=None):
        """
        .. versionadded:: 0.19.0

        Return an Index of same shape as self and whose corresponding
        entries are from self where cond is True and otherwise are from
        other.

        Parameters
        ----------
        cond : boolean same length as self
        other : scalar, or array-like
        """
        if other is None:
            other = self._na_value
        values = np.where(cond, self.values, other)
        return self._shallow_copy_with_infer(values, dtype=self.dtype)
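    # Editor's usage sketch (illustration only): entries failing ``cond`` are
    # replaced by ``other`` (NaN by default), which may change the dtype.
    #
    #   idx = Index([1, 2, 3, 4])
    #   idx.where(idx > 2)      # -> Float64Index([nan, nan, 3.0, 4.0])
    #   idx.where(idx > 2, 0)   # -> Int64Index([0, 0, 3, 4])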
    def ravel(self, order='C'):
        """
        return an ndarray of the flattened values of the underlying data

        See also
        --------
        numpy.ndarray.ravel
        """
        return self._values.ravel(order=order)
    # construction helpers
    @classmethod
    def _scalar_data_error(cls, data):
        raise TypeError('{0}(...) must be called with a collection of some '
                        'kind, {1} was passed'.format(cls.__name__,
                                                      repr(data)))

    @classmethod
    def _string_data_error(cls, data):
        raise TypeError('String dtype not supported, you may need '
                        'to explicitly cast to a numeric type')

    @classmethod
    def _coerce_to_ndarray(cls, data):
        """coerces data to ndarray, raises on scalar data. Converts other
        iterables to list first and then to array. Does not touch ndarrays.
        """

        if not isinstance(data, (np.ndarray, Index)):
            if data is None or is_scalar(data):
                cls._scalar_data_error(data)

            # other iterable of some kind
            if not isinstance(data, (ABCSeries, list, tuple)):
                data = list(data)
            data = np.asarray(data)
        return data

    def _get_attributes_dict(self):
        """ return an attributes dict for my class """
        return dict([(k, getattr(self, k, None)) for k in self._attributes])
    def view(self, cls=None):

        # we need to see if we are subclassing an
        # index type here
        if cls is not None and not hasattr(cls, '_typ'):
            result = self._data.view(cls)
        else:
            result = self._shallow_copy()
        if isinstance(result, Index):
            result._id = self._id
        return result
    def _coerce_scalar_to_index(self, item):
        """
        we need to coerce a scalar to a compat for our index type

        Parameters
        ----------
        item : scalar item to coerce
        """
        return Index([item], dtype=self.dtype, **self._get_attributes_dict())

    _index_shared_docs['copy'] = """
        Make a copy of this object.  Name and dtype sets those attributes on
        the new object.

        Parameters
        ----------
        name : string, optional
        deep : boolean, default False
        dtype : numpy dtype or pandas type

        Returns
        -------
        copy : Index

        Notes
        -----
        In most cases, there should be no functional difference from using
        ``deep``, but if ``deep`` is passed it will attempt to deepcopy.
        """

    @Appender(_index_shared_docs['copy'])
    def copy(self, name=None, deep=False, dtype=None, **kwargs):
        names = kwargs.get('names')
        if names is not None and name is not None:
            raise TypeError("Can only provide one of `names` and `name`")
        if deep:
            from copy import deepcopy
            new_index = self._shallow_copy(self._data.copy())
            name = name or deepcopy(self.name)
        else:
            new_index = self._shallow_copy()
            name = self.name
        if name is not None:
            names = [name]
        if names:
            new_index = new_index.set_names(names)
        if dtype:
            new_index = new_index.astype(dtype)
        return new_index
    __copy__ = copy

    def __unicode__(self):
        """
        Return a string representation for this object.

        Invoked by unicode(df) in py2 only. Yields a Unicode String in both
        py2/py3.
        """
        klass = self.__class__.__name__
        data = self._format_data()
        attrs = self._format_attrs()
        space = self._format_space()

        prepr = (u(",%s") %
                 space).join([u("%s=%s") % (k, v) for k, v in attrs])

        # no data provided, just attributes
        if data is None:
            data = ''

        res = u("%s(%s%s)") % (klass, data, prepr)

        return res

    def _format_space(self):

        # using space here controls if the attributes
        # are line separated or not (the default)

        # max_seq_items = get_option('display.max_seq_items')
        # if len(self) > max_seq_items:
        #    space = "\n%s" % (' ' * (len(klass) + 1))
        return " "

    @property
    def _formatter_func(self):
        """
        Return the formatted data as a unicode string
        """
        return default_pprint

    def _format_data(self):
        """
        Return the formatted data as a unicode string
        """
        from pandas.formats.format import get_console_size, _get_adjustment
        display_width, _ = get_console_size()
        if display_width is None:
            display_width = get_option('display.width') or 80

        space1 = "\n%s" % (' ' * (len(self.__class__.__name__) + 1))
        space2 = "\n%s" % (' ' * (len(self.__class__.__name__) + 2))

        n = len(self)
        sep = ','
        max_seq_items = get_option('display.max_seq_items') or n
        formatter = self._formatter_func

        # do we want to justify (only do so for non-objects)
        is_justify = not (self.inferred_type in ('string', 'unicode') or
                          (self.inferred_type == 'categorical' and
                           is_object_dtype(self.categories)))

        # are we a truncated display
        is_truncated = n > max_seq_items

        # adj can optionally handle unicode eastern asian width
        adj = _get_adjustment()

        def _extend_line(s, line, value, display_width, next_line_prefix):

            if (adj.len(line.rstrip()) + adj.len(value.rstrip()) >=
                    display_width):
                s += line.rstrip()
                line = next_line_prefix
            line += value
            return s, line

        def best_len(values):
            if values:
                return max([adj.len(x) for x in values])
            else:
                return 0

        if n == 0:
            summary = '[], '
        elif n == 1:
            first = formatter(self[0])
            summary = '[%s], ' % first
        elif n == 2:
            first = formatter(self[0])
            last = formatter(self[-1])
            summary = '[%s, %s], ' % (first, last)
        else:

            if n > max_seq_items:
                n = min(max_seq_items // 2, 10)
                head = [formatter(x) for x in self[:n]]
                tail = [formatter(x) for x in self[-n:]]
            else:
                head = []
                tail = [formatter(x) for x in self]

            # adjust all values to max length if needed
            if is_justify:

                # however, if we are not truncated and we are only a single
                # line, then don't justify
                if (is_truncated or
                        not (len(', '.join(head)) < display_width and
                             len(', '.join(tail)) < display_width)):
                    max_len = max(best_len(head), best_len(tail))
                    head = [x.rjust(max_len) for x in head]
                    tail = [x.rjust(max_len) for x in tail]

            summary = ""
            line = space2

            for i in range(len(head)):
                word = head[i] + sep + ' '
                summary, line = _extend_line(summary, line, word,
                                             display_width, space2)

            if is_truncated:
                # remove trailing space of last line
                summary += line.rstrip() + space2 + '...'
                line = space2

            for i in range(len(tail) - 1):
                word = tail[i] + sep + ' '
                summary, line = _extend_line(summary, line, word,
                                             display_width, space2)

            # last value: no sep added + 1 space of width used for trailing ','
            summary, line = _extend_line(summary, line, tail[-1],
                                         display_width - 2, space2)
            summary += line
            summary += '],'

            if len(summary) > (display_width):
                summary += space1
            else:  # one row
                summary += ' '

            # remove initial space
            summary = '[' + summary[len(space2):]

        return summary

    def _format_attrs(self):
        """
        Return a list of tuples of the (attr,formatted_value)
        """
        attrs = []
        attrs.append(('dtype', "'%s'" % self.dtype))
        if self.name is not None:
            attrs.append(('name', default_pprint(self.name)))
        max_seq_items = get_option('display.max_seq_items') or len(self)
        if len(self) > max_seq_items:
            attrs.append(('length', len(self)))
        return attrs
    def to_series(self, **kwargs):
        """
        Create a Series with both index and values equal to the index keys
        useful with map for returning an indexer based on an index

        Returns
        -------
        Series : dtype will be based on the type of the Index values.
        """

        from pandas import Series
        return Series(self._to_embed(), index=self, name=self.name)
    def _to_embed(self, keep_tz=False):
        """
        *this is an internal non-public method*

        return an array repr of this object, potentially casting to object
        """
        return self.values.copy()

    _index_shared_docs['astype'] = """
        Create an Index with values cast to dtypes. The class of a new Index
        is determined by dtype. When conversion is impossible, a ValueError
        exception is raised.

        Parameters
        ----------
        dtype : numpy dtype or pandas type
        copy : bool, default True
            By default, astype always returns a newly allocated object.
            If copy is set to False and internal requirements on dtype are
            satisfied, the original data is used to create a new Index
            or the original Index is returned.

            .. versionadded:: 0.19.0
        """

    @Appender(_index_shared_docs['astype'])
    def astype(self, dtype, copy=True):
        return Index(self.values.astype(dtype, copy=copy), name=self.name,
                     dtype=dtype)
    def _to_safe_for_reshape(self):
        """ convert to object if we are a categorical """
        return self
    def to_datetime(self, dayfirst=False):
        """
        For an Index containing strings or datetime.datetime objects, attempt
        conversion to DatetimeIndex
        """
        from pandas.tseries.index import DatetimeIndex
        if self.inferred_type == 'string':
            from dateutil.parser import parse
            parser = lambda x: parse(x, dayfirst=dayfirst)
            parsed = lib.try_parse_dates(self.values, parser=parser)
            return DatetimeIndex(parsed)
        else:
            return DatetimeIndex(self.values)
    def _assert_can_do_setop(self, other):
        if not is_list_like(other):
            raise TypeError('Input must be Index or array-like')
        return True

    def _convert_can_do_setop(self, other):
        if not isinstance(other, Index):
            other = Index(other, name=self.name)
            result_name = self.name
        else:
            result_name = self.name if self.name == other.name else None
        return other, result_name

    def _convert_for_op(self, value):
        """ Convert value to be insertable to ndarray """
        return value

    def _assert_can_do_op(self, value):
        """ Check value is valid for scalar op """
        if not lib.isscalar(value):
            msg = "'value' must be a scalar, passed: {0}"
            raise TypeError(msg.format(type(value).__name__))

    @property
    def nlevels(self):
        return 1

    def _get_names(self):
        return FrozenList((self.name, ))

    def _set_names(self, values, level=None):
        if len(values) != 1:
            raise ValueError('Length of new names must be 1, got %d' %
                             len(values))
        self.name = values[0]

    names = property(fset=_set_names, fget=_get_names)
    def set_names(self, names, level=None, inplace=False):
        """
        Set new names on index. Defaults to returning new index.

        Parameters
        ----------
        names : str or sequence
            name(s) to set
        level : int, level name, or sequence of int/level names (default None)
            If the index is a MultiIndex (hierarchical), level(s) to set (None
            for all levels).  Otherwise level must be None
        inplace : bool
            if True, mutates in place

        Returns
        -------
        new index (of same type and class...etc) [if inplace, returns None]

        Examples
        --------
        >>> Index([1, 2, 3, 4]).set_names('foo')
        Int64Index([1, 2, 3, 4], dtype='int64')
        >>> Index([1, 2, 3, 4]).set_names(['foo'])
        Int64Index([1, 2, 3, 4], dtype='int64')
        >>> idx = MultiIndex.from_tuples([(1, u'one'), (1, u'two'),
                                          (2, u'one'), (2, u'two')],
                                          names=['foo', 'bar'])
        >>> idx.set_names(['baz', 'quz'])
        MultiIndex(levels=[[1, 2], [u'one', u'two']],
                   labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
                   names=[u'baz', u'quz'])
        >>> idx.set_names('baz', level=0)
        MultiIndex(levels=[[1, 2], [u'one', u'two']],
                   labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
                   names=[u'baz', u'bar'])
        """
        if level is not None and self.nlevels == 1:
            raise ValueError('Level must be None for non-MultiIndex')

        if level is not None and not is_list_like(level) and is_list_like(
                names):
            raise TypeError("Names must be a string")

        if not is_list_like(names) and level is None and self.nlevels > 1:
            raise TypeError("Must pass list-like as `names`.")

        if not is_list_like(names):
            names = [names]
        if level is not None and not is_list_like(level):
            level = [level]

        if inplace:
            idx = self
        else:
            idx = self._shallow_copy()
        idx._set_names(names, level=level)
        if not inplace:
            return idx
    def rename(self, name, inplace=False):
        """
        Set new names on index. Defaults to returning new index.

        Parameters
        ----------
        name : str or list
            name to set
        inplace : bool
            if True, mutates in place

        Returns
        -------
        new index (of same type and class...etc) [if inplace, returns None]
        """
        return self.set_names([name], inplace=inplace)
    def reshape(self, *args, **kwargs):
        """
        NOT IMPLEMENTED: do not call this method, as reshaping is not
        supported for Index objects and will raise an error.

        Reshape an Index.
        """
        raise NotImplementedError("reshaping is not supported "
                                  "for Index objects")
    @property
    def _has_complex_internals(self):
        # to disable groupby tricks in MultiIndex
        return False
    def summary(self, name=None):
        if len(self) > 0:
            head = self[0]
            if (hasattr(head, 'format') and
                    not isinstance(head, compat.string_types)):
                head = head.format()
            tail = self[-1]
            if (hasattr(tail, 'format') and
                    not isinstance(tail, compat.string_types)):
                tail = tail.format()
            index_summary = ', %s to %s' % (pprint_thing(head),
                                            pprint_thing(tail))
        else:
            index_summary = ''

        if name is None:
            name = type(self).__name__
        return '%s: %s entries%s' % (name, len(self), index_summary)
    def _mpl_repr(self):
        # how to represent ourselves to matplotlib
        return self.values

    _na_value = np.nan
    """The expected NA value to use with this index."""

    # introspection
    @property
    def is_monotonic(self):
        """ alias for is_monotonic_increasing (deprecated) """
        return self._engine.is_monotonic_increasing

    @property
    def is_monotonic_increasing(self):
        """
        return if the index is monotonic increasing (only equal or
        increasing) values.
        """
        return self._engine.is_monotonic_increasing

    @property
    def is_monotonic_decreasing(self):
        """
        return if the index is monotonic decreasing (only equal or
        decreasing) values.
        """
        return self._engine.is_monotonic_decreasing
    def is_lexsorted_for_tuple(self, tup):
        return True
    @cache_readonly(allow_setting=True)
    def is_unique(self):
        """ return if the index has unique values """
        return self._engine.is_unique

    @property
    def has_duplicates(self):
        return not self.is_unique
    def is_boolean(self):
        return self.inferred_type in ['boolean']

    def is_integer(self):
        return self.inferred_type in ['integer']

    def is_floating(self):
        return self.inferred_type in ['floating', 'mixed-integer-float']

    def is_numeric(self):
        return self.inferred_type in ['integer', 'floating']

    def is_object(self):
        return is_object_dtype(self.dtype)

    def is_categorical(self):
        return self.inferred_type in ['categorical']

    def is_mixed(self):
        return self.inferred_type in ['mixed']

    def holds_integer(self):
        return self.inferred_type in ['integer', 'mixed-integer']
    # validate / convert indexers
    def _convert_scalar_indexer(self, key, kind=None):
        """
        convert a scalar indexer

        Parameters
        ----------
        key : label of the slice bound
        kind : {'ix', 'loc', 'getitem', 'iloc'} or None
        """

        assert kind in ['ix', 'loc', 'getitem', 'iloc', None]

        if kind == 'iloc':
            return self._validate_indexer('positional', key, kind)

        if len(self) and not isinstance(self, ABCMultiIndex,):

            # we can raise here if we are definitive that this
            # is positional indexing (eg. .ix on with a float)
            # or label indexing if we are using a type able
            # to be represented in the index

            if kind in ['getitem', 'ix'] and is_float(key):
                if not self.is_floating():
                    return self._invalid_indexer('label', key)

            elif kind in ['loc'] and is_float(key):

                # we want to raise KeyError on string/mixed here
                # technically we *could* raise a TypeError
                # on anything but mixed though
                if self.inferred_type not in ['floating',
                                              'mixed-integer-float',
                                              'string',
                                              'unicode',
                                              'mixed']:
                    return self._invalid_indexer('label', key)

            elif kind in ['loc'] and is_integer(key):
                if not self.holds_integer():
                    return self._invalid_indexer('label', key)

        return key

    def _convert_slice_indexer(self, key, kind=None):
        """
        convert a slice indexer. disallow floats in the start/stop/step

        Parameters
        ----------
        key : label of the slice bound
        kind : {'ix', 'loc', 'getitem', 'iloc'} or None
        """
        assert kind in ['ix', 'loc', 'getitem', 'iloc', None]

        # if we are not a slice, then we are done
        if not isinstance(key, slice):
            return key

        # validate iloc
        if kind == 'iloc':
            return slice(self._validate_indexer('slice', key.start, kind),
                         self._validate_indexer('slice', key.stop, kind),
                         self._validate_indexer('slice', key.step, kind))

        # potentially cast the bounds to integers
        start, stop, step = key.start, key.stop, key.step

        # figure out if this is a positional indexer
        def is_int(v):
            return v is None or is_integer(v)

        is_null_slicer = start is None and stop is None
        is_index_slice = is_int(start) and is_int(stop)
        is_positional = is_index_slice and not self.is_integer()

        if kind == 'getitem':
            """
            called from the getitem slicers, validate that we are in fact
            integers
            """
            if self.is_integer() or is_index_slice:
                return slice(self._validate_indexer('slice', key.start, kind),
                             self._validate_indexer('slice', key.stop, kind),
                             self._validate_indexer('slice', key.step, kind))

        # convert the slice to an indexer here

        # if we are mixed and have integers
        try:
            if is_positional and self.is_mixed():
                # TODO: i, j are not used anywhere
                if start is not None:
                    i = self.get_loc(start)  # noqa
                if stop is not None:
                    j = self.get_loc(stop)  # noqa
                is_positional = False
        except KeyError:
            if self.inferred_type == 'mixed-integer-float':
                raise

        if is_null_slicer:
            indexer = key
        elif is_positional:
            indexer = key
        else:
            try:
                indexer = self.slice_indexer(start, stop, step, kind=kind)
            except Exception:
                if is_index_slice:
                    if self.is_integer():
                        raise
                    else:
                        indexer = key
                else:
                    raise

        return indexer

    def _convert_list_indexer(self, keyarr, kind=None):
        """
        passed a key that is tuplesafe that is integer based
        and we have a mixed index (e.g. number/labels). figure out
        the indexer. return None if we can't help
        """
        if (kind in [None, 'iloc', 'ix'] and
                is_integer_dtype(keyarr) and not self.is_floating() and
                not isinstance(keyarr, ABCPeriodIndex)):

            if self.inferred_type == 'mixed-integer':
                indexer = self.get_indexer(keyarr)
                if (indexer >= 0).all():
                    return indexer
                # missing values are flagged as -1 by get_indexer and negative
                # indices are already converted to positive indices in the
                # above if-statement, so the negative flags are changed to
                # values outside the range of indices so as to trigger an
                # IndexError in maybe_convert_indices
                indexer[indexer < 0] = len(self)
                from pandas.core.indexing import maybe_convert_indices
                return maybe_convert_indices(indexer, len(self))

            elif not self.inferred_type == 'integer':
                keyarr = np.where(keyarr < 0, len(self) + keyarr, keyarr)
                return keyarr

        return None

    def _invalid_indexer(self, form, key):
        """ consistent invalid indexer message """
        raise TypeError("cannot do {form} indexing on {klass} with these "
                        "indexers [{key}] of {kind}".format(
                            form=form, klass=type(self), key=key,
                            kind=type(key)))
    def get_duplicates(self):
        from collections import defaultdict
        counter = defaultdict(lambda: 0)
        for k in self.values:
            counter[k] += 1
        return sorted(k for k, v in compat.iteritems(counter) if v > 1)
    _get_duplicates = get_duplicates

    def _cleanup(self):
        self._engine.clear_mapping()

    @cache_readonly
    def _constructor(self):
        return type(self)

    @cache_readonly
    def _engine(self):
        # property, for now, slow to look up
        return self._engine_type(lambda: self.values, len(self))

    def _validate_index_level(self, level):
        """
        Validate index level.

        For single-level Index getting level number is a no-op, but some
        verification must be done like in MultiIndex.

        """
        if isinstance(level, int):
            if level < 0 and level != -1:
                raise IndexError("Too many levels: Index has only 1 level,"
                                 " %d is not a valid level number" %
                                 (level, ))
            elif level > 0:
                raise IndexError("Too many levels:"
                                 " Index has only 1 level, not %d" %
                                 (level + 1))
        elif level != self.name:
            raise KeyError('Level %s must be same as name (%s)' %
                           (level, self.name))

    def _get_level_number(self, level):
        self._validate_index_level(level)
        return 0

    @cache_readonly
    def inferred_type(self):
        """ return a string of the type inferred from the values """
        return lib.infer_dtype(self)
    def is_type_compatible(self, kind):
        return kind == self.inferred_type
    @cache_readonly
    def is_all_dates(self):
        if self._data is None:
            return False
        return is_datetime_array(_ensure_object(self.values))

    def __iter__(self):
        return iter(self.values)

    def __reduce__(self):
        d = dict(data=self._data)
        d.update(self._get_attributes_dict())
        return _new_Index, (self.__class__, d), None

    def __setstate__(self, state):
        """Necessary for making this object picklable"""

        if isinstance(state, dict):
            self._data = state.pop('data')
            for k, v in compat.iteritems(state):
                setattr(self, k, v)

        elif isinstance(state, tuple):

            if len(state) == 2:
                nd_state, own_state = state
                data = np.empty(nd_state[1], dtype=nd_state[2])
                np.ndarray.__setstate__(data, nd_state)
                self.name = own_state[0]

            else:  # pragma: no cover
                data = np.empty(state)
                np.ndarray.__setstate__(data, state)

            self._data = data
            self._reset_identity()
        else:
            raise Exception("invalid pickle state")

    _unpickle_compat = __setstate__

    def __deepcopy__(self, memo=None):
        if memo is None:
            memo = {}
        return self.copy(deep=True)

    def __nonzero__(self):
        raise ValueError("The truth value of a {0} is ambiguous. "
                         "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
                         .format(self.__class__.__name__))

    __bool__ = __nonzero__

    def __contains__(self, key):
        hash(key)
        # work around some kind of odd cython bug
        try:
            return key in self._engine
        except TypeError:
            return False

    def __hash__(self):
        raise TypeError("unhashable type: %r" % type(self).__name__)

    def __setitem__(self, key, value):
        raise TypeError("Index does not support mutable operations")

    def __getitem__(self, key):
        """
        Override numpy.ndarray's __getitem__ method to work as desired.

        This function adds lists and Series as valid boolean indexers
        (ndarrays only supports ndarray with dtype=bool).

        If resulting ndim != 1, plain ndarray is returned instead of
        corresponding `Index` subclass.

        """
        # There's no custom logic to be implemented in __getslice__, so it's
        # not overloaded intentionally.
        getitem = self._data.__getitem__
        promote = self._shallow_copy

        if is_scalar(key):
            return getitem(key)

        if isinstance(key, slice):
            # This case is separated from the conditional above to avoid
            # pessimization of basic indexing.
            return promote(getitem(key))

        if is_bool_indexer(key):
            key = np.asarray(key)

        key = _values_from_object(key)
        result = getitem(key)
        if not is_scalar(result):
            return promote(result)
        else:
            return result

    def _ensure_compat_append(self, other):
        """
        prepare the append

        Returns
        -------
        list of to_concat, name of result Index
        """
        name = self.name
        to_concat = [self]

        if isinstance(other, (list, tuple)):
            to_concat = to_concat + list(other)
        else:
            to_concat.append(other)

        for obj in to_concat:
            if (isinstance(obj, Index) and obj.name != name and
                    obj.name is not None):
                name = None
                break

        to_concat = self._ensure_compat_concat(to_concat)
        to_concat = [x._values if isinstance(x, Index) else x
                     for x in to_concat]
        return to_concat, name
    def append(self, other):
        """
        Append a collection of Index options together

        Parameters
        ----------
        other : Index or list/tuple of indices

        Returns
        -------
        appended : Index
        """
        to_concat, name = self._ensure_compat_append(other)
        attribs = self._get_attributes_dict()
        attribs['name'] = name
        return self._shallow_copy_with_infer(
            np.concatenate(to_concat), **attribs)
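    # Editor's usage sketch (illustration only): pieces are concatenated in
    # order; a name shared by all pieces is kept, otherwise it is dropped.
    #
    #   Index([1, 2], name='x').append(Index([3, 4], name='x'))  # name kept
    #   Index([1, 2], name='x').append(Index([3, 4], name='y'))  # name -> None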
    @staticmethod
    def _ensure_compat_concat(indexes):
        from pandas.tseries.api import (DatetimeIndex, PeriodIndex,
                                        TimedeltaIndex)
        klasses = DatetimeIndex, PeriodIndex, TimedeltaIndex

        is_ts = [isinstance(idx, klasses) for idx in indexes]

        if any(is_ts) and not all(is_ts):
            return [_maybe_box(idx) for idx in indexes]

        return indexes

    _index_shared_docs['take'] = """
        return a new %(klass)s of the values selected by the indices

        For internal compatibility with numpy arrays.

        Parameters
        ----------
        indices : list
            Indices to be taken
        axis : int, optional
            The axis over which to select values, always 0.
        allow_fill : bool, default True
        fill_value : bool, default None
            If allow_fill=True and fill_value is not None, indices specified
            by -1 is regarded as NA. If Index doesn't hold NA, raise
            ValueError

        See also
        --------
        numpy.ndarray.take
        """

    @Appender(_index_shared_docs['take'])
    def take(self, indices, axis=0, allow_fill=True,
             fill_value=None, **kwargs):
        nv.validate_take(tuple(), kwargs)
        indices = _ensure_platform_int(indices)
        if self._can_hold_na:
            taken = self._assert_take_fillable(self.values, indices,
                                               allow_fill=allow_fill,
                                               fill_value=fill_value,
                                               na_value=self._na_value)
        else:
            if allow_fill and fill_value is not None:
                msg = 'Unable to fill values because {0} cannot contain NA'
                raise ValueError(msg.format(self.__class__.__name__))
            taken = self.values.take(indices)
        return self._shallow_copy(taken)
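    # Editor's usage sketch (illustration only): -1 is an ordinary position
    # unless a non-None fill_value requests NA-filling (see docstring above).
    #
    #   idx = Index(['a', 'b', 'c'])
    #   idx.take([2, 0])                      # -> Index(['c', 'a'])
    #   idx.take([1, -1])                     # -> Index(['b', 'c'])
    #   idx.take([1, -1], fill_value=np.nan)  # -1 marks NA -> Index(['b', nan])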
    def _assert_take_fillable(self, values, indices, allow_fill=True,
                              fill_value=None, na_value=np.nan):
        """ Internal method to handle NA filling of take """
        indices = _ensure_platform_int(indices)

        # only fill if we are passing a non-None fill_value
        if allow_fill and fill_value is not None:
            if (indices < -1).any():
                msg = ('When allow_fill=True and fill_value is not None, '
                       'all indices must be >= -1')
                raise ValueError(msg)
            taken = values.take(indices)
            mask = indices == -1
            if mask.any():
                taken[mask] = na_value
        else:
            taken = values.take(indices)
        return taken

    @cache_readonly
    def _isnan(self):
        """ return if each value is nan"""
        if self._can_hold_na:
            return isnull(self)
        else:
            # shouldn't reach to this condition by checking hasnans beforehand
            values = np.empty(len(self), dtype=np.bool_)
            values.fill(False)
            return values

    @cache_readonly
    def _nan_idxs(self):
        if self._can_hold_na:
            w, = self._isnan.nonzero()
            return w
        else:
            return np.array([], dtype=np.int64)

    @cache_readonly
    def hasnans(self):
        """ return if I have any nans; enables various perf speedups """
        if self._can_hold_na:
            return self._isnan.any()
        else:
            return False
    def putmask(self, mask, value):
        """
        return a new Index of the values set with the mask

        See also
        --------
        numpy.ndarray.putmask
        """
        values = self.values.copy()
        try:
            np.putmask(values, mask, self._convert_for_op(value))
            return self._shallow_copy(values)
        except (ValueError, TypeError):
            # coerces to object
            return self.astype(object).putmask(mask, value)
    def format(self, name=False, formatter=None, **kwargs):
        """
        Render a string representation of the Index
        """
        header = []
        if name:
            header.append(pprint_thing(self.name,
                                       escape_chars=('\t', '\r', '\n')) if
                          self.name is not None else '')

        if formatter is not None:
            return header + list(self.map(formatter))

        return self._format_with_header(header, **kwargs)
    def _format_with_header(self, header, na_rep='NaN', **kwargs):
        values = self.values

        from pandas.formats.format import format_array

        if is_categorical_dtype(values.dtype):
            values = np.array(values)
        elif is_object_dtype(values.dtype):
            values = lib.maybe_convert_objects(values, safe=1)

        if is_object_dtype(values.dtype):
            result = [pprint_thing(x, escape_chars=('\t', '\r', '\n'))
                      for x in values]

            # could have nans
            mask = isnull(values)
            if mask.any():
                result = np.array(result)
                result[mask] = na_rep
                result = result.tolist()

        else:
            result = _trim_front(format_array(values, None, justify='left'))
        return header + result
    def to_native_types(self, slicer=None, **kwargs):
        """ slice and dice then format """
        values = self
        if slicer is not None:
            values = values[slicer]
        return values._format_native_types(**kwargs)
    def _format_native_types(self, na_rep='', quoting=None, **kwargs):
        """ actually format my specific types """
        mask = isnull(self)
        if not self.is_object() and not quoting:
            values = np.asarray(self).astype(str)
        else:
            values = np.array(self, dtype=object, copy=True)

        values[mask] = na_rep
        return values
    def equals(self, other):
        """
        Determines if two Index objects contain the same elements.
        """
        if self.is_(other):
            return True

        if not isinstance(other, Index):
            return False

        return array_equivalent(_values_from_object(self),
                                _values_from_object(other))
    def identical(self, other):
        """Similar to equals, but check that other comparable attributes are
        also equal
        """
        return (self.equals(other) and
                all((getattr(self, c, None) == getattr(other, c, None)
                     for c in self._comparables)) and
                type(self) == type(other))
    def asof(self, label):
        """
        For a sorted index, return the most recent label up to and including
        the passed label. Return NaN if not found.

        See also
        --------
        get_loc : asof is a thin wrapper around get_loc with method='pad'
        """
        try:
            loc = self.get_loc(label, method='pad')
        except KeyError:
            return _get_na_value(self.dtype)
        else:
            if isinstance(loc, slice):
                loc = loc.indices(len(self))[-1]
            return self[loc]
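    # Editor's usage sketch (illustration only): a padded (as-of) lookup on a
    # sorted index.
    #
    #   idx = Index([10, 20, 30])
    #   idx.asof(25)   # -> 20 (most recent label at or before 25)
    #   idx.asof(5)    # -> nan (no label at or before 5)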
    def asof_locs(self, where, mask):
        """
        where : array of timestamps
        mask : array of booleans where data is not NA

        """
        locs = self.values[mask].searchsorted(where.values, side='right')

        locs = np.where(locs > 0, locs - 1, 0)
        result = np.arange(len(self))[mask].take(locs)

        first = mask.argmax()
        result[(locs == 0) & (where < self.values[first])] = -1

        return result
    def sort_values(self, return_indexer=False, ascending=True):
        """
        Return sorted copy of Index
        """
        _as = self.argsort()
        if not ascending:
            _as = _as[::-1]

        sorted_index = self.take(_as)

        if return_indexer:
            return sorted_index, _as
        else:
            return sorted_index
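    # Editor's usage sketch (illustration only):
    #
    #   idx = Index([3, 1, 2])
    #   idx.sort_values()                     # -> Int64Index([1, 2, 3])
    #   idx.sort_values(return_indexer=True)  # -> (Int64Index([1, 2, 3]),
    #                                         #     array([1, 2, 0]))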
    def order(self, return_indexer=False, ascending=True):
        """
        Return sorted copy of Index

        DEPRECATED: use :meth:`Index.sort_values`
        """
        warnings.warn("order is deprecated, use sort_values(...)",
                      FutureWarning, stacklevel=2)
        return self.sort_values(return_indexer=return_indexer,
                                ascending=ascending)
    def sort(self, *args, **kwargs):
        raise TypeError("cannot sort an Index object in-place, use "
                        "sort_values instead")
    def sortlevel(self, level=None, ascending=True, sort_remaining=None):
        """
        For internal compatibility with the Index API

        Sort the Index. This is for compat with MultiIndex

        Parameters
        ----------
        ascending : boolean, default True
            False to sort in descending order

        level, sort_remaining are compat parameters

        Returns
        -------
        sorted_index : Index
        """
        return self.sort_values(return_indexer=True, ascending=ascending)
    def shift(self, periods=1, freq=None):
        """
        Shift Index containing datetime objects by input number of periods and
        DateOffset

        Returns
        -------
        shifted : Index
        """
        raise NotImplementedError("Not supported for type %s" %
                                  type(self).__name__)
    def argsort(self, *args, **kwargs):
        """
        Returns the indices that would sort the index and its
        underlying data.

        Returns
        -------
        argsorted : numpy array

        See also
        --------
        numpy.ndarray.argsort
        """
        result = self.asi8
        if result is None:
            result = np.array(self)
        return result.argsort(*args, **kwargs)
    def __add__(self, other):
        if is_list_like(other):
            warnings.warn("using '+' to provide set union with Indexes is "
                          "deprecated, use '|' or .union()", FutureWarning,
                          stacklevel=2)
        if isinstance(other, Index):
            return self.union(other)
        return Index(np.array(self) + other)

    def __radd__(self, other):
        if is_list_like(other):
            warnings.warn("using '+' to provide set union with Indexes is "
                          "deprecated, use '|' or .union()", FutureWarning,
                          stacklevel=2)
        return Index(other + np.array(self))

    __iadd__ = __add__

    def __sub__(self, other):
        warnings.warn("using '-' to provide set differences with Indexes is "
                      "deprecated, use .difference()", FutureWarning,
                      stacklevel=2)
        return self.difference(other)

    def __and__(self, other):
        return self.intersection(other)

    def __or__(self, other):
        return self.union(other)

    def __xor__(self, other):
        return self.symmetric_difference(other)

    def _get_consensus_name(self, other):
        """
        Given 2 indexes, give a consensus name meaning
        we take the not None one, or None if the names differ.
        Return a new object if we are resetting the name
        """
        if self.name != other.name:
            if self.name is None or other.name is None:
                name = self.name or other.name
            else:
                name = None
            if self.name != name:
                return self._shallow_copy(name=name)
        return self
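    # Editor's usage sketch (illustration only): the operators above delegate
    # to the named set operations.
    #
    #   a, b = Index([1, 2, 3]), Index([2, 3, 4])
    #   (a & b).equals(a.intersection(b))          # True
    #   (a | b).equals(a.union(b))                 # True
    #   (a ^ b).equals(a.symmetric_difference(b))  # True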
    def union(self, other):
        """
        Form the union of two Index objects and sorts if possible.

        Parameters
        ----------
        other : Index or array-like

        Returns
        -------
        union : Index

        Examples
        --------

        >>> idx1 = pd.Index([1, 2, 3, 4])
        >>> idx2 = pd.Index([3, 4, 5, 6])
        >>> idx1.union(idx2)
        Int64Index([1, 2, 3, 4, 5, 6], dtype='int64')

        """
        self._assert_can_do_setop(other)
        other = _ensure_index(other)

        if len(other) == 0 or self.equals(other):
            return self._get_consensus_name(other)

        if len(self) == 0:
            return other._get_consensus_name(self)

        if not is_dtype_equal(self.dtype, other.dtype):
            this = self.astype('O')
            other = other.astype('O')
            return this.union(other)

        if self.is_monotonic and other.is_monotonic:
            try:
                result = self._outer_indexer(self.values, other._values)[0]
            except TypeError:
                # incomparable objects
                result = list(self.values)

                # worth making this faster? a very unusual case
                value_set = set(self.values)
                result.extend([x for x in other._values
                               if x not in value_set])
        else:
            indexer = self.get_indexer(other)
            indexer, = (indexer == -1).nonzero()

            if len(indexer) > 0:
                other_diff = algos.take_nd(other._values, indexer,
                                           allow_fill=False)
                result = _concat._concat_compat((self.values, other_diff))

                try:
                    self.values[0] < other_diff[0]
                except TypeError as e:
                    warnings.warn("%s, sort order is undefined for "
                                  "incomparable objects" % e, RuntimeWarning,
                                  stacklevel=3)
                else:
                    types = frozenset((self.inferred_type,
                                       other.inferred_type))
                    if not types & _unsortable_types:
                        result.sort()

            else:
                result = self.values

                try:
                    result = np.sort(result)
                except TypeError as e:
                    warnings.warn("%s, sort order is undefined for "
                                  "incomparable objects" % e, RuntimeWarning,
                                  stacklevel=3)

        # for subclasses
        return self._wrap_union_result(other, result)
    def _wrap_union_result(self, other, result):
        name = self.name if self.name == other.name else None
        return self.__class__(result, name=name)
    def intersection(self, other):
        """
        Form the intersection of two Index objects.

        This returns a new Index with elements common to the index and
        `other`. Sortedness of the result is not guaranteed.

        Parameters
        ----------
        other : Index or array-like

        Returns
        -------
        intersection : Index

        Examples
        --------

        >>> idx1 = pd.Index([1, 2, 3, 4])
        >>> idx2 = pd.Index([3, 4, 5, 6])
        >>> idx1.intersection(idx2)
        Int64Index([3, 4], dtype='int64')

        """
        self._assert_can_do_setop(other)
        other = _ensure_index(other)

        if self.equals(other):
            return self._get_consensus_name(other)

        if not is_dtype_equal(self.dtype, other.dtype):
            this = self.astype('O')
            other = other.astype('O')
            return this.intersection(other)

        if self.is_monotonic and other.is_monotonic:
            try:
                result = self._inner_indexer(self.values, other._values)[0]
                return self._wrap_union_result(other, result)
            except TypeError:
                pass

        try:
            indexer = Index(self.values).get_indexer(other._values)
            indexer = indexer.take((indexer != -1).nonzero()[0])
        except:
            # duplicates
            indexer = Index(self.values).get_indexer_non_unique(
                other._values)[0].unique()
            indexer = indexer[indexer != -1]

        taken = self.take(indexer)
        if self.name != other.name:
            taken.name = None
        return taken
    def difference(self, other):
        """
        Return a new Index with elements from the index that are not in
        `other`.

        This is the set difference of two Index objects.
        It's sorted if sorting is possible.

        Parameters
        ----------
        other : Index or array-like

        Returns
        -------
        difference : Index

        Examples
        --------

        >>> idx1 = pd.Index([1, 2, 3, 4])
        >>> idx2 = pd.Index([3, 4, 5, 6])
        >>> idx1.difference(idx2)
        Int64Index([1, 2], dtype='int64')

        """
        self._assert_can_do_setop(other)

        if self.equals(other):
            return Index([], name=self.name)

        other, result_name = self._convert_can_do_setop(other)

        this = self._get_unique_index()

        indexer = this.get_indexer(other)
        indexer = indexer.take((indexer != -1).nonzero()[0])

        label_diff = np.setdiff1d(np.arange(this.size), indexer,
                                  assume_unique=True)
        the_diff = this.values.take(label_diff)
        try:
            the_diff = algos.safe_sort(the_diff)
        except TypeError:
            pass

        return this._shallow_copy(the_diff, name=result_name)
    def symmetric_difference(self, other, result_name=None):
        """
        Compute the symmetric difference of two Index objects.
        It's sorted if sorting is possible.

        Parameters
        ----------
        other : Index or array-like
        result_name : str

        Returns
        -------
        symmetric_difference : Index

        Notes
        -----
        ``symmetric_difference`` contains elements that appear in either
        ``idx1`` or ``idx2`` but not both. Equivalent to the Index created by
        ``(idx1 - idx2) + (idx2 - idx1)`` with duplicates dropped.

        Examples
        --------
        >>> idx1 = Index([1, 2, 3, 4])
        >>> idx2 = Index([2, 3, 4, 5])
        >>> idx1.symmetric_difference(idx2)
        Int64Index([1, 5], dtype='int64')

        You can also use the ``^`` operator:

        >>> idx1 ^ idx2
        Int64Index([1, 5], dtype='int64')
        """
        self._assert_can_do_setop(other)
        other, result_name_update = self._convert_can_do_setop(other)
        if result_name is None:
            result_name = result_name_update

        this = self._get_unique_index()
        other = other._get_unique_index()
        indexer = this.get_indexer(other)

        # {this} minus {other}
        common_indexer = indexer.take((indexer != -1).nonzero()[0])
        left_indexer = np.setdiff1d(np.arange(this.size), common_indexer,
                                    assume_unique=True)
        left_diff = this.values.take(left_indexer)

        # {other} minus {this}
        right_indexer = (indexer == -1).nonzero()[0]
        right_diff = other.values.take(right_indexer)

        the_diff = _concat._concat_compat([left_diff, right_diff])
        try:
            the_diff = algos.safe_sort(the_diff)
        except TypeError:
            pass

        attribs = self._get_attributes_dict()
        attribs['name'] = result_name
        if 'freq' in attribs:
            attribs['freq'] = None
        return self._shallow_copy_with_infer(the_diff, **attribs)
    sym_diff = deprecate('sym_diff', symmetric_difference)

    def _get_unique_index(self, dropna=False):
        """
        Returns an index containing unique values.

        Parameters
        ----------
        dropna : bool
            If True, NaN values are dropped.

        Returns
        -------
        uniques : index
        """
        if self.is_unique and not dropna:
            return self

        values = self.values

        if not self.is_unique:
            values = self.unique()

        if dropna:
            try:
                if self.hasnans:
                    values = values[~isnull(values)]
            except NotImplementedError:
                pass

        return self._shallow_copy(values)
    def get_loc(self, key, method=None, tolerance=None):
        """
        Get integer location for requested label

        Parameters
        ----------
        key : label
        method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
            * default: exact matches only.
            * pad / ffill: find the PREVIOUS index value if no exact match.
            * backfill / bfill: use NEXT index value if no exact match
            * nearest: use the NEAREST index value if no exact match. Tied
              distances are broken by preferring the larger index value.
        tolerance : optional
            Maximum distance from index value for inexact matches. The value
            of the index at the matching location must satisfy the equation
            ``abs(index[loc] - key) <= tolerance``.

            .. versionadded:: 0.17.0

        Returns
        -------
        loc : int if unique index, possibly slice or mask if not
        """
        if method is None:
            if tolerance is not None:
                raise ValueError('tolerance argument only valid if using pad, '
                                 'backfill or nearest lookups')
            key = _values_from_object(key)
            try:
                return self._engine.get_loc(key)
            except KeyError:
                return self._engine.get_loc(self._maybe_cast_indexer(key))

        indexer = self.get_indexer([key], method=method, tolerance=tolerance)
        if indexer.ndim > 1 or indexer.size > 1:
            raise TypeError('get_loc requires scalar valued input')
        loc = indexer.item()
        if loc == -1:
            raise KeyError(key)
        return loc
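    # Editor's usage sketch (illustration only): inexact lookups via
    # ``method``; note the tie-breaking rule for 'nearest'.
    #
    #   idx = Index([10, 20, 30])
    #   idx.get_loc(20)                    # -> 1 (exact match)
    #   idx.get_loc(25, method='pad')      # -> 1 (previous label, 20)
    #   idx.get_loc(25, method='nearest')  # -> 2 (tie broken toward 30)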
    def get_value(self, series, key):
        """
        Fast lookup of value from 1-dimensional ndarray. Only use this if you
        know what you're doing
        """

        # if we have something that is Index-like, then
        # use this, e.g. DatetimeIndex
        s = getattr(series, '_values', None)
        if isinstance(s, Index) and is_scalar(key):
            try:
                return s[key]
            except (IndexError, ValueError):

                # invalid type as an indexer
                pass

        s = _values_from_object(series)
        k = _values_from_object(key)

        k = self._convert_scalar_indexer(k, kind='getitem')
        try:
            return self._engine.get_value(s, k,
                                          tz=getattr(series.dtype, 'tz',
                                                     None))
        except KeyError as e1:
            if len(self) > 0 and self.inferred_type in ['integer', 'boolean']:
                raise

            try:
                return tslib.get_value_box(s, key)
            except IndexError:
                raise
            except TypeError:
                # generator/iterator-like
                if is_iterator(key):
                    raise InvalidIndexError(key)
                else:
                    raise e1
            except Exception:  # pragma: no cover
                raise e1
        except TypeError:
            # python 3
            if is_scalar(key):  # pragma: no cover
                raise IndexError(key)
            raise InvalidIndexError(key)
    def set_value(self, arr, key, value):
        """
        Fast lookup of value from 1-dimensional ndarray. Only use this if you
        know what you're doing
        """
        self._engine.set_value(_values_from_object(arr),
                               _values_from_object(key), value)
    def get_level_values(self, level):
        """
        Return vector of label values for requested level, equal to the length
        of the index

        Parameters
        ----------
        level : int

        Returns
        -------
        values : ndarray
        """
        # checks that level number is actually just 1
        self._validate_index_level(level)
        return self
    def get_indexer(self, target, method=None, limit=None, tolerance=None):
        """
        Compute indexer and mask for new index given the current index. The
        indexer should be then used as an input to ndarray.take to align the
        current data to the new index.

        Parameters
        ----------
        target : Index
        method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
            * default: exact matches only.
            * pad / ffill: find the PREVIOUS index value if no exact match.
            * backfill / bfill: use NEXT index value if no exact match
            * nearest: use the NEAREST index value if no exact match. Tied
              distances are broken by preferring the larger index value.
        limit : int, optional
            Maximum number of consecutive labels in ``target`` to match for
            inexact matches.
        tolerance : optional
            Maximum distance between original and new labels for inexact
            matches. The values of the index at the matching locations must
            satisfy the equation
            ``abs(index[indexer] - target) <= tolerance``.

            .. versionadded:: 0.17.0

        Examples
        --------
        >>> indexer = index.get_indexer(new_index)
        >>> new_values = cur_values.take(indexer)

        Returns
        -------
        indexer : ndarray of int
            Integers from 0 to n - 1 indicating that the index at these
            positions matches the corresponding target values. Missing values
            in the target are marked by -1.
        """
        method = missing.clean_reindex_fill_method(method)
        target = _ensure_index(target)
        if tolerance is not None:
            tolerance = self._convert_tolerance(tolerance)

        pself, ptarget = self._possibly_promote(target)
        if pself is not self or ptarget is not target:
            return pself.get_indexer(ptarget, method=method, limit=limit,
                                     tolerance=tolerance)

        if not is_dtype_equal(self.dtype, target.dtype):
            this = self.astype(object)
            target = target.astype(object)
            return this.get_indexer(target, method=method, limit=limit,
                                    tolerance=tolerance)

        if not self.is_unique:
            raise InvalidIndexError('Reindexing only valid with uniquely'
                                    ' valued Index objects')

        if method == 'pad' or method == 'backfill':
            indexer = self._get_fill_indexer(target, method, limit, tolerance)
        elif method == 'nearest':
            indexer = self._get_nearest_indexer(target, limit, tolerance)
        else:
            if tolerance is not None:
                raise ValueError('tolerance argument only valid if doing pad, '
                                 'backfill or nearest reindexing')
            if limit is not None:
                raise ValueError('limit argument only valid if doing pad, '
                                 'backfill or nearest reindexing')

            indexer = self._engine.get_indexer(target._values)

        return _ensure_platform_int(indexer)
    def _convert_tolerance(self, tolerance):
        # override this method on subclasses
        return tolerance

    def _get_fill_indexer(self, target, method, limit=None, tolerance=None):
        if self.is_monotonic_increasing and target.is_monotonic_increasing:
            method = (self._engine.get_pad_indexer if method == 'pad' else
                      self._engine.get_backfill_indexer)
            indexer = method(target._values, limit)
        else:
            indexer = self._get_fill_indexer_searchsorted(target, method,
                                                          limit)
        if tolerance is not None:
            indexer = self._filter_indexer_tolerance(target._values, indexer,
                                                     tolerance)
        return indexer

    def _get_fill_indexer_searchsorted(self, target, method, limit=None):
        """
        Fallback pad/backfill get_indexer that works for monotonic decreasing
        indexes and non-monotonic targets
        """
        if limit is not None:
            raise ValueError('limit argument for %r method only well-defined '
                             'if index and target are monotonic' % method)

        side = 'left' if method == 'pad' else 'right'
        target = np.asarray(target)

        # find exact matches first (this simplifies the algorithm)
        indexer = self.get_indexer(target)
        nonexact = (indexer == -1)
        indexer[nonexact] = self._searchsorted_monotonic(target[nonexact],
                                                         side)
        if side == 'left':
            # searchsorted returns "indices into a sorted array such that,
            # if the corresponding elements in v were inserted before the
            # indices, the order of a would be preserved".
            # Thus, we need to subtract 1 to find values to the left.
            indexer[nonexact] -= 1
            # This also mapped not found values (values of 0 from
            # np.searchsorted) to -1, which conveniently is also our
            # sentinel for missing values
        else:
            # Mark indices to the right of the largest value as not found
            indexer[indexer == len(self)] = -1
        return indexer

    def _get_nearest_indexer(self, target, limit, tolerance):
        """
        Get the indexer for the nearest index labels; requires an index with
        values that can be subtracted from each other (e.g., not strings or
        tuples).
        """
        left_indexer = self.get_indexer(target, 'pad', limit=limit)
        right_indexer = self.get_indexer(target, 'backfill', limit=limit)

        target = np.asarray(target)
        left_distances = abs(self.values[left_indexer] - target)
        right_distances = abs(self.values[right_indexer] - target)

        op = operator.lt if self.is_monotonic_increasing else operator.le
        indexer = np.where(op(left_distances, right_distances) |
                           (right_indexer == -1), left_indexer, right_indexer)
        if tolerance is not None:
            indexer = self._filter_indexer_tolerance(target, indexer,
                                                     tolerance)
        return indexer

    def _filter_indexer_tolerance(self, target, indexer, tolerance):
        distance = abs(self.values[indexer] - target)
        indexer = np.where(distance <= tolerance, indexer, -1)
        return indexer
[docs]    def get_indexer_non_unique(self, target):
        """
        Return an indexer suitable for taking from a non-unique index.

        The indexer holds positions into this index for the labels, in the
        same order as the target (each target label contributes one position
        per match); target values absent from the index are marked as -1 in
        the indexer. A second array holding the locations of those missing
        values within the target is returned as well. The target must be an
        iterable.
        """
        target = _ensure_index(target)
        pself, ptarget = self._possibly_promote(target)
        if pself is not self or ptarget is not target:
            return pself.get_indexer_non_unique(ptarget)

        if self.is_all_dates:
            self = Index(self.asi8)
            tgt_values = target.asi8
        else:
            tgt_values = target._values

        indexer, missing = self._engine.get_indexer_non_unique(tgt_values)
        return Index(indexer), missing
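    # Sketch (hypothetical values, assumes ``import pandas as pd``): 'b'
    # occurs twice, so both positions are returned; 'd' is absent, so the
    # indexer holds -1 and ``missing`` holds its position in the target.
    #
    # >>> idx = pd.Index(['a', 'b', 'b', 'c'])
    # >>> indexer, missing = idx.get_indexer_non_unique(['b', 'd'])
    # >>> list(indexer), list(missing)
    # ([1, 2, -1], [1])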
[docs]    def get_indexer_for(self, target, **kwargs):
        """
        Guaranteed return of an indexer even when the index is non-unique;
        dispatches to get_indexer or get_indexer_non_unique as appropriate.
        """
        if self.is_unique:
            return self.get_indexer(target, **kwargs)
        indexer, _ = self.get_indexer_non_unique(target, **kwargs)
        return indexer
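    # Sketch (hypothetical values, assumes ``import pandas as pd``): unlike
    # get_indexer, this also works on a non-unique index.
    #
    # >>> idx = pd.Index(['a', 'b', 'b'])
    # >>> list(idx.get_indexer_for(['b']))
    # [1, 2]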
def _possibly_promote(self, other): # A hack, but it works from pandas.tseries.index import DatetimeIndex if self.inferred_type == 'date' and isinstance(other, DatetimeIndex): return DatetimeIndex(self), other elif self.inferred_type == 'boolean': if not is_object_dtype(self.dtype): return self.astype('object'), other.astype('object') return self, other
[docs] def groupby(self, to_groupby): """ Group the index labels by a given array of values. Parameters ---------- to_groupby : array Values used to determine the groups. Returns ------- groups : dict {group name -> group labels} """ return self._groupby(self.values, _values_from_object(to_groupby))
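    # Sketch (hypothetical values, assumes ``import pandas as pd`` and
    # ``import numpy as np``): the labels are bucketed by the grouping
    # values, returning roughly:
    #
    # >>> idx = pd.Index(['a', 'b', 'c'])
    # >>> idx.groupby(np.array([1, 1, 2]))
    # {1: array(['a', 'b'], dtype=object), 2: array(['c'], dtype=object)}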
[docs] def map(self, mapper): """ Apply mapper function to its values. Parameters ---------- mapper : callable Function to be applied. Returns ------- applied : array """ return self._arrmap(self.values, mapper)
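    # Sketch (hypothetical values, assumes ``import pandas as pd``); note
    # that this returns a plain array rather than a new Index:
    #
    # >>> pd.Index(['a', 'b']).map(str.upper)
    # array(['A', 'B'], dtype=object)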
[docs] def isin(self, values, level=None): """ Compute boolean array of whether each index value is found in the passed set of values. Parameters ---------- values : set or list-like Sought values. .. versionadded:: 0.18.1 Support for values as a set level : str or int, optional Name or position of the index level to use (if the index is a MultiIndex). Notes ----- If `level` is specified: - if it is the name of one *and only one* index level, use that level; - otherwise it should be a number indicating level position. Returns ------- is_contained : ndarray (boolean dtype) """ if level is not None: self._validate_index_level(level) return algos.isin(np.array(self), values)
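    # Sketch (hypothetical values, assumes ``import pandas as pd``):
    #
    # >>> list(pd.Index(['a', 'b', 'c']).isin(['b', 'z']))
    # [False, True, False]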
    def _can_reindex(self, indexer):
        """
        *this is an internal non-public method*

        Check if we are allowing reindexing with this particular indexer.

        Parameters
        ----------
        indexer : an integer indexer

        Raises
        ------
        ValueError if it is a duplicate axis
        """

        # trying to reindex on an axis with duplicates
        if not self.is_unique and len(indexer):
            raise ValueError("cannot reindex from a duplicate axis")
[docs] def reindex(self, target, method=None, level=None, limit=None, tolerance=None): """ Create index with target's values (move/add/delete values as necessary) Parameters ---------- target : an iterable Returns ------- new_index : pd.Index Resulting index indexer : np.ndarray or None Indices of output values in original index """ # GH6552: preserve names when reindexing to non-named target # (i.e. neither Index nor Series). preserve_names = not hasattr(target, 'name') # GH7774: preserve dtype/tz if target is empty and not an Index. target = _ensure_has_len(target) # target may be an iterator if not isinstance(target, Index) and len(target) == 0: attrs = self._get_attributes_dict() attrs.pop('freq', None) # don't preserve freq target = self._simple_new(None, dtype=self.dtype, **attrs) else: target = _ensure_index(target) if level is not None: if method is not None: raise TypeError('Fill method not supported if level passed') _, indexer, _ = self._join_level(target, level, how='right', return_indexers=True) else: if self.equals(target): indexer = None else: if self.is_unique: indexer = self.get_indexer(target, method=method, limit=limit, tolerance=tolerance) else: if method is not None or limit is not None: raise ValueError("cannot reindex a non-unique index " "with a method or limit") indexer, missing = self.get_indexer_non_unique(target) if preserve_names and target.nlevels == 1 and target.name != self.name: target = target.copy() target.name = self.name return target, indexer
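    # Sketch (hypothetical values, assumes ``import pandas as pd``): the
    # target's values become the new index, and the indexer maps them back
    # to positions in the original (-1 for values that were not present).
    #
    # >>> idx = pd.Index(['a', 'b', 'c'])
    # >>> target, indexer = idx.reindex(['b', 'd'])
    # >>> list(target), list(indexer)
    # (['b', 'd'], [1, -1])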
def _reindex_non_unique(self, target): """ *this is an internal non-public method* Create a new index with target's values (move/add/delete values as necessary) use with non-unique Index and a possibly non-unique target Parameters ---------- target : an iterable Returns ------- new_index : pd.Index Resulting index indexer : np.ndarray or None Indices of output values in original index """ target = _ensure_index(target) indexer, missing = self.get_indexer_non_unique(target) check = indexer != -1 new_labels = self.take(indexer[check]) new_indexer = None if len(missing): l = np.arange(len(indexer)) missing = _ensure_platform_int(missing) missing_labels = target.take(missing) missing_indexer = _ensure_int64(l[~check]) cur_labels = self.take(indexer[check])._values cur_indexer = _ensure_int64(l[check]) new_labels = np.empty(tuple([len(indexer)]), dtype=object) new_labels[cur_indexer] = cur_labels new_labels[missing_indexer] = missing_labels # a unique indexer if target.is_unique: # see GH5553, make sure we use the right indexer new_indexer = np.arange(len(indexer)) new_indexer[cur_indexer] = np.arange(len(cur_labels)) new_indexer[missing_indexer] = -1 # we have a non_unique selector, need to use the original # indexer here else: # need to retake to have the same size as the indexer indexer = indexer._values indexer[~check] = 0 # reset the new indexer to account for the new size new_indexer = np.arange(len(self.take(indexer))) new_indexer[~check] = -1 new_index = self._shallow_copy_with_infer(new_labels, freq=None) return new_index, indexer, new_indexer
[docs]    def join(self, other, how='left', level=None, return_indexers=False):
        """
        Compute join_index and indexers to conform data structures to the new
        index.

        Parameters
        ----------
        other : Index
        how : {'left', 'right', 'inner', 'outer'}
        level : int or level name, default None
        return_indexers : boolean, default False

        Returns
        -------
        join_index, (left_indexer, right_indexer)
        """
        from .multi import MultiIndex
        self_is_mi = isinstance(self, MultiIndex)
        other_is_mi = isinstance(other, MultiIndex)

        # try to figure out the join level
        # GH3662
        if level is None and (self_is_mi or other_is_mi):

            # have the same levels/names so a simple join
            if self.names == other.names:
                pass
            else:
                return self._join_multi(other, how=how,
                                        return_indexers=return_indexers)

        # join on the level
        if level is not None and (self_is_mi or other_is_mi):
            return self._join_level(other, level, how=how,
                                    return_indexers=return_indexers)

        other = _ensure_index(other)

        if len(other) == 0 and how in ('left', 'outer'):
            join_index = self._shallow_copy()
            if return_indexers:
                rindexer = np.repeat(-1, len(join_index))
                return join_index, None, rindexer
            else:
                return join_index

        if len(self) == 0 and how in ('right', 'outer'):
            join_index = other._shallow_copy()
            if return_indexers:
                lindexer = np.repeat(-1, len(join_index))
                return join_index, lindexer, None
            else:
                return join_index

        if self._join_precedence < other._join_precedence:
            how = {'right': 'left', 'left': 'right'}.get(how, how)
            result = other.join(self, how=how, level=level,
                                return_indexers=return_indexers)
            if return_indexers:
                x, y, z = result
                result = x, z, y
            return result

        if not is_dtype_equal(self.dtype, other.dtype):
            this = self.astype('O')
            other = other.astype('O')
            return this.join(other, how=how, return_indexers=return_indexers)

        _validate_join_method(how)

        if not self.is_unique and not other.is_unique:
            return self._join_non_unique(other, how=how,
                                         return_indexers=return_indexers)
        elif not self.is_unique or not other.is_unique:
            if self.is_monotonic and other.is_monotonic:
                return self._join_monotonic(other, how=how,
                                            return_indexers=return_indexers)
            else:
                return self._join_non_unique(other, how=how,
                                             return_indexers=return_indexers)
        elif self.is_monotonic and other.is_monotonic:
            try:
                return self._join_monotonic(other, how=how,
                                            return_indexers=return_indexers)
            except TypeError:
                pass

        if how == 'left':
            join_index = self
        elif how == 'right':
            join_index = other
        elif how == 'inner':
            join_index = self.intersection(other)
        elif how == 'outer':
            join_index = self.union(other)

        if return_indexers:
            if join_index is self:
                lindexer = None
            else:
                lindexer = self.get_indexer(join_index)
            if join_index is other:
                rindexer = None
            else:
                rindexer = other.get_indexer(join_index)
            return join_index, lindexer, rindexer
        else:
            return join_index
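    # Sketch (hypothetical values, assumes ``import pandas as pd``):
    #
    # >>> left, right = pd.Index([1, 2, 3]), pd.Index([2, 3, 4])
    # >>> joined, lidx, ridx = left.join(right, how='outer',
    # ...                                return_indexers=True)
    # >>> list(joined), list(lidx), list(ridx)
    # ([1, 2, 3, 4], [0, 1, 2, -1], [-1, 0, 1, 2])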
def _join_multi(self, other, how, return_indexers=True): from .multi import MultiIndex self_is_mi = isinstance(self, MultiIndex) other_is_mi = isinstance(other, MultiIndex) # figure out join names self_names = [n for n in self.names if n is not None] other_names = [n for n in other.names if n is not None] overlap = list(set(self_names) & set(other_names)) # need at least 1 in common, but not more than 1 if not len(overlap): raise ValueError("cannot join with no level specified and no " "overlapping names") if len(overlap) > 1: raise NotImplementedError("merging with more than one level " "overlap on a multi-index is not " "implemented") jl = overlap[0] # make the indices into mi's that match if not (self_is_mi and other_is_mi): flip_order = False if self_is_mi: self, other = other, self flip_order = True # flip if join method is right or left how = {'right': 'left', 'left': 'right'}.get(how, how) level = other.names.index(jl) result = self._join_level(other, level, how=how, return_indexers=return_indexers) if flip_order: if isinstance(result, tuple): return result[0], result[2], result[1] return result # 2 multi-indexes raise NotImplementedError("merging with both multi-indexes is not " "implemented") def _join_non_unique(self, other, how='left', return_indexers=False): from pandas.tools.merge import _get_join_indexers left_idx, right_idx = _get_join_indexers([self.values], [other._values], how=how, sort=True) left_idx = _ensure_platform_int(left_idx) right_idx = _ensure_platform_int(right_idx) join_index = self.values.take(left_idx) mask = left_idx == -1 np.putmask(join_index, mask, other._values.take(right_idx)) join_index = self._wrap_joined_index(join_index, other) if return_indexers: return join_index, left_idx, right_idx else: return join_index def _join_level(self, other, level, how='left', return_indexers=False, keep_order=True): """ The join method *only* affects the level of the resulting MultiIndex. Otherwise it just exactly aligns the Index data to the labels of the level in the MultiIndex. If `keep_order` == True, the order of the data indexed by the MultiIndex will not be changed; otherwise, it will tie out with `other`. 
""" from pandas.algos import groupsort_indexer from .multi import MultiIndex def _get_leaf_sorter(labels): """ returns sorter for the inner most level while preserving the order of higher levels """ if labels[0].size == 0: return np.empty(0, dtype='int64') if len(labels) == 1: lab = _ensure_int64(labels[0]) sorter, _ = groupsort_indexer(lab, 1 + lab.max()) return sorter # find indexers of begining of each set of # same-key labels w.r.t all but last level tic = labels[0][:-1] != labels[0][1:] for lab in labels[1:-1]: tic |= lab[:-1] != lab[1:] starts = np.hstack(([True], tic, [True])).nonzero()[0] lab = _ensure_int64(labels[-1]) return lib.get_level_sorter(lab, _ensure_int64(starts)) if isinstance(self, MultiIndex) and isinstance(other, MultiIndex): raise TypeError('Join on level between two MultiIndex objects ' 'is ambiguous') left, right = self, other flip_order = not isinstance(self, MultiIndex) if flip_order: left, right = right, left how = {'right': 'left', 'left': 'right'}.get(how, how) level = left._get_level_number(level) old_level = left.levels[level] if not right.is_unique: raise NotImplementedError('Index._join_level on non-unique index ' 'is not implemented') new_level, left_lev_indexer, right_lev_indexer = \ old_level.join(right, how=how, return_indexers=True) if left_lev_indexer is None: if keep_order or len(left) == 0: left_indexer = None join_index = left else: # sort the leaves left_indexer = _get_leaf_sorter(left.labels[:level + 1]) join_index = left[left_indexer] else: left_lev_indexer = _ensure_int64(left_lev_indexer) rev_indexer = lib.get_reverse_indexer(left_lev_indexer, len(old_level)) new_lev_labels = algos.take_nd(rev_indexer, left.labels[level], allow_fill=False) new_labels = list(left.labels) new_labels[level] = new_lev_labels new_levels = list(left.levels) new_levels[level] = new_level if keep_order: # just drop missing values. o.w. keep order left_indexer = np.arange(len(left)) mask = new_lev_labels != -1 if not mask.all(): new_labels = [lab[mask] for lab in new_labels] left_indexer = left_indexer[mask] else: # tie out the order with other if level == 0: # outer most level, take the fast route ngroups = 1 + new_lev_labels.max() left_indexer, counts = groupsort_indexer(new_lev_labels, ngroups) # missing values are placed first; drop them! left_indexer = left_indexer[counts[0]:] new_labels = [lab[left_indexer] for lab in new_labels] else: # sort the leaves mask = new_lev_labels != -1 mask_all = mask.all() if not mask_all: new_labels = [lab[mask] for lab in new_labels] left_indexer = _get_leaf_sorter(new_labels[:level + 1]) new_labels = [lab[left_indexer] for lab in new_labels] # left_indexers are w.r.t masked frame. # reverse to original frame! 
if not mask_all: left_indexer = mask.nonzero()[0][left_indexer] join_index = MultiIndex(levels=new_levels, labels=new_labels, names=left.names, verify_integrity=False) if right_lev_indexer is not None: right_indexer = algos.take_nd(right_lev_indexer, join_index.labels[level], allow_fill=False) else: right_indexer = join_index.labels[level] if flip_order: left_indexer, right_indexer = right_indexer, left_indexer if return_indexers: return join_index, left_indexer, right_indexer else: return join_index def _join_monotonic(self, other, how='left', return_indexers=False): if self.equals(other): ret_index = other if how == 'right' else self if return_indexers: return ret_index, None, None else: return ret_index sv = self.values ov = other._values if self.is_unique and other.is_unique: # We can perform much better than the general case if how == 'left': join_index = self lidx = None ridx = self._left_indexer_unique(sv, ov) elif how == 'right': join_index = other lidx = self._left_indexer_unique(ov, sv) ridx = None elif how == 'inner': join_index, lidx, ridx = self._inner_indexer(sv, ov) join_index = self._wrap_joined_index(join_index, other) elif how == 'outer': join_index, lidx, ridx = self._outer_indexer(sv, ov) join_index = self._wrap_joined_index(join_index, other) else: if how == 'left': join_index, lidx, ridx = self._left_indexer(sv, ov) elif how == 'right': join_index, ridx, lidx = self._left_indexer(ov, sv) elif how == 'inner': join_index, lidx, ridx = self._inner_indexer(sv, ov) elif how == 'outer': join_index, lidx, ridx = self._outer_indexer(sv, ov) join_index = self._wrap_joined_index(join_index, other) if return_indexers: return join_index, lidx, ridx else: return join_index def _wrap_joined_index(self, joined, other): name = self.name if self.name == other.name else None return Index(joined, name=name)
[docs] def slice_indexer(self, start=None, end=None, step=None, kind=None): """ For an ordered Index, compute the slice indexer for input labels and step Parameters ---------- start : label, default None If None, defaults to the beginning end : label, default None If None, defaults to the end step : int, default None kind : string, default None Returns ------- indexer : ndarray or slice Notes ----- This function assumes that the data is sorted, so use at your own peril """ start_slice, end_slice = self.slice_locs(start, end, step=step, kind=kind) # return a slice if not is_scalar(start_slice): raise AssertionError("Start slice bound is non-scalar") if not is_scalar(end_slice): raise AssertionError("End slice bound is non-scalar") return slice(start_slice, end_slice, step)
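    # Sketch (hypothetical values, assumes ``import pandas as pd``): note
    # that, unlike positional slicing, both label bounds are inclusive.
    #
    # >>> idx = pd.Index(list('abcdef'))
    # >>> idx.slice_indexer(start='b', end='d')
    # slice(1, 4, None)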
    def _maybe_cast_indexer(self, key):
        """
        If we have a float key and are not a floating index
        then try to cast to an int if equivalent
        """

        if is_float(key) and not self.is_floating():
            try:
                ckey = int(key)
                if ckey == key:
                    key = ckey
            except (ValueError, TypeError):
                pass
        return key

    def _validate_indexer(self, form, key, kind):
        """
        If we are a positional indexer, validate that we have appropriately
        typed bounds (must be an integer).
        """
        assert kind in ['ix', 'loc', 'getitem', 'iloc']

        if key is None:
            pass
        elif is_integer(key):
            pass
        elif kind in ['iloc', 'getitem']:
            self._invalid_indexer(form, key)
        return key

    def _maybe_cast_slice_bound(self, label, side, kind):
        """
        This function should be overloaded in subclasses that allow
        non-trivial casting on label-slice bounds, e.g. datetime-like indices
        allowing strings containing formatted datetimes.

        Parameters
        ----------
        label : object
        side : {'left', 'right'}
        kind : {'ix', 'loc', 'getitem'}

        Returns
        -------
        label : object

        Notes
        -----
        Value of `side` parameter should be validated in caller.
        """
        assert kind in ['ix', 'loc', 'getitem', None]

        # We are a plain index here (sub-classes override this method if they
        # wish to have special treatment for floats/ints, e.g. Float64Index
        # and datetimelike Indexes)
        # reject them
        if is_float(label):
            if not (kind in ['ix'] and (self.holds_integer() or
                                        self.is_floating())):
                self._invalid_indexer('slice', label)

        # we are trying to find integer bounds on a non-integer based index
        # this is rejected (generally .loc gets you here)
        elif is_integer(label):
            self._invalid_indexer('slice', label)

        return label

    def _searchsorted_monotonic(self, label, side='left'):
        if self.is_monotonic_increasing:
            return self.searchsorted(label, side=side)
        elif self.is_monotonic_decreasing:
            # np.searchsorted expects ascending sort order, have to reverse
            # everything for it to work (element ordering, search side and
            # resulting value).
            pos = self[::-1].searchsorted(label, side='right' if side == 'left'
                                          else 'left')
            return len(self) - pos

        raise ValueError('index must be monotonic increasing or decreasing')
[docs] def get_slice_bound(self, label, side, kind): """ Calculate slice bound that corresponds to given label. Returns leftmost (one-past-the-rightmost if ``side=='right'``) position of given label. Parameters ---------- label : object side : {'left', 'right'} kind : {'ix', 'loc', 'getitem'} """ assert kind in ['ix', 'loc', 'getitem', None] if side not in ('left', 'right'): raise ValueError("Invalid value for side kwarg," " must be either 'left' or 'right': %s" % (side, )) original_label = label # For datetime indices label may be a string that has to be converted # to datetime boundary according to its resolution. label = self._maybe_cast_slice_bound(label, side, kind) # we need to look up the label try: slc = self.get_loc(label) except KeyError as err: try: return self._searchsorted_monotonic(label, side) except ValueError: # raise the original KeyError raise err if isinstance(slc, np.ndarray): # get_loc may return a boolean array or an array of indices, which # is OK as long as they are representable by a slice. if is_bool_dtype(slc): slc = lib.maybe_booleans_to_slice(slc.view('u1')) else: slc = lib.maybe_indices_to_slice(slc.astype('i8'), len(self)) if isinstance(slc, np.ndarray): raise KeyError("Cannot get %s slice bound for non-unique " "label: %r" % (side, original_label)) if isinstance(slc, slice): if side == 'left': return slc.start else: return slc.stop else: if side == 'right': return slc + 1 else: return slc
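    # Sketch (hypothetical values, assumes ``import pandas as pd``):
    #
    # >>> idx = pd.Index(list('abcdef'))
    # >>> idx.get_slice_bound('c', 'left', 'loc')
    # 2
    # >>> idx.get_slice_bound('c', 'right', 'loc')
    # 3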
[docs]    def slice_locs(self, start=None, end=None, step=None, kind=None):
        """
        Compute slice locations for input labels.

        Parameters
        ----------
        start : label, default None
            If None, defaults to the beginning
        end : label, default None
            If None, defaults to the end
        step : int, defaults None
            If None, defaults to 1
        kind : {'ix', 'loc', 'getitem'} or None

        Returns
        -------
        start, end : int
        """
        inc = (step is None or step >= 0)

        if not inc:
            # If it's a reverse slice, temporarily swap bounds.
            start, end = end, start

        start_slice = None
        if start is not None:
            start_slice = self.get_slice_bound(start, 'left', kind)
        if start_slice is None:
            start_slice = 0

        end_slice = None
        if end is not None:
            end_slice = self.get_slice_bound(end, 'right', kind)
        if end_slice is None:
            end_slice = len(self)

        if not inc:
            # Bounds at this moment are swapped, swap them back and shift by 1.
            #
            # slice_locs('B', 'A', step=-1): s='B', e='A'
            #
            #              s='A'                 e='B'
            # AFTER SWAP:    |                     |
            #                v ------------------> V
            #           -----------------------------------
            #           | | |A|A|A|A| | | | | |B|B| | | | |
            #           -----------------------------------
            #              ^ <------------------ ^
            # SHOULD BE:   |                     |
            #           end=s-1              start=e-1
            #
            end_slice, start_slice = start_slice - 1, end_slice - 1

            # i == -1 triggers ``len(self) + i`` selection that points to the
            # last element, not before-the-first one, subtracting len(self)
            # compensates that.
            if end_slice == -1:
                end_slice -= len(self)
            if start_slice == -1:
                start_slice -= len(self)

        return start_slice, end_slice
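    # Sketch (hypothetical values, assumes ``import pandas as pd``),
    # including the reverse-slice case handled above:
    #
    # >>> idx = pd.Index(list('abcd'))
    # >>> idx.slice_locs(start='b', end='c')
    # (1, 3)
    # >>> idx.slice_locs(start='c', end='b', step=-1)
    # (2, 0)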
[docs] def delete(self, loc): """ Make new Index with passed location(-s) deleted Returns ------- new_index : Index """ return self._shallow_copy(np.delete(self._data, loc))
[docs] def insert(self, loc, item): """ Make new Index inserting new item at location. Follows Python list.append semantics for negative values Parameters ---------- loc : int item : object Returns ------- new_index : Index """ _self = np.asarray(self) item = self._coerce_scalar_to_index(item)._values idx = np.concatenate((_self[:loc], item, _self[loc:])) return self._shallow_copy_with_infer(idx)
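    # Sketch for delete/insert (hypothetical values, assumes
    # ``import pandas as pd``); both return a new Index:
    #
    # >>> idx = pd.Index(['a', 'b', 'c'])
    # >>> list(idx.insert(1, 'x'))
    # ['a', 'x', 'b', 'c']
    # >>> list(idx.delete(0))
    # ['b', 'c']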
[docs] def drop(self, labels, errors='raise'): """ Make new Index with passed list of labels deleted Parameters ---------- labels : array-like errors : {'ignore', 'raise'}, default 'raise' If 'ignore', suppress error and existing labels are dropped. Returns ------- dropped : Index """ labels = com._index_labels_to_array(labels) indexer = self.get_indexer(labels) mask = indexer == -1 if mask.any(): if errors != 'ignore': raise ValueError('labels %s not contained in axis' % labels[mask]) indexer = indexer[~mask] return self.delete(indexer)
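    # Sketch (hypothetical values, assumes ``import pandas as pd``): with
    # errors='ignore', labels missing from the index are silently skipped.
    #
    # >>> list(pd.Index(['a', 'b', 'c']).drop(['b', 'z'], errors='ignore'))
    # ['a', 'c']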
@deprecate_kwarg('take_last', 'keep', mapping={True: 'last', False: 'first'}) @Appender(base._shared_docs['drop_duplicates'] % _index_doc_kwargs)
[docs] def drop_duplicates(self, keep='first'): return super(Index, self).drop_duplicates(keep=keep)
@deprecate_kwarg('take_last', 'keep', mapping={True: 'last', False: 'first'}) @Appender(base._shared_docs['duplicated'] % _index_doc_kwargs)
[docs] def duplicated(self, keep='first'): return super(Index, self).duplicated(keep=keep)
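    # Sketch for duplicated/drop_duplicates (hypothetical values, assumes
    # ``import pandas as pd``):
    #
    # >>> idx = pd.Index(['a', 'b', 'a'])
    # >>> list(idx.duplicated())
    # [False, False, True]
    # >>> list(idx.drop_duplicates(keep='last'))
    # ['b', 'a']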
    _index_shared_docs['fillna'] = """
        Fill NA/NaN values with the specified value

        Parameters
        ----------
        value : scalar
            Scalar value to use to fill holes (e.g. 0).
            This value cannot be a list-like.
        downcast : dict, default is None
            a dict of item->dtype of what to downcast if possible,
            or the string 'infer' which will try to downcast to an appropriate
            equal type (e.g. float64 to int64 if possible)

        Returns
        -------
        filled : %(klass)s
        """

    @Appender(_index_shared_docs['fillna'])
[docs]    def fillna(self, value=None, downcast=None):
        self._assert_can_do_op(value)
        if self.hasnans:
            result = self.putmask(self._isnan, value)
            if downcast is None:
                # no need to care about metadata other than name,
                # because the index can't have a freq if it has NaNs
                return Index(result, name=self.name)
        return self._shallow_copy()
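    # Sketch (hypothetical values, assumes ``import pandas as pd`` and
    # ``import numpy as np``):
    #
    # >>> list(pd.Index([1.0, np.nan, 3.0]).fillna(2.0))
    # [1.0, 2.0, 3.0]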
_index_shared_docs['dropna'] = """ Return Index without NA/NaN values Parameters ---------- how : {'any', 'all'}, default 'any' If the Index is a MultiIndex, drop the value when any or all levels are NaN. Returns ------- valid : Index """ @Appender(_index_shared_docs['dropna'])
[docs] def dropna(self, how='any'): if how not in ('any', 'all'): raise ValueError("invalid how option: {0}".format(how)) if self.hasnans: return self._shallow_copy(self.values[~self._isnan]) return self._shallow_copy()
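    # Sketch (hypothetical values, assumes ``import pandas as pd`` and
    # ``import numpy as np``):
    #
    # >>> list(pd.Index([1.0, np.nan, 3.0]).dropna())
    # [1.0, 3.0]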
    def _evaluate_with_timedelta_like(self, other, op, opstr):
        raise TypeError("can only perform ops with timedelta like values")

    def _evaluate_with_datetime_like(self, other, op, opstr):
        raise TypeError("can only perform ops with datetime like values")

    def _evaluate_compare(self, other, op):
        raise base.AbstractMethodError(self)

    @classmethod
    def _add_comparison_methods(cls):
        """ add in comparison methods """

        def _make_compare(op):
            def _evaluate_compare(self, other):
                if isinstance(other, (np.ndarray, Index, ABCSeries)):
                    if other.ndim > 0 and len(self) != len(other):
                        raise ValueError('Lengths must match to compare')

                # we may need to directly compare underlying
                # representations
                if needs_i8_conversion(self) and needs_i8_conversion(other):
                    return self._evaluate_compare(other, op)

                if is_object_dtype(self) and self.nlevels == 1:
                    # don't pass MultiIndex
                    result = _comp_method_OBJECT_ARRAY(op, self.values, other)
                else:
                    result = op(self.values, np.asarray(other))

                # technically we could support bool dtyped Index
                # for now just return the indexing array directly
                if is_bool_dtype(result):
                    return result
                try:
                    return Index(result)
                except TypeError:
                    return result

            return _evaluate_compare

        cls.__eq__ = _make_compare(operator.eq)
        cls.__ne__ = _make_compare(operator.ne)
        cls.__lt__ = _make_compare(operator.lt)
        cls.__gt__ = _make_compare(operator.gt)
        cls.__le__ = _make_compare(operator.le)
        cls.__ge__ = _make_compare(operator.ge)

    @classmethod
    def _add_numericlike_set_methods_disabled(cls):
        """ add in the numeric set-like methods to disable """

        def _make_invalid_op(name):
            def invalid_op(self, other=None):
                raise TypeError("cannot perform {name} with this index type: "
                                "{typ}".format(name=name, typ=type(self)))

            invalid_op.__name__ = name
            return invalid_op

        cls.__add__ = cls.__radd__ = __iadd__ = _make_invalid_op('__add__')  # noqa
        cls.__sub__ = __isub__ = _make_invalid_op('__sub__')  # noqa

    @classmethod
    def _add_numeric_methods_disabled(cls):
        """ add in numeric methods to disable """

        def _make_invalid_op(name):
            def invalid_op(self, other=None):
                raise TypeError("cannot perform {name} with this index type: "
                                "{typ}".format(name=name, typ=type(self)))

            invalid_op.__name__ = name
            return invalid_op

        cls.__pow__ = cls.__rpow__ = _make_invalid_op('__pow__')
        cls.__mul__ = cls.__rmul__ = _make_invalid_op('__mul__')
        cls.__floordiv__ = cls.__rfloordiv__ = _make_invalid_op('__floordiv__')
        cls.__truediv__ = cls.__rtruediv__ = _make_invalid_op('__truediv__')
        if not compat.PY3:
            cls.__div__ = cls.__rdiv__ = _make_invalid_op('__div__')
        cls.__neg__ = _make_invalid_op('__neg__')
        cls.__pos__ = _make_invalid_op('__pos__')
        cls.__abs__ = _make_invalid_op('__abs__')
        cls.__inv__ = _make_invalid_op('__inv__')

    def _maybe_update_attributes(self, attrs):
        """ Update Index attributes (e.g. freq) depending on op """
        return attrs

    def _validate_for_numeric_unaryop(self, op, opstr):
        """ validate if we can perform a numeric unary operation """

        if not self._is_numeric_dtype:
            raise TypeError("cannot evaluate a numeric op "
                            "{opstr} for type: {typ}".format(
                                opstr=opstr,
                                typ=type(self))
                            )

    def _validate_for_numeric_binop(self, other, op, opstr):
        """
        return valid other, evaluate or raise TypeError
        if we are not of the appropriate type

        internal method called by ops
        """
        from pandas.tseries.offsets import DateOffset

        # if we are an inheritor of numeric,
        # but not actually numeric (e.g. DatetimeIndex/PeriodIndex)
        if not self._is_numeric_dtype:
            raise TypeError("cannot evaluate a numeric op {opstr} "
                            "for type: {typ}".format(
                                opstr=opstr,
                                typ=type(self))
                            )

        if isinstance(other, Index):
            if not other._is_numeric_dtype:
                raise TypeError("cannot evaluate a numeric op "
                                "{opstr} with type: {typ}".format(
                                    opstr=opstr,
                                    typ=type(other))
                                )
        elif isinstance(other, np.ndarray) and not other.ndim:
            other = other.item()

        if isinstance(other, (Index, ABCSeries, np.ndarray)):
            if len(self) != len(other):
                raise ValueError("cannot evaluate a numeric op with "
                                 "unequal lengths")
            other = _values_from_object(other)
            if other.dtype.kind not in ['f', 'i']:
                raise TypeError("cannot evaluate a numeric op "
                                "with a non-numeric dtype")
        elif isinstance(other, (DateOffset, np.timedelta64,
                                Timedelta, datetime.timedelta)):
            # handled higher up in the call stack
            pass
        elif isinstance(other, (Timestamp, np.datetime64)):
            # handled higher up in the call stack
            pass
        else:
            if not (is_float(other) or is_integer(other)):
                raise TypeError("can only perform ops with scalar values")

        return other

    @classmethod
    def _add_numeric_methods_binary(cls):
        """ add in numeric methods """

        def _make_evaluate_binop(op, opstr, reversed=False):
            def _evaluate_numeric_binop(self, other):
                from pandas.tseries.offsets import DateOffset
                other = self._validate_for_numeric_binop(other, op, opstr)

                # handle time-based others
                if isinstance(other, (DateOffset, np.timedelta64,
                                      Timedelta, datetime.timedelta)):
                    return self._evaluate_with_timedelta_like(other, op, opstr)
                elif isinstance(other, (Timestamp, np.datetime64)):
                    return self._evaluate_with_datetime_like(other, op, opstr)

                # if we are a reversed non-commutative op
                values = self.values
                if reversed:
                    values, other = other, values

                attrs = self._get_attributes_dict()
                attrs = self._maybe_update_attributes(attrs)
                return Index(op(values, other), **attrs)

            return _evaluate_numeric_binop

        cls.__add__ = cls.__radd__ = _make_evaluate_binop(
            operator.add, '__add__')
        cls.__sub__ = _make_evaluate_binop(
            operator.sub, '__sub__')
        cls.__rsub__ = _make_evaluate_binop(
            operator.sub, '__sub__', reversed=True)
        cls.__mul__ = cls.__rmul__ = _make_evaluate_binop(
            operator.mul, '__mul__')
        cls.__pow__ = cls.__rpow__ = _make_evaluate_binop(
            operator.pow, '__pow__')
        cls.__mod__ = _make_evaluate_binop(
            operator.mod, '__mod__')
        cls.__floordiv__ = _make_evaluate_binop(
            operator.floordiv, '__floordiv__')
        cls.__rfloordiv__ = _make_evaluate_binop(
            operator.floordiv, '__floordiv__', reversed=True)
        cls.__truediv__ = _make_evaluate_binop(
            operator.truediv, '__truediv__')
        cls.__rtruediv__ = _make_evaluate_binop(
            operator.truediv, '__truediv__', reversed=True)
        if not compat.PY3:
            cls.__div__ = _make_evaluate_binop(
                operator.div, '__div__')
            cls.__rdiv__ = _make_evaluate_binop(
                operator.div, '__div__', reversed=True)

    @classmethod
    def _add_numeric_methods_unary(cls):
        """ add in numeric unary methods """

        def _make_evaluate_unary(op, opstr):
            def _evaluate_numeric_unary(self):
                self._validate_for_numeric_unaryop(op, opstr)
                attrs = self._get_attributes_dict()
                attrs = self._maybe_update_attributes(attrs)
                return Index(op(self.values), **attrs)

            return _evaluate_numeric_unary

        cls.__neg__ = _make_evaluate_unary(lambda x: -x, '__neg__')
        cls.__pos__ = _make_evaluate_unary(lambda x: x, '__pos__')
        cls.__abs__ = _make_evaluate_unary(np.abs, '__abs__')
        cls.__inv__ = _make_evaluate_unary(lambda x: -x, '__inv__')

    @classmethod
    def _add_numeric_methods(cls):
        cls._add_numeric_methods_unary()
        cls._add_numeric_methods_binary()

    @classmethod
    def _add_logical_methods(cls):
        """ add in logical methods """

        _doc = """

        %(desc)s

        Parameters
        ----------
        All arguments to numpy.%(outname)s are accepted.

        Returns
        -------
        %(outname)s : bool or array_like (if axis is specified)
            A single element array_like may be converted to bool."""

        def _make_logical_function(name, desc, f):
            @Substitution(outname=name, desc=desc)
            @Appender(_doc)
            def logical_func(self, *args, **kwargs):
                result = f(self.values)
                if (isinstance(result, (np.ndarray, ABCSeries, Index)) and
                        result.ndim == 0):
                    # return NumPy type
                    return result.dtype.type(result.item())
                else:  # pragma: no cover
                    return result

            logical_func.__name__ = name
            return logical_func

        cls.all = _make_logical_function('all', 'Return whether all elements '
                                                'are True',
                                         np.all)
        cls.any = _make_logical_function('any',
                                         'Return whether any element is True',
                                         np.any)

    @classmethod
    def _add_logical_methods_disabled(cls):
        """ add in logical methods to disable """

        def _make_invalid_op(name):
            def invalid_op(self, other=None):
                raise TypeError("cannot perform {name} with this index type: "
                                "{typ}".format(name=name, typ=type(self)))

            invalid_op.__name__ = name
            return invalid_op

        cls.all = _make_invalid_op('all')
        cls.any = _make_invalid_op('any')
Index._add_numeric_methods_disabled()
Index._add_logical_methods()
Index._add_comparison_methods()


def _ensure_index(index_like, copy=False):
    if isinstance(index_like, Index):
        if copy:
            index_like = index_like.copy()
        return index_like
    if hasattr(index_like, 'name'):
        return Index(index_like, name=index_like.name, copy=copy)

    # must check for exactly list here because of strict type
    # check in clean_index_list
    if isinstance(index_like, list):
        if type(index_like) != list:
            index_like = list(index_like)
        # 2200 ?
        converted, all_arrays = lib.clean_index_list(index_like)

        if len(converted) > 0 and all_arrays:
            from .multi import MultiIndex
            return MultiIndex.from_arrays(converted)
        else:
            index_like = converted
    else:
        # clean_index_list does the equivalent of copying
        # so only need to do this if not list instance
        if copy:
            from copy import copy
            index_like = copy(index_like)

    return Index(index_like)


def _get_na_value(dtype):
    return {np.datetime64: tslib.NaT,
            np.timedelta64: tslib.NaT}.get(dtype, np.nan)


def _ensure_frozen(array_like, categories, copy=False):
    array_like = _coerce_indexer_dtype(array_like, categories)
    array_like = array_like.view(FrozenNDArray)
    if copy:
        array_like = array_like.copy()
    return array_like


def _ensure_has_len(seq):
    """If seq is an iterator, put its values into a list."""
    try:
        len(seq)
    except TypeError:
        return list(seq)
    else:
        return seq


def _maybe_box(idx):
    from pandas.tseries.api import DatetimeIndex, PeriodIndex, TimedeltaIndex
    klasses = DatetimeIndex, PeriodIndex, TimedeltaIndex

    if isinstance(idx, klasses):
        return idx.asobject

    return idx


def _trim_front(strings):
    """
    Trims leading spaces that are shared by all of the strings (left over
    after zeros and decimal points have been trimmed during formatting).
    """
    trimmed = strings
    while len(strings) > 0 and all([x[0] == ' ' for x in trimmed]):
        trimmed = [x[1:] for x in trimmed]
    return trimmed


def _validate_join_method(method):
    if method not in ['left', 'right', 'inner', 'outer']:
        raise ValueError('do not recognize join method %s' % method)