# pylint: disable=E1101
from __future__ import division
import operator
import warnings
from datetime import time, datetime
from datetime import timedelta
import numpy as np
from pandas.core.base import _shared_docs
from pandas.types.common import (_NS_DTYPE, _INT64_DTYPE,
is_object_dtype, is_datetime64_dtype,
is_datetimetz, is_dtype_equal,
is_integer, is_float,
is_integer_dtype,
is_datetime64_ns_dtype,
is_bool_dtype,
is_string_dtype,
is_list_like,
is_scalar,
_ensure_int64)
from pandas.types.generic import ABCSeries
from pandas.types.dtypes import DatetimeTZDtype
from pandas.types.missing import isnull
import pandas.types.concat as _concat
from pandas.core.common import (_values_from_object, _maybe_box,
PerformanceWarning)
from pandas.core.index import Index, Int64Index, Float64Index
from pandas.indexes.base import _index_shared_docs
import pandas.compat as compat
from pandas.tseries.frequencies import (
to_offset, get_period_alias,
Resolution)
from pandas.tseries.base import DatelikeOps, TimelikeOps, DatetimeIndexOpsMixin
from pandas.tseries.offsets import DateOffset, generate_range, Tick, CDay
from pandas.tseries.tools import parse_time_string, normalize_date, to_time
from pandas.tseries.timedeltas import to_timedelta
from pandas.util.decorators import (Appender, cache_readonly,
deprecate_kwarg, Substitution)
import pandas.core.common as com
import pandas.tseries.offsets as offsets
import pandas.tseries.tools as tools
from pandas.lib import Timestamp
import pandas.lib as lib
import pandas.tslib as tslib
import pandas._period as period
import pandas.algos as _algos
import pandas.index as _index
def _utc():
import pytz
return pytz.utc
# -------- some conversion wrapper functions
def _field_accessor(name, field, docstring=None):
def f(self):
values = self.asi8
if self.tz is not None:
utc = _utc()
if self.tz is not utc:
values = self._local_timestamps()
if field in ['is_month_start', 'is_month_end',
'is_quarter_start', 'is_quarter_end',
'is_year_start', 'is_year_end']:
month_kw = (self.freq.kwds.get('startingMonth',
self.freq.kwds.get('month', 12))
if self.freq else 12)
result = tslib.get_start_end_field(values, field, self.freqstr,
month_kw)
elif field in ['weekday_name']:
result = tslib.get_date_name_field(values, field)
return self._maybe_mask_results(result)
elif field in ['is_leap_year']:
# no need to mask NaT
return tslib.get_date_field(values, field)
else:
result = tslib.get_date_field(values, field)
return self._maybe_mask_results(result, convert='float64')
f.__name__ = name
f.__doc__ = docstring
return property(f)
def _dt_index_cmp(opname, nat_result=False):
"""
Wrap comparison operations to convert datetime-like to datetime64
"""
def wrapper(self, other):
func = getattr(super(DatetimeIndex, self), opname)
if (isinstance(other, datetime) or
isinstance(other, compat.string_types)):
other = _to_m8(other, tz=self.tz)
result = func(other)
if isnull(other):
result.fill(nat_result)
else:
if isinstance(other, list):
other = DatetimeIndex(other)
elif not isinstance(other, (np.ndarray, Index, ABCSeries)):
other = _ensure_datetime64(other)
result = func(np.asarray(other))
result = _values_from_object(result)
if isinstance(other, Index):
o_mask = other.values.view('i8') == tslib.iNaT
else:
o_mask = other.view('i8') == tslib.iNaT
if o_mask.any():
result[o_mask] = nat_result
if self.hasnans:
result[self._isnan] = nat_result
# support of bool dtype indexers
if is_bool_dtype(result):
return result
return Index(result)
return wrapper
def _ensure_datetime64(other):
if isinstance(other, np.datetime64):
return other
raise TypeError('%s type object %s' % (type(other), str(other)))
_midnight = time(0, 0)
def _new_DatetimeIndex(cls, d):
""" This is called upon unpickling, rather than the default which doesn't
have arguments and breaks __new__ """
# data are already in UTC
# so need to localize
tz = d.pop('tz', None)
result = cls.__new__(cls, verify_integrity=False, **d)
if tz is not None:
result = result.tz_localize('UTC').tz_convert(tz)
return result
[docs]class DatetimeIndex(DatelikeOps, TimelikeOps, DatetimeIndexOpsMixin,
Int64Index):
"""
Immutable ndarray of datetime64 data, represented internally as int64, and
which can be boxed to Timestamp objects that are subclasses of datetime and
carry metadata such as frequency information.
Parameters
----------
data : array-like (1-dimensional), optional
Optional datetime-like data to construct index with
copy : bool
Make a copy of input ndarray
freq : string or pandas offset object, optional
One of pandas date offset strings or corresponding objects
start : starting value, datetime-like, optional
If data is None, start is used as the start point in generating regular
timestamp data.
periods : int, optional, > 0
Number of periods to generate, if generating index. Takes precedence
over end argument
end : end time, datetime-like, optional
If periods is none, generated index will extend to first conforming
time on or just past end argument
closed : string or None, default None
Make the interval closed with respect to the given frequency to
the 'left', 'right', or both sides (None)
tz : pytz.timezone or dateutil.tz.tzfile
ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise'
- 'infer' will attempt to infer fall dst-transition hours based on
order
- bool-ndarray where True signifies a DST time, False signifies a
non-DST time (note that this flag is only applicable for ambiguous
times)
- 'NaT' will return NaT where there are ambiguous times
- 'raise' will raise an AmbiguousTimeError if there are ambiguous times
infer_dst : boolean, default False (DEPRECATED)
Attempt to infer fall dst-transition hours based on order
name : object
Name to be stored in the index
To learn more about the frequency strings, please see `this link
<http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
"""
_typ = 'datetimeindex'
_join_precedence = 10
def _join_i8_wrapper(joinf, **kwargs):
return DatetimeIndexOpsMixin._join_i8_wrapper(joinf, dtype='M8[ns]',
**kwargs)
_inner_indexer = _join_i8_wrapper(_algos.inner_join_indexer_int64)
_outer_indexer = _join_i8_wrapper(_algos.outer_join_indexer_int64)
_left_indexer = _join_i8_wrapper(_algos.left_join_indexer_int64)
_left_indexer_unique = _join_i8_wrapper(
_algos.left_join_indexer_unique_int64, with_indexers=False)
_arrmap = None
__eq__ = _dt_index_cmp('__eq__')
__ne__ = _dt_index_cmp('__ne__', nat_result=True)
__lt__ = _dt_index_cmp('__lt__')
__gt__ = _dt_index_cmp('__gt__')
__le__ = _dt_index_cmp('__le__')
__ge__ = _dt_index_cmp('__ge__')
_engine_type = _index.DatetimeEngine
tz = None
offset = None
_comparables = ['name', 'freqstr', 'tz']
_attributes = ['name', 'freq', 'tz']
_datetimelike_ops = ['year', 'month', 'day', 'hour', 'minute', 'second',
'weekofyear', 'week', 'dayofweek', 'weekday',
'dayofyear', 'quarter', 'days_in_month',
'daysinmonth', 'date', 'time', 'microsecond',
'nanosecond', 'is_month_start', 'is_month_end',
'is_quarter_start', 'is_quarter_end', 'is_year_start',
'is_year_end', 'tz', 'freq', 'weekday_name',
'is_leap_year']
_is_numeric_dtype = False
_infer_as_myclass = True
@deprecate_kwarg(old_arg_name='infer_dst', new_arg_name='ambiguous',
mapping={True: 'infer', False: 'raise'})
def __new__(cls, data=None,
freq=None, start=None, end=None, periods=None,
copy=False, name=None, tz=None,
verify_integrity=True, normalize=False,
closed=None, ambiguous='raise', dtype=None, **kwargs):
# This allows to later ensure that the 'copy' parameter is honored:
if isinstance(data, Index):
ref_to_data = data._data
else:
ref_to_data = data
if name is None and hasattr(data, 'name'):
name = data.name
dayfirst = kwargs.pop('dayfirst', None)
yearfirst = kwargs.pop('yearfirst', None)
freq_infer = False
if not isinstance(freq, DateOffset):
# if a passed freq is None, don't infer automatically
if freq != 'infer':
freq = to_offset(freq)
else:
freq_infer = True
freq = None
if periods is not None:
if is_float(periods):
periods = int(periods)
elif not is_integer(periods):
raise ValueError('Periods must be a number, got %s' %
str(periods))
if data is None and freq is None:
raise ValueError("Must provide freq argument if no data is "
"supplied")
# if dtype has an embeded tz, capture it
if dtype is not None:
try:
dtype = DatetimeTZDtype.construct_from_string(dtype)
dtz = getattr(dtype, 'tz', None)
if dtz is not None:
if tz is not None and str(tz) != str(dtz):
raise ValueError("cannot supply both a tz and a dtype"
" with a tz")
tz = dtz
except TypeError:
pass
if data is None:
return cls._generate(start, end, periods, name, freq,
tz=tz, normalize=normalize, closed=closed,
ambiguous=ambiguous)
if not isinstance(data, (np.ndarray, Index, ABCSeries)):
if is_scalar(data):
raise ValueError('DatetimeIndex() must be called with a '
'collection of some kind, %s was passed'
% repr(data))
# other iterable of some kind
if not isinstance(data, (list, tuple)):
data = list(data)
data = np.asarray(data, dtype='O')
elif isinstance(data, ABCSeries):
data = data._values
# data must be Index or np.ndarray here
if not (is_datetime64_dtype(data) or is_datetimetz(data) or
is_integer_dtype(data)):
data = tools.to_datetime(data, dayfirst=dayfirst,
yearfirst=yearfirst)
if issubclass(data.dtype.type, np.datetime64) or is_datetimetz(data):
if isinstance(data, DatetimeIndex):
if tz is None:
tz = data.tz
elif data.tz is None:
data = data.tz_localize(tz, ambiguous=ambiguous)
else:
# the tz's must match
if str(tz) != str(data.tz):
msg = ('data is already tz-aware {0}, unable to '
'set specified tz: {1}')
raise TypeError(msg.format(data.tz, tz))
subarr = data.values
if freq is None:
freq = data.offset
verify_integrity = False
else:
if data.dtype != _NS_DTYPE:
subarr = tslib.cast_to_nanoseconds(data)
else:
subarr = data
else:
# must be integer dtype otherwise
if isinstance(data, Int64Index):
raise TypeError('cannot convert Int64Index->DatetimeIndex')
if data.dtype != _INT64_DTYPE:
data = data.astype(np.int64)
subarr = data.view(_NS_DTYPE)
if isinstance(subarr, DatetimeIndex):
if tz is None:
tz = subarr.tz
else:
if tz is not None:
tz = tslib.maybe_get_tz(tz)
if (not isinstance(data, DatetimeIndex) or
getattr(data, 'tz', None) is None):
# Convert tz-naive to UTC
ints = subarr.view('i8')
subarr = tslib.tz_localize_to_utc(ints, tz,
ambiguous=ambiguous)
subarr = subarr.view(_NS_DTYPE)
subarr = cls._simple_new(subarr, name=name, freq=freq, tz=tz)
if dtype is not None:
if not is_dtype_equal(subarr.dtype, dtype):
# dtype must be coerced to DatetimeTZDtype above
if subarr.tz is not None:
raise ValueError("cannot localize from non-UTC data")
if verify_integrity and len(subarr) > 0:
if freq is not None and not freq_infer:
inferred = subarr.inferred_freq
if inferred != freq.freqstr:
on_freq = cls._generate(subarr[0], None, len(subarr), None,
freq, tz=tz, ambiguous=ambiguous)
if not np.array_equal(subarr.asi8, on_freq.asi8):
raise ValueError('Inferred frequency {0} from passed '
'dates does not conform to passed '
'frequency {1}'
.format(inferred, freq.freqstr))
if freq_infer:
inferred = subarr.inferred_freq
if inferred:
subarr.offset = to_offset(inferred)
return subarr._deepcopy_if_needed(ref_to_data, copy)
@classmethod
def _generate(cls, start, end, periods, name, offset,
tz=None, normalize=False, ambiguous='raise', closed=None):
if com._count_not_none(start, end, periods) != 2:
raise ValueError('Must specify two of start, end, or periods')
_normalized = True
if start is not None:
start = Timestamp(start)
if end is not None:
end = Timestamp(end)
left_closed = False
right_closed = False
if start is None and end is None:
if closed is not None:
raise ValueError("Closed has to be None if not both of start"
"and end are defined")
if closed is None:
left_closed = True
right_closed = True
elif closed == "left":
left_closed = True
elif closed == "right":
right_closed = True
else:
raise ValueError("Closed has to be either 'left', 'right' or None")
try:
inferred_tz = tools._infer_tzinfo(start, end)
except:
raise TypeError('Start and end cannot both be tz-aware with '
'different timezones')
inferred_tz = tslib.maybe_get_tz(inferred_tz)
# these may need to be localized
tz = tslib.maybe_get_tz(tz)
if tz is not None:
date = start or end
if date.tzinfo is not None and hasattr(tz, 'localize'):
tz = tz.localize(date.replace(tzinfo=None)).tzinfo
if tz is not None and inferred_tz is not None:
if not inferred_tz == tz:
raise AssertionError("Inferred time zone not equal to passed "
"time zone")
elif inferred_tz is not None:
tz = inferred_tz
if start is not None:
if normalize:
start = normalize_date(start)
_normalized = True
else:
_normalized = _normalized and start.time() == _midnight
if end is not None:
if normalize:
end = normalize_date(end)
_normalized = True
else:
_normalized = _normalized and end.time() == _midnight
if hasattr(offset, 'delta') and offset != offsets.Day():
if inferred_tz is None and tz is not None:
# naive dates
if start is not None and start.tz is None:
start = start.tz_localize(tz, ambiguous=False)
if end is not None and end.tz is None:
end = end.tz_localize(tz, ambiguous=False)
if start and end:
if start.tz is None and end.tz is not None:
start = start.tz_localize(end.tz, ambiguous=False)
if end.tz is None and start.tz is not None:
end = end.tz_localize(start.tz, ambiguous=False)
if _use_cached_range(offset, _normalized, start, end):
index = cls._cached_range(start, end, periods=periods,
offset=offset, name=name)
else:
index = _generate_regular_range(start, end, periods, offset)
else:
if tz is not None:
# naive dates
if start is not None and start.tz is not None:
start = start.replace(tzinfo=None)
if end is not None and end.tz is not None:
end = end.replace(tzinfo=None)
if start and end:
if start.tz is None and end.tz is not None:
end = end.replace(tzinfo=None)
if end.tz is None and start.tz is not None:
start = start.replace(tzinfo=None)
if _use_cached_range(offset, _normalized, start, end):
index = cls._cached_range(start, end, periods=periods,
offset=offset, name=name)
else:
index = _generate_regular_range(start, end, periods, offset)
if tz is not None and getattr(index, 'tz', None) is None:
index = tslib.tz_localize_to_utc(_ensure_int64(index), tz,
ambiguous=ambiguous)
index = index.view(_NS_DTYPE)
# index is localized datetime64 array -> have to convert
# start/end as well to compare
if start is not None:
start = start.tz_localize(tz).asm8
if end is not None:
end = end.tz_localize(tz).asm8
if not left_closed and len(index) and index[0] == start:
index = index[1:]
if not right_closed and len(index) and index[-1] == end:
index = index[:-1]
index = cls._simple_new(index, name=name, freq=offset, tz=tz)
return index
@property
def _box_func(self):
return lambda x: Timestamp(x, freq=self.offset, tz=self.tz)
def _convert_for_op(self, value):
""" Convert value to be insertable to ndarray """
if self._has_same_tz(value):
return _to_m8(value)
raise ValueError('Passed item and index have different timezone')
def _local_timestamps(self):
utc = _utc()
if self.is_monotonic:
return tslib.tz_convert(self.asi8, utc, self.tz)
else:
values = self.asi8
indexer = values.argsort()
result = tslib.tz_convert(values.take(indexer), utc, self.tz)
n = len(indexer)
reverse = np.empty(n, dtype=np.int_)
reverse.put(indexer, np.arange(n))
return result.take(reverse)
@classmethod
def _simple_new(cls, values, name=None, freq=None, tz=None,
dtype=None, **kwargs):
"""
we require the we have a dtype compat for the values
if we are passed a non-dtype compat, then coerce using the constructor
"""
if not getattr(values, 'dtype', None):
# empty, but with dtype compat
if values is None:
values = np.empty(0, dtype=_NS_DTYPE)
return cls(values, name=name, freq=freq, tz=tz,
dtype=dtype, **kwargs)
values = np.array(values, copy=False)
if is_object_dtype(values):
return cls(values, name=name, freq=freq, tz=tz,
dtype=dtype, **kwargs).values
elif not is_datetime64_dtype(values):
values = _ensure_int64(values).view(_NS_DTYPE)
result = object.__new__(cls)
result._data = values
result.name = name
result.offset = freq
result.tz = tslib.maybe_get_tz(tz)
result._reset_identity()
return result
@property
def tzinfo(self):
"""
Alias for tz attribute
"""
return self.tz
@cache_readonly
def _timezone(self):
""" Comparable timezone both for pytz / dateutil"""
return tslib.get_timezone(self.tzinfo)
def _has_same_tz(self, other):
zzone = self._timezone
# vzone sholdn't be None if value is non-datetime like
if isinstance(other, np.datetime64):
# convert to Timestamp as np.datetime64 doesn't have tz attr
other = Timestamp(other)
vzone = tslib.get_timezone(getattr(other, 'tzinfo', '__no_tz__'))
return zzone == vzone
@classmethod
def _cached_range(cls, start=None, end=None, periods=None, offset=None,
name=None):
if start is None and end is None:
# I somewhat believe this should never be raised externally and
# therefore should be a `PandasError` but whatever...
raise TypeError('Must specify either start or end.')
if start is not None:
start = Timestamp(start)
if end is not None:
end = Timestamp(end)
if (start is None or end is None) and periods is None:
raise TypeError(
'Must either specify period or provide both start and end.')
if offset is None:
# This can't happen with external-facing code, therefore
# PandasError
raise TypeError('Must provide offset.')
drc = _daterange_cache
if offset not in _daterange_cache:
xdr = generate_range(offset=offset, start=_CACHE_START,
end=_CACHE_END)
arr = tools.to_datetime(list(xdr), box=False)
cachedRange = DatetimeIndex._simple_new(arr)
cachedRange.offset = offset
cachedRange.tz = None
cachedRange.name = None
drc[offset] = cachedRange
else:
cachedRange = drc[offset]
if start is None:
if not isinstance(end, Timestamp):
raise AssertionError('end must be an instance of Timestamp')
end = offset.rollback(end)
endLoc = cachedRange.get_loc(end) + 1
startLoc = endLoc - periods
elif end is None:
if not isinstance(start, Timestamp):
raise AssertionError('start must be an instance of Timestamp')
start = offset.rollforward(start)
startLoc = cachedRange.get_loc(start)
endLoc = startLoc + periods
else:
if not offset.onOffset(start):
start = offset.rollforward(start)
if not offset.onOffset(end):
end = offset.rollback(end)
startLoc = cachedRange.get_loc(start)
endLoc = cachedRange.get_loc(end) + 1
indexSlice = cachedRange[startLoc:endLoc]
indexSlice.name = name
indexSlice.offset = offset
return indexSlice
def _mpl_repr(self):
# how to represent ourselves to matplotlib
return tslib.ints_to_pydatetime(self.asi8, self.tz)
_na_value = tslib.NaT
"""The expected NA value to use with this index."""
@cache_readonly
def _is_dates_only(self):
from pandas.formats.format import _is_dates_only
return _is_dates_only(self.values)
@property
def _formatter_func(self):
from pandas.formats.format import _get_format_datetime64
formatter = _get_format_datetime64(is_dates_only=self._is_dates_only)
return lambda x: "'%s'" % formatter(x, tz=self.tz)
def __reduce__(self):
# we use a special reudce here because we need
# to simply set the .tz (and not reinterpret it)
d = dict(data=self._data)
d.update(self._get_attributes_dict())
return _new_DatetimeIndex, (self.__class__, d), None
def __setstate__(self, state):
"""Necessary for making this object picklable"""
if isinstance(state, dict):
super(DatetimeIndex, self).__setstate__(state)
elif isinstance(state, tuple):
# < 0.15 compat
if len(state) == 2:
nd_state, own_state = state
data = np.empty(nd_state[1], dtype=nd_state[2])
np.ndarray.__setstate__(data, nd_state)
self.name = own_state[0]
self.offset = own_state[1]
self.tz = own_state[2]
# provide numpy < 1.7 compat
if nd_state[2] == 'M8[us]':
new_state = np.ndarray.__reduce__(data.astype('M8[ns]'))
np.ndarray.__setstate__(data, new_state[2])
else: # pragma: no cover
data = np.empty(state)
np.ndarray.__setstate__(data, state)
self._data = data
self._reset_identity()
else:
raise Exception("invalid pickle state")
_unpickle_compat = __setstate__
def _add_datelike(self, other):
# adding a timedeltaindex to a datetimelike
if other is tslib.NaT:
return self._nat_new(box=True)
raise TypeError("cannot add a datelike to a DatetimeIndex")
def _sub_datelike(self, other):
# subtract a datetime from myself, yielding a TimedeltaIndex
from pandas import TimedeltaIndex
other = Timestamp(other)
if other is tslib.NaT:
result = self._nat_new(box=False)
# require tz compat
elif not self._has_same_tz(other):
raise TypeError("Timestamp subtraction must have the same "
"timezones or no timezones")
else:
i8 = self.asi8
result = i8 - other.value
result = self._maybe_mask_results(result, fill_value=tslib.iNaT)
return TimedeltaIndex(result, name=self.name, copy=False)
def _maybe_update_attributes(self, attrs):
""" Update Index attributes (e.g. freq) depending on op """
freq = attrs.get('freq', None)
if freq is not None:
# no need to infer if freq is None
attrs['freq'] = 'infer'
return attrs
def _add_delta(self, delta):
from pandas import TimedeltaIndex
name = self.name
if isinstance(delta, (Tick, timedelta, np.timedelta64)):
new_values = self._add_delta_td(delta)
elif isinstance(delta, TimedeltaIndex):
new_values = self._add_delta_tdi(delta)
# update name when delta is Index
name = com._maybe_match_name(self, delta)
elif isinstance(delta, DateOffset):
new_values = self._add_offset(delta).asi8
else:
new_values = self.astype('O') + delta
tz = 'UTC' if self.tz is not None else None
result = DatetimeIndex(new_values, tz=tz, name=name, freq='infer')
utc = _utc()
if self.tz is not None and self.tz is not utc:
result = result.tz_convert(self.tz)
return result
def _add_offset(self, offset):
try:
if self.tz is not None:
values = self.tz_localize(None)
else:
values = self
result = offset.apply_index(values)
if self.tz is not None:
result = result.tz_localize(self.tz)
return result
except NotImplementedError:
warnings.warn("Non-vectorized DateOffset being applied to Series "
"or DatetimeIndex", PerformanceWarning)
return self.astype('O') + offset
def _format_native_types(self, na_rep='NaT', date_format=None, **kwargs):
from pandas.formats.format import _get_format_datetime64_from_values
format = _get_format_datetime64_from_values(self, date_format)
return tslib.format_array_from_datetime(self.asi8,
tz=self.tz,
format=format,
na_rep=na_rep)
[docs] def to_datetime(self, dayfirst=False):
return self.copy()
@Appender(_index_shared_docs['astype'])
[docs] def astype(self, dtype, copy=True):
dtype = np.dtype(dtype)
if is_object_dtype(dtype):
return self.asobject
elif is_integer_dtype(dtype):
return Index(self.values.astype('i8', copy=copy), name=self.name,
dtype='i8')
elif is_datetime64_ns_dtype(dtype):
if self.tz is not None:
return self.tz_convert('UTC').tz_localize(None)
elif copy is True:
return self.copy()
return self
elif is_string_dtype(dtype):
return Index(self.format(), name=self.name, dtype=object)
raise ValueError('Cannot cast DatetimeIndex to dtype %s' % dtype)
def _get_time_micros(self):
utc = _utc()
values = self.asi8
if self.tz is not None and self.tz is not utc:
values = self._local_timestamps()
return tslib.get_time_micros(values)
[docs] def to_series(self, keep_tz=False):
"""
Create a Series with both index and values equal to the index keys
useful with map for returning an indexer based on an index
Parameters
----------
keep_tz : optional, defaults False.
return the data keeping the timezone.
If keep_tz is True:
If the timezone is not set, the resulting
Series will have a datetime64[ns] dtype.
Otherwise the Series will have an datetime64[ns, tz] dtype; the
tz will be preserved.
If keep_tz is False:
Series will have a datetime64[ns] dtype. TZ aware
objects will have the tz removed.
Returns
-------
Series
"""
from pandas import Series
return Series(self._to_embed(keep_tz), index=self, name=self.name)
def _to_embed(self, keep_tz=False):
"""
return an array repr of this object, potentially casting to object
This is for internal compat
"""
if keep_tz and self.tz is not None:
# preserve the tz & copy
return self.copy(deep=True)
return self.values.copy()
[docs] def to_pydatetime(self):
"""
Return DatetimeIndex as object ndarray of datetime.datetime objects
Returns
-------
datetimes : ndarray
"""
return tslib.ints_to_pydatetime(self.asi8, tz=self.tz)
[docs] def to_period(self, freq=None):
"""
Cast to PeriodIndex at a particular frequency
"""
from pandas.tseries.period import PeriodIndex
if freq is None:
freq = self.freqstr or self.inferred_freq
if freq is None:
msg = ("You must pass a freq argument as "
"current index has none.")
raise ValueError(msg)
freq = get_period_alias(freq)
return PeriodIndex(self.values, name=self.name, freq=freq, tz=self.tz)
[docs] def snap(self, freq='S'):
"""
Snap time stamps to nearest occurring frequency
"""
# Superdumb, punting on any optimizing
freq = to_offset(freq)
snapped = np.empty(len(self), dtype=_NS_DTYPE)
for i, v in enumerate(self):
s = v
if not freq.onOffset(s):
t0 = freq.rollback(s)
t1 = freq.rollforward(s)
if abs(s - t0) < abs(t1 - s):
s = t0
else:
s = t1
snapped[i] = s
# we know it conforms; skip check
return DatetimeIndex(snapped, freq=freq, verify_integrity=False)
[docs] def union(self, other):
"""
Specialized union for DatetimeIndex objects. If combine
overlapping ranges with the same DateOffset, will be much
faster than Index.union
Parameters
----------
other : DatetimeIndex or array-like
Returns
-------
y : Index or DatetimeIndex
"""
self._assert_can_do_setop(other)
if not isinstance(other, DatetimeIndex):
try:
other = DatetimeIndex(other)
except TypeError:
pass
this, other = self._maybe_utc_convert(other)
if this._can_fast_union(other):
return this._fast_union(other)
else:
result = Index.union(this, other)
if isinstance(result, DatetimeIndex):
result.tz = this.tz
if (result.freq is None and
(this.freq is not None or other.freq is not None)):
result.offset = to_offset(result.inferred_freq)
return result
[docs] def to_perioddelta(self, freq):
"""
Calcuates TimedeltaIndex of difference between index
values and index converted to PeriodIndex at specified
freq. Used for vectorized offsets
.. versionadded:: 0.17.0
Parameters
----------
freq : Period frequency
Returns
-------
y : TimedeltaIndex
"""
return to_timedelta(self.asi8 - self.to_period(freq)
.to_timestamp().asi8)
[docs] def union_many(self, others):
"""
A bit of a hack to accelerate unioning a collection of indexes
"""
this = self
for other in others:
if not isinstance(this, DatetimeIndex):
this = Index.union(this, other)
continue
if not isinstance(other, DatetimeIndex):
try:
other = DatetimeIndex(other)
except TypeError:
pass
this, other = this._maybe_utc_convert(other)
if this._can_fast_union(other):
this = this._fast_union(other)
else:
tz = this.tz
this = Index.union(this, other)
if isinstance(this, DatetimeIndex):
this.tz = tz
if this.freq is None:
this.offset = to_offset(this.inferred_freq)
return this
[docs] def append(self, other):
"""
Append a collection of Index options together
Parameters
----------
other : Index or list/tuple of indices
Returns
-------
appended : Index
"""
name = self.name
to_concat = [self]
if isinstance(other, (list, tuple)):
to_concat = to_concat + list(other)
else:
to_concat.append(other)
for obj in to_concat:
if isinstance(obj, Index) and obj.name != name:
name = None
break
to_concat = self._ensure_compat_concat(to_concat)
to_concat, factory = _process_concat_data(to_concat, name)
return factory(to_concat)
[docs] def join(self, other, how='left', level=None, return_indexers=False):
"""
See Index.join
"""
if (not isinstance(other, DatetimeIndex) and len(other) > 0 and
other.inferred_type not in ('floating', 'mixed-integer',
'mixed-integer-float', 'mixed')):
try:
other = DatetimeIndex(other)
except (TypeError, ValueError):
pass
this, other = self._maybe_utc_convert(other)
return Index.join(this, other, how=how, level=level,
return_indexers=return_indexers)
def _maybe_utc_convert(self, other):
this = self
if isinstance(other, DatetimeIndex):
if self.tz is not None:
if other.tz is None:
raise TypeError('Cannot join tz-naive with tz-aware '
'DatetimeIndex')
elif other.tz is not None:
raise TypeError('Cannot join tz-naive with tz-aware '
'DatetimeIndex')
if self.tz != other.tz:
this = self.tz_convert('UTC')
other = other.tz_convert('UTC')
return this, other
def _wrap_joined_index(self, joined, other):
name = self.name if self.name == other.name else None
if (isinstance(other, DatetimeIndex) and
self.offset == other.offset and
self._can_fast_union(other)):
joined = self._shallow_copy(joined)
joined.name = name
return joined
else:
tz = getattr(other, 'tz', None)
return self._simple_new(joined, name, tz=tz)
def _can_fast_union(self, other):
if not isinstance(other, DatetimeIndex):
return False
offset = self.offset
if offset is None or offset != other.offset:
return False
if not self.is_monotonic or not other.is_monotonic:
return False
if len(self) == 0 or len(other) == 0:
return True
# to make our life easier, "sort" the two ranges
if self[0] <= other[0]:
left, right = self, other
else:
left, right = other, self
right_start = right[0]
left_end = left[-1]
# Only need to "adjoin", not overlap
try:
return (right_start == left_end + offset) or right_start in left
except (ValueError):
# if we are comparing an offset that does not propagate timezones
# this will raise
return False
def _fast_union(self, other):
if len(other) == 0:
return self.view(type(self))
if len(self) == 0:
return other.view(type(self))
# to make our life easier, "sort" the two ranges
if self[0] <= other[0]:
left, right = self, other
else:
left, right = other, self
left_start, left_end = left[0], left[-1]
right_end = right[-1]
if not self.offset._should_cache():
# concatenate dates
if left_end < right_end:
loc = right.searchsorted(left_end, side='right')
right_chunk = right.values[loc:]
dates = _concat._concat_compat((left.values, right_chunk))
return self._shallow_copy(dates)
else:
return left
else:
return type(self)(start=left_start,
end=max(left_end, right_end),
freq=left.offset)
def __iter__(self):
"""
Return an iterator over the boxed values
Returns
-------
Timestamps : ndarray
"""
# convert in chunks of 10k for efficiency
data = self.asi8
l = len(self)
chunksize = 10000
chunks = int(l / chunksize) + 1
for i in range(chunks):
start_i = i * chunksize
end_i = min((i + 1) * chunksize, l)
converted = tslib.ints_to_pydatetime(data[start_i:end_i],
tz=self.tz, freq=self.freq,
box=True)
for v in converted:
yield v
def _wrap_union_result(self, other, result):
name = self.name if self.name == other.name else None
if self.tz != other.tz:
raise ValueError('Passed item and index have different timezone')
return self._simple_new(result, name=name, freq=None, tz=self.tz)
[docs] def intersection(self, other):
"""
Specialized intersection for DatetimeIndex objects. May be much faster
than Index.intersection
Parameters
----------
other : DatetimeIndex or array-like
Returns
-------
y : Index or DatetimeIndex
"""
self._assert_can_do_setop(other)
if not isinstance(other, DatetimeIndex):
try:
other = DatetimeIndex(other)
except (TypeError, ValueError):
pass
result = Index.intersection(self, other)
if isinstance(result, DatetimeIndex):
if result.freq is None:
result.offset = to_offset(result.inferred_freq)
return result
elif (other.offset is None or self.offset is None or
other.offset != self.offset or
not other.offset.isAnchored() or
(not self.is_monotonic or not other.is_monotonic)):
result = Index.intersection(self, other)
if isinstance(result, DatetimeIndex):
if result.freq is None:
result.offset = to_offset(result.inferred_freq)
return result
if len(self) == 0:
return self
if len(other) == 0:
return other
# to make our life easier, "sort" the two ranges
if self[0] <= other[0]:
left, right = self, other
else:
left, right = other, self
end = min(left[-1], right[-1])
start = right[0]
if end < start:
return type(self)(data=[])
else:
lslice = slice(*left.slice_locs(start, end))
left_chunk = left.values[lslice]
return self._shallow_copy(left_chunk)
def _parsed_string_to_bounds(self, reso, parsed):
"""
Calculate datetime bounds for parsed time string and its resolution.
Parameters
----------
reso : Resolution
Resolution provided by parsed string.
parsed : datetime
Datetime from parsed string.
Returns
-------
lower, upper: pd.Timestamp
"""
if reso == 'year':
return (Timestamp(datetime(parsed.year, 1, 1), tz=self.tz),
Timestamp(datetime(parsed.year, 12, 31, 23,
59, 59, 999999), tz=self.tz))
elif reso == 'month':
d = tslib.monthrange(parsed.year, parsed.month)[1]
return (Timestamp(datetime(parsed.year, parsed.month, 1),
tz=self.tz),
Timestamp(datetime(parsed.year, parsed.month, d, 23,
59, 59, 999999), tz=self.tz))
elif reso == 'quarter':
qe = (((parsed.month - 1) + 2) % 12) + 1 # two months ahead
d = tslib.monthrange(parsed.year, qe)[1] # at end of month
return (Timestamp(datetime(parsed.year, parsed.month, 1),
tz=self.tz),
Timestamp(datetime(parsed.year, qe, d, 23, 59,
59, 999999), tz=self.tz))
elif reso == 'day':
st = datetime(parsed.year, parsed.month, parsed.day)
return (Timestamp(st, tz=self.tz),
Timestamp(Timestamp(st + offsets.Day(),
tz=self.tz).value - 1))
elif reso == 'hour':
st = datetime(parsed.year, parsed.month, parsed.day,
hour=parsed.hour)
return (Timestamp(st, tz=self.tz),
Timestamp(Timestamp(st + offsets.Hour(),
tz=self.tz).value - 1))
elif reso == 'minute':
st = datetime(parsed.year, parsed.month, parsed.day,
hour=parsed.hour, minute=parsed.minute)
return (Timestamp(st, tz=self.tz),
Timestamp(Timestamp(st + offsets.Minute(),
tz=self.tz).value - 1))
elif reso == 'second':
st = datetime(parsed.year, parsed.month, parsed.day,
hour=parsed.hour, minute=parsed.minute,
second=parsed.second)
return (Timestamp(st, tz=self.tz),
Timestamp(Timestamp(st + offsets.Second(),
tz=self.tz).value - 1))
elif reso == 'microsecond':
st = datetime(parsed.year, parsed.month, parsed.day,
parsed.hour, parsed.minute, parsed.second,
parsed.microsecond)
return (Timestamp(st, tz=self.tz), Timestamp(st, tz=self.tz))
else:
raise KeyError
def _partial_date_slice(self, reso, parsed, use_lhs=True, use_rhs=True):
is_monotonic = self.is_monotonic
if ((reso in ['day', 'hour', 'minute'] and
not (self._resolution < Resolution.get_reso(reso) or
not is_monotonic)) or
(reso == 'second' and
not (self._resolution <= Resolution.RESO_SEC or
not is_monotonic))):
# These resolution/monotonicity validations came from GH3931,
# GH3452 and GH2369.
raise KeyError
if reso == 'microsecond':
# _partial_date_slice doesn't allow microsecond resolution, but
# _parsed_string_to_bounds allows it.
raise KeyError
t1, t2 = self._parsed_string_to_bounds(reso, parsed)
stamps = self.asi8
if is_monotonic:
# we are out of range
if (len(stamps) and ((use_lhs and t1.value < stamps[0] and
t2.value < stamps[0]) or
((use_rhs and t1.value > stamps[-1] and
t2.value > stamps[-1])))):
raise KeyError
# a monotonic (sorted) series can be sliced
left = stamps.searchsorted(
t1.value, side='left') if use_lhs else None
right = stamps.searchsorted(
t2.value, side='right') if use_rhs else None
return slice(left, right)
lhs_mask = (stamps >= t1.value) if use_lhs else True
rhs_mask = (stamps <= t2.value) if use_rhs else True
# try to find a the dates
return (lhs_mask & rhs_mask).nonzero()[0]
def _possibly_promote(self, other):
if other.inferred_type == 'date':
other = DatetimeIndex(other)
return self, other
[docs] def get_value(self, series, key):
"""
Fast lookup of value from 1-dimensional ndarray. Only use this if you
know what you're doing
"""
if isinstance(key, datetime):
# needed to localize naive datetimes
if self.tz is not None:
key = Timestamp(key, tz=self.tz)
return self.get_value_maybe_box(series, key)
if isinstance(key, time):
locs = self.indexer_at_time(key)
return series.take(locs)
try:
return _maybe_box(self, Index.get_value(self, series, key),
series, key)
except KeyError:
try:
loc = self._get_string_slice(key)
return series[loc]
except (TypeError, ValueError, KeyError):
pass
try:
return self.get_value_maybe_box(series, key)
except (TypeError, ValueError, KeyError):
raise KeyError(key)
[docs] def get_value_maybe_box(self, series, key):
# needed to localize naive datetimes
if self.tz is not None:
key = Timestamp(key, tz=self.tz)
elif not isinstance(key, Timestamp):
key = Timestamp(key)
values = self._engine.get_value(_values_from_object(series),
key, tz=self.tz)
return _maybe_box(self, values, series, key)
[docs] def get_loc(self, key, method=None, tolerance=None):
"""
Get integer location for requested label
Returns
-------
loc : int
"""
if tolerance is not None:
# try converting tolerance now, so errors don't get swallowed by
# the try/except clauses below
tolerance = self._convert_tolerance(tolerance)
if isinstance(key, datetime):
# needed to localize naive datetimes
key = Timestamp(key, tz=self.tz)
return Index.get_loc(self, key, method, tolerance)
if isinstance(key, time):
if method is not None:
raise NotImplementedError('cannot yet lookup inexact labels '
'when key is a time object')
return self.indexer_at_time(key)
try:
return Index.get_loc(self, key, method, tolerance)
except (KeyError, ValueError, TypeError):
try:
return self._get_string_slice(key)
except (TypeError, KeyError, ValueError):
pass
try:
stamp = Timestamp(key, tz=self.tz)
return Index.get_loc(self, stamp, method, tolerance)
except (KeyError, ValueError):
raise KeyError(key)
def _maybe_cast_slice_bound(self, label, side, kind):
"""
If label is a string, cast it to datetime according to resolution.
Parameters
----------
label : object
side : {'left', 'right'}
kind : {'ix', 'loc', 'getitem'}
Returns
-------
label : object
Notes
-----
Value of `side` parameter should be validated in caller.
"""
assert kind in ['ix', 'loc', 'getitem', None]
if is_float(label) or isinstance(label, time) or is_integer(label):
self._invalid_indexer('slice', label)
if isinstance(label, compat.string_types):
freq = getattr(self, 'freqstr',
getattr(self, 'inferred_freq', None))
_, parsed, reso = parse_time_string(label, freq)
bounds = self._parsed_string_to_bounds(reso, parsed)
return bounds[0 if side == 'left' else 1]
else:
return label
def _get_string_slice(self, key, use_lhs=True, use_rhs=True):
freq = getattr(self, 'freqstr',
getattr(self, 'inferred_freq', None))
_, parsed, reso = parse_time_string(key, freq)
loc = self._partial_date_slice(reso, parsed, use_lhs=use_lhs,
use_rhs=use_rhs)
return loc
[docs] def slice_indexer(self, start=None, end=None, step=None, kind=None):
"""
Return indexer for specified label slice.
Index.slice_indexer, customized to handle time slicing.
In addition to functionality provided by Index.slice_indexer, does the
following:
- if both `start` and `end` are instances of `datetime.time`, it
invokes `indexer_between_time`
- if `start` and `end` are both either string or None perform
value-based selection in non-monotonic cases.
"""
# For historical reasons DatetimeIndex supports slices between two
# instances of datetime.time as if it were applying a slice mask to
# an array of (self.hour, self.minute, self.seconds, self.microsecond).
if isinstance(start, time) and isinstance(end, time):
if step is not None and step != 1:
raise ValueError('Must have step size of 1 with time slices')
return self.indexer_between_time(start, end)
if isinstance(start, time) or isinstance(end, time):
raise KeyError('Cannot mix time and non-time slice keys')
try:
return Index.slice_indexer(self, start, end, step, kind=kind)
except KeyError:
# For historical reasons DatetimeIndex by default supports
# value-based partial (aka string) slices on non-monotonic arrays,
# let's try that.
if ((start is None or isinstance(start, compat.string_types)) and
(end is None or isinstance(end, compat.string_types))):
mask = True
if start is not None:
start_casted = self._maybe_cast_slice_bound(
start, 'left', kind)
mask = start_casted <= self
if end is not None:
end_casted = self._maybe_cast_slice_bound(
end, 'right', kind)
mask = (self <= end_casted) & mask
indexer = mask.nonzero()[0][::step]
if len(indexer) == len(self):
return slice(None)
else:
return indexer
else:
raise
# alias to offset
def _get_freq(self):
return self.offset
def _set_freq(self, value):
self.offset = value
freq = property(fget=_get_freq, fset=_set_freq,
doc="get/set the frequncy of the Index")
year = _field_accessor('year', 'Y', "The year of the datetime")
month = _field_accessor('month', 'M',
"The month as January=1, December=12")
day = _field_accessor('day', 'D', "The days of the datetime")
hour = _field_accessor('hour', 'h', "The hours of the datetime")
minute = _field_accessor('minute', 'm', "The minutes of the datetime")
second = _field_accessor('second', 's', "The seconds of the datetime")
microsecond = _field_accessor('microsecond', 'us',
"The microseconds of the datetime")
nanosecond = _field_accessor('nanosecond', 'ns',
"The nanoseconds of the datetime")
weekofyear = _field_accessor('weekofyear', 'woy',
"The week ordinal of the year")
week = weekofyear
dayofweek = _field_accessor('dayofweek', 'dow',
"The day of the week with Monday=0, Sunday=6")
weekday = dayofweek
weekday_name = _field_accessor(
'weekday_name',
'weekday_name',
"The name of day in a week (ex: Friday)\n\n.. versionadded:: 0.18.1")
dayofyear = _field_accessor('dayofyear', 'doy',
"The ordinal day of the year")
quarter = _field_accessor('quarter', 'q', "The quarter of the date")
days_in_month = _field_accessor(
'days_in_month',
'dim',
"The number of days in the month\n\n.. versionadded:: 0.16.0")
daysinmonth = days_in_month
is_month_start = _field_accessor(
'is_month_start',
'is_month_start',
"Logical indicating if first day of month (defined by frequency)")
is_month_end = _field_accessor(
'is_month_end',
'is_month_end',
"Logical indicating if last day of month (defined by frequency)")
is_quarter_start = _field_accessor(
'is_quarter_start',
'is_quarter_start',
"Logical indicating if first day of quarter (defined by frequency)")
is_quarter_end = _field_accessor(
'is_quarter_end',
'is_quarter_end',
"Logical indicating if last day of quarter (defined by frequency)")
is_year_start = _field_accessor(
'is_year_start',
'is_year_start',
"Logical indicating if first day of year (defined by frequency)")
is_year_end = _field_accessor(
'is_year_end',
'is_year_end',
"Logical indicating if last day of year (defined by frequency)")
is_leap_year = _field_accessor(
'is_leap_year',
'is_leap_year',
"Logical indicating if the date belongs to a leap year")
@property
def time(self):
"""
Returns numpy array of datetime.time. The time part of the Timestamps.
"""
return self._maybe_mask_results(_algos.arrmap_object(
self.asobject.values,
lambda x: np.nan if x is tslib.NaT else x.time()))
@property
def date(self):
"""
Returns numpy array of datetime.date. The date part of the Timestamps.
"""
return self._maybe_mask_results(_algos.arrmap_object(
self.asobject.values, lambda x: x.date()))
[docs] def normalize(self):
"""
Return DatetimeIndex with times to midnight. Length is unaltered
Returns
-------
normalized : DatetimeIndex
"""
new_values = tslib.date_normalize(self.asi8, self.tz)
return DatetimeIndex(new_values, freq='infer', name=self.name,
tz=self.tz)
@Substitution(klass='DatetimeIndex', value='key')
@Appender(_shared_docs['searchsorted'])
[docs] def searchsorted(self, key, side='left', sorter=None):
if isinstance(key, (np.ndarray, Index)):
key = np.array(key, dtype=_NS_DTYPE, copy=False)
else:
key = _to_m8(key, tz=self.tz)
return self.values.searchsorted(key, side=side)
[docs] def is_type_compatible(self, typ):
return typ == self.inferred_type or typ == 'datetime'
@property
def inferred_type(self):
# b/c datetime is represented as microseconds since the epoch, make
# sure we can't have ambiguous indexing
return 'datetime64'
@cache_readonly
def dtype(self):
if self.tz is None:
return _NS_DTYPE
return DatetimeTZDtype('ns', self.tz)
@property
def is_all_dates(self):
return True
@cache_readonly
def is_normalized(self):
"""
Returns True if all of the dates are at midnight ("no time")
"""
return tslib.dates_normalized(self.asi8, self.tz)
@cache_readonly
def _resolution(self):
return period.resolution(self.asi8, self.tz)
[docs] def equals(self, other):
"""
Determines if two Index objects contain the same elements.
"""
if self.is_(other):
return True
if (not hasattr(other, 'inferred_type') or
other.inferred_type != 'datetime64'):
if self.offset is not None:
return False
try:
other = DatetimeIndex(other)
except:
return False
if self._has_same_tz(other):
return np.array_equal(self.asi8, other.asi8)
return False
[docs] def insert(self, loc, item):
"""
Make new Index inserting new item at location
Parameters
----------
loc : int
item : object
if not either a Python datetime or a numpy integer-like, returned
Index dtype will be object rather than datetime.
Returns
-------
new_index : Index
"""
freq = None
if isinstance(item, (datetime, np.datetime64)):
self._assert_can_do_op(item)
if not self._has_same_tz(item):
raise ValueError(
'Passed item and index have different timezone')
# check freq can be preserved on edge cases
if self.size and self.freq is not None:
if ((loc == 0 or loc == -len(self)) and
item + self.freq == self[0]):
freq = self.freq
elif (loc == len(self)) and item - self.freq == self[-1]:
freq = self.freq
item = _to_m8(item, tz=self.tz)
try:
new_dates = np.concatenate((self[:loc].asi8, [item.view(np.int64)],
self[loc:].asi8))
if self.tz is not None:
new_dates = tslib.tz_convert(new_dates, 'UTC', self.tz)
return DatetimeIndex(new_dates, name=self.name, freq=freq,
tz=self.tz)
except (AttributeError, TypeError):
# fall back to object index
if isinstance(item, compat.string_types):
return self.asobject.insert(loc, item)
raise TypeError(
"cannot insert DatetimeIndex with incompatible label")
[docs] def delete(self, loc):
"""
Make a new DatetimeIndex with passed location(s) deleted.
Parameters
----------
loc: int, slice or array of ints
Indicate which sub-arrays to remove.
Returns
-------
new_index : DatetimeIndex
"""
new_dates = np.delete(self.asi8, loc)
freq = None
if is_integer(loc):
if loc in (0, -len(self), -1, len(self) - 1):
freq = self.freq
else:
if is_list_like(loc):
loc = lib.maybe_indices_to_slice(
_ensure_int64(np.array(loc)), len(self))
if isinstance(loc, slice) and loc.step in (1, None):
if (loc.start in (0, None) or loc.stop in (len(self), None)):
freq = self.freq
if self.tz is not None:
new_dates = tslib.tz_convert(new_dates, 'UTC', self.tz)
return DatetimeIndex(new_dates, name=self.name, freq=freq, tz=self.tz)
[docs] def tz_convert(self, tz):
"""
Convert tz-aware DatetimeIndex from one time zone to another (using
pytz/dateutil)
Parameters
----------
tz : string, pytz.timezone, dateutil.tz.tzfile or None
Time zone for time. Corresponding timestamps would be converted to
time zone of the TimeSeries.
None will remove timezone holding UTC time.
Returns
-------
normalized : DatetimeIndex
Raises
------
TypeError
If DatetimeIndex is tz-naive.
"""
tz = tslib.maybe_get_tz(tz)
if self.tz is None:
# tz naive, use tz_localize
raise TypeError('Cannot convert tz-naive timestamps, use '
'tz_localize to localize')
# No conversion since timestamps are all UTC to begin with
return self._shallow_copy(tz=tz)
@deprecate_kwarg(old_arg_name='infer_dst', new_arg_name='ambiguous',
mapping={True: 'infer', False: 'raise'})
[docs] def tz_localize(self, tz, ambiguous='raise', errors='raise'):
"""
Localize tz-naive DatetimeIndex to given time zone (using
pytz/dateutil), or remove timezone from tz-aware DatetimeIndex
Parameters
----------
tz : string, pytz.timezone, dateutil.tz.tzfile or None
Time zone for time. Corresponding timestamps would be converted to
time zone of the TimeSeries.
None will remove timezone holding local time.
ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise'
- 'infer' will attempt to infer fall dst-transition hours based on
order
- bool-ndarray where True signifies a DST time, False signifies a
non-DST time (note that this flag is only applicable for
ambiguous times)
- 'NaT' will return NaT where there are ambiguous times
- 'raise' will raise an AmbiguousTimeError if there are ambiguous
times
errors : 'raise', 'coerce', default 'raise'
- 'raise' will raise a NonExistentTimeError if a timestamp is not
valid in the specified timezone (e.g. due to a transition from
or to DST time)
- 'coerce' will return NaT if the timestamp can not be converted
into the specified timezone
.. versionadded:: 0.19.0
infer_dst : boolean, default False (DEPRECATED)
Attempt to infer fall dst-transition hours based on order
Returns
-------
localized : DatetimeIndex
Raises
------
TypeError
If the DatetimeIndex is tz-aware and tz is not None.
"""
if self.tz is not None:
if tz is None:
new_dates = tslib.tz_convert(self.asi8, 'UTC', self.tz)
else:
raise TypeError("Already tz-aware, use tz_convert to convert.")
else:
tz = tslib.maybe_get_tz(tz)
# Convert to UTC
new_dates = tslib.tz_localize_to_utc(self.asi8, tz,
ambiguous=ambiguous,
errors=errors)
new_dates = new_dates.view(_NS_DTYPE)
return self._shallow_copy(new_dates, tz=tz)
[docs] def indexer_at_time(self, time, asof=False):
"""
Select values at particular time of day (e.g. 9:30AM)
Parameters
----------
time : datetime.time or string
Returns
-------
values_at_time : TimeSeries
"""
from dateutil.parser import parse
if asof:
raise NotImplementedError("'asof' argument is not supported")
if isinstance(time, compat.string_types):
time = parse(time).time()
if time.tzinfo:
# TODO
raise NotImplementedError("argument 'time' with timezone info is "
"not supported")
time_micros = self._get_time_micros()
micros = _time_to_micros(time)
return (micros == time_micros).nonzero()[0]
[docs] def indexer_between_time(self, start_time, end_time, include_start=True,
include_end=True):
"""
Select values between particular times of day (e.g., 9:00-9:30AM).
Return values of the index between two times. If start_time or
end_time are strings then tseres.tools.to_time is used to convert to
a time object.
Parameters
----------
start_time, end_time : datetime.time, str
datetime.time or string in appropriate format ("%H:%M", "%H%M",
"%I:%M%p", "%I%M%p", "%H:%M:%S", "%H%M%S", "%I:%M:%S%p",
"%I%M%S%p")
include_start : boolean, default True
include_end : boolean, default True
Returns
-------
values_between_time : TimeSeries
"""
start_time = to_time(start_time)
end_time = to_time(end_time)
time_micros = self._get_time_micros()
start_micros = _time_to_micros(start_time)
end_micros = _time_to_micros(end_time)
if include_start and include_end:
lop = rop = operator.le
elif include_start:
lop = operator.le
rop = operator.lt
elif include_end:
lop = operator.lt
rop = operator.le
else:
lop = rop = operator.lt
if start_time <= end_time:
join_op = operator.and_
else:
join_op = operator.or_
mask = join_op(lop(start_micros, time_micros),
rop(time_micros, end_micros))
return mask.nonzero()[0]
[docs] def to_julian_date(self):
"""
Convert DatetimeIndex to Float64Index of Julian Dates.
0 Julian date is noon January 1, 4713 BC.
http://en.wikipedia.org/wiki/Julian_day
"""
# http://mysite.verizon.net/aesir_research/date/jdalg2.htm
year = self.year
month = self.month
day = self.day
testarr = month < 3
year[testarr] -= 1
month[testarr] += 12
return Float64Index(day +
np.fix((153 * month - 457) / 5) +
365 * year +
np.floor(year / 4) -
np.floor(year / 100) +
np.floor(year / 400) +
1721118.5 +
(self.hour +
self.minute / 60.0 +
self.second / 3600.0 +
self.microsecond / 3600.0 / 1e+6 +
self.nanosecond / 3600.0 / 1e+9
) / 24.0)
DatetimeIndex._add_numeric_methods_disabled()
DatetimeIndex._add_logical_methods_disabled()
DatetimeIndex._add_datetimelike_methods()
def _generate_regular_range(start, end, periods, offset):
if isinstance(offset, Tick):
stride = offset.nanos
if periods is None:
b = Timestamp(start).value
# cannot just use e = Timestamp(end) + 1 because arange breaks when
# stride is too large, see GH10887
e = (b + (Timestamp(end).value - b) // stride * stride +
stride // 2 + 1)
# end.tz == start.tz by this point due to _generate implementation
tz = start.tz
elif start is not None:
b = Timestamp(start).value
e = b + np.int64(periods) * stride
tz = start.tz
elif end is not None:
e = Timestamp(end).value + stride
b = e - np.int64(periods) * stride
tz = end.tz
else:
raise ValueError("at least 'start' or 'end' should be specified "
"if a 'period' is given.")
data = np.arange(b, e, stride, dtype=np.int64)
data = DatetimeIndex._simple_new(data, None, tz=tz)
else:
if isinstance(start, Timestamp):
start = start.to_pydatetime()
if isinstance(end, Timestamp):
end = end.to_pydatetime()
xdr = generate_range(start=start, end=end,
periods=periods, offset=offset)
dates = list(xdr)
# utc = len(dates) > 0 and dates[0].tzinfo is not None
data = tools.to_datetime(dates)
return data
[docs]def date_range(start=None, end=None, periods=None, freq='D', tz=None,
normalize=False, name=None, closed=None, **kwargs):
"""
Return a fixed frequency datetime index, with day (calendar) as the default
frequency
Parameters
----------
start : string or datetime-like, default None
Left bound for generating dates
end : string or datetime-like, default None
Right bound for generating dates
periods : integer or None, default None
If None, must specify start and end
freq : string or DateOffset, default 'D' (calendar daily)
Frequency strings can have multiples, e.g. '5H'
tz : string or None
Time zone name for returning localized DatetimeIndex, for example
Asia/Hong_Kong
normalize : bool, default False
Normalize start/end dates to midnight before generating date range
name : str, default None
Name of the resulting index
closed : string or None, default None
Make the interval closed with respect to the given frequency to
the 'left', 'right', or both sides (None)
Notes
-----
2 of start, end, or periods must be specified
To learn more about the frequency strings, please see `this link
<http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
Returns
-------
rng : DatetimeIndex
"""
return DatetimeIndex(start=start, end=end, periods=periods,
freq=freq, tz=tz, normalize=normalize, name=name,
closed=closed, **kwargs)
[docs]def bdate_range(start=None, end=None, periods=None, freq='B', tz=None,
normalize=True, name=None, closed=None, **kwargs):
"""
Return a fixed frequency datetime index, with business day as the default
frequency
Parameters
----------
start : string or datetime-like, default None
Left bound for generating dates
end : string or datetime-like, default None
Right bound for generating dates
periods : integer or None, default None
If None, must specify start and end
freq : string or DateOffset, default 'B' (business daily)
Frequency strings can have multiples, e.g. '5H'
tz : string or None
Time zone name for returning localized DatetimeIndex, for example
Asia/Beijing
normalize : bool, default False
Normalize start/end dates to midnight before generating date range
name : str, default None
Name for the resulting index
closed : string or None, default None
Make the interval closed with respect to the given frequency to
the 'left', 'right', or both sides (None)
Notes
-----
2 of start, end, or periods must be specified
To learn more about the frequency strings, please see `this link
<http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
Returns
-------
rng : DatetimeIndex
"""
return DatetimeIndex(start=start, end=end, periods=periods,
freq=freq, tz=tz, normalize=normalize, name=name,
closed=closed, **kwargs)
def cdate_range(start=None, end=None, periods=None, freq='C', tz=None,
normalize=True, name=None, closed=None, **kwargs):
"""
**EXPERIMENTAL** Return a fixed frequency datetime index, with
CustomBusinessDay as the default frequency
.. warning:: EXPERIMENTAL
The CustomBusinessDay class is not officially supported and the API is
likely to change in future versions. Use this at your own risk.
Parameters
----------
start : string or datetime-like, default None
Left bound for generating dates
end : string or datetime-like, default None
Right bound for generating dates
periods : integer or None, default None
If None, must specify start and end
freq : string or DateOffset, default 'C' (CustomBusinessDay)
Frequency strings can have multiples, e.g. '5H'
tz : string or None
Time zone name for returning localized DatetimeIndex, for example
Asia/Beijing
normalize : bool, default False
Normalize start/end dates to midnight before generating date range
name : str, default None
Name for the resulting index
weekmask : str, Default 'Mon Tue Wed Thu Fri'
weekmask of valid business days, passed to ``numpy.busdaycalendar``
holidays : list
list/array of dates to exclude from the set of valid business days,
passed to ``numpy.busdaycalendar``
closed : string or None, default None
Make the interval closed with respect to the given frequency to
the 'left', 'right', or both sides (None)
Notes
-----
2 of start, end, or periods must be specified
To learn more about the frequency strings, please see `this link
<http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
Returns
-------
rng : DatetimeIndex
"""
if freq == 'C':
holidays = kwargs.pop('holidays', [])
weekmask = kwargs.pop('weekmask', 'Mon Tue Wed Thu Fri')
freq = CDay(holidays=holidays, weekmask=weekmask)
return DatetimeIndex(start=start, end=end, periods=periods, freq=freq,
tz=tz, normalize=normalize, name=name,
closed=closed, **kwargs)
def _to_m8(key, tz=None):
"""
Timestamp-like => dt64
"""
if not isinstance(key, Timestamp):
# this also converts strings
key = Timestamp(key, tz=tz)
return np.int64(tslib.pydt_to_i8(key)).view(_NS_DTYPE)
_CACHE_START = Timestamp(datetime(1950, 1, 1))
_CACHE_END = Timestamp(datetime(2030, 1, 1))
_daterange_cache = {}
def _naive_in_cache_range(start, end):
if start is None or end is None:
return False
else:
if start.tzinfo is not None or end.tzinfo is not None:
return False
return _in_range(start, end, _CACHE_START, _CACHE_END)
def _in_range(start, end, rng_start, rng_end):
return start > rng_start and end < rng_end
def _use_cached_range(offset, _normalized, start, end):
return (offset._should_cache() and
not (offset._normalize_cache and not _normalized) and
_naive_in_cache_range(start, end))
def _time_to_micros(time):
seconds = time.hour * 60 * 60 + 60 * time.minute + time.second
return 1000000 * seconds + time.microsecond
def _process_concat_data(to_concat, name):
klass = Index
kwargs = {}
concat = np.concatenate
all_dti = True
need_utc_convert = False
has_naive = False
tz = None
for x in to_concat:
if not isinstance(x, DatetimeIndex):
all_dti = False
else:
if tz is None:
tz = x.tz
if x.tz is None:
has_naive = True
if x.tz != tz:
need_utc_convert = True
tz = 'UTC'
if all_dti:
need_obj_convert = False
if has_naive and tz is not None:
need_obj_convert = True
if need_obj_convert:
to_concat = [x.asobject.values for x in to_concat]
else:
if need_utc_convert:
to_concat = [x.tz_convert('UTC').values for x in to_concat]
else:
to_concat = [x.values for x in to_concat]
# well, technically not a "class" anymore...oh well
klass = DatetimeIndex._simple_new
kwargs = {'tz': tz}
concat = _concat._concat_compat
else:
for i, x in enumerate(to_concat):
if isinstance(x, DatetimeIndex):
to_concat[i] = x.asobject.values
elif isinstance(x, Index):
to_concat[i] = x.values
factory_func = lambda x: klass(concat(x), name=name, **kwargs)
return to_concat, factory_func