from datetime import timedelta
import numpy as np
import warnings
import copy
import pandas as pd
from pandas.core.base import AbstractMethodError, GroupByMixin
from pandas.core.groupby import (BinGrouper, Grouper, _GroupBy, GroupBy,
SeriesGroupBy, groupby, PanelGroupBy)
from pandas.tseries.frequencies import to_offset, is_subperiod, is_superperiod
from pandas.tseries.index import DatetimeIndex, date_range
from pandas.tseries.tdi import TimedeltaIndex
from pandas.tseries.offsets import DateOffset, Tick, Day, _delta_to_nanoseconds
from pandas.tseries.period import PeriodIndex, period_range
import pandas.core.common as com
import pandas.core.algorithms as algos
import pandas.compat as compat
from pandas.compat.numpy import function as nv
from pandas.lib import Timestamp
from pandas._period import IncompatibleFrequency
import pandas.lib as lib
import pandas.tslib as tslib
from pandas.util.decorators import Appender
from pandas.core.generic import _shared_docs
_shared_docs_kwargs = dict()
class Resampler(_GroupBy):
"""
Class for resampling datetimelike data, a groupby-like operation.
See aggregate, transform, and apply functions on this object.
It's easiest to use obj.resample(...) to use Resampler.
Parameters
----------
obj : pandas object
groupby : a TimeGrouper object
axis : int, default 0
kind : str or None
'period', 'timestamp' to override default index treatement
Notes
-----
After resampling, see aggregate, apply, and transform functions.
Returns
-------
a Resampler of the appropriate type
"""
# to the groupby descriptor
_attributes = ['freq', 'axis', 'closed', 'label', 'convention',
'loffset', 'base', 'kind']
# API compat of allowed attributes
_deprecated_valids = _attributes + ['__doc__', '_cache', '_attributes',
'binner', 'grouper', 'groupby',
'sort', 'kind', 'squeeze', 'keys',
'group_keys', 'as_index', 'exclusions',
'_groupby']
# don't raise deprecation warning on attributes starting with these
# patterns - prevents warnings caused by IPython introspection
_deprecated_valid_patterns = ['_ipython', '_repr']
# API compat of disallowed attributes
_deprecated_invalids = ['iloc', 'loc', 'ix', 'iat', 'at']
def __init__(self, obj, groupby=None, axis=0, kind=None, **kwargs):
self.groupby = groupby
self.keys = None
self.sort = True
self.axis = axis
self.kind = kind
self.squeeze = False
self.group_keys = True
self.as_index = True
self.exclusions = set()
self.binner = None
self.grouper = None
if self.groupby is not None:
self.groupby._set_grouper(self._convert_obj(obj), sort=True)
def __unicode__(self):
""" provide a nice str repr of our rolling object """
attrs = ["{k}={v}".format(k=k, v=getattr(self.groupby, k))
for k in self._attributes if
getattr(self.groupby, k, None) is not None]
return "{klass} [{attrs}]".format(klass=self.__class__.__name__,
attrs=', '.join(attrs))
@property
def obj(self):
return self.groupby.obj
@property
def ax(self):
return self.groupby.ax
@property
def _typ(self):
""" masquerade for compat as a Series or a DataFrame """
if isinstance(self._selected_obj, pd.Series):
return 'series'
return 'dataframe'
def _deprecated(self, op):
warnings.warn(("\n.resample() is now a deferred operation\n"
"You called {op}(...) on this deferred object "
"which materialized it into a {klass}\nby implicitly "
"taking the mean. Use .resample(...).mean() "
"instead").format(op=op, klass=self._typ),
FutureWarning, stacklevel=3)
return self.mean()
def _make_deprecated_binop(op):
# op is a string
def _evaluate_numeric_binop(self, other):
result = self._deprecated(op)
return getattr(result, op)(other)
return _evaluate_numeric_binop
def _make_deprecated_unary(op, name):
# op is a callable
def _evaluate_numeric_unary(self):
result = self._deprecated(name)
return op(result)
return _evaluate_numeric_unary
def __array__(self):
return self._deprecated('__array__').__array__()
__gt__ = _make_deprecated_binop('__gt__')
__ge__ = _make_deprecated_binop('__ge__')
__lt__ = _make_deprecated_binop('__lt__')
__le__ = _make_deprecated_binop('__le__')
__eq__ = _make_deprecated_binop('__eq__')
__ne__ = _make_deprecated_binop('__ne__')
__add__ = __radd__ = _make_deprecated_binop('__add__')
__sub__ = __rsub__ = _make_deprecated_binop('__sub__')
__mul__ = __rmul__ = _make_deprecated_binop('__mul__')
__floordiv__ = __rfloordiv__ = _make_deprecated_binop('__floordiv__')
__truediv__ = __rtruediv__ = _make_deprecated_binop('__truediv__')
if not compat.PY3:
__div__ = __rdiv__ = _make_deprecated_binop('__div__')
__neg__ = _make_deprecated_unary(lambda x: -x, '__neg__')
__pos__ = _make_deprecated_unary(lambda x: x, '__pos__')
__abs__ = _make_deprecated_unary(lambda x: np.abs(x), '__abs__')
__inv__ = _make_deprecated_unary(lambda x: -x, '__inv__')
def __getattr__(self, attr):
if attr in self._internal_names_set:
return object.__getattribute__(self, attr)
if attr in self._attributes:
return getattr(self.groupby, attr)
if attr in self.obj:
return self[attr]
if attr in self._deprecated_invalids:
raise ValueError(".resample() is now a deferred operation\n"
"\tuse .resample(...).mean() instead of "
".resample(...)")
matches_pattern = any(attr.startswith(x) for x
in self._deprecated_valid_patterns)
if not matches_pattern and attr not in self._deprecated_valids:
self = self._deprecated(attr)
return object.__getattribute__(self, attr)
def __setattr__(self, attr, value):
if attr not in self._deprecated_valids:
raise ValueError("cannot set values on {0}".format(
self.__class__.__name__))
object.__setattr__(self, attr, value)
def __getitem__(self, key):
try:
return super(Resampler, self).__getitem__(key)
except (KeyError, com.AbstractMethodError):
# compat for deprecated
if isinstance(self.obj, com.ABCSeries):
return self._deprecated('__getitem__')[key]
raise
def __setitem__(self, attr, value):
raise ValueError("cannot set items on {0}".format(
self.__class__.__name__))
def _convert_obj(self, obj):
"""
provide any conversions for the object in order to correctly handle
Parameters
----------
obj : the object to be resampled
"""
obj = obj.consolidate()
return obj
def _get_binner_for_time(self):
raise AbstractMethodError(self)
def _set_binner(self):
"""
setup our binners
cache these as we are an immutable object
"""
if self.binner is None:
self.binner, self.grouper = self._get_binner()
def _get_binner(self):
"""
create the BinGrouper, assume that self.set_grouper(obj)
has already been called
"""
binner, bins, binlabels = self._get_binner_for_time()
bin_grouper = BinGrouper(bins, binlabels)
return binner, bin_grouper
def _assure_grouper(self):
""" make sure that we are creating our binner & grouper """
self._set_binner()
def plot(self, *args, **kwargs):
# for compat with prior versions, we want to
# have the warnings shown here and just have this work
return self._deprecated('plot').plot(*args, **kwargs)
[docs] def aggregate(self, arg, *args, **kwargs):
"""
Apply aggregation function or functions to resampled groups, yielding
most likely Series but in some cases DataFrame depending on the output
of the aggregation function
Parameters
----------
func_or_funcs : function or list / dict of functions
List/dict of functions will produce DataFrame with column names
determined by the function names themselves (list) or the keys in
the dict
Notes
-----
agg is an alias for aggregate. Use it.
Examples
--------
>>> s = Series([1,2,3,4,5],
index=pd.date_range('20130101',
periods=5,freq='s'))
2013-01-01 00:00:00 1
2013-01-01 00:00:01 2
2013-01-01 00:00:02 3
2013-01-01 00:00:03 4
2013-01-01 00:00:04 5
Freq: S, dtype: int64
>>> r = s.resample('2s')
DatetimeIndexResampler [freq=<2 * Seconds>, axis=0, closed=left,
label=left, convention=start, base=0]
>>> r.agg(np.sum)
2013-01-01 00:00:00 3
2013-01-01 00:00:02 7
2013-01-01 00:00:04 5
Freq: 2S, dtype: int64
>>> r.agg(['sum','mean','max'])
sum mean max
2013-01-01 00:00:00 3 1.5 2
2013-01-01 00:00:02 7 3.5 4
2013-01-01 00:00:04 5 5.0 5
>>> r.agg({'result' : lambda x: x.mean() / x.std(),
'total' : np.sum})
total result
2013-01-01 00:00:00 3 2.121320
2013-01-01 00:00:02 7 4.949747
2013-01-01 00:00:04 5 NaN
See also
--------
transform
Returns
-------
Series or DataFrame
"""
self._set_binner()
result, how = self._aggregate(arg, *args, **kwargs)
if result is None:
return self._groupby_and_aggregate(arg,
*args,
**kwargs)
return result
agg = aggregate
apply = aggregate
def _downsample(self, f):
raise AbstractMethodError(self)
def _upsample(self, f, limit=None):
raise AbstractMethodError(self)
def _gotitem(self, key, ndim, subset=None):
"""
sub-classes to define
return a sliced object
Parameters
----------
key : string / list of selections
ndim : 1,2
requested ndim of result
subset : object, default None
subset to act on
"""
self._set_binner()
grouper = self.grouper
if subset is None:
subset = self.obj
grouped = groupby(subset, by=None, grouper=grouper, axis=self.axis)
# try the key selection
try:
return grouped[key]
except KeyError:
return grouped
def _groupby_and_aggregate(self, how, grouper=None, *args, **kwargs):
""" revaluate the obj with a groupby aggregation """
if grouper is None:
self._set_binner()
grouper = self.grouper
obj = self._selected_obj
try:
grouped = groupby(obj, by=None, grouper=grouper, axis=self.axis)
except TypeError:
# panel grouper
grouped = PanelGroupBy(obj, grouper=grouper, axis=self.axis)
try:
result = grouped.aggregate(how, *args, **kwargs)
except Exception:
# we have a non-reducing function
# try to evaluate
result = grouped.apply(how, *args, **kwargs)
result = self._apply_loffset(result)
return self._wrap_result(result)
def _apply_loffset(self, result):
"""if loffset if set, offset the result index"""
loffset = self.loffset
if isinstance(loffset, compat.string_types):
loffset = to_offset(self.loffset)
needs_offset = (
isinstance(loffset, (DateOffset, timedelta)) and
isinstance(result.index, DatetimeIndex) and
len(result.index) > 0
)
if needs_offset:
result.index = result.index + loffset
return result
def _get_resampler_for_grouping(self, groupby, **kwargs):
""" return the correct class for resampling with groupby """
return self._resampler_for_grouping(self, groupby=groupby, **kwargs)
def _wrap_result(self, result):
""" potentially wrap any results """
if isinstance(result, com.ABCSeries) and self._selection is not None:
result.name = self._selection
return result
[docs] def pad(self, limit=None):
"""
Forward fill the values
Parameters
----------
limit : integer, optional
limit of how many values to fill
See Also
--------
Series.fillna
DataFrame.fillna
"""
return self._upsample('pad', limit=limit)
ffill = pad
[docs] def backfill(self, limit=None):
"""
Backward fill the values
Parameters
----------
limit : integer, optional
limit of how many values to fill
See Also
--------
Series.fillna
DataFrame.fillna
"""
return self._upsample('backfill', limit=limit)
bfill = backfill
[docs] def fillna(self, method, limit=None):
"""
Fill missing values
Parameters
----------
method : str, method of resampling ('ffill', 'bfill')
limit : integer, optional
limit of how many values to fill
See Also
--------
Series.fillna
DataFrame.fillna
"""
return self._upsample(method, limit=limit)
@Appender(_shared_docs['interpolate'] % _shared_docs_kwargs)
[docs] def interpolate(self, method='linear', axis=0, limit=None, inplace=False,
limit_direction='forward', downcast=None, **kwargs):
"""
Interpolate values according to different methods.
.. versionadded:: 0.18.1
"""
result = self._upsample(None)
return result.interpolate(method=method, axis=axis, limit=limit,
inplace=inplace,
limit_direction=limit_direction,
downcast=downcast, **kwargs)
[docs] def asfreq(self):
"""
return the values at the new freq,
essentially a reindex with (no filling)
"""
return self._upsample('asfreq')
[docs] def std(self, ddof=1, *args, **kwargs):
"""
Compute standard deviation of groups, excluding missing values
Parameters
----------
ddof : integer, default 1
degrees of freedom
"""
nv.validate_resampler_func('std', args, kwargs)
return self._downsample('std', ddof=ddof)
[docs] def var(self, ddof=1, *args, **kwargs):
"""
Compute variance of groups, excluding missing values
Parameters
----------
ddof : integer, default 1
degrees of freedom
"""
nv.validate_resampler_func('var', args, kwargs)
return self._downsample('var', ddof=ddof)
Resampler._deprecated_valids += dir(Resampler)
# downsample methods
for method in ['min', 'max', 'first', 'last', 'sum', 'mean', 'sem',
'median', 'prod', 'ohlc']:
def f(self, _method=method, *args, **kwargs):
nv.validate_resampler_func(_method, args, kwargs)
return self._downsample(_method)
f.__doc__ = getattr(GroupBy, method).__doc__
setattr(Resampler, method, f)
# groupby & aggregate methods
for method in ['count', 'size']:
def f(self, _method=method):
return self._downsample(_method)
f.__doc__ = getattr(GroupBy, method).__doc__
setattr(Resampler, method, f)
# series only methods
for method in ['nunique']:
def f(self, _method=method):
return self._downsample(_method)
f.__doc__ = getattr(SeriesGroupBy, method).__doc__
setattr(Resampler, method, f)
def _maybe_process_deprecations(r, how=None, fill_method=None, limit=None):
""" potentially we might have a deprecation warning, show it
but call the appropriate methods anyhow """
if how is not None:
# .resample(..., how='sum')
if isinstance(how, compat.string_types):
method = "{0}()".format(how)
# .resample(..., how=lambda x: ....)
else:
method = ".apply(<func>)"
# if we have both a how and fill_method, then show
# the following warning
if fill_method is None:
warnings.warn("how in .resample() is deprecated\n"
"the new syntax is "
".resample(...).{method}".format(
method=method),
FutureWarning, stacklevel=3)
r = r.aggregate(how)
if fill_method is not None:
# show the prior function call
method = '.' + method if how is not None else ''
args = "limit={0}".format(limit) if limit is not None else ""
warnings.warn("fill_method is deprecated to .resample()\n"
"the new syntax is .resample(...){method}"
".{fill_method}({args})".format(
method=method,
fill_method=fill_method,
args=args),
FutureWarning, stacklevel=3)
if how is not None:
r = getattr(r, fill_method)(limit=limit)
else:
r = r.aggregate(fill_method, limit=limit)
return r
class _GroupByMixin(GroupByMixin):
""" provide the groupby facilities """
def __init__(self, obj, *args, **kwargs):
parent = kwargs.pop('parent', None)
groupby = kwargs.pop('groupby', None)
if parent is None:
parent = obj
# initialize our GroupByMixin object with
# the resampler attributes
for attr in self._attributes:
setattr(self, attr, kwargs.get(attr, getattr(parent, attr)))
super(_GroupByMixin, self).__init__(None)
self._groupby = groupby
self._groupby.mutated = True
self._groupby.grouper.mutated = True
self.groupby = copy.copy(parent.groupby)
def _apply(self, f, **kwargs):
"""
dispatch to _upsample; we are stripping all of the _upsample kwargs and
performing the original function call on the grouped object
"""
def func(x):
x = self._shallow_copy(x, groupby=self.groupby)
if isinstance(f, compat.string_types):
return getattr(x, f)(**kwargs)
return x.apply(f, **kwargs)
result = self._groupby.apply(func)
return self._wrap_result(result)
_upsample = _apply
_downsample = _apply
_groupby_and_aggregate = _apply
class DatetimeIndexResampler(Resampler):
@property
def _resampler_for_grouping(self):
return DatetimeIndexResamplerGroupby
def _get_binner_for_time(self):
# this is how we are actually creating the bins
if self.kind == 'period':
return self.groupby._get_time_period_bins(self.ax)
return self.groupby._get_time_bins(self.ax)
def _downsample(self, how, **kwargs):
"""
Downsample the cython defined function
Parameters
----------
how : string / cython mapped function
**kwargs : kw args passed to how function
"""
self._set_binner()
how = self._is_cython_func(how) or how
ax = self.ax
obj = self._selected_obj
if not len(ax):
# reset to the new freq
obj = obj.copy()
obj.index.freq = self.freq
return obj
# do we have a regular frequency
if ax.freq is not None or ax.inferred_freq is not None:
if len(self.grouper.binlabels) > len(ax) and how is None:
# let's do an asfreq
return self.asfreq()
# we are downsampling
# we want to call the actual grouper method here
result = obj.groupby(
self.grouper, axis=self.axis).aggregate(how, **kwargs)
result = self._apply_loffset(result)
return self._wrap_result(result)
def _adjust_binner_for_upsample(self, binner):
""" adjust our binner when upsampling """
if self.closed == 'right':
binner = binner[1:]
else:
binner = binner[:-1]
return binner
def _upsample(self, method, limit=None):
"""
method : string {'backfill', 'bfill', 'pad',
'ffill', 'asfreq'} method for upsampling
limit : int, default None
Maximum size gap to fill when reindexing
See also
--------
.fillna
"""
self._set_binner()
if self.axis:
raise AssertionError('axis must be 0')
ax = self.ax
obj = self._selected_obj
binner = self.binner
res_index = self._adjust_binner_for_upsample(binner)
# if we have the same frequency as our axis, then we are equal sampling
if limit is None and to_offset(ax.inferred_freq) == self.freq:
result = obj.copy()
result.index = res_index
else:
result = obj.reindex(res_index, method=method,
limit=limit)
return self._wrap_result(result)
def _wrap_result(self, result):
result = super(DatetimeIndexResampler, self)._wrap_result(result)
# we may have a different kind that we were asked originally
# convert if needed
if self.kind == 'period' and not isinstance(result.index, PeriodIndex):
result.index = result.index.to_period(self.freq)
return result
class DatetimeIndexResamplerGroupby(_GroupByMixin, DatetimeIndexResampler):
"""
Provides a resample of a groupby implementation
.. versionadded:: 0.18.1
"""
@property
def _constructor(self):
return DatetimeIndexResampler
class PeriodIndexResampler(DatetimeIndexResampler):
@property
def _resampler_for_grouping(self):
return PeriodIndexResamplerGroupby
def _convert_obj(self, obj):
obj = super(PeriodIndexResampler, self)._convert_obj(obj)
offset = to_offset(self.freq)
if offset.n > 1:
if self.kind == 'period': # pragma: no cover
print('Warning: multiple of frequency -> timestamps')
# Cannot have multiple of periods, convert to timestamp
self.kind = 'timestamp'
# convert to timestamp
if not (self.kind is None or self.kind == 'period'):
obj = obj.to_timestamp(how=self.convention)
return obj
def aggregate(self, arg, *args, **kwargs):
result, how = self._aggregate(arg, *args, **kwargs)
if result is None:
result = self._downsample(arg, *args, **kwargs)
return result
agg = aggregate
def _get_new_index(self):
""" return our new index """
ax = self.ax
if len(ax) == 0:
values = []
else:
start = ax[0].asfreq(self.freq, how=self.convention)
end = ax[-1].asfreq(self.freq, how='end')
values = period_range(start, end, freq=self.freq).values
return ax._shallow_copy(values, freq=self.freq)
def _downsample(self, how, **kwargs):
"""
Downsample the cython defined function
Parameters
----------
how : string / cython mapped function
**kwargs : kw args passed to how function
"""
# we may need to actually resample as if we are timestamps
if self.kind == 'timestamp':
return super(PeriodIndexResampler, self)._downsample(how, **kwargs)
how = self._is_cython_func(how) or how
ax = self.ax
new_index = self._get_new_index()
# Start vs. end of period
memb = ax.asfreq(self.freq, how=self.convention)
if is_subperiod(ax.freq, self.freq):
# Downsampling
if len(new_index) == 0:
bins = []
else:
rng = np.arange(memb.values[0], memb.values[-1] + 1)
bins = memb.searchsorted(rng, side='right')
grouper = BinGrouper(bins, new_index)
return self._groupby_and_aggregate(how, grouper=grouper)
elif is_superperiod(ax.freq, self.freq):
return self.asfreq()
elif ax.freq == self.freq:
return self.asfreq()
raise IncompatibleFrequency(
'Frequency {} cannot be resampled to {}, as they are not '
'sub or super periods'.format(ax.freq, self.freq))
def _upsample(self, method, limit=None):
"""
method : string {'backfill', 'bfill', 'pad', 'ffill'}
method for upsampling
limit : int, default None
Maximum size gap to fill when reindexing
See also
--------
.fillna
"""
# we may need to actually resample as if we are timestamps
if self.kind == 'timestamp':
return super(PeriodIndexResampler, self)._upsample(method,
limit=limit)
ax = self.ax
obj = self.obj
new_index = self._get_new_index()
# Start vs. end of period
memb = ax.asfreq(self.freq, how=self.convention)
# Get the fill indexer
indexer = memb.get_indexer(new_index, method=method, limit=limit)
return self._wrap_result(_take_new_index(
obj, indexer, new_index, axis=self.axis))
class PeriodIndexResamplerGroupby(_GroupByMixin, PeriodIndexResampler):
"""
Provides a resample of a groupby implementation
.. versionadded:: 0.18.1
"""
@property
def _constructor(self):
return PeriodIndexResampler
class TimedeltaIndexResampler(DatetimeIndexResampler):
@property
def _resampler_for_grouping(self):
return TimedeltaIndexResamplerGroupby
def _get_binner_for_time(self):
return self.groupby._get_time_delta_bins(self.ax)
def _adjust_binner_for_upsample(self, binner):
""" adjust our binner when upsampling """
ax = self.ax
if is_subperiod(ax.freq, self.freq):
# We are actually downsampling
# but are in the asfreq path
# GH 12926
if self.closed == 'right':
binner = binner[1:]
else:
binner = binner[:-1]
return binner
class TimedeltaIndexResamplerGroupby(_GroupByMixin, TimedeltaIndexResampler):
"""
Provides a resample of a groupby implementation
.. versionadded:: 0.18.1
"""
@property
def _constructor(self):
return TimedeltaIndexResampler
def resample(obj, kind=None, **kwds):
""" create a TimeGrouper and return our resampler """
tg = TimeGrouper(**kwds)
return tg._get_resampler(obj, kind=kind)
resample.__doc__ = Resampler.__doc__
def get_resampler_for_grouping(groupby, rule, how=None, fill_method=None,
limit=None, kind=None, **kwargs):
""" return our appropriate resampler when grouping as well """
tg = TimeGrouper(freq=rule, **kwargs)
resampler = tg._get_resampler(groupby.obj, kind=kind)
r = resampler._get_resampler_for_grouping(groupby=groupby)
return _maybe_process_deprecations(r,
how=how,
fill_method=fill_method,
limit=limit)
class TimeGrouper(Grouper):
"""
Custom groupby class for time-interval grouping
Parameters
----------
freq : pandas date offset or offset alias for identifying bin edges
closed : closed end of interval; left or right
label : interval boundary to use for labeling; left or right
nperiods : optional, integer
convention : {'start', 'end', 'e', 's'}
If axis is PeriodIndex
Notes
-----
Use begin, end, nperiods to generate intervals that cannot be derived
directly from the associated object
"""
def __init__(self, freq='Min', closed=None, label=None, how='mean',
nperiods=None, axis=0,
fill_method=None, limit=None, loffset=None, kind=None,
convention=None, base=0, **kwargs):
freq = to_offset(freq)
end_types = set(['M', 'A', 'Q', 'BM', 'BA', 'BQ', 'W'])
rule = freq.rule_code
if (rule in end_types or
('-' in rule and rule[:rule.find('-')] in end_types)):
if closed is None:
closed = 'right'
if label is None:
label = 'right'
else:
if closed is None:
closed = 'left'
if label is None:
label = 'left'
self.closed = closed
self.label = label
self.nperiods = nperiods
self.kind = kind
self.convention = convention or 'E'
self.convention = self.convention.lower()
self.loffset = loffset
self.how = how
self.fill_method = fill_method
self.limit = limit
self.base = base
# always sort time groupers
kwargs['sort'] = True
super(TimeGrouper, self).__init__(freq=freq, axis=axis, **kwargs)
def _get_resampler(self, obj, kind=None):
"""
return my resampler or raise if we have an invalid axis
Parameters
----------
obj : input object
kind : string, optional
'period','timestamp','timedelta' are valid
Returns
-------
a Resampler
Raises
------
TypeError if incompatible axis
"""
self._set_grouper(obj)
ax = self.ax
if isinstance(ax, DatetimeIndex):
return DatetimeIndexResampler(obj,
groupby=self,
kind=kind,
axis=self.axis)
elif isinstance(ax, PeriodIndex) or kind == 'period':
return PeriodIndexResampler(obj,
groupby=self,
kind=kind,
axis=self.axis)
elif isinstance(ax, TimedeltaIndex):
return TimedeltaIndexResampler(obj,
groupby=self,
axis=self.axis)
raise TypeError("Only valid with DatetimeIndex, "
"TimedeltaIndex or PeriodIndex, "
"but got an instance of %r" % type(ax).__name__)
def _get_grouper(self, obj):
# create the resampler and return our binner
r = self._get_resampler(obj)
r._set_binner()
return r.binner, r.grouper, r.obj
def _get_binner_for_resample(self, kind=None):
# create the BinGrouper
# assume that self.set_grouper(obj) has already been called
ax = self.ax
if kind is None:
kind = self.kind
if kind is None or kind == 'timestamp':
self.binner, bins, binlabels = self._get_time_bins(ax)
elif kind == 'timedelta':
self.binner, bins, binlabels = self._get_time_delta_bins(ax)
else:
self.binner, bins, binlabels = self._get_time_period_bins(ax)
self.grouper = BinGrouper(bins, binlabels)
return self.binner, self.grouper, self.obj
def _get_binner_for_grouping(self, obj):
# return an ordering of the transformed group labels,
# suitable for multi-grouping, e.g the labels for
# the resampled intervals
binner, grouper, obj = self._get_grouper(obj)
l = []
for key, group in grouper.get_iterator(self.ax):
l.extend([key] * len(group))
if isinstance(self.ax, PeriodIndex):
grouper = binner.__class__(l, freq=binner.freq, name=binner.name)
else:
# resampling causes duplicated values, specifying freq is invalid
grouper = binner.__class__(l, name=binner.name)
# since we may have had to sort
# may need to reorder groups here
if self.indexer is not None:
indexer = self.indexer.argsort(kind='quicksort')
grouper = grouper.take(indexer)
return grouper
def _get_time_bins(self, ax):
if not isinstance(ax, DatetimeIndex):
raise TypeError('axis must be a DatetimeIndex, but got '
'an instance of %r' % type(ax).__name__)
if len(ax) == 0:
binner = labels = DatetimeIndex(
data=[], freq=self.freq, name=ax.name)
return binner, [], labels
first, last = ax.min(), ax.max()
first, last = _get_range_edges(first, last, self.freq,
closed=self.closed,
base=self.base)
tz = ax.tz
# GH #12037
# use first/last directly instead of call replace() on them
# because replace() will swallow the nanosecond part
# thus last bin maybe slightly before the end if the end contains
# nanosecond part and lead to `Values falls after last bin` error
binner = labels = DatetimeIndex(freq=self.freq,
start=first,
end=last,
tz=tz,
name=ax.name)
# a little hack
trimmed = False
if (len(binner) > 2 and binner[-2] == last and
self.closed == 'right'):
binner = binner[:-1]
trimmed = True
ax_values = ax.asi8
binner, bin_edges = self._adjust_bin_edges(binner, ax_values)
# general version, knowing nothing about relative frequencies
bins = lib.generate_bins_dt64(
ax_values, bin_edges, self.closed, hasnans=ax.hasnans)
if self.closed == 'right':
labels = binner
if self.label == 'right':
labels = labels[1:]
elif not trimmed:
labels = labels[:-1]
else:
if self.label == 'right':
labels = labels[1:]
elif not trimmed:
labels = labels[:-1]
if ax.hasnans:
binner = binner.insert(0, tslib.NaT)
labels = labels.insert(0, tslib.NaT)
# if we end up with more labels than bins
# adjust the labels
# GH4076
if len(bins) < len(labels):
labels = labels[:len(bins)]
return binner, bins, labels
def _adjust_bin_edges(self, binner, ax_values):
# Some hacks for > daily data, see #1471, #1458, #1483
bin_edges = binner.asi8
if self.freq != 'D' and is_superperiod(self.freq, 'D'):
day_nanos = _delta_to_nanoseconds(timedelta(1))
if self.closed == 'right':
bin_edges = bin_edges + day_nanos - 1
# intraday values on last day
if bin_edges[-2] > ax_values.max():
bin_edges = bin_edges[:-1]
binner = binner[:-1]
return binner, bin_edges
def _get_time_delta_bins(self, ax):
if not isinstance(ax, TimedeltaIndex):
raise TypeError('axis must be a TimedeltaIndex, but got '
'an instance of %r' % type(ax).__name__)
if not len(ax):
binner = labels = TimedeltaIndex(
data=[], freq=self.freq, name=ax.name)
return binner, [], labels
start = ax[0]
end = ax[-1]
labels = binner = TimedeltaIndex(start=start,
end=end,
freq=self.freq,
name=ax.name)
end_stamps = labels + 1
bins = ax.searchsorted(end_stamps, side='left')
# Addresses GH #10530
if self.base > 0:
labels += type(self.freq)(self.base)
return binner, bins, labels
def _get_time_period_bins(self, ax):
if not isinstance(ax, DatetimeIndex):
raise TypeError('axis must be a DatetimeIndex, but got '
'an instance of %r' % type(ax).__name__)
if not len(ax):
binner = labels = PeriodIndex(
data=[], freq=self.freq, name=ax.name)
return binner, [], labels
labels = binner = PeriodIndex(start=ax[0],
end=ax[-1],
freq=self.freq,
name=ax.name)
end_stamps = (labels + 1).asfreq(self.freq, 's').to_timestamp()
if ax.tzinfo:
end_stamps = end_stamps.tz_localize(ax.tzinfo)
bins = ax.searchsorted(end_stamps, side='left')
return binner, bins, labels
def _take_new_index(obj, indexer, new_index, axis=0):
from pandas.core.api import Series, DataFrame
if isinstance(obj, Series):
new_values = algos.take_1d(obj.values, indexer)
return Series(new_values, index=new_index, name=obj.name)
elif isinstance(obj, DataFrame):
if axis == 1:
raise NotImplementedError("axis 1 is not supported")
return DataFrame(obj._data.reindex_indexer(
new_axis=new_index, indexer=indexer, axis=1))
else:
raise ValueError("'obj' should be either a Series or a DataFrame")
def _get_range_edges(first, last, offset, closed='left', base=0):
if isinstance(offset, compat.string_types):
offset = to_offset(offset)
if isinstance(offset, Tick):
is_day = isinstance(offset, Day)
day_nanos = _delta_to_nanoseconds(timedelta(1))
# #1165
if (is_day and day_nanos % offset.nanos == 0) or not is_day:
return _adjust_dates_anchored(first, last, offset,
closed=closed, base=base)
if not isinstance(offset, Tick): # and first.time() != last.time():
# hack!
first = first.normalize()
last = last.normalize()
if closed == 'left':
first = Timestamp(offset.rollback(first))
else:
first = Timestamp(first - offset)
last = Timestamp(last + offset)
return first, last
def _adjust_dates_anchored(first, last, offset, closed='right', base=0):
# First and last offsets should be calculated from the start day to fix an
# error cause by resampling across multiple days when a one day period is
# not a multiple of the frequency.
#
# See https://github.com/pydata/pandas/issues/8683
first_tzinfo = first.tzinfo
first = first.tz_localize(None)
last = last.tz_localize(None)
start_day_nanos = first.normalize().value
base_nanos = (base % offset.n) * offset.nanos // offset.n
start_day_nanos += base_nanos
foffset = (first.value - start_day_nanos) % offset.nanos
loffset = (last.value - start_day_nanos) % offset.nanos
if closed == 'right':
if foffset > 0:
# roll back
fresult = first.value - foffset
else:
fresult = first.value - offset.nanos
if loffset > 0:
# roll forward
lresult = last.value + (offset.nanos - loffset)
else:
# already the end of the road
lresult = last.value
else: # closed == 'left'
if foffset > 0:
fresult = first.value - foffset
else:
# start of the road
fresult = first.value
if loffset > 0:
# roll forward
lresult = last.value + (offset.nanos - loffset)
else:
lresult = last.value + offset.nanos
# return (Timestamp(fresult, tz=first.tz),
# Timestamp(lresult, tz=last.tz))
return (Timestamp(fresult).tz_localize(first_tzinfo),
Timestamp(lresult).tz_localize(first_tzinfo))
def asfreq(obj, freq, method=None, how=None, normalize=False):
"""
Utility frequency conversion method for Series/DataFrame
"""
if isinstance(obj.index, PeriodIndex):
if method is not None:
raise NotImplementedError("'method' argument is not supported")
if how is None:
how = 'E'
new_index = obj.index.asfreq(freq, how=how)
new_obj = obj.copy()
new_obj.index = new_index
return new_obj
else:
if len(obj.index) == 0:
return obj.copy()
dti = date_range(obj.index[0], obj.index[-1], freq=freq)
dti.name = obj.index.name
rs = obj.reindex(dti, method=method)
if normalize:
rs.index = rs.index.normalize()
return rs