When loading the default sample data, `.tz_localize('UTC')` raises a `TypeError` because the index it is called on is not a `DatetimeIndex`: the CSV parser does not convert the 'start' column to a datetime index, so there is nothing valid to localize.
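I fixed this bug by explicitly converting the index to UTC datetimes with `pd.to_datetime(df.index, utc=True)` instead of calling `.tz_localize("UTC")` on the result of `pd.read_csv`.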
SOLUTION:
def meter_data_from_csv(
    filepath_or_buffer,
    tz=None,
    start_col="start",
    value_col="value",
    gzipped=False,
    freq=None,
    **kwargs
):
    """ Load meter data from a CSV file.

    Default format::

        start,value
        2017-01-01T00:00:00+00:00,0.31
        2017-01-02T00:00:00+00:00,0.4
        2017-01-03T00:00:00+00:00,0.58

    Parameters
    ----------
    filepath_or_buffer : :any:`str` or file-handle
        File path or object.
    tz : :any:`str`, optional
        E.g., ``'UTC'`` or ``'US/Pacific'``
    start_col : :any:`str`, optional, default ``'start'``
        Date period start column.
    value_col : :any:`str`, optional, default ``'value'``
        Value column, can be in any unit.
    gzipped : :any:`bool`, optional
        Whether file is gzipped.
    freq : :any:`str`, optional
        If given, apply frequency to data using
        :any:`pandas.DataFrame.resample`. Only ``'hourly'`` and ``'daily'``
        trigger resampling (values are summed per period); any other value
        leaves the data as loaded.
    **kwargs
        Extra keyword arguments to pass to :any:`pandas.read_csv`, such as
        ``sep='|'``. These are applied last and therefore override the
        defaults set by this function.

    Returns
    -------
    df : :any:`pandas.DataFrame`
        Data frame indexed by ``start_col`` as a timezone-aware
        ``DatetimeIndex`` (UTC, or ``tz`` when given) with ``value_col``
        as ``float64``.
    """
    read_csv_kwargs = {
        "usecols": [start_col, value_col],
        "dtype": {value_col: np.float64},
        "parse_dates": [start_col],
        "index_col": start_col,
    }

    if gzipped:
        read_csv_kwargs.update({"compression": "gzip"})

    # allow passing extra kwargs
    read_csv_kwargs.update(kwargs)

    df = pd.read_csv(filepath_or_buffer, **read_csv_kwargs)

    # read_csv does not always yield a DatetimeIndex for the start column, in
    # which case DataFrame.tz_localize raises "index is not a valid
    # DatetimeIndex or PeriodIndex". Converting explicitly with utc=True
    # produces a UTC-aware DatetimeIndex whether the parsed values came back
    # as strings, naive datetimes, or offset-aware datetimes.
    df.index = pd.to_datetime(df.index, utc=True)

    if tz is not None:
        df = df.tz_convert(tz)

    # Offset objects are used instead of string aliases because the uppercase
    # "H" alias is deprecated in recent pandas; Hour()/Day() are what the
    # "H"/"D" aliases resolve to.
    if freq == "hourly":
        df = df.resample(pd.offsets.Hour()).sum()
    elif freq == "daily":
        df = df.resample(pd.offsets.Day()).sum()

    return df
ERROR:
TypeError Traceback (most recent call last)
in
1 #Daily Billing for Caltrack
2 meter_data, temperature_data, sample_metadata = (
----> 3 eemeter.load_sample("il-electricity-cdd-hdd-daily")
4 )
5
~/anaconda3/envs/eenv/lib/python3.7/site-packages/eemeter/samples/load.py in load_sample(sample)
80 meter_data_filename = metadata["meter_data_filename"]
81 with resource_stream("eemeter.samples", meter_data_filename) as f:
---> 82 meter_data = meter_data_from_csv(f, gzipped=True, freq=freq)
83
84 temperature_filename = metadata["temperature_filename"]
~/anaconda3/envs/eenv/lib/python3.7/site-packages/eemeter/io.py in meter_data_from_csv(filepath_or_buffer, tz, start_col, value_col, gzipped, freq, **kwargs)
81 read_csv_kwargs.update(kwargs)
82
---> 83 df = pd.read_csv(filepath_or_buffer, **read_csv_kwargs).tz_localize("UTC")
84 if tz is not None:
85 df = df.tz_convert(tz)
~/anaconda3/envs/eenv/lib/python3.7/site-packages/pandas/core/generic.py in tz_localize(self, tz, axis, level, copy, ambiguous, nonexistent)
9865 if level not in (None, 0, ax.name):
9866 raise ValueError("The level {0} is not valid".format(level))
-> 9867 ax = _tz_localize(ax, tz, ambiguous, nonexistent)
9868
9869 result = self._constructor(self._data, copy=copy)
~/anaconda3/envs/eenv/lib/python3.7/site-packages/pandas/core/generic.py in _tz_localize(ax, tz, ambiguous, nonexistent)
9848 ax_name = self._get_axis_name(axis)
9849 raise TypeError(
-> 9850 "%s is not a valid DatetimeIndex or " "PeriodIndex" % ax_name
9851 )
9852 else:
TypeError: index is not a valid DatetimeIndex or PeriodIndex