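Calling create_weekly_profile() fails with a TypeError ("can only concatenate str (not "NoneType") to str"), which is triggered by an underlying KeyError ("Column(s) ['start_date'] do not exist"). The full traceback is included below.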
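The problem is that when a column is passed as the 'on' argument of resample(), it automatically becomes the index and is no longer in the column list, so the later aggregation on that column (start_time = ('start_date', 'min')) cannot find it. I worked around this by temporarily resampling on a duplicate of the 'start_date' column, so the original column is still available for aggregation. There may be a nicer solution.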
Finished Loading motion in: 0.7 seconds
Finished Loading activity_dailies in: 0.0 seconds
Finished Loading sleep in: 1.0 seconds
Finished Loading bed_occupancy in: 0.0 seconds
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File ~/py38/lib/python3.8/site-packages/pandas/core/resample.py:448, in Resampler._groupby_and_aggregate(self, how, *args, **kwargs)
447 else:
--> 448 result = grouped.aggregate(how, *args, **kwargs)
449 except DataError:
450 # got TypeErrors on aggregation
File ~/py38/lib/python3.8/site-packages/pandas/core/groupby/generic.py:894, in DataFrameGroupBy.aggregate(self, func, engine, engine_kwargs, *args, **kwargs)
893 op = GroupByApply(self, func, args, kwargs)
--> 894 result = op.agg()
895 if not is_dict_like(func) and result is not None:
File ~/py38/lib/python3.8/site-packages/pandas/core/apply.py:169, in Apply.agg(self)
168 if is_dict_like(arg):
--> 169 return self.agg_dict_like()
170 elif is_list_like(arg):
171 # we require a list, but not a 'str'
File ~/py38/lib/python3.8/site-packages/pandas/core/apply.py:478, in Apply.agg_dict_like(self)
476 selection = obj._selection
--> 478 arg = self.normalize_dictlike_arg("agg", selected_obj, arg)
480 if selected_obj.ndim == 1:
481 # key only used for output
File ~/py38/lib/python3.8/site-packages/pandas/core/apply.py:601, in Apply.normalize_dictlike_arg(self, how, obj, func)
600 cols_sorted = list(safe_sort(list(cols)))
--> 601 raise KeyError(f"Column(s) {cols_sorted} do not exist")
603 aggregator_types = (list, tuple, dict)
KeyError: "Column(s) ['start_date'] do not exist"
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
File ~/py38/lib/python3.8/site-packages/pandas/core/groupby/groupby.py:1558, in GroupBy.apply(self, func, *args, **kwargs)
1557 try:
-> 1558 result = self._python_apply_general(f, self._selected_obj)
1559 except TypeError:
1560 # gh-20949
1561 # try again, with .apply acting as a filtering
(...)
1565 # fails on *some* columns, e.g. a numeric operation
1566 # on a string grouper column
File ~/py38/lib/python3.8/site-packages/pandas/core/groupby/groupby.py:1610, in GroupBy._python_apply_general(self, f, data, not_indexed_same, is_transform, is_agg)
1582 """
1583 Apply function f in python space
1584
(...)
1608 data after applying f
1609 """
-> 1610 values, mutated = self.grouper.apply(f, data, self.axis)
1611 if not_indexed_same is None:
File ~/py38/lib/python3.8/site-packages/pandas/core/groupby/ops.py:839, in BaseGrouper.apply(self, f, data, axis)
838 group_axes = group.axes
--> 839 res = f(group)
840 if not mutated and not _is_indexed_like(res, group_axes, axis):
File ~/py38/lib/python3.8/site-packages/pandas/core/resample.py:1208, in _GroupByMixin._apply.<locals>.func(x)
1206 return getattr(x, f)(**kwargs)
-> 1208 return x.apply(f, *args, **kwargs)
File ~/py38/lib/python3.8/site-packages/pandas/core/resample.py:355, in Resampler.aggregate(self, func, *args, **kwargs)
354 how = func
--> 355 result = self._groupby_and_aggregate(how, *args, **kwargs)
357 result = self._apply_loffset(result)
File ~/py38/lib/python3.8/site-packages/pandas/core/resample.py:460, in Resampler._groupby_and_aggregate(self, how, *args, **kwargs)
452 except (AttributeError, KeyError):
453 # we have a non-reducing function; try to evaluate
454 # alternatively we want to evaluate only a column of the input
(...)
458 # on Series, raising AttributeError or KeyError
459 # (depending on whether the column lookup uses getattr/__getitem__)
--> 460 result = grouped.apply(how, *args, **kwargs)
462 except ValueError as err:
File ~/py38/lib/python3.8/site-packages/pandas/core/groupby/groupby.py:1543, in GroupBy.apply(self, func, *args, **kwargs)
1541 return func(g, *args, **kwargs)
-> 1543 elif hasattr(nanops, "nan" + func):
1544 # TODO: should we wrap this in to e.g. _is_builtin_func?
1545 f = getattr(nanops, "nan" + func)
TypeError: can only concatenate str (not "NoneType") to str
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
File ~/py38/lib/python3.8/site-packages/pandas/core/resample.py:448, in Resampler._groupby_and_aggregate(self, how, *args, **kwargs)
447 else:
--> 448 result = grouped.aggregate(how, *args, **kwargs)
449 except DataError:
450 # got TypeErrors on aggregation
File ~/py38/lib/python3.8/site-packages/pandas/core/groupby/generic.py:894, in DataFrameGroupBy.aggregate(self, func, engine, engine_kwargs, *args, **kwargs)
893 op = GroupByApply(self, func, args, kwargs)
--> 894 result = op.agg()
895 if not is_dict_like(func) and result is not None:
File ~/py38/lib/python3.8/site-packages/pandas/core/apply.py:169, in Apply.agg(self)
168 if is_dict_like(arg):
--> 169 return self.agg_dict_like()
170 elif is_list_like(arg):
171 # we require a list, but not a 'str'
File ~/py38/lib/python3.8/site-packages/pandas/core/apply.py:478, in Apply.agg_dict_like(self)
476 selection = obj._selection
--> 478 arg = self.normalize_dictlike_arg("agg", selected_obj, arg)
480 if selected_obj.ndim == 1:
481 # key only used for output
File ~/py38/lib/python3.8/site-packages/pandas/core/apply.py:601, in Apply.normalize_dictlike_arg(self, how, obj, func)
600 cols_sorted = list(safe_sort(list(cols)))
--> 601 raise KeyError(f"Column(s) {cols_sorted} do not exist")
603 aggregator_types = (list, tuple, dict)
KeyError: "Column(s) ['start_date'] do not exist"
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
Cell In [4], line 1
----> 1 create_weekly_profile()
File ~/py38/lib/python3.8/site-packages/dcarte/derived/weekly_profile.py:326, in create_weekly_profile()
324 for dataset in parent_datasets.keys():
325 p_datasets = {d[0]:dcarte.load(*d) for d in parent_datasets[dataset]}
--> 326 LocalDataset(dataset_name = dataset,
327 datasets = p_datasets,
328 pipeline = [f'process_{dataset.lower()}'],
329 domain = domain,
330 module = module,
331 module_path = module_path,
332 dependencies = parent_datasets[dataset])
File <string>:20, in __init__(self, dataset_name, datasets, pipeline, domain, module_path, module, dependencies, since, until, delay, reapply, reload, update, home, compression, data_folder, data)
File ~/py38/lib/python3.8/site-packages/dcarte/local.py:90, in LocalDataset.__post_init__(self)
86 self.metadata = {'since': self.since,
87 'until': self.until,
88 'Mac': cfg['mac']}
89 self.register_dataset()
---> 90 self.load_dataset()
91 self.data = read_table(self.local_file)
File ~/py38/lib/python3.8/site-packages/dcarte/local.py:126, in LocalDataset.load_dataset(self)
124 if not path_exists(self.local_file) or self.reload:
125 set_path(self.local_file)
--> 126 self.process_dataset()
127 elif self.update:
128 self.update_dataset()
File ~/py38/lib/python3.8/site-packages/dcarte/local.py:149, in LocalDataset.process_dataset(self)
144 """process_dataset [summary]
145
146 [extended_summary]
147 """
148 for func in self.pipeline:
--> 149 self.data = getattr(self._module, func)(self)
150 # domains = pd.DataFrame(cfg['domains'])
151 # dataset = np.array([self.domain,self.dataset_name])
152 # # dataset_exist = (domains == dataset).all(axis=1).any()
153 # # if not dataset_exist:
154 # # self.register_dataset()
155 self.save_dataset()
File ~/dcarte/recipes/profile/weekly_profile.py:137, in process_sleep_dailies(obj)
135 sleep_periods = sleep_vitals_.join(habits_).join(sleep_states_).round(2)
136 sleep_periods = sleep_periods.dropna(subset=['time_in_bed','DEEP','hr_max'])
--> 137 sleep_metrics = resample_sleep_metrics(sleep_periods)
138 diurnal_habits = resample_sleep_metrics(sleep_periods,'Diurnal')
139 diurnal_habits = diurnal_habits.assign(nap_ibp = diurnal_habits.time_in_bed)
File ~/dcarte/recipes/profile/weekly_profile.py:149, in resample_sleep_metrics(sleep_periods, period_type)
147 def resample_sleep_metrics(sleep_periods,period_type:str="Nocturnal"):
148 habits = sleep_periods.query('period_type == @period_type').drop(columns=['period_type'])
--> 149 habits = (habits.
150 reset_index().
151 groupby('patient_id').
152 resample('1D',offset='12h',on = 'start_date').agg(
153 start_time = ('start_date', 'min'),
154 end_time = ('end_date', 'max'),
155 nb_awakenings = ('awake_events' ,lambda x: x if x.shape[0]==1 else x.sum()+x.shape[0]-1),
156 time_in_bed = ('time_in_bed' ,'sum'),
157 period_obs = ('period_obs' ,'sum'),
158 minutes_snoring = ('minutes_snoring' ,'sum'),
159 heart_rate = ('heart_rate', 'mean'),
160 hr_min = ('hr_min' ,'min'),
161 hr_max = ('hr_max' ,'max'),
162 respiratory_rate = ('respiratory_rate' ,'mean'),
163 rr_min = ('rr_min' ,'min'),
164 rr_max = ('rr_max' ,'max'),
165 AWAKE = ('AWAKE' ,'sum'),
166 DEEP = ('DEEP' ,'sum'),
167 OTHER = ('OTHER' ,'sum')
168 ).dropna())
169 habits = habits.assign(bed_time_period=(habits.end_time - habits.start_time)/np.timedelta64(1, 'h'))
170 habits = habits.assign(time_out_of_bed=(habits.bed_time_period - habits.time_in_bed))
File ~/py38/lib/python3.8/site-packages/pandas/core/resample.py:355, in Resampler.aggregate(self, func, *args, **kwargs)
353 if result is None:
354 how = func
--> 355 result = self._groupby_and_aggregate(how, *args, **kwargs)
357 result = self._apply_loffset(result)
358 return result
File ~/py38/lib/python3.8/site-packages/pandas/core/resample.py:1210, in _GroupByMixin._apply(self, f, *args, **kwargs)
1206 return getattr(x, f)(**kwargs)
1208 return x.apply(f, *args, **kwargs)
-> 1210 result = self._groupby.apply(func)
1211 return self._wrap_result(result)
File ~/py38/lib/python3.8/site-packages/pandas/core/groupby/groupby.py:1569, in GroupBy.apply(self, func, *args, **kwargs)
1559 except TypeError:
1560 # gh-20949
1561 # try again, with .apply acting as a filtering
(...)
1565 # fails on *some* columns, e.g. a numeric operation
1566 # on a string grouper column
1568 with self._group_selection_context():
-> 1569 return self._python_apply_general(f, self._selected_obj)
1571 return result
File ~/py38/lib/python3.8/site-packages/pandas/core/groupby/groupby.py:1610, in GroupBy._python_apply_general(self, f, data, not_indexed_same, is_transform, is_agg)
1573 @final
1574 def _python_apply_general(
1575 self,
(...)
1580 is_agg: bool = False,
1581 ) -> NDFrameT:
1582 """
1583 Apply function f in python space
1584
(...)
1608 data after applying f
1609 """
-> 1610 values, mutated = self.grouper.apply(f, data, self.axis)
1611 if not_indexed_same is None:
1612 not_indexed_same = mutated or self.mutated
File ~/py38/lib/python3.8/site-packages/pandas/core/groupby/ops.py:839, in BaseGrouper.apply(self, f, data, axis)
837 # group might be modified
838 group_axes = group.axes
--> 839 res = f(group)
840 if not mutated and not _is_indexed_like(res, group_axes, axis):
841 mutated = True
File ~/py38/lib/python3.8/site-packages/pandas/core/resample.py:1208, in _GroupByMixin._apply.<locals>.func(x)
1205 if isinstance(f, str):
1206 return getattr(x, f)(**kwargs)
-> 1208 return x.apply(f, *args, **kwargs)
File ~/py38/lib/python3.8/site-packages/pandas/core/resample.py:355, in Resampler.aggregate(self, func, *args, **kwargs)
353 if result is None:
354 how = func
--> 355 result = self._groupby_and_aggregate(how, *args, **kwargs)
357 result = self._apply_loffset(result)
358 return result
File ~/py38/lib/python3.8/site-packages/pandas/core/resample.py:460, in Resampler._groupby_and_aggregate(self, how, *args, **kwargs)
451 result = grouped.apply(how, *args, **kwargs)
452 except (AttributeError, KeyError):
453 # we have a non-reducing function; try to evaluate
454 # alternatively we want to evaluate only a column of the input
(...)
458 # on Series, raising AttributeError or KeyError
459 # (depending on whether the column lookup uses getattr/__getitem__)
--> 460 result = grouped.apply(how, *args, **kwargs)
462 except ValueError as err:
463 if "Must produce aggregated value" in str(err):
464 # raised in _aggregate_named
465 # see test_apply_without_aggregation, test_apply_with_mutated_index
File ~/py38/lib/python3.8/site-packages/pandas/core/groupby/groupby.py:1543, in GroupBy.apply(self, func, *args, **kwargs)
1540 with np.errstate(all="ignore"):
1541 return func(g, *args, **kwargs)
-> 1543 elif hasattr(nanops, "nan" + func):
1544 # TODO: should we wrap this in to e.g. _is_builtin_func?
1545 f = getattr(nanops, "nan" + func)
1547 else:
TypeError: can only concatenate str (not "NoneType") to str