`InternalError Traceback (most recent call last)
Cell In[50], line 1
----> 1 trainer.estimator.train(epochs=5)
File ~/miniconda3/envs/ncem/lib/python3.8/site-packages/ncem/estimators/base_estimator.py:1262, in Estimator.train(self, epochs, epochs_warmup, max_steps_per_epoch, batch_size, validation_batch_size, max_validation_steps, shuffle_buffer_size, patience, lr_schedule_min_lr, lr_schedule_factor, lr_schedule_patience, initial_epoch, monitor_partition, monitor_metric, log_dir, callbacks, early_stopping, reduce_lr_plateau, pretrain_decoder, decoder_epochs, decoder_patience, decoder_callbacks, aggressive, aggressive_enc_patience, aggressive_epochs, seed, **kwargs)
1245 self.train_normal(
1246 epochs=epochs_warmup,
1247 patience=patience,
(...)
1258 **kwargs,
1259 )
1260 initial_epoch += epochs_warmup
-> 1262 self.train_normal(
1263 epochs=epochs,
1264 patience=patience,
1265 lr_schedule_min_lr=lr_schedule_min_lr,
1266 lr_schedule_factor=lr_schedule_factor,
1267 lr_schedule_patience=lr_schedule_patience,
1268 initial_epoch=initial_epoch,
1269 monitor_partition=monitor_partition,
1270 monitor_metric=monitor_metric,
1271 log_dir=log_dir,
1272 callbacks=callbacks,
1273 early_stopping=early_stopping,
1274 reduce_lr_plateau=reduce_lr_plateau,
1275 **kwargs,
1276 )
File ~/miniconda3/envs/ncem/lib/python3.8/site-packages/ncem/estimators/base_estimator.py:1366, in Estimator.train_normal(self, epochs, patience, lr_schedule_min_lr, lr_schedule_factor, lr_schedule_patience, initial_epoch, monitor_partition, monitor_metric, log_dir, callbacks, early_stopping, reduce_lr_plateau, **kwargs)
1362 if callbacks is not None:
1363 # callbacks needs to be a list
1364 cbs += callbacks
-> 1366 history = self.model.training_model.fit(
1367 x=self.train_dataset,
1368 epochs=epochs,
1369 initial_epoch=initial_epoch,
1370 steps_per_epoch=self.steps_per_epoch,
1371 callbacks=cbs,
1372 validation_data=self.eval_dataset,
1373 validation_steps=self.validation_steps,
1374 verbose=2,
1375 **kwargs,
1376 ).history
1377 for k, v in history.items(): # append to history if train() has been called before.
1378 if k in self.history.keys():
File ~/miniconda3/envs/ncem/lib/python3.8/site-packages/keras/utils/traceback_utils.py:70, in filter_traceback.<locals>.error_handler(*args, **kwargs)
67 filtered_tb = _process_traceback_frames(e.__traceback__)
68 # To get the full stack trace, call:
69 # tf.debugging.disable_traceback_filtering()
---> 70 raise e.with_traceback(filtered_tb) from None
71 finally:
72 del filtered_tb
File ~/miniconda3/envs/ncem/lib/python3.8/site-packages/tensorflow/python/eager/execute.py:52, in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
50 try:
51 ctx.ensure_initialized()
---> 52 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
53 inputs, attrs, num_outputs)
54 except core._NotOkStatusException as e:
55 if name is not None:
InternalError: Graph execution error:
Detected at node 'StatefulPartitionedCall' defined at (most recent call last):
File "/home/duan/miniconda3/envs/ncem/lib/python3.8/runpy.py", line 194, in _run_module_as_main
return _run_code(code, main_globals, None,
File "/home/duan/miniconda3/envs/ncem/lib/python3.8/runpy.py", line 87, in _run_code
exec(code, run_globals)
File "/home/duan/miniconda3/envs/ncem/lib/python3.8/site-packages/ipykernel_launcher.py", line 17, in <module>
app.launch_new_instance()
File "/home/duan/miniconda3/envs/ncem/lib/python3.8/site-packages/traitlets/config/application.py", line 1043, in launch_instance
app.start()
File "/home/duan/miniconda3/envs/ncem/lib/python3.8/site-packages/ipykernel/kernelapp.py", line 725, in start
self.io_loop.start()
File "/home/duan/miniconda3/envs/ncem/lib/python3.8/site-packages/tornado/platform/asyncio.py", line 215, in start
self.asyncio_loop.run_forever()
File "/home/duan/miniconda3/envs/ncem/lib/python3.8/asyncio/base_events.py", line 570, in run_forever
self._run_once()
File "/home/duan/miniconda3/envs/ncem/lib/python3.8/asyncio/base_events.py", line 1859, in _run_once
handle._run()
File "/home/duan/miniconda3/envs/ncem/lib/python3.8/asyncio/events.py", line 81, in _run
self._context.run(self._callback, *self._args)
File "/home/duan/miniconda3/envs/ncem/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 513, in dispatch_queue
await self.process_one()
File "/home/duan/miniconda3/envs/ncem/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 502, in process_one
await dispatch(*args)
File "/home/duan/miniconda3/envs/ncem/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 409, in dispatch_shell
await result
File "/home/duan/miniconda3/envs/ncem/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 729, in execute_request
reply_content = await reply_content
File "/home/duan/miniconda3/envs/ncem/lib/python3.8/site-packages/ipykernel/ipkernel.py", line 422, in do_execute
res = shell.run_cell(
File "/home/duan/miniconda3/envs/ncem/lib/python3.8/site-packages/ipykernel/zmqshell.py", line 540, in run_cell
return super().run_cell(*args, **kwargs)
File "/home/duan/miniconda3/envs/ncem/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2961, in run_cell
result = self._run_cell(
File "/home/duan/miniconda3/envs/ncem/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3016, in _run_cell
result = runner(coro)
File "/home/duan/miniconda3/envs/ncem/lib/python3.8/site-packages/IPython/core/async_helpers.py", line 129, in pseudo_sync_runner
coro.send(None)
File "/home/duan/miniconda3/envs/ncem/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3221, in run_cell_async
has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
File "/home/duan/miniconda3/envs/ncem/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3400, in run_ast_nodes
if await self.run_code(code, result, async_=asy):
File "/home/duan/miniconda3/envs/ncem/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3460, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "/tmp/ipykernel_3662462/1675290643.py", line 1, in <module>
trainer.estimator.train(epochs=5)
File "/home/duan/miniconda3/envs/ncem/lib/python3.8/site-packages/ncem/estimators/base_estimator.py", line 1262, in train
self.train_normal(
File "/home/duan/miniconda3/envs/ncem/lib/python3.8/site-packages/ncem/estimators/base_estimator.py", line 1366, in train_normal
history = self.model.training_model.fit(
File "/home/duan/miniconda3/envs/ncem/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
return fn(*args, **kwargs)
File "/home/duan/miniconda3/envs/ncem/lib/python3.8/site-packages/keras/engine/training.py", line 1650, in fit
tmp_logs = self.train_function(iterator)
File "/home/duan/miniconda3/envs/ncem/lib/python3.8/site-packages/keras/engine/training.py", line 1249, in train_function
return step_function(self, iterator)
File "/home/duan/miniconda3/envs/ncem/lib/python3.8/site-packages/keras/engine/training.py", line 1233, in step_function
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "/home/duan/miniconda3/envs/ncem/lib/python3.8/site-packages/keras/engine/training.py", line 1222, in run_step
outputs = model.train_step(data)
File "/home/duan/miniconda3/envs/ncem/lib/python3.8/site-packages/keras/engine/training.py", line 1027, in train_step
self.optimizer.minimize(loss, self.trainable_variables, tape=tape)
File "/home/duan/miniconda3/envs/ncem/lib/python3.8/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 527, in minimize
self.apply_gradients(grads_and_vars)
File "/home/duan/miniconda3/envs/ncem/lib/python3.8/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 1140, in apply_gradients
return super().apply_gradients(grads_and_vars, name=name)
File "/home/duan/miniconda3/envs/ncem/lib/python3.8/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 634, in apply_gradients
iteration = self._internal_apply_gradients(grads_and_vars)
File "/home/duan/miniconda3/envs/ncem/lib/python3.8/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 1166, in _internal_apply_gradients
return tf.internal.distribute.interim.maybe_merge_call(
File "/home/duan/miniconda3/envs/ncem/lib/python3.8/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 1216, in _distributed_apply_gradients_fn
distribution.extended.update(
File "/home/duan/miniconda3/envs/ncem/lib/python3.8/site-packages/keras/optimizers/optimizer_experimental/optimizer.py", line 1211, in apply_grad_to_update_var
return self._update_step_xla(grad, var, id(self._var_key(var)))
Node: 'StatefulPartitionedCall'
libdevice not found at ./libdevice.10.bc
[[{{node StatefulPartitionedCall}}]] [Op:__inference_train_function_52682]`