Comments (9)
Hi @TTK95 , can you include some code example?
from clearml-server.
@jkhenning
I use it in my Callback to plot a sample image after each validation epoch.
On my workstation it works perfectly fine, but on my server it does not work after using this function.
Only the regular plt.show() is logged.
# import threading
import matplotlib
matplotlib.use('Agg') # Use the Agg backend (instead of TkAgg) to avoid crashes
from typing import Tuple, List
import lightning as L
import torch
from lightning.pytorch.callbacks import Callback
import matplotlib.pyplot as plt
import numpy as np
import traceback
from clearml import Task
# Resolve the ClearML logger once at import time. Start from "no logger" and
# only overwrite it when the lookup succeeds; an AttributeError here (e.g.
# Task.current_task() gave back an object without get_logger, such as None)
# simply leaves reporting disabled.
cm_logger = None
try:
    cm_logger = Task.current_task().get_logger()
except AttributeError:
    pass
class SampleMonitor(Callback):
    """
    A callback to monitor and log sample images during validation.

    Every ``plot_frequency`` epochs, and on the last epoch, one validation
    batch is run through the model and up to ``num_img`` samples are plotted
    (input / prediction / ground truth) and reported through the ClearML
    logger when one is available.

    Args:
        num_img (int): Number of images to plot.
        plot_frequency (int): Frequency (in epochs) of plotting images.
    """

    # Module names whose forward pass returns a tuple with the prediction
    # first: "VAE" -> (p, mu, logvar), "AE" -> (p, z_hat).
    _TUPLE_OUTPUT_TASKS = ("VAE", "AE")

    def __init__(self, num_img: int = 1, plot_frequency: int = 10):
        super().__init__()
        self.num_img = num_img
        self.plot_frequency = plot_frequency

    def infer(self, trainer, pl_module, data=None):
        """
        Perform inference on one validation batch and plot the results.

        Args:
            trainer (L.Trainer): The Lightning Trainer object.
            pl_module (L.LightningModule): The Lightning Module being trained.
            data: One validation batch laid out as
                ``(meta_data_dict, X, y, ...)``. When ``None``, the first
                batch of ``trainer.val_dataloaders`` is fetched.

        Returns:
            The model prediction for the batch (first element of the model
            output for "VAE"/"AE" modules, the raw output otherwise).
        """
        if data is None:
            data = next(iter(trainer.val_dataloaders))

        # Index instead of unpacking so one code path serves both the
        # 3-element batches and the 4-element "AE" batches.
        X, y = data[1], data[2]
        # Follow the module's device rather than assuming CUDA is present.
        X = X.to(pl_module.device)

        was_training = pl_module.training
        pl_module.eval()
        try:
            with torch.no_grad():
                output = pl_module(X)
        finally:
            # Leave the module in the mode we found it in.
            if was_training:
                pl_module.train()

        task = pl_module._get_name()
        p = output[0] if task in self._TUPLE_OUTPUT_TASKS else output

        # Bound by the real batch length: the loader's batch_size attribute
        # may be None or larger than the batch that was actually produced.
        for i in range(min(self.num_img, len(X))):
            X_ = X[i].permute(1, 2, 0).detach().cpu().numpy()
            y_ = y[i].permute(1, 2, 0).detach().cpu().numpy()
            p_ = p[i].permute(1, 2, 0).detach().cpu().numpy()
            self.plot_images(X_, y_, p_, iteration=trainer.current_epoch, i=i)
        return p

    def on_validation_epoch_end(self, trainer: "L.Trainer", pl_module: "L.LightningModule") -> None:
        """
        Called at the end of the validation epoch.

        Plots samples when the current epoch is a multiple of
        ``plot_frequency`` or is the last epoch. Any exception is printed and
        swallowed so that a plotting problem never aborts training.

        Args:
            trainer (L.Trainer): The Lightning Trainer object.
            pl_module (L.LightningModule): The Lightning Module being trained.
        """
        try:
            if trainer.current_epoch % self.plot_frequency == 0 or trainer.current_epoch == trainer.max_epochs - 1:
                # Fetch the batch once and hand it to infer(), so only one
                # dataloader iterator is created per plotting epoch.
                data = next(iter(trainer.val_dataloaders))
                self.infer(trainer, pl_module, data)
        except Exception as e:
            print("Exception in SampleMonitor:")
            traceback.print_exc()
            print(e)

    def plot_images(self, X, y, p, iteration, i, title="SampleMonitor"):
        """
        Plots the input, predicted and ground truth images side by side and
        reports the figure to ClearML when a logger is available.

        Args:
            X (numpy.ndarray): The input image.
            y (numpy.ndarray): The ground truth image.
            p (numpy.ndarray): The predicted image.
            iteration (int): Iteration the report is attached to.
            i (int): Sample index, used in the series name.
            title (str): Title under which the figure is reported.

        Returns:
            None. The figure is closed before returning.
        """
        # The module-level logger is captured at import time and stays None if
        # this module was imported before the task existed, so retry here.
        logger = cm_logger
        if logger is None:
            current_task = Task.current_task()
            if current_task is not None:
                logger = current_task.get_logger()

        fig, ax = plt.subplots(1, 3, figsize=(16, 9))
        try:
            ax[0].imshow(X, vmin=0, vmax=1)
            ax[0].set_title("Input")
            ax[1].imshow(p, vmin=0, vmax=1)
            ax[1].set_title("Prediction")
            ax[2].imshow(y, vmin=0, vmax=1)
            ax[2].set_title("Groundtruth")
            if logger:
                # save as plot
                logger.report_matplotlib_figure(
                    title=title,
                    series=f"Sample {i}",
                    iteration=iteration,
                    figure=fig,
                    report_interactive=True,
                )
                # save as image
                logger.report_matplotlib_figure(
                    title=title,
                    series=f"Sample {i}",
                    iteration=iteration,
                    figure=fig,
                    report_image=True,
                )
        finally:
            # Always release the figure, even if reporting raised; otherwise
            # pyplot keeps every figure alive across epochs.
            plt.close(fig)
from clearml-server.
I also reinstalled ClearML, matplotlib, Pillow and tensorboard. Maybe I am missing something? Maybe some OS package that is not installed on my server?
from clearml-server.
Someone purged python3.10 on the server, and because of the purge a lot of packages were removed. I reinstalled the following packages and it seems to work again now:
Packages:
Purge: python3-incremental:amd64 (21.3.0-1), lsb-release:amd64 (11.1.0ubuntu4), cifs-utils:amd64 (2:6.14-1ubuntu0.1), python3-blinker:amd64 (1.4+dfsg1-0.4), networkd-dispatcher:amd64 (2.1-2ubuntu0.22.04.2), pastebinit:amd64 (1.5.1-1ubuntu1), ssh-import-id:amd64 (5.11-0ubuntu1), python3.10:amd64 (3.10.12-1~22.04.2), python3-colorama:amd64 (0.4.4-1), python3-gi:amd64 (3.42.1-0ubuntu1), python3-tz:amd64 (2022.1-1ubuntu0.22.04.1), python3-pyasn1:amd64 (0.4.8-1), python3-samba:amd64 (2:4.15.13+dfsg-0ubuntu1.2), python3-distupgrade:amd64 (1:22.04.17), python3-dnspython:amd64 (2.1.0-1ubuntu1), sosreport:amd64 (4.4-1ubuntu1.22.04.1), python3-cffi-backend:amd64 (1.15.0-1build2), samba-vfs-modules:amd64 (2:4.15.13+dfsg-0ubuntu1.2), byobu:amd64 (5.133-1), python3-lib2to3:amd64 (3.10.8-1~22.04), ufw:amd64 (0.36.1-4ubuntu0.1), python3-lazr.restfulclient:amd64 (0.14.4-1), python3-jeepney:amd64 (0.7.1-3), ubuntu-drivers-common:amd64 (1:0.9.6.2~0.22.04.4), python3-ptyprocess:amd64 (0.7.0-3), python3-hyperlink:amd64 (21.0.0-3), python3-twisted:amd64 (22.1.0-2ubuntu2.3), python3-distro:amd64 (1.7.0-1), software-properties-common:amd64 (0.99.22.7), samba:amd64 (2:4.15.13+dfsg-0ubuntu1.2), python3-requests:amd64 (2.25.1+dfsg-2ubuntu0.1), python3-importlib-metadata:amd64 (4.6.4-1), xfsprogs:amd64 (5.13.0-1ubuntu2), plymouth-theme-ubuntu-text:amd64 (0.9.5+git20211018-1ubuntu3), python3-httplib2:amd64 (0.20.2-2), python3-hamcrest:amd64 (2.0.2-2), python3:amd64 (3.10.6-1~22.04), landscape-common:amd64 (19.12-0ubuntu13), python3-urllib3:amd64 (1.26.5-1~exp1), python3-software-properties:amd64 (0.99.22.7), python3-pyparsing:amd64 (2.4.7-1), python3-lazr.uri:amd64 (1.0.6-2), python3-jsonpatch:amd64 (1.32-2), cloud-guest-utils:amd64 (0.32-22-g45fe84a5-0ubuntu1), python3-more-itertools:amd64 (8.10.0-2), python3-automat:amd64 (20.2.0-1), samba-dsdb-modules:amd64 (2:4.15.13+dfsg-0ubuntu1.2), samba-common-bin:amd64 (2:4.15.13+dfsg-0ubuntu1.2), python3-attr:amd64 (21.2.0-1), 
python3-pkg-resources:amd64 (59.6.0-1.2ubuntu0.22.04.1), python3-wadllib:amd64 (1.3.6-1), python3-gdbm:amd64 (3.10.8-1~22.04), python3-talloc:amd64 (2.3.3-2build1), python3-babel:amd64 (2.8.0+dfsg.1-7), python3-pexpect:amd64 (4.8.0-2ubuntu1), python3-jsonschema:amd64 (3.2.0-0ubuntu2), python3-oauthlib:amd64 (3.2.0-1ubuntu0.1), cloud-init:amd64 (23.2.2-0ubuntu0~22.04.1), python3-openssl:amd64 (21.0.0-1), python3-click:amd64 (8.0.3-1), python3-service-identity:amd64 (18.1.0-6), open-vm-tools:amd64 (2:12.1.5-3~ubuntu0.22.04.2), nvidia-prime:amd64 (0.8.17.1), python3-json-pointer:amd64 (2.0-0ubuntu1), update-manager-core:amd64 (1:22.04.10), ubuntu-server-minimal:amd64 (1.481.1), python3-jinja2:amd64 (3.0.3-1), python3-xkit:amd64 (0.5.0ubuntu5), python3-apt:amd64 (2.4.0ubuntu2), python3-serial:amd64 (3.5-1), python3-netifaces:amd64 (0.11.0-1build2), python3-certifi:amd64 (2020.6.20-1), python3-gpg:amd64 (1.16.0-1.2ubuntu4.1), python3-markupsafe:amd64 (2.0.1-2build1), python3-zope.interface:amd64 (5.4.0-1build1), screen-resolution-extra:amd64 (0.18.2), python3-distro-info:amd64 (1.1ubuntu0.1), python3-jwt:amd64 (2.3.0-1ubuntu0.2), nvidia-settings:amd64 (510.47.03-0ubuntu1), python3-ldb:amd64 (2:2.4.4-0ubuntu0.22.04.2), python3-six:amd64 (1.16.0-3ubuntu1), python3-tdb:amd64 (1.4.5-2build1), python3-markdown:amd64 (3.3.6-1), python3-constantly:amd64 (15.1.0-2), python3-yaml:amd64 (5.4.1-1ubuntu1), python3-newt:amd64 (0.52.21-5ubuntu2), samba-libs:amd64 (2:4.15.13+dfsg-0ubuntu1.2), python3-update-manager:amd64 (1:22.04.10), netplan.io:amd64 (0.105-0ubuntu2~22.04.3), python3-distutils:amd64 (3.10.8-1~22.04), python3-apport:amd64 (2.20.11-0ubuntu82.5), python3-keyring:amd64 (23.5.0-1), python3-commandnotfound:amd64 (22.04.0), ubuntu-server:amd64 (1.481.1), python3-idna:amd64 (3.3-1), python3-pygments:amd64 (2.11.2+dfsg-2), python3-bcrypt:amd64 (3.2.0-1build1), python3-chardet:amd64 (4.0.0-1), unattended-upgrades:amd64 (2.8ubuntu1), python3-magic:amd64 (2:0.4.24-2), 
python3-debconf:amd64 (1.5.79ubuntu1), python3-configobj:amd64 (5.0.6-5), python3-setuptools:amd64 (59.6.0-1.2ubuntu0.22.04.1), ubuntu-release-upgrader-core:amd64 (1:22.04.17), command-not-found:amd64 (22.04.0), python3-cryptography:amd64 (3.4.8-1ubuntu2), ubuntu-advantage-tools:amd64 (28.1~22.04), python3-launchpadlib:amd64 (1.10.16-1), python3-pyrsistent:amd64 (0.18.1-1build1), python3-dbus:amd64 (1.2.18-3build1), python3-requests-toolbelt:amd64 (0.9.1-1), python3-pyasn1-modules:amd64 (0.2.1-1), python3-problem-report:amd64 (2.20.11-0ubuntu82.5), apport:amd64 (2.20.11-0ubuntu82.5), python3-zipp:amd64 (1.0.0-3), python3-secretstorage:amd64 (3.3.1-1), python3-systemd:amd64 (234-3ubuntu2), ubuntu-minimal:amd64 (1.481.1), update-notifier-common:amd64 (3.192.54.6), python3-debian:amd64 (0.1.43ubuntu1.1)
Install:
sudo apt-get install python3-incremental lsb-release cifs-utils python3-blinker networkd-dispatcher pastebinit ssh-import-id python3.10 python3-colorama python3-gi python3-tz python3-pyasn1 python3-samba python3-distupgrade python3-dnspython sosreport python3-cffi-backend samba-vfs-modules byobu python3-lib2to3 ufw python3-lazr.restfulclient python3-jeepney ubuntu-drivers-common python3-ptyprocess python3-hyperlink python3-twisted python3-distro software-properties-common samba python3-requests python3-importlib-metadata xfsprogs plymouth-theme-ubuntu-text python3-httplib2 python3-hamcrest python3 landscape-common python3-urllib3 python3-software-properties python3-pyparsing python3-lazr.uri python3-jsonpatch cloud-guest-utils python3-more-itertools python3-automat samba-dsdb-modules samba-common-bin python3-attr python3-pkg-resources python3-wadllib python3-gdbm python3-talloc python3-babel python3-pexpect python3-jsonschema python3-oauthlib cloud-init python3-openssl python3-click python3-service-identity open-vm-tools nvidia-prime python3-json-pointer update-manager-core ubuntu-server-minimal python3-jinja2 python3-xkit python3-apt python3-serial python3-netifaces python3-certifi python3-gpg python3-markupsafe python3-zope.interface screen-resolution-extra python3-distro-info python3-jwt nvidia-settings python3-ldb python3-six python3-tdb python3-markdown python3-constantly python3-yaml python3-newt samba-libs python3-update-manager netplan.io python3-distutils python3-apport python3-keyring python3-commandnotfound ubuntu-server python3-idna python3-pygments python3-bcrypt python3-chardet unattended-upgrades python3-magic python3-debconf python3-configobj python3-setuptools ubuntu-release-upgrader-core command-not-found python3-cryptography ubuntu-advantage-tools python3-launchpadlib python3-pyrsistent python3-dbus python3-requests-toolbelt python3-pyasn1-modules python3-problem-report apport python3-zipp python3-secretstorage python3-systemd ubuntu-minimal 
update-notifier-common python3-debian
so probably some dependencies somewhere
from clearml-server.
Need to re-open...
I think it broke again after a routine update & upgrade, and I can't find the cause. It had been running normally the whole time, but when I started a new training run just now, the same problem came back. After the SampleMonitor runs, and specifically after it uses the logger for matplotlib, reporting just stops. When I then abort the training, the GUI shows only the total iterations, but nothing in the scalars section.
Start-Date: 2023-09-26 11:31:12 Commandline: apt upgrade Requested-By: Upgrade: udev:amd64 (249.11-0ubuntu3.9, 249.11-0ubuntu3.10), openssh-client:amd64 (1:8.9p1-3ubuntu0.3, 1:8.9p1-3ubuntu0.4), python3-samba:amd64 (2:4.15.13+dfsg-0ubuntu1.3, 2:4.15.13+dfsg-0ubuntu1.4), systemd-timesyncd:amd64 (249.11-0ubuntu3.9, 249.11-0ubuntu3.10), libpam-systemd:amd64 (249.11-0ubuntu3.9, 249.11-0ubuntu3.10), samba-vfs-modules:amd64 (2:4.15.13+dfsg-0ubuntu1.3, 2:4.15.13+dfsg-0ubuntu1.4), libsystemd0:amd64 (249.11-0ubuntu3.9, 249.11-0ubuntu3.10), samba:amd64 (2:4.15.13+dfsg-0ubuntu1.3, 2:4.15.13+dfsg-0ubuntu1.4), libnss-systemd:amd64 (249.11-0ubuntu3.9, 249.11-0ubuntu3.10), libwbclient0:amd64 (2:4.15.13+dfsg-0ubuntu1.3, 2:4.15.13+dfsg-0ubuntu1.4), openssh-server:amd64 (1:8.9p1-3ubuntu0.3, 1:8.9p1-3ubuntu0.4), libsmbclient:amd64 (2:4.15.13+dfsg-0ubuntu1.3, 2:4.15.13+dfsg-0ubuntu1.4), systemd:amd64 (249.11-0ubuntu3.9, 249.11-0ubuntu3.10), samba-dsdb-modules:amd64 (2:4.15.13+dfsg-0ubuntu1.3, 2:4.15.13+dfsg-0ubuntu1.4), libudev1:amd64 (249.11-0ubuntu3.9, 249.11-0ubuntu3.10), samba-common-bin:amd64 (2:4.15.13+dfsg-0ubuntu1.3, 2:4.15.13+dfsg-0ubuntu1.4), libc6:amd64 (2.35-0ubuntu3.1, 2.35-0ubuntu3.3), locales:amd64 (2.35-0ubuntu3.1, 2.35-0ubuntu3.3), smbclient:amd64 (2:4.15.13+dfsg-0ubuntu1.3, 2:4.15.13+dfsg-0ubuntu1.4), libc-dev-bin:amd64 (2.35-0ubuntu3.1, 2.35-0ubuntu3.3), openssh-sftp-server:amd64 (1:8.9p1-3ubuntu0.3, 1:8.9p1-3ubuntu0.4), samba-libs:amd64 (2:4.15.13+dfsg-0ubuntu1.3, 2:4.15.13+dfsg-0ubuntu1.4), libc-bin:amd64 (2.35-0ubuntu3.1, 2.35-0ubuntu3.3), libc-devtools:amd64 (2.35-0ubuntu3.1, 2.35-0ubuntu3.3), libc6-dev:amd64 (2.35-0ubuntu3.1, 2.35-0ubuntu3.3), systemd-sysv:amd64 (249.11-0ubuntu3.9, 249.11-0ubuntu3.10), samba-common:amd64 (2:4.15.13+dfsg-0ubuntu1.3, 2:4.15.13+dfsg-0ubuntu1.4), thermald:amd64 (2.4.9-1ubuntu0.3, 2.4.9-1ubuntu0.4), ubuntu-advantage-tools:amd64 (28.1~22.04, 29.4~22.04) End-Date: 2023-09-26 11:31:31
from clearml-server.
Hi @TTK95 , I'm not sure what I'm seeing above...?
from clearml-server.
This is a log entry from the "apt" package manager on a Linux system. It shows information about a software upgrade that took place on September 26, 2023, starting at 11:31:12 and finishing at 11:31:31. The upgrade involved various packages with their respective versions before and after the upgrade listed.
So my question is, can one of the packages affect ClearML, especially the matplotlib reporting? @jkhenning
from clearml-server.
Doesn't look like it...
from clearml-server.
that's what my
less /var/log/apt/history.log
is showing...
from clearml-server.
Related Issues (20)
- Feature Request: Get server configuration parameters from AWS Secrets Manager [security]
- [Customising web-ui] - Projects are loading tasks in web ui of self hosting server but i want them to show datasets HOT 3
- generating clearml-reports HOT 13
- How to write artifacts to S3 from server side? HOT 1
- Nginx Not Loading Plotly.js Resource: ClearML Self-Hosted Docker HOT 7
- Failed Navigate From Overview to Experiments Details HOT 4
- Async Delete Always Failed when Removing Experiments (using Minio)
- nginx 0.6.x < 1.20.1 1-Byte Memory Overwrite RCE vulnerability HOT 2
- ElasticSearch UI and Redis UI? HOT 2
- The problem with scalars HOT 12
- Curl 7.69 < 8.4.0 Heap Buffer Overflow vulnerability HOT 2
- OpenSSL 1.1.1 < 1.1.1x Vulnerability HOT 1
- Elasticsearch image tag 7.17 does not exist HOT 4
- Git package is not installed by default in node:20-bookworm-slim HOT 1
- SERVER UNAVAILABLE HOT 4
- APP Credentials disapper in webapp HOT 20
- Scalar graphs legend is too narrow for experiments with long names HOT 7
- Update from 1.14.1 to 1.15.0 leads to several fatal issues when booting HOT 3
- AttributeError: module 'pkgutil' has no attribute 'ImpImporter'. HOT 3
- Web server Ipv6 error
Recommend Projects
-
React
A declarative, efficient, and flexible JavaScript library for building user interfaces.
-
Vue.js
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
-
Typescript
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
-
TensorFlow
An Open Source Machine Learning Framework for Everyone
-
Django
The Web framework for perfectionists with deadlines.
-
Laravel
A PHP framework for web artisans
-
D3
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
-
Recommend Topics
-
javascript
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
-
web
Some thing interesting about web. New door for the world.
-
server
A server is a program made to process requests and deliver data to clients.
-
Machine learning
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
-
Visualization
Some thing interesting about visualization, use data art
-
Game
Some thing interesting about game, make everyone happy.
Recommend Org
-
Facebook
We are working to build community through open source technology. NB: members must have two-factor auth.
-
Microsoft
Open source projects and samples from Microsoft.
-
Google
Google ❤️ Open Source for everyone.
-
Alibaba
Alibaba Open Source for everyone
-
D3
Data-Driven Documents codes.
-
Tencent
China tencent open source team.
from clearml-server.