I am sanity-checking our real-world RGB-D capture (collected with RTAB-Map) using gradslam for conceptgraphs.
Everything works fine with Replica, but as soon as I load our own dataset, device-side assertions are triggered. Unfortunately, the error is non-deterministic: it is not always thrown on the same frame.
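To get a usable stack trace despite the non-determinism, I rerun with synchronous kernel launches (standard PyTorch debugging; the env var must be set before anything touches CUDA). A minimal sketch:

# minimal sketch: synchronous launches make the device-side assert surface
# at the offending op instead of at a later, unrelated kernel
import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"  # must be set before CUDA is initialized
import torch  # noqa: E402 -- imported only after the env var is in place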
This is the data loader I wrote to load RTAB-Map datasets in conceptgraphs:
import glob
import os
from typing import Any

import numpy as np
import torch
from natsort import natsorted

# GradSLAMDataset is the dataset base class from conceptgraphs
# (adjust the import path to your checkout).
from conceptgraph.dataset.datasets_common import GradSLAMDataset


class RTABmapDataset(GradSLAMDataset):
    """An RGB-D dataset from an RTAB-Map export."""

    def __init__(
        self,
        config_dict: dict[str, Any],
        basedir: str,
        sequence: str,
        stride: int | None = None,
        start: int | None = 0,
        end: int | None = -1,
        desired_height: int | None = 480,
        desired_width: int | None = 640,
        load_embeddings: bool | None = False,
        embedding_dir: str | None = "embeddings",
        embedding_dim: int | None = 512,
        **kwargs,
    ):
        self.input_folder = os.path.join(basedir, sequence)
        assert os.path.exists(self.input_folder), "Unknown dataset dir"
        self.pose_path = os.path.join(self.input_folder, "poses.txt")
        super().__init__(
            config_dict,
            stride=stride,
            start=start,
            end=end,
            desired_height=desired_height,
            desired_width=desired_width,
            load_embeddings=load_embeddings,
            embedding_dir=embedding_dir,
            embedding_dim=embedding_dim,
            **kwargs,
        )

    def get_filepaths(self):
        # Filter out frames without an optimized pose: the last column of
        # poses.txt holds the node id, which matches the image filenames.
        optim = set(
            np.genfromtxt(fname=self.pose_path, delimiter=" ", skip_header=1)[:, -1].astype(int)
        )

        def isinoptim(path: str) -> bool:
            stem = os.path.basename(path).split(".")[0]
            return int(stem) in optim

        embedding_paths = None
        color_paths = glob.glob(os.path.join(self.input_folder, "rgb", "*.jpg"))
        color_paths = natsorted(filter(isinoptim, color_paths))
        depth_paths = glob.glob(os.path.join(self.input_folder, "depth", "*.png"))
        depth_paths = natsorted(filter(isinoptim, depth_paths))
        return color_paths, depth_paths, embedding_paths

    def load_poses(self):
        from scipy.spatial import transform

        trajectory = np.genfromtxt(fname=self.pose_path, delimiter=" ", skip_header=1)
        trajectory = trajectory[:, 1:-1]  # discard the timestamp and key columns
        # Identity for now: no axis flip is applied, assuming RTAB-Map already
        # follows the expected RH coordinate system (x right, y up, z backward).
        M = torch.eye(4).float()
        P = []
        for state in trajectory:
            # state is [tx, ty, tz, qx, qy, qz, qw]; scipy expects (x, y, z, w).
            C_to_W = np.zeros((4, 4))
            C_to_W[3, 3] = 1.0
            C_to_W[:3, :3] = transform.Rotation.from_quat(state[3:]).as_matrix()
            C_to_W[:3, 3] = state[:3]
            C_to_W = torch.from_numpy(C_to_W).float()
            C_to_W = M @ C_to_W @ M.T
            P.append(C_to_W)
        return P
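As a quick consistency check on the loader (a minimal sketch; config_dict, basedir, and sequence are placeholders for our actual values), I verify that the filtered color, depth, and pose lists stay aligned, since a mismatch would pair depth maps with the wrong poses and produce garbage projections downstream:

import os

def stem(path: str) -> str:
    return os.path.basename(path).split(".")[0]

ds = RTABmapDataset(config_dict, basedir="/data/rtabmap", sequence="seq01")
colors, depths, _ = ds.get_filepaths()
poses = ds.load_poses()
# the base class pairs the i-th image with the i-th pose, so lengths must match
assert len(colors) == len(depths) == len(poses), (len(colors), len(depths), len(poses))
for c, d in zip(colors, depths):
    assert stem(c) == stem(d), (c, d)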
The loader runs with the following config, from a multi-RealSense setup (D435i + T265):
camera_params:
  cx: 318.409
  cy: 244.501
  image_height: 480
  image_width: 640
  fx: 608.794
  fy: 608.776
  png_depth_scale: 1000.0
dataset_name: rtabmap
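Since a wrong png_depth_scale or invalid depth values could push projected indices out of range, I spot-check a raw depth frame like this (a sketch; the path is a placeholder, assuming RTAB-Map exports 16-bit PNGs in millimeters):

import numpy as np
from PIL import Image

depth = np.asarray(Image.open("/data/rtabmap/seq01/depth/1.png"))  # placeholder path
print(depth.dtype, depth.shape)  # expect uint16 and (480, 640)
print(depth.min(), depth.max())  # millimeter values: indoor scenes roughly 300-10000
print((depth == 0).mean())       # fraction of invalid (zero) pixels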
After processing roughly 50 frames, the following warning repeats over and over:
/path/to/venv/lib/python3.10/site-packages/gradslam/slam/fusionutils.py:188: RuntimeWarning: Max of dot product was 1.3151907920837402 > 1. Inputs were not normalized along dim (-1). Was this intentional?
warnings.warn(
2%|▏         | 67/3170 [00:09<10:56, 4.73it/s]
/path/to/venv/lib/python3.10/site-packages/gradslam/slam/fusionutils.py:188: RuntimeWarning: Max of dot product was 1.4051384925842285 > 1. Inputs were not normalized along dim (-1). Was this intentional?
warnings.warn(
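The warning comes from the dot product between map normals and frame normals in fusionutils; if the inputs were unit length, it could never exceed 1. The check can be reproduced on our own data roughly like this (a sketch; I assume rgbdimages is the gradslam RGBDImages object and that normal_map is its per-pixel normals attribute):

import torch

norms = rgbdimages.normal_map.norm(dim=-1)     # per-pixel normal lengths
print(norms.min().item(), norms.max().item())  # valid pixels should be ~1.0
print(torch.isnan(rgbdimages.normal_map).any().item())  # NaNs would corrupt the fusion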
culminating in the following error:
../aten/src/ATen/native/cuda/IndexKernel.cu:92: operator(): block: [31679,0,0], thread: [122,0,0] Assertion `-sizes[i] <= index && index < sizes[i] && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/IndexKernel.cu:92: operator(): block: [31679,0,0], thread: [123,0,0] Assertion `-sizes[i] <= index && index < sizes[i] && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/IndexKernel.cu:92: operator(): block: [31679,0,0], thread: [124,0,0] Assertion `-sizes[i] <= index && index < sizes[i] && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/IndexKernel.cu:92: operator(): block: [31679,0,0], thread: [125,0,0] Assertion `-sizes[i] <= index && index < sizes[i] && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/IndexKernel.cu:92: operator(): block: [31679,0,0], thread: [126,0,0] Assertion `-sizes[i] <= index && index < sizes[i] && "index out of bounds"` failed.
../aten/src/ATen/native/cuda/IndexKernel.cu:92: operator(): block: [31679,0,0], thread: [127,0,0] Assertion `-sizes[i] <= index && index < sizes[i] && "index out of bounds"` failed.
4%|▎         | 123/3170 [00:26<11:01, 4.60it/s]
Traceback (most recent call last):
File "/root/conceptgraphs/experiments/rtabmap-lingotto-600p-floor1-north/../../toolchain/run_slam_rgb.py", line 133, in
main(args)
File "/root/conceptgraphs/experiments/rtabmap-lingotto-600p-floor1-north/../../toolchain/run_slam_rgb.py", line 108, in main
pointclouds, _ = slam.step(pointclouds, frame_cur, frame_prev)
File "/root/miniconda3/envs/conceptgraphs/lib/python3.10/site-packages/gradslam/slam/icpslam.py", line 180, in step
pointclouds = self._map(pointclouds, live_frame, inplace)
File "/root/miniconda3/envs/conceptgraphs/lib/python3.10/site-packages/gradslam/slam/pointfusion.py", line 115, in _map
return update_map_fusion(
File "/root/miniconda3/envs/conceptgraphs/lib/python3.10/site-packages/gradslam/slam/fusionutils.py", line 899, in update_map_fusion
pc2im_bnhw = find_correspondences(pointclouds, rgbdimages, dist_th, dot_th)
File "/root/miniconda3/envs/conceptgraphs/lib/python3.10/site-packages/gradslam/slam/fusionutils.py", line 573, in find_correspondences
pc2im_bnhw, _ = find_similar_map_points(
File "/root/miniconda3/envs/conceptgraphs/lib/python3.10/site-packages/gradslam/slam/fusionutils.py", line 388, in find_similar_map_points
frame_points[pc2im_bnhw[:, 0], pc2im_bnhw[:, 1]] = vertex_maps[
RuntimeError: CUDA error: device-side assert triggered
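To localize the bad input, I am considering guarding each frame before it is fused, since non-finite depth or poses are a plausible source of out-of-range projection indices. A sketch around the loop in run_slam_rgb.py (check_frame is a hypothetical helper; I assume frame_cur exposes gradslam's depth_image and poses attributes):

import torch

def check_frame(frame):
    # hypothetical guard: fail fast, on the CPU side, before the async
    # CUDA kernels can trip a deferred device-side assert
    assert torch.isfinite(frame.depth_image).all(), "non-finite depth"
    assert (frame.depth_image >= 0).all(), "negative depth"
    assert torch.isfinite(frame.poses).all(), "non-finite pose"

check_frame(frame_cur)
pointclouds, _ = slam.step(pointclouds, frame_cur, frame_prev)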
The RGB-D capture is 3170 frames long and covers an area of roughly 30 x 10 meters.
Any idea what could trigger the error?
- PyTorch version (e.g., 1.0): 2.1.2+cu121
- OS: Ubuntu 22.04.4 LTS (x86_64)
- How you installed PyTorch (conda, pip, source): pip
- Python version: 3.10.13
- CUDA/cuDNN version: 11.8.89
- GPU models and configuration: GPU 0: NVIDIA A10G