diff --git a/ml3d/datasets/pandaset.py b/ml3d/datasets/pandaset.py
index 6037fc81..d39c620b 100644
--- a/ml3d/datasets/pandaset.py
+++ b/ml3d/datasets/pandaset.py
@@ -1,25 +1,46 @@
 import os
 from os.path import join
-import numpy as np
-import pandas as pd
 from pathlib import Path
 import logging
+import numpy as np
+import pandas as pd
 
 from .base_dataset import BaseDataset, BaseDatasetSplit
 from ..utils import make_dir, DATASET
 
 log = logging.getLogger(__name__)
 
+
 class Pandaset(BaseDataset):
-    """ This class is used to create a dataset based on the Pandaset autonomous
+    """This class is used to create a dataset based on the Pandaset autonomous
     driving dataset.
 
-    https://pandaset.org/
-
-    The dataset includes 42 semantic classes and covers more than 100 scenes,
-    each of which is 8 seconds long.
-
+    PandaSet aims to promote and advance research and development in autonomous
+    driving and machine learning. The first open-source AV dataset available for
+    both academic and commercial use, PandaSet combines Hesai’s best-in-class
+    LiDAR sensors with Scale AI’s high-quality data annotation.
+
+    PandaSet features data collected using a forward-facing LiDAR with
+    image-like resolution (PandarGT) as well as a mechanical spinning LiDAR
+    (Pandar64). The collected data was annotated with a combination of cuboid
+    and segmentation annotation (Scale 3D Sensor Fusion Segmentation).
+
+    It features::
+
+        - 48,000+ camera images
+        - 16,000+ LiDAR sweeps
+        - 100+ scenes of 8s each
+        - 28 annotation classes
+        - 37 semantic segmentation labels
+        - Full sensor suite: 1x mechanical spinning LiDAR, 1x forward-facing LiDAR, 6x cameras, On-board GPS/IMU
+
+    Website: https://pandaset.org/
+    Code: https://github.com/scaleapi/pandaset-devkit
+    Download: https://www.kaggle.com/datasets/usharengaraju/pandaset-dataset/data
+    Data License: CC0: Public Domain (https://scale.com/legal/pandaset-terms-of-use)
+    Citation: https://arxiv.org/abs/2112.12610
     """
+
     def __init__(self,
                  dataset_path,
                  name="Pandaset",
@@ -27,30 +48,34 @@ def __init__(self,
                  use_cache=False,
                  ignored_label_inds=[],
                  test_result_folder='./logs/test_log',
-                 test_split=['115', '116', '117', '119', '120', '124', '139', '149', '158'],
+                 test_split=[
+                     '115', '116', '117', '119', '120', '124', '139', '149',
+                     '158'
+                 ],
                  training_split=[
-                    '001', '002', '003', '005', '011', '013', '015', '016',
-                    '017', '019', '021', '023', '024', '027', '028', '029',
-                    '030', '032', '033', '034', '035', '037', '038', '039',
-                    '040', '041', '042', '043', '044', '046', '052', '053',
-                    '054', '056', '057', '058', '064', '065', '066', '067',
-                    '070', '071', '072', '073', '077', '078', '080', '084',
-                    '088', '089', '090', '094', '095', '097', '098', '101',
-                    '102', '103', '105', '106', '109', '110', '112', '113'
+                     '001', '002', '003', '005', '011', '013', '015', '016',
+                     '017', '019', '021', '023', '024', '027', '028', '029',
+                     '030', '032', '033', '034', '035', '037', '038', '039',
+                     '040', '041', '042', '043', '044', '046', '052', '053',
+                     '054', '056', '057', '058', '064', '065', '066', '067',
+                     '070', '071', '072', '073', '077', '078', '080', '084',
+                     '088', '089', '090', '094', '095', '097', '098', '101',
+                     '102', '103', '105', '106', '109', '110', '112', '113'
+                 ],
+                 validation_split=['122', '123'],
+                 all_split=[
+                     '001', '002', '003', '005', '011', '013', '015', '016',
+                     '017', '019', '021', '023', '024', '027', '028', '029',
+                     '030', '032', '033', '034', '035', '037', '038', '039',
+                     '040', '041', '042', '043', '044', '046', '052', '053',
+                     '054', '056', '057', '058', '064', '065', '066', '067',
+                     '069', '070', '071', '072', '073', '077', '078', '080',
+                     '084', '088', '089', '090', '094', '095', '097', '098',
+                     '101', '102', '103', '105', '106', '109', '110', '112',
+                     '113', '115', '116', '117', '119', '120', '122', '123',
+                     '124', '139', '149', '158'
                  ],
-                 validation_split=['122', '123'],
-                 all_split=['001', '002', '003', '005', '011', '013', '015', '016',
-                            '017', '019', '021', '023', '024', '027', '028', '029',
-                            '030', '032', '033', '034', '035', '037', '038', '039',
-                            '040', '041', '042', '043', '044', '046', '052', '053',
-                            '054', '056', '057', '058', '064', '065', '066', '067',
-                            '069', '070', '071', '072', '073', '077', '078', '080',
-                            '084', '088', '089', '090', '094', '095', '097', '098',
-                            '101', '102', '103', '105', '106', '109', '110', '112',
-                            '113', '115', '116', '117', '119', '120', '122', '123',
-                            '124', '139', '149', '158'],
                  **kwargs):
-
         """Initialize the function by passing the dataset and other details.
 
         Args:
@@ -79,7 +104,7 @@ def __init__(self,
         self.label_to_names = self.get_label_to_names()
         self.num_classes = len(self.label_to_names)
         self.label_values = np.sort([k for k, v in self.label_to_names.items()])
-
+
     @staticmethod
     def get_label_to_names():
         """Returns a label to names dictionary object.
@@ -89,48 +114,48 @@ def get_label_to_names():
             values are the corresponding names.
         """
         label_to_names = {
-             1: "Reflection",
-             2: "Vegetation",
-             3: "Ground",
-             4: "Road",
-             5: "Lane Line Marking",
-             6: "Stop Line Marking",
-             7: "Other Road Marking",
-             8: "Sidewalk",
-             9: "Driveway",
-             10: "Car",
-             11: "Pickup Truck",
-             12: "Medium-sized Truck",
-             13: "Semi-truck",
-             14: "Towed Object",
-             15: "Motorcycle",
-             16: "Other Vehicle - Construction Vehicle",
-             17: "Other Vehicle - Uncommon",
-             18: "Other Vehicle - Pedicab",
-             19: "Emergency Vehicle",
-             20: "Bus",
-             21: "Personal Mobility Device",
-             22: "Motorized Scooter",
-             23: "Bicycle",
-             24: "Train",
-             25: "Trolley",
-             26: "Tram / Subway",
-             27: "Pedestrian",
-             28: "Pedestrian with Object",
-             29: "Animals - Bird",
-             30: "Animals - Other",
-             31: "Pylons",
-             32: "Road Barriers",
-             33: "Signs",
-             34: "Cones",
-             35: "Construction Signs",
-             36: "Temporary Construction Barriers",
-             37: "Rolling Containers",
-             38: "Building",
+            1: "Reflection",
+            2: "Vegetation",
+            3: "Ground",
+            4: "Road",
+            5: "Lane Line Marking",
+            6: "Stop Line Marking",
+            7: "Other Road Marking",
+            8: "Sidewalk",
+            9: "Driveway",
+            10: "Car",
+            11: "Pickup Truck",
+            12: "Medium-sized Truck",
+            13: "Semi-truck",
+            14: "Towed Object",
+            15: "Motorcycle",
+            16: "Other Vehicle - Construction Vehicle",
+            17: "Other Vehicle - Uncommon",
+            18: "Other Vehicle - Pedicab",
+            19: "Emergency Vehicle",
+            20: "Bus",
+            21: "Personal Mobility Device",
+            22: "Motorized Scooter",
+            23: "Bicycle",
+            24: "Train",
+            25: "Trolley",
+            26: "Tram / Subway",
+            27: "Pedestrian",
+            28: "Pedestrian with Object",
+            29: "Animals - Bird",
+            30: "Animals - Other",
+            31: "Pylons",
+            32: "Road Barriers",
+            33: "Signs",
+            34: "Cones",
+            35: "Construction Signs",
+            36: "Temporary Construction Barriers",
+            37: "Rolling Containers",
+            38: "Building",
             39: "Other Static Object"
         }
         return label_to_names
-
+
     def get_split(self, split):
         """Returns a dataset split.
 
@@ -142,7 +167,7 @@ def get_split(self, split):
             A dataset split object providing the requested subset of the data.
         """
         return PandasetSplit(self, split=split)
-
+
     def get_split_list(self, split):
        """Returns the list of data splits available.
 
@@ -154,8 +179,8 @@ def get_split_list(self, split):
             A dataset split object providing the requested subset of the data.
 
         Raises:
-            ValueError: Indicates that the split name passed is incorrect. The split name should be one of
-            'training', 'test', 'validation', or 'all'.
+            ValueError: Indicates that the split name passed is incorrect. The
+                split name should be one of 'training', 'test', 'validation', or 'all'.
         """
         cfg = self.cfg
         dataset_path = cfg.dataset_path
@@ -179,7 +204,7 @@ def get_split_list(self, split):
                     file_list.append(join(pc_path, f))
 
         return file_list
-
+
     def is_tested(self, attr):
         """Checks if a datum in the dataset has been tested.
 
@@ -224,7 +249,7 @@ def save_test_result(self, results, attr):
 class PandasetSplit(BaseDatasetSplit):
     """This class is used to create a split for Pandaset dataset.
 
-    Args: 
+    Args:
         dataset: The dataset to split.
         split: A string identifying the dataset split that is usually one of
             'training', 'test', 'validation', or 'all'.
@@ -233,6 +258,7 @@ class PandasetSplit(BaseDatasetSplit):
     Returns:
         A dataset split object providing the requested subset of the data.
     """
+
     def __init__(self, dataset, split='train'):
         super().__init__(dataset, split=split)
         log.info("Found {} pointclouds for {}".format(len(self.path_list),
@@ -244,19 +270,16 @@ def __len__(self):
 
     def get_data(self, idx):
         pc_path = self.path_list[idx]
         label_path = pc_path.replace('lidar', 'annotations/semseg')
-
+
         points = pd.read_pickle(pc_path)
         labels = pd.read_pickle(label_path)
-
+
         intensity = points['i'].to_numpy().astype(np.float32)
-        points = points.drop(columns=['i', 't', 'd']).to_numpy().astype(np.float32)
+        points = points.drop(columns=['i', 't', 'd']).to_numpy().astype(
+            np.float32)
         labels = labels.to_numpy().astype(np.int32)
 
-        data = {
-            'point': points,
-            'intensity': intensity,
-            'label': labels
-        }
+        data = {'point': points, 'intensity': intensity, 'label': labels}
 
         return data
@@ -269,4 +292,5 @@ def get_attr(self, idx):
         attr = {'name': name, 'path': pc_path, 'split': self.split}
         return attr
 
+
 DATASET._register_module(Pandaset)
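
A minimal usage sketch of the class this patch touches, for reviewers who want
to smoke-test it locally. It assumes Pandaset is exported from ml3d.datasets
and that /path/to/pandaset is a placeholder for a local copy of the Kaggle
download; neither detail is part of the patch itself.

    # Hypothetical smoke test; dataset_path below is a placeholder.
    from ml3d.datasets import Pandaset

    dataset = Pandaset(dataset_path='/path/to/pandaset')
    split = dataset.get_split('validation')

    for i in range(len(split)):
        data = split.get_data(i)  # dict with 'point', 'intensity', 'label'
        attr = split.get_attr(i)  # dict with 'name', 'path', 'split'
        print(attr['name'], data['point'].shape, data['label'].shape)

Since get_data() pairs each lidar pickle with its annotations/semseg
counterpart, the printed shapes double as a check that points and labels line
up per frame.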