methane_super_emitters.dataset

import torch
from torch.utils.data import DataLoader, Dataset
import os
import glob
import random
import numpy as np
from torchvision.transforms import Compose, ToTensor, Normalize
from methane_super_emitters.dataset_stats import normalize


class TROPOMISuperEmitterDataset(Dataset):
    """Binary-classification dataset of TROPOMI methane patches.

    Loads every ``*.npz`` patch from ``<data_dir>/positive`` (label 1.0) and
    ``<data_dir>/negative`` (label 0.0), normalizing the requested ``fields``
    with ``dataset_stats.normalize``. The negative list is truncated to the
    number of positives so both classes end up the same size.
    """

    def __init__(self, data_dir, fields):
        self.data_dir = data_dir
        self.samples = []
        self.positive_filenames = glob.glob(os.path.join(data_dir, "positive", "*.npz"))
        negative_filenames = glob.glob(os.path.join(data_dir, "negative", "*.npz"))
        # Keep only as many negatives as there are positives.
        self.negative_filenames = negative_filenames[: len(self.positive_filenames)]
        for filename in self.positive_filenames:
            data = np.load(filename)
            self.samples.append((normalize(data, fields), 1.0))
        for filename in self.negative_filenames:
            data = np.load(filename)
            self.samples.append((normalize(data, fields), 0.0))

    def unload(self):
        """Drop the cached samples and ask the garbage collector to reclaim them."""
        self.samples = []
        import gc

        gc.collect()

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        # Return the normalized patch and its label as float tensors.
        img, label = self.samples[idx]
        return torch.tensor(img, dtype=torch.float), torch.tensor(
            label, dtype=torch.float
        )

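A minimal usage sketch for the classifier dataset, assuming a data directory with the positive/ and negative/ sub-folders the constructor globs over. The path and the entries of fields below are placeholders: fields has to name whichever variables dataset_stats.normalize expects to find in each .npz patch.

from torch.utils.data import DataLoader

from methane_super_emitters.dataset import TROPOMISuperEmitterDataset

data_dir = "/path/to/patches"               # placeholder: must contain positive/ and negative/
fields = ["methane", "u_wind", "v_wind"]    # placeholder field names, not the real schema

dataset = TROPOMISuperEmitterDataset(data_dir, fields)
loader = DataLoader(dataset, batch_size=32, shuffle=True)

for patches, labels in loader:
    # patches: float tensors of normalized fields; labels: 1.0 (emitter) / 0.0 (background)
    ...
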
class TROPOMISuperEmitterLocatorDataset(TROPOMISuperEmitterDataset):
    """Variant of the dataset whose target is the emitter location.

    Uses the same directory layout and class balancing as the parent class,
    but pairs each normalized patch with the ``location`` array stored in its
    .npz file instead of a binary label.
    """

    def __init__(self, data_dir, fields):
        self.data_dir = data_dir
        self.samples = []
        self.positive_filenames = glob.glob(os.path.join(data_dir, "positive", "*.npz"))
        negative_filenames = glob.glob(os.path.join(data_dir, "negative", "*.npz"))
        self.negative_filenames = negative_filenames[: len(self.positive_filenames)]
        self.filenames = self.positive_filenames + self.negative_filenames
        for filename in self.filenames:
            data = np.load(filename)
            # Target is the "location" array stored alongside the patch.
            self.samples.append((normalize(data, fields), data["location"]))
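
Both classes only require that each .npz file contain the arrays named in fields plus, for the locator variant, a location array. Below is a hedged sketch of writing such a patch with np.savez; the field names, the 32x32 patch size, and the two-element location are illustrative assumptions, not the project's actual schema.

import numpy as np

np.savez(
    "positive/example_patch.npz",
    methane=np.random.rand(32, 32),   # placeholder field, read back and normalized via `fields`
    u_wind=np.random.rand(32, 32),    # placeholder field
    v_wind=np.random.rand(32, 32),    # placeholder field
    location=np.array([15.0, 17.0]),  # assumed target for TROPOMISuperEmitterLocatorDataset
)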