methane_super_emitters.create_dataset

 1import click
 2import numpy as np
 3import datetime
 4import netCDF4
 5import os
 6import glob
 7import math
 8import uuid
 9from joblib import Parallel, delayed
10from methane_super_emitters.utils import destripe, parse_date, patch_generator
11
12
13def process_tropomi_file(
14    file_path, month_path, day_path, output_dir, input_file, negative=False
15):
16    print(f"ANALYZING: {file_path}")
17    with open(input_file, "r") as csv_fd:
18        csv_data = csv_fd.readlines()[1:]
19    try:
20        for patch in patch_generator(file_path):
21            emitter = False
22            location = np.zeros((32, 32))
23            for csv_line in csv_data:
24                date, time, lat, lon, _, _, _ = csv_line.split(",")
25                if (
26                    (patch["lat"].min() < float(lat) < patch["lat"].max())
27                    and (patch["lon"].min() < float(lon) < patch["lon"].max())
28                    and (
29                        min(patch["time"])
30                        <= parse_date(date, time)
31                        <= max(patch["time"])
32                    )
33                    and patch["mask"].sum() < 0.2 * 32 * 32
34                ):
35                    print(f"FOUND: {csv_line}")
36                    emitter = True
37                    # Locate the pixel with the emitter
38                    for row in range(32):
39                        for col in range(32):
40                            min_lat = patch["lat_bounds"][row][col].min()
41                            max_lat = patch["lat_bounds"][row][col].max()
42                            min_lon = patch["lon_bounds"][row][col].min()
43                            max_lon = patch["lon_bounds"][row][col].max()
44                            if (
45                                min_lat < float(lat) < max_lat
46                                and min_lon < float(lon) < max_lon
47                            ):
48                                location[row][col] = 1
49                    if not negative:
50                        positive_path = os.path.join(
51                            output_dir,
52                            "positive",
53                            f"{date}_{time}_{lat}_{lon}_{np.random.randint(0, 10000):04d}.npz",
54                        )
55                        np.savez_compressed(positive_path, location=location, **patch)
56            if (
57                negative
58                and not emitter
59                and np.random.random() < 0.02
60                and patch["mask"].sum() < 0.2 * 32 * 32
61            ):
62                negative_path = os.path.join(
63                    output_dir, "negative", f"{uuid.uuid4()}.npz"
64                )
65                np.savez_compressed(negative_path, location=location, **patch)
66    except OSError:
67        pass
68
69
70@click.command()
71@click.option("-i", "--input-file", help="Input CSV with super-emitter locations")
72@click.option(
73    "-p",
74    "--prefix",
75    help="Folder with TROPOMI data",
76    default="/dss/dsstbyfs03/pn56su/pn56su-dss-0022/Sentinel-5p/L2/CH4/2021/",
77)
78@click.option("-o", "--output_dir", help="Output folder")
79@click.option("-n", "--njobs", help="Number of jobs", default=1)
80@click.option(
81    "--negative",
82    is_flag=True,
83    help="Whether to collect negative samples",
84    default=False,
85)
86def main(input_file, prefix, output_dir, njobs, negative):
87    for month_path in glob.glob(os.path.join(prefix, "*")):
88        for day_path in glob.glob(os.path.join(month_path, "*")):
89            Parallel(n_jobs=njobs)(
90                delayed(process_tropomi_file)(
91                    file_path, month_path, day_path, output_dir, input_file, negative
92                )
93                for file_path in glob.glob(os.path.join(day_path, "*.nc"))
94            )
95
96
97if __name__ == "__main__":
98    main()
def process_tropomi_file( file_path, month_path, day_path, output_dir, input_file, negative=False):
14def process_tropomi_file(
15    file_path, month_path, day_path, output_dir, input_file, negative=False
16):
17    print(f"ANALYZING: {file_path}")
18    with open(input_file, "r") as csv_fd:
19        csv_data = csv_fd.readlines()[1:]
20    try:
21        for patch in patch_generator(file_path):
22            emitter = False
23            location = np.zeros((32, 32))
24            for csv_line in csv_data:
25                date, time, lat, lon, _, _, _ = csv_line.split(",")
26                if (
27                    (patch["lat"].min() < float(lat) < patch["lat"].max())
28                    and (patch["lon"].min() < float(lon) < patch["lon"].max())
29                    and (
30                        min(patch["time"])
31                        <= parse_date(date, time)
32                        <= max(patch["time"])
33                    )
34                    and patch["mask"].sum() < 0.2 * 32 * 32
35                ):
36                    print(f"FOUND: {csv_line}")
37                    emitter = True
38                    # Locate the pixel with the emitter
39                    for row in range(32):
40                        for col in range(32):
41                            min_lat = patch["lat_bounds"][row][col].min()
42                            max_lat = patch["lat_bounds"][row][col].max()
43                            min_lon = patch["lon_bounds"][row][col].min()
44                            max_lon = patch["lon_bounds"][row][col].max()
45                            if (
46                                min_lat < float(lat) < max_lat
47                                and min_lon < float(lon) < max_lon
48                            ):
49                                location[row][col] = 1
50                    if not negative:
51                        positive_path = os.path.join(
52                            output_dir,
53                            "positive",
54                            f"{date}_{time}_{lat}_{lon}_{np.random.randint(0, 10000):04d}.npz",
55                        )
56                        np.savez_compressed(positive_path, location=location, **patch)
57            if (
58                negative
59                and not emitter
60                and np.random.random() < 0.02
61                and patch["mask"].sum() < 0.2 * 32 * 32
62            ):
63                negative_path = os.path.join(
64                    output_dir, "negative", f"{uuid.uuid4()}.npz"
65                )
66                np.savez_compressed(negative_path, location=location, **patch)
67    except OSError:
68        pass