methane_super_emitters.create_dataset
1import click 2import numpy as np 3import datetime 4import netCDF4 5import os 6import glob 7import math 8import uuid 9from joblib import Parallel, delayed 10from methane_super_emitters.utils import destripe, parse_date, patch_generator 11 12 13def process_tropomi_file( 14 file_path, month_path, day_path, output_dir, input_file, negative=False 15): 16 print(f"ANALYZING: {file_path}") 17 with open(input_file, "r") as csv_fd: 18 csv_data = csv_fd.readlines()[1:] 19 try: 20 for patch in patch_generator(file_path): 21 emitter = False 22 location = np.zeros((32, 32)) 23 for csv_line in csv_data: 24 date, time, lat, lon, _, _, _ = csv_line.split(",") 25 if ( 26 (patch["lat"].min() < float(lat) < patch["lat"].max()) 27 and (patch["lon"].min() < float(lon) < patch["lon"].max()) 28 and ( 29 min(patch["time"]) 30 <= parse_date(date, time) 31 <= max(patch["time"]) 32 ) 33 and patch["mask"].sum() < 0.2 * 32 * 32 34 ): 35 print(f"FOUND: {csv_line}") 36 emitter = True 37 # Locate the pixel with the emitter 38 for row in range(32): 39 for col in range(32): 40 min_lat = patch["lat_bounds"][row][col].min() 41 max_lat = patch["lat_bounds"][row][col].max() 42 min_lon = patch["lon_bounds"][row][col].min() 43 max_lon = patch["lon_bounds"][row][col].max() 44 if ( 45 min_lat < float(lat) < max_lat 46 and min_lon < float(lon) < max_lon 47 ): 48 location[row][col] = 1 49 if not negative: 50 positive_path = os.path.join( 51 output_dir, 52 "positive", 53 f"{date}_{time}_{lat}_{lon}_{np.random.randint(0, 10000):04d}.npz", 54 ) 55 np.savez_compressed(positive_path, location=location, **patch) 56 if ( 57 negative 58 and not emitter 59 and np.random.random() < 0.02 60 and patch["mask"].sum() < 0.2 * 32 * 32 61 ): 62 negative_path = os.path.join( 63 output_dir, "negative", f"{uuid.uuid4()}.npz" 64 ) 65 np.savez_compressed(negative_path, location=location, **patch) 66 except OSError: 67 pass 68 69 70@click.command() 71@click.option("-i", "--input-file", help="Input CSV with super-emitter locations") 72@click.option( 73 "-p", 74 "--prefix", 75 help="Folder with TROPOMI data", 76 default="/dss/dsstbyfs03/pn56su/pn56su-dss-0022/Sentinel-5p/L2/CH4/2021/", 77) 78@click.option("-o", "--output_dir", help="Output folder") 79@click.option("-n", "--njobs", help="Number of jobs", default=1) 80@click.option( 81 "--negative", 82 is_flag=True, 83 help="Whether to collect negative samples", 84 default=False, 85) 86def main(input_file, prefix, output_dir, njobs, negative): 87 for month_path in glob.glob(os.path.join(prefix, "*")): 88 for day_path in glob.glob(os.path.join(month_path, "*")): 89 Parallel(n_jobs=njobs)( 90 delayed(process_tropomi_file)( 91 file_path, month_path, day_path, output_dir, input_file, negative 92 ) 93 for file_path in glob.glob(os.path.join(day_path, "*.nc")) 94 ) 95 96 97if __name__ == "__main__": 98 main()
def
process_tropomi_file( file_path, month_path, day_path, output_dir, input_file, negative=False):
14def process_tropomi_file( 15 file_path, month_path, day_path, output_dir, input_file, negative=False 16): 17 print(f"ANALYZING: {file_path}") 18 with open(input_file, "r") as csv_fd: 19 csv_data = csv_fd.readlines()[1:] 20 try: 21 for patch in patch_generator(file_path): 22 emitter = False 23 location = np.zeros((32, 32)) 24 for csv_line in csv_data: 25 date, time, lat, lon, _, _, _ = csv_line.split(",") 26 if ( 27 (patch["lat"].min() < float(lat) < patch["lat"].max()) 28 and (patch["lon"].min() < float(lon) < patch["lon"].max()) 29 and ( 30 min(patch["time"]) 31 <= parse_date(date, time) 32 <= max(patch["time"]) 33 ) 34 and patch["mask"].sum() < 0.2 * 32 * 32 35 ): 36 print(f"FOUND: {csv_line}") 37 emitter = True 38 # Locate the pixel with the emitter 39 for row in range(32): 40 for col in range(32): 41 min_lat = patch["lat_bounds"][row][col].min() 42 max_lat = patch["lat_bounds"][row][col].max() 43 min_lon = patch["lon_bounds"][row][col].min() 44 max_lon = patch["lon_bounds"][row][col].max() 45 if ( 46 min_lat < float(lat) < max_lat 47 and min_lon < float(lon) < max_lon 48 ): 49 location[row][col] = 1 50 if not negative: 51 positive_path = os.path.join( 52 output_dir, 53 "positive", 54 f"{date}_{time}_{lat}_{lon}_{np.random.randint(0, 10000):04d}.npz", 55 ) 56 np.savez_compressed(positive_path, location=location, **patch) 57 if ( 58 negative 59 and not emitter 60 and np.random.random() < 0.02 61 and patch["mask"].sum() < 0.2 * 32 * 32 62 ): 63 negative_path = os.path.join( 64 output_dir, "negative", f"{uuid.uuid4()}.npz" 65 ) 66 np.savez_compressed(negative_path, location=location, **patch) 67 except OSError: 68 pass