def __init__(self, img_size, range01=False, rgb_order=False, dummy=False):
    images_dir = settings.get_data_dir('office_webcam')
    super(OfficeWebcamDataset, self).__init__(img_size, range01, rgb_order, images_dir, dummy=dummy)
def __init__(self, img_size, range01=False, rgb_order=False, dummy=False):
    test_dir = settings.get_data_dir('visda17_clf_test')
    file_list_path = os.path.join(test_dir, 'image_list.txt')
    super(TestDataset, self).__init__(img_size, range01, rgb_order, file_list_path, test_dir,
                                      has_ground_truth=False, dummy=dummy)
def __init__(self, img_size, range01=False, rgb_order=False, dummy=False):
    val_dir = settings.get_data_dir('visda17_clf_validation')
    file_list_path = os.path.join(val_dir, 'image_list.txt')
    super(ValidationDataset, self).__init__(img_size, range01, rgb_order, file_list_path, val_dir,
                                            has_ground_truth=True, dummy=dummy)
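# Usage sketch (illustrative, not part of the project): the dataset constructors
# above differ only in which data directory / image list they resolve through
# `settings`. The helper name and the img_size value below are hypothetical examples.
def _example_build_datasets():
    settings.load()
    val_ds = ValidationDataset(img_size=(96, 96), range01=True, rgb_order=True)
    test_ds = TestDataset(img_size=(96, 96), range01=True, rgb_order=True)
    return val_ds, test_ds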
def run(): """ TODO: write docstring """ # Set environment variables settings.load() # Get PostgreSQL database credentials psql_credentials = settings.get_psql() # Create SQLAlchemy engine from database credentials engine = create_connection_from_dict(psql_credentials, 'postgresql') # Get data to process from postgres quants_df = execute_sql('select * from features.quants;', engine, read_file=False, return_df=True) data_dir = settings.get_data_dir() labeled_fishing_dir = data_dir / 'labeled_data' / 'fishing' labeled_nonfishing_dir = data_dir / 'labeled_data' / 'nonfishing' cnn_split_dir = data_dir / 'cnn_split' if cnn_split_dir.exists(): shutil.rmtree(cnn_split_dir, ignore_errors=False, onerror=None) cnn_split_dir.mkdir(parents=True, exist_ok=True) # Create labeled data print('Creating labeled data.') fishy_stuff = fishing_prefilter(quants_df) nonfish = nonfishing_dataframe_creator(quants_df, fishy_stuff) dataset = sampler(fishy_stuff, nonfish) trajectory_separator(dataset, data_dir) # Create train / test split print("Creating train/test split") split_data(labeled_fishing_dir, labeled_nonfishing_dir, cnn_split_dir, binary_name='fishing', set_seed=223) # Train the cnn run_cnn(cnn_split_dir, batchsize=256, epochs=50, color_mode='rgb', start_filters=8, depth=2, dense_count = 2, dense_neurons = 256, bnorm = False)
def __init__(self, img_size, range01=False, rgb_order=False, dummy=False):
    train_dir = settings.get_data_dir('visda17_clf_train')
    file_list_path = os.path.join(train_dir, 'image_list.txt')
    super(TrainDataset, self).__init__(img_size, range01, rgb_order, file_list_path, train_dir,
                                       has_ground_truth=True, dummy=dummy)

    self.object_ids = []
    self.cam_yaw = []
    self.light_yaw = []
    self.cam_pitch = []
    self.obj_id_to_idx = {}
    self.cam_yaw_to_idx = {}
    self.light_yaw_to_idx = {}
    self.cam_pitch_to_idx = {}
    for sample_idx, name in enumerate(self.names):
        # File names follow the pattern '<object_id>__<cam_yaw>_<light_yaw>_<cam_pitch>.<ext>'
        fn, _ = os.path.splitext(name)
        object_id, _, tail = fn.partition('__')
        c_yaw, l_yaw, c_pitch = tail.split('_')
        c_yaw = float(c_yaw)
        l_yaw = float(l_yaw)
        c_pitch = float(c_pitch)
        # Map each distinct value to a dense integer index
        obj_id_idx = self.obj_id_to_idx.setdefault(object_id, len(self.obj_id_to_idx))
        c_yaw_idx = self.cam_yaw_to_idx.setdefault(c_yaw, len(self.cam_yaw_to_idx))
        l_yaw_idx = self.light_yaw_to_idx.setdefault(l_yaw, len(self.light_yaw_to_idx))
        c_pitch_idx = self.cam_pitch_to_idx.setdefault(c_pitch, len(self.cam_pitch_to_idx))
        self.object_ids.append(obj_id_idx)
        self.cam_yaw.append(c_yaw_idx)
        self.light_yaw.append(l_yaw_idx)
        self.cam_pitch.append(c_pitch_idx)
    self.object_ids = np.array(self.object_ids, dtype=np.int32)
    self.cam_yaw = np.array(self.cam_yaw, dtype=np.int32)
    self.light_yaw = np.array(self.light_yaw, dtype=np.int32)
    self.cam_pitch = np.array(self.cam_pitch, dtype=np.int32)

    # Group sample indices by object ID, camera yaw, light yaw and camera pitch
    sample_ndxs = np.arange(len(self.object_ids))
    self.samples_by_obj_id = [
        sample_ndxs[self.object_ids == i] for i in range(len(self.obj_id_to_idx))
    ]
    self.samples_by_cam_yaw = [
        sample_ndxs[self.cam_yaw == i] for i in range(len(self.cam_yaw_to_idx))
    ]
    self.samples_by_light_yaw = [
        sample_ndxs[self.light_yaw == i] for i in range(len(self.light_yaw_to_idx))
    ]
    self.samples_by_cam_pitch = [
        sample_ndxs[self.cam_pitch == i] for i in range(len(self.cam_pitch_to_idx))
    ]
    self.obj_X = self.ObjectImageAccessor(self)
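# Small illustrative check of the file-name parsing that TrainDataset.__init__
# relies on; the helper name and the example file name are made up, but the
# parsing steps mirror the constructor above.
import os

def _example_parse_name(name='chair_0042__120.0_45.0_30.0.png'):
    fn, _ = os.path.splitext(name)
    object_id, _, tail = fn.partition('__')
    c_yaw, l_yaw, c_pitch = (float(v) for v in tail.split('_'))
    return object_id, c_yaw, l_yaw, c_pitch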
def run(): """ Creates raw-cleaned-semantic schemas and populates the raw schema only. Parameters ---------- read_json: bool Whether or not the script should read original json files write_json: bool Whether or not the script should write csvs of ais files dirs: [str] List of names of the directories to import date_range: [int] List of two ints with the first and last day to collect files from Returns ------- None """ # Set environment variables settings.load() # Get root directory from environment base_dir = settings.get_base_dir() sql_dir = base_dir.joinpath('sql') data_dir = settings.get_data_dir() filtered_dir = data_dir.joinpath('ais_deduped') # Get PostgreSQL database credentials psql_credentials = settings.get_psql() # print('Running with credentials: ', psql_credentials) # Create SQLAlchemy engine from database credentials engine = create_connection_from_dict(psql_credentials, 'postgresql') ## ---- CREATE SCHEMAS ---- print("Creating schemas") execute_sql(os.path.join(sql_dir, 'create_schemas.sql'), engine, read_file=True) ## ---- CREATE TABLES ---- print("Creating tables") execute_sql(os.path.join(sql_dir, 'create_tables.sql'), engine, read_file=True) ## ---- UPLOAD TABLES ---- print("Processing scraped vessels table.") copy_csv_to_db(os.path.join(data_dir, 'updated_boats.csv'), 'raw.vessels', engine) print("Processing IUU list.") # load_iuu_list(os.path.join(data_dir, 'IUUList-20190902.txt'), engine) ## ---- UPLOAD SHAPEFILES ---- # print("Uploading shapefiles") # TODO: get this fully hooked up and working # load_shp(DATA_DIR, dir_dict, credentials_dict): ## ---- WRITE filtered CSVs to db ---- for path in filtered_dir.glob("*"): if path.is_dir(): filtered_subdir = path # this is where we upload csvs from the database # the intention is that we sometimes do this with previously parsed csvs print( f"Uploading csv files to database from {filtered_subdir.name}." ) try: load_csv(filtered_subdir, engine, 'raw.ais', sep='\t', quote='\b') except IsADirectoryError: #raise print('Found directory, not file') print(f"Finished converted json from {filtered_subdir.name}") ## ---- ClEAN DATA ---- print("Cleaning data") execute_sql(os.path.join(sql_dir, 'clean_data.sql'), engine, read_file=True) return
def run(min_pings_init=30, min_pings_split=20, min_dist=2.0):
    """
    Runs feature generation so that the modeling stage can take place.

    Feature generation involves four main stages:
        - generating a sample to show the model
        - breaking the sample up into trajectories
        - computing quantitative features on each trajectory
        - writing an image of each trajectory to folders grouped by 'vessel_type'

    :param min_pings_init: int
        The minimum number of AIS data points that must appear in a trajectory
        for it to be included in the sample.
    :param min_pings_split: int
        Applied after splitting trajectories at the gap. Should be smaller than
        min_pings_init. Ensures that split trajectories also have more than a
        certain minimum number of pings.
    :param min_dist: float
        Minimum distance a trajectory must cover to be included when creating
        the CNN sample.
    :returns: None
    """
    start = time.time()
    # Set environment variables
    settings.load()
    # Get PostgreSQL database credentials
    psql_credentials = settings.get_psql()
    base_dir = settings.get_base_dir()
    sql_dir = base_dir.joinpath('sql')
    data_dir = settings.get_data_dir()
    # Create SQLAlchemy engine from database credentials
    engine = create_connection_from_dict(psql_credentials, 'postgresql')

    # Create a sql table with complete trajectories
    sample_switch = input("Create new sample for Convolutional Neural Net? (Y/N)")
    if sample_switch in ['Y', 'y', '1', 'Yes']:
        print("Creating CNN sample.")
        create_cnn_sample(sql_dir, engine, min_pings_init=min_pings_init, min_dist=min_dist)

    # Get data to process from postgres
    execute_sql('drop table if exists features.quants;', engine, read_file=False)
    if (data_dir / 'trajectories').is_dir():
        print("Removing old trajectories directory.")
        remove_dir(data_dir / 'trajectories')
    try:
        df = execute_sql("select * from features.cnn_sample", engine, read_file=False, return_df=True)
        print("Grabbing trajectory data")
    except db.exc.ProgrammingError:
        print("The table features.cnn_sample doesn't exist. Please create one.")
        raise SystemExit

    # Set data types of several key columns
    df = df.rename(columns={'time_stamp': 't'})
    df['t'] = pd.to_datetime(df['t'])
    df['longitude'] = pd.to_numeric(df['longitude'])
    df['latitude'] = pd.to_numeric(df['latitude'])
    # Set df index
    df.index = df['t']
    df_geo = df_to_geodf(df)
    # Group by date and mmsi
    df_group = df_geo.groupby([pd.Grouper(freq='D'), 'mmsi'])
    counter = 0
    # Load basemap shape file (c: coarse, l: low, i: intermediate, h: high, f: full)
    base_map = geopandas.read_file('/Akamai/ais_project_data/GSHHS_shp/c/GSHHS_c_L1.shp')
    # Set CRS WGS 84
    base_map = base_map.to_crs(epsg=4326)

    # Loop through the grouped dataframes
    for name, group in df_group:
        if len(group) < min_pings_init:
            continue
        trajectory = mp.Trajectory(name, group)

        # Split the trajectory at the gap
        split_trajectories = list(trajectory.split_by_observation_gap(timedelta(minutes=30)))

        ### CREATE TRAJECTORY IDs
        for split_index, trajectory in enumerate(split_trajectories):
            # create a universal trajectory ID in the format mmsi-date-split_index
            trajectory.df['traj_id'] = str(name[1]) + '-' + str(name[0].date()) + '-' + str(split_index)

        ### CREATE QUANT FEATURES AND WRITE IMAGES TO DISK
        for split in split_trajectories:
            # store the length of the split trajectory in km
            traj_length = split.get_length() / 1_000
            if (len(split.df) < min_pings_split) or (traj_length < .5):
                print(f"Dropping a trajectory with length: {str(traj_length)} km and {str(len(split.df))} pings.")
                continue
            else:
                try:
                    quants = compute_quants(split.df[['longitude', 'latitude']])
                    quants['traj_id'] = str(split.df['traj_id'].iloc[0])
                    quants['vessel_type'] = str(split.df['vessel_type'].iloc[0])
                    quants.to_sql('quants', engine, schema='features', if_exists='append', index=False)
                    ### WRITE IMAGES TO DISK
                    save_matplotlib_img(split, data_dir, base_map)
                    counter += 1
                except Exception:
                    print(f"An error occurred processing trajectory {split.df['traj_id'].iloc[0]}.")

    end = time.time()
    print(f"Generated features for {str(counter)} images in {str(round(end - start))} seconds.")
    return
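# compute_quants above is a project helper that is not shown in this section. A
# purely illustrative sketch of the kind of per-trajectory quantitative features
# it could produce from a longitude/latitude DataFrame; the helper name, the
# feature names, and the haversine step-distance approach are assumptions, not
# the project's definition.
import numpy as np
import pandas as pd

def _example_compute_quants(df):
    lat = np.radians(df['latitude'].to_numpy())
    lon = np.radians(df['longitude'].to_numpy())
    dlat, dlon = np.diff(lat), np.diff(lon)
    # Haversine great-circle distance between consecutive pings, in km
    a = np.sin(dlat / 2) ** 2 + np.cos(lat[:-1]) * np.cos(lat[1:]) * np.sin(dlon / 2) ** 2
    step_km = 2 * 6371.0 * np.arcsin(np.sqrt(a))
    return pd.DataFrame([{
        'total_km': step_km.sum(),
        'mean_step_km': step_km.mean(),
        'max_step_km': step_km.max(),
        'n_pings': len(df),
    }])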