def extract_controls():
    """Run the ``fmc/controles.sql`` query against the ``fmc`` database.

    The three ``*_datetime_utc`` columns are handed to ``extract`` as
    ``parse_dates``; every other listed column is requested with the
    ``"category"`` dtype.

    Returns:
        Whatever ``extract`` returns for this query.
    """
    datetime_columns = [
        "control_datetime_utc",
        "input_start_datetime_utc",
        "input_end_datetime_utc",
    ]
    category_columns = (
        "controller_id",
        "control_type",
        "facade",
        "port_locode",
        "mission_order",
        "vessel_targeted",
        "cnsp_called_unit",
        "infraction",
        "cooperative",
        "diversion",
        "escort_to_quay",
        "seizure",
        "gear_1_code",
        "gear_2_code",
        "gear_3_code",
        "gear_1_was_controlled",
        "gear_2_was_controlled",
        "gear_3_was_controlled",
    )
    return extract(
        db_name="fmc",
        query_filepath="fmc/controles.sql",
        parse_dates=datetime_columns,
        dtypes=dict.fromkeys(category_columns, "category"),
    )
def extract_nav_licences():
    """Run the ``ocan/permis_navigation.sql`` query against ``ocan``.

    Both listed columns are requested with the ``"category"`` dtype.

    Returns:
        Whatever ``extract`` returns for this query.
    """
    # NOTE(review): the expiration date is typed as a category rather than
    # parsed as a date -- presumably a memory-saving choice; confirm.
    category_columns = ("sailing_category", "nav_licence_expiration_date")
    column_dtypes = dict.fromkeys(category_columns, "category")
    return extract("ocan", "ocan/permis_navigation.sql", dtypes=column_dtypes)
def extract_fr_vessels():
    """Run the ``ocan/navires_fr.sql`` query against ``ocan``.

    Numeric dimension columns are requested as sparse floats and all other
    listed columns with the ``"category"`` dtype.

    Returns:
        Whatever ``extract`` returns for this query.
    """
    # A sparse dtype uses less memory, especially for float data. For string
    # data pd.SparseDtype saves little; pd.Categorical reduces memory usage
    # much more, hence the split below.
    sparse_float_columns = ("length_nf", "width_nf", "gauge_nf", "power_nf")
    category_columns = (
        "vessel_phone_1_nf",
        "vessel_phone_2_nf",
        "vessel_phone_3_nf",
        "vessel_phone_4_nf",
        "vessel_email_1_nf",
        "vessel_email_2_nf",
        "operator_name_nf",
        "operator_email_nf",
        "operator_phone_1_nf",
        "operator_phone_2_nf",
        "proprietor_name_nf",
        "proprietor_email_nf",
        "proprietor_phone_1_nf",
        "proprietor_phone_2_nf",
        "vessel_type_nf",
        "registry_port_nf",
        "sailing_types_nf",
        "fishing_gear_main_nfp",
        "fishing_gear_secondary_nfp",
        "fishing_gear_third_nfp",
    )

    # Sparse columns go in first so the mapping keeps its original key order.
    column_dtypes = {
        name: pd.SparseDtype("float", None) for name in sparse_float_columns
    }
    column_dtypes.update(dict.fromkeys(category_columns, "category"))
    return extract("ocan", "ocan/navires_fr.sql", dtypes=column_dtypes)
def mock_extract_side_effect_(
    db_name, query_filepath, dtypes, parse_dates, mock_create_engine, mock_pd
):
    """Call ``extract`` with ``mock_pd.read_sql`` stubbed to echo its query.

    Args:
        db_name: Forwarded to ``extract`` as ``db_name``.
        query_filepath: Forwarded to ``extract`` as ``query_filepath``.
        dtypes: Accepted but not forwarded; ``extract`` always receives
            ``dtypes=None``.
        parse_dates: Forwarded to ``extract`` as ``parse_dates``.
        mock_create_engine: Not used in the body.
        mock_pd: Mock object whose ``read_sql.side_effect`` is replaced.

    Returns:
        Whatever ``extract`` returns.
    """
    # NOTE(review): the two mock_* arguments are presumably injected by
    # unittest.mock.patch decorators outside this view -- confirm.
    # NOTE(review): dtypes is presumably dropped because the stub returns the
    # query itself rather than a DataFrame -- confirm this is intentional.

    # The stub returns its first argument and ignores the engine and options.
    mock_pd.read_sql.side_effect = lambda query, engine, **kwargs: query

    forwarded = {
        "db_name": db_name,
        "query_filepath": query_filepath,
        "dtypes": None,
        "parse_dates": parse_dates,
    }
    return extract(**forwarded)
def extract_floats():
    """Run the ``ocan/flotteurs.sql`` query against ``ocan``.

    Every listed column is requested with the ``"category"`` dtype.

    Returns:
        Whatever ``extract`` returns for this query.
    """
    category_columns = (
        "imo_f",
        "cfr_f",
        "external_immatriculation_f",
        "vessel_name_f",
        "ircs_f",
        "mmsi_f",
        "flag_state_f",
        "district_code_f",
        "district_f",
    )
    column_dtypes = dict.fromkeys(category_columns, "category")
    return extract("ocan", "ocan/flotteurs.sql", dtypes=column_dtypes)
def extract_cee_vessels():
    """Run the ``ocan/navires_cee_peche.sql`` query against ``ocan``.

    Text-like columns are requested with the ``"category"`` dtype and the
    numeric dimension columns as sparse floats.

    Returns:
        Whatever ``extract`` returns for this query.
    """
    category_columns = (
        "fishing_gear_main_ncp",
        "fishing_gear_secondary_ncp",
        "fishing_gear_third_ncp",
        "vessel_type_ncp",
        "district_ncp",
        "operator_name_ncp",
        "operator_email_ncp",
        "proprietor_email_ncp",
    )
    sparse_float_columns = ("length_ncp", "gauge_ncp", "power_ncp")

    # Category columns go in first so the mapping keeps its original key order.
    column_dtypes = dict.fromkeys(category_columns, "category")
    for name in sparse_float_columns:
        column_dtypes[name] = pd.SparseDtype("float", None)
    return extract("ocan", "ocan/navires_cee_peche.sql", dtypes=column_dtypes)
def extract_segments():  # pragma: no cover
    """Run ``monitorfish/fleet_segments.sql`` against ``monitorfish_remote``.

    Returns:
        Whatever ``extract`` returns for this query.
    """
    sql_file = "monitorfish/fleet_segments.sql"
    return extract(db_name="monitorfish_remote", query_filepath=sql_file)
def extract_catches():  # pragma: no cover
    """Run ``monitorfish/current_catches.sql`` against ``monitorfish_remote``.

    Returns:
        Whatever ``extract`` returns for this query.
    """
    sql_file = "monitorfish/current_catches.sql"
    return extract(db_name="monitorfish_remote", query_filepath=sql_file)
def extract_controllers():
    """Run the ``fmc/controllers.sql`` query against the ``fmc`` database.

    Returns:
        Whatever ``extract`` returns for this query.
    """
    database = "fmc"
    sql_file = "fmc/controllers.sql"
    return extract(db_name=database, query_filepath=sql_file)
def extract_non_cee_vessels():
    """Run the ``ocan/navires_hors_cee_peche.sql`` query against ``ocan``.

    The ``fishing_gear_main_nep`` column is requested with the
    ``"category"`` dtype.

    Returns:
        Whatever ``extract`` returns for this query.
    """
    column_dtypes = dict(fishing_gear_main_nep="category")
    return extract(
        "ocan",
        "ocan/navires_hors_cee_peche.sql",
        dtypes=column_dtypes,
    )
def extract_last_controls():
    """Run ``monitorfish/last_controls.sql`` against ``monitorfish_remote``.

    ``last_control_datetime_utc`` is passed to ``extract`` as the only
    ``parse_dates`` column.

    Returns:
        Whatever ``extract`` returns for this query.
    """
    datetime_columns = ["last_control_datetime_utc"]
    sql_file = "monitorfish/last_controls.sql"
    return extract(
        db_name="monitorfish_remote",
        query_filepath=sql_file,
        parse_dates=datetime_columns,
    )
def extract_last_positions():
    """Run ``monitorfish/last_positions.sql`` against ``monitorfish_remote``.

    The ``emission_period`` column is requested with the ``str`` dtype.

    Returns:
        Whatever ``extract`` returns for this query.
    """
    column_dtypes = {"emission_period": str}
    sql_file = "monitorfish/last_positions.sql"
    return extract(
        db_name="monitorfish_remote",
        query_filepath=sql_file,
        dtypes=column_dtypes,
    )
def extract_current_segments():
    """Run ``monitorfish/current_segments.sql`` against ``monitorfish_remote``.

    Returns:
        Whatever ``extract`` returns for this query.
    """
    sql_file = "monitorfish/current_segments.sql"
    return extract(db_name="monitorfish_remote", query_filepath=sql_file)
def extract_fishing_gear_codes():
    """Run the ``ocan/codes_engins.sql`` query against ``ocan``.

    Returns:
        Whatever ``extract`` returns for this query.
    """
    database = "ocan"
    sql_file = "ocan/codes_engins.sql"
    return extract(database, sql_file)
def extract_control_anteriority():
    """Run ``monitorfish/control_anteriority.sql`` against ``monitorfish_remote``.

    Returns:
        Whatever ``extract`` returns for this query.
    """
    sql_file = "monitorfish/control_anteriority.sql"
    return extract(db_name="monitorfish_remote", query_filepath=sql_file)
def extract_infractions():
    """Run the ``fmc/natinf.sql`` query against the ``fmc`` database.

    Returns:
        Whatever ``extract`` returns for this query.
    """
    database = "fmc"
    sql_file = "fmc/natinf.sql"
    return extract(database, sql_file)