def match(self, mmsi: str, source_date: str, new_data: pd.DataFrame) -> pd.DataFrame:
    if is_empty(new_data):
        return new_data
    matches = self.get_keys_and_matches(mmsi, source_date)
    result = pd.DataFrame()
    for f_key, match in matches:
        match_data = pd.read_pickle(match.path)
        if is_empty(match_data):
            self.remove_u_file(f_key)
            continue
        min_time = new_data.iloc[0]["time"]
        match_min_time = match_data.iloc[0]["time"]
        if match_min_time <= min_time:
            # unlabeled data is older than current data
            result = pd.concat([match_data, new_data])
            logger.write(
                f"Data match: MMSI {mmsi} | match min time: {match_min_time} new data time: {min_time}\n"
                f"\tmatch data length: {len(match_data)} new data length: {len(new_data.index)} "
                f"result data length: {len(result.index)}")
        else:
            # unlabeled data is newer than current data
            logger.write(
                "Detected match where unlabeled data is newer than current data! Not combining!"
            )
        self.remove_u_file(f_key)
    return result
def compare_one(config, result, expect):
    """Compare one line of arctern result and expected."""
    value_x = result[1]
    value_y = expect[1]
    # c = config
    newvalue_x = convert_str(value_x)
    newvalue_y = convert_str(value_y)
    # default when no branch below handles the value type
    one_result_flag = False
    try:
        if isinstance(newvalue_x, bool):
            one_result_flag = (newvalue_x == newvalue_y)
            if not one_result_flag:
                print(result[0], newvalue_x, expect[0], newvalue_y)
            return one_result_flag
        if isinstance(newvalue_x, str):
            newvalue_x = newvalue_x.strip().upper()
            newvalue_y = newvalue_y.strip().upper()
            # check order: empty -> GEO_TYPES -> geocollection_types -> curve -> surface
            if is_empty(newvalue_x) and is_empty(newvalue_y):
                return True
            if is_geometry(newvalue_x) and is_geometry(newvalue_y):
                one_result_flag = compare_geometry(config, newvalue_x, newvalue_y)
                if not one_result_flag:
                    print(result[0], newvalue_x, expect[0], newvalue_y)
                return one_result_flag
            if is_geometrycollection(newvalue_x) and is_geometrycollection(newvalue_y):
                one_result_flag = compare_geometrycollection(config, newvalue_x, newvalue_y)
                if not one_result_flag:
                    print(result[0], newvalue_x, expect[0], newvalue_y)
                return one_result_flag
            if is_geometrytype(newvalue_x) and is_geometrytype(newvalue_y):
                one_result_flag = (newvalue_x == newvalue_y)
                if not one_result_flag:
                    print(result[0], newvalue_x, expect[0], newvalue_y)
                return one_result_flag
            # print(result[0], newvalue_x, expect[0], newvalue_y)
            return False
        if isinstance(newvalue_x, (int, float)):
            return compare_floats(config, newvalue_x, newvalue_y)
        # if not one_result_flag:
        #     print(result[0], newvalue_x, expect[0], newvalue_y)
        # return one_result_flag
    except ValueError as ex:
        print(repr(ex))
        one_result_flag = False
    return one_result_flag
def load(path=None):
    """
    Load configuration from the given path, the `CONFIG` environment
    variable, or the first CLI argument, in that order of precedence.

    :param str or None path: the configuration path
    :return: the configuration object
    :rtype: object
    """
    if path is None:
        if 'CONFIG' in os.environ:
            path = os.environ['CONFIG']
        elif len(sys.argv) > 1:
            path = sys.argv[1]
        else:
            raise Exception('Configuration parameter must be set')
    if is_empty(path):
        raise Exception('Configuration path is missing')
    if not os.path.exists(path):
        raise Exception(f'Configuration file does not exist "{path}"')
    conf = ConfigObj(path)
    # the configuration must contain at least one section
    if len(conf.sections) == 0:
        raise Exception('There are no sections')
    return ConfigParser.to_object(conf)
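# Hedged usage sketch for load(): an explicit path wins; otherwise the CONFIG
# environment variable is consulted, then the first CLI argument. The paths
# below are illustrative only.
#
#   import os
#   os.environ['CONFIG'] = '/etc/myapp/settings.ini'
#   cfg = load()                      # resolved via CONFIG
#   cfg = load('local-settings.ini')  # explicit path takes precedence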
def link_reference_trajectories_to_amici_models():
    """
    We need to find the correct reference trajectory for each model.
    This is not fully trivial, as some models come from biomodels, some from JWS.
    """
    # unset column
    model_info['ref_trajectory_path'] = None
    path_ref_biomodels = DIR_TRAJ_REF_BIOMODELS
    path_ref_jws = DIR_TRAJ_REF_JWS

    # iterate over models, write path to reference trajectory
    for sub_id in model_info.index:
        i_row = model_info.loc[sub_id]
        # we must discriminate between models from JWS and biomodels
        if is_empty(i_row['sedml_path']):
            # from biomodels, the ref trajectories were simulated with Copasi
            model_suffix = (i_row['sbml_path'].split('/')[-1]).split('.')[0]
            name = f'trajectories_copasi_strictest_{model_suffix.lower()}.tsv'
            ref = os.path.join(path_ref_biomodels, name)
        else:
            # from JWS online, reference trajectories were downloaded;
            # reconstruct the name from the sedml and sbml file names
            name1 = (i_row['sedml_path'].split('/')[-1]).split('.')[0]
            name2 = (i_row['sbml_path'].split('/')[-1]).split('.')[0]
            ref = os.path.join(path_ref_jws, name1, name2, 'JWS_simulation.csv')
        # save path
        model_info.loc[sub_id, 'ref_trajectory_path'] = ref
def passes(self, url):
    split_url = urlparse(url)
    domain = split_url.hostname
    if is_empty(domain):
        return False
    is_allowed = (self._domain_match_regex.match(domain) is not None)
    self._logger.debug("Result of applying domain filter for url %s: %s",
                       url, is_allowed)
    return is_allowed
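# Hedged, self-contained sketch of the domain-filter idea above. The regex
# pattern and URLs are made up; the real class supplies _domain_match_regex
# and _logger, and anchors the match at the start of the hostname.
import re
from urllib.parse import urlparse

def _demo_domain_passes(url: str, pattern: str = r'(^|\.)example\.com$') -> bool:
    domain = urlparse(url).hostname
    if not domain:  # stand-in for is_empty(domain)
        return False
    return re.search(pattern, domain) is not None

# _demo_domain_passes('https://sub.example.com/page')  -> True
# _demo_domain_passes('https://evil.org/page')         -> False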
def init(size):
    nodes = []
    # create nodes
    for j in range(0, size):
        for i in range(0, size):
            nodes.append(Node(i, j, not is_empty(i, j)))
    # add adjacencies
    for j in range(0, size):
        for i in range(0, size):
            nodes[j * size + i].add_adjs(nodes)
    return nodes
def get_minimum_time(df_1: pd.DataFrame, df_2: pd.DataFrame) -> pd.DataFrame:
    if is_empty(df_1) and is_empty(df_2):
        return pd.DataFrame()
    mmsis_1 = df_1["MMSI"].unique().tolist()
    mmsis_2 = df_2["MMSI"].unique().tolist()
    mmsis = list(set(mmsis_1) | set(mmsis_2))

    # preallocate result
    result_df = pd.DataFrame(index=np.arange(0, len(mmsis)), columns=df_1.columns)

    for idx, mmsi in enumerate(mmsis):
        mmsi_df_1 = df_1.loc[df_1["MMSI"] == mmsi]
        mmsi_df_2 = df_2.loc[df_2["MMSI"] == mmsi]
        min_df_1 = mmsi_df_1.loc[mmsi_df_1["time"] == mmsi_df_1["time"].min()]
        min_df_2 = mmsi_df_2.loc[mmsi_df_2["time"] == mmsi_df_2["time"].min()]

        # make sure no duplicate arrival times occur in case of identical
        # min timestamps: keep only the first row
        len_df_1 = len(min_df_1.index)
        len_df_2 = len(min_df_2.index)
        if len_df_1 > 1:
            min_df_1 = min_df_1.drop(min_df_1.index[1:])
        if len_df_2 > 1:
            min_df_2 = min_df_2.drop(min_df_2.index[1:])

        # assign minimum of both DataFrames to result
        if len_df_1 > 0:
            if len_df_2 > 0:
                min_time_1 = min_df_1["time"].iloc[0]
                min_time_2 = min_df_2["time"].iloc[0]
                result_df.loc[idx] = (min_df_1.iloc[0] if min_time_1 < min_time_2
                                      else min_df_2.iloc[0])
            else:
                result_df.loc[idx] = min_df_1.iloc[0]
        elif len_df_2 > 0:
            result_df.loc[idx] = min_df_2.iloc[0]
        else:
            result_df = result_df.drop([idx])
    return result_df
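def _demo_get_minimum_time():
    # Hedged usage sketch, not part of the original code: toy frames with just
    # the two columns the function touches; assumes the project's is_empty()
    # helper is in scope.
    df_a = pd.DataFrame({"MMSI": ["123456789"], "time": [20]})
    df_b = pd.DataFrame({"MMSI": ["123456789"], "time": [15]})
    # expect the df_b row to win, since 15 < 20 for this MMSI
    return get_minimum_time(df_a, df_b)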
def get_reference_trajectories():
    """Either use cached, or compute reference trajectories."""
    # Create trajectory reference base folder
    os.makedirs(DIR_TRAJ_REF, exist_ok=True)

    # Copy cached trajectories if flag set
    if USE_CACHED_REF_TRAJ == 'YES':
        shutil.copytree(DIR_CACHE_TRAJ_REF_JWS, DIR_TRAJ_REF_JWS)
        shutil.copytree(DIR_CACHE_TRAJ_REF_BIOMODELS, DIR_TRAJ_REF_BIOMODELS)
        return

    # TODO We use cached biomodels simulations anyway, unless automatic
    #  simulation is implemented here too. Until then, manually copy if
    #  changes of the Biomodels simulations are intended.
    shutil.copytree(DIR_CACHE_TRAJ_REF_BIOMODELS, DIR_TRAJ_REF_BIOMODELS)

    # Target folder for JWS trajectories
    dir_traj_jws = os.path.join(DIR_TRAJ_REF, 'trajectories_reference_jws')
    # Create if not existent
    os.makedirs(dir_traj_jws)

    # Get JWS model slug list
    url = "https://jjj.mib.ac.uk/rest/models/?format=json"
    jws_model_infos = json.loads(requests.get(url).text)

    # Loop over all models
    for i_submodel in model_info.index:
        if is_empty(model_info.loc[i_submodel, 'amici_path']):
            # Only get references for amici-importable models
            continue
        # Path of the sedml file
        sedml_file = model_info.loc[i_submodel, 'sedml_path']
        if is_empty(sedml_file):
            # Not from JWS
            continue
        download_jws_reference_trajectory(i_submodel, sedml_file,
                                          jws_model_infos)
def fit_transform(self, df: pd.DataFrame, arrival_time: int) -> pd.DataFrame:
    if is_empty(df):
        return df
    min_timestamp = pd.Timestamp(df["time"].min(), unit="s")
    self.min_year_timestamp = pd.Timestamp(year=min_timestamp.year, month=1,
                                           day=1, hour=0).timestamp()
    self.arrival_time = arrival_time
    df = df.assign(time_scaled=df["time"] - self.min_year_timestamp)

    # data can be labeled
    # print("applying arrival time {} to df: e.g. {}".format(arrival_time, df.iloc[0]["time"]))
    if arrival_time > 0:
        df = df.assign(label=arrival_time - df["time"])
    else:
        df = df.assign(label=-1)
    df = df.drop(columns=["time"])
    return df
def main():
    path = read("Please input SoloPi case path:")
    content = None
    with open(path) as f:
        content = f.read()
    if is_empty(content):
        log(TAG, "Path %s is invalid" % path)
        return
    case = Decoder().decode(content)
    if case is None:
        log(TAG, "Path %s is invalid" % path)
        return

    selections = []
    for key in EXPORTERS:
        selections.append(key)

    # config export folder
    select = require_selection("Please select transform format", selections)
    handler: BaseExporter = EXPORTERS[selections[select]]()
    folder = read("Please input export folder:")
    os.mkdir(folder)
    main_file = os.path.join(folder, 'main.' + handler.get_file_extension())
    img_folder = os.path.join(folder, 'screenshots')
    os.mkdir(img_folder)

    case_info = case.case_info
    case_info.root = folder
    case_info.img_root = img_folder
    case_list = case.case_list

    # export to main file
    with open(main_file, 'w') as f:
        f.write(handler.export_head(case_info))
        f.write('\n')
        for i in range(len(case_list)):
            f.write(handler.export_step(case_list[i], i + 1, case_info))
            f.write('\n')
        f.write(handler.export_tail(case_info))
    log(TAG, "Export finished")
def is_rubbish_msg(self, donation):
    """
    A rubbish message has few normal characters in it.
    :param donation:
    :return:
    """
    if util.is_empty(donation.message):
        return False

    message = donation.message
    try:
        message = unidecode(message)
    except Exception as e:
        logger.error('Error in unidecoding %s: %s' % (repr(message), repr(e)))

    text_only = re.sub(r'[^a-zA-Z]', '', message)
    return (float(len(text_only)) / len(donation.message)) <= 0.20
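# Hedged illustration of the rubbish heuristic above, standalone and without
# the surrounding class: strip everything but letters and compare against the
# original length.
import re

def _looks_rubbish(message: str) -> bool:
    # assumes a non-empty message, mirroring the guard in is_rubbish_msg
    text_only = re.sub(r'[^a-zA-Z]', '', message)
    return len(text_only) / len(message) <= 0.20

# _looks_rubbish("!!!***$$$")  -> True  (no letters at all)
# _looks_rubbish("thank you!") -> False (mostly letters)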
def identify_arrival_times(
        port: Port, df: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
    # separation by inner square of port area
    lat_mask = ((df["Latitude"] > (port.latitude - port.inner_square_lat_radius))
                & (df["Latitude"] < (port.latitude + port.inner_square_lat_radius)))
    df_outside_square: pd.DataFrame = df.loc[~lat_mask]
    df_inside_square: pd.DataFrame = df.loc[lat_mask]

    long_mask = ((df_inside_square["Longitude"] > (port.longitude - port.inner_square_long_radius))
                 & (df_inside_square["Longitude"] < (port.longitude + port.inner_square_long_radius)))
    df_outside_square = pd.concat([df_outside_square,
                                   df_inside_square.loc[~long_mask]])
    df_inside_square = df_inside_square.loc[long_mask]

    # accurate separation outside of inner square but within port's radius
    radius_mask = df_outside_square.apply(radius_filter, args=(port, ), axis=1)
    df_outside_circle: pd.DataFrame = df_outside_square[radius_mask]  # training data series
    df_inside_circle: pd.DataFrame = df_outside_square[~radius_mask]

    # minimum timestamp of inside-port-area data-points is the arrival time
    arrival_times: pd.DataFrame = get_minimum_time(df_inside_square, df_inside_circle)
    if is_empty(arrival_times):
        arrival_times = pd.DataFrame(columns=df_outside_circle.columns)
    return df_outside_circle, arrival_times
def inverse_transform(self, df: pd.DataFrame) -> pd.DataFrame:
    if is_empty(df):
        return df
    df = df.assign(time=df["time_scaled"] + self.min_year_timestamp)
    df = df.drop(columns=["time_scaled"])
    return df
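# Hedged round-trip sketch for the time-scaling transformer whose
# fit_transform and inverse_transform methods appear above. The class name
# `TimeTransformer` is an assumption; the original class is not shown.
#
#   t = TimeTransformer()
#   labeled = t.fit_transform(raw_df, arrival_time=1609459200)
#   # "time" is replaced by "time_scaled" (seconds since Jan 1 of the min
#   # year) and "label" = arrival_time - time when an arrival time is known
#   restored = t.inverse_transform(labeled)  # recovers the "time" column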
def submit():
    # On submit, check whether the mandatory fields exist
    form_data = request.form
    should_error = False
    if is_empty(form_data, "email"):
        # User hasn't provided an email ID
        should_error = True
        error_msg = "No email ID was provided, cannot schedule alert"
    if is_empty(form_data, "age"):
        # User hasn't provided an age
        should_error = True
        error_msg = "No age was provided, cannot schedule alert"
    if is_empty(form_data, "pincode"):
        # User hasn't provided a pincode
        should_error = True
        error_msg = "No pincode was provided, cannot schedule alert"
    if should_error:
        return error_msg, 400

    email = form_data.get("email")
    age = int(form_data.get("age"))
    pincode = form_data.get("pincode")
    username = form_data.get("username")
    if not username:
        username = "******"
    start_date = form_data.get("start_date")
    if not start_date:
        start_date = datetime.today().strftime('%Y-%m-%d')
    end_date = form_data.get("end_date")
    if not end_date:
        end_date = (datetime.today() + timedelta(days=365)).strftime('%Y-%m-%d')

    cvc_type = form_data.get("cvc_type", "any")
    if cvc_type not in ("any", "Free", "Paid"):
        should_error = True
        error_msg = "Invalid value provided for cvc_type"
    vaccine_choice = form_data.get("vaccine_choice", "any")
    if vaccine_choice not in ("any", "COVISHIELD", "COVAXIN"):
        should_error = True
        error_msg = "Invalid value provided for vaccine_choice"
    pincode_set = set(pincode.split(";"))
    if should_error:
        return error_msg, 400

    insert_user_data_to_db(email, age, pincode_set, start_date=start_date,
                           end_date=end_date, fee_type=cvc_type,
                           vaccine=vaccine_choice)
    en = EmailNotifier()
    en.acknowledge(email)
    return render_template("alert_success.html", username=username)
def get_subject(self):
    if is_empty(self.subject):
        return '(no subject)'
    return self.subject
def generate_dataset(file_path: str, output_dir: str, data_source: str,
                     pm: PortManager) -> None:
    print(f"Extracting file from '{file_path}' of type '{data_source}'")
    min_number_of_rows = 10000
    numerical_features = [
        "time", "Latitude", "Longitude", "SOG", "COG", "Heading", "Width",
        "Length", "Draught"
    ]
    categorical_features = ["Ship type", "Navigational status"]
    df = pd.read_csv(file_path, sep=",")

    if data_source == "dma":
        df = df.drop(columns=[
            "Type of mobile", "ROT", "IMO", "Callsign", "Name", "Cargo type",
            "Type of position fixing device", "ETA", "Data source type", "A",
            "B", "C", "D"
        ])
    # unify data sources to 'dma' source
    elif data_source == "mc":
        df = df.rename(columns=mc_to_dma)
        df = df.drop(columns=[
            "VesselName", "IMO", "Callsign", "Cargo", "TransceiverClass"
        ])

    # fill NaN values with their defaults from official AIS documentation
    # https://api.vtexplorer.com/docs/response-ais.html
    # COG = 0 and SOG = 0 might not be the best default values
    df = df.fillna(value={
        "Heading": 511,
        "SOG": 0,
        "COG": 0,
        "Width": 0,
        "Length": 0,
        "Draught": 0
    })

    # filter out-of-range values
    df = df.loc[(df["Latitude"] >= data_ranges["Latitude"]["min"])
                & (df["Latitude"] <= data_ranges["Latitude"]["max"])]
    df = df.loc[(df["Longitude"] >= data_ranges["Longitude"]["min"])
                & (df["Longitude"] <= data_ranges["Longitude"]["max"])]

    # abort if not enough data remains
    if len(df.index) < min_number_of_rows:
        logger.write(
            f"Required {min_number_of_rows} rows of data, got {len(df.index)}")
        return

    initialize(output_dir)
    scaler = None
    """
    Find unique routes of a ship to a destination from data pool
    1) Group by destination
    2) Group by ship (MMSI)
    """
    destinations: List[str] = df["Destination"].unique()
    for dest_column_header in destinations:
        if pd.isnull(dest_column_header):
            continue
        dest_name: str = get_destination_file_name(dest_column_header)
        port = pm.find_port(dest_name)
        # skip if no port data is set
        if port is None:
            continue

        for folder in output_folders:
            if not os.path.exists(os.path.join(output_dir, folder, port.name)):
                os.makedirs(os.path.join(output_dir, folder, port.name))

        dest_df = df.loc[df["Destination"] == dest_column_header]
        dest_df = dest_df.drop(columns=["Destination"])
        dest_df = format_timestamp_col(dest_df, data_source)

        # extract data-points that are sent while sitting in port to compute label
        x_df, arrival_times_df = pm.identify_arrival_times(port, dest_df)

        # skip port if all ships are hanging out in port area only
        if is_empty(x_df):
            logger.write(
                f"No data for port {port.name} outside of port area. "
                f"{len(x_df.index)} data-points, "
                f"{len(arrival_times_df.index)} labels")
            continue

        # init route combiner on existing unlabeled data for current port
        rc = RouteCombiner(data_dir=os.path.join(output_dir, "unlabeled",
                                                 port.name),
                           csv_map_path=os.path.join(output_dir, "dma",
                                                     "combinations.json"))
        rc.fit()

        # handle categorical data
        ship_types_df, ship_type_encoder = one_hot_encode(
            x_df.pop("Ship type"), "Ship type")
        nav_states_df, nav_status_encoder = one_hot_encode(
            x_df.pop("Navigational status"), "Navigational Status")
        arrival_times_df = arrival_times_df.drop(
            columns=["Ship type", "Navigational status"])
        x_df = pd.concat([
            x_df.reset_index(drop=True),
            ship_types_df.reset_index(drop=True),
            nav_states_df.reset_index(drop=True)
        ], axis=1)

        mmsis = x_df["MMSI"].unique()
        for idx, mmsi in enumerate(mmsis):
            # TODO: Handle ships that head to the same port more than once within the dataset
            ship_df = x_df.loc[x_df["MMSI"] == mmsi]
            arrival_time_df = arrival_times_df.loc[
                arrival_times_df["MMSI"] == mmsi]

            arrival_time = -1
            if not is_empty(arrival_time_df):
                arrival_time = arrival_time_df.iloc[0]["time"]
                # drop rows sent after the ship left the port; only applies
                # when an arrival time is known
                ship_df = ship_df[ship_df["time"] <= arrival_time]
            if is_empty(ship_df):
                continue

            ship_df = ship_df.drop(columns=["MMSI"])
            _, file_name = os.path.split(file_path)
            file_date = rc.date_from_source_csv(file_name)

            if arrival_time == -1:
                if rc.has_match(str(mmsi), file_date):
                    ship_df = rc.match(str(mmsi), file_date, ship_df)
                f_path = os.path.join(
                    output_dir, "unlabeled", port.name,
                    obj_file("data_unlabeled", mmsi, file_date))
                ship_df.to_pickle(f_path)
                continue

            if rc.has_match(str(mmsi), file_date):
                ship_df = rc.match(str(mmsi), file_date, ship_df)
            ship_df, labeler = generate_label(ship_df, arrival_time)

            cols_to_normalize = ship_df.columns.tolist()
            if scaler is None:
                scaler = init_scaler(x_df, cols_to_normalize)
                joblib.dump(
                    scaler,
                    os.path.join(output_dir, "encode", "normalizer.pkl"))
            ship_df_normalized = normalize(ship_df, scaler, cols_to_normalize)

            data_normalized = ship_df_normalized.to_numpy()
            routes_dir = os.path.join(output_dir, "routes", port.name)
            data_file_path = encode_data_file(mmsi, routes_dir, join=True)
            np.save(data_file_path, data_normalized)
            joblib.dump(
                labeler,
                os.path.join(output_dir, "encode", port.name,
                             obj_file("labeler", mmsi)))
            joblib.dump(
                ship_type_encoder,
                os.path.join(output_dir, "encode", port.name,
                             obj_file("ship_type", mmsi)))
            joblib.dump(
                nav_status_encoder,
                os.path.join(output_dir, "encode", port.name,
                             obj_file("nav_status", mmsi)))
def validate_content_or_source(input):
    if not util.is_empty(input):
        return input.lower()
    print_error_and_fail('content_or_source')
def set_config_value(self, cmd, values=None, remove=False, under_directive=None):
    """
    Sets command to the specified value in the configuration file.
    Loads file from the disk if server_config_data is None (file was not yet loaded).
    Modifies self.config_data, self.config_modified

    :param cmd:
    :param values: single value or array of values for multi-commands (e.g., push).
           None & remove -> remove all commands. Otherwise just commands with
           the given values are removed.
    :param remove: if True, configuration command is removed
    :param under_directive: if specified, command is placed under specified directive, if exists
    :return: True if file was modified
    """
    # If file is not loaded - load
    if self.config_data is None:
        self.config_data = self.load_config_file_lines()

    # default position - end of the config file
    last_cmd_idx = len(self.config_data) - 1
    file_changed = False
    values_set = False

    for idx, cfg in enumerate(self.config_data):
        if cfg.ltype not in [CONFIG_LINE_CMD, CONFIG_LINE_CMD_COMMENT]:
            continue

        if under_directive is not None and util.equals_any(cfg.cmd, under_directive):
            last_cmd_idx = idx

        if cfg.cmd != cmd:
            continue

        # Only commands of interest here
        last_cmd_idx = idx
        is_desired_value = cfg.params == values
        is_desired_value |= remove and values is None
        is_desired_value |= not remove and values is None and util.is_empty(cfg.params)
        value_idx = 0 if not remove and cfg.params == values else None

        if is_desired_value:
            if cfg.ltype == CONFIG_LINE_CMD and not remove:
                # Command is already set to the same value. File not modified.
                # Cannot quit yet, has to comment out other values
                if value_idx is not None:
                    if not values_set:
                        values_set = True
                    else:
                        cfg.ltype = CONFIG_LINE_CMD_COMMENT
                        file_changed = True
            elif cfg.ltype == CONFIG_LINE_CMD:
                # Remove command - comment out
                cfg.ltype = CONFIG_LINE_CMD_COMMENT
                file_changed = True
            elif cfg.ltype == CONFIG_LINE_CMD_COMMENT and remove:
                # Remove && comment - leave as it is
                # Cannot quit yet, has to comment out other values
                pass
            else:
                # CONFIG_LINE_CMD_COMMENT and not remove.
                # Just change the type to active value - switch from comment to command
                # Cannot quit yet, has to comment out other values
                do_change = True
                if value_idx is not None:
                    if not values_set:
                        values_set = True
                    else:
                        do_change = False
                if do_change:
                    cfg.ltype = CONFIG_LINE_CMD
                    file_changed = True

        elif cfg.ltype == CONFIG_LINE_CMD and not remove:
            # Same command, but different value - comment this out
            # If remove is True, only desired values were removed.
            cfg.ltype = CONFIG_LINE_CMD_COMMENT
            file_changed = True

    if remove:
        self.config_modified |= file_changed
        return file_changed

    # Add those commands not set in the cycle above
    if not values_set:
        cl = ConfigLine(idx=None, raw=None, ltype=CONFIG_LINE_CMD, cmd=cmd,
                        params=values)
        self.config_data.insert(last_cmd_idx + 1, cl)
        file_changed = True

    self.config_modified |= file_changed
    return file_changed
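# Hedged usage sketch for set_config_value. The owning class and its
# ConfigLine model are not shown here; the class name and directive values
# below are illustrative only (OpenVPN-style directives assumed):
#
#   cfg = ServerConfig(...)                        # hypothetical owner class
#   cfg.set_config_value('port', '1194')           # set/activate `port 1194`
#   cfg.set_config_value('comp-lzo', remove=True)  # comment out all `comp-lzo`
#   cfg.set_config_value('push', 'route 10.0.0.0 255.0.0.0',
#                        under_directive='server')  # place after `server`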