import os
import re

import mtranslate

# `information_about_apartments`, `CASHED`, `get_distance` and
# `load_apartments_info_to_db` are module-level names defined elsewhere
# in this scraper.


def create_json_for_db():
    result = []
    for info_dict in information_about_apartments:
        apartment_info = {}
        for key, value in info_dict.items():
            if key == 'cost':
                apartment_info[key] = value
            elif key == 'rooms_info':
                # \d+ captures multi-digit room counts; the original [\d]
                # pattern only matched the first digit.
                number_of_rooms = re.match(r'\d+', value)
                if number_of_rooms:
                    apartment_info['rooms'] = int(number_of_rooms.group())
            elif key == 'area_info':
                # The area string looks like "total/living/kitchen".
                areas_info = [float(area) for area in value.split('/')]
                areas_info_length = len(areas_info)
                if areas_info_length == 1:
                    apartment_info['area'] = areas_info[0]
                elif areas_info_length == 2:
                    apartment_info['area'], apartment_info['living_area'] = areas_info
                elif areas_info_length == 3:
                    (apartment_info['area'], apartment_info['living_area'],
                     apartment_info['kitchen_area']) = areas_info
            elif key == 'floors_info':
                if value:
                    # The floor string looks like "floor/floors_in_building".
                    try:
                        floors_info = [int(floor) for floor in value.split('/')]
                    except ValueError:  # narrowed from a bare Exception
                        continue
                    floors_info_length = len(floors_info)
                    if floors_info_length == 1:
                        apartment_info['floor'] = floors_info[0]
                    elif floors_info_length == 2:
                        apartment_info['floor'], apartment_info['floors'] = floors_info
            elif key in ('conditions', 'walls_material'):
                # Translate each distinct value once and cache the result.
                if value and value not in CASHED:
                    apartment_info[key] = mtranslate.translate(value, 'en').lower()
                    CASHED[value] = apartment_info[key]
                elif value in CASHED:
                    apartment_info[key] = CASHED[value]
            elif key == 'address':
                # Distance to Maidan Nezalezhnosti, the centre of Kyiv.
                apartment_info['distance_to_center'] = get_distance(
                    value, 'Майдан Незалежності, Київ')
        apartment_info['building_type'] = 'New building'
        apartment_info['city'] = 'Kyiv'
        result.append(apartment_info)
        print(apartment_info)
    load_apartments_info_to_db(data_to_db=result)
    os.remove('../json_files/kyiv_info.json')
    os.remove('../json_files/kyiv_apartment_page_links.json')
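# `get_distance` is called above with two free-form address strings, which
# suggests it geocodes both and measures the distance between them. The helper
# below is a minimal sketch of that idea, assuming geopy is available; it is
# an illustration, not the project's actual implementation.
from geopy.distance import geodesic
from geopy.geocoders import Nominatim


def get_distance_sketch(address, reference_address):
    # Hypothetical stand-in for get_distance(): geocode both addresses with
    # Nominatim and return the geodesic distance between them in kilometres.
    geolocator = Nominatim(user_agent='apartment-scraper-sketch')
    start = geolocator.geocode(address)
    end = geolocator.geocode(reference_address)
    if start is None or end is None:
        return None  # one of the addresses could not be geocoded
    return round(geodesic((start.latitude, start.longitude),
                          (end.latitude, end.longitude)).km, 2)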
def append_coords(self, coords):
    """
    Updates the driven distance, which is incremented by the distance
    between the specified coordinates and the previous ones.

    :param coords: The latest coordinates
    :type coords: dict
    """
    if self.latest_coords:
        self.distance += get_distance(self.latest_coords, coords)
    self.latest_coords = coords
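# A minimal self-contained demo of the accumulation pattern used by
# append_coords(). DistanceTracker and the planar get_distance() below are
# stand-ins for illustration; the real project helper likely computes
# geographic distance.
import math


def get_distance(a, b):
    # Toy Euclidean distance over {'x': ..., 'y': ...} coordinate dicts.
    return math.hypot(a['x'] - b['x'], a['y'] - b['y'])


class DistanceTracker:
    def __init__(self):
        self.distance = 0.0
        self.latest_coords = None  # no previous point yet

    def append_coords(self, coords):
        if self.latest_coords:
            self.distance += get_distance(self.latest_coords, coords)
        self.latest_coords = coords


tracker = DistanceTracker()
for point in [{'x': 0, 'y': 0}, {'x': 3, 'y': 4}, {'x': 3, 'y': 8}]:
    tracker.append_coords(point)
print(tracker.distance)  # 5.0 + 4.0 = 9.0 in this toy example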
def __get_load_data_inside_circle(self, event_coords, event_time):
    """
    Extracts all items detected by this truck within a circle centered on
    the specified coordinates. The radius of this circle is specified by
    the system configuration, in the **pickup_check_distance_window**
    option.

    This method is invoked when extracting item data for pickup events.
    It extracts RFID data from the following time window:

    - The window start is the later of 60 seconds before the pickup and
      the timestamp at which the clamp truck entered the circle centered
      around the pickup coordinates
    - The window end is the pickup time, incremented by the number of
      seconds specified by the system configuration, in the
      **pickup_post_seconds** option.

    This time window is internally passed to
    :func:`__get_load_data <database.Database._Database__get_load_data>`.

    :param event_coords: The pickup location
    :type event_coords: dict
    :param event_time: The pickup timestamp
    :type event_time: str
    :returns: The list of items detected by the RFID reader in the
        specified area.
    :rtype: list of dicts
    """
    max_time = datetime.strptime(event_time, '%Y-%m-%d %H:%M:%S.%f')
    min_time = max_time - timedelta(seconds=60)
    sql = ("SELECT x(coordinates) AS x, y(coordinates) AS y, timestamp "
           "FROM loc_data "
           "WHERE truck_id={} AND timestamp >= '{}' AND timestamp <= '{}' "
           "ORDER BY timestamp DESC".format(self.truck_id, min_time, max_time))
    self.cursor.execute(sql)
    loc_data = self.cursor.fetchall()

    load_query_start_time = min_time
    load_query_end_time = max_time + timedelta(
        seconds=self.config['pickup_post_seconds'])
    # Walk back in time from the pickup; the first sample lying outside the
    # configured circle marks the moment the truck entered it.
    for location in loc_data:
        pickup_distance = get_distance(event_coords, location)
        if pickup_distance >= self.config['pickup_check_distance_window']:
            load_query_start_time = location['timestamp']
            break
    return self.__get_load_data(load_query_start_time, load_query_end_time)
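# To make the window logic above concrete: a standalone sketch that derives
# the same [start, end] bounds from an in-memory location trace. It assumes
# location samples are dicts with a 'timestamp' key, sorted newest-first, and
# a get_distance() helper as in the rest of the module; the names below are
# for illustration only.
from datetime import datetime, timedelta


def pickup_window(event_time, loc_data, event_coords, get_distance,
                  radius, post_seconds):
    # event_time: 'YYYY-mm-dd HH:MM:SS.ffffff' pickup timestamp.
    max_time = datetime.strptime(event_time, '%Y-%m-%d %H:%M:%S.%f')
    start = max_time - timedelta(seconds=60)          # default: 60 s back
    end = max_time + timedelta(seconds=post_seconds)  # pickup + post window
    for location in loc_data:  # newest sample first
        if get_distance(event_coords, location) >= radius:
            # First sample outside the circle marks the moment of entry.
            start = location['timestamp']
            break
    return start, end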
def event_distance_check(self, threshold, clamp_event_coords):
    """
    Checks whether the distance between the current coordinates and the
    specified ones is greater than the specified threshold.

    This is invoked to monitor when the truck drives away from the pickup
    or drop coordinates, to trigger the pickup and drop checks at the
    proper time.

    :param threshold: The distance threshold.
    :type threshold: float
    :param clamp_event_coords: The reference coordinates.
    :type clamp_event_coords: dict
    :return: *True* if the distance is greater than the threshold,
        *False* otherwise.
    :rtype: bool
    """
    distance = get_distance(self.curr_loc_coords, clamp_event_coords)
    return distance > threshold
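# Usage sketch: gating a deferred pickup validation on this check. All names
# here (PICKUP_RADIUS, run_pickup_check, truck, pending_pickup) are
# hypothetical, used only to show when event_distance_check() would fire in
# a polling loop.
PICKUP_RADIUS = 25.0  # assumed threshold, in the same units as get_distance()


def run_pickup_check(pickup_event):
    # Placeholder for the real pickup validation.
    print('running pickup check for', pickup_event)


def poll(truck, pending_pickup):
    # Trigger the check only once the truck has driven away from the spot
    # where the clamp event happened, as the docstring above describes.
    if pending_pickup and truck.event_distance_check(
            PICKUP_RADIUS, pending_pickup['coords']):
        run_pickup_check(pending_pickup)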
import os

import pandas

import helpers
import settings


def combine_nodule_predictions(dirs, train_set=True, nodule_th=0.5, extensions=[""]):
    print("Combining nodule predictions: ", "Train" if train_set else "Submission")
    if train_set:
        labels_df = pandas.read_csv("resources/stage1_labels.csv")
    else:
        labels_df = pandas.read_csv("resources/stage2_sample_submission.csv")

    mass_df = pandas.read_csv(settings.BASE_DIR + "masses_predictions.csv")
    mass_df.set_index(["patient_id"], inplace=True)
    # meta_df = pandas.read_csv(settings.BASE_DIR + "patient_metadata.csv")
    # meta_df.set_index(["patient_id"], inplace=True)

    data_rows = []
    for index, row in labels_df.iterrows():
        patient_id = row["id"]
        # mask = helpers.load_patient_images(patient_id, settings.EXTRACTED_IMAGE_DIR, "*_m.png")
        print(len(data_rows), " : ", patient_id)
        # if len(data_rows) > 19:
        #     break
        cancer_label = row["cancer"]
        mass_pred = int(mass_df.loc[patient_id]["prediction"])
        # meta_row = meta_df.loc[patient_id]
        # z_scale = meta_row["slice_thickness"]
        # x_scale = meta_row["spacingx"]
        # vendor_low = 1 if "1.2.276.0.28.3.145667764438817.42.13928" in meta_row["instance_id"] else 0
        # vendor_high = 1 if "1.3.6.1.4.1.14519.5.2.1.3983.1600" in meta_row["instance_id"] else 0
        # row_items = [cancer_label, 0, mass_pred, x_scale, z_scale, vendor_low, vendor_high]  # mask.sum()
        row_items = [cancer_label, 0, mass_pred]  # mask.sum()

        for magnification in [1, 1.5, 2]:
            # Merge the per-extension prediction CSVs for this magnification.
            pred_df_list = []
            for extension in extensions:
                src_dir = settings.NDSB3_NODULE_DETECTION_DIR + "predictions" + str(int(magnification * 10)) + extension + "/"
                pred_nodules_df = pandas.read_csv(src_dir + patient_id + ".csv")
                pred_nodules_df = pred_nodules_df[pred_nodules_df["diameter_mm"] > 0]
                pred_nodules_df = pred_nodules_df[pred_nodules_df["nodule_chance"] > nodule_th]
                pred_df_list.append(pred_nodules_df)
            pred_nodules_df = pandas.concat(pred_df_list, ignore_index=True)

            nodule_count = len(pred_nodules_df)
            nodule_max = 0
            nodule_median = 0
            nodule_chance = 0
            nodule_sum = 0
            coord_z = 0
            second_largest = 0
            nodule_wmax = 0
            count_rows = []
            coord_y = 0
            coord_x = 0
            if len(pred_nodules_df) > 0:
                # Features of the largest predicted nodule. idxmax() returns
                # the index label of the maximum; the bare `.argmax` in the
                # original was missing its call parentheses.
                max_index = pred_nodules_df["diameter_mm"].idxmax()
                max_row = pred_nodules_df.loc[max_index]
                nodule_max = round(max_row["diameter_mm"], 2)
                nodule_chance = round(max_row["nodule_chance"], 2)
                nodule_median = round(pred_nodules_df["diameter_mm"].median(), 2)
                nodule_wmax = round(nodule_max * nodule_chance, 2)
                coord_z = max_row["coord_z"]
                coord_y = max_row["coord_y"]
                coord_x = max_row["coord_x"]

                # Second-largest nodule at least 0.2 away from the largest.
                rows = []
                for row_index, row in pred_nodules_df.iterrows():
                    dist = helpers.get_distance(max_row, row)
                    if dist > 0.2:
                        nodule_mal = row["diameter_mm"]
                        if nodule_mal > second_largest:
                            second_largest = nodule_mal
                    rows.append(row)

                # Count nodules that are pairwise at least 0.2 apart, so that
                # near-duplicate detections are counted once.
                count_rows = []
                for row in rows:
                    ok = True
                    for count_row in count_rows:
                        dist = helpers.get_distance(count_row, row)
                        if dist < 0.2:
                            ok = False
                    if ok:
                        count_rows.append(row)
                nodule_count = len(count_rows)

            row_items += [nodule_max, nodule_chance, nodule_count, nodule_median, nodule_wmax, coord_z, second_largest, coord_y, coord_x]

        row_items.append(patient_id)
        data_rows.append(row_items)

    # , "x_scale", "z_scale", "vendor_low", "vendor_high"
    columns = ["cancer_label", "mask_size", "mass"]
    for magnification in [1, 1.5, 2]:
        str_mag = str(int(magnification * 10))
        columns.append("mx_" + str_mag)
        columns.append("ch_" + str_mag)
        columns.append("cnt_" + str_mag)
        columns.append("med_" + str_mag)
        columns.append("wmx_" + str_mag)
        columns.append("crdz_" + str_mag)
        columns.append("mx2_" + str_mag)
        columns.append("crdy_" + str_mag)
        columns.append("crdx_" + str_mag)
columns.append("patient_id") res_df = pandas.DataFrame(data_rows, columns=columns) if not os.path.exists(settings.BASE_DIR + "xgboost_trainsets/"): os.mkdir(settings.BASE_DIR + "xgboost_trainsets/") target_path = settings.BASE_DIR + "xgboost_trainsets/" "train" + extension + ".csv" if train_set else settings.BASE_DIR + "xgboost_trainsets/" + "submission" + extension + ".csv" res_df.to_csv(target_path, index=False)
import os

import mtranslate

# `create_list_with_apartments_information`, `USELESS_KEYS`, `TRANSLATE_DICT`,
# `CASHED`, `get_distance` and `load_apartments_info_to_db` are module-level
# names defined elsewhere in this scraper.


def create_json_for_db():
    json_for_db = []
    list_of_dicts = create_list_with_apartments_information()
    print('List of dict with apartments data created')
    for info_dt in list_of_dicts:
        result_dict = {}
        keys = [key for key in info_dt.keys() if key not in USELESS_KEYS]
        for key in keys:
            if 'грн/м' in info_dt[key]:
                # The listing shows a price per square metre (UAH/m²), so the
                # total cost is the dollar price ("Ціна $") times the total
                # area ("Загальна площа").
                result_dict['cost'] = int(
                    info_dt['Ціна $'].replace('$', '').replace(' ', '')
                ) * int(float(info_dt['Загальна площа']))
            elif key == 'Ціна':  # "Price"
                result_dict['cost'] = int(
                    info_dt['Ціна $'].replace('$', '').replace(' ', ''))
            elif key == 'Ціна $':  # "Price $" is already consumed above
                continue
            elif key == 'Адреса':  # "Address"
                result_dict[TRANSLATE_DICT[key]] = info_dt[key]
            else:
                # Translate each distinct value once and cache the result.
                if info_dt[key] not in CASHED and key in TRANSLATE_DICT:
                    try:
                        result_dict[TRANSLATE_DICT[key]] = mtranslate.translate(
                            info_dt[key], 'en')
                        CASHED[info_dt[key]] = result_dict[TRANSLATE_DICT[key]]
                    except Exception as error:
                        print(error)
                elif key in TRANSLATE_DICT:
                    result_dict[TRANSLATE_DICT[key]] = CASHED[info_dt[key]]
                print('Translate one')
        json_for_db.append(result_dict)
        print('Appended to result')

    result = []
    for info_dt in json_for_db:
        # Replace the raw address with the distance to the Lviv Opera House
        # ("Львів Оперний театр"). The original popped the key while iterating
        # over the dict, which raises a RuntimeError in Python 3; checking for
        # the key directly avoids that.
        if 'address' in info_dt:
            info_dt['distance_to_center'] = get_distance(
                info_dt['address'], 'Львів Оперний театр')
            info_dt.pop('address')
        info_dt['city'] = 'Lviv'
        result.append(info_dt)
    load_apartments_info_to_db(data_to_db=result)
    os.remove('json_files/lviv_info.json')
    os.remove('json_files/lviv_apartment_page_links.json')
    return 1
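# The CASHED dict above memoizes translated strings so each distinct
# Ukrainian value is sent to the translation service only once. Below is a
# minimal standalone version of that pattern, using the same
# mtranslate.translate() call; the cache and function names are illustrative.
import mtranslate

_translation_cache = {}


def translate_cached(text, lang='en'):
    # Return the cached translation if we have one; otherwise translate and
    # remember the result. Falls back to the original text on errors.
    if text not in _translation_cache:
        try:
            _translation_cache[text] = mtranslate.translate(text, lang)
        except Exception as error:
            print(error)
            return text
    return _translation_cache[text]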