def find_matching_data(self, data=None, file_name=None, save_data=True, max_iterations=9999):
    """Run the matching procedure for every qualifying record in ``data``.

    Each record for which ``self.cond_set(item=..., source="bj",
    type="lease")`` is truthy is handed to ``self.matching_procedure`` once
    per target source (``"rm"``, ``"oc"``, ``"zhand"``) using the procedure's
    default match method, and then once more per target source with
    ``match_method="address"``.

    Args:
        data: Mapping of record id to record. Every record must provide
            ``record["01.main_data"]["name"]`` (it is printed). When ``None``
            the mapping is loaded from ``self.combined_data_file_name``.
        file_name: Where to save the result; defaults to
            ``self.combined_data_file_name``.
        save_data: When truthy, ``data`` is written with ``save_json_file``
            after the loop.
        max_iterations: Maximum number of records to visit. Records that
            fail the requirements check count as visited. Values below 1
            visit nothing.

    Returns:
        The ``data`` mapping that was processed, so the result is still
        available when ``save_data`` is false.
    """
    if data is None:
        data = open_json_file(self.combined_data_file_name)

    target_sources = ("rm", "oc", "zhand")

    for visited, (item_id, record) in enumerate(data.items()):
        # Check the cap before doing any work so max_iterations=0 is honoured.
        if visited >= max_iterations:
            break

        print("***\nFinding match for: {}, id: {}".format(
            record["01.main_data"]["name"], item_id))

        if self.cond_set(item=record, source="bj", type="lease"):
            print("requirements test passed")
            # First pass: the procedure's default match method.
            for target in target_sources:
                self.matching_procedure(
                    operand=record, data=data, target_source=target)
            # Second pass: match by address.
            for target in target_sources:
                self.matching_procedure(
                    operand=record, data=data, target_source=target,
                    match_method="address")
        else:
            print("requirements not met")

    if save_data:
        if file_name is None:
            file_name = self.combined_data_file_name
        save_json_file(file_name=file_name, content=data)

    return data
def merge_data(self, input_file_name=None, outut_file_name=None, save_data=True, max_iterations=9999):
    """Load a JSON file, pass it through ``self.merging_function`` and save it.

    Args:
        input_file_name: Path handed to ``open_json_file``. Required.
        outut_file_name: Destination for the merged data; defaults to
            ``self.merged_data_output_file``. (The misspelled parameter name
            is kept because callers may pass it by keyword.)
        save_data: When truthy, the merged data is written with
            ``save_json_file``.
        max_iterations: Forwarded unchanged to ``self.merging_function``.

    Returns:
        Whatever ``self.merging_function`` returned.

    Raises:
        ValueError: If ``input_file_name`` is ``None``.
    """
    if input_file_name is None:
        # ValueError is still caught by callers that catch Exception.
        raise ValueError("No input file")
    if outut_file_name is None:
        outut_file_name = self.merged_data_output_file

    merged = self.merging_function(
        open_json_file(input_file_name), max_iterations=max_iterations)

    if save_data:
        save_json_file(file_name=outut_file_name, content=merged)
    return merged
def combine_data(self, file_name=None, save_data=True):
    """Combine every ``datasets`` file whose name contains ``"st2"``.

    Each matching file is loaded with ``open_json_file`` and its entries are
    copied into one mapping; a key that appears in several files keeps the
    value from the file processed last.

    Args:
        file_name: Destination for the combined data; defaults to
            ``self.combined_data_file_name``.
        save_data: When truthy, the combined mapping is written with
            ``save_json_file``.

    Returns:
        dict with the entries of all matching files.
    """
    if file_name is None:
        file_name = self.combined_data_file_name

    combined = {}
    # listdir returns entries in arbitrary order; sorting makes the
    # "last file wins" rule for duplicate keys reproducible.
    for entry in sorted(listdir("datasets")):
        if "st2" not in entry:
            continue
        print(entry)
        combined.update(open_json_file("datasets/{}".format(entry)))

    if save_data:
        save_json_file(file_name=file_name, content=combined)
    return combined
def parse_by_links(self, urls, output_file_name, max_iterations=9999, save_data=True):
    """Fetch and parse each URL, collecting the results in input order.

    Every URL is first passed through ``self.bug_fixer(url=...,
    set=self.name_of_set)``; the returned URL is fetched with ``fetch_soup``
    and the result is handed to ``self.fetch_raw_data``.

    Args:
        urls: Iterable of URLs to process.
        output_file_name: Destination used when ``save_data`` is truthy.
        max_iterations: Maximum number of URLs to process. Values below 1
            process nothing.
        save_data: When truthy, the collected list is written with
            ``save_json_file``.

    Returns:
        List of the ``self.fetch_raw_data`` results, so the data is still
        available when ``save_data`` is false.
    """
    output = []
    for processed, url in enumerate(urls):
        # Check the cap up front so max_iterations <= 0 does not fall through
        # and process every URL.
        if processed >= max_iterations:
            break
        url = self.bug_fixer(url=url, set=self.name_of_set)
        output.append(self.fetch_raw_data(fetch_soup(url), url=url))

    if save_data:
        save_json_file(file_name=output_file_name, content=output)
    return output
def bug_fixing(self, input_file_name=None, outut_file_name=None, save_data=True, max_iterations=9999):
    """Load a JSON file and run the three data-fixing passes over it.

    The passes run in this order, each receiving the previous pass's output:
    ``self.fixing_completion_date_bug``, ``self.fixing_parking_ratio_bug``,
    ``self.fixing_address_bug``.

    Args:
        input_file_name: Path handed to ``open_json_file``. Required.
        outut_file_name: Destination for the fixed data; defaults to
            ``self.bug_fixed_data_output_file``. (The misspelled parameter
            name is kept because callers may pass it by keyword.)
        save_data: When truthy, the fixed data is written with
            ``save_json_file``.
        max_iterations: Forwarded unchanged to every fixing pass.

    Returns:
        The data returned by the last fixing pass.

    Raises:
        ValueError: If ``input_file_name`` is ``None``.
    """
    if input_file_name is None:
        # ValueError is still caught by callers that catch Exception.
        raise ValueError("No input file")
    if outut_file_name is None:
        outut_file_name = self.bug_fixed_data_output_file

    data = open_json_file(input_file_name)
    fixing_passes = (
        self.fixing_completion_date_bug,
        self.fixing_parking_ratio_bug,
        self.fixing_address_bug,
    )
    for fixing_pass in fixing_passes:
        data = fixing_pass(data, max_iterations=max_iterations)

    if save_data:
        save_json_file(file_name=outut_file_name, content=data)
    return data
def restruct_data(self, raw_data, set, file_name=None, max_iterations=9999, save_data=True):
    """Convert raw records into items shaped like ``self.item_pattern``.

    For every non-``None`` entry of ``raw_data`` a deep copy of
    ``self.item_pattern`` is filled section by section. Field ``<field>`` of
    section ``"<NN>.<title>"`` is produced by calling
    ``self.s_<NN>_<field>(entry, set)``. The record id comes from
    ``self.set_id(set=set, n=...)`` with ``n`` counting converted records
    from 1; it is stored in ``item["01.main_data"]["id"]`` and used as the
    key in the returned mapping.

    Args:
        raw_data: Iterable of raw records; ``None`` entries are skipped.
        set: Source identifier forwarded to every extractor and to
            ``self.set_id``. The name shadows the builtin but is part of the
            public signature.
        file_name: Destination used when ``save_data`` is truthy; defaults
            to ``self.restruct_data_output_file``.
        max_iterations: Maximum number of records to convert. Values below 1
            convert nothing.
        save_data: When truthy, the result is written with
            ``save_json_file``.

    Returns:
        dict mapping record id to the filled item.

    Raises:
        KeyError: For ``set == "rm"``, if ``entry[-2]`` contains a label that
            is not in the fit-out label table.
    """
    # (section key, field names). The two-digit section prefix selects the
    # extractor family: field "city" of "02.location_details" is filled by
    # self.s_02_city(entry, set).
    sections = (
        ("01.main_data", ("name", "type", "source")),
        ("02.location_details", ("city", "district", "address")),
        ("03.offer_details", (
            "av_office", "av_office_vol", "rent_office", "rent_retail",
            "rent_warehouse", "service_charge", "cost_parking_surface",
            "cost_parking_underground", "min_space_to_let", "min_lease",
            "add_on_factor",
        )),
        ("04.building_details", (
            "building_status", "building_class", "total_net_space",
            "total_gross_space", "completion_date", "ground_floors",
            "underground_floors", "floor_plate", "no_surface_parking",
            "no_underground_parking", "parking_ratio",
            "building_certification",
        )),
        ("05.fitout_standard", (
            "sprinklers", "access_control", "computer_cabling",
            "switchboard", "smoke_detectors", "suspended_ceiling",
            "openable_windows", "partition_walls", "backup_power_supply",
            "telephone_cabling", "power_cabling", "air_conditioning",
            "raised_floor", "carpeting", "fibre_optic_connections", "BMS",
        )),
        ("09.metadata", (
            "rm_id", "rm_url", "rm_pic_url", "bj_id", "bj_url",
            "bj_pic_url", "oc_id", "oc_url", "oc_pic_url", "add_info",
        )),
    )

    # Maps the labels found in entry[-2] of "rm" records to fit-out field
    # names. Built once instead of once per record.
    fitout_labels = {
        "sprinklers": "sprinklers",
        "access control": "access_control",
        "computer cabling": "computer_cabling",
        "switchboard": "switchboard",
        "smoke/heat detectors": "smoke_detectors",
        "suspended ceiling": "suspended_ceiling",
        "openable windows": "openable_windows",
        "partition walls": "partition_walls",
        "backup power supply": "backup_power_supply",
        "telephone cabling": "telephone_cabling",
        "power cabling": "power_cabling",
        "air-conditioning": "air_conditioning",
        "raised floor": "raised_floor",
        "carpeting": "carpeting",
        "fibre optic connection": "fibre_optic_connections",
        "BMS": "BMS",
    }

    output = {}
    sequence = 1  # 1-based number of the next record, fed to set_id
    for e in raw_data:
        # sequence - 1 records are done; stop once the cap is reached.
        if sequence > max_iterations:
            break
        if e is None:
            continue

        item = deepcopy(self.item_pattern)
        record_id = self.set_id(set=set, n=sequence)

        for section, fields in sections:
            prefix = section[:2]
            target = item[section]
            for field in fields:
                extractor = getattr(self, "s_{}_{}".format(prefix, field))
                target[field] = extractor(e, set)

            if section == "01.main_data":
                target["id"] = record_id
            elif section == "05.fitout_standard" and set == "rm":
                # For "rm" records every label listed in e[-2] overrides the
                # extracted fit-out value with False.
                # NOTE(review): presumably e[-2] lists features the source
                # reports as absent - confirm against the scraper.
                for label in e[-2]:
                    target[fitout_labels[label]] = False

        output[record_id] = item
        sequence += 1

    if save_data:
        if file_name is None:
            file_name = self.restruct_data_output_file
        save_json_file(file_name=file_name, content=output)
    return output
def data_to_json(self, function, input_file_name, output_file_name):
    """Read a CSV file, convert it with ``function`` and save the result.

    Args:
        function: Callable applied to the value returned by
            ``self.open_csv``; its return value is what gets saved.
        input_file_name: Passed to ``self.open_csv`` as ``file_name``.
        output_file_name: Passed to ``save_json_file`` as ``file_name``.
    """
    csv_rows = self.open_csv(file_name=input_file_name)
    converted = function(csv_rows)
    save_json_file(content=converted, file_name=output_file_name)