def browse_data(self, data=None, file_name=None, max_iterations=9999):
    # Print records from the combined data set, split by source ("rm" / "zhand").
    # Note: outcome_dict is only populated by the commented-out word count below,
    # so as written the method returns an empty dict.
    if file_name is None:
        file_name = self.combined_data_file_name
    if data is None:
        data = open_json_file(file_name)
    outcome_dict = {}
    n = 0
    n1 = 0
    for e in data:
        # Previously used to count word frequencies in addresses:
        # print(data[e])
        # for word in data[e]['02.location_details']['address'].split():
        #     if word not in outcome_dict:
        #         outcome_dict[word] = 1
        #     else:
        #         outcome_dict[word] += 1
        if data[e]['01.main_data']['source'] == "rm" and n <= max_iterations:
            print(data[e])
            n += 1
        if data[e]['01.main_data']['source'] == "zhand":
            print(data[e])
            n1 += 1
        if n1 >= max_iterations:
            break
    return outcome_dict
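# open_json_file / save_json_file are used throughout but defined elsewhere in
# the project; the sketch below is an assumption of their behaviour (a plain
# JSON round-trip), not the project's actual implementation. combine_data
# further down also needs "from os import listdir" at module level.
import json
from os import listdir


def open_json_file(file_name):
    # Assumed helper: load a JSON file into a dict.
    with open(file_name, "r", encoding="utf-8") as f:
        return json.load(f)


def save_json_file(file_name, content):
    # Assumed helper: dump a dict to a JSON file.
    with open(file_name, "w", encoding="utf-8") as f:
        json.dump(content, f, ensure_ascii=False, indent=4)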
def find_matching_data(self, data=None, file_name=None, save_data=True,
                       max_iterations=9999):
    # For every "bj" lease record, try to match it against the other sources
    # ("rm", "oc", "zhand"), first by the default method, then by address.
    if data is None:
        data = open_json_file(self.combined_data_file_name)
    n = 0
    for item in data:
        print("***\nFinding match for: {}, id: {}".format(
            data[item]["01.main_data"]["name"], item))
        if self.cond_set(item=data[item], source="bj", type="lease"):
            print("requirements test passed")
            for target in ("rm", "oc", "zhand"):
                self.matching_procedure(operand=data[item], data=data,
                                        target_source=target)
            for target in ("rm", "oc", "zhand"):
                self.matching_procedure(operand=data[item], data=data,
                                        target_source=target,
                                        match_method="address")
        else:
            print("requirements not met")
        n += 1
        if n >= max_iterations:
            break
    if save_data:
        if file_name is None:
            file_name = self.combined_data_file_name
        save_json_file(file_name=file_name, content=data)
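# cond_set is called above but not shown here; a minimal sketch of what it
# plausibly checks, inferred from the call site (hypothetical, not the
# project's actual implementation). The parameter name "type" shadows the
# builtin but is kept to match the keyword used at the call site.
def cond_set(self, item, source, type):
    # Hypothetical filter: a record qualifies when its source and type
    # match the requested values.
    return (item['01.main_data']['source'] == source
            and item['01.main_data']['type'] == type)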
def browse_data(self, file_name=None, max_iterations=9999):
    if file_name is None:
        file_name = self.raw_data_output_file
    data = open_json_file(file_name)
    n = 0
    for e in data:
        n += 1
        print(n)
        print(e)
        if n >= max_iterations:
            break
    return data
def merge_data(self, input_file_name=None, output_file_name=None,
               save_data=True, max_iterations=9999):
    # Merge the restructured records via merging_function and optionally save.
    if input_file_name is None:
        raise Exception("No input file")
    if output_file_name is None:
        output_file_name = self.merged_data_output_file
    data = open_json_file(input_file_name)
    output = self.merging_function(data, max_iterations=max_iterations)
    if save_data:
        save_json_file(file_name=output_file_name, content=output)
    return output
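# Example call (hypothetical file name, following the stage naming used in
# __init__ below: st1 raw -> st2 restructured -> merged):
#
#   merged = self.merge_data(
#       input_file_name="datasets/st2_restruct_data_example.json")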
def browse_data(self, data=None, file_name=None, max_iterations=9999):
    if file_name is None:
        file_name = self.restruct_data_output_file
    if data is None:
        data = open_json_file(file_name)
    n = 0
    for e in data:
        n += 1
        print(n)
        print(data[e])
        if n >= max_iterations:
            break
    return data
def browse_data(self, data=None, file_name=None, max_iterations=9999):
    if file_name is None:
        file_name = self.merged_data_output_file
    if data is None:
        data = open_json_file(file_name)
    n = 0
    for e in data:
        n += 1
        print(n)
        print("{}: {}".format(e, data[e]))
        if n >= max_iterations:
            break
    return data
def combine_data(self, file_name=None, save_data=True):
    # Combine every stage-2 ("st2") dataset file into a single dict keyed by
    # record id. Requires "from os import listdir" at module level.
    if file_name is None:
        file_name = self.combined_data_file_name
    output = {}
    for file in listdir("datasets"):
        if "st2" in file:
            print(file)
            file_data = open_json_file("datasets/{}".format(file))
            for item in file_data:
                output[item] = file_data[item]
    if save_data:
        save_json_file(file_name=file_name, content=output)
    return output
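# Usage note: duplicate record ids across st2 files are silently overwritten
# by whichever file is read last, and listdir order is not guaranteed to be
# alphabetical. Example call (hypothetical instance name):
#
#   combined = pipeline.combine_data(save_data=False)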
def bug_fixing(self, input_file_name=None, output_file_name=None,
               save_data=True, max_iterations=9999):
    # Run the data-repair passes (completion date, parking ratio, address)
    # over an already-merged file, then optionally save the result.
    if input_file_name is None:
        raise Exception("No input file")
    if output_file_name is None:
        output_file_name = self.bug_fixed_data_output_file
    data = open_json_file(input_file_name)
    data = self.fixing_completion_date_bug(data, max_iterations=max_iterations)
    data = self.fixing_parking_ratio_bug(data, max_iterations=max_iterations)
    data = self.fixing_address_bug(data, max_iterations=max_iterations)
    if save_data:
        save_json_file(file_name=output_file_name, content=data)
    return data
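# The three fixing_* passes are defined elsewhere; the output-file attributes
# suggest this stage is typically run on the merged output, e.g.
# (hypothetical instance name):
#
#   fixed = pipeline.bug_fixing(
#       input_file_name=pipeline.merged_data_output_file)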
def __init__(self, name_of_set):
    self.name_of_set = name_of_set
    self.raw_data = open_json_file(
        "datasets/st1_raw_data_{}.json".format(self.name_of_set))
    self.restruct_data_output_file = (
        "datasets/st2_restruct_data_{}.json".format(self.name_of_set))
    # Empty template for a restructured record; every scraped item is
    # normalised into this shape.
    self.item_pattern = {
        '01.main_data': {
            'name': '', 'type': '', 'source': '', 'id': '', 'match_id': '',
            'match_level': '', 'match_address': '', 'match_a_level': '',
            'record_rating': ''
        },
        '02.location_details': {'city': '', 'district': '', 'address': ''},
        '03.offer_details': {
            'av_office': '', 'av_office_vol': '', 'rent_office': '',
            'rent_retail': '', 'rent_warehouse': '', 'service_charge': '',
            'cost_parking_surface': '', 'cost_parking_underground': '',
            'min_space_to_let': '', 'min_lease': '', 'add_on_factor': ''
        },
        '04.building_details': {
            'building_status': '', 'building_class': '', 'total_net_space': '',
            'total_gross_space': '', 'completion_date': '', 'ground_floors': '',
            'underground_floors': '', 'floor_plate': '',
            'no_surface_parking': '', 'no_underground_parking': '',
            'parking_ratio': '', 'building_certification': ''
        },
        '05.fitout_standard': {
            'sprinklers': '', 'access_control': '', 'computer_cabling': '',
            'switchboard': '', 'smoke_detectors': '', 'suspended_ceiling': '',
            'openable_windows': '', 'partition_walls': '',
            'backup_power_supply': '', 'telephone_cabling': '',
            'power_cabling': '', 'air_conditioning': '', 'raised_floor': '',
            'carpeting': '', 'fibre_optic_connections': '', 'BMS': ''
        },
        '09.metadata': {
            'rm_id': '', 'rm_url': '', 'rm_pic_url': '', 'bj_id': '',
            'bj_url': '', 'bj_pic_url': '', 'oc_id': '', 'oc_url': '',
            'oc_pic_url': '', 'add_info': ''
        }
    }
    self.translate_dict_bj = {"Log in": "", "Leased": ""}
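# Example instantiation (the enclosing class name is not shown here;
# "RestructStage" and the set name below are hypothetical stand-ins).
# Constructing the stage with a set name wires the stage-1 input file to the
# stage-2 output file:
#
#   stage = RestructStage("warsaw_offices")
#   # reads  datasets/st1_raw_data_warsaw_offices.json
#   # writes datasets/st2_restruct_data_warsaw_offices.json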
def data_to_csv(self, function, input_file_name, output_file_name):
    # Apply a caller-supplied converter to the JSON data and write the
    # result out as CSV.
    data = open_json_file(input_file_name)
    csv_data = function(data)
    self.save_to_csv(file_name=output_file_name, content=csv_data)
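# save_to_csv is referenced above but not shown; a minimal sketch assuming
# csv_data is a list of rows (each row a list of values), using the standard
# csv module. This is an assumption, not the project's actual implementation.
import csv


def save_to_csv(self, file_name, content):
    # Assumed helper: write already-prepared rows to disk.
    with open(file_name, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerows(content)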