def main():
    """Apply the coverage changes and reload the coverage table.

    Calls append_new_policy(), change_coverage_limit() and
    modify_existing_policy() in that order, then rebuilds the table named by
    ``Cconf.table_name`` from the module-level ``coverage_list`` and logs how
    many records were written and how many of them were modifications.

    If building or loading the DataFrame fails, the error is printed, the
    traceback is logged and the process exits with a non-zero status.
    """
    # Local import so this block does not depend on a file-level `import sys`.
    import sys

    append_new_policy()
    change_coverage_limit()
    modify_existing_policy()

    # Truncate table if already exists
    if engine.dialect.has_table(engine, Cconf.table_name):
        logging.debug("Table " + Cconf.table_name + " already exists!")
        tt.truncate(Cconf.table_name)

    try:
        # Create table with specified columns
        df = pd.DataFrame.from_records(coverage_list, columns=Cconf.headers)

        # Convert date to datetime
        for date_column in ("record_start_date", "record_end_date",
                            "record_update_date"):
            df[date_column] = pd.to_datetime(df[date_column])

        # Load the table to database
        df.to_sql(Cconf.table_name, engine, index=False)

        # Everything that is neither a new record nor a limit change is
        # counted as a modification of an existing record.
        modified_rec = len(coverage_list) - new_rec - limits_changed
        logging.debug(str(modified_rec) + " record(s) modified.")
        logging.debug("Table " + Cconf.table_name + " created and " +
                      str(len(coverage_list)) + " total records written.")

        ##Perform SCD on coverage and coverage_il
        #select = text("SELECT scd2_coverage_2()")
        #db_connection.execution_options(autocommit=True).execute(select)
        #logging.debug("SCD operation successful")
    except Exception as e:
        # `except Exception, e` is Python-2-only; the `as` form is valid on
        # Python 2.6+ and Python 3.
        print(str(e))
        logging.debug(traceback.format_exc())
        # Signal failure to the caller instead of exiting with status 0 via
        # the interactive-only exit() helper.
        sys.exit(1)
def generate_coverage_list():
    """Build every (coverage name, coverage limit) pair and load it to the DB.

    Each pair gets an id of the form ``COV01-1`` followed by a running
    counter that starts at 1 and is left-padded with zeros to six digits.
    The rows are written to the table named by ``CConf.cl_table_name``
    (truncated first when it already exists) and a summary line is printed.
    """
    rows = []
    serial = 1
    for name in CConf.coverage_names:
        for limit in CConf.Coverage_limits:
            # zfill(6) pads to six digits and leaves longer numbers untouched.
            rows.append(["COV01-1" + str(serial).zfill(6), name, limit])
            serial += 1

    # Connect to postgres
    engine = create_engine('postgresql://*****:*****@10.20.202.43:5432/datagen')

    # Truncate the table if exists
    table_name = CConf.cl_table_name
    if engine.dialect.has_table(engine, table_name):
        tt.truncate(table_name)

    # Create table with the specified columns and load to database
    frame = pd.DataFrame.from_records(rows, columns=CConf.cl_headers)
    frame.to_sql(CConf.cl_table_name, engine, index=False)

    print(str(len(rows))+" Coverage Limits Generated and Loaded to DB.")
def Generate():
    """Generate vehicle records and load them into the output table.

    Reads three CSV files configured on ``DGconf`` (expensive cars, other
    cars, VINs).  For each of ``DGconf.num_records`` records it picks one of
    the two car lists according to ``DGconf.car_distribution``, picks a random
    car from that list, pairs it with the next VIN row and appends three
    random integers.  The rows are then written to the table named by
    ``DGconf.output_table_name`` (truncated first when it already exists).

    Timing and record counts are printed along the way.

    Raises:
        IndexError: if the VIN file has fewer than ``DGconf.num_records + 1``
            rows (row 0 is never used).
    """
    def _load_rows(path):
        # Read the whole CSV and close the file handle deterministically.
        with open(path) as handle:
            return list(csv.reader(handle))

    # Load expensive cars, other cars and VIN data
    expcar_data = _load_rows(DGconf.expcar_file_path)
    othercar_data = _load_rows(DGconf.othercar_file_path)
    vin_data = _load_rows(DGconf.VIN_file_path)

    car_pools = [expcar_data, othercar_data]
    data_list = []

    start = time.time()
    for i in range(DGconf.num_records):
        # Make a probability based random choice for car.  Draw the *index*
        # of the pool: numpy.random.choice needs a 1-D input, and a list of
        # two row lists is either N-D or ragged.
        pool_index = numpy.random.choice(len(car_pools),
                                         p=DGconf.car_distribution)
        file_data = car_pools[pool_index]

        # Choose random car from the list
        chosen_car = random.choice(file_data)

        # Choose unique VIN; the +1 skips row 0 of the VIN file
        # (presumably a header row -- confirm against the data file).
        chosen_row = vin_data[i + 1]

        data_list.append([
            chosen_row[0], chosen_row[1], chosen_car[1], chosen_car[2],
            chosen_car[0],
            random.randint(35000, 99999),
            random.randint(1, 10),
            random.randint(8, 18)
        ])

    gen = time.time()
    print("Generation: " + str(gen - start))
    print(str(len(data_list)) + " Records processed!")

    # Connect to postgres
    engine = create_engine(
        'postgresql://*****:*****@10.20.202.43:5432/datagen')

    # Truncate if table exists
    if engine.dialect.has_table(engine, DGconf.output_table_name):
        tt.truncate(DGconf.output_table_name)

    # Create table with specified columns
    df = pd.DataFrame.from_records(data_list, columns=DGconf.headers)

    # Load to database
    df.to_sql(DGconf.output_table_name, engine, index=False)
    print("Write: " + str(time.time() - gen))
    print(str(len(data_list)) + " Records written!")
def main():
    """Apply the coverage changes, reload the table and run the SCD step.

    After the three change functions have run, the table named by
    ``Cconf.table_name`` is rebuilt from the module-level ``coverage_list``.
    ``coverage_bkp`` is then created as a copy of ``coverage``, the first 40
    rows of ``coverage_il`` are printed, the database function
    ``scd2_coverage_2()`` is invoked, and the last 50 rows of
    ``coverage_bkp`` are printed, followed by a record-count summary.
    """
    def _fetch_frame(sql):
        # Run a query and materialise its rows as a DataFrame.
        cursor = db_connection.execute(text(sql))
        return pd.DataFrame(list(cursor), columns=cursor.keys())

    def _execute_autocommit(sql):
        # Run a statement on the shared connection with autocommit enabled.
        db_connection.execution_options(autocommit=True).execute(text(sql))

    append_new_policy()
    change_coverage_limit()
    modify_existing_policy()

    # Truncate table if already exists
    target_table = Cconf.table_name
    if engine.dialect.has_table(engine, target_table):
        tt.truncate(target_table)

    # Create table with specified columns
    frame = pd.DataFrame.from_records(coverage_list, columns=Cconf.headers)

    # Convert date to datetime
    for date_column in ("record_start_date", "record_end_date",
                        "record_update_date"):
        frame[date_column] = pd.to_datetime(frame[date_column])

    # Load the table to database
    frame.to_sql(Cconf.table_name, engine, index=False)

    # Refresh the backup copy of the coverage table
    if engine.dialect.has_table(engine, 'coverage_bkp'):
        tt.truncate('coverage_bkp')
    _execute_autocommit("CREATE TABLE coverage_bkp AS SELECT * FROM coverage")

    # Show coverage_il before the SCD function runs
    print(_fetch_frame("SELECT * FROM coverage_il").head(40))

    _execute_autocommit("SELECT scd2_coverage_2()")

    # Show the backup table after the SCD function has run
    print(_fetch_frame("SELECT * FROM coverage_bkp").tail(50))

    print(
        str(len(coverage_list)) + " Records generated and written to the db.")
def generate_coverage():
    """Generate coverage records for every policy and load them into the DB.

    Reads the ``policy``, ``coverage_limit``, ``customer`` and ``vehicle``
    tables.  For each unique policy number it randomly decides on 2 or 3
    coverages, looks up the licenses registered at the policy's address and,
    for every license that resolves to a vehicle, appends:

    * a "BI" coverage with a random BI limit,
    * a "PD" coverage with a random PD limit,
    * optionally a third coverage picked from "Uninsured", "Underinsured"
      and "Medical Payments" with a random matching limit.

    Cancelled policies use the record start date of their last policy record
    as the end date; otherwise the end date is empty, except that the
    optional coverage of an active policy gets a random record start date
    with probability 0.4.

    The rows are written to the table named by ``CConf.c_table_name``
    (truncated first when it already exists); timings are printed.
    """
    optional_coverages = ["Uninsured", "Underinsured", "Medical Payments"]
    coverage_list = []

    # Connect to postgres
    engine = create_engine(
        'postgresql://*****:*****@10.20.202.43:5432/datagen')
    db_connection = engine.connect()

    def _read_query(sql):
        # Run a query and materialise its rows as a DataFrame.
        result = db_connection.execute(text(sql))
        return pd.DataFrame(list(result), columns=result.keys())

    try:
        # Load data from policy table
        reader_policy_data = _read_query("SELECT * FROM policy")
        # Load data from coverage limits table
        reader_coverage_data = _read_query("SELECT * FROM coverage_limit")
        # Load data from customer table
        reader_customer_data = _read_query(
            "SELECT * FROM customer")[['address_id', 'license']]
        # Load data from vehicle table
        reader_vin_data = _read_query(
            "SELECT * FROM vehicle")[['vin', 'license_plate_no']]
    finally:
        # All reads are done; the writes below go through the engine.
        db_connection.close()

    unique_policies = reader_policy_data['policynumber'].unique().tolist()
    reader_policy_data = reader_policy_data[[
        'policynumber', 'termeffectivedate', 'recordstartdate',
        'policyaddressid', 'policystatus'
    ]]

    def _limits_for(coverage_name):
        # All coverage_limit rows for one coverage name, as plain lists.
        return reader_coverage_data[
            reader_coverage_data['coverage'] == coverage_name].values.tolist()

    BI = _limits_for('BI')
    PD = _limits_for('PD')
    # Filter the optional coverages once instead of on every iteration.
    optional_limits = dict(
        (name, _limits_for(name)) for name in optional_coverages)

    start = time.time()
    for policy_number in unique_policies:
        # Randomly choose whether to have 2 or 3 coverages
        total_coverages = numpy.random.choice([2, 3], p=[0.5, 0.5])

        current_policy = reader_policy_data[
            reader_policy_data['policynumber'] == policy_number].values.tolist()
        first_record = current_policy[0]
        last_record = current_policy[-1]
        policy_no = first_record[0]
        start_date = first_record[1]
        family_id = first_record[3]
        # The status is taken from the last record of the policy.
        status = last_record[4]
        cancelled = status == "Cancelled"

        license_plates = reader_customer_data.loc[
            reader_customer_data['address_id'] == family_id][
                'license'].values.tolist()

        for plate in license_plates:
            matching_vins = reader_vin_data.loc[
                reader_vin_data['license_plate_no'] == plate][
                    'vin'].values.tolist()
            # Skip licenses without a vehicle.  Test for emptiness *before*
            # indexing: `tolist()[0]` on an empty match raises IndexError.
            if not matching_vins:
                continue
            VIN = matching_vins[0]

            for j in range(total_coverages):
                if cancelled:
                    end_date = str(last_record[2])
                else:
                    end_date = ""

                if j == 0:
                    # Select compulsory BI coverage and random limit
                    cov_BI = random.choice(BI)
                    update_date = end_date
                    coverage_list.append([
                        cov_BI[0], "BI", VIN, policy_no, start_date,
                        end_date, update_date
                    ])
                elif j == 1:
                    # Select compulsory PD coverage and random limit
                    cov_PD = random.choice(PD)
                    update_date = end_date
                    coverage_list.append([
                        cov_PD[0], "PD", VIN, policy_no, start_date,
                        end_date, update_date
                    ])
                else:
                    # Select optional random coverage and random limit
                    cov_name = random.choice(optional_coverages)
                    cov_other = random.choice(optional_limits[cov_name])
                    if not cancelled:
                        end_date = str(
                            numpy.random.choice([
                                current_policy[random.randint(
                                    0, len(current_policy) - 1)][2], ""
                            ], p=[0.4, 0.6]))
                    # NOTE(review): update_date still holds the value set for
                    # the PD coverage, so it can differ from the end_date
                    # drawn just above -- confirm whether that is intended.
                    coverage_list.append([
                        cov_other[0], cov_name, VIN, policy_no, start_date,
                        end_date, update_date
                    ])

    gen = time.time()
    print("Generate: " + str(gen - start))

    # Truncate table if already exists
    if engine.dialect.has_table(engine, CConf.c_table_name):
        tt.truncate(CConf.c_table_name)

    # Create table with the specified columns
    df = pd.DataFrame.from_records(coverage_list, columns=CConf.c_headers)

    # Convert to datetime
    for date_column in ("record_start_date", "record_end_date",
                        "record_update_date"):
        df[date_column] = pd.to_datetime(df[date_column])

    # Load to database
    df.to_sql(CConf.c_table_name, engine, index=False)
    print(
        str(len(coverage_list)) + " records written to DB in " +
        str(time.time() - gen))
k = k + 1 i = i + 1 except Exception, e: print(str(e)) logging.debug(traceback.format_exc()) exit() gen = time.time() print("Generate: " + str(gen - start)) #Truncate table if already exists if engine.dialect.has_table(engine, CConf.c_table_name): logging.debug("Table " + CConf.c_table_name + " already exists!") tt.truncate(CConf.c_table_name) try: #Create table with the specified columns df = pd.DataFrame.from_records(coverage_list, columns=CConf.c_headers) #Convert to datetime df["record_start_date"] = pd.to_datetime(df["record_start_date"]) df["record_end_date"] = pd.to_datetime(df["record_end_date"]) df["record_update_date"] = pd.to_datetime(df["record_update_date"]) #load to database df.to_sql(CConf.c_table_name, engine, index=False) logging.debug("Table " + CConf.c_table_name + " created and " + str(len(coverage_list)) + " records written.")
] claim_list.append(data) i = i + 1 except Exception, e: print(str(e)) logging.debug(traceback.format_exc()) exit() gen = time.time() print("Generate: " + str(gen - strt)) #Truncate table if already exists if engine.dialect.has_table(engine, cc.claim_table_name): logging.debug("Table " + cc.claim_table_name + " already exists!") tt.truncate(cc.claim_table_name) try: #Create table with the specified columns df = pd.DataFrame.from_records(claim_list, columns=cc.claim_headers) #Convert date to datetime df["claim_lossdate"] = pd.to_datetime(df["claim_lossdate"]) df["claim_reporteddate"] = pd.to_datetime(df["claim_reporteddate"]) df["claim_closedate"] = pd.to_datetime(df["claim_closedate"]) #load to database df.to_sql(cc.claim_table_name, engine, index=False) logging.debug(cc.claim_table_name + " created and " + str(len(claim_list)) + " records written.")
]) i = i + 1 last_id = last_id + 1 except Exception, e: print(str(e)) logging.debug(traceback.format_exc()) exit() print("Time2: " + str(time.time() - strt2)) #Truncate table if already exists if engine.dialect.has_table(engine, CConf.claim_il_table): logging.debug("Table " + CConf.claim_il_table + " already exists!") tt.truncate(CConf.claim_il_table) wrt = time.time() try: #Create table with the specified columns df = pd.DataFrame.from_records(claim_list, columns=CConf.claim_headers) #Convert date to datetime df["claim_lossdate"] = pd.to_datetime(df["claim_lossdate"]) df["claim_reporteddate"] = pd.to_datetime(df["claim_reporteddate"]) df["claim_closedate"] = pd.to_datetime(df["claim_closedate"]) #load to database df.to_sql(CConf.claim_il_table, engine, index=False) new_rec = len(claim_list) - old_rec
value = value - 1 inc_vehicle_id = VIN_list[value] VIN_list.pop(value) incident_desc = random.choice(incident_desc_list) driver = random.choice(relations) inc_is_the_vehicle_driveable = numpy.random.choice(["Y", ""], p=[0.8, 0.2]) incident_list.append([ inc_incident_id, inc_vehicle_id, incident_desc[0], driver, inc_is_the_vehicle_driveable ]) i = i + 1 print("Generate: " + str(time.time() - start)) #Truncate table if exists if engine.dialect.has_table(engine, Iconf.table_name): tt.truncate(Iconf.table_name) #Create table with the specified columns df = pd.DataFrame.from_records(incident_list, columns=Iconf.headers) #Load to database df.to_sql(Iconf.table_name, engine, index=False) print(str(i) + " Records Generated and loaded to DB!")