Exemplo n.º 1
0
def main():
    """Apply the policy changes and reload the coverage table.

    Calls ``append_new_policy``, ``change_coverage_limit`` and
    ``modify_existing_policy``, then writes the module-level
    ``coverage_list`` to the table named by ``Cconf.table_name`` and logs
    how many records were modified and written.

    Raises:
        SystemExit: with status 1 when building or loading the DataFrame
            fails; the error is printed and the traceback is logged at
            DEBUG level first.
    """
    append_new_policy()
    change_coverage_limit()
    modify_existing_policy()

    #truncate table if already exists
    # NOTE(review): to_sql() below uses pandas' default if_exists='fail', so
    # this presumably relies on tt.truncate() removing the table - confirm.
    if engine.dialect.has_table(engine, Cconf.table_name):
        logging.debug("Table " + Cconf.table_name + " already exists!")
        tt.truncate(Cconf.table_name)

    try:
        #Create table with specified columns
        df = pd.DataFrame.from_records(coverage_list, columns=Cconf.headers)

        #Convert date to datetime
        for date_column in ("record_start_date", "record_end_date",
                            "record_update_date"):
            df[date_column] = pd.to_datetime(df[date_column])

        #Load the table to database
        df.to_sql(Cconf.table_name, engine, index=False)

        # Everything that is neither a new record nor a limit change is
        # counted as a modification of an existing record.
        modified_rec = len(coverage_list) - new_rec - limits_changed
        logging.debug(str(modified_rec) + " record(s) modified.")
        logging.debug("Table " + Cconf.table_name + " created and " +
                      str(len(coverage_list)) + " total records written.")

        ##Perform SCD on coverage and coverage_il
        #select = text("SELECT scd2_coverage_2()")
        #db_connection.execution_options(autocommit=True).execute(select)
        #logging.debug("SCD operation successful")

    except Exception as e:
        # "except ... as" works on Python 2.6+ and 3.x; the old comma form is
        # a syntax error on Python 3.
        print(str(e))
        logging.debug(traceback.format_exc())
        # Exit with a non-zero status so callers can detect the failure; the
        # site-provided exit() helper is meant for interactive use only.
        raise SystemExit(1)
def generate_coverage_list():
    """Generate every coverage/limit combination and load it into the DB.

    One row is produced for each pairing of an entry in
    ``CConf.coverage_names`` with an entry in ``CConf.Coverage_limits``.
    Rows get sequential ids of the form ``"COV01-1"`` followed by a counter
    zero-padded to six digits, starting at 1. The rows are written to the
    table named by ``CConf.cl_table_name`` with columns ``CConf.cl_headers``.
    """
    coverage_list = []
    sequence = 0

    for coverage_name in CConf.coverage_names:
        for coverage_limit in CConf.Coverage_limits:
            # The counter advances once per generated row so ids are unique.
            sequence += 1
            coverage_id = "COV01-1" + str(sequence).zfill(6)
            coverage_list.append([coverage_id, coverage_name, coverage_limit])

    #Connect to postgres
    engine = create_engine('postgresql://*****:*****@10.20.202.43:5432/datagen')

    #Truncate the table if exists
    if engine.dialect.has_table(engine, CConf.cl_table_name):
        tt.truncate(CConf.cl_table_name)

    #Create table with the specified columns
    df = pd.DataFrame.from_records(coverage_list, columns=CConf.cl_headers)

    #load to database
    df.to_sql(CConf.cl_table_name, engine, index=False)

    print(str(len(coverage_list))+" Coverage Limits Generated and Loaded to DB.")
Exemplo n.º 3
0
def Generate():
    """Generate random vehicle records and load them into the output table.

    Reads the expensive-car, other-car and VIN CSV files named in ``DGconf``,
    builds ``DGconf.num_records`` rows (each pairing one VIN row with a
    randomly chosen car and three random integers), writes them to
    ``DGconf.output_table_name`` and prints timing and record counts.

    Raises:
        IndexError: if the VIN file has fewer than ``DGconf.num_records + 1``
            rows.
    """
    #Load expensive cars data
    with open(DGconf.expcar_file_path) as expcar_file:
        expcar_data = list(csv.reader(expcar_file))

    #Load other cars data
    with open(DGconf.othercar_file_path) as othercar_file:
        othercar_data = list(csv.reader(othercar_file))

    #load VIN data
    with open(DGconf.VIN_file_path) as vin_file:
        vin_data = list(csv.reader(vin_file))

    car_pools = [expcar_data, othercar_data]
    data_list = []

    start = time.time()

    for i in range(DGconf.num_records):
        #Make a probability based random choice for car
        # numpy.random.choice needs a 1-D array-like, so draw the index of
        # the pool instead of passing the nested row lists themselves.
        pool_index = numpy.random.choice(len(car_pools),
                                         p=DGconf.car_distribution)

        #Choose random car from the list
        chosen_car = random.choice(car_pools[pool_index])

        #Choose unique VIN
        # Row i + 1 is used, so row 0 is never consumed - presumably it is a
        # header row; verify against the VIN file.
        chosen_row = vin_data[i + 1]
        data_list.append([
            chosen_row[0], chosen_row[1], chosen_car[1], chosen_car[2],
            chosen_car[0],
            random.randint(35000, 99999),
            random.randint(1, 10),
            random.randint(8, 18)
        ])

    gen = time.time()
    print("Generation: " + str(gen - start))
    print(str(len(data_list)) + " Records processed!")

    #Connect to postgres
    engine = create_engine(
        'postgresql://*****:*****@10.20.202.43:5432/datagen')

    #Truncate if table exists
    if engine.dialect.has_table(engine, DGconf.output_table_name):
        tt.truncate(DGconf.output_table_name)

    #Create table with specified columns
    df = pd.DataFrame.from_records(data_list, columns=DGconf.headers)

    #Load to database
    df.to_sql(DGconf.output_table_name, engine, index=False)

    print("Write: " + str(time.time() - gen))
    print(str(len(data_list)) + " Records written!")
Exemplo n.º 4
0
def main():
    """Rebuild the coverage table, back it up and run the SCD routine.

    Steps, in order: apply the three policy mutation helpers, write the
    module-level ``coverage_list`` to ``Cconf.table_name``, create
    ``coverage_bkp`` as a copy of ``coverage``, print the first 40 rows of
    ``coverage_il``, execute ``scd2_coverage_2()``, print the last 50 rows of
    ``coverage_bkp`` and finally print the number of generated records.
    """

    def query_to_frame(statement):
        # Run a SELECT on the shared connection and wrap the rows in a frame.
        rows = db_connection.execute(text(statement))
        return pd.DataFrame(list(rows), columns=rows.keys())

    def run_autocommit(statement):
        # Execute a statement on an autocommit-enabled view of the connection.
        db_connection.execution_options(autocommit=True).execute(
            text(statement))

    for mutate in (append_new_policy, change_coverage_limit,
                   modify_existing_policy):
        mutate()

    # Clear the target table first when it is already present.
    if engine.dialect.has_table(engine, Cconf.table_name):
        tt.truncate(Cconf.table_name)

    # Build the frame and turn the three date columns into datetimes.
    frame = pd.DataFrame.from_records(coverage_list, columns=Cconf.headers)
    for date_column in ("record_start_date", "record_end_date",
                        "record_update_date"):
        frame[date_column] = pd.to_datetime(frame[date_column])

    # Write the frame to the database.
    frame.to_sql(Cconf.table_name, engine, index=False)

    # Refresh the backup copy of the coverage table.
    if engine.dialect.has_table(engine, 'coverage_bkp'):
        tt.truncate('coverage_bkp')
    run_autocommit("CREATE TABLE coverage_bkp AS SELECT * FROM coverage")

    # Show coverage_il before the SCD routine runs.
    print(query_to_frame("SELECT * FROM coverage_il").head(40))

    run_autocommit("SELECT scd2_coverage_2()")

    # Show the tail of the backup table afterwards.
    print(query_to_frame("SELECT * FROM coverage_bkp").tail(50))

    record_count = str(len(coverage_list))
    print(record_count + " Records generated and written to the db.")
Exemplo n.º 5
0
def generate_coverage():
    """Generate coverage records for every policy and load them into the DB.

    Reads the ``policy``, ``coverage_limit``, ``customer`` and ``vehicle``
    tables. For each distinct policy number it randomly decides on 2 or 3
    coverages, then for every vehicle found for the policy's customers emits
    a "BI" row, a "PD" row and, when 3 coverages were drawn, one optional
    coverage ("Uninsured", "Underinsured" or "Medical Payments"), each with
    a randomly chosen limit. License plates with no matching vehicle are
    skipped. The rows are written to ``CConf.c_table_name`` with columns
    ``CConf.c_headers`` and timings are printed.
    """
    coverage_list = []
    optional_names = ["Uninsured", "Underinsured", "Medical Payments"]

    #connect to postgres
    engine = create_engine(
        'postgresql://*****:*****@10.20.202.43:5432/datagen')
    db_connection = engine.connect()

    def read_table(query):
        # Run a SELECT and return the full result set as a DataFrame.
        result = db_connection.execute(text(query))
        return pd.DataFrame(list(result), columns=result.keys())

    try:
        #load data from policy table
        reader_policy_data = read_table("SELECT * FROM policy")
        unique_policies = reader_policy_data['policynumber'].unique().tolist()
        reader_policy_data = reader_policy_data[[
            'policynumber', 'termeffectivedate', 'recordstartdate',
            'policyaddressid', 'policystatus'
        ]]

        #load data from coverage limits table
        reader_coverage_data = read_table("SELECT * FROM coverage_limit")

        #load data from customer table
        reader_customer_data = read_table(
            "SELECT * FROM customer")[['address_id', 'license']]

        #load data from vehicle table
        reader_vin_data = read_table(
            "SELECT * FROM vehicle")[['vin', 'license_plate_no']]
    finally:
        # The connection is only needed for the reads above.
        db_connection.close()

    def limits_for(coverage_name):
        # All coverage_limit rows belonging to one coverage name.
        return reader_coverage_data[reader_coverage_data['coverage'] ==
                                    coverage_name].values.tolist()

    BI = limits_for('BI')
    PD = limits_for('PD')
    # Filtered once here instead of on every inner-loop iteration.
    optional_limits = dict((name, limits_for(name)) for name in optional_names)

    start = time.time()
    for policy_number in unique_policies:
        #Randomly choose whether to have 2 or 3 coverages
        total_coverages = int(numpy.random.choice([2, 3], p=[0.5, 0.5]))
        current_policy = reader_policy_data[
            reader_policy_data['policynumber'] ==
            policy_number].values.tolist()
        first_record = current_policy[0]
        last_record = current_policy[-1]
        policy_no = first_record[0]
        start_date = first_record[1]
        family_id = first_record[3]
        # The status of the policy's last record decides the end dates.
        cancelled = last_record[4] == "Cancelled"
        license_plates = reader_customer_data.loc[
            reader_customer_data['address_id'] ==
            family_id]['license'].values.tolist()

        for license_plate in license_plates:
            matching_vins = reader_vin_data.loc[
                reader_vin_data['license_plate_no'] ==
                license_plate]['vin'].values.tolist()

            # Check for an empty match before indexing, so a plate without a
            # vehicle is skipped rather than raising IndexError.
            if not matching_vins:
                continue
            VIN = matching_vins[0]

            for j in range(total_coverages):
                end_date = str(last_record[2]) if cancelled else ""
                # The update date always mirrors the status-derived end date,
                # even when the optional coverage re-draws end_date below.
                update_date = end_date

                #select compulsory BI coverage and random limit
                if j == 0:
                    cov_name = "BI"
                    cov_limit = random.choice(BI)

                #select compulsory PD coverage and random limit
                elif j == 1:
                    cov_name = "PD"
                    cov_limit = random.choice(PD)

                #select optional random coverage and random limit
                else:
                    cov_name = random.choice(optional_names)
                    cov_limit = random.choice(optional_limits[cov_name])
                    if not cancelled:
                        # 40%: end on the start date of a random policy
                        # record; 60%: leave the coverage open-ended.
                        random_record = current_policy[random.randint(
                            0, len(current_policy) - 1)]
                        end_date = str(
                            numpy.random.choice([random_record[2], ""],
                                                p=[0.4, 0.6]))

                coverage_list.append([
                    cov_limit[0], cov_name, VIN, policy_no, start_date,
                    end_date, update_date
                ])

    gen = time.time()
    print("Generate: " + str(gen - start))

    #Truncate table if already exists
    if engine.dialect.has_table(engine, CConf.c_table_name):
        tt.truncate(CConf.c_table_name)

    #Create table with the specified columns
    df = pd.DataFrame.from_records(coverage_list, columns=CConf.c_headers)

    #Convert to datetime
    for date_column in ("record_start_date", "record_end_date",
                        "record_update_date"):
        df[date_column] = pd.to_datetime(df[date_column])

    #load to database
    df.to_sql(CConf.c_table_name, engine, index=False)

    print(
        str(len(coverage_list)) + " records written to DB in " +
        str(time.time() - gen))
Exemplo n.º 6
0
                k = k + 1

            i = i + 1

    except Exception, e:
        print(str(e))
        logging.debug(traceback.format_exc())
        exit()

    gen = time.time()
    print("Generate: " + str(gen - start))

    #Truncate table if already exists
    if engine.dialect.has_table(engine, CConf.c_table_name):
        logging.debug("Table " + CConf.c_table_name + " already exists!")
        tt.truncate(CConf.c_table_name)

    try:
        #Create table with the specified columns
        df = pd.DataFrame.from_records(coverage_list, columns=CConf.c_headers)

        #Convert to datetime
        df["record_start_date"] = pd.to_datetime(df["record_start_date"])
        df["record_end_date"] = pd.to_datetime(df["record_end_date"])
        df["record_update_date"] = pd.to_datetime(df["record_update_date"])

        #load to database
        df.to_sql(CConf.c_table_name, engine, index=False)
        logging.debug("Table " + CConf.c_table_name + " created and " +
                      str(len(coverage_list)) + " records written.")
Exemplo n.º 7
0
            ]
            claim_list.append(data)
            i = i + 1

    except Exception, e:
        print(str(e))
        logging.debug(traceback.format_exc())
        exit()

    gen = time.time()
    print("Generate: " + str(gen - strt))

    #Truncate table if already exists
    if engine.dialect.has_table(engine, cc.claim_table_name):
        logging.debug("Table " + cc.claim_table_name + " already exists!")
        tt.truncate(cc.claim_table_name)

    try:
        #Create table with the specified columns
        df = pd.DataFrame.from_records(claim_list, columns=cc.claim_headers)

        #Convert date to datetime
        df["claim_lossdate"] = pd.to_datetime(df["claim_lossdate"])
        df["claim_reporteddate"] = pd.to_datetime(df["claim_reporteddate"])
        df["claim_closedate"] = pd.to_datetime(df["claim_closedate"])

        #load to database
        df.to_sql(cc.claim_table_name, engine, index=False)
        logging.debug(cc.claim_table_name + " created and " +
                      str(len(claim_list)) + " records written.")
            ])

            i = i + 1
            last_id = last_id + 1

    except Exception, e:
        print(str(e))
        logging.debug(traceback.format_exc())
        exit()

    print("Time2: " + str(time.time() - strt2))

    #Truncate table if already exists
    if engine.dialect.has_table(engine, CConf.claim_il_table):
        logging.debug("Table " + CConf.claim_il_table + " already exists!")
        tt.truncate(CConf.claim_il_table)

    wrt = time.time()

    try:
        #Create table with the specified columns
        df = pd.DataFrame.from_records(claim_list, columns=CConf.claim_headers)

        #Convert date to datetime
        df["claim_lossdate"] = pd.to_datetime(df["claim_lossdate"])
        df["claim_reporteddate"] = pd.to_datetime(df["claim_reporteddate"])
        df["claim_closedate"] = pd.to_datetime(df["claim_closedate"])

        #load to database
        df.to_sql(CConf.claim_il_table, engine, index=False)
        new_rec = len(claim_list) - old_rec
Exemplo n.º 9
0
        value = value - 1

    inc_vehicle_id = VIN_list[value]
    VIN_list.pop(value)

    incident_desc = random.choice(incident_desc_list)

    driver = random.choice(relations)

    inc_is_the_vehicle_driveable = numpy.random.choice(["Y", ""], p=[0.8, 0.2])

    incident_list.append([
        inc_incident_id, inc_vehicle_id, incident_desc[0], driver,
        inc_is_the_vehicle_driveable
    ])

    i = i + 1

# Report the seconds elapsed since `start` was recorded.
print("Generate: " + str(time.time() - start))

# Truncate the target table when it already exists.
# NOTE(review): to_sql() below uses pandas' default if_exists='fail', so this
# presumably relies on tt.truncate() removing the table - confirm.
if engine.dialect.has_table(engine, Iconf.table_name):
    tt.truncate(Iconf.table_name)

# Build a DataFrame from the generated incident rows, using the configured
# column headers.
df = pd.DataFrame.from_records(incident_list, columns=Iconf.headers)

# Write the DataFrame to the database without the DataFrame index.
df.to_sql(Iconf.table_name, engine, index=False)

# `i` is the loop counter incremented once per generated incident above.
print(str(i) + " Records Generated and loaded to DB!")