Example #1
def extract_change_card_status(tmp_data, data_current, Filename):
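    """Archive the previous status of cards whose status changed.

    Inferred from the code: rows of ``data_current`` whose status differs in
    the new extract ``tmp_data`` (excluding brand-new cardholders) are stamped
    with the file date minus one day and appended to
    "CARD_STATUS"."CHANGE_STATUS_CARTES". ``Filename`` is expected to look
    like "<prefix>-YYYY-MM-DD...".
    """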

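    # set 1 : rows of the new extract whose values differ from the current table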
    outer_join = data_current.merge(tmp_data, how='outer', indicator=True)
    outer_join = outer_join[outer_join["_merge"] == "right_only"]

    # set 2 : identify new cardholder ID
    new_card_holder_id = set(outer_join["CardHolderID"].unique()).difference(
        data_current["CardHolderID"].unique())

    # set 3 : keep the old values of cardholders whose status changed
    # (new cardholders are excluded); .copy() avoids pandas'
    # SettingWithCopyWarning when dt_change is added below
    data_to_change = data_current[data_current["CardHolderID"].isin(
        set(outer_join.loc[
            ~outer_join["CardHolderID"].isin(new_card_holder_id),
            "CardHolderID"]))].copy()

    DateFile = pd.to_datetime(
        Filename.split("-")[1] + "-" + Filename.split("-")[2] + "-" +
        Filename.split("-")[3]) - datetime.timedelta(days=1)

    data_to_change["dt_change"] = DateFile

    InsertTableIntoDatabase(data_to_change,
                            "CHANGE_STATUS_CARTES",
                            "CARD_STATUS",
                            "Postgres",
                            "Creacard_Calypso",
                            DropTable=False)
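
A minimal usage sketch (hypothetical inputs; the call writes to Postgres through the project's InsertTableIntoDatabase, so it needs the conf_python connection setup):

import pandas as pd

data_current = pd.DataFrame({"CardHolderID": ["1", "2"],
                             "CardStatus": ["OPEN", "OPEN"],
                             "KYC_Status": ["SDD", "KYC"]})
tmp_data = pd.DataFrame({"CardHolderID": ["1", "2"],
                         "CardStatus": ["OPEN", "CLOSED"],
                         "KYC_Status": ["SDD", "KYC"]})

# only cardholder "2" changed status, so only its old row is archived
extract_change_card_status(tmp_data, data_current, "status-2019-03-15")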
Example #2
def generate_person_id_construction(schema):
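    """Build the "ID_PERSON" lookup and propagate PERSON_ID into MASTER_ID.

    Inferred from the code: distinct BirthDate+LastName combinations flagged
    "GoodCombinaison" are loaded into "<schema>"."ID_PERSON", whose SERIAL
    column assigns an id per combination; MASTER_ID is then updated from it,
    with concat(USER_ID,'_',MOBILE_ID) as a fallback PERSON_ID for rows
    without a usable combination.
    """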

    # extract the distinct BirthDate+LastName combinations flagged as usable
    query = """
    select distinct concat("BirthDate","LastName") as "combinaison"
    from "CUSTOMERS"."MASTER_ID"
    where "GoodCombinaison" = 1
    """

    engine = connect_to_database("Postgres", "Creacard_Calypso").CreateEngine()
    data = pd.read_sql(query, con=engine)
    engine.close()

    query = """
    
    CREATE TABLE "{}"."ID_PERSON"(
    
        "PERSON_ID" SERIAL,
        "combinaison" TEXT
    )
    
    """.format(schema)

    engine = connect_to_database("Postgres", "Creacard_Calypso").CreateEngine()
    engine.execute(query)
    engine.close()


    InsertTableIntoDatabase(data, TlbName="ID_PERSON", Schema=schema,
                            database_name="Creacard_Calypso",
                            database_type="Postgres",
                            DropTable=False,
                            InstertInParrell=False)


    query = """
    
    update "CUSTOMERS"."MASTER_ID"
    set "PERSON_ID" = T1."PERSON_ID"
    from "CUSTOMERS"."ID_PERSON" as T1
    where concat("CUSTOMERS"."MASTER_ID"."BirthDate", "CUSTOMERS"."MASTER_ID"."LastName") = T1."combinaison"
    
    """

    engine = connect_to_database("Postgres", "Creacard_Calypso").CreateEngine()
    engine.execute(query)
    engine.close()

    query = """
    
    update "CUSTOMERS"."MASTER_ID"
    set "PERSON_ID" = concat("USER_ID",'_',"MOBILE_ID")
    where "GoodCombinaison" = 0 and "PERSON_ID" is null 
    
    """

    engine = connect_to_database("Postgres", "Creacard_Calypso").CreateEngine()
    engine.execute(query)
    engine.close()
Example #3
def daily_card_status2(Data, filepath, database_type, database_name):
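    """Daily refresh of "CARD_STATUS"."STATUS_CARTES" from a card-status CSV.

    Inferred from the code: name the raw columns according to the file date,
    normalise the data, archive changed status rows, replace changed or new
    cardholders through a TMP_UPDATE staging table, then refresh balances and
    backfill activation dates.
    """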
    #### constant variables

    # Table parameter for the temporary table
    TableParameter = {}
    TableParameter["ActivationDate"] = "timestamp without time zone"
    TableParameter["Address1"] = "TEXT"
    TableParameter["Address2"] = "TEXT"
    TableParameter["ApplicationName"] = "VARCHAR (50)"
    TableParameter["AvailableBalance"] = "double precision"
    TableParameter["BirthDate"] = "timestamp without time zone"
    TableParameter["CardHolderID"] = "VARCHAR (50)"
    TableParameter["CardStatus"] = "VARCHAR (100)"
    TableParameter["City"] = "VARCHAR (100)"
    TableParameter["Country"] = "VARCHAR (50)"
    TableParameter["CreationDate"] = "timestamp without time zone"
    TableParameter["DistributorCode"] = "INTEGER"
    TableParameter["Email"] = "TEXT"
    TableParameter["ExpirationDate"] = "timestamp without time zone"
    TableParameter["FirstName"] = "TEXT"
    TableParameter["IBAN"] = "TEXT"
    TableParameter["IsExcludedAddress"] = "INTEGER"
    TableParameter["IsRenewal"] = "INTEGER"
    TableParameter["KYC_Status"] = "VARCHAR (50)"
    TableParameter["LastName"] = "TEXT"
    TableParameter["NoMobile"] = "TEXT"
    TableParameter["PostCode"] = "VARCHAR (50)"
    TableParameter["Programme"] = "VARCHAR (50)"
    TableParameter["RenewalDate"] = "timestamp without time zone"
    TableParameter["UpdateBalanceDate"] = "timestamp without time zone"
    TableParameter["UpdateDate"] = "timestamp without time zone"

    keepcol = [
        "CardHolderID", "Email", "FirstName", "LastName", "City", "Country",
        "Card Status", "DistributorCode", "ApplicationName", "Date of Birth",
        "IBAN", "CreatedDate", "UpdatedDate", "Address1", "Address2",
        "PostCode", "KYC Status", "expirydate", "AvailableBalance", "NoMobile",
        "Programme"
    ]

    #### Step 1: Extract the data from the file and keep only updated data
    # extract filedate
    FileName = filepath.split('/')[-1].replace(".csv", "")

    DateFile = pd.to_datetime(
        FileName.split("-")[1] + "-" + FileName.split("-")[2] + "-" +
        FileName.split("-")[3])

    # based on the file date, identify the appropriate names of columns
    if DateFile > pd.to_datetime('2019-03-12'):

        col_names = [
            "CardHolderID", "Cardnumber", "Email", "FirstName", "LastName",
            "City", "Country", "Card Status", "DistributorCode",
            "ApplicationName", "Date of Birth", "SortCodeAccNum", "IBAN",
            "CreatedDate", "UpdatedDate", "Address1", "Address2", "PostCode",
            "KYC Status", "expirydate", "AvailableBalance", "UDF2", "NoMobile",
            "Programme", "VPVR"
        ]

    elif DateFile < pd.to_datetime('2019-01-16'):

        col_names = [
            "CardHolderID", "Cardnumber", "Email", "FirstName", "LastName",
            "City", "Country", "Card Status", "DistributorCode",
            "ApplicationName", "Date of Birth", "SortCodeAccNum", "IBAN",
            "CreatedDate", "UpdatedDate", "Address1", "Address2", "PostCode",
            "KYC Status", "expirydate"
        ]

    else:

        col_names = [
            "CardHolderID", "Cardnumber", "Email", "FirstName", "LastName",
            "City", "Country", "Card Status", "DistributorCode",
            "ApplicationName", "Date of Birth", "SortCodeAccNum", "IBAN",
            "CreatedDate", "UpdatedDate", "Address1", "Address2", "PostCode",
            "KYC Status", "expirydate", "AvailableBalance", "UDF2", "NoMobile",
            "UDF3", "VPVR"
        ]

    # add the names of columns to the dataframe
    Data.columns = col_names

    # store the missing columns
    missing_columns = list(set(keepcol).difference(col_names))

    if missing_columns:  # if any are missing, add them as empty columns
        for col in missing_columns:
            Data[col] = None

    # keep track of available balance
    tmp_available_balance = Data[["CardHolderID", "AvailableBalance"]].copy()
    tmp_available_balance["UpdateBalanceDate"] = datetime.datetime.now()

    # read the current status values; used below for change detection

    engine = connect_to_database("Postgres", "Creacard_Calypso").CreateEngine()
    query = """
    select distinct "CardHolderID","CardStatus","KYC_Status" from "CARD_STATUS"."STATUS_CARTES"
    """
    data_current = pd.read_sql(query, con=engine)

    data_current["CardHolderID"] = data_current["CardHolderID"].astype(str)
    data_current["KYC_Status"] = data_current["KYC_Status"].astype(str)
    data_current["CardStatus"] = data_current["CardStatus"].astype(str)

    #### Step 2: Transform the data

    # transform date columns to pd.datetime format in order to have a consistent format
    # of date over the database
    # Only transform updated date
    Data["UpdatedDate"] = pd.to_datetime(Data["UpdatedDate"],
                                         format="%b %d %Y %I:%M%p",
                                         errors='coerce')
    Data["CreatedDate"] = pd.to_datetime(Data["CreatedDate"],
                                         format="%b %d %Y %I:%M%p",
                                         errors='coerce')
    Data["Date of Birth"] = pd.to_datetime(Data["Date of Birth"],
                                           format="%b %d %Y %I:%M%p",
                                           errors='coerce')

    # transform expirydate
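    # the raw value is read as "YYMM" and rebuilt as "20YY-MM-01"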
    Data["expirydate"] = Data["expirydate"].astype(str)
    Data["expirydate"] = "20" + Data["expirydate"].str[0:2] + "-" + Data[
        "expirydate"].str[2:] + "-01"
    Data["expirydate"] = pd.to_datetime(Data["expirydate"],
                                        format='%Y-%m-%d',
                                        errors='coerce')

    Data = Data[keepcol]

    # addresses to flag as excluded (office / distributor addresses)
    AddressToRemove = [
        "77 OXFORD STREET LONDON", "17 RUE D ORLEANS", "TSA 51760",
        "77 Oxford Street London", "36 CARNABY STREET",
        "36 CARNABY STREET LONDON", "ADDRESS",
        "17 RUE D ORLEANS PARIS", "CreaCard Espana S L  Paseo de Gracia 59",
        "36 Carnaby Street London",
        "CREACARD SA Pl  Marcel Broodthaers 8 Box 5", "17 Rue D Orleans Paris",
        "CREACARD ESPANA S L  PASEO DE GRACIA 59", "CreaCard 17 rue d Orleans",
        "CREACARD SA PL  MARCEL BROODTHAERS 8 BOX 75", "36 Carnaby Street",
        "77 OXFORD STREET"
    ]

    Data["IsExcludedAddress"] = (
        Data.Address1.isin(AddressToRemove)).astype(int)

    Data["ActivationDate"] = pd.NaT
    Data["IsRenewal"] = 0
    Data["RenewalDate"] = pd.NaT

    Data = Data[sorted(Data.columns)]

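    # the columns are in sorted order here, so the list below renames each
    # raw column positionally to its database name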
    colnames = [
        "ActivationDate", "Address1", "Address2", "ApplicationName",
        "AvailableBalance", "CardStatus", "CardHolderID", "City", "Country",
        "CreationDate", "BirthDate", "DistributorCode", "Email", "FirstName",
        "IBAN", "IsExcludedAddress", "IsRenewal", "KYC_Status", "LastName",
        "NoMobile", "PostCode", "Programme", "RenewalDate", "UpdateDate",
        "ExpirationDate"
    ]

    Data.columns = colnames

    Data["UpdateBalanceDate"] = datetime.datetime.now()

    Data = Data[sorted(Data.columns)]

    Data.loc[(Data["KYC_Status"] == '0') | (Data["KYC_Status"] == '0.0') |
             (Data["KYC_Status"] == 0), "KYC_Status"] = 'Anonyme'
    Data.loc[(Data["KYC_Status"] == '1') | (Data["KYC_Status"] == '1.0') |
             (Data["KYC_Status"] == 1), "KYC_Status"] = 'SDD'
    Data.loc[(Data["KYC_Status"] == '2') | (Data["KYC_Status"] == '2.0') |
             (Data["KYC_Status"] == 2), "KYC_Status"] = 'KYC'
    Data.loc[(Data["KYC_Status"] == '3') | (Data["KYC_Status"] == '3.0') |
             (Data["KYC_Status"] == 3), "KYC_Status"] = 'KYC LITE'

    Data["DistributorCode"] = Data["DistributorCode"].fillna(-1)
    Data["DistributorCode"] = Data["DistributorCode"].astype(int)

    Data["CardHolderID"] = Data["CardHolderID"].astype(str)
    Data["KYC_Status"] = Data["KYC_Status"].astype(str)
    Data["CardStatus"] = Data["CardStatus"].astype(str)

    # set 1 : identify rows whose CardStatus or KYC_Status changed

    data_new = Data[["CardHolderID", "CardStatus", "KYC_Status"]]
    outer_join = data_current.merge(data_new, how='outer', indicator=True)
    outer_join = outer_join[outer_join["_merge"] == "right_only"]

    # set 2 : identify new cardholder ID
    new_card_holder_id = set(outer_join["CardHolderID"].unique()).difference(
        data_current["CardHolderID"].unique())

    # set 3 : keep the old values of cardholders whose status changed;
    # .copy() avoids pandas' SettingWithCopyWarning when dt_change is added
    data_to_change = data_current[data_current["CardHolderID"].isin(
        set(outer_join.loc[
            ~outer_join["CardHolderID"].isin(new_card_holder_id),
            "CardHolderID"]))].copy()

    FileName = filepath.split('/')[-1].replace(".csv", "")
    DateFile = pd.to_datetime(
        FileName.split("-")[1] + "-" + FileName.split("-")[2] + "-" +
        FileName.split("-")[3]) - datetime.timedelta(days=1)

    data_to_change["dt_change"] = DateFile

    InsertTableIntoDatabase(data_to_change,
                            "CHANGE_STATUS_CARTES",
                            "CARD_STATUS",
                            "Postgres",
                            "Creacard_Calypso",
                            DropTable=False)

    # find cardholders to refresh: changed or new status rows plus rows
    # updated on the file date

    DateFile = pd.to_datetime(
        FileName.split("-")[1] + "-" + FileName.split("-")[2] + "-" +
        FileName.split("-")[3])
    update_set = Data[(Data["UpdateDate"] >= DateFile)
                      & (Data["UpdateDate"] < DateFile + pd.Timedelta(days=1))]
    update_set = update_set.reset_index(drop=True)

    import numpy as np
    # gather every CardHolderID to refresh (the literal '0' id is dropped)
    chid_to_update = pd.DataFrame(
        np.concatenate((outer_join["CardHolderID"].unique(),
                        update_set["CardHolderID"].unique()),
                       axis=0))
    chid_to_update.columns = ["CardHolderID"]
    chid_to_update = chid_to_update[chid_to_update["CardHolderID"] != '0']

    to_update = Data[Data["CardHolderID"].isin(
        chid_to_update["CardHolderID"].unique())]

    #### Step 3: Load these data into a temporary table

    con_postgres = connect_to_database(database_type,
                                       database_name).CreateEngine()
    query = """
    DROP TABLE IF EXISTS "TMP_UPDATE"."TMP_STATUS_CARTES"
    """
    con_postgres.execute(query)
    con_postgres.close()

    InsertTableIntoDatabase(to_update,
                            "TMP_STATUS_CARTES",
                            "TMP_UPDATE",
                            database_type,
                            database_name,
                            DropTable=True,
                            TableDict=TableParameter,
                            SizeChunck=10000)

    #### Step 4: Replace the old rows with the new values
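    # (delete matching rows from the target, set the renewal flag on the
    # staging table, then re-insert: a delete-and-insert upsert)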

    query_delete = """

       DELETE FROM "CARD_STATUS"."STATUS_CARTES"
       USING "TMP_UPDATE"."TMP_STATUS_CARTES"
       WHERE 
       "CARD_STATUS"."STATUS_CARTES"."CardHolderID" = "TMP_UPDATE"."TMP_STATUS_CARTES"."CardHolderID"

       """

    con_postgres = connect_to_database(database_type,
                                       database_name).CreateEngine()
    con_postgres.execute(query_delete)
    con_postgres.close()

    query_update = """
    
    UPDATE "TMP_UPDATE"."TMP_STATUS_CARTES" 
    SET "IsRenewal" = CASE WHEN "DistributorCode" in ('203','914','915') then 1
    else 0
    end 
    """

    con_postgres = connect_to_database(database_type,
                                       database_name).CreateEngine()
    con_postgres.execute(query_update)
    con_postgres.close()

    query = """

           INSERT INTO "CARD_STATUS"."STATUS_CARTES"
           SELECT *
           FROM "TMP_UPDATE"."TMP_STATUS_CARTES" 

          """

    con_postgres = connect_to_database(database_type,
                                       database_name).CreateEngine()
    con_postgres.execute(query)
    con_postgres.close()

    # drop the temporary table
    con_postgres = connect_to_database(database_type,
                                       database_name).CreateEngine()
    query = """
          DROP TABLE IF EXISTS "TMP_UPDATE"."TMP_STATUS_CARTES"
          """
    con_postgres.execute(query)
    con_postgres.close()

    #### Step 5: Update available balance for all CHID

    tlb_param_balance = dict()
    tlb_param_balance["AvailableBalance"] = "double precision"
    tlb_param_balance["CardHolderID"] = "VARCHAR (50)"
    tlb_param_balance["UpdateBalanceDate"] = "timestamp without time zone"

    con_postgres = connect_to_database(database_type,
                                       database_name).CreateEngine()
    query = """
       DROP TABLE IF EXISTS "TMP_UPDATE"."TMP_AVAILABLE_BALANCE"
       """
    con_postgres.execute(query)
    con_postgres.close()

    InsertTableIntoDatabase(tmp_available_balance,
                            "TMP_AVAILABLE_BALANCE",
                            "TMP_UPDATE",
                            database_type,
                            database_name,
                            DropTable=True,
                            TableDict=tlb_param_balance,
                            SizeChunck=10000)

    con_postgres = connect_to_database(database_type,
                                       database_name).CreateEngine()

    query_balance = """

    UPDATE "CARD_STATUS"."STATUS_CARTES"
    SET "AvailableBalance" = T1."AvailableBalance",
    "UpdateBalanceDate" = T1."UpdateBalanceDate"
    from "TMP_UPDATE"."TMP_AVAILABLE_BALANCE" as T1
    WHERE 
    "CARD_STATUS"."STATUS_CARTES"."CardHolderID" = T1."CardHolderID"

    """
    con_postgres.execute(query_balance)
    con_postgres.close()

    con_postgres = connect_to_database(database_type,
                                       database_name).CreateEngine()
    query = """
       DROP TABLE IF EXISTS "TMP_UPDATE"."TMP_AVAILABLE_BALANCE"
       """
    con_postgres.execute(query)
    con_postgres.close()

    con_postgres = connect_to_database(database_type,
                                       database_name).CreateEngine()

    query = """

    update "CARD_STATUS"."STATUS_CARTES" as T1
    SET "ActivationDate" = "ActivationTime"
    FROM "CARD_STATUS"."ACTIVATION_REPORT" as T2
    WHERE 
    T1."CardHolderID" = T2."CardHolderID" and 
    "ActivationDate" is null 

    """

    con_postgres.execute(query)
    con_postgres.close()
Example #4
def extract_change_address(data_current, tmp_data, Filename):
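    """Archive address changes into "CARD_STATUS"."CHANGE_ADDRESSES_CARTES".

    Inferred from the code: a normalised address key is compared between the
    current table and the new extract; the old rows of changed addresses,
    plus brand-new cardholders, are stored stamped with the file date minus
    one day.
    """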

    DateFile = pd.to_datetime(
        Filename.split("-")[1] + "-" + Filename.split("-")[2] + "-" +
        Filename.split("-")[3].replace(".csv", ""))

    # store new addresses based on new CardHolderID
    new_addresses = tmp_data[~tmp_data["CardHolderID"].
                             isin(data_current["CardHolderID"])]

    # tmp_data_current
    tmp_data_current = data_current.copy()

    tmp_data_current = tmp_data_current.set_index('CardHolderID')
    tmp_data_current = tmp_data_current.sort_index()

    tmp_data = tmp_data.set_index('CardHolderID')
    tmp_data = tmp_data[tmp_data.index.isin(tmp_data_current.index)]
    tmp_data = tmp_data.sort_index()

    for col in tmp_data_current.columns:
        tmp_data_current.loc[tmp_data_current[col].isna(), col] = ""

    for col in tmp_data.columns:
        tmp_data.loc[tmp_data[col].isna(), col] = ""

    tmp_data_current["PostCode"] = tmp_data_current["PostCode"].astype(str)
    tmp_data["PostCode"] = tmp_data["PostCode"].astype(str)

    # build a normalised address key (lower-cased, spaces removed)
    tmp_data_current["concat_address"] = tmp_data_current["Address1"].str.lower().str.replace(" ", "") + \
                                         tmp_data_current["Address2"].str.lower().str.replace(" ", "") + \
                                         tmp_data_current["PostCode"].str.lower().str.replace(" ", "") + \
                                         tmp_data_current["City"].str.lower().str.replace(" ", "")

    tmp_data["concat_address"] = tmp_data["Address1"].str.lower().str.replace(
        " ", "") + tmp_data["Address2"].str.lower().str.replace(
            " ", "") + tmp_data["PostCode"].str.lower().str.replace(
                " ", "") + tmp_data["City"].str.lower().str.replace(
                    " ", "")

    tmp_data_current = tmp_data_current[tmp_data_current.index.isin(
        tmp_data.index)]
    tmp_data_current = tmp_data_current.sort_index()
    tmp_data = tmp_data.sort_index()

    # cardholders whose normalised address differs between the two snapshots
    A = tmp_data_current["concat_address"] != tmp_data["concat_address"]
    A = A[A]

    tmp_insert = data_current[data_current["CardHolderID"].isin(A.index)]

    tmp_insert = pd.concat([tmp_insert, new_addresses],
                           axis=0).reset_index(drop=True)

    tmp_insert["dt_change"] = DateFile - datetime.timedelta(days=1)

    TableParameter = {}
    TableParameter["CardHolderID"] = "VARCHAR (50)"
    TableParameter["Address1"] = "VARCHAR (100)"
    TableParameter["Address2"] = "VARCHAR (50)"
    TableParameter["PostCode"] = "VARCHAR (50)"
    TableParameter["City"] = "VARCHAR (100)"
    TableParameter["dt_change"] = "timestamp without time zone"

    schema = "CARD_STATUS"
    TlbName = "CHANGE_ADDRESSES_CARTES"

    InsertTableIntoDatabase(tmp_insert,
                            TlbName,
                            schema,
                            "Postgres",
                            "Creacard_Calypso",
                            TableDict=TableParameter,
                            DropTable=False)
Example #5
def extract_changes_email(tmp_data, data_current, FileName):
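    """Archive customer-detail changes into "CARD_STATUS"."CHANGE_CUSTOMERS_CARTES".

    Inferred from the code: Email, names, BirthDate, IBAN, NoMobile and
    Programme are compared per cardholder; each changed field is flagged with
    an Is_ch_* indicator and the old rows are stored stamped with the file
    date minus one day.
    """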

    DateFile = pd.to_datetime(
        FileName.split("-")[1] + "-" + FileName.split("-")[2] + "-" +
        FileName.split("-")[3].replace(".csv", ""))

    # store new cardholders based on new CardHolderID
    new_chid = tmp_data[~tmp_data["CardHolderID"].
                        isin(data_current["CardHolderID"])]

    # tmp_data_current
    tmp_data_current = data_current.copy()

    tmp_data_current = tmp_data_current.set_index('CardHolderID')
    tmp_data_current = tmp_data_current.sort_index()

    tmp_data = tmp_data.set_index('CardHolderID')
    tmp_data = tmp_data[tmp_data.index.isin(tmp_data_current.index)]
    tmp_data = tmp_data.sort_index()

    for col in tmp_data_current.columns:
        if col == "FirstName" or col == "LastName":
            tmp_data_current[col] = tmp_data_current[col].str.replace(" ", "")
        tmp_data_current.loc[tmp_data_current[col].isna(), col] = ""

    for col in tmp_data.columns:
        if col == "FirstName" or col == "LastName":
            tmp_data[col] = tmp_data[col].str.replace(" ", "")
        tmp_data.loc[tmp_data[col].isna(), col] = ""

    tmp_data_current = tmp_data_current[tmp_data_current.index.isin(
        tmp_data.index)]
    tmp_data_current = tmp_data_current.sort_index()
    tmp_data = tmp_data.sort_index()

    A = tmp_data_current != tmp_data
    A = A.astype(int)

    A["sum_changes"] = A.sum(axis=1)

    A = A[A["sum_changes"] > 0]
    A.columns = [
        "Is_ch_Email", "Is_ch_FirstName", "Is_ch_LastName", "Is_ch_BirthDate",
        "Is_ch_IBAN", "Is_ch_NoMobile", "Is_ch_Programme", "total_ch"
    ]

    tmp_insert = data_current[data_current["CardHolderID"].isin(A.index)]
    tmp_insert = tmp_insert.reset_index(drop=True)
    A = A.reset_index(drop=False)
    tmp_insert = pd.merge(tmp_insert, A, on="CardHolderID", how="inner")

    tmp_insert["dt_change"] = DateFile - datetime.timedelta(days=1)

    TableParameter = {}
    TableParameter["CardHolderID"] = "VARCHAR (50)"
    TableParameter["Email"] = "TEXT"
    TableParameter["FirstName"] = "TEXT"
    TableParameter["LastName"] = "TEXT"
    TableParameter["BirthDate"] = "timestamp without time zone"
    TableParameter["IBAN"] = "TEXT"
    TableParameter["NoMobile"] = "TEXT"
    TableParameter["Programme"] = "VARCHAR (50)"
    TableParameter["Is_ch_Email"] = "INTEGER"
    TableParameter["Is_ch_FirstName"] = "INTEGER"
    TableParameter["Is_ch_LastName"] = "INTEGER"
    TableParameter["Is_ch_BirthDate"] = "INTEGER"
    TableParameter["Is_ch_IBAN"] = "INTEGER"
    TableParameter["Is_ch_NoMobile"] = "INTEGER"
    TableParameter["Is_ch_Programme"] = "INTEGER"
    TableParameter["total_ch"] = "INTEGER"
    TableParameter["dt_change"] = "timestamp without time zone"

    schema = "CARD_STATUS"
    TlbName = "CHANGE_CUSTOMERS_CARTES"

    InsertTableIntoDatabase(tmp_insert,
                            TlbName,
                            schema,
                            "Postgres",
                            "Creacard_Calypso",
                            TableDict=TableParameter,
                            DropTable=False)
Example #6
def calypso_ids_production(schema_main, connexion_postgres):
    """Compute new ids and ids that changed over time

        Parameters
        -----------
        schema_main: str
            schema where ids are stored
        connexion_postgres: str
            name of the Postgres connection (as defined in the conf_python file)

    """

    # load the exclusion conditions from the conf_python JSON stored on this machine
    if sys.platform == "win32":
        folder_json = os.path.expanduser(
            '~') + "\\conf_python\\unique_id_conditions.json"
    else:
        folder_json = os.environ[
            'HOME'] + "/conf_python/unique_id_conditions.json"
    with open(folder_json, 'r') as JSON:
        conditions = json.load(JSON)

    condition = conditions["exclusion_cartes"]["request"]
    condition_on_email = conditions["condition_email"]["dataframe"]

    # generate new ids and handle customers whose information changed

    # Step - 0 :
    # - extract new cards
    # - extract cards already associated to a USER_ID whose information changed
    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()

    query = """

    select T1."CardHolderID", T1."NoMobile", lower(T1."Email") as "Email",
    T1."FirstName", T1."LastName", T1."BirthDate", T1."PostCode", T1."Address1", T1."Address2",
    T1."ActivationDate"
    from "CARD_STATUS"."STATUS_CARTES" as T1
    left join "{}"."MASTER_ID" as T2
    on T1."CardHolderID" = T2."CardHolderID"
    where (T1."NoMobile" is not null) and (T1."Email" !~* '.*creacard.*|.*prepaidfinancial.*|.*financial.*')
    and T2."USER_ID" is null

    UNION ALL


    select T1."CardHolderID", T1."NoMobile", lower(T1."Email") as "Email",
    T1."FirstName", T1."LastName", T1."BirthDate", T1."PostCode", T1."Address1", T1."Address2",
    T1."ActivationDate"
    from "CARD_STATUS"."STATUS_CARTES" as T1
    Join(
       select "CardHolderID"
       from "CARD_STATUS"."CHANGE_CUSTOMERS_CARTES"
       where "dt_change" >= date(now() - INTERVAL '1 DAY')::timestamp and
       ("Is_ch_BirthDate" = 1 or "Is_ch_Email" = 1 or "Is_ch_LastName" = 1 or "Is_ch_NoMobile" = 1) and 
       "NoMobile" is not null and "Email" !~* '.*creacard.*|.*prepaidfinancial.*|.*financial.*'
    ) as T2
    on T1."CardHolderID" = T2."CardHolderID"

    """.format(schema_main)

    data = pd.read_sql(query, con=engine)

    engine.close()

    # associate new ids with the identified cards
    if not data.empty:

        for var in [
                "FirstName", "LastName", "Address1", "Address2", "PostCode",
                "Email"
        ]:
            data[var] = data[var].str.encode('utf-8').astype(str)
            data.loc[data[var].isnull(), var] = ""
            data[var] = data[var].str.strip(" ")
            data[var] = data[var].str.replace(" ", "")
            data[var] = data[var].str.lower()

        data = data[~data["Email"].str.contains(
            '.*creacard.*|.*prepaidfinancial.*|.*financial.*', regex=True)]

        data["GoodEmail"] = 1
        data.loc[data["Email"].str.contains(condition_on_email, regex=True),
                 "GoodEmail"] = 0

        data["GoodCombinaison"] = 1
        data.loc[(data["LastName"].str.contains(
            conditions["condition_combinaison"]["LastName"], regex=True)) |
                 (data["BirthDate"].isnull()) |
                 (data["BirthDate"].isin(conditions["condition_combinaison"]
                                         ["BirthDate"].split(","))),
                 "GoodCombinaison"] = 0

        # Delete leading "00" at the start of string.

        data["NoMobile"] = data["NoMobile"].str.replace("^00", "", regex=True)

        # delete a trailing ".0"

        data["NoMobile"] = data["NoMobile"].str.replace(r"\.0$", "", regex=True)

        # delete literal '|' characters

        data["NoMobile"] = data["NoMobile"].str.replace(r"\|", "", regex=True)

        query = """

           DROP TABLE IF EXISTS "{}"."TMP_USER_ID" CASCADE

           """.format(schema_main)

        engine = connect_to_database("Postgres",
                                     connexion_postgres).CreateEngine()
        engine.execute(query)
        engine.close()

        query = """


           CREATE TABLE "{}"."TMP_USER_ID"(

               "CardHolderID" VARCHAR(50),
               "NoMobile" TEXT,
               "Email" TEXT,
               "FirstName" TEXT,
               "LastName" TEXT,
               "BirthDate" TEXT,
               "PostCode" TEXT,
               "Address1" TEXT,
               "Address2" TEXT,
               "ActivationDate" timestamp without time zone,
               "GoodEmail" INTEGER,
               "GoodCombinaison" INTEGER,
               "MOBILE_ID" INTEGER,
               "USER_ID" BIGINT,
               "CONTACT_ID" VARCHAR(50),
               "PERSON_ID" TEXT,
               "MOVIDON_ID" BIGINT
           )

           """.format(schema_main)

        engine = connect_to_database("Postgres",
                                     connexion_postgres).CreateEngine()
        engine.execute(query)
        engine.close()

        data = data[~data["NoMobile"].isnull()]
        data["MOBILE_ID"] = None
        data["USER_ID"] = None
        data["CONTACT_ID"] = None
        data["PERSON_ID"] = None
        data["MOVIDON_ID"] = None

        InsertTableIntoDatabase(data,
                                TlbName="TMP_USER_ID",
                                Schema=schema_main,
                                database_name=connexion_postgres,
                                database_type="Postgres",
                                DropTable=False,
                                InstertInParrell=False)

        # STEP 1 : handle new MOBILE_ID values

        query = """


           update "{}"."TMP_USER_ID"
           set "MOBILE_ID" = T1."MOBILE_ID"
           from "{}"."ID_MOBILE" as T1
           where "{}"."TMP_USER_ID"."NoMobile" = T1."NoMobile"

           """.format(schema_main, schema_main, schema_main)

        engine = connect_to_database("Postgres",
                                     connexion_postgres).CreateEngine()
        engine.execute(query)

        query = """

           select "NoMobile", count(*) as "NUM_CARTES"
           from "CUSTOMERS"."TMP_USER_ID"
           where "MOBILE_ID" is null
           group by "NoMobile"

           """

        data = pd.read_sql(query, con=engine)

        engine.close()

        if not data.empty:
            # the ID_MOBILE table has a SERIAL column, so the MOBILE_ID does
            # not need to be set from Python: Postgres assigns it
            # automatically and incrementally to each inserted row
            InsertTableIntoDatabase(data,
                                    TlbName="ID_MOBILE",
                                    Schema='CUSTOMERS',
                                    database_name=connexion_postgres,
                                    database_type="Postgres",
                                    DropTable=False,
                                    InstertInParrell=False)

            query = """

               update "CUSTOMERS"."TMP_USER_ID"
               set "MOBILE_ID" = T1."MOBILE_ID"
               from "CUSTOMERS"."ID_MOBILE" as T1
               where "CUSTOMERS"."TMP_USER_ID"."NoMobile" = T1."NoMobile" and "CUSTOMERS"."TMP_USER_ID"."MOBILE_ID" is null


               """

            engine = connect_to_database("Postgres",
                                         connexion_postgres).CreateEngine()
            engine.execute(query)
            engine.close()

        # STEP 2 : handle new CONTACT_ID

        query = """
           update "CUSTOMERS"."TMP_USER_ID"
           set "CONTACT_ID" = "CardHolderID"
           """

        engine = connect_to_database("Postgres",
                                     connexion_postgres).CreateEngine()
        engine.execute(query)
        engine.close()

        # STEP 3 : handle new PERSON_ID

        query = """


           update "CUSTOMERS"."TMP_USER_ID"
           set "PERSON_ID" = T1."PERSON_ID"
           from "CUSTOMERS"."ID_PERSON" as T1
           where concat("CUSTOMERS"."TMP_USER_ID"."BirthDate", "CUSTOMERS"."TMP_USER_ID"."LastName") = T1."combinaison"
           and "CUSTOMERS"."TMP_USER_ID"."GoodCombinaison" = 1

           """

        engine = connect_to_database("Postgres",
                                     connexion_postgres).CreateEngine()
        engine.execute(query)

        query = """

           select distinct concat("BirthDate", "LastName") as "combinaison"
           from "CUSTOMERS"."TMP_USER_ID"
           where "PERSON_ID" is null and "GoodCombinaison" = 1


           """

        data = pd.read_sql(query, con=engine)

        engine.close()

    if not data.empty:
        # the ID_PERSON table has a SERIAL column, so the PERSON_ID does not
        # need to be set from Python: Postgres assigns it automatically and
        # incrementally to each inserted row
        InsertTableIntoDatabase(data,
                                TlbName="ID_PERSON",
                                Schema='CUSTOMERS',
                                database_name=connexion_postgres,
                                database_type="Postgres",
                                DropTable=False,
                                InstertInParrell=False)

        query = """

           update "CUSTOMERS"."TMP_USER_ID"
           set "PERSON_ID" = T1."PERSON_ID"
           from "CUSTOMERS"."ID_PERSON" as T1
           where concat("CUSTOMERS"."TMP_USER_ID"."BirthDate", "CUSTOMERS"."TMP_USER_ID"."LastName") = T1."combinaison"
           and "CUSTOMERS"."TMP_USER_ID"."GoodCombinaison" = 1 and "CUSTOMERS"."TMP_USER_ID"."PERSON_ID" is null

           """

        engine = connect_to_database("Postgres",
                                     connexion_postgres).CreateEngine()
        engine.execute(query)
        engine.close()

    # step 4.1: make sure that we keep the current max USER_ID
    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()

    query = """

    select max("USER_ID") as "user_id_max"
    from "CUSTOMERS"."MASTER_ID"

    """

    id_max = pd.read_sql(query, con=engine)
    id_max = int(id_max.loc[0, "user_id_max"])

    engine.close()

    # STEP 4.2: identify cards that can be associated to a known USER_ID
    query = """
       UPDATE "CUSTOMERS"."TMP_USER_ID"
       set "USER_ID" = T1."USER_ID"
       from 
       (select "CardHolderID", "USER_ID"::integer
       from "CUSTOMERS"."MASTER_ID") as T1
       where "CUSTOMERS"."TMP_USER_ID"."CardHolderID" = T1."CardHolderID"
       """
    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()
    engine.execute(query)
    engine.close()

    query = """

       update "CUSTOMERS"."TMP_USER_ID"
       set "USER_ID" = T1."USER_ID"
       from "CUSTOMERS"."ID_USER" as T1
       where "CUSTOMERS"."TMP_USER_ID"."NoMobile" = T1."NoMobile" and
       "CUSTOMERS"."TMP_USER_ID"."USER_ID" is null

       """

    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()
    engine.execute(query)
    engine.close()

    query = """

       update "CUSTOMERS"."TMP_USER_ID"
       set "USER_ID" = T1."USER_ID"
       from "CUSTOMERS"."ID_USER" as T1
       where "CUSTOMERS"."TMP_USER_ID"."GoodEmail" = 1 and "CUSTOMERS"."TMP_USER_ID"."Email" = T1."Email" and
       "CUSTOMERS"."TMP_USER_ID"."USER_ID" is null

       """

    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()
    engine.execute(query)
    engine.close()

    query = """

       update "CUSTOMERS"."TMP_USER_ID"
       set "USER_ID" = T1."USER_ID"
       from "CUSTOMERS"."ID_USER" as T1
       where concat("CUSTOMERS"."TMP_USER_ID"."BirthDate", "CUSTOMERS"."TMP_USER_ID"."LastName") = T1."combinaison"
       and "CUSTOMERS"."TMP_USER_ID"."GoodCombinaison" = 1 and "CUSTOMERS"."TMP_USER_ID"."CONTACT_ID" is null

       """

    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()
    engine.execute(query)

    # STEP 4.4: extract all cards (new ones and ones already associated to a
    # USER_ID) and re-associate USER_ID via the iterative min-propagation
    # below, keeping each USER_ID consistent and unique
    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()

    query = """

       select *
       from "CUSTOMERS"."TMP_USER_ID"


       """

    data = pd.read_sql(query, con=engine)
    data["combinaison"] = data["BirthDate"] + data["LastName"]
    data = data[[
        "NoMobile", "Email", "combinaison", "GoodEmail", "GoodCombinaison",
        "USER_ID"
    ]]

    query = """

    select "NoMobile", "Email", "combinaison", "GoodEmail", "GoodCombinaison", "USER_ID"
    from "CUSTOMERS"."ID_USER"


    """

    data_bis = pd.read_sql(query, con=engine)
    data_bis["USER_ID"] = data_bis["USER_ID"].astype(float)

    data = pd.concat([data, data_bis], axis=0)

    user_id = data.drop_duplicates(keep='first')
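
    # fixpoint loop: repeatedly propagate the minimum USER_ID across the three
    # linkage keys (NoMobile, then Email when GoodEmail = 1, then the
    # BirthDate+LastName combination when GoodCombinaison = 1) until every
    # key maps to a single USER_ID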

    tic = time.time()
    stable = False
    while not stable:

        tmp_user_id = user_id.groupby(
            "NoMobile")["USER_ID"].min().reset_index()
        tmp_user_id.columns = ["NoMobile", "TMP_USER_ID"]
        user_id = pd.merge(user_id, tmp_user_id, on="NoMobile", how="inner")
        user_id["USER_ID"] = user_id["TMP_USER_ID"]
        user_id = user_id.drop(columns='TMP_USER_ID', axis=1)

        tmp_user_id = user_id[user_id["GoodEmail"] == 1].groupby(
            "Email")["USER_ID"].min().reset_index()
        tmp_user_id.columns = ["Email", "TMP_USER_ID"]
        user_id = pd.merge(user_id, tmp_user_id, on="Email", how="left")
        user_id.loc[~user_id["TMP_USER_ID"].isnull(),
                    "USER_ID"] = user_id["TMP_USER_ID"]
        user_id = user_id.drop(columns='TMP_USER_ID', axis=1)

        tmp_user_id = user_id[user_id["GoodCombinaison"] == 1].groupby(
            "combinaison")["USER_ID"].min().reset_index()
        tmp_user_id.columns = ["combinaison", "TMP_USER_ID"]
        user_id = pd.merge(user_id, tmp_user_id, on="combinaison", how="left")
        user_id.loc[~user_id["TMP_USER_ID"].isnull(),
                    "USER_ID"] = user_id["TMP_USER_ID"]
        user_id = user_id.drop(columns='TMP_USER_ID', axis=1)

        non_unique_num = user_id.groupby(
            "NoMobile")["USER_ID"].nunique().sort_values().reset_index()
        non_unique_num = non_unique_num.loc[non_unique_num["USER_ID"] > 1,
                                            "NoMobile"]

        non_unique_email = user_id[user_id["GoodEmail"] == 1].groupby(
            "Email")["USER_ID"].nunique().sort_values().reset_index()
        non_unique_email = non_unique_email.loc[
            non_unique_email["USER_ID"] > 1, "Email"]

        non_unique_combi = user_id[user_id["GoodCombinaison"] == 1].groupby(
            "combinaison")["USER_ID"].nunique().sort_values().reset_index()
        non_unique_combi = non_unique_combi.loc[
            non_unique_combi["USER_ID"] > 1, "combinaison"]

        stable = (len(non_unique_num) == 0 and len(non_unique_email) == 0
                  and len(non_unique_combi) == 0)

    toc = time.time() - tic

    tmp_use_id = user_id[~user_id["USER_ID"].isnull()]

    # STEP 4.5: associate a new user_id to cards that do not have one yet
    user_id = user_id[user_id["USER_ID"].isnull()]
    user_id = compute_user_id(user_id, last_user_id=id_max)

    # STEP 4.6: replace the ID_USER table with the new ids
    user_id = pd.concat([user_id, tmp_use_id], axis=0)
    user_id = user_id.reset_index(drop=True)

    query = """
    delete from "CUSTOMERS"."ID_USER"
    """

    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()

    engine.execute(query)
    engine.close()

    InsertTableIntoDatabase(user_id,
                            TlbName="ID_USER",
                            Schema='CUSTOMERS',
                            database_name=connexion_postgres,
                            database_type="Postgres",
                            DropTable=False,
                            InstertInParrell=False)

    # STEP 5: keep track of cards for which the user id changed

    # STEP 5.1: snapshot the current CardHolderID -> USER_ID mapping

    query = """
    CREATE TABLE "CUSTOMERS"."TMP_MASTER_ID" as 
    SELECT "CardHolderID", "USER_ID"
    from "CUSTOMERS"."MASTER_ID"

    """
    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()
    engine.execute(query)
    engine.close()

    # STEP 5.1.1: delete cards already identified from MASTER_ID
    query = """
        delete from "CUSTOMERS"."MASTER_ID"
        where "CardHolderID" in (select T1."CardHolderID"
        from "CUSTOMERS"."TMP_USER_ID" as T1
        inner join "CUSTOMERS"."MASTER_ID" as T2
        ON T1."CardHolderID" = T2."CardHolderID")
        """

    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()

    engine.execute(query)
    engine.close()

    # STEP 5.2: add new cards to master ID
    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()
    query = """
    select *
    from "CUSTOMERS"."TMP_USER_ID"

    """
    data = pd.read_sql(query, con=engine)
    engine.close()

    data["PERSON_ID"] = data["PERSON_ID"].astype(str)
    data["PERSON_ID"] = data["PERSON_ID"].str.replace(r"\.0$", "", regex=True)

    InsertTableIntoDatabase(data,
                            TlbName="MASTER_ID",
                            Schema='CUSTOMERS',
                            database_name=connexion_postgres,
                            database_type="Postgres",
                            DropTable=False,
                            InstertInParrell=False)

    # STEP 5.3: update new user id
    query = """

    UPDATE "CUSTOMERS"."MASTER_ID"
    SET "USER_ID" = NULL
    """

    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()
    engine.execute(query)
    engine.close()

    query = """
        update "CUSTOMERS"."MASTER_ID"
        set "USER_ID" = T1."USER_ID"
        from(
        select distinct "NoMobile", "USER_ID"
        from "CUSTOMERS"."ID_USER") as T1
        where "CUSTOMERS"."MASTER_ID"."NoMobile" = T1."NoMobile"
    """

    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()
    engine.execute(query)
    engine.close()

    # STEP 5.4: store user id that changed
    query = """
    select T3.*
    from(

        select T1.*, T2."USER_ID" as "oth_user_id"
        from "CUSTOMERS"."MASTER_ID" as T1
        INNER JOIN "CUSTOMERS"."TMP_MASTER_ID" as T2
        on T1."CardHolderID" = T2."CardHolderID"

        ) as T3
    where T3."USER_ID" <> T3."oth_user_id"

    """

    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()
    data = pd.read_sql(query, con=engine)
    engine.close()

    data["USER_ID"] = data["oth_user_id"]

    data = data.drop(columns=["oth_user_id"], axis=1)

    data["dt_change"] = datetime.datetime.now() - datetime.timedelta(days=1)

    data["PERSON_ID"] = data["PERSON_ID"].astype(str)
    data["PERSON_ID"] = data["PERSON_ID"].str.replace(r"\.0$", "", regex=True)

    # insert these cards into the table that tracks ID changes over time
    InsertTableIntoDatabase(data,
                            TlbName="CHANGE_IDS",
                            Schema='CUSTOMERS',
                            database_name=connexion_postgres,
                            database_type="Postgres",
                            DropTable=False,
                            InstertInParrell=False)

    query = """
    drop table "CUSTOMERS"."TMP_MASTER_ID" cascade
    """
    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()
    engine.execute(query)
    engine.close()

    # step 5.5: make sure that all changes are taken into account

    query = """
    
    drop table if exists "CUSTOMERS"."TMP_CHANGES_IDS" cascade;

    create table "CUSTOMERS"."TMP_CHANGES_IDS" as 
    select T2.*, now() as "dt_change"
    from(
            select distinct T1."USER_ID" as "user_id_change", 
            T2."USER_ID" as "user_id_current"
            from "CUSTOMERS"."CHANGE_IDS" as T1
            inner join "CUSTOMERS"."MASTER_ID" as T2
            on T1."CardHolderID" = T2."CardHolderID"
            where T1."dt_change" >= date(now()-interval '1 days')
            ) as T3
    inner join "CUSTOMERS"."MASTER_ID" as T2
    on T2."USER_ID" = T3."user_id_change";
    
    
    
    """

    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()
    engine.execute(query)
    engine.close()

    query = """
    
        update "CUSTOMERS"."ID_USER"
    set "USER_ID" = T3."user_id_current"
    from(
        select distinct T1."USER_ID" as "user_id_change", 
        T2."USER_ID" as "user_id_current"
        from "CUSTOMERS"."CHANGE_IDS" as T1
        inner join "CUSTOMERS"."MASTER_ID" as T2
        on T1."CardHolderID" = T2."CardHolderID"
        where T1."dt_change" >= date(now()-interval '1 days')
        ) as T3
    where "CUSTOMERS"."ID_USER"."USER_ID" = T3."user_id_change"
        
    
    """
    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()
    engine.execute(query)
    engine.close()

    query = """
    
    insert into "CUSTOMERS"."CHANGE_IDS"
    select * from "CUSTOMERS"."TMP_CHANGES_IDS"
    
    
    """

    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()
    engine.execute(query)
    engine.close()

    query = """

    drop table if exists "CUSTOMERS"."TMP_CHANGES_IDS" cascade


    """

    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()
    engine.execute(query)
    engine.close()

    query = """

    UPDATE "CUSTOMERS"."MASTER_ID"
    SET "USER_ID" = NULL
    """

    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()
    engine.execute(query)
    engine.close()

    query = """
        update "CUSTOMERS"."MASTER_ID"
        set "USER_ID" = T1."USER_ID"
        from(
        select distinct "NoMobile", "USER_ID"
        from "CUSTOMERS"."ID_USER") as T1
        where "CUSTOMERS"."MASTER_ID"."NoMobile" = T1."NoMobile"
    """

    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()
    engine.execute(query)
    engine.close()

    # STEP 5.6: assign a composite PERSON_ID to cards without a usable combination
    query = """
        update "CUSTOMERS"."MASTER_ID"
        set "PERSON_ID" = concat("USER_ID",'_',"MOBILE_ID")
        where "GoodCombinaison" = 0

    """
    engine = connect_to_database("Postgres", connexion_postgres).CreateEngine()
    engine.execute(query)
    engine.close()
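
The while loop in STEP 4.4 is the heart of the id assignment. A minimal, self-contained sketch of the same min-propagation idea on toy data (simplified: no GoodEmail/GoodCombinaison filters, no database):

import pandas as pd

# three cards: rows 0 and 1 share a phone number, rows 1 and 2 share an email
user_id = pd.DataFrame({
    "NoMobile": ["111", "111", "222"],
    "Email": ["a@x.com", "b@x.com", "b@x.com"],
    "USER_ID": [1.0, 2.0, 3.0],
})

stable = False
while not stable:
    before = user_id["USER_ID"].copy()
    # propagate the minimum USER_ID across each linkage key in turn
    for key in ["NoMobile", "Email"]:
        user_id["USER_ID"] = user_id.groupby(key)["USER_ID"].transform("min")
    stable = user_id["USER_ID"].equals(before)

print(user_id)  # all three rows end up with USER_ID 1.0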
Example #7
def add_new_pos_transactions(database_type, database_name, _year, _month, _day, **kwargs):
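    """Append one day of POS transactions to "TRANSACTIONS"."POS_TRANSACTIONS".

    Inferred from the code: skip dates already loaded, pull the approved POS
    debits of the day from the monthly transaction table, categorise them in
    a TMP_UPDATE staging table, then insert them into the target table.
    """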



    _tlbname = kwargs.get('tlbname', "POS_TRANSACTIONS")
    _schema = kwargs.get('schema', "TRANSACTIONS")

    date_start = datetime.datetime(_year, _month, _day)
    date_start_cond = str(date_start)[0:10]
    end_date = date_start + datetime.timedelta(days=1)
    end_date = str(end_date)[0:10]

    engine = connect_to_database(database_type, database_name).CreateEngine()

    # check whether this date has already been loaded
    query = """
    select count(*)
    from "{}"."{}"
    where "TransactionTime" >= '{}' and "TransactionTime" < '{}'
    """.format(_schema,_tlbname,str(date_start), str(end_date))

    data = pd.read_sql(query, con=engine)

    if data.iloc[0, 0] == 0:

        query = """
                        SELECT "CardHolderID","MCC","Amount","MerchantName","TransactionTime","Currency",
                        "CardVPUType", "MerchantAddress", "MerchantCity", "MerchantCountry", "MerchantID", "TransactionID",
                        CASE WHEN "TransactionTP" in ('POS International') then 1 
                        else 0
                        end as "IsPOSInternational","TransactionTP",
                        '' as "UNIVERS", '' as "SOUS_UNIVERS"
                        FROM "TRANSACTIONS_MONTHLY"."MONTHLY_TRANSACTIONS_{}"
                        where "TransactionTP" IN ('POS International','POS Domestic') 
                        and "DebitCredit" IN ('Debit') 
                        and "TransactionResult" = 'APPROVED' and "TransactionTime" >= '{}' and "TransactionTime" < '{}'
        
        """.format(str(date_start.year) + str(date_start.month), date_start_cond, end_date)

        data = pd.read_sql(query, con=engine)

        if not data.empty:

            # get the type of each column of the target table
            columns_type = create_dictionnary_type_from_table(engine, "POS_TRANSACTIONS")
            # Create the TMP table for POS TRANSACTIONS
            # Drop the table
            query = """
            DROP TABLE IF EXISTS "TMP_UPDATE"."TMP_POS_TRANSACTIONS"
            """

            engine.execute(query)

            CreateTable(engine, "TMP_POS_TRANSACTIONS", "TMP_UPDATE", columns_type,keep_order=True)

            # Insert into table
            InsertTableIntoDatabase(data,
                                    TlbName="TMP_POS_TRANSACTIONS",
                                    Schema="TMP_UPDATE",
                                    database_type=database_type,
                                    database_name=database_name,
                                    DropTable=False)

            tic = time.time()

            fill_univers_sous_univers(database_type, database_name, "TMP_UPDATE", "TMP_POS_TRANSACTIONS")

            print("categorisation was done in {} seconds".format(time.time() - tic))

            engine = connect_to_database(database_type, database_name).CreateEngine()

            query = """
            insert into "{}"."{}"
            select * from "TMP_UPDATE"."TMP_POS_TRANSACTIONS"
            """.format(_schema, _tlbname)

            engine.execute(query)

            # Drop the table
            query = """
            DROP TABLE IF EXISTS "TMP_UPDATE"."TMP_POS_TRANSACTIONS"
            """

            engine.execute(query)

            engine.close()
        else:
            print("No data for this date")

    else:
        print("this date has already been treated")
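
A minimal usage sketch (hypothetical date; the connection name comes from the other examples and the run needs the project's conf_python setup):

# load the approved POS debits of 2019-03-15 into "TRANSACTIONS"."POS_TRANSACTIONS"
add_new_pos_transactions("Postgres", "Creacard_Calypso", 2019, 3, 15,
                         tlbname="POS_TRANSACTIONS", schema="TRANSACTIONS")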
Example #8
# assumed reconstruction of the truncated snippet head: the column list
# follows the "CUSTOMERS"."ID_USER" usage in Example #6
query = """

CREATE TABLE "CUSTOMERS"."ID_USER"(

	"NoMobile" TEXT,
	"Email" TEXT,
	"combinaison" TEXT,
	"GoodCombinaison" INTEGER,
	"GoodEmail" INTEGER,
	"USER_ID" INTEGER
)

"""

engine = connect_to_database("Postgres", "Creacard_Calypso").CreateEngine()
engine.execute(query)
engine.close()



InsertTableIntoDatabase(user_id, TlbName="ID_USER", Schema='CUSTOMERS',
                        database_name="Creacard_Calypso",
                        database_type="Postgres",
                        DropTable=False,
                        InstertInParrell=False)



engine = connect_to_database("Postgres", "Creacard_Calypso").CreateEngine()

query = """


UPDATE "CUSTOMERS"."MASTER_ID"
SET "USER_ID" = T1."USER_ID"
FROM "CUSTOMERS"."ID_USER" as T1
WHERE "CUSTOMERS"."MASTER_ID"."NoMobile" = T1."NoMobile" and "CUSTOMERS"."MASTER_ID"."USER_ID" is null

"""

engine.execute(query)
engine.close()

def daily_card_status2(Data, filepath, **kwargs):
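    """Daily refresh of "CARD_STATUS"."STATUS_CARTES" (variant of Example #3).

    Inferred from the code: same flow as Example #3, with extra Last*Date
    columns, an optional ``logger`` kwarg, NoMobile cleanup and an IsRenewal
    flag set from the distributor code.
    """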

    logger = kwargs.get('logger', None)

    # Table parameter for the temporary table
    TableParameter = {}
    TableParameter["ActivationDate"] = "timestamp without time zone"
    TableParameter["Address1"] = "TEXT"
    TableParameter["Address2"] = "TEXT"
    TableParameter["ApplicationName"] = "VARCHAR (50)"
    TableParameter["AvailableBalance"] = "double precision"
    TableParameter["BirthDate"] = "timestamp without time zone"
    TableParameter["CardHolderID"] = "VARCHAR (50)"
    TableParameter["CardStatus"] = "VARCHAR (100)"
    TableParameter["City"] = "VARCHAR (100)"
    TableParameter["Country"] = "VARCHAR (50)"
    TableParameter["CreationDate"] = "timestamp without time zone"
    TableParameter["DistributorCode"] = "INTEGER"
    TableParameter["Email"] = "TEXT"
    TableParameter["ExpirationDate"] = "timestamp without time zone"
    TableParameter["FirstName"] = "TEXT"
    TableParameter["IBAN"] = "TEXT"
    TableParameter["IsExcludedAddress"] = "INTEGER"
    TableParameter["IsRenewal"] = "INTEGER"
    TableParameter["KYC_Status"] = "VARCHAR (50)"
    TableParameter["LastName"] = "TEXT"
    TableParameter["LastChangeDate"] = "timestamp without time zone"
    TableParameter["LastAddressDate"] = "timestamp without time zone"
    TableParameter["LastCustomerDate"] = "timestamp without time zone"
    TableParameter["NoMobile"] = "TEXT"
    TableParameter["PostCode"] = "VARCHAR (50)"
    TableParameter["Programme"] = "VARCHAR (50)"
    TableParameter["RenewalDate"] = "timestamp without time zone"
    TableParameter["UpdateDate"] = "timestamp without time zone"

    keepcol = [
        "CardHolderID", "Email", "FirstName", "LastName", "City", "Country",
        "Card Status", "DistributorCode", "ApplicationName", "Date of Birth",
        "IBAN", "CreatedDate", "UpdatedDate", "Address1", "Address2",
        "PostCode", "KYC Status", "expirydate", "AvailableBalance", "NoMobile",
        "Programme"
    ]

    #### Step 1: Extract the data from the file and keep only updated data
    # extract filedate
    FileName = filepath.split('/')[-1].replace(".csv", "")

    DateFile = pd.to_datetime(
        FileName.split("-")[1] + "-" + FileName.split("-")[2] + "-" +
        FileName.split("-")[3])

    # based on the file date, identify the appropriate names of columns
    if DateFile > pd.to_datetime('2019-03-12'):

        col_names = [
            "CardHolderID", "Cardnumber", "Email", "FirstName", "LastName",
            "City", "Country", "Card Status", "DistributorCode",
            "ApplicationName", "Date of Birth", "SortCodeAccNum", "IBAN",
            "CreatedDate", "UpdatedDate", "Address1", "Address2", "PostCode",
            "KYC Status", "expirydate", "AvailableBalance", "UDF2", "NoMobile",
            "Programme", "VPVR"
        ]

    elif DateFile < pd.to_datetime('2019-01-16'):

        col_names = [
            "CardHolderID", "Cardnumber", "Email", "FirstName", "LastName",
            "City", "Country", "Card Status", "DistributorCode",
            "ApplicationName", "Date of Birth", "SortCodeAccNum", "IBAN",
            "CreatedDate", "UpdatedDate", "Address1", "Address2", "PostCode",
            "KYC Status", "expirydate"
        ]

    else:

        col_names = [
            "CardHolderID", "Cardnumber", "Email", "FirstName", "LastName",
            "City", "Country", "Card Status", "DistributorCode",
            "ApplicationName", "Date of Birth", "SortCodeAccNum", "IBAN",
            "CreatedDate", "UpdatedDate", "Address1", "Address2", "PostCode",
            "KYC Status", "expirydate", "AvailableBalance", "UDF2", "NoMobile",
            "UDF3", "VPVR"
        ]

    # add the names of columns to the dataframe
    Data.columns = col_names

    # store the missing columns
    missing_columns = list(set(keepcol).difference(col_names))

    if missing_columns:  # if any are missing, add them as empty columns
        for col in missing_columns:
            Data[col] = None

    # read the current status values; used below for change detection

    engine = connect_to_database("Postgres", "Creacard_Calypso").CreateEngine()
    query = """
           select distinct "CardHolderID","CardStatus","KYC_Status" from "CARD_STATUS"."STATUS_CARTES"
       """
    data_current = pd.read_sql(query, con=engine)

    data_current["CardHolderID"] = data_current["CardHolderID"].astype(str)
    data_current["KYC_Status"] = data_current["KYC_Status"].astype(str)
    data_current["CardStatus"] = data_current["CardStatus"].astype(str)

    #### Step 2: Transform the data

    # transform date columns to pd.datetime format in order to have a consistent format
    # of date over the database
    # Only transform updated date
    Data["UpdatedDate"] = pd.to_datetime(Data["UpdatedDate"],
                                         format="%b %d %Y %I:%M%p",
                                         errors='coerce')
    Data["CreatedDate"] = pd.to_datetime(Data["CreatedDate"],
                                         format="%b %d %Y %I:%M%p",
                                         errors='coerce')
    Data["Date of Birth"] = pd.to_datetime(Data["Date of Birth"],
                                           format="%b %d %Y %I:%M%p",
                                           errors='coerce')

    # transform expirydate
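    # the raw value is read as "YYMM" and rebuilt as "20YY-MM-01"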
    Data["expirydate"] = Data["expirydate"].astype(str)
    Data["expirydate"] = "20" + Data["expirydate"].str[0:2] + "-" + Data[
        "expirydate"].str[2:] + "-01"
    Data["expirydate"] = pd.to_datetime(Data["expirydate"],
                                        format='%Y-%m-%d',
                                        errors='coerce')

    Data = Data[keepcol]

    # addresses to flag as excluded (office / distributor addresses)
    AddressToRemove = [
        "77 OXFORD STREET LONDON", "17 RUE D ORLEANS", "TSA 51760",
        "77 Oxford Street London", "36 CARNABY STREET",
        "36 CARNABY STREET LONDON", "ADDRESS",
        "17 RUE D ORLEANS PARIS", "CreaCard Espana S L  Paseo de Gracia 59",
        "36 Carnaby Street London",
        "CREACARD SA Pl  Marcel Broodthaers 8 Box 5", "17 Rue D Orleans Paris",
        "CREACARD ESPANA S L  PASEO DE GRACIA 59", "CreaCard 17 rue d Orleans",
        "CREACARD SA PL  MARCEL BROODTHAERS 8 BOX 75", "36 Carnaby Street",
        "77 OXFORD STREET"
    ]

    Data["IsExcludedAddress"] = (
        Data.Address1.isin(AddressToRemove)).astype(int)

    Data["ActivationDate"] = pd.NaT
    Data["IsRenewal"] = 0
    Data["RenewalDate"] = pd.NaT
    Data["LastChangeDate"] = pd.NaT
    Data["LastAddressDate"] = pd.NaT
    Data["LastCustomerDate"] = pd.NaT

    Data = Data[sorted(Data.columns)]
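
    # positional rename: the columns were sorted alphabetically above, so this
    # list must stay aligned with that order (e.g. "Card Status" -> "CardStatus",
    # "CreatedDate" -> "CreationDate", "expirydate" -> "ExpirationDate")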

    colnames = [
        "ActivationDate", "Address1", "Address2", "ApplicationName",
        "AvailableBalance", "CardStatus", "CardHolderID", "City", "Country",
        "CreationDate", "BirthDate", "DistributorCode", "Email", "FirstName",
        "IBAN", "IsExcludedAddress", "IsRenewal", "KYC_Status",
        "LastAddressDate", "LastChangeDate", "LastCustomerDate", "LastName",
        "NoMobile", "PostCode", "Programme", "RenewalDate", "UpdateDate",
        "ExpirationDate"
    ]

    Data.columns = colnames

    Data = Data[sorted(Data.columns)]

    Data.loc[(Data["KYC_Status"] == '0') | (Data["KYC_Status"] == '0.0') |
             (Data["KYC_Status"] == 0), "KYC_Status"] = 'Anonyme'
    Data.loc[(Data["KYC_Status"] == '1') | (Data["KYC_Status"] == '1.0') |
             (Data["KYC_Status"] == 1), "KYC_Status"] = 'SDD'
    Data.loc[(Data["KYC_Status"] == '2') | (Data["KYC_Status"] == '2.0') |
             (Data["KYC_Status"] == 2), "KYC_Status"] = 'KYC'
    Data.loc[(Data["KYC_Status"] == '3') | (Data["KYC_Status"] == '3.0') |
             (Data["KYC_Status"] == 3), "KYC_Status"] = 'KYC LITE'

    Data["DistributorCode"] = Data["DistributorCode"].fillna(-1)
    Data["DistributorCode"] = Data["DistributorCode"].astype(int)

    Data["CardHolderID"] = Data["CardHolderID"].astype(str)
    Data["KYC_Status"] = Data["KYC_Status"].astype(str)
    Data["CardStatus"] = Data["CardStatus"].astype(str)

    Data.loc[Data["DistributorCode"].isin(["203", "914", "915"]),
             "IsRenewal"] = 1

    Data = Data[sorted(Data.columns)]

    # Delete leading "00" at the start of string.

    Data["NoMobile"] = Data["NoMobile"].str.replace("^00", "", regex=True)

    # replace .0 at the end$

    Data["NoMobile"] = Data["NoMobile"].str.replace("\.0$", "", regex=True)

    # delete only literal '|' from string

    Data["NoMobile"] = Data["NoMobile"].str.replace("\|", "", regex=True)

    # Step 3: identify which cards changed status since the last load

    data_new = Data[["CardHolderID", "CardStatus", "KYC_Status"]]
    outer_join = data_current.merge(data_new, how='outer', indicator=True)
    outer_join = outer_join[outer_join["_merge"] == "right_only"]

    # set 2 : identify new cardholder ID
    new_card_holder_id = set(outer_join["CardHolderID"].unique()).difference(
        data_current["CardHolderID"].unique())

    ### set 3 : insert old values into changes table
    data_to_change = data_current[data_current["CardHolderID"].isin(
        set(outer_join.loc[
            ~outer_join["CardHolderID"].isin(new_card_holder_id),
            "CardHolderID"]))].copy()  # copy() avoids a SettingWithCopyWarning below

    FileName = filepath.split('/')[-1].replace(".csv", "")
    DateFile = pd.to_datetime(
        FileName.split("-")[1] + "-" + FileName.split("-")[2] + "-" +
        FileName.split("-")[3]) - datetime.timedelta(days=1)

    data_to_change["dt_change"] = DateFile

    InsertTableIntoDatabase(data_to_change,
                            "CHANGE_STATUS_CARTES",
                            "CARD_STATUS",
                            "Postgres",
                            "Creacard_Calypso",
                            DropTable=False)

    engine = connect_to_database("Postgres", "Creacard_Calypso").CreateEngine()

    TlbName = "STATUS_CARTES"
    schema = "CARD_STATUS"
    database_type = "Postgres"
    database_name = "Creacard_Calypso"

    query_delete = """
    
    delete from "CARD_STATUS"."STATUS_CARTES"
    
    """
    tic = time.time()
    engine.execute(query_delete)
    print("delete took the data {} seconds".format(time.time() - tic))

    engine.close()

    data = splitDataFrameIntoSmaller(Data, chunkSize=100000)

    num_process = max(1, multiprocessing.cpu_count() // 4)  # at least one worker
    tic = time.time()
    pool = Pool(num_process)
    pool.map(
        partial(insert_into_postgres_copyfrom,
                database_type=database_type,
                database_name=database_name,
                schema=schema,
                TlbName=TlbName), data)

    # map() blocks until all chunks are ingested, so close/join is sufficient
    pool.close()
    pool.join()
    toc = time.time() - tic

    print("ingestion was done in {} seconds ".format(toc))

    ### update the LastChangeDate columns (KYC & card status)

    con_postgres = connect_to_database(database_type,
                                       database_name).CreateEngine()

    query = """

       UPDATE "CARD_STATUS"."STATUS_CARTES"
       SET "LastChangeDate" = T1."max_date"
       FROM( 
       SELECT max("dt_change") as "max_date", "CardHolderID"
       FROM "CARD_STATUS"."CHANGE_STATUS_CARTES"
       GROUP BY "CardHolderID"
       ) as T1
       WHERE "CARD_STATUS"."STATUS_CARTES"."CardHolderID" = T1."CardHolderID"

       """

    con_postgres.execute(query)
    con_postgres.close()

    con_postgres = connect_to_database(database_type,
                                       database_name).CreateEngine()

    query = """

       update "CARD_STATUS"."STATUS_CARTES" as T1
       SET "ActivationDate" = "ActivationTime"
       FROM "CARD_STATUS"."ACTIVATION_REPORT" as T2
       WHERE 
       T1."CardHolderID" = T2."CardHolderID" and 
       "ActivationDate" is null 

       """

    con_postgres.execute(query)
    con_postgres.close()
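
# splitDataFrameIntoSmaller is imported from elsewhere in the codebase; a minimal
# sketch of the chunking behaviour assumed above (hypothetical reconstruction):
#
# def splitDataFrameIntoSmaller(df, chunkSize=100000):
#     # consecutive row blocks of at most chunkSize rows each
#     return [df.iloc[i:i + chunkSize] for i in range(0, len(df), chunkSize)]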

def run():
    ### Inputs
    folder_2 = "F:/HISTORIQUE/CARD_STATUS/CardStatus2/"

    csv_params = {
        'sep': ",",
        'error_bad_lines': True,
        'encoding': 'iso-8859-1',
        'header': None
    }
    ingestion_params = dict()
    ingestion_params["protocole_type"] = "SFTP"
    ingestion_params["protocole_name"] = "pfs_sftp"
    ingestion_params["csv_params"] = csv_params
    ingestion_params["copy_to_filesystem"] = None

    Folder = "PCSFrance/CardStatus2/"
    batch_folder = "C:/Users/creacard/Documents/launchers/card_status/"

    database_type = "Postgres"
    database_name = "Creacard_Calypso"

    logger = LogErrorFromInsert.CreateLogger("card_status", "Ingestion_",
                                             'F:/LOGS/')

    tmp_ingestion = dict()

    # params task checkers
    param_task_checker = {}
    param_task_checker["TASK_TYPE"] = "TEXT"
    param_task_checker["TASK_DT"] = "timestamp without time zone"
    param_task_checker["STATUS"] = "TEXT"

    date_now = datetime.datetime.now()

    _file_date_condition = dict()
    _file_date_condition["start"] = [
        date_now.year, date_now.month, date_now.day
    ]
    _file_date_condition["end"] = [date_now.year, date_now.month, date_now.day]

    ###

    ListOfFile = list_files_protocole(ingestion_params["protocole_type"],
                                      ingestion_params["protocole_name"],
                                      Folder)

    ii = pd.DataFrame(ListOfFile.FileName.str.replace(".csv", "", regex=False),
                      columns=["FileName"])
    Month = []
    Year = []
    Day = []
    ii = ii.FileName.str.split('-')
    for i in range(0, len(ii)):
        Year.append(ii[i][1])
        Month.append(ii[i][2])
        Day.append(ii[i][3])
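
    # equivalent, more idiomatic sketch (assuming the "name-YYYY-MM-DD" layout):
    # parts = ListOfFile.FileName.str.replace(".csv", "", regex=False).str.split("-", expand=True)
    # Year, Month, Day = parts[1], parts[2], parts[3]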

    ListOfFile = pd.concat([
        ListOfFile,
        pd.DataFrame(Year, columns=['Year']),
        pd.DataFrame(Month, columns=['Month']),
        pd.DataFrame(Day, columns=['Day'])
    ],
                           axis=1)
    ListOfFile["FilePath"] = Folder + ListOfFile["FileName"]
    ListOfFile["FileTime"] = ListOfFile["Year"] + "-" + ListOfFile[
        "Month"] + "-" + ListOfFile["Day"]
    ListOfFile["FileTime"] = pd.to_datetime(ListOfFile.FileTime,
                                            errors='coerce')

    if _file_date_condition is not None:
        if len(_file_date_condition) == 2:
            ListOfFile = ListOfFile[
                (ListOfFile.loc[:, "FileTime"] >= datetime.datetime(
                    _file_date_condition["start"][0], _file_date_condition[
                        "start"][1], _file_date_condition["start"][2]))
                & (ListOfFile.loc[:, "FileTime"] <= datetime.datetime(
                    _file_date_condition["end"][0], _file_date_condition["end"]
                    [1], _file_date_condition["end"][2]))]
        else:
            ListOfFile = ListOfFile[(ListOfFile.loc[:, "FileTime"] >=
                                     datetime.datetime(
                                         _file_date_condition["start"][0],
                                         _file_date_condition["start"][1],
                                         _file_date_condition["start"][2]))]

    ListOfFile = ListOfFile.reset_index(drop=True)

    try:

        copy_by_putty(Folder, str(ListOfFile.loc[0, "FileName"]), folder_2,
                      batch_folder, "batch_putty_card_status.bat",
                      "args_card_status_batch.sftp")

        print(folder_2 + str(ListOfFile.loc[0, "FileName"]))

        # read the data
        Data = read_csv_protocole(
            "LOCAL", None, folder_2 + str(ListOfFile.loc[0, "FileName"]),
            ingestion_params["csv_params"])
        print(Data.head())

        daily_card_status2(Data, folder_2 + str(ListOfFile.loc[0, "FileName"]),
                           database_type, database_name)

        tmp_ingestion["TASK_TYPE"] = "card_status_dayli"
        tmp_ingestion["TASK_DT"] = str(datetime.datetime.now())
        tmp_ingestion["STATUS"] = "ok"

        tmp_ingestion = pd.DataFrame.from_dict(tmp_ingestion, orient="index").T

        InsertTableIntoDatabase(tmp_ingestion,
                                "STATUS_TASKS",
                                "TASKS_CHECKER",
                                "Postgres",
                                "Creacard_Calypso",
                                DropTable=False,
                                TableDict=param_task_checker)

    except Exception as e:

        tmp_ingestion["TASK_TYPE"] = "card_status_dayli"
        tmp_ingestion["TASK_DT"] = str(datetime.datetime.now())
        tmp_ingestion["STATUS"] = "failed"

        tmp_ingestion = pd.DataFrame.from_dict(tmp_ingestion, orient="index").T

        InsertTableIntoDatabase(tmp_ingestion,
                                "STATUS_TASKS",
                                "TASKS_CHECKER",
                                "Postgres",
                                "Creacard_Calypso",
                                DropTable=False,
                                TableDict=param_task_checker)

        if logger is not None:
            logger.error(e, exc_info=True)
Exemplo n.º 11
0
def create_update_dictionnaries_categorisation(database_type, database_name):

    engine = connect_to_database(database_type, database_name).CreateEngine()

    CreateSchema(engine, "REFERENTIEL")

    # Conf files location
    if sys.platform == "win32":
        Folder = os.path.expanduser('~') + "\\conf_python\\categorisation_univers\\"
    else:
        Folder = os.environ['HOME'] + "/conf_python/categorisation_univers/"

    # Referentials table
    FileDescription = "description_univers.xlsx"

    DataDescription = pd.read_excel(Folder + FileDescription)

    TableParameter = {}
    TableParameter["UNIVERS_DATABASE"] = "VARCHAR (50)"
    TableParameter["SOUS_UNIVERS_DATABASE"] = "VARCHAR (100)"
    TableParameter["UNIVERS"] = "VARCHAR (50)"
    TableParameter["SOUS_UNIVERS"] = "VARCHAR (100)"
    TableParameter["DESCRIPTION"] = "TEXT"

    InsertTableIntoDatabase(DataDescription,
                            "UNIVERS_DESCRIPTION",
                            "REFERENTIEL",
                            database_type, database_name,
                            DropTable=True,
                            TableDict=TableParameter)

    del DataDescription
    del FileDescription

    # link between MCC and MCC_CODE
    FileMCC = "mcc_code_link.xlsx"

    DataFileMCC = pd.read_excel(Folder + FileMCC, dtype={'MCC_CODE': str, 'MCC': object})

    TableParameter = {}
    TableParameter["MCC_CODE"] = "VARCHAR (20)"
    TableParameter["MCC"] = "TEXT"

    InsertTableIntoDatabase(DataFileMCC,
                            "MCC_CODE_LINK",
                            "REFERENTIEL",
                            database_type, database_name,
                            DropTable=True,
                            TableDict=TableParameter)

    del FileMCC
    del DataFileMCC

    # Ingest MCC_CATEGORIES
    FileMCCCat = "mcc_categories.xlsx"

    DataMCCCat = pd.read_excel(Folder + FileMCCCat)

    TableParameter = {}
    TableParameter["MCC_NAME"] = "TEXT"
    TableParameter["SOUS_UNIVERS"] = "VARCHAR (100)"
    TableParameter["UNIVERS"] = "VARCHAR (50)"
    TableParameter["MCC_CODE"] = "VARCHAR (20)"
    TableParameter["NOTE"] = "INTEGER"

    InsertTableIntoDatabase(DataMCCCat,
                            "MCC_CATEGORIES",
                            "REFERENTIEL",
                            database_type, database_name,
                            DropTable=True,
                            TableDict=TableParameter)

    del FileMCCCat
    del DataMCCCat

    # Regex exclusion
    FileRegexExclu = "regex_merchant.xlsx"

    Data1 = pd.read_excel(Folder + FileRegexExclu, sheet_name='Regex exclu')
    Data1 = Data1[~Data1.UNIVERS.isna()]
    TableParameter = {}
    TableParameter["MCC"] = "TEXT"
    TableParameter["Regex"] = "TEXT"
    TableParameter["UNIVERS"] = "VARCHAR (50)"
    TableParameter["SOUS_UNIVERS"] = "VARCHAR (100)"
    TableParameter["NEW_REGEX"] = "TEXT"
    TableParameter["MCC_CODE"] = "VARCHAR (20)"

    InsertTableIntoDatabase(Data1,
                            "REGEX_EXCLUDED",
                            "REFERENTIEL",
                            database_type, database_name,
                            DropTable=True,
                            TableDict=TableParameter)

    del FileRegexExclu
    del Data1

    # Update MCC code for Regex excluded

    query = """
    UPDATE "REFERENTIEL"."REGEX_EXCLUDED"
    set "MCC_CODE" = T2."MCC_CODE"
    FROM "REFERENTIEL"."MCC_CATEGORIES" AS T2
    where "MCC" = T2."MCC_NAME"
    """

    engine.execute(query)

    # Regex inclusion
    FileRegexExclu = "regex_ajout.xlsx"

    Data1 = pd.read_excel(Folder + FileRegexExclu, sheet_name='Regex ajout')
    Data1 = Data1[~Data1.UNIVERS.isna()]
    TableParameter = {}
    TableParameter["Regex"] = "TEXT"
    TableParameter["UNIVERS"] = "VARCHAR (50)"
    TableParameter["SOUS_UNIVERS"] = "VARCHAR (100)"
    TableParameter["NEW_REGEX"] = "TEXT"

    InsertTableIntoDatabase(Data1,
                            "REGEX_INCLUDED",
                            "REFERENTIEL",
                            database_type, database_name,
                            DropTable=True,
                            TableDict=TableParameter)

    del FileRegexInclu
    del Data1

    engine.close()
Exemplo n.º 12
0
def create_tmp_id(schema, tlb, schema_main):

    if sys.platform == "win32":
        folder_json = os.path.expanduser('~') + "\\conf_python\\unique_id_conditions.json"
    else:
        folder_json = os.environ['HOME'] + "/conf_python/unique_id_conditions.json"
    with open(folder_json, 'r') as JSON:
        conditions = json.load(JSON)

    condition = conditions["exclusion_cartes"]["request"]
    condition_on_email = conditions["condition_email"]["dataframe"]
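
    # the conf file itself is not shown here; an illustrative shape (assumption),
    # inferred from the keys read above:
    # {
    #   "exclusion_cartes":      {"request": "\"CardStatus\" = 'Open'"},
    #   "condition_email":       {"dataframe": ".*test.*|.*fake.*"},
    #   "condition_combinaison": {"LastName": ".*test.*",
    #                             "BirthDate": "1900-01-01,1901-01-01"}
    # }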

    engine = connect_to_database("Postgres", "Creacard_Calypso").CreateEngine()

    CreateSchema(engine, schema_main)

    query = """

      select "CardHolderID", "NoMobile", lower("Email") as "Email", "FirstName", "LastName", "BirthDate", "PostCode", "Address1", "Address2",
      "ActivationDate"
      from "{}"."{}"
      where {}

      """.format(schema, tlb, condition)

    data = pd.read_sql(query, con=engine)

    engine.close()
    # normalise the text columns: blank out nulls, coerce to string, strip and
    # remove spaces, lowercase
    for var in ["FirstName", "LastName", "Address1", "Address2", "PostCode", "Email"]:
        data.loc[data[var].isnull(), var] = ""
        data[var] = data[var].astype(str)
        data[var] = data[var].str.strip(" ")
        data[var] = data[var].str.replace(" ", "", regex=False)
        data[var] = data[var].str.lower()

    data = data[~data["Email"].str.contains('.*creacard.*|.*prepaidfinancial.*|.*financial.*', regex=True)]

    data["GoodEmail"] = 1
    data.loc[data["Email"].str.contains(condition_on_email, regex=True), "GoodEmail"] = 0

    data["GoodCombinaison"] = 1
    data.loc[(data["LastName"].str.contains(conditions["condition_combinaison"]["LastName"], regex=True)) |
             (data["BirthDate"].isnull()) | (data["BirthDate"].isin(conditions["condition_combinaison"]["BirthDate"].split(","))), "GoodCombinaison"] = 0

    # Delete leading "00" at the start of string.

    data["NoMobile"] = data["NoMobile"].str.replace("^00", "", regex=True)

    # replace .0 at the end$

    data["NoMobile"] = data["NoMobile"].str.replace("\.0$", "", regex=True)

    # delete only literal '|' from string

    data["NoMobile"] = data["NoMobile"].str.replace("\|", "", regex=True)

    query = """
 
    DROP TABLE IF EXISTS "CUSTOMERS"."TMP_USER_ID" CASCADE

    """

    engine = connect_to_database("Postgres", "Creacard_Calypso").CreateEngine()
    engine.execute(query)
    engine.close()

    query = """
    
    
    CREATE TABLE "{}"."TMP_USER_ID"(
    
        "CardHolderID" VARCHAR(50),
        "NoMobile" TEXT,
        "Email" TEXT,
        "FirstName" TEXT,
        "LastName" TEXT,
        "BirthDate" TEXT,
        "PostCode" TEXT,
        "Address1" TEXT, 
        "Address2" TEXT,
        "ActivationDate" timestamp without time zone,
        "GoodEmail" INTEGER,
        "GoodCombinaison" INTEGER
    )
    
    """.format(schema_main)

    engine = connect_to_database("Postgres", "Creacard_Calypso").CreateEngine()
    engine.execute(query)
    engine.close()

    data = data[~data["NoMobile"].isnull()]

    InsertTableIntoDatabase(data, TlbName="TMP_USER_ID", Schema=schema_main,
                            database_name="Creacard_Calypso",
                            database_type="Postgres",
                            DropTable=False,
                            InstertInParrell=False)
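
# example invocation (illustrative; the schema/table names here are assumptions):
# create_tmp_id(schema="CARD_STATUS", tlb="STATUS_CARTES", schema_main="CUSTOMERS")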