def move_annual_company_data(self): i, j = 0, 0 dfac = db.pandas_read('SELECT ID, BatchID, CompanyID,[Company Name] FROM BAP.AnnualCompanyData') dfdc = db.pandas_read('SELECT CompanyID, CompanyName FROM Reporting.DimCompany') dfac['BasicName'] = dfac.apply(lambda dfs: CM.get_basic_name(dfs['Company Name']), axis=1) dfdc['BasicName'] = dfdc.apply(lambda dfs: CM.get_basic_name(dfs.CompanyName), axis=1) for i, c in dfac.iterrows(): dfc = dfdc[dfdc['BasicName'] == c.BasicName] val = dict() if len(dfc) > 0: i+=1 db.execute(sql.sql_annual_comapny_data_update.value.format(dfc.CompanyID.values[0], c.ID)) print(sql.sql_annual_comapny_data_update.value.format(dfc.CompanyID.values[0], c.ID)) else: j+=1 print(sql.sql_dim_company_insert.value) new_com_id = self.batch.get_table_seed('MaRSDataCatalyst.Reporting.DimCompany', 'CompanyID') + 1 val['CompanyID'] = new_com_id val['Company Name'] = c['Company Name'] val['Description'] = None val['Phone'] = None val['Phone2'] = None val['Fax'] = None val['Email'] = None val['Website'] = None val['CompanyType'] = None val['BatchID'] = c.BatchID val['ModifiedDate'] = str(dt.datetime.utcnow())[:-3] val['CreatedDate'] = str(dt.datetime.utcnow())[:-3] df = pd.DataFrame([val], columns=val.keys()) values = CM.df_list(df) db.bulk_insert(sql.sql_dim_company_insert.value, values) db.execute(sql.sql_annual_comapny_data_update.value.format(new_com_id, c.ID)) print('{} exists and {} doesn not exist'.format(i, j))
def update_cb_basic_company(self): df = db.pandas_read(sql.sql_cb_basic_company.value) for _, r in df.iterrows(): basic_name = CM.get_basic_name(r['name']) sql_update = sql.sql_cb_basic_company_update.value.format(basic_name, CM.sql_compliant(r['org_uuid'])) print(sql_update) db.execute(sql_update)
def update_tdw_basic_company(self): df = db.pandas_read(sql.sql_tdw_basic_company.value) for _, r in df.iterrows(): basic_name = CM.get_basic_name(r.legal_name) sql_update = sql.sql_tdw_basic_company_update.value.format(basic_name, CM.sql_compliant(r.legal_name)) print(sql_update) db.execute(sql_update)
def get_ventures(self): sql_venture = 'SELECT CompanyID, CompanyName FROM Reporting.DimCompany WHERE BasicName IS NULL AND CompanyName IS NOT NULL' #AND BatchID NOT IN (3496, 3497,3498, 3499)' data = self.db.pandas_read(sql_venture) sql_update = 'UPDATE Reporting.DimCompany SET BasicName = \'{}\' WHERE CompanyID = {}' for index, row in data.iterrows(): basic_name = common.get_basic_name(row[1]) # print(sql_update.format(basic_name, row[0])) self.db.execute(sql_update.format(basic_name, row[0]))
def update_basic_name(select, key, venture_name, update): data = DB.pandas_read(select) for _, r in data.iterrows(): basic_name = Common.get_basic_name(r['{}'.format(venture_name)]) ven_name = r['{}'.format(venture_name)] basic_name = basic_name.replace("'", "\''") sql_update = update.format( basic_name, Common.sql_compliant(r['{}'.format(key)])) DB.execute(sql_update) print('{}({})'.format(ven_name, basic_name))
def combine_missing_data(): quarterly_missing = BapQuarterly.file.combine_bap_missing_source_file( current_path=fp.path_missing_bap_etl.value) quarterly_missing = quarterly_missing.where(pd.notnull(quarterly_missing), None) quarterly_missing['BasicName'] = quarterly_missing.apply(lambda dfs: COM.get_basic_name(dfs.CompanyName), axis=1) df = quarterly_missing.where(pd.notnull(quarterly_missing), None) print(df.columns) dfs = df[['CompanyName', 'BasicName', 'Website', 'AnnualRevenue', 'NumberOfEmployees', 'FundingToDate', 'DataSource']] BapQuarterly.file.save_as_csv(dfs, '00 BAP Missing data Combined.xlsx', os.getcwd(), 'BAP Missing data') print(dfs.head())
def generate_basic_name(self, df): df['BasicName'] = df.apply(lambda dfs: CM.get_basic_name(dfs.Name), axis=1) return df