示例#1
0
	def move_annual_company_data(self):
		i, j = 0, 0
		dfac = db.pandas_read('SELECT ID, BatchID, CompanyID,[Company Name] FROM BAP.AnnualCompanyData')
		dfdc = db.pandas_read('SELECT CompanyID, CompanyName FROM Reporting.DimCompany')
		dfac['BasicName'] = dfac.apply(lambda dfs: CM.get_basic_name(dfs['Company Name']), axis=1)
		dfdc['BasicName'] = dfdc.apply(lambda dfs: CM.get_basic_name(dfs.CompanyName), axis=1)
		for i, c in dfac.iterrows():
			dfc = dfdc[dfdc['BasicName'] == c.BasicName]
			val = dict()
			if len(dfc) > 0:
				i+=1
				db.execute(sql.sql_annual_comapny_data_update.value.format(dfc.CompanyID.values[0], c.ID))
				print(sql.sql_annual_comapny_data_update.value.format(dfc.CompanyID.values[0], c.ID))
			else:
				j+=1
				print(sql.sql_dim_company_insert.value)
				new_com_id = self.batch.get_table_seed('MaRSDataCatalyst.Reporting.DimCompany', 'CompanyID') + 1
				val['CompanyID'] = new_com_id
				val['Company Name'] = c['Company Name']
				val['Description'] = None
				val['Phone'] = None
				val['Phone2'] = None
				val['Fax'] = None
				val['Email'] = None
				val['Website'] = None
				val['CompanyType'] = None
				val['BatchID'] = c.BatchID
				val['ModifiedDate'] = str(dt.datetime.utcnow())[:-3]
				val['CreatedDate'] = str(dt.datetime.utcnow())[:-3]
				df = pd.DataFrame([val], columns=val.keys())
				values = CM.df_list(df)
				db.bulk_insert(sql.sql_dim_company_insert.value, values)
				db.execute(sql.sql_annual_comapny_data_update.value.format(new_com_id, c.ID))
		print('{} exists and {} doesn not exist'.format(i, j))
示例#2
0
	def update_cb_basic_company(self):
		df = db.pandas_read(sql.sql_cb_basic_company.value)
		for _, r in df.iterrows():
			basic_name = CM.get_basic_name(r['name'])
			sql_update = sql.sql_cb_basic_company_update.value.format(basic_name, CM.sql_compliant(r['org_uuid']))
			print(sql_update)
			db.execute(sql_update)
示例#3
0
	def update_tdw_basic_company(self):
		df = db.pandas_read(sql.sql_tdw_basic_company.value)
		for _, r in df.iterrows():
			basic_name = CM.get_basic_name(r.legal_name)
			sql_update = sql.sql_tdw_basic_company_update.value.format(basic_name, CM.sql_compliant(r.legal_name))
			print(sql_update)
			db.execute(sql_update)
示例#4
0
 def get_ventures(self):
     sql_venture = 'SELECT CompanyID, CompanyName FROM Reporting.DimCompany WHERE BasicName IS NULL AND CompanyName IS NOT NULL'  #AND BatchID NOT IN (3496, 3497,3498, 3499)'
     data = self.db.pandas_read(sql_venture)
     sql_update = 'UPDATE Reporting.DimCompany SET BasicName = \'{}\' WHERE CompanyID = {}'
     for index, row in data.iterrows():
         basic_name = common.get_basic_name(row[1])
         # print(sql_update.format(basic_name, row[0]))
         self.db.execute(sql_update.format(basic_name, row[0]))
示例#5
0
 def update_basic_name(select, key, venture_name, update):
     data = DB.pandas_read(select)
     for _, r in data.iterrows():
         basic_name = Common.get_basic_name(r['{}'.format(venture_name)])
         ven_name = r['{}'.format(venture_name)]
         basic_name = basic_name.replace("'", "\''")
         sql_update = update.format(
             basic_name, Common.sql_compliant(r['{}'.format(key)]))
         DB.execute(sql_update)
         print('{}({})'.format(ven_name, basic_name))
示例#6
0
	def combine_missing_data():
		quarterly_missing = BapQuarterly.file.combine_bap_missing_source_file(
			current_path=fp.path_missing_bap_etl.value)
		quarterly_missing = quarterly_missing.where(pd.notnull(quarterly_missing), None)
		quarterly_missing['BasicName'] = quarterly_missing.apply(lambda dfs: COM.get_basic_name(dfs.CompanyName),
																 axis=1)
		df = quarterly_missing.where(pd.notnull(quarterly_missing), None)
		print(df.columns)
		dfs = df[['CompanyName', 'BasicName', 'Website', 'AnnualRevenue', 'NumberOfEmployees', 'FundingToDate',
				  'DataSource']]
		BapQuarterly.file.save_as_csv(dfs, '00 BAP Missing data Combined.xlsx', os.getcwd(), 'BAP Missing data')
		print(dfs.head())
示例#7
0
	def generate_basic_name(self, df):
		df['BasicName'] = df.apply(lambda dfs: CM.get_basic_name(dfs.Name), axis=1)
		return df