示例#1
0
 def __init__(self):
     """Load the RIC company fact rows from both reporting databases.

     Reads the corresponding columns from
     ``MDCReport.BAPQ.FactRICCompany`` and
     ``MaRSDataCatalyst.Reporting.FactRICCompanyData`` and stores each
     result, converted with ``common.df_list``, on the instance.
     ``self.records`` starts out as an empty list.
     """
     mdc_report_query = (
         'SELECT RICCompanyDataID, CompanyID,DataSource,BatchID,DateID,AdvisoryServicesHours,'
         'VolunteerMentorHours, AnnualRevenue, NumberEmployees,FundingToDate, FundingCurrentQuarter, '
         'HighPotential,SocialEnterprise '
         'FROM MDCReport.BAPQ.FactRICCompany'
     )
     catalyst_query = (
         'SELECT RICCompanyDataID, CompanyID,DataSourceID,BatchID,DateID,AdvisoryServicesHours,'
         'VolunteerMentorHours, AnnualRevenue, NumberEmployees,FundingToDate, FundingCurrentQuarter, '
         'HighPotential,SocialEnterprise FROM MaRSDataCatalyst.Reporting.FactRICCompanyData'
     )

     # Source side of the comparison.
     mdc_report_frame = db.pandas_read(mdc_report_query)
     self.MDCReport = common.df_list(mdc_report_frame)

     # Target side of the comparison.
     catalyst_frame = db.pandas_read(catalyst_query)
     self.MaRSDataCatalyst = common.df_list(catalyst_frame)

     self.records = []
示例#2
0
	def save_organization_detail(self, uuid, json_properties):
		"""Insert an organization detail row unless one already exists for *uuid*.

		The existence check runs the ``sql_org_detail_exists`` query formatted
		with *uuid*. When no row comes back, *json_properties* is extended in
		place with the bookkeeping keys below, projected onto
		``self.org_columns`` and written with the ``sql_org_short_insert``
		statement. When a row already exists only a notice is printed.

		:param uuid: organization identifier used for the lookup and stored
			as ``org_uuid``.
		:param json_properties: dict of organization properties; mutated in
			place. Must contain ``'name'`` when the organization already exists.
		"""
		existing = self.db.pandas_read(self.enum.SQL.sql_org_detail_exists.value.format(uuid))
		if len(existing) != 0:
			print('[{}] exists.'.format(json_properties['name']))
			return

		# Bookkeeping fields that accompany every newly stored organization.
		json_properties['org_uuid'] = uuid
		json_properties['batch'] = 3862
		json_properties['company_id'] = None
		json_properties['BasicName'] = None
		json_properties['fetched'] = 0

		frame = pd.DataFrame([json_properties], columns=self.org_columns)
		first_row = CM.df_list(frame)[0]

		cleaned = []
		for item in first_row:
			if isinstance(item, list):
				# Lists are flattened to a single string with no separator.
				cleaned.append(''.join(str(part) for part in item))
			elif isinstance(item, str):
				# Strip line breaks and parentheses after the SQL clean-up,
				# since the value ends up inside a formatted SQL statement.
				text = self.common.sql_compliant(item)
				for old, new in (('\r', ' '), ('\n', ' '), ('(', ' - '), (')', '')):
					text = text.replace(old, new)
				cleaned.append(text)
			elif item is None:
				cleaned.append(self.common.sql_compliant(''))
			else:
				cleaned.append(item)

		# The tuple's repr supplies the VALUES list; booleans and double
		# quotes are then rewritten into SQL-friendly literals.
		statement = self.enum.SQL.sql_org_short_insert.value.format(tuple(cleaned))
		statement = statement.replace('True', '1').replace('False', '0').replace('"', "'")
		self.db.execute(statement)
示例#3
0
	def move_annual_company_data(self):
		"""Link every BAP annual company row to a DimCompany record.

		Rows from ``BAP.AnnualCompanyData`` are matched against
		``Reporting.DimCompany`` on the basic name produced by
		``CM.get_basic_name``. A matched row is updated with the existing
		CompanyID. An unmatched row gets a new DimCompany record (next seed
		value) and is then updated with that new CompanyID. A summary of
		matched / created counts is printed at the end.
		"""
		# Dedicated counters: the loop variable must not reuse these names,
		# otherwise the summary reports a row index instead of a count.
		matched, created = 0, 0
		dfac = db.pandas_read('SELECT ID, BatchID, CompanyID,[Company Name] FROM BAP.AnnualCompanyData')
		dfdc = db.pandas_read('SELECT CompanyID, CompanyName FROM Reporting.DimCompany')
		dfac['BasicName'] = dfac.apply(lambda dfs: CM.get_basic_name(dfs['Company Name']), axis=1)
		dfdc['BasicName'] = dfdc.apply(lambda dfs: CM.get_basic_name(dfs.CompanyName), axis=1)
		for _, c in dfac.iterrows():
			dfc = dfdc[dfdc['BasicName'] == c.BasicName]
			if len(dfc) > 0:
				matched += 1
				# When several companies share the basic name, the first one wins.
				update_sql = sql.sql_annual_comapny_data_update.value.format(dfc.CompanyID.values[0], c.ID)
				db.execute(update_sql)
				print(update_sql)
			else:
				created += 1
				print(sql.sql_dim_company_insert.value)
				new_com_id = self.batch.get_table_seed('MaRSDataCatalyst.Reporting.DimCompany', 'CompanyID') + 1
				# Millisecond-precision timestamp. isoformat() always emits the
				# fractional part, unlike str(datetime)[:-3], which drops the
				# seconds whenever the microsecond field happens to be 0.
				timestamp = dt.datetime.utcnow().isoformat(sep=' ', timespec='milliseconds')
				val = {
					'CompanyID': new_com_id,
					'Company Name': c['Company Name'],
					'Description': None,
					'Phone': None,
					'Phone2': None,
					'Fax': None,
					'Email': None,
					'Website': None,
					'CompanyType': None,
					'BatchID': c.BatchID,
					'ModifiedDate': timestamp,
					'CreatedDate': timestamp,
				}
				df = pd.DataFrame([val], columns=val.keys())
				values = CM.df_list(df)
				db.bulk_insert(sql.sql_dim_company_insert.value, values)
				db.execute(sql.sql_annual_comapny_data_update.value.format(new_com_id, c.ID))
				# NOTE(review): dfdc is not refreshed after the insert, so a later
				# row with the same basic name creates another company - confirm
				# whether that is intended.
		print('{} exists and {} doesn not exist'.format(matched, created))
示例#4
0
	def push_entity_to_db(self, json, org_uuid, sql_insert, uuid, i=0, fk_uuid='org_uuid', columns=None):
		"""Insert the properties of one entity payload as a single row.

		The properties dict is taken from *json* directly when it has a
		properties key; otherwise from its single item (``'OneToOne'``) or from
		item *i* (``'OneToMany'``), depending on the cardinality field.
		Missing ``'uuid'`` and *fk_uuid* keys are filled in, and the row is
		written with ``db.bulk_insert``.

		:param json: entity payload (dict); its properties dict is mutated in place.
		:param org_uuid: value stored under *fk_uuid* when that key is absent.
		:param sql_insert: parameterised INSERT statement passed to ``db.bulk_insert``.
		:param uuid: value stored under ``'uuid'`` when that key is absent.
		:param i: index of the item to use for ``'OneToMany'`` payloads.
		:param fk_uuid: name of the foreign-key property.
		:param columns: optional column names selecting/ordering the inserted
			values; ``None`` or an empty sequence keeps every property.

		Any exception is printed rather than raised, so one bad entity does
		not stop the caller's loop.
		"""
		try:
			json_properties = None
			if CBDict.properties.value in json.keys():
				json_properties = json[CBDict.properties.value]
			elif json[CBDict.cardinality.value] == 'OneToOne':
				json_properties = json[CBDict.item.value][CBDict.properties.value]
			elif json[CBDict.cardinality.value] == 'OneToMany':
				json_properties = json[CBDict.items.value][i][CBDict.properties.value]

			if json_properties is None:
				# Unknown payload shape: report it clearly instead of failing
				# later with an AttributeError on None.
				print('No properties found in entity payload; nothing inserted.')
				return

			if 'uuid' not in json_properties:
				json_properties['uuid'] = uuid
			if fk_uuid not in json_properties:
				json_properties[fk_uuid] = org_uuid

			df_properties = pd.DataFrame([json_properties], columns=json_properties.keys())
			if columns is not None and len(columns) > 0:
				df_properties = df_properties[columns]

			row = CM.df_list(df_properties)[0]
			val = []
			for item in row:
				if isinstance(item, list):
					# Lists are stored as one ' , '-separated string.
					val.append(' , '.join(str(x) for x in item))
				elif isinstance(item, str):
					val.append(self.common.sql_compliant(item))
				else:
					val.append(item)
			db.bulk_insert(sql_insert, [val])
		except Exception as ex:
			print(ex)
示例#5
0
 def save_data_chunk(df,
                     sql_insert,
                     chunk_size=1000,
                     capture_fails=False,
                     fail_path_key=''):
     """Insert *df* into the database in consecutive chunks of rows.

     :param df: DataFrame whose rows are inserted.
     :param sql_insert: parameterised INSERT statement handed to
         ``DB.bulk_insert``.
     :param chunk_size: maximum number of rows per insert; must be positive.
     :param capture_fails: when true, ``DB.bulk_insert`` is asked for a
         status message and a chunk reported as ``'FAILURE'`` is saved to
         an Excel file.
     :param fail_path_key: path key passed to ``Common.save_as_excel`` for
         failed chunks; when empty, failed chunks are reported but not saved.
     :raises ValueError: if *chunk_size* is not positive (the loop could
         never advance).

     An empty *df* results in no insert at all.
     """
     if chunk_size <= 0:
         raise ValueError('chunk_size must be a positive integer')

     # Use the real row count: counting one extra row produced an empty
     # trailing chunk whenever len(df) was a multiple of chunk_size.
     total_size = len(df)
     for i in range(0, total_size, chunk_size):
         j = min(i + chunk_size, total_size)
         # Millisecond timestamp; used to name the failure file.
         now = int(round(time.time() * 1000))
         print('From {} to {}'.format(i, j))
         df_insert = df.iloc[i:j]
         values = Common.df_list(df_insert)
         if capture_fails:
             msg = DB.bulk_insert(sql_insert, values, rtrn_msg=True)
             if msg == 'FAILURE':
                 filename = '{}_fail_chunk_{}_to_{}.xlsx'.format(now, i, j)
                 if fail_path_key != '':
                     Common.save_as_excel(dfs=[df_insert],
                                          file_name=filename,
                                          path_key=fail_path_key)
                     print("\tCHUNK FAILED. SAVED TO {}".format(filename))
                 else:
                     # No destination configured: still make the failure visible.
                     print("\tCHUNK FAILED. NOT SAVED (no fail_path_key given)")
         else:
             DB.bulk_insert(sql_insert, values)
         print('-' * 150)
示例#6
0
	def push_bap_missing_data_to_temp_table():
		"""Load the combined BAP missing-data workbook into its temp table.

		Changes the working directory to the report folder, reads the
		'BAP Missing data' sheet, sets ``CompanyID`` to 0 for every row and
		bulk-inserts the selected columns into ``BAP.BAP_FY18Q3_Missing_Data``.
		"""
		# NOTE(review): the second component is an absolute path, so on POSIX
		# os.path.join discards the home-directory prefix - confirm intent.
		report_dir = os.path.join(os.path.expanduser("~"), '/Users/mnadew/Box Sync/Workbench/BAP/BAP_FY18/FY18_Q3/for ETL/Missing data Reports')
		os.chdir(report_dir)

		missing = pd.read_excel('00 BAP Missing data Combined.xlsx', 'BAP Missing data')
		# Placeholder company id for every row.
		missing['CompanyID'] = 0

		# Order must line up with the eight positional placeholders below.
		ordered_columns = [
			'CompanyID', 'CompanyName', 'BasicName', 'Website',
			'AnnualRevenue', 'NumberOfEmployees', 'FundingToDate', 'DataSource',
		]
		selected = missing[ordered_columns]
		insert_statement = 'INSERT INTO BAP.BAP_FY18Q3_Missing_Data VALUES (?, ?, ?, ?, ?, ?, ?, ?)'
		rows = COM.df_list(selected)
		db.bulk_insert(insert_statement, rows)
示例#7
0
	def transfer_fact_ric_company_data():
		"""Copy the quarterly BAP fact rows into the FactRICCompany insert.

		Reads the rows selected by ``sql_bap_fact_ric_data_fyq4``, passes them
		through ``BapQuarterly.get_proper_values``, sets the ``Age`` column to
		``None`` and bulk-inserts the result with
		``sql_bap_fact_ric_company_insert``.
		"""
		source_frame = db.pandas_read(sql.sql_bap_fact_ric_data_fyq4.value)
		prepared = BapQuarterly.get_proper_values(source_frame)

		# Age is always written as NULL here. The month/year update, date
		# conversions and CSV export that used to sit in this method are disabled.
		prepared['Age'] = None

		rows = COM.df_list(prepared)
		db.bulk_insert(sql.sql_bap_fact_ric_company_insert.value, rows)
示例#8
0
 def update(self, table_name, source_id_col, company_id_col):
     """Write matched company ids from ``self.source_table`` into *table_name*.

     For each source row, the first row of *table_name* whose
     *source_id_col* value (as a string) equals the source row's first
     element is updated: *company_id_col* is set to the source row's
     second element.

     :param table_name: table to read from and update.
     :param source_id_col: column holding the source identifier.
     :param company_id_col: column that receives the company identifier.
     """
     select_statement = ('SELECT ' + source_id_col + ',' + company_id_col +
                         ' FROM ' + table_name)
     etl_rows = common.df_list(db.pandas_read(select_statement))

     for source_row in self.source_table:
         # Only the first matching ETL row is updated per source row.
         hit = next(
             (etl_row for etl_row in etl_rows
              if source_row[0] == str(etl_row[0])),
             None)
         if hit is None:
             continue
         db.execute('UPDATE ' + table_name + ' SET ' +
                    company_id_col + ' = ' + str(source_row[1]) +
                    ' WHERE ' + source_id_col + ' = ' +
                    str(hit[0]))
示例#9
0
    def nomatch_create_new(self):
        """Promote unmatched processed ventures to the Venture table.

        Selects ProcessedVenture rows with a negative ID that have no
        EntityMap entry and bulk-inserts them into Venture. If a source
        table is configured, its ID column is then aligned with Venture by
        joining on Name.
        """
        select_unmatched = (
            "SELECT * FROM MDC_DEV.dbo.ProcessedVenture AS a WHERE a.ID NOT IN "
            "(SELECT ID FROM MDC_DEV.dbo.EntityMap) AND a.ID < 0 "
        )
        unmatched_rows = common.df_list(db.pandas_read(select_unmatched))
        insert_venture = 'INSERT INTO MDC_DEV.dbo.Venture VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)'
        db.bulk_insert(insert_venture, unmatched_rows)

        if self.source_table is None:
            return

        # Bring the source table's IDs in line with the Venture table.
        table = self.source_table
        sync_ids = ('UPDATE ' + table + ' SET ID = b.ID FROM ' + table +
                    ' AS a INNER JOIN MDC_DEV.dbo.Venture AS b ON a.Name = b.Name')
        db.execute(sync_ids)
示例#10
0
	def insert_dim_company_source(self, new_company):
		"""Insert one row into Reporting.DimCompanySource for *new_company*.

		The new SourceCompanyID is the current table seed plus one and is kept
		on ``self.dim_company_source_id``; the row references
		``self.dim_company_id``.

		:param new_company: mapping providing ``'Name'``, ``'DataSource'`` and
			``'BatchID'``.

		Any exception is printed rather than raised.
		"""
		try:
			# Millisecond-precision timestamp. isoformat() always emits the
			# fractional part, unlike str(datetime)[:-3], which drops the
			# seconds whenever the microsecond field happens to be 0.
			date_time = dt.datetime.utcnow().isoformat(sep=' ', timespec='milliseconds')
			self.dim_company_source_id = self.get_table_seed('Reporting.DimCompanySource', 'SourceCompanyID') + 1
			# NOTE(review): the letter prefixes look intended to pin column order;
			# 'eDataSource' and 'eBatchID' share a prefix, so confirm the order
			# still matches the INSERT statement if columns are ever sorted by name.
			dc = {
				'aSourceID': self.dim_company_source_id,
				'bCompanyID': self.dim_company_id,
				'cName': new_company['Name'],
				'dSCC': None,
				'eDataSource': new_company['DataSource'],
				'eBatchID': new_company['BatchID'],
				'fCT': None,
				'gModified': date_time,
				'hCreated': date_time,
			}
			df = pd.DataFrame.from_dict([dc], orient='columns')
			values = CM.df_list(df)
			db.bulk_insert(sql.sql_dim_company_source_insert.value, values)
		except Exception as ex:
			print(ex)
示例#11
0
	def insert_dim_company(self, new_company):
		"""Insert one row into Reporting.DimCompany for *new_company*.

		The new CompanyID is the current table seed plus one and is kept on
		``self.dim_company_id``.

		:param new_company: mapping providing ``'Name'``, ``'Website'`` and
			``'BatchID'``.

		Any exception is printed rather than raised.
		"""
		try:
			self.dim_company_id = self.get_table_seed('Reporting.DimCompany', 'CompanyID') + 1
			# Millisecond-precision timestamp. isoformat() always emits the
			# fractional part, unlike str(datetime)[:-3], which drops the
			# seconds whenever the microsecond field happens to be 0.
			date_time = dt.datetime.utcnow().isoformat(sep=' ', timespec='milliseconds')
			# Letter prefixes keep the keys in the intended column order.
			dc = {
				'aCompanyID': self.dim_company_id,
				'bName': new_company['Name'],
				'cDescription': None,
				'dPhone': None,
				'ePhone2': None,
				'fFax': None,
				'gEmail': None,
				'hWebsite': new_company['Website'],
				'iCompanyType': None,
				'jBatchID': new_company['BatchID'],
				'kModifiedDate': date_time,
				'lCreatedDate': date_time,
			}
			df = pd.DataFrame.from_dict([dc], orient='columns')
			values = CM.df_list(df)
			db.bulk_insert(sql.sql_dim_company_insert.value, values)
		except Exception as es:
			print(es)
示例#12
0
    def fp_create_new(self):
        """Create ventures for matching false positives and propagate their IDs.

        ProcessedVenture rows with a negative ID that are listed in
        MatchingFalsePositives are bulk-inserted into Venture. The ID and
        FalseID columns of MatchingFalsePositives are then refreshed from
        Venture by name, and, if a source table is configured, its ID column
        is refreshed the same way.
        """
        new_ventures = common.df_list(
            db.pandas_read(
                "SELECT * FROM MDC_DEV.dbo.ProcessedVenture AS a WHERE a.ID IN "
                "(SELECT ID FROM MDC_DEV.dbo.MatchingFalsePositives) AND a.ID < 0"
            ))
        sql = 'INSERT INTO MDC_DEV.dbo.Venture VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)'
        db.bulk_insert(sql, new_ventures)

        # Update MatchingFalsePositives with the IDs the ventures received.
        db.execute(
            "UPDATE MDC_DEV.dbo.MatchingFalsePositives SET ID = a.ID "
            "FROM MDC_DEV.dbo.MatchingFalsePositives AS m INNER JOIN MDC_DEV.dbo.Venture AS a ON m.Name = a.Name"
        )
        db.execute(
            "UPDATE MDC_DEV.dbo.MatchingFalsePositives SET FalseID = a.ID "
            "FROM MDC_DEV.dbo.MatchingFalsePositives AS m INNER JOIN MDC_DEV.dbo.Venture AS a ON m.FalseName = a.Name"
        )

        # Update the source table with the new IDs.
        if self.source_table is not None:
            # The space before "AS a" is required: without it the table name
            # and alias keyword fuse into "<table>as" and the statement breaks.
            sql = ('UPDATE ' + self.source_table + ' SET ID = b.ID FROM ' +
                   self.source_table +
                   ' AS a INNER JOIN MDC_DEV.dbo.Venture AS b ON a.Name = b.Name')
            db.execute(sql)
示例#13
0
	def insert_new_venture(self):
		"""Bulk-insert the rows returned by ``sql_bap_new_company`` using ``sql_venture_insert``."""
		new_company_frame = db.pandas_read(sql.sql_bap_new_company.value)
		venture_rows = CM.df_list(new_company_frame)
		db.bulk_insert(sql.sql_venture_insert.value, venture_rows)
示例#14
0
 def __init__(self):
     """Cache the SourceID, ID and Name columns of MDC_DEV.dbo.SourceTable.

     The query result is converted with ``common.df_list`` and kept on
     ``self.source_table``.
     """
     source_frame = db.pandas_read(
         'SELECT SourceID, ID, Name FROM MDC_DEV.dbo.SourceTable')
     self.source_table = common.df_list(source_frame)
示例#15
0
	def bulk_insert_annual_data(dataframe):
		"""Bulk-insert *dataframe* using the ``sql_bap_ric_venture_annual_insert`` statement.

		:param dataframe: DataFrame whose rows are converted with ``COM.df_list``.
		"""
		annual_rows = COM.df_list(dataframe)
		db.bulk_insert(sql.sql_bap_ric_venture_annual_insert.value, annual_rows)
示例#16
0
	def bulk_insert_quarterly_data(dataframe):
		"""Bulk-insert *dataframe* using the ``sql_bap_ric_venture_quarterly_insert`` statement.

		:param dataframe: DataFrame whose rows are converted with ``COM.df_list``.
		"""
		quarterly_rows = COM.df_list(dataframe)
		db.bulk_insert(sql.sql_bap_ric_venture_quarterly_insert.value, quarterly_rows)
示例#17
0
	def transfer_csv_program_youth(dataframe):
		"""Bulk-insert *dataframe* using the ``sql_bap_ric_program_youth_insert`` statement.

		:param dataframe: DataFrame whose rows are converted with ``COM.df_list``.
		"""
		program_youth_rows = COM.df_list(dataframe)
		db.bulk_insert(sql.sql_bap_ric_program_youth_insert.value, program_youth_rows)
示例#18
0
	def bap_insert(df):
		"""Bulk-insert *df* using the ``sql_postal_code_insert`` statement.

		:param df: DataFrame whose rows are converted with ``COM.df_list``.
		"""
		postal_code_rows = COM.df_list(df)
		db.bulk_insert(sql.sql_postal_code_insert.value, postal_code_rows)
示例#19
0
# # EXACT MATCHING
# Reset the ProcessedVenture working table and refill it with a full copy of
# the Venture table.
db.execute("DELETE FROM MDC_DEV.dbo.ProcessedVenture")
db.execute("INSERT INTO MDC_DEV.dbo.ProcessedVenture SELECT * FROM MDC_DEV.dbo.Venture")

# # Insert ACTia target list xlsx into ProcessedVenture to match with database
# (Disabled alternative: load the source rows from a spreadsheet.)
# df = common.xl_to_dfs('/Users/ssimmons/Documents/',input)
# df = df['ACTia_targetlist_2018']
# vals_to_insert = common.df_list(df)
# Empty SourceTable before loading the rows to be matched.
db.execute("DELETE FROM MDC_DEV.dbo.SourceTable")
# sql = 'INSERT INTO MDC_DEV.dbo.SourceTable (ID,Name,Email,Phone) VALUES (?,?,?,?)' ## Edit based on dataset
# db.bulk_insert(sql, vals_to_insert)
# Active load path: copy the BAP quarterly company data into SourceTable
# (the quarterly row ID becomes SourceID, City is stored as Address).
db.execute('INSERT INTO MDC_DEV.dbo.SourceTable (SourceID, Name, BasicName, Website, Address, BatchID) '
           'SELECT ID,CompanyName, BasicName, Website, City, BatchID FROM MDCRaw.BAP.QuarterlyCompanyData')


# Read back the SourceIDs that were just loaded.
source = common.df_list(db.pandas_read("SELECT SourceID FROM MDC_DEV.dbo.SourceTable"))


# Pair every SourceID with a distinct negative placeholder ID: -1, -2, -3, ...
vals = []
k = -1
for i,v in enumerate(source):
    vals.append([k,v[0]])
    k -= 1

# Write the placeholder IDs back, one [ID, SourceID] parameter pair per row.
# NOTE(review): presumably bulk_insert executes the statement once per pair -
# confirm against db.bulk_insert.
sql = 'UPDATE MDC_DEV.dbo.SourceTable SET ID = ? WHERE SourceID = ?'
db.bulk_insert(sql, vals)

print('Starting exact matching')
e1 = exact()
# Start time of the matching run; not read again within the visible lines.
stime = time.time()
e1.match()