def update_tdw_basic_company(self):
    """Backfill the normalized "basic name" for every TDW company.

    Reads all rows returned by sql_tdw_basic_company, derives a basic
    name from each legal name, and writes it back one UPDATE at a time
    (each statement is echoed before execution).
    """
    companies = db.pandas_read(sql.sql_tdw_basic_company.value)
    for _, company in companies.iterrows():
        statement = sql.sql_tdw_basic_company_update.value.format(
            CM.get_basic_name(company.legal_name),
            CM.sql_compliant(company.legal_name))
        print(statement)
        db.execute(statement)
def _main_():
    """Build the qs_metadata shelve: {column: {filter_value: [question_ids]}}."""
    # Widen pandas console output for easier inspection.
    pd.set_option('display.width', 320)
    # Open qs_metadata.xlsx and take the first sheet as a dataframe.
    meta_df = CM.xl_to_dfs(os.getcwd(), '/qs_metadata.xlsx')['Sheet1']
    # Every column's distinct metadata values (may change to all column names).
    distinct_by_col = {col: CM.distinct_from_df(meta_df, col) for col in list(meta_df)}
    # For each column that actually has values, map filter value -> question ids.
    q_meta_shelve = {}
    for col, filter_vals in distinct_by_col.items():
        if len(filter_vals) > 0:
            q_meta_shelve[col] = {
                filter_val: ids_to_list(meta_df, 'id', str(col), filter_val)
                for filter_val in filter_vals
            }
    # Persist the nested dict as a shelve.
    create_write_shelve(q_meta_shelve, 'qs_metadata')
def get_proper_values(df):
    """Normalize reporting columns of a BAP dataframe in place and return it.

    Stage text becomes a StageLevelID; the three yes/no flag columns are
    normalized through COM.get_yes_no.
    """
    df['StageLevelID'] = df.apply(lambda row: COM.get_stage_level(row.Stage), axis=1)
    for flag in ('High Potential y/n', 'Social Enterprise y/n', 'Youth'):
        df[flag] = df.apply(lambda row, col=flag: COM.get_yes_no(row[col]), axis=1)
    # Disabled: funding-range splitting.
    # df['Funding Raised to Date $CAN'] = df.apply(
    #     lambda row: BapQuarterly.split_funding_range(row['Funding Raised to Date $CAN']), axis=1)
    return df
def move_annual_company_data(self):
    """Link BAP.AnnualCompanyData rows to Reporting.DimCompany.

    Matches annual rows to DimCompany by normalized ("basic") company
    name. On a hit, the existing CompanyID is stamped onto the annual
    row; on a miss, a new DimCompany record is inserted with a freshly
    seeded CompanyID which is then stamped onto the annual row.

    Fix: the original reused `i` both as the match counter and as the
    iterrows() loop variable, so the counter was clobbered every
    iteration and the final summary was wrong.
    """
    matched, created = 0, 0
    dfac = db.pandas_read('SELECT ID, BatchID, CompanyID,[Company Name] FROM BAP.AnnualCompanyData')
    dfdc = db.pandas_read('SELECT CompanyID, CompanyName FROM Reporting.DimCompany')
    dfac['BasicName'] = dfac.apply(lambda dfs: CM.get_basic_name(dfs['Company Name']), axis=1)
    dfdc['BasicName'] = dfdc.apply(lambda dfs: CM.get_basic_name(dfs.CompanyName), axis=1)
    for _, c in dfac.iterrows():
        dfc = dfdc[dfdc['BasicName'] == c.BasicName]
        if len(dfc) > 0:
            matched += 1
            update_sql = sql.sql_annual_comapny_data_update.value.format(dfc.CompanyID.values[0], c.ID)
            db.execute(update_sql)
            print(update_sql)
        else:
            created += 1
            print(sql.sql_dim_company_insert.value)
            # Seed a new CompanyID off the current max.
            new_com_id = self.batch.get_table_seed('MaRSDataCatalyst.Reporting.DimCompany', 'CompanyID') + 1
            now = str(dt.datetime.utcnow())[:-3]
            val = {
                'CompanyID': new_com_id,
                'Company Name': c['Company Name'],
                'Description': None,
                'Phone': None,
                'Phone2': None,
                'Fax': None,
                'Email': None,
                'Website': None,
                'CompanyType': None,
                'BatchID': c.BatchID,
                'ModifiedDate': now,
                'CreatedDate': now,
            }
            df = pd.DataFrame([val], columns=val.keys())
            values = CM.df_list(df)
            db.bulk_insert(sql.sql_dim_company_insert.value, values)
            db.execute(sql.sql_annual_comapny_data_update.value.format(new_com_id, c.ID))
    print('{} exist and {} do not exist'.format(matched, created))
def update_cb_basic_company(self):
    """Backfill the normalized "basic name" for Crunchbase organizations."""
    orgs = db.pandas_read(sql.sql_cb_basic_company.value)
    for _, org in orgs.iterrows():
        statement = sql.sql_cb_basic_company_update.value.format(
            CM.get_basic_name(org['name']),
            CM.sql_compliant(org['org_uuid']))
        print(statement)
        db.execute(statement)
def save_data_chunk(df, sql_insert, chunk_size=1000, capture_fails=False, fail_path_key=''):
    """Bulk-insert `df` in slices of `chunk_size` rows.

    With capture_fails=True each failed chunk is saved to a
    timestamped Excel file (under fail_path_key, when given) so it can
    be inspected and retried later.
    """
    start = 0
    end = chunk_size
    # NOTE(review): +1 mirrors the original loop bound; iloc clamps the overshoot.
    upper = len(df) + 1
    while start < upper:
        stamp = int(round(time.time() * 1000))
        print('From {} to {}'.format(start, end))
        chunk = df.iloc[start:end]
        rows = Common.df_list(chunk)
        if capture_fails:
            result = DB.bulk_insert(sql_insert, rows, rtrn_msg=True)
            if result == 'FAILURE':
                fail_file = '{}_fail_chunk_{}_to_{}.xlsx'.format(stamp, start, end)
                if fail_path_key != '':
                    Common.save_as_excel(dfs=[chunk], file_name=fail_file, path_key=fail_path_key)
                    print("\tCHUNK FAILED. SAVED TO {}".format(fail_file))
        else:
            DB.bulk_insert(sql_insert, rows)
        print('-' * 150)
        start += chunk_size
        end = min(end + chunk_size, upper)
def delete_old_ans():
    """Archive old pipe answers to a stored table, then delete them."""
    # Snapshot the rows that are about to be removed.
    fetch_sql = CM.get_config('config.ini', 'secondary_etl', 'old_ans')
    old_answers = DB.pandas_read(fetch_sql)
    DBInteractions.store_df(old_answers, '_OLD_PIPE_ANS')
    # Now run the configured delete.
    DB.execute(CM.get_config('config.ini', 'secondary_etl', 'del_old_ans'))
def update_basic_name(select, key, venture_name, update):
    """Derive and persist a basic name for every row returned by `select`.

    select: SQL yielding at least the `key` and `venture_name` columns.
    update: SQL template formatted with (basic_name, key_value).
    """
    rows = DB.pandas_read(select)
    for _, row in rows.iterrows():
        name = row['{}'.format(venture_name)]
        # Escape single quotes so the value survives the SQL string literal.
        basic = Common.get_basic_name(name).replace("'", "\''")
        DB.execute(update.format(basic, Common.sql_compliant(row['{}'.format(key)])))
        print('{}({})'.format(name, basic))
def push_bap_quarterly_to_database():
    """Load the combined RICS/BAP workbook and push its sheets to the database."""
    COM.change_working_directory(fp.path_bap_combined.value)
    workbook = pd.read_excel('ETL_RICS_BAP_COMBINED_FY19Q1.xlsx', sheet_name=None)
    # Program transfers are currently disabled:
    # BapQuarterly.transfer_csv_program(workbook['csv_program16'])
    # BapQuarterly.transfer_csv_program_youth(workbook['csv_program16_youth'])
    BapQuarterly.bulk_insert_quarterly_data(workbook['Quarterly Company Data'])
    # Annual company data only arrives with the Q3 submission.
    if BapQuarterly.quarter == 3:
        BapQuarterly.bulk_insert_annual_data(workbook['Annual Company data'])
def __init__(self):
    """Load both fact-company tables as row lists so they can be compared.

    Pulls the same column set from MDCReport.BAPQ.FactRICCompany and
    MaRSDataCatalyst.Reporting.FactRICCompanyData (note: the second
    source names its column DataSourceID rather than DataSource).
    """
    # Rows from the MDCReport copy of the fact table.
    self.MDCReport = common.df_list(
        db.pandas_read(
            'SELECT RICCompanyDataID, CompanyID,DataSource,BatchID,DateID,AdvisoryServicesHours,'
            'VolunteerMentorHours, AnnualRevenue, NumberEmployees,FundingToDate, FundingCurrentQuarter, '
            'HighPotential,SocialEnterprise '
            'FROM MDCReport.BAPQ.FactRICCompany'))
    # Rows from the MaRSDataCatalyst copy of the fact table.
    self.MaRSDataCatalyst = common.df_list(
        db.pandas_read(
            'SELECT RICCompanyDataID, CompanyID,DataSourceID,BatchID,DateID,AdvisoryServicesHours,'
            'VolunteerMentorHours, AnnualRevenue, NumberEmployees,FundingToDate, FundingCurrentQuarter, '
            'HighPotential,SocialEnterprise FROM MaRSDataCatalyst.Reporting.FactRICCompanyData'
        ))
    # Accumulator for comparison results, filled elsewhere.
    self.records = []
def push_entity_to_db(self, json, org_uuid, sql_insert, uuid, i=0, fk_uuid='org_uuid', columns=None):
    """Flatten one Crunchbase entity JSON node and bulk-insert it.

    json: node holding a `properties` dict directly, or nested under
        `item` (OneToOne cardinality) / `items[i]` (OneToMany).
    org_uuid: parent organization uuid, stored under `fk_uuid`.
    sql_insert: parameterized statement passed to db.bulk_insert.
    uuid: entity uuid, injected when the payload lacks one.
    i: index into `items` for OneToMany nodes.
    columns: optional column subset/ordering for the insert.

    Fix: `columns` previously used a mutable default argument (`[]`);
    it now defaults to None with equivalent truthiness checks.
    """
    try:
        json_properties = None
        if CBDict.properties.value in json.keys():
            json_properties = json[CBDict.properties.value]
        elif json[CBDict.cardinality.value] == 'OneToOne':
            json_properties = json[CBDict.item.value][CBDict.properties.value]
        elif json[CBDict.cardinality.value] == 'OneToMany':
            json_properties = json[CBDict.items.value][i][CBDict.properties.value]
        # Guarantee both key columns exist before building the row.
        if 'uuid' not in json_properties.keys():
            json_properties['uuid'] = uuid
        if fk_uuid not in json_properties.keys():
            json_properties[fk_uuid] = org_uuid
        df_properties = pd.DataFrame([json_properties], columns=json_properties.keys())
        if columns:
            df_properties = df_properties[columns]
        values = CM.df_list(df_properties)
        row = []
        for cell in values[0]:
            if isinstance(cell, list):
                # Lists are flattened to a comma-separated string.
                row.append(' , '.join(str(x) for x in cell))
            elif isinstance(cell, str):
                row.append(self.common.sql_compliant(cell))
            else:
                row.append(cell)
        db.bulk_insert(sql_insert, [row])
    except Exception as ex:
        # Best-effort insert: log and continue with the next entity.
        print(ex)
def save_organization_detail(self, uuid, json_properties):
    """Insert one Crunchbase organization row unless its uuid already exists.

    Augments the raw properties with bookkeeping fields, sanitizes every
    value into something safe to embed in a SQL tuple literal, and
    executes the formatted insert. Skips (with a message) when the org
    is already present.
    """
    # print('{}. UUID: {}'.format(self.i, uuid))
    # Existence check by uuid.
    df = self.db.pandas_read(self.enum.SQL.sql_org_detail_exists.value.format(uuid))
    if len(df) == 0:
        # Bookkeeping fields expected by the org_columns layout.
        # NOTE(review): 3862 appears to be the current batch id -- confirm.
        json_properties['org_uuid'] = uuid
        json_properties['batch'] = 3862
        json_properties['company_id'] = None
        json_properties['BasicName'] = None
        json_properties['fetched'] = 0
        df_properties = pd.DataFrame([json_properties], columns=self.org_columns)
        values = CM.df_list(df_properties)
        val = []
        tup = ()
        # Sanitize each cell: flatten lists, strip newlines/parens from
        # strings (they would break the tuple-formatted SQL), map None to ''.
        for l, j in enumerate(values[0]):
            if isinstance(values[0][l], list):
                val.append(''.join(str(x) for x in values[0][l]))
            elif isinstance(values[0][l], str):
                val.append(self.common.sql_compliant(values[0][l]).replace('\r',' ').replace('\n',' ').replace('(',' - ').replace(')',''))
            elif values[0][l] is None:
                val.append(self.common.sql_compliant(''))
            else:
                val.append(values[0][l])
        # print(val)
        tup = tuple(val)
        # print(tup)
        ival = [val]
        # The insert template is formatted with the whole Python tuple,
        # then patched: booleans to 1/0, double quotes to single quotes.
        sql_insert = self.enum.SQL.sql_org_short_insert.value.format(tup)
        # print(sql_insert)
        sql_insert = sql_insert.replace('True', '1').replace('False','0').replace('"',"'")
        # print(sql_insert)
        self.db.execute(sql_insert)
    else:
        print('[{}] exists.'.format(json_properties['name']))
def get_all_rics_data(self):
    """Collect FY18 Q1-Q3 BAP numbers for every RIC into one summary workbook.

    Per RIC: loads the three quarterly source sheets, resolves each
    quarter's BatchID, pulls the raw, fact and rollup company data for
    each quarter into instance attributes, then writes bap_summary()
    to one worksheet per RIC.
    """
    writer = pd.ExcelWriter('00 BAP FY18-1-2-3 Numbers.xlsx')
    for ric in self.rics:
        print(ric.upper())
        # Source workbook sheets; Q3 moved to the new company sheet layout.
        self.Q1CompanyData_sheet = self.read_file_source(self.path_quarter_one, ric, WorkSheet.bap_company_old.value)
        self.Q2CompanyData_sheet = self.read_file_source(self.path_quarter_two, ric, WorkSheet.bap_company_old.value)
        self.Q3CompanyData_sheet = self.read_file_source(self.path_quarter_three, ric, WorkSheet.bap_company.value)
        data_source = Common.set_datasource(ric)
        # One BatchID series per quarter for this data source.
        batch1 = db.pandas_read(self.batch.format(self.year, self.Q1, data_source, SourceSystemType.RICCD_bap.value))['BatchID']
        batch2 = db.pandas_read(self.batch.format(self.year, self.Q2, data_source, SourceSystemType.RICCD_bap.value))['BatchID']
        batch3 = db.pandas_read(self.batch.format(self.year, self.Q3, data_source, SourceSystemType.RICCD_bap.value))['BatchID']
        # Raw company data; each quarter lives in a different table/query.
        self.Q1CompanyData = db.pandas_read(self.selectQ1.format(str(batch1[0]) + ' ORDER BY CompanyName'))
        self.Q2CompanyData = db.pandas_read(self.select.format('Config.CompanyDataRaw', str(batch2[0]) + ' ORDER BY CompanyName'))
        self.Q3CompanyData = db.pandas_read(self.select.format('BAP.QuarterlyCompanyData', str(batch3[0]) + ' ORDER BY [Company Name]'))
        # Fact-table rows per quarter.
        self.Q1CompanyData_fact_ric = db.pandas_read(self.select.format('Reporting.FactRICCompanyData', batch1[0]))
        self.Q2CompanyData_fact_ric = db.pandas_read(self.select.format('Reporting.FactRICCompanyData', batch2[0]))
        self.Q3CompanyData_fact_ric = db.pandas_read(self.select.format('Reporting.FactRICCompanyData', batch3[0]))
        # Rollup aggregates per quarter.
        self.Q1CompanyData_rollup = db.pandas_read(SQL.sql_rollup_select.value.format(self.year, 1, data_source))
        self.Q2CompanyData_rollup = db.pandas_read(SQL.sql_rollup_select.value.format(self.year, 2, data_source))
        self.Q3CompanyData_rollup = db.pandas_read(SQL.sql_rollup_select.value.format(self.year, 3, data_source))
        df_ric = self.bap_summary()
        if df_ric is not None:
            df_ric.to_excel(writer, ric.upper(), index=False)
        df_ric = None
    writer.save()
def __init__(self, path):
    """Anchor the instance at ~/<path> and derive the current fiscal period."""
    self.path = os.path.join(os.path.expanduser("~"), path)
    self.source_file = None
    self._set_folder()
    self.data_list = []
    # Fiscal year/quarter come from the current UTC timestamp.
    self.year, self.quarter = COM.fiscal_year_quarter(datetime.datetime.utcnow())
def __init__(self):
    """Load the RIC source files and set up QA styling/constants for FY2019 Q1."""
    box_path = Common.change_location(p.DATA)
    fl = FileService(box_path)
    # All RIC workbook files found under the data folder.
    self.ric_files = fl.get_source_file()
    # Cell fills used to color-code QA results in output workbooks.
    self.okay = PatternFill(fgColor='E1F7DC', bgColor='C00000', fill_type='solid')
    self.amber = PatternFill(fgColor='F4B042', bgColor='C00000', fill_type='solid')
    self.header = PatternFill(fgColor='218c04', bgColor='C00000', fill_type='solid')
    self.empty = PatternFill(fgColor='f9462a', bgColor='C00000', fill_type='solid')
    self.red = PatternFill(fgColor='f72f11', bgColor='C00000', fill_type='solid')
    # Silence noisy openpyxl/pandas warnings during QA runs.
    warnings.filterwarnings("ignore")
    # Reporting period and sheet-label constants.
    self.quarter = 'Q1'
    self.year = '2019'
    self.youth = 'Youth'
    self.all_youth = 'ALL incl. youth'
    # Month names used when parsing date-like columns.
    self.month_names = [
        'january', 'february', 'march', 'april', 'may', 'june', 'july',
        'august', 'september', 'october', 'november', 'december'
    ]
    # Source values treated as "no data".
    self.no_value = ['na', 'n/a', '', '0000-00-00', '*****']
def _main_():
    """Export annual-survey cap/rev/emp results to one Excel file per RIC.

    Reads the survey results, pivots answers wide (one column per
    question), restores resp_id / Company_ID as leading columns, and
    writes each RIC's frame to its own workbook.
    """
    print("Getting SQL query")
    sql = CM.get_config("config_sql.ini", "ann_survey_18", "caprevjob_by_ric")
    print("SQL: {}".format(sql))
    print("Executing SQL to get dataframe of results")
    all_results = DB.pandas_read(sql)
    print("Creating column names")
    # Question label = category + question text.
    all_results['ConcatQ'] = all_results[['Cap/Rev/Emp', 'Question']].apply(lambda x: ' - '.join(x), axis=1)
    print("Splitting dataframe into one per RIC")
    split_frames = partition_by(all_results, "RIC_Program")
    print("Getting write path")
    user_path = os.path.expanduser("~")
    path = user_path + "/Box Sync/Workbench/BAP/Annual Survey FY2018/Results by RIC/"
    print("Path: {}".format(path))
    print("Writing files to disc:")
    for ric in split_frames.keys():
        x = split_frames[ric]
        # Composite key so spread() can pivot one row per respondent+company.
        x['rid_cid'] = x['resp_id'].astype(str) + '_' + x['Company_ID'].astype(str)
        x = spread(x, 'rid_cid', 'ConcatQ', 'Answer')
        x['rid_cid'] = x.index
        # Recover the two id columns from the composite key; the leading
        # underscore keeps them distinct until they are renamed below.
        x['_resp_id'], x['_Company_ID'] = x['rid_cid'].str.split('_', 1).str
        x = x.apply(pd.to_numeric, errors='ignore')
        # Move the two id columns (appended last) to the front.
        cols = x.columns.tolist()
        cols = cols[-2:] + cols[:-2]
        x = x[cols]
        # Strip the temporary underscore prefix.
        for i in range(len(cols)):
            if str(cols[i])[0] == '_':
                cols[i] = cols[i][1:]
        x.columns = cols
        x = x.drop('rid_cid', axis=1)
        filename = "{} Survey Results".format(ric)
        write_to_xl(x, filename, path, 'Results')
        print("Wrote {} to path: {}".format(filename, path))
def combine_bap_missing_source_file(self, current_path=''):
    """Concatenate the company sheet of every missing-data workbook.

    When current_path is given, the working directory is first moved to
    ~/<current_path> and the source-file list refreshed. Each readable
    workbook contributes its company sheet with a DataSource column
    inserted; files with no recognizable data source are reported.
    Returns the combined dataframe.
    """
    unified_columns = [
        'CompanyName', 'Website', 'AnnualRevenue', 'NumberOfEmployees',
        'FundingToDate', 'DataSource', 'Fiscal_Quarter', 'FiscalYear'
    ]
    if current_path != '':
        current_path = os.path.join(os.path.expanduser("~"), current_path)
        os.chdir(current_path)
        self.source_file = os.listdir(current_path)
    frames = []
    count = 0
    for fl in self.get_source_file():
        try:
            count += 1
            source = COM.set_datasource(str(fl))
            if source is None:
                print('\tMissing - {}'.format(fl))
                continue
            sheet = pd.read_excel(fl, WS.bap_company.value)
            print('{}.[{}] {} -->{}'.format(count, source, fl, len(sheet.columns)))
            # print(sheet.head())
            sheet.insert(5, 'DataSource', source)
            sheet.columns = unified_columns
            frames.append(sheet)
        except Exception as ex:
            print(ex)
    print('\n\n')
    return pd.concat(frames)
def sg_campaigns_json(self, surveyID, api_token, attempts=10, wait_sec=3):
    '''Takes Sgizmo surveyID, api token and returns campaigns as dataframe.
    int, str, -> dict

    Retries up to `attempts` times, sleeping `wait_sec` seconds between
    tries; returns None when every attempt fails.

    Fix: the bare `except:` (which also swallowed SystemExit) is
    narrowed to `except Exception`.
    '''
    attempt_count = 0
    URL = "https://restapica.surveygizmo.com/v5/survey/" + str(
        surveyID) + "/surveycampaign/?resultsperpage=500&" + api_token
    print(URL)
    for i in range(0, attempts):
        try:
            attempt_count += 1
            output = requests.get(URL, verify=common.get_cert_path())
            if output.ok:
                output = output.json()
                print("Success. Stored API output in json dict.")
                return output
        except KeyboardInterrupt:
            # NOTE(review): deliberately swallowed so Ctrl-C does not abort
            # the retry loop -- confirm this is intended.
            pass
        except Exception:
            if attempt_count >= attempts:
                print("All attempts failed")
                return
            print("Likely SSLError. Trying again in", wait_sec, "second(s)...")
            sleep(wait_sec)
def get_list_json(self, api_token, attempts=6, wait_sec=3):
    """ Takes str api token and returns all surveys associated with account (in json-like dict).
    str -> dict
    """
    URL = "https://restapica.surveygizmo.com/v5/survey/?resultsperpage=500&" + str(
        api_token)
    tries = 0
    for _ in range(attempts):
        try:
            # Echo the URL once, on the first attempt only.
            if tries == 0:
                print(URL)
            tries += 1
            response = requests.get(URL, verify=common.get_cert_path())
            if response.ok:
                print("Success. Stored API output in json dict.")
                return response.json()
        except KeyboardInterrupt:
            pass
        except Exception as ex:
            if tries >= attempts:
                print("All attempts failed")
                return
            print("Likely SSLError. Trying again in", wait_sec,
                  "second(s)...", ex)
            sleep(wait_sec)
def sg_get_api_output(self, URL, attempts=10, wait_sec=3):
    '''Takes (in future) tokens, preferences, returns JSON file if successful after specified # of attempts.
    ints -> dict

    Retries up to `attempts` times, sleeping `wait_sec` seconds between
    tries; returns None when every attempt fails.

    Fix: the bare `except:` (which also swallowed SystemExit) is
    narrowed to `except Exception`.
    '''
    from time import sleep
    attempt_count = 0
    for i in range(0, attempts):
        try:
            attempt_count += 1
            output = requests.get(URL, verify=common.get_cert_path())
            if output.ok:
                output = output.json()
                print("Success. Stored API output in json dict.")
                return output
        except KeyboardInterrupt:
            # NOTE(review): deliberately swallowed so Ctrl-C does not abort
            # the retry loop -- confirm this is intended.
            pass
        except Exception:
            if attempt_count >= attempts:
                print("All attempts failed")
                return
            print("Likely SSLError. Trying again in", wait_sec, "second(s)...")
            sleep(wait_sec)
def sg_emails_json(self, surveyID, campaign_id, api_token, attempts=10, wait_sec=3):
    '''Takes campaign id and api tokens and returns json-formatted dict with email messages.
    int, str, -> dict

    Retries up to `attempts` times, sleeping `wait_sec` seconds between
    tries; returns None when every attempt fails.

    Fix: the bare `except:` (which also swallowed SystemExit) is
    narrowed to `except Exception`.
    '''
    from time import sleep
    attempt_count = 0
    URL = "https://restapica.surveygizmo.com/v5/survey/" + str(
        surveyID) + "/surveycampaign/" + str(
        campaign_id) + "/emailmessage/" + "?" + api_token
    print(URL)
    for i in range(0, attempts):
        try:
            attempt_count += 1
            output = requests.get(URL, verify=common.get_cert_path())
            if output.ok:
                output = output.json()
                print("Success. Stored API output in json dict.")
                return output
        except KeyboardInterrupt:
            # NOTE(review): deliberately swallowed so Ctrl-C does not abort
            # the retry loop -- confirm this is intended.
            pass
        except Exception:
            if attempt_count >= attempts:
                print("All attempts failed")
                return
            print("Likely SSLError. Trying again in", wait_sec, "second(s)...")
            sleep(wait_sec)
def split_venture_former_name(self):
    """Preview UPDATEs that split "Name (former name)" ventures.

    Only prints the generated SQL -- nothing is executed here.
    """
    ventures = db.pandas_read('SELECT ID, CompanyName, [Former / Alternate Names] FROM MDCRaw.BAP.VentureQuarterlyData WHERE CompanyName LIKE \'%(%\' AND FiscalYear = 2019')
    template = '''UPDATE MDCRaw.BAP.VentureQuarterlyData SET CompanyName = \'{}\' , [Former / Alternate Names] = \'{}\' WHERE ID = {} -- {}'''
    for _, row in ventures.iterrows():
        parts = CM.venture_name_with_bracket_split(row['CompanyName'])
        # The bracketed part becomes the alternate name, minus punctuation
        # and any embedded "formerly".
        alternate = parts[1].replace('(', '').replace(')', '').replace('formerly', '')
        print(template.format(parts[0], alternate, row['ID'], row['CompanyName']))
def get_ventures(self):
    """Backfill DimCompany.BasicName wherever it is still NULL."""
    sql_venture = 'SELECT CompanyID, CompanyName FROM Reporting.DimCompany WHERE BasicName IS NULL AND CompanyName IS NOT NULL' #AND BatchID NOT IN (3496, 3497,3498, 3499)'
    sql_update = 'UPDATE Reporting.DimCompany SET BasicName = \'{}\' WHERE CompanyID = {}'
    ventures = self.db.pandas_read(sql_venture)
    for _, row in ventures.iterrows():
        # row[0] = CompanyID, row[1] = CompanyName
        self.db.execute(sql_update.format(common.get_basic_name(row[1]), row[0]))
def transfer_fact_ric_aggregation():
    """Aggregate BAP program (and youth) metrics into fact-RIC-aggregation rows.

    Reads the program and program-youth aggregate queries for FY2018 Q4,
    turns each metric column of each row into one
    (DataSource, DateID, MetricID, BatchID, AggregateNumber,
    ModifiedDate, CreatedDate, Youth) record, and bulk-inserts them all.

    Fix: the two copy-pasted per-dataframe loops are deduplicated into
    the _fact_ric_rows helper.
    """
    date_id = COM.get_dateid(datevalue=None)
    metric_prg = [130, 132, 133, 129, 134, 63, 77, 60, 68, 67, 135, 136, 137]
    metric_prg_youth = [134, 138]
    # TODO: replace the hard-coded (2018, 4) with (BapQuarterly.year, BapQuarterly.quarter).
    df_program = db.pandas_read(sql.sql_company_aggregate_program.value.format(2018, 4))
    df_program_youth = db.pandas_read(sql.sql_company_aggregate_program_youth.value.format(2018, 4))
    values = []
    values.extend(_fact_ric_rows(df_program, metric_prg, 7, 20, date_id))
    values.extend(_fact_ric_rows(df_program_youth, metric_prg_youth, 7, 9, date_id))
    for k in range(len(values)):
        print('{}. {}'.format(k, values[k]))
    db.bulk_insert(sql.sql_bap_fact_ric_aggregation_insert.value, values)


def _fact_ric_rows(df, metric_ids, first_col, stop_col, date_id):
    """Build insert rows from columns [first_col, stop_col) of df, one per metric id."""
    rows = []
    for _, row in df.iterrows():
        for col in range(first_col, stop_col):
            metric = metric_ids[col - first_col]
            val = [
                int(row['DataSource']),   # DataSource
                int(date_id),             # RICDateID
                int(metric),              # MetricID
                int(row['BatchID']),      # BatchID
            ]
            # Placeholder strings from the source become the -1 sentinel.
            if str(row[col]) in ['no data', 'n\\a', '-', 'n/a', 'nan']:
                val.append(-1.0)
                print(row[col])
            else:
                val.append(round(float(row[col]), 2))  # AggregateNumber
            val.append(str(datetime.datetime.today())[:23])  # ModifiedDate
            val.append(str(datetime.datetime.today())[:23])  # CreatedDate
            val.append(row['Youth'])  # Youth
            rows.append(val)
    return rows
class Json:
    """Wraps a SurveyGizmo responses payload and flattens it into answer rows.

    NOTE(review): the hard-coded offsets 11/12/15/18 below appear to
    encode the SurveyGizmo export layout (first 11 keys are respondent
    metadata, question keys start with an '(id)' prefix) -- confirm
    against a sample payload.
    """
    # Comma-separated question ids to keep, loaded once at class definition.
    keep_qids = CM.get_config('config.ini', 'secondary_etl', 'sg_del_qids')

    def __init__(self, json, surveyid):
        # json: list of response dicts; surveyid: owning survey id.
        self.json = json
        self.surveyid = surveyid

    def filter_out(self):
        """Return one dict per response, keeping only wanted, non-empty keys."""
        keeps = self.get_full_keys('question')
        filtered_dicts = []
        for dic in self.json:
            filtered_dic = {}
            for key in keeps:
                # Empty-string answers are dropped.
                if dic[key] != '':
                    filtered_dic[key] = dic[key]
            filtered_dicts.append(filtered_dic)
        return filtered_dicts

    @staticmethod
    def extract_id(string):
        """Return the text between the first '(' and the first ')'."""
        x = string.find("(") + 1
        y = string.find(")")
        return string[x:y]

    def get_full_keys(self, key_str):
        """Return metadata keys plus question keys whose id is in keep_qids.

        key_str: substring a key's prefix must contain (e.g. 'question').
        """
        d = self.json[0]
        keeps = self.keep_qids.split(',')
        full_keys = []
        keys = list(d.keys())
        # First 11 keys are respondent metadata and are always kept.
        full_keys.extend(keys[:11])
        for key in keys[12:]:
            # The id lives inside the first 18 characters of the key.
            small_key = key[:18]
            if Json.extract_id(small_key) in keeps and key_str in small_key:
                full_keys.append(key)
        return full_keys

    def to_df(self):
        """Flatten filtered responses into an Answer-record dataframe."""
        data = self.filter_out()
        all_ans = []
        for resp in data:
            srid = resp['id']
            # Skip the 11 metadata keys; the rest are answers.
            for key in list(resp.keys())[11:]:
                qid = Json.extract_id(key[:18])
                # The second parenthesized token holds the page/pipe id.
                page_pipe = Json.extract_id(key[15:])
                answer_str = str(resp[key])
                ans = Answer(qid=qid, srid=srid, answer=answer_str, surveyid=self.surveyid, page_pipe=page_pipe)
                answer = ans.record()
                all_ans.append(answer)
        all_ans = pd.DataFrame(all_ans, columns=Answer.cols())
        return all_ans
def check_columns_completeness(self):
    """Dump every RIC workbook's sheet columns into one comparison workbook.

    For each RIC file, reads the four BAP sheets (program, program
    youth, quarterly company, annual company), collects their column
    layouts via sheet_columns, and writes the four combined frames to
    '00 ALL_RIC_BAP_COLUMNS_FY19_Q1.xlsx' in the QA folder.

    Fix: `Workbook.get_sheet_by_name` is deprecated (and removed in
    current openpyxl) -- replaced by `wb[sheet_name]` subscripting;
    unused local `clm_lst` removed.
    """
    dfps = pd.DataFrame()
    dfpys = pd.DataFrame()
    dfqc = pd.DataFrame()
    dfac = pd.DataFrame()
    for fl in self.ric_files:
        Common.change_location(p.DATA)
        wb = openpyxl.load_workbook(fl, data_only=True)
        ric_file_name = fl[:-5]  # strip the '.xlsx' extension
        print('-' * 250)
        df_ps = self.sheet_columns(wb[WS.bap_program.value], ric_file_name, WS.bap_program.value)
        df_pys = self.sheet_columns(wb[WS.bap_program_youth.value], ric_file_name, WS.bap_program_youth.value)
        df_qc = self.sheet_columns(wb[WS.bap_company.value], ric_file_name, WS.bap_company.value)
        df_ac = self.sheet_columns(wb[WS.bap_company_annual.value], ric_file_name, WS.bap_company_annual.value)
        dfps = pd.concat([dfps, df_ps])
        dfpys = pd.concat([dfpys, df_pys])
        dfqc = pd.concat([dfqc, df_qc])
        dfac = pd.concat([dfac, df_ac])
    writer = pd.ExcelWriter('00 ALL_RIC_BAP_COLUMNS_FY19_Q1.xlsx')
    dfps.to_excel(writer, 'Program', index=False)
    dfpys.to_excel(writer, 'Program Youth', index=False)
    dfqc.to_excel(writer, 'Quarterly Company', index=False)
    dfac.to_excel(writer, 'Annual Company', index=False)
    Common.change_location(p.QA)
    print(os.getcwd())
    writer.save()
def push_bap_missing_data_to_temp_table():
    """Load the combined missing-data workbook into BAP.BAP_FY18Q3_Missing_Data."""
    # NOTE(review): joining "~" with an absolute second argument makes
    # os.path.join discard the home prefix -- the literal path below wins.
    current_path = os.path.join(os.path.expanduser("~"), '/Users/mnadew/Box Sync/Workbench/BAP/BAP_FY18/FY18_Q3/for ETL/Missing data Reports')
    os.chdir(current_path)
    missing = pd.read_excel('00 BAP Missing data Combined.xlsx', 'BAP Missing data')
    # CompanyID is not in the sheet; seed it with 0 for the temp table.
    missing['CompanyID'] = 0
    ordered = missing[['CompanyID', 'CompanyName', 'BasicName', 'Website', 'AnnualRevenue', 'NumberOfEmployees', 'FundingToDate', 'DataSource']]
    sql = 'INSERT INTO BAP.BAP_FY18Q3_Missing_Data VALUES (?, ?, ?, ?, ?, ?, ?, ?)'
    db.bulk_insert(sql, COM.df_list(ordered))
def connect(dev=False):
    """Open a pyodbc connection from config.ini; return None on failure.

    dev=True selects the 'devconn' connection string instead of 'conn'.
    """
    section_key = 'devconn' if dev else 'conn'
    try:
        con_str = Common.get_config('config.ini', 'db_connect', section_key)
        return pyodbc.connect(con_str)
    except Exception as ex:
        print('DB Server Connection Exception: {}'.format(ex))
        return None
def check_qs_exist(self, survey_id):
    """Return True when questions for survey_id are already stored."""
    query = CM.get_config("config.ini", "sql_queries", "check_questions_exist")
    query = query.replace("WHAT_SURVEY_ID", str(survey_id))
    result = DB.pandas_read(query)
    # First cell of the check query is truthy when rows exist.
    return bool(result.iloc[0][0])
def combine_missing_data():
    """Combine BAP missing-data files, add BasicName, and save one workbook."""
    combined = BapQuarterly.file.combine_bap_missing_source_file(
        current_path=fp.path_missing_bap_etl.value)
    # NaN -> None so downstream SQL-style handling behaves.
    combined = combined.where(pd.notnull(combined), None)
    combined['BasicName'] = combined.apply(
        lambda dfs: COM.get_basic_name(dfs.CompanyName), axis=1)
    combined = combined.where(pd.notnull(combined), None)
    print(combined.columns)
    subset = combined[['CompanyName', 'BasicName', 'Website', 'AnnualRevenue',
                       'NumberOfEmployees', 'FundingToDate', 'DataSource']]
    BapQuarterly.file.save_as_csv(subset, '00 BAP Missing data Combined.xlsx',
                                  os.getcwd(), 'BAP Missing data')
    print(subset.head())