def update_tdw_basic_company(self):
    """Backfill the normalized "basic name" for every TDW company.

    Reads all rows returned by sql_tdw_basic_company, derives a basic
    name from each legal name, and writes it back one UPDATE at a time
    (each statement is echoed before execution).
    """
    companies = db.pandas_read(sql.sql_tdw_basic_company.value)
    for _, company in companies.iterrows():
        statement = sql.sql_tdw_basic_company_update.value.format(
            CM.get_basic_name(company.legal_name),
            CM.sql_compliant(company.legal_name))
        print(statement)
        db.execute(statement)
def _main_():
    """Build the qs_metadata shelve: {column: {filter_value: [question_ids]}}."""
    # Widen pandas console output for easier inspection.
    pd.set_option('display.width', 320)
    # Open qs_metadata.xlsx and take the first sheet as a dataframe.
    meta_df = CM.xl_to_dfs(os.getcwd(), '/qs_metadata.xlsx')['Sheet1']
    # Every column's distinct metadata values (may change to all column names).
    distinct_by_col = {col: CM.distinct_from_df(meta_df, col) for col in list(meta_df)}
    # For each column that actually has values, map filter value -> question ids.
    q_meta_shelve = {}
    for col, filter_vals in distinct_by_col.items():
        if len(filter_vals) > 0:
            q_meta_shelve[col] = {
                filter_val: ids_to_list(meta_df, 'id', str(col), filter_val)
                for filter_val in filter_vals
            }
    # Persist the nested dict as a shelve.
    create_write_shelve(q_meta_shelve, 'qs_metadata')
def get_proper_values(df):
    """Normalize reporting columns of a BAP dataframe in place and return it.

    Stage text becomes a StageLevelID; the three yes/no flag columns are
    normalized through COM.get_yes_no.
    """
    df['StageLevelID'] = df.apply(lambda row: COM.get_stage_level(row.Stage), axis=1)
    for flag in ('High Potential y/n', 'Social Enterprise y/n', 'Youth'):
        df[flag] = df.apply(lambda row, col=flag: COM.get_yes_no(row[col]), axis=1)
    # Disabled: funding-range splitting.
    # df['Funding Raised to Date $CAN'] = df.apply(
    #     lambda row: BapQuarterly.split_funding_range(row['Funding Raised to Date $CAN']), axis=1)
    return df
def move_annual_company_data(self):
    """Link BAP.AnnualCompanyData rows to Reporting.DimCompany.

    Matches annual rows to DimCompany by normalized ("basic") company
    name. On a hit, the existing CompanyID is stamped onto the annual
    row; on a miss, a new DimCompany record is inserted with a freshly
    seeded CompanyID which is then stamped onto the annual row.

    Fix: the original reused `i` both as the match counter and as the
    iterrows() loop variable, so the counter was clobbered every
    iteration and the final summary was wrong.
    """
    matched, created = 0, 0
    dfac = db.pandas_read('SELECT ID, BatchID, CompanyID,[Company Name] FROM BAP.AnnualCompanyData')
    dfdc = db.pandas_read('SELECT CompanyID, CompanyName FROM Reporting.DimCompany')
    dfac['BasicName'] = dfac.apply(lambda dfs: CM.get_basic_name(dfs['Company Name']), axis=1)
    dfdc['BasicName'] = dfdc.apply(lambda dfs: CM.get_basic_name(dfs.CompanyName), axis=1)
    for _, c in dfac.iterrows():
        dfc = dfdc[dfdc['BasicName'] == c.BasicName]
        if len(dfc) > 0:
            matched += 1
            update_sql = sql.sql_annual_comapny_data_update.value.format(dfc.CompanyID.values[0], c.ID)
            db.execute(update_sql)
            print(update_sql)
        else:
            created += 1
            print(sql.sql_dim_company_insert.value)
            # Seed a new CompanyID off the current max.
            new_com_id = self.batch.get_table_seed('MaRSDataCatalyst.Reporting.DimCompany', 'CompanyID') + 1
            now = str(dt.datetime.utcnow())[:-3]
            val = {
                'CompanyID': new_com_id,
                'Company Name': c['Company Name'],
                'Description': None,
                'Phone': None,
                'Phone2': None,
                'Fax': None,
                'Email': None,
                'Website': None,
                'CompanyType': None,
                'BatchID': c.BatchID,
                'ModifiedDate': now,
                'CreatedDate': now,
            }
            df = pd.DataFrame([val], columns=val.keys())
            values = CM.df_list(df)
            db.bulk_insert(sql.sql_dim_company_insert.value, values)
            db.execute(sql.sql_annual_comapny_data_update.value.format(new_com_id, c.ID))
    print('{} exist and {} do not exist'.format(matched, created))
def update_cb_basic_company(self):
    """Backfill the normalized "basic name" for Crunchbase organizations."""
    orgs = db.pandas_read(sql.sql_cb_basic_company.value)
    for _, org in orgs.iterrows():
        statement = sql.sql_cb_basic_company_update.value.format(
            CM.get_basic_name(org['name']),
            CM.sql_compliant(org['org_uuid']))
        print(statement)
        db.execute(statement)
def save_data_chunk(df, sql_insert, chunk_size=1000, capture_fails=False, fail_path_key=''):
    """Bulk-insert `df` in slices of `chunk_size` rows.

    With capture_fails=True each failed chunk is saved to a
    timestamped Excel file (under fail_path_key, when given) so it can
    be inspected and retried later.
    """
    start = 0
    end = chunk_size
    # NOTE(review): +1 mirrors the original loop bound; iloc clamps the overshoot.
    upper = len(df) + 1
    while start < upper:
        stamp = int(round(time.time() * 1000))
        print('From {} to {}'.format(start, end))
        chunk = df.iloc[start:end]
        rows = Common.df_list(chunk)
        if capture_fails:
            result = DB.bulk_insert(sql_insert, rows, rtrn_msg=True)
            if result == 'FAILURE':
                fail_file = '{}_fail_chunk_{}_to_{}.xlsx'.format(stamp, start, end)
                if fail_path_key != '':
                    Common.save_as_excel(dfs=[chunk], file_name=fail_file, path_key=fail_path_key)
                    print("\tCHUNK FAILED. SAVED TO {}".format(fail_file))
        else:
            DB.bulk_insert(sql_insert, rows)
        print('-' * 150)
        start += chunk_size
        end = min(end + chunk_size, upper)
def delete_old_ans():
    """Archive old pipe answers to a stored table, then delete them."""
    # Snapshot the rows that are about to be removed.
    fetch_sql = CM.get_config('config.ini', 'secondary_etl', 'old_ans')
    old_answers = DB.pandas_read(fetch_sql)
    DBInteractions.store_df(old_answers, '_OLD_PIPE_ANS')
    # Now run the configured delete.
    DB.execute(CM.get_config('config.ini', 'secondary_etl', 'del_old_ans'))
def update_basic_name(select, key, venture_name, update):
    """Derive and persist a basic name for every row returned by `select`.

    select: SQL yielding at least the `key` and `venture_name` columns.
    update: SQL template formatted with (basic_name, key_value).
    """
    rows = DB.pandas_read(select)
    for _, row in rows.iterrows():
        name = row['{}'.format(venture_name)]
        # Escape single quotes so the value survives the SQL string literal.
        basic = Common.get_basic_name(name).replace("'", "\''")
        DB.execute(update.format(basic, Common.sql_compliant(row['{}'.format(key)])))
        print('{}({})'.format(name, basic))
def push_bap_quarterly_to_database():
    """Load the combined RICS/BAP workbook and push its sheets to the database."""
    COM.change_working_directory(fp.path_bap_combined.value)
    workbook = pd.read_excel('ETL_RICS_BAP_COMBINED_FY19Q1.xlsx', sheet_name=None)
    # Program transfers are currently disabled:
    # BapQuarterly.transfer_csv_program(workbook['csv_program16'])
    # BapQuarterly.transfer_csv_program_youth(workbook['csv_program16_youth'])
    BapQuarterly.bulk_insert_quarterly_data(workbook['Quarterly Company Data'])
    # Annual company data only arrives with the Q3 submission.
    if BapQuarterly.quarter == 3:
        BapQuarterly.bulk_insert_annual_data(workbook['Annual Company data'])
def __init__(self):
    """Load both fact-company tables as row lists so they can be compared.

    Pulls the same column set from MDCReport.BAPQ.FactRICCompany and
    MaRSDataCatalyst.Reporting.FactRICCompanyData (note: the second
    source names its column DataSourceID rather than DataSource).
    """
    # Rows from the MDCReport copy of the fact table.
    self.MDCReport = common.df_list(
        db.pandas_read(
            'SELECT RICCompanyDataID, CompanyID,DataSource,BatchID,DateID,AdvisoryServicesHours,'
            'VolunteerMentorHours, AnnualRevenue, NumberEmployees,FundingToDate, FundingCurrentQuarter, '
            'HighPotential,SocialEnterprise '
            'FROM MDCReport.BAPQ.FactRICCompany'))
    # Rows from the MaRSDataCatalyst copy of the fact table.
    self.MaRSDataCatalyst = common.df_list(
        db.pandas_read(
            'SELECT RICCompanyDataID, CompanyID,DataSourceID,BatchID,DateID,AdvisoryServicesHours,'
            'VolunteerMentorHours, AnnualRevenue, NumberEmployees,FundingToDate, FundingCurrentQuarter, '
            'HighPotential,SocialEnterprise FROM MaRSDataCatalyst.Reporting.FactRICCompanyData'
        ))
    # Accumulator for comparison results, filled elsewhere.
    self.records = []
def push_entity_to_db(self, json, org_uuid, sql_insert, uuid, i=0, fk_uuid='org_uuid', columns=None):
    """Flatten one Crunchbase entity JSON node and bulk-insert it.

    json: node holding a `properties` dict directly, or nested under
        `item` (OneToOne cardinality) / `items[i]` (OneToMany).
    org_uuid: parent organization uuid, stored under `fk_uuid`.
    sql_insert: parameterized statement passed to db.bulk_insert.
    uuid: entity uuid, injected when the payload lacks one.
    i: index into `items` for OneToMany nodes.
    columns: optional column subset/ordering for the insert.

    Fix: `columns` previously used a mutable default argument (`[]`);
    it now defaults to None with equivalent truthiness checks.
    """
    try:
        json_properties = None
        if CBDict.properties.value in json.keys():
            json_properties = json[CBDict.properties.value]
        elif json[CBDict.cardinality.value] == 'OneToOne':
            json_properties = json[CBDict.item.value][CBDict.properties.value]
        elif json[CBDict.cardinality.value] == 'OneToMany':
            json_properties = json[CBDict.items.value][i][CBDict.properties.value]
        # Guarantee both key columns exist before building the row.
        if 'uuid' not in json_properties.keys():
            json_properties['uuid'] = uuid
        if fk_uuid not in json_properties.keys():
            json_properties[fk_uuid] = org_uuid
        df_properties = pd.DataFrame([json_properties], columns=json_properties.keys())
        if columns:
            df_properties = df_properties[columns]
        values = CM.df_list(df_properties)
        row = []
        for cell in values[0]:
            if isinstance(cell, list):
                # Lists are flattened to a comma-separated string.
                row.append(' , '.join(str(x) for x in cell))
            elif isinstance(cell, str):
                row.append(self.common.sql_compliant(cell))
            else:
                row.append(cell)
        db.bulk_insert(sql_insert, [row])
    except Exception as ex:
        # Best-effort insert: log and continue with the next entity.
        print(ex)
def save_organization_detail(self, uuid, json_properties):
    """Insert one Crunchbase organization row unless its uuid already exists.

    Augments the raw properties with bookkeeping fields, sanitizes every
    value into something safe to embed in a SQL tuple literal, and
    executes the formatted insert. Skips (with a message) when the org
    is already present.
    """
    # print('{}. UUID: {}'.format(self.i, uuid))
    # Existence check by uuid.
    df = self.db.pandas_read(self.enum.SQL.sql_org_detail_exists.value.format(uuid))
    if len(df) == 0:
        # Bookkeeping fields expected by the org_columns layout.
        # NOTE(review): 3862 appears to be the current batch id -- confirm.
        json_properties['org_uuid'] = uuid
        json_properties['batch'] = 3862
        json_properties['company_id'] = None
        json_properties['BasicName'] = None
        json_properties['fetched'] = 0
        df_properties = pd.DataFrame([json_properties], columns=self.org_columns)
        values = CM.df_list(df_properties)
        val = []
        tup = ()
        # Sanitize each cell: flatten lists, strip newlines/parens from
        # strings (they would break the tuple-formatted SQL), map None to ''.
        for l, j in enumerate(values[0]):
            if isinstance(values[0][l], list):
                val.append(''.join(str(x) for x in values[0][l]))
            elif isinstance(values[0][l], str):
                val.append(self.common.sql_compliant(values[0][l]).replace('\r',' ').replace('\n',' ').replace('(',' - ').replace(')',''))
            elif values[0][l] is None:
                val.append(self.common.sql_compliant(''))
            else:
                val.append(values[0][l])
        # print(val)
        tup = tuple(val)
        # print(tup)
        ival = [val]
        # The insert template is formatted with the whole Python tuple,
        # then patched: booleans to 1/0, double quotes to single quotes.
        sql_insert = self.enum.SQL.sql_org_short_insert.value.format(tup)
        # print(sql_insert)
        sql_insert = sql_insert.replace('True', '1').replace('False','0').replace('"',"'")
        # print(sql_insert)
        self.db.execute(sql_insert)
    else:
        print('[{}] exists.'.format(json_properties['name']))
def get_all_rics_data(self):
    """Collect FY18 Q1-Q3 BAP numbers for every RIC into one summary workbook.

    Per RIC: loads the three quarterly source sheets, resolves each
    quarter's BatchID, pulls the raw, fact and rollup company data for
    each quarter into instance attributes, then writes bap_summary()
    to one worksheet per RIC.
    """
    writer = pd.ExcelWriter('00 BAP FY18-1-2-3 Numbers.xlsx')
    for ric in self.rics:
        print(ric.upper())
        # Source workbook sheets; Q3 moved to the new company sheet layout.
        self.Q1CompanyData_sheet = self.read_file_source(self.path_quarter_one, ric, WorkSheet.bap_company_old.value)
        self.Q2CompanyData_sheet = self.read_file_source(self.path_quarter_two, ric, WorkSheet.bap_company_old.value)
        self.Q3CompanyData_sheet = self.read_file_source(self.path_quarter_three, ric, WorkSheet.bap_company.value)
        data_source = Common.set_datasource(ric)
        # One BatchID series per quarter for this data source.
        batch1 = db.pandas_read(self.batch.format(self.year, self.Q1, data_source, SourceSystemType.RICCD_bap.value))['BatchID']
        batch2 = db.pandas_read(self.batch.format(self.year, self.Q2, data_source, SourceSystemType.RICCD_bap.value))['BatchID']
        batch3 = db.pandas_read(self.batch.format(self.year, self.Q3, data_source, SourceSystemType.RICCD_bap.value))['BatchID']
        # Raw company data; each quarter lives in a different table/query.
        self.Q1CompanyData = db.pandas_read(self.selectQ1.format(str(batch1[0]) + ' ORDER BY CompanyName'))
        self.Q2CompanyData = db.pandas_read(self.select.format('Config.CompanyDataRaw', str(batch2[0]) + ' ORDER BY CompanyName'))
        self.Q3CompanyData = db.pandas_read(self.select.format('BAP.QuarterlyCompanyData', str(batch3[0]) + ' ORDER BY [Company Name]'))
        # Fact-table rows per quarter.
        self.Q1CompanyData_fact_ric = db.pandas_read(self.select.format('Reporting.FactRICCompanyData', batch1[0]))
        self.Q2CompanyData_fact_ric = db.pandas_read(self.select.format('Reporting.FactRICCompanyData', batch2[0]))
        self.Q3CompanyData_fact_ric = db.pandas_read(self.select.format('Reporting.FactRICCompanyData', batch3[0]))
        # Rollup aggregates per quarter.
        self.Q1CompanyData_rollup = db.pandas_read(SQL.sql_rollup_select.value.format(self.year, 1, data_source))
        self.Q2CompanyData_rollup = db.pandas_read(SQL.sql_rollup_select.value.format(self.year, 2, data_source))
        self.Q3CompanyData_rollup = db.pandas_read(SQL.sql_rollup_select.value.format(self.year, 3, data_source))
        df_ric = self.bap_summary()
        if df_ric is not None:
            df_ric.to_excel(writer, ric.upper(), index=False)
        df_ric = None
    writer.save()
def __init__(self, path):
    """Anchor the instance at ~/<path> and derive the current fiscal period."""
    self.path = os.path.join(os.path.expanduser("~"), path)
    self.source_file = None
    self._set_folder()
    self.data_list = []
    # Fiscal year/quarter come from the current UTC timestamp.
    self.year, self.quarter = COM.fiscal_year_quarter(datetime.datetime.utcnow())
def __init__(self):
    """Load the RIC source files and set up QA styling/constants for FY2019 Q1."""
    box_path = Common.change_location(p.DATA)
    fl = FileService(box_path)
    # All RIC workbook files found under the data folder.
    self.ric_files = fl.get_source_file()
    # Cell fills used to color-code QA results in output workbooks.
    self.okay = PatternFill(fgColor='E1F7DC', bgColor='C00000', fill_type='solid')
    self.amber = PatternFill(fgColor='F4B042', bgColor='C00000', fill_type='solid')
    self.header = PatternFill(fgColor='218c04', bgColor='C00000', fill_type='solid')
    self.empty = PatternFill(fgColor='f9462a', bgColor='C00000', fill_type='solid')
    self.red = PatternFill(fgColor='f72f11', bgColor='C00000', fill_type='solid')
    # Silence noisy openpyxl/pandas warnings during QA runs.
    warnings.filterwarnings("ignore")
    # Reporting period and sheet-label constants.
    self.quarter = 'Q1'
    self.year = '2019'
    self.youth = 'Youth'
    self.all_youth = 'ALL incl. youth'
    # Month names used when parsing date-like columns.
    self.month_names = [
        'january', 'february', 'march', 'april', 'may', 'june', 'july',
        'august', 'september', 'october', 'november', 'december'
    ]
    # Source values treated as "no data".
    self.no_value = ['na', 'n/a', '', '0000-00-00', '*****']
def _main_():
    """Export annual-survey cap/rev/emp results to one Excel file per RIC.

    Reads the survey results, pivots answers wide (one column per
    question), restores resp_id / Company_ID as leading columns, and
    writes each RIC's frame to its own workbook.
    """
    print("Getting SQL query")
    sql = CM.get_config("config_sql.ini", "ann_survey_18", "caprevjob_by_ric")
    print("SQL: {}".format(sql))
    print("Executing SQL to get dataframe of results")
    all_results = DB.pandas_read(sql)
    print("Creating column names")
    # Question label = category + question text.
    all_results['ConcatQ'] = all_results[['Cap/Rev/Emp', 'Question']].apply(lambda x: ' - '.join(x), axis=1)
    print("Splitting dataframe into one per RIC")
    split_frames = partition_by(all_results, "RIC_Program")
    print("Getting write path")
    user_path = os.path.expanduser("~")
    path = user_path + "/Box Sync/Workbench/BAP/Annual Survey FY2018/Results by RIC/"
    print("Path: {}".format(path))
    print("Writing files to disc:")
    for ric in split_frames.keys():
        x = split_frames[ric]
        # Composite key so spread() can pivot one row per respondent+company.
        x['rid_cid'] = x['resp_id'].astype(str) + '_' + x['Company_ID'].astype(str)
        x = spread(x, 'rid_cid', 'ConcatQ', 'Answer')
        x['rid_cid'] = x.index
        # Recover the two id columns from the composite key; the leading
        # underscore keeps them distinct until they are renamed below.
        x['_resp_id'], x['_Company_ID'] = x['rid_cid'].str.split('_', 1).str
        x = x.apply(pd.to_numeric, errors='ignore')
        # Move the two id columns (appended last) to the front.
        cols = x.columns.tolist()
        cols = cols[-2:] + cols[:-2]
        x = x[cols]
        # Strip the temporary underscore prefix.
        for i in range(len(cols)):
            if str(cols[i])[0] == '_':
                cols[i] = cols[i][1:]
        x.columns = cols
        x = x.drop('rid_cid', axis=1)
        filename = "{} Survey Results".format(ric)
        write_to_xl(x, filename, path, 'Results')
        print("Wrote {} to path: {}".format(filename, path))
def combine_bap_missing_source_file(self, current_path=''):
    """Concatenate the company sheet of every missing-data workbook.

    When current_path is given, the working directory is first moved to
    ~/<current_path> and the source-file list refreshed. Each readable
    workbook contributes its company sheet with a DataSource column
    inserted; files with no recognizable data source are reported.
    Returns the combined dataframe.
    """
    unified_columns = [
        'CompanyName', 'Website', 'AnnualRevenue', 'NumberOfEmployees',
        'FundingToDate', 'DataSource', 'Fiscal_Quarter', 'FiscalYear'
    ]
    if current_path != '':
        current_path = os.path.join(os.path.expanduser("~"), current_path)
        os.chdir(current_path)
        self.source_file = os.listdir(current_path)
    frames = []
    count = 0
    for fl in self.get_source_file():
        try:
            count += 1
            source = COM.set_datasource(str(fl))
            if source is None:
                print('\tMissing - {}'.format(fl))
                continue
            sheet = pd.read_excel(fl, WS.bap_company.value)
            print('{}.[{}] {} -->{}'.format(count, source, fl, len(sheet.columns)))
            # print(sheet.head())
            sheet.insert(5, 'DataSource', source)
            sheet.columns = unified_columns
            frames.append(sheet)
        except Exception as ex:
            print(ex)
    print('\n\n')
    return pd.concat(frames)
def sg_campaigns_json(self, surveyID, api_token, attempts=10, wait_sec=3):
    '''Takes Sgizmo surveyID, api token and returns campaigns as dataframe.
    int, str, -> dict

    Retries up to `attempts` times, sleeping `wait_sec` seconds between
    tries; returns None when every attempt fails.

    Fix: the bare `except:` (which also swallowed SystemExit) is
    narrowed to `except Exception`.
    '''
    attempt_count = 0
    URL = "https://restapica.surveygizmo.com/v5/survey/" + str(
        surveyID) + "/surveycampaign/?resultsperpage=500&" + api_token
    print(URL)
    for i in range(0, attempts):
        try:
            attempt_count += 1
            output = requests.get(URL, verify=common.get_cert_path())
            if output.ok:
                output = output.json()
                print("Success. Stored API output in json dict.")
                return output
        except KeyboardInterrupt:
            # NOTE(review): deliberately swallowed so Ctrl-C does not abort
            # the retry loop -- confirm this is intended.
            pass
        except Exception:
            if attempt_count >= attempts:
                print("All attempts failed")
                return
            print("Likely SSLError. Trying again in", wait_sec, "second(s)...")
            sleep(wait_sec)
def get_list_json(self, api_token, attempts=6, wait_sec=3):
    """ Takes str api token and returns all surveys associated with account (in json-like dict).
    str -> dict
    """
    URL = "https://restapica.surveygizmo.com/v5/survey/?resultsperpage=500&" + str(
        api_token)
    tries = 0
    for _ in range(attempts):
        try:
            # Echo the URL once, on the first attempt only.
            if tries == 0:
                print(URL)
            tries += 1
            response = requests.get(URL, verify=common.get_cert_path())
            if response.ok:
                print("Success. Stored API output in json dict.")
                return response.json()
        except KeyboardInterrupt:
            pass
        except Exception as ex:
            if tries >= attempts:
                print("All attempts failed")
                return
            print("Likely SSLError. Trying again in", wait_sec,
                  "second(s)...", ex)
            sleep(wait_sec)
def sg_get_api_output(self, URL, attempts=10, wait_sec=3):
    '''Takes (in future) tokens, preferences, returns JSON file if successful after specified # of attempts.
    ints -> dict

    Retries up to `attempts` times, sleeping `wait_sec` seconds between
    tries; returns None when every attempt fails.

    Fix: the bare `except:` (which also swallowed SystemExit) is
    narrowed to `except Exception`.
    '''
    from time import sleep
    attempt_count = 0
    for i in range(0, attempts):
        try:
            attempt_count += 1
            output = requests.get(URL, verify=common.get_cert_path())
            if output.ok:
                output = output.json()
                print("Success. Stored API output in json dict.")
                return output
        except KeyboardInterrupt:
            # NOTE(review): deliberately swallowed so Ctrl-C does not abort
            # the retry loop -- confirm this is intended.
            pass
        except Exception:
            if attempt_count >= attempts:
                print("All attempts failed")
                return
            print("Likely SSLError. Trying again in", wait_sec, "second(s)...")
            sleep(wait_sec)
def sg_emails_json(self, surveyID, campaign_id, api_token, attempts=10, wait_sec=3):
    '''Takes campaign id and api tokens and returns json-formatted dict with email messages.
    int, str, -> dict

    Retries up to `attempts` times, sleeping `wait_sec` seconds between
    tries; returns None when every attempt fails.

    Fix: the bare `except:` (which also swallowed SystemExit) is
    narrowed to `except Exception`.
    '''
    from time import sleep
    attempt_count = 0
    URL = "https://restapica.surveygizmo.com/v5/survey/" + str(
        surveyID) + "/surveycampaign/" + str(
        campaign_id) + "/emailmessage/" + "?" + api_token
    print(URL)
    for i in range(0, attempts):
        try:
            attempt_count += 1
            output = requests.get(URL, verify=common.get_cert_path())
            if output.ok:
                output = output.json()
                print("Success. Stored API output in json dict.")
                return output
        except KeyboardInterrupt:
            # NOTE(review): deliberately swallowed so Ctrl-C does not abort
            # the retry loop -- confirm this is intended.
            pass
        except Exception:
            if attempt_count >= attempts:
                print("All attempts failed")
                return
            print("Likely SSLError. Trying again in", wait_sec, "second(s)...")
            sleep(wait_sec)
def split_venture_former_name(self):
    """Preview UPDATEs that split "Name (former name)" ventures.

    Only prints the generated SQL -- nothing is executed here.
    """
    ventures = db.pandas_read('SELECT ID, CompanyName, [Former / Alternate Names] FROM MDCRaw.BAP.VentureQuarterlyData WHERE CompanyName LIKE \'%(%\' AND FiscalYear = 2019')
    template = '''UPDATE MDCRaw.BAP.VentureQuarterlyData SET CompanyName = \'{}\' , [Former / Alternate Names] = \'{}\' WHERE ID = {} -- {}'''
    for _, row in ventures.iterrows():
        parts = CM.venture_name_with_bracket_split(row['CompanyName'])
        # The bracketed part becomes the alternate name, minus punctuation
        # and any embedded "formerly".
        alternate = parts[1].replace('(', '').replace(')', '').replace('formerly', '')
        print(template.format(parts[0], alternate, row['ID'], row['CompanyName']))
def get_ventures(self):
    """Backfill DimCompany.BasicName wherever it is still NULL."""
    sql_venture = 'SELECT CompanyID, CompanyName FROM Reporting.DimCompany WHERE BasicName IS NULL AND CompanyName IS NOT NULL' #AND BatchID NOT IN (3496, 3497,3498, 3499)'
    sql_update = 'UPDATE Reporting.DimCompany SET BasicName = \'{}\' WHERE CompanyID = {}'
    ventures = self.db.pandas_read(sql_venture)
    for _, row in ventures.iterrows():
        # row[0] = CompanyID, row[1] = CompanyName
        self.db.execute(sql_update.format(common.get_basic_name(row[1]), row[0]))
def transfer_fact_ric_aggregation():
    """Aggregate BAP program (and youth) metrics into fact-RIC-aggregation rows.

    Reads the program and program-youth aggregate queries for FY2018 Q4,
    turns each metric column of each row into one
    (DataSource, DateID, MetricID, BatchID, AggregateNumber,
    ModifiedDate, CreatedDate, Youth) record, and bulk-inserts them all.

    Fix: the two copy-pasted per-dataframe loops are deduplicated into
    the _fact_ric_rows helper.
    """
    date_id = COM.get_dateid(datevalue=None)
    metric_prg = [130, 132, 133, 129, 134, 63, 77, 60, 68, 67, 135, 136, 137]
    metric_prg_youth = [134, 138]
    # TODO: replace the hard-coded (2018, 4) with (BapQuarterly.year, BapQuarterly.quarter).
    df_program = db.pandas_read(sql.sql_company_aggregate_program.value.format(2018, 4))
    df_program_youth = db.pandas_read(sql.sql_company_aggregate_program_youth.value.format(2018, 4))
    values = []
    values.extend(_fact_ric_rows(df_program, metric_prg, 7, 20, date_id))
    values.extend(_fact_ric_rows(df_program_youth, metric_prg_youth, 7, 9, date_id))
    for k in range(len(values)):
        print('{}. {}'.format(k, values[k]))
    db.bulk_insert(sql.sql_bap_fact_ric_aggregation_insert.value, values)


def _fact_ric_rows(df, metric_ids, first_col, stop_col, date_id):
    """Build insert rows from columns [first_col, stop_col) of df, one per metric id."""
    rows = []
    for _, row in df.iterrows():
        for col in range(first_col, stop_col):
            metric = metric_ids[col - first_col]
            val = [
                int(row['DataSource']),   # DataSource
                int(date_id),             # RICDateID
                int(metric),              # MetricID
                int(row['BatchID']),      # BatchID
            ]
            # Placeholder strings from the source become the -1 sentinel.
            if str(row[col]) in ['no data', 'n\\a', '-', 'n/a', 'nan']:
                val.append(-1.0)
                print(row[col])
            else:
                val.append(round(float(row[col]), 2))  # AggregateNumber
            val.append(str(datetime.datetime.today())[:23])  # ModifiedDate
            val.append(str(datetime.datetime.today())[:23])  # CreatedDate
            val.append(row['Youth'])  # Youth
            rows.append(val)
    return rows
class Json:
    """Wraps a SurveyGizmo responses payload and flattens it into answer rows.

    NOTE(review): the hard-coded offsets 11/12/15/18 below appear to
    encode the SurveyGizmo export layout (first 11 keys are respondent
    metadata, question keys start with an '(id)' prefix) -- confirm
    against a sample payload.
    """
    # Comma-separated question ids to keep, loaded once at class definition.
    keep_qids = CM.get_config('config.ini', 'secondary_etl', 'sg_del_qids')

    def __init__(self, json, surveyid):
        # json: list of response dicts; surveyid: owning survey id.
        self.json = json
        self.surveyid = surveyid

    def filter_out(self):
        """Return one dict per response, keeping only wanted, non-empty keys."""
        keeps = self.get_full_keys('question')
        filtered_dicts = []
        for dic in self.json:
            filtered_dic = {}
            for key in keeps:
                # Empty-string answers are dropped.
                if dic[key] != '':
                    filtered_dic[key] = dic[key]
            filtered_dicts.append(filtered_dic)
        return filtered_dicts

    @staticmethod
    def extract_id(string):
        """Return the text between the first '(' and the first ')'."""
        x = string.find("(") + 1
        y = string.find(")")
        return string[x:y]

    def get_full_keys(self, key_str):
        """Return metadata keys plus question keys whose id is in keep_qids.

        key_str: substring a key's prefix must contain (e.g. 'question').
        """
        d = self.json[0]
        keeps = self.keep_qids.split(',')
        full_keys = []
        keys = list(d.keys())
        # First 11 keys are respondent metadata and are always kept.
        full_keys.extend(keys[:11])
        for key in keys[12:]:
            # The id lives inside the first 18 characters of the key.
            small_key = key[:18]
            if Json.extract_id(small_key) in keeps and key_str in small_key:
                full_keys.append(key)
        return full_keys

    def to_df(self):
        """Flatten filtered responses into an Answer-record dataframe."""
        data = self.filter_out()
        all_ans = []
        for resp in data:
            srid = resp['id']
            # Skip the 11 metadata keys; the rest are answers.
            for key in list(resp.keys())[11:]:
                qid = Json.extract_id(key[:18])
                # The second parenthesized token holds the page/pipe id.
                page_pipe = Json.extract_id(key[15:])
                answer_str = str(resp[key])
                ans = Answer(qid=qid, srid=srid, answer=answer_str, surveyid=self.surveyid, page_pipe=page_pipe)
                answer = ans.record()
                all_ans.append(answer)
        all_ans = pd.DataFrame(all_ans, columns=Answer.cols())
        return all_ans
def check_columns_completeness(self):
    """Dump every RIC workbook's sheet columns into one comparison workbook.

    For each RIC file, reads the four BAP sheets (program, program
    youth, quarterly company, annual company), collects their column
    layouts via sheet_columns, and writes the four combined frames to
    '00 ALL_RIC_BAP_COLUMNS_FY19_Q1.xlsx' in the QA folder.

    Fix: `Workbook.get_sheet_by_name` is deprecated (and removed in
    current openpyxl) -- replaced by `wb[sheet_name]` subscripting;
    unused local `clm_lst` removed.
    """
    dfps = pd.DataFrame()
    dfpys = pd.DataFrame()
    dfqc = pd.DataFrame()
    dfac = pd.DataFrame()
    for fl in self.ric_files:
        Common.change_location(p.DATA)
        wb = openpyxl.load_workbook(fl, data_only=True)
        ric_file_name = fl[:-5]  # strip the '.xlsx' extension
        print('-' * 250)
        df_ps = self.sheet_columns(wb[WS.bap_program.value], ric_file_name, WS.bap_program.value)
        df_pys = self.sheet_columns(wb[WS.bap_program_youth.value], ric_file_name, WS.bap_program_youth.value)
        df_qc = self.sheet_columns(wb[WS.bap_company.value], ric_file_name, WS.bap_company.value)
        df_ac = self.sheet_columns(wb[WS.bap_company_annual.value], ric_file_name, WS.bap_company_annual.value)
        dfps = pd.concat([dfps, df_ps])
        dfpys = pd.concat([dfpys, df_pys])
        dfqc = pd.concat([dfqc, df_qc])
        dfac = pd.concat([dfac, df_ac])
    writer = pd.ExcelWriter('00 ALL_RIC_BAP_COLUMNS_FY19_Q1.xlsx')
    dfps.to_excel(writer, 'Program', index=False)
    dfpys.to_excel(writer, 'Program Youth', index=False)
    dfqc.to_excel(writer, 'Quarterly Company', index=False)
    dfac.to_excel(writer, 'Annual Company', index=False)
    Common.change_location(p.QA)
    print(os.getcwd())
    writer.save()
def push_bap_missing_data_to_temp_table():
    """Load the combined missing-data workbook into BAP.BAP_FY18Q3_Missing_Data."""
    # NOTE(review): joining "~" with an absolute second argument makes
    # os.path.join discard the home prefix -- the literal path below wins.
    current_path = os.path.join(os.path.expanduser("~"), '/Users/mnadew/Box Sync/Workbench/BAP/BAP_FY18/FY18_Q3/for ETL/Missing data Reports')
    os.chdir(current_path)
    missing = pd.read_excel('00 BAP Missing data Combined.xlsx', 'BAP Missing data')
    # CompanyID is not in the sheet; seed it with 0 for the temp table.
    missing['CompanyID'] = 0
    ordered = missing[['CompanyID', 'CompanyName', 'BasicName', 'Website', 'AnnualRevenue', 'NumberOfEmployees', 'FundingToDate', 'DataSource']]
    sql = 'INSERT INTO BAP.BAP_FY18Q3_Missing_Data VALUES (?, ?, ?, ?, ?, ?, ?, ?)'
    db.bulk_insert(sql, COM.df_list(ordered))
def connect(dev=False):
    """Open a pyodbc connection from config.ini; return None on failure.

    dev=True selects the 'devconn' connection string instead of 'conn'.
    """
    section_key = 'devconn' if dev else 'conn'
    try:
        con_str = Common.get_config('config.ini', 'db_connect', section_key)
        return pyodbc.connect(con_str)
    except Exception as ex:
        print('DB Server Connection Exception: {}'.format(ex))
        return None
def check_qs_exist(self, survey_id):
    """Return True when questions for survey_id are already stored."""
    query = CM.get_config("config.ini", "sql_queries", "check_questions_exist")
    query = query.replace("WHAT_SURVEY_ID", str(survey_id))
    result = DB.pandas_read(query)
    # First cell of the check query is truthy when rows exist.
    return bool(result.iloc[0][0])
def combine_missing_data():
    """Combine BAP missing-data files, add BasicName, and save one workbook."""
    combined = BapQuarterly.file.combine_bap_missing_source_file(
        current_path=fp.path_missing_bap_etl.value)
    # NaN -> None so downstream SQL-style handling behaves.
    combined = combined.where(pd.notnull(combined), None)
    combined['BasicName'] = combined.apply(
        lambda dfs: COM.get_basic_name(dfs.CompanyName), axis=1)
    combined = combined.where(pd.notnull(combined), None)
    print(combined.columns)
    subset = combined[['CompanyName', 'BasicName', 'Website', 'AnnualRevenue',
                       'NumberOfEmployees', 'FundingToDate', 'DataSource']]
    BapQuarterly.file.save_as_csv(subset, '00 BAP Missing data Combined.xlsx',
                                  os.getcwd(), 'BAP Missing data')
    print(subset.head())