Пример #1
0
 def delete_old_ans():
     """Archive old pipeline answers to an Excel file, then delete them from the DB."""
     # Fetch the rows that are about to be deleted so they can be archived first.
     archive_sql = CM.get_config('config.ini', 'secondary_etl', 'old_ans')
     archived = DB.pandas_read(archive_sql)
     DBInteractions.store_df(archived, '_OLD_PIPE_ANS')
     # With the backup stored, run the configured delete statement.
     delete_sql = CM.get_config('config.ini', 'secondary_etl', 'del_old_ans')
     DB.execute(delete_sql)
Пример #2
0
def _main_():
    """Pull annual-survey results, pivot them per RIC, and write one Excel file per RIC."""
    print("Getting SQL query")
    sql = CM.get_config("config_sql.ini", "ann_survey_18", "caprevjob_by_ric")
    print("SQL: {}".format(sql))
    print("Executing SQL to get dataframe of results")
    all_results = DB.pandas_read(sql)

    print("Creating column names")
    all_results['ConcatQ'] = all_results[['Cap/Rev/Emp', 'Question']].apply(lambda pair: ' - '.join(pair), axis=1)
    print("Splitting dataframe into one per RIC")
    split_frames = partition_by(all_results, "RIC_Program")
    print("Getting write path")
    user_path = os.path.expanduser("~")
    path = user_path + "/Box Sync/Workbench/BAP/Annual Survey FY2018/Results by RIC/"
    print("Path: {}".format(path))

    print("Writing files to disc:")
    for ric, frame in split_frames.items():
        # Build a composite row key, pivot answers wide, then recover the ids.
        frame['rid_cid'] = frame['resp_id'].astype(str) + '_' + frame['Company_ID'].astype(str)
        frame = spread(frame, 'rid_cid', 'ConcatQ', 'Answer')
        frame['rid_cid'] = frame.index
        frame['_resp_id'], frame['_Company_ID'] = frame['rid_cid'].str.split('_', 1).str
        frame = frame.apply(pd.to_numeric, errors='ignore')
        # Move the two id columns (currently last) to the front.
        cols = frame.columns.tolist()
        cols = cols[-2:] + cols[:-2]
        frame = frame[cols]
        # Strip the leading underscore used to avoid name clashes during the pivot.
        frame.columns = [c[1:] if str(c)[0] == '_' else c for c in cols]
        frame = frame.drop('rid_cid', axis=1)
        filename = "{} Survey Results".format(ric)
        write_to_xl(frame, filename, path, 'Results')
        print("Wrote {} to path: {}".format(filename, path))
Пример #3
0
class Json:

    # Comma-separated question ids (from config) identifying which
    # SurveyGizmo question columns to keep.
    keep_qids = CM.get_config('config.ini', 'secondary_etl', 'sg_del_qids')

    def __init__(self, json, surveyid):
        """Wrap a parsed SurveyGizmo JSON export (list of response dicts) for survey *surveyid*."""
        self.json = json
        self.surveyid = surveyid

    def filter_out(self):
        """Return the responses reduced to the kept keys, dropping empty-string answers."""
        keeps = self.get_full_keys('question')
        filtered_dicts = []
        for dic in self.json:
            filtered_dic = {}
            for key in keeps:
                # NOTE(review): raises KeyError if a response lacks one of the
                # kept keys — presumably every response shares the same schema.
                if dic[key] != '':
                    filtered_dic[key] = dic[key]
            filtered_dicts.append(filtered_dic)

        return filtered_dicts

    @staticmethod
    def extract_id(string):
        """Return the text between the first '(' and the first ')' of *string*."""
        x = string.find("(") + 1
        y = string.find(")")
        return string[x:y]

    def get_full_keys(self, key_str):
        """Return leading metadata keys plus question keys whose id is configured to be kept.

        Keys come from the first response dict.  The first 11 keys are kept
        unconditionally (presumably respondent metadata); the remainder are
        kept when the id embedded in their first 18 characters appears in
        keep_qids AND *key_str* occurs in that prefix.
        NOTE(review): keys[:11] followed by keys[12:] silently skips the key
        at index 11 — confirm that is intentional.
        """
        d = self.json[0]
        keeps = self.keep_qids.split(',')
        full_keys = []
        keys = list(d.keys())
        full_keys.extend(keys[:11])
        for key in keys[12:]:
            # The question id '(...)' is expected within the first 18 chars.
            small_key = key[:18]
            if Json.extract_id(small_key) in keeps and key_str in small_key:
                full_keys.append(key)
        return full_keys

    def to_df(self):
        """Flatten the filtered responses into a DataFrame of Answer records."""
        data = self.filter_out()
        all_ans = []
        for resp in data:
            srid = resp['id']
            # Skip the first 11 metadata keys; the rest are answer columns.
            for key in list(resp.keys())[11:]:
                qid = Json.extract_id(key[:18])
                # Presumably a second '(...)' group after position 15 holds the
                # page-pipe id — confirm against the raw key format.
                page_pipe = Json.extract_id(key[15:])
                answer_str = str(resp[key])
                ans = Answer(qid=qid,
                             srid=srid,
                             answer=answer_str,
                             surveyid=self.surveyid,
                             page_pipe=page_pipe)
                answer = ans.record()
                all_ans.append(answer)
        all_ans = pd.DataFrame(all_ans, columns=Answer.cols())
        return all_ans
Пример #4
0
    def check_qs_exist(self, survey_id):
        """Return True when at least one question row exists for *survey_id*.

        Runs the configured 'check_questions_exist' query with the survey id
        substituted in and interprets the first cell of the result truthily.
        """
        sql = CM.get_config("config.ini", "sql_queries",
                            "check_questions_exist")
        sql = sql.replace("WHAT_SURVEY_ID", str(survey_id))
        check = DB.pandas_read(sql)

        # bool(...) replaces the redundant `if x: return True else: return
        # False` branch and normalizes numpy truth values to a plain bool.
        return bool(check.iloc[0][0])
Пример #5
0
 def connect(dev=False):
     """Open a pyodbc connection using the configured connection string.

     Reads the 'devconn' config entry when *dev* is True, 'conn' otherwise.
     Returns None (after printing the error) when the connection fails.
     """
     config_key = 'devconn' if dev else 'conn'
     try:
         con_str = Common.get_config('config.ini', 'db_connect', config_key)
         return pyodbc.connect(con_str)
     except Exception as ex:
         print('DB Server Connection Exception: {}'.format(ex))
         return None
Пример #6
0
	def __init__(self):
		self._path1 = Common.get_config('config.ini', 'box_file_path', 'path_validI')
		self._path2 = Common.get_config('config.ini', 'box_file_path', 'path_validII')
		self._path3 = Common.get_config('config.ini', 'box_file_path', 'path_validIII')

		self.pathQ1 = os.path.join(os.path.expanduser(self._path1))
		self.pathQ2 = os.path.join(os.path.expanduser(self._path2))
		self.pathQ3 = os.path.join(os.path.expanduser(self._path3))

		self._path_quarter_one = Common.get_config('config.ini', 'box_file_path', 'path_bap_validation_quarter_one')
		self._path_quarter_two = Common.get_config('config.ini', 'box_file_path', 'path_bap_validation_quarter_two')
		self._path_quarter_three = Common.get_config('config.ini', 'box_file_path', 'path_bap_validation_quarter_three')

		self.path_quarter_one = os.path.join(os.path.expanduser(self._path_quarter_one))
		self.path_quarter_two = os.path.join(os.path.expanduser(self._path_quarter_two))
		self.path_quarter_three = os.path.join(os.path.expanduser(self._path_quarter_three))

		self.year = 2018
		self.Q1 = '\'Q1\''
		self.Q2 = '\'Q2\''
		self.Q3 = '\'Q3\''
		self.Q4 = '\'Q4\''

		self.Q1CompanyData_sheet = None
		self.Q2CompanyData_sheet = None
		self.Q3CompanyData_sheet = None

		self.Q1CompanyData = None
		self.Q2CompanyData = None
		self.Q3CompanyData = None

		self.Q1CompanyData_dc = None
		self.Q2CompanyData_dc = None
		self.Q3CompanyData_dc = None

		self.Q1CompanyData_fact_ric = None
		self.Q2CompanyData_fact_ric = None
		self.Q3CompanyData_fact_ric = None

		self.Q1CompanyData_rollup = None
		self.Q2CompanyData_rollup = None
		self.Q3CompanyData_rollup = None

		self.source_file = None

		self.quarter_one_files = []
		self.quarter_two_files = []
		self.quarter_three_files = []

		self.dict_list = []

		self.rics = ['alliance', 'communitech', 'haltech', 'guelph', 'iion', 'innovationfactory', 'launchlab', 'mars',
		             'niagara', 'noic', 'norcat', 'ottawa', 'ric', 'spark', 'ssmic', 'venturelab', 'wetec']

		self.batch = '''SELECT * FROM Config.ImportBatch WHERE Year = {} AND Quarter = {} AND 
						DataSourceID = {} AND SourceSystemId = {} AND ImportStatusID = 5'''

		self.select = 'SELECT * FROM {} WHERE BatchID = {}'

		self.selectQ1 = '''
Пример #7
0
    def get_campaigns(self, api_token, survey_id, session_variables,
                      surveys_df):
        """Fetch campaigns for a survey from the API and insert any new ones into the DB.

        When *survey_id* is 'w', interactively prompts for (and validates) a
        survey id first.  Returns the full campaigns dataframe from the API,
        or None if the user chose to return to the main menu.
        """
        if survey_id == 'w':
            while type(survey_id) != int:
                try:
                    survey_id = int(
                        input(
                            "Enter ID of survey that you would like to retrieve campaign data for: "
                        ))
                    if self.return_to_main(survey_id) == 1:
                        return
                    # Validate once — the original called validate_survey_id
                    # twice back to back, an apparent copy-paste duplication.
                    survey_id = self.validate_survey_id(
                        survey_id, session_variables, api_token, surveys_df)
                except ValueError:
                    continue

        campaigns_df = sg_campaign.sg_campaigns_df(survey_id, api_token)
        print(campaigns_df)
        campaigns_df["id"] = campaigns_df["id"].apply(pd.to_numeric,
                                                      errors='ignore')

        # remove campaigns from df that are already in DB
        c_sql = CM.get_config("config.ini", "sql_queries",
                              "campaigns_for_survey")
        c_sql = c_sql.replace("WHAT_SURVEY_ID", str(survey_id))
        db_cmpgns = DB.pandas_read(c_sql)

        if db_cmpgns is not None:
            db_cmpgns = db_cmpgns.apply(pd.to_numeric, errors='ignore')
            merged = pd.merge(campaigns_df,
                              db_cmpgns,
                              how='left',
                              indicator=True,
                              on="id")
            cmpgns_not_in_db2 = merged[merged['_merge'] ==
                                       'left_only'].drop("_merge", axis=1)
        else:
            # BUG FIX: the original checked `db_cmpgns is not None` but merged
            # unconditionally, crashing when the DB read returned None.  If
            # nothing came back from the DB, every API campaign is new.
            cmpgns_not_in_db2 = campaigns_df

        # insert campaigns into DB
        if len(cmpgns_not_in_db2) > 0:
            insert_cmpgns_sql = "insert_campaigns"

            self.df_to_db(cmpgns_not_in_db2,
                          insert_cmpgns_sql,
                          remove_single_quotes=False,
                          clean_numeric_cols=True)

        return campaigns_df
Пример #8
0
def partition_by(df, col_name):
    """Split *df* into one sub-dataframe per distinct RIC value from the DB.

    # df, str -> dict mapping RIC name -> rows where df[col_name] == RIC
    """
    sql = CM.get_config("config_sql.ini", "ann_survey_18", "distinct_RICs")
    rics = DB.pandas_read(sql)['RIC_Program'].tolist()

    return {
        ric: df.query('{} == \"{}\"'.format(str(col_name), str(ric)))
        for ric in rics
    }
Пример #9
0
    def del_survey_components(self, survey_id):
        """Delete every component of *survey_id* from the DB, then print leftover row counts."""
        del_sql = CM.get_config("config.ini", "sql_queries",
                                "del_all_for_survey")
        DB.execute(del_sql.replace("WHAT_SURVEY", str(survey_id)))
        print("\nDeletion attempt was made. Survey components check:")

        # Component name -> config key of the SELECT used to count leftovers.
        components = (
            ("questions", "select_questions"),
            ("options", "select_options"),
            ("answers", "select_answers"),
            ("responses", "select_responses"),
            ("emails", "select_emails"),
            ("campaigns", "select_campaigns"),
        )

        for name, config_key in components:
            check_sql = CM.get_config("config.ini", "sql_queries",
                                      config_key).replace(
                                          "WHAT_SURVEY", str(survey_id))
            leftover = DB.pandas_read(check_sql)
            print("\nCount of {}: {}".format(name, len(leftover)))

        return
Пример #10
0
def save_as_excel(dfs, file_name, path_key):
    """Write each dataframe in *dfs* to its own sheet of one Excel workbook.

    The workbook is created as *file_name* inside the Box directory
    configured under 'box_file_path'/*path_key* (relative to the user's
    home directory).  Errors are printed rather than raised, preserving
    the original best-effort behavior.
    """
    print(os.getcwd())
    print(len(dfs))
    path = Common.get_config('config.ini', 'box_file_path', path_key)
    box_path = os.path.join(os.path.expanduser("~"), path)
    os.chdir(box_path)
    try:
        writer = pd.ExcelWriter(file_name)
        # BUG FIX: the original incremented with `j += j`, which leaves j at
        # 0 forever, so every dataframe was written to the same 'SHEET 0'
        # sheet (each write clobbering the last).  enumerate gives each
        # dataframe a distinct sheet.
        for j, df in enumerate(dfs):
            sheet_name = 'SHEET {}'.format(j)
            df.to_excel(writer, sheet_name, index=False)
        writer.save()
    except Exception as ex:
        print(ex)
Пример #11
0
	def __init__(self):
		"""Load the batch service, company SQL statements, and matching-file path from config."""
		def sql_cfg(section, key):
			# All SQL statements live in config_sql.ini.
			return CM.get_config('config_sql.ini', section, key)

		self.batch = BatchService()

		self.sql_update = sql_cfg('db_sql_general', 'sql_update')
		self.sql_data_by_batch = sql_cfg('db_sql_common', 'sql_data_by_batch')

		self.sql_dim_company = sql_cfg('da_sql_company', 'sql_dim_company')
		self.sql_dim_company_source = sql_cfg('da_sql_company', 'sql_dim_company_source')
		self.sql_dim_company_insert = sql_cfg('da_sql_company', 'sql_dim_company_insert')
		self.sql_dim_company_source_insert = sql_cfg('da_sql_company', 'sql_dim_company_source_insert')
		self.sql_dim_company_source_update = sql_cfg('da_sql_company', 'sql_dim_company_source_update')

		self.dim_company_id = 0
		self.dim_company_source_id = 0
		self.path = CM.get_config('config.ini', 'box_file_path', 'path_bap_company_matching')

		self.file = FileService(self.path)
Пример #12
0
    def df_to_db(self,
                 df,
                 sql_config_header,
                 remove_single_quotes=True,
                 return_vals=False,
                 clean_numeric_cols=False):
        """Bulk-insert *df* into the DB via the INSERT template named by *sql_config_header*.

        The template's WHAT_HEADERS / WHAT_VALUES placeholders are filled in
        from the dataframe's columns.  When clean_numeric_cols is True, a
        per-value pass replaces 'nan' / zero-date / empty-string sentinels
        with None and coerces int64 values to plain ints.  Returns the value
        rows when return_vals is True, otherwise None.
        """

        df_headers, df_qmarks, df_vals = self.get_sql_params(
            df, remove_single_quotes=remove_single_quotes)
        df_header_str = self.get_header_str(df_headers)
        df_sql = CM.get_config("config.ini", "sql_queries", sql_config_header)
        df_sql = df_sql.replace("WHAT_HEADERS",
                                df_header_str).replace("WHAT_VALUES",
                                                       df_qmarks)

        if clean_numeric_cols:
            for lst in df_vals:
                for i in range(len(lst)):
                    element = lst[i]
                    try:
                        # Normalize SQL-unfriendly sentinel values to NULL.
                        if str(element).lower() == "nan" or str(
                                element) == "0000-00-00 00:00:00" or str(
                                    element) == '':
                            new_val = None
                            # Tuples are immutable, so rebuild via the helper.
                            if type(lst) == tuple:
                                lst = self.replace_tuple_val_at_index(
                                    lst, i, new_val)
                        if np.dtype(element) == 'int64':
                            new_val = int(str(lst[i]))
                            if type(lst) == tuple:
                                lst = self.replace_tuple_val_at_index(
                                    lst, i, new_val)
                    except AttributeError:
                        continue
                    except TypeError:
                        continue
                    except ValueError:
                        continue
                # NOTE(review): the rebuilt tuple is written back into
                # df_vals only when there is exactly one row; multi-row tuple
                # payloads silently keep their original (uncleaned) values —
                # confirm this asymmetry is intentional.
                if len(df_vals) == 1 and type(df_vals[0]) == tuple:
                    df_vals[0] = lst

        DB.bulk_insert(df_sql, df_vals)

        if return_vals:
            return df_vals
Пример #13
0
    def write_survey_entries(self, api_token):
        """Insert surveys present in the API but missing from the DB, creating one import batch per survey."""
        year, quarter = CM.fiscal_year_quarter()

        api_surveys_df = self.get_surveys(api_token, prin=False).apply(
            pd.to_numeric, errors='ignore')

        db_surveys_sql = CM.get_config("config.ini", "sql_queries", "surveys")
        db_surveys_df = DB.pandas_read(db_surveys_sql).apply(pd.to_numeric,
                                                             errors='ignore')

        # left_only rows of an outer merge = surveys the DB does not have yet.
        merged = pd.merge(api_surveys_df,
                          db_surveys_df[['id']],
                          how='outer',
                          indicator=True,
                          on="id")
        new_surveys = merged[merged['_merge'] == 'left_only'].drop("_merge",
                                                                   axis=1)

        # write new surveys to db, one at a time so BatchService can be executed for each one
        for index in range(len(new_surveys)):
            row = new_surveys.iloc[index][:]
            df = pd.DataFrame([list(row.values)], columns=list(new_surveys))

            batch = BatchService()
            batch_df = batch.create_new_batch(datasource=-1,
                                              systemsource=50,
                                              year=year,
                                              quarter=quarter)
            batch_id = batch_df.iloc[-1][0]

            # add batchID to end of df
            df['BatchID'] = int(batch_id)

            self.df_to_db(df, "insert_survey_entry")
Пример #14
0
    def load(self):
        """Archive the staged answers dataframe, then bulk-insert it into the DB."""
        df = self.data
        DBInteractions.store_df(df, '_NEW_PIPE_ANS')

        sql = CM.get_config('config.ini', 'sql_queries', 'insert_as')
        sql = sql.replace(
            'WHAT_HEADERS',
            'id, question_id, option_id, survey_response_id, answer, page_pipe'
        )
        sql = sql.replace('WHAT_VALUES', '?,?,?,?,?,?')

        # bulk_insert expects a tuple when there is exactly one row and a
        # list of values per row otherwise.
        single_row = len(df) == 1
        insert_vals = []
        for _, row in df.iterrows():
            record = [row[header] for header in df.columns]
            insert_vals.append(tuple(record) if single_row else record)

        DB.bulk_insert(sql, insert_vals, dev=False)
Пример #15
0
	def entities_script(self):
		"""Load every Crunchbase entity INSERT statement from config into a matching attribute.

		For each entity name below, reads the config key 'sql_<name>_insert'
		and stores it on an attribute of the same name.
		"""
		entities = (
			'acquired', 'acquiree', 'acquisition', 'category', 'org_category',
			'founders', 'funding_rounds', 'funds', 'image', 'investments',
			'investors', 'ipo', 'job', 'news', 'offices', 'partners',
			'sub_organization', 'team', 'websites', 'person', 'invested_in',
		)
		for entity in entities:
			key = 'sql_{}_insert'.format(entity)
			setattr(self, key, CM.get_config('config_sql.ini', 'db_sql_crunchbase', key))
Пример #16
0
    def load_resps_ans_contacts__lists(self, survey_id, api_token):
        """Sync respondents, answers, contacts and contact lists for *survey_id* from the API into the DB.

        High-level flow (each step also announces itself via print):
          1. Pull responses/answers and contact lists from the API.
          2. Insert contact lists and contacts missing from the DB.
          3. Build Contacts__Lists link rows and insert the new ones.
          4. Update responses whose date_submitted changed; insert brand-new ones.
          5. Re-insert answers for changed/new responses, then verify the
             answer count and roll back this session's inserts on mismatch.
        """

        # get api resps
        print("\nGetting API responses (respondents)")
        api_ans, api_resps = self.get_ans(survey_id, api_token)

        print("\nGetting contact lists on account")
        contact_lists = self.get_contact_lists(survey_id, api_token)
        contact_lists = contact_lists.apply(pd.to_numeric, errors='ignore')

        print("\nGetting contact lists from DB")
        lists_in_db_sql = CM.get_config("config.ini", "sql_queries",
                                        "contact_lists")
        lists_in_db = DB.pandas_read(lists_in_db_sql)

        print(
            "\nChecking for diffs b/t API contact lists and DB contact lists")
        # left_only rows after the merge = lists the DB does not have yet.
        lists_not_in_db = pd.merge(contact_lists,
                                   lists_in_db,
                                   how='outer',
                                   indicator=True,
                                   on="id")
        lists_not_in_db2 = lists_not_in_db[lists_not_in_db['_merge'] ==
                                           'left_only'].drop("_merge", axis=1)

        if len(lists_not_in_db2) > 0:
            print(
                "\nOne or more new contact lists detected on acct. Loading into DB now"
            )
            insert_lists_sql = "insert_contactlists"
            lists_not_in_db2 = lists_not_in_db2.drop_duplicates()
            self.df_to_db(lists_not_in_db2,
                          insert_lists_sql,
                          remove_single_quotes=False)

        print(
            "\nGathering all contacts from all lists on acct into single dataframe"
        )
        all_contacts = []
        for list_id in contact_lists["id"]:
            contact_list = self.get_contacts(api_token, list_id)
            all_contacts.append(contact_list)

        # gather all contacts from current survey
        all_campaigns = self.get_campaigns(api_token, survey_id, 0, 0)
        for campaign_id in all_campaigns['id']:
            campaign_contacts = self.get_contacts(api_token,
                                                  list_id=0,
                                                  survey_id=survey_id,
                                                  campaign_id=campaign_id)
            # An int return presumably signals "no contacts"/an error code —
            # skip that campaign.
            if type(campaign_contacts) == int:
                continue
            else:
                all_contacts.append(campaign_contacts)

        all_contacts = pd.concat(all_contacts)
        all_contacts = all_contacts.apply(pd.to_numeric, errors='ignore')
        # Emails are lower-cased on both sides so the merges below match
        # case-insensitively.
        all_contacts['email_address'] = all_contacts[
            'email_address'].str.lower()

        print("\nGathering all contacts from DB")
        all_contacts_sql = CM.get_config("config.ini", "sql_queries",
                                         "all_contacts")
        all_db_contacts = DB.pandas_read(all_contacts_sql)
        all_db_contacts = all_db_contacts.apply(pd.to_numeric, errors='ignore')
        all_db_contacts['email_address'] = all_db_contacts[
            'email_address'].str.lower()

        contact_merge = pd.merge(all_contacts[[
            "id", "mdc_contact_id", "contact_list_id", "email_address",
            "firstname", "lastname"
        ]],
                                 all_db_contacts,
                                 how='left',
                                 on='email_address',
                                 indicator=True)

        new_contacts = contact_merge[[
            "id_x", "email_address", "firstname_x", "lastname_x"
        ]][contact_merge['_merge'] == 'left_only']
        new_contacts.columns = ["id", "email_address", "firstname", "lastname"]

        if len(new_contacts) > 0:
            print("Writing new contacts to DB.")
            insert_cs_sql = "insert_contacts"
            new_contacts = new_contacts.drop_duplicates()
            self.df_to_db(new_contacts, insert_cs_sql, clean_numeric_cols=True)
        else:
            print("\nNo new contacts to write to DB.")

        # Re-read contacts so just-inserted rows get their DB-assigned ids.
        updated_db_contacts = DB.pandas_read(all_contacts_sql)
        updated_db_contacts = updated_db_contacts.apply(pd.to_numeric,
                                                        errors='ignore')
        updated_db_contacts['email_address'] = updated_db_contacts[
            'email_address'].str.lower()

        updated_contact_merge = pd.merge(all_contacts[[
            "id", "mdc_contact_id", "contact_list_id", "email_address",
            "firstname", "lastname"
        ]],
                                         updated_db_contacts,
                                         how='left',
                                         on='email_address',
                                         indicator=True)
        api_contacts_lists_df = updated_contact_merge[[
            "id_x", "id_y", "contact_list_id"
        ]]
        api_contacts_lists_df = api_contacts_lists_df.apply(pd.to_numeric,
                                                            errors='ignore')
        api_contacts_lists_df.columns = [
            "sg_cid", "mdc_contact_id", "contact_list_id"
        ]

        print("\nGetting Contacts__Lists table from DB.")
        db_cl_sql = CM.get_config("config.ini", "sql_queries",
                                  "all_contacts__lists")
        db_contacts_lists_df = DB.pandas_read(db_cl_sql)
        db_contacts_lists_df = db_contacts_lists_df.apply(pd.to_numeric,
                                                          errors='ignore')

        cl_merge = pd.merge(api_contacts_lists_df,
                            db_contacts_lists_df,
                            how='left',
                            indicator=True,
                            on=["sg_cid", "mdc_contact_id", "contact_list_id"])
        new_cl = cl_merge[["sg_cid", "mdc_contact_id", "contact_list_id"
                           ]][cl_merge["_merge"] == 'left_only']
        new_cl = new_cl.apply(pd.to_numeric, errors='ignore')

        # get api answers where response_id = resps.id

        # get db resps where resps.survey_id = survey_id
        print("\nGetting all responses for this survey from DB.")
        r_sql = CM.get_config("config.ini", "sql_queries",
                              "all_resps_for_survey")
        r_sql = r_sql.replace("WHAT_SURVEY_ID", str(survey_id))
        db_resps = DB.pandas_read(r_sql)
        db_resps["date_submitted"] = db_resps["date_submitted"].astype(str)

        print(
            "\nDetecting responses that have changed (looking for discrepancy between DB date_submitted and API date_submitted)"
        )
        # changed_resps = []
        # NOTE(review): `i` is assigned but never used — apparently left over
        # from a loop the merge below replaced.
        i = 0
        changed_resps = pd.merge(db_resps[["id", "date_submitted"]],
                                 api_resps[["id", "date_submitted"]],
                                 how='outer',
                                 indicator=True,
                                 on=["id", "date_submitted"])
        # right_only = an (id, date_submitted) pair seen only in the API, i.e.
        # a response whose submission date changed since it was stored.
        changed_resps = changed_resps[[
            "id"
        ]][changed_resps["_merge"] == 'right_only']
        changed_resps = changed_resps["id"].tolist()
        print("{} responses changed".format(len(changed_resps)))

        print("\nDetecting responses in API that are not in DB at all.")
        resps_not_in_db = pd.merge(api_resps,
                                   db_resps[["id"]],
                                   how='outer',
                                   indicator=True,
                                   on="id")
        resps_not_in_db2 = resps_not_in_db[resps_not_in_db['_merge'] ==
                                           'left_only'].drop("_merge", axis=1)

        inserted_resps = []

        # SECOND INSERT OF contacts__lists
        new_cl = pd.merge(new_cl,
                          db_contacts_lists_df,
                          how='left',
                          indicator=True,
                          on=["sg_cid"])
        new_cl = new_cl[new_cl["_merge"] == 'left_only']
        new_cl = new_cl[["sg_cid", "mdc_contact_id_x", "contact_list_id_x"]]
        new_cl.columns = ["sg_cid", "mdc_contact_id", "contact_list_id"]

        if len(new_cl) > 0:
            print("Writing new entries to Contacts__Lists")
            insert_cl_sql = "insert_contacts_lists"
            new_cl = new_cl.drop_duplicates()
            self.df_to_db(new_cl, insert_cl_sql, clean_numeric_cols=True)
        else:
            print("\nNo new Contacts__Lists entries to write to DB.")

        # update Survey_Responses where date_submitted has changed for existing response
        if len(changed_resps) > 0:
            print(
                "\nUpdating DB respondent entries that have changed (have diff date_submitted)"
            )
            resp_headers, resp_qmarks, resp_vals = self.get_sql_params(
                api_resps)
            resp_header_str = self.get_header_str(resp_headers)

            update_r_sql = CM.get_config("config.ini", "sql_queries",
                                         "update_rs")
            # Build a per-response UPDATE: SET [col] = 'val', ... WHERE id = X.
            # Columns 0-1 are skipped (presumably the id/key columns).
            for id in changed_resps:
                j = changed_resps.index(id)
                where_sql = "WHERE id = " + str(id)
                set_strs = ""
                for i in range(2, len(resp_headers)):
                    header = resp_headers[i]
                    val = resp_vals[j][i]
                    set_str = "[" + header + "]" + " = '" + str(val) + "', "
                    set_strs = set_strs + set_str
                final_update_sql = update_r_sql + set_strs[:-2] + " " + where_sql
                DB.execute(final_update_sql)

        # insert resps that aren't db at all
        if len(resps_not_in_db2) > 0:
            print("\nInserting new responses that aren't in DB at all")
            insert_resp_sql = "insert_rs"
            resps_not_in_db2 = resps_not_in_db2.drop_duplicates()

            self.df_to_db(resps_not_in_db2,
                          insert_resp_sql,
                          remove_single_quotes=False)

            for id in resps_not_in_db2["id"]:
                inserted_resps.append(id)

        # write to db only answers where answers.response_id is in list of response ids written to db above

        # del where id in changed_resps, then insert
        if len(changed_resps) > 0:
            print(
                "\nDeleting answers of respondents who updated their response."
            )
            update_a_sql = CM.get_config("config.ini", "sql_queries",
                                         "update_a_sql")
            changed_ans_df = api_ans[api_ans["survey_response_id"].isin(
                changed_resps)]
            ans_headers, ans_qmarks, ans_vals = self.get_sql_params(
                changed_ans_df)

            del_ans_sql = CM.get_config("config.ini", "sql_queries", "del_ans")
            for id in changed_resps:
                del_ans_sql_for_id = del_ans_sql.replace(
                    "WHAT_RESP_ID", str(id))
                DB.execute(del_ans_sql_for_id)
                # Changed responses get their answers re-inserted below, so
                # they count as "inserted this session".
                inserted_resps.append(id)

        # insert ans where id in inserted_resps
        if len(inserted_resps) > 0:
            print(
                "\nInserting answers into DB (includes updated responses and new responses)"
            )
            ans_insert_df = api_ans[api_ans["survey_response_id"].isin(
                inserted_resps)]
            inserts_ans_sql = "insert_as"
            ans_insert_df = ans_insert_df.drop_duplicates()
            ans_vals = self.df_to_db(ans_insert_df,
                                     inserts_ans_sql,
                                     remove_single_quotes=False,
                                     return_vals=True)

        elif len(inserted_resps) == 0:
            print("\nNo new answers to insert or update.")
            return

        print("\nChecking that all answers were inserted")
        check_ans_sql = CM.get_config("config.ini", "sql_queries", "check_ans")
        # Build a comma-separated id list for the IN (...) clause.
        inserted_resp_ids_str = ''
        for id in inserted_resps:
            inserted_resp_ids_str = inserted_resp_ids_str + str(id) + ", "
        inserted_resp_ids_str = inserted_resp_ids_str[:-2]
        check_ans_sql = check_ans_sql.replace("WHAT_RESP_IDS",
                                              inserted_resp_ids_str)
        ans_inserted_this_session = DB.pandas_read(check_ans_sql)

        if len(ans_inserted_this_session) != len(ans_vals):

            print(
                "\nNot all answers were loaded. Rolling back insert operation "
                "(deleting answers and responses inserted into DB)")
            # del ans inserted this session, if any
            del_ans_sql = CM.get_config("config.ini", "sql_queries",
                                        "del_ans_by_respids")
            del_ans_sql = del_ans_sql.replace("WHAT_RESP_IDS",
                                              inserted_resp_ids_str)
            DB.execute(del_ans_sql)

            # del resps inserted this session, if any
            del_resps_sql = CM.get_config("config.ini", "sql_queries",
                                          "del_resps_by_list")
            del_resps_sql = del_resps_sql.replace("WHAT_RESP_IDS",
                                                  inserted_resp_ids_str)
            DB.execute(del_resps_sql)

        elif len(ans_inserted_this_session) == len(ans_vals):
            print(
                "All answers successfully inserted. This means that all the responses that were inserted during this "
                "session have all their respective answers in the DB now.")
        return
Пример #17
0
	def __init__(self):
		"""Wire up CrunchBase API endpoints, Box file paths and SQL statements, then run entities_script()."""
		super().__init__('', '', datasource=enum.DataSourceType.CRUNCH_BASE)
		self.file = FileService(os.getcwd())
		self.user_key = CM.get_config('config.ini', 'crunch_base', 'user_key')
		# NOTE(review): api_token starts with '&' while api_tokens starts with '?' --
		# url_org/url_people/url_loc use the '&' form, so their base URLs presumably
		# already end with a query string; confirm against the config values.
		self.api_token = '&user_key=' + self.user_key + '&page={}'
		self.api_tokens = '?user_key=' + self.user_key + '&page={}'
		self.api_org_token = '?user_key=' + self.user_key

		# Paginated API endpoint templates ('{}' is the page number).
		self.url_org = CM.get_config('config.ini', 'crunch_base', 'url_org') + self.api_token
		self.url_people = CM.get_config('config.ini', 'crunch_base', 'url_person') + self.api_token
		self.url_cat = CM.get_config('config.ini', 'crunch_base', 'url_cat') + self.api_tokens
		self.url_loc = CM.get_config('config.ini', 'crunch_base', 'url_loc') + self.api_token

		self.path = CM.get_config('config.ini', 'box_file_path', 'path_crunchbase')

		# Insert statements per entity type.
		self.org_summary = CM.get_config('config_sql.ini', 'db_sql_crunchbase', 'sql_organizations_insert')
		self.people = CM.get_config('config_sql.ini', 'db_sql_crunchbase', 'sql_people_insert')
		self.category = CM.get_config('config_sql.ini', 'db_sql_crunchbase', 'sql_category_insert')
		self.location = CM.get_config('config_sql.ini', 'db_sql_crunchbase', 'sql_location_insert')

		# Organization summary/detail statements ('sql_orgnization_insert' is the
		# literal config key -- typo is in the config, do not "fix" it here).
		self.orgs_api_url = CM.get_config('config_sql.ini', 'db_sql_crunchbase', 'sql_orgs_summary')
		self.orgs_detail_insert = CM.get_config('config_sql.ini', 'db_sql_crunchbase', 'sql_orgnization_insert')
		self.orgs_summary_update = CM.get_config('config_sql.ini', 'db_sql_crunchbase', 'sql_orgs_summary_update')

		self.orgs_detail_update = CM.get_config('config_sql.ini', 'db_sql_crunchbase', 'sql_orgs_detail_update')

		self.data = None
		# CSV output name template: CB_<entity>_<timestamp>.csv (presumably).
		self.file_name = 'CB_{}_{}.csv'
		# self.file = FileService(self.path)
		self.org_uuid = None
		self.fk_uuid = 'org_uuid'
		self.one_to_one = 'OneToOne'

		self.i = 0

		# Kicks off the entity ETL immediately on construction.
		self.entities_script()

		# Column orderings expected by the insert statements above.
		self.col_funding = ['uuid', 'org_uuid', 'permalink', 'api_path', 'web_path', 'api_url', 'funding_type', 'series',
					   'series_qualifier', 'announced_on',
					   'announced_on_trust_code', 'closed_on', 'closed_on_trust_code', 'money_raised',
					   'money_raised_currency_code',
					   'money_raised_usd', 'target_money_raised', 'target_money_raised_currency_code',
					   'target_money_raised_usd',
					   'pre_money_valuation', 'pre_money_valuation_currency_code', 'pre_money_valuation_usd', 'rank',
					   'created_at',
					   'updated_at']
		self.org_columns = ['org_uuid','batch', 'company_id', 'permalink', 'permalink_aliases', 'api_path', 'web_path', 'api_url','name', 'BasicName',
					   'also_known_as', 'short_description', 'description', 'profile_image_url',
					   'primary_role', 'role_company', 'role_investor', 'role_group', 'role_school',
					   'investor_type', 'founded_on', 'founded_on_trust_code', 'is_closed', 'closed_on',
					   'closed_on_trust_code', 'num_employees_min', 'num_employees_max', 'stock_exchange',
					   'stock_symbol', 'total_funding_usd', 'number_of_investments', 'homepage_url',
					   'contact_email', 'phone_number', 'rank', 'created_at', 'updated_at', 'fetched']
		self.org_summary_col = ['uuid', 'permalink', 'api_path', 'web_path', 'api_url', 'name', 'stock_exchange',
						  'stock_symbol', 'primary_role', 'short_description', 'profile_image_url',
						  'domain', 'homepage_url', 'facebook_url', 'twitter_url', 'linkedin_url',
						  'city_name', 'region_name', 'country_code', 'created_at', 'updated_at']
		self.office_col = ['uuid', 'org_uuid', 'name', 'street_1', 'street_2', 'postal_code', 'city', 'region',
						   'country','city_web_path','region_code2','region_web_path', 'country_code2',
						   'country_code3', 'country_web_path', 'latitude','longitude', 'created_at', 'updated_at' ]

		self.category_col = ['uuid' ,'org_uuid', 'name',
							 'web_path', 'category_groups',
							 'created_at', 'updated_at']
Пример #18
0
    def get_emails(self,
                   survey_id,
                   api_token,
                   session_variables,
                   surveys_df,
                   campaign_id='w'):
        """Fetch email-message data for a survey campaign and insert new rows into the DB.

        The sentinel value 'w' for survey_id/campaign_id means "not chosen yet":
        the user is prompted until a valid integer is entered.

        Returns the full emails DataFrame (including rows already in the DB),
        or None when the user backs out to the main menu.
        """
        if survey_id == 'w':
            # Prompt until we get an integer that validates against the survey list.
            while not isinstance(survey_id, int):
                try:
                    survey_id = int(input(
                        "Enter ID of survey that you would like to retrieve campaign data for: "
                    ))
                    if menu_actions.return_to_main(survey_id) == 1:
                        return
                    survey_id = self.validate_survey_id(
                        survey_id, session_variables, api_token, surveys_df)
                except ValueError:
                    continue

        while not isinstance(campaign_id, int):
            try:
                campaign_id = int(input(
                    "Enter ID of campaign that you would like to retrieve email msg data for: "
                ))
                if menu_actions.return_to_main(campaign_id) == 1:
                    return
            except ValueError:
                continue

        emails_df = sg_emails.sg_emails_df(survey_id, campaign_id, api_token)
        emails_df["id"] = emails_df["id"].apply(pd.to_numeric, errors='ignore')

        print(emails_df)

        # Left-anti join on "id" drops emails already present in the DB.
        e_sql = CM.get_config("config.ini", "sql_queries",
                              "emails_for_campaign")
        e_sql = e_sql.replace("WHAT_CAMPAIGN", str(campaign_id))
        db_em = DB.pandas_read(e_sql)

        merged = pd.merge(emails_df,
                          db_em,
                          how='left',
                          indicator=True,
                          on="id")
        new_emails = merged[merged['_merge'] == 'left_only'].drop("_merge", axis=1)

        # Insert only the emails not yet in the DB.
        if len(new_emails) > 0:
            self.df_to_db(new_emails,
                          "insert_emails",
                          remove_single_quotes=False,
                          clean_numeric_cols=True)

        return emails_df
Пример #19
0
class BapQuarterly:
	# ETL driver for the quarterly BAP (Business Acceleration Program) data loads.
	# Widen pandas console output for debugging prints.
	desired_width = 420
	pd.set_option('display.width', desired_width)

	# Current fiscal year/quarter; quarter is decremented -- presumably the ETL
	# processes the previous (just-closed) quarter. TODO confirm.
	year, quarter = COM.fiscal_year_quarter(datetime.datetime.utcnow())
	quarter = quarter - 1
	batch = BatchService()
	bap_path_source = COM.get_config('config.ini', 'box_file_path', 'path_bap_source')
	bap_path_etl = COM.get_config('config.ini', 'box_file_path', 'path_bap_etl')
	file = FileService(bap_path_source)
	qa = BapQA()
	# NOTE(review): hard-coded season tag; confirm it should track year/quarter above.
	season = '19_Q1'
	company = CompanyService()

	@staticmethod
	def show_bap_quarterly_template():
		"""Open/show the BAP quarterly source template via the class FileService."""
		BapQuarterly.file.show_source_file()

	'''
	Checks if all the RICs send the right template with all the columns exists.
	'''
	@staticmethod
	def qa_bap_spread_sheet_by_ric():
		"""Run per-RIC template/column checks on files in path_bap_etl, QA output at path_bap_qa."""
		BapQuarterly.qa.check_rics_file(fp.path_bap_etl, fp.path_bap_qa)

	@staticmethod
	def combine_rics_bap_quarterly(combine_for):
		"""Combine the per-RIC BAP workbooks into one 'QA_'/'ETL_'-prefixed spreadsheet.

		combine_for: Combine enum member; FOR_QA selects the 'QA_' prefix.
		"""
		program, program_youth, company_quarterly, company_annually = BapQuarterly.file.read_source_file(
			FileType.SPREAD_SHEET.value, DS.BAP, combine_for, current_path=fp.path_bap_qa.value)
		# NOTE(review): season hard-coded to FY19 Q1; dynamic form kept in the trailing comment.
		file_name = '{}'.format(FN.bap_combined.value.format('19','1'))#(str(BapQuarterly.year - 1)[-2:], BapQuarterly.quarter))
		if combine_for == Combine.FOR_QA:
			file_name = 'QA_' + file_name
		else:
			file_name = 'ETL_' + file_name

		print('\nSave spreadsheet file named: {}'.format(file_name))

		save_location = COM.change_working_directory(fp.path_bap_combined.value)
		print(str(save_location))

		writer = pd.ExcelWriter(file_name)

		program.to_excel(writer, WS.bap_program.value, index=False)
		program_youth.to_excel(writer, WS.bap_program_youth.value, index=False)
		company_quarterly.to_excel(writer, WS.bap_company.value, index=False)
		# Annual company sheet only exists for Q3 submissions.
		if BapQuarterly.quarter == 3:
			company_annually.to_excel(writer, WS.bap_company_annual.value, index=False)
		writer.save()

		print('rics_spreasheet_combined.')  # NOTE(review): typo in log message ('spreasheet')

	@staticmethod
	def qa_bap_ric_combined(combined=False):
		"""QA the combined RIC workbook; `combined` is forwarded to check_rics_file."""
		BapQuarterly.qa.check_rics_file(fp.path_bap_combined, fp.path_bap_combined_dest, combined)

	@staticmethod
	def transfer_csv_program(dataframe):
		"""Bulk-insert RIC program rows from `dataframe` into the database."""
		db.bulk_insert(sql.sql_bap_ric_program_insert.value, COM.df_list(dataframe))
	
	@staticmethod
	def transfer_csv_program_youth(dataframe):
		"""Bulk-insert RIC youth-program rows from `dataframe` into the database."""
		db.bulk_insert(sql.sql_bap_ric_program_youth_insert.value, COM.df_list(dataframe))
	
	@staticmethod
	def bulk_insert_quarterly_data(dataframe):
		"""Bulk-insert quarterly venture rows from `dataframe` into the database."""
		db.bulk_insert(sql.sql_bap_ric_venture_quarterly_insert.value, COM.df_list(dataframe))

	@staticmethod
	def bulk_insert_annual_data(dataframe):
		"""Bulk-insert annual venture rows from `dataframe` into the database."""
		db.bulk_insert(sql.sql_bap_ric_venture_annual_insert.value, COM.df_list(dataframe))

	@staticmethod
	def push_bap_quarterly_to_database():
		"""Read the combined ETL workbook and bulk-insert the quarterly (and, in Q3, annual) sheets."""
		COM.change_working_directory(fp.path_bap_combined.value)

		# NOTE(review): workbook name hard-coded to FY19Q1 -- update per season.
		bap = pd.read_excel('ETL_RICS_BAP_COMBINED_FY19Q1.xlsx', sheet_name=None)

		# Program sheets are currently not loaded.
		# BapQuarterly.transfer_csv_program(bap['csv_program16'])
		# BapQuarterly.transfer_csv_program_youth(bap['csv_program16_youth'])
		BapQuarterly.bulk_insert_quarterly_data(bap['Quarterly Company Data'])
		if BapQuarterly.quarter == 3:
			BapQuarterly.bulk_insert_annual_data(bap['Annual Company data'])

	@staticmethod
	def create_bap_batch():
		"""Create BAP batches for company quarterly data (and annual data in Q3)."""
		svc = BatchService()
		distinct_batch = sql.sql_bap_distinct_batch.value
		program = db.pandas_read(distinct_batch.format(tbl.ric_program.value, BapQuarterly.year, BapQuarterly.quarter))
		program_youth = db.pandas_read(distinct_batch.format(tbl.ric_program_youth.value, BapQuarterly.year, BapQuarterly.quarter))
		company = db.pandas_read(distinct_batch.format(tbl.venture_data.value, BapQuarterly.year, BapQuarterly.quarter))
		company_annual = db.pandas_read(sql.sql_annual_bap_distinct_batch.value.format(tbl.venture_annual.value, BapQuarterly.year))

		# Program-level batch creation is currently disabled; only company batches are made.
		svc.create_bap_batch(company, BapQuarterly.year, BapQuarterly.quarter, tbl.venture_data.value, WS.bap_company.value, ss.RICCD_bap.value)
		if BapQuarterly.quarter == 3:
			svc.create_bap_batch(company_annual, BapQuarterly.year, BapQuarterly.quarter, tbl.venture_annual.value, WS.bap_company_annual.value, ss.RICACD_bap.value)
	
	@staticmethod
	def transfer_bap_company():
		"""Move matched BAP company records via the CompanyService."""
		CompanyService().move_company_data()

	@staticmethod
	def get_proper_values(df):
		"""Normalize BAP columns in place (stage level id, yes/no flags) and return the frame."""
		df['StageLevelID'] = df.apply(lambda r: COM.get_stage_level(r.Stage), axis=1)
		for flag_col in ('High Potential y/n', 'Social Enterprise y/n', 'Youth'):
			df[flag_col] = df.apply(lambda r, c=flag_col: COM.get_yes_no(r[c]), axis=1)
		# df['Funding Raised to Date $CAN'] = df.apply(lambda dfs: BapQuarterly.split_funding_range(dfs['Funding Raised to Date $CAN']), axis=1)
		return df

	@staticmethod
	def transfer_fact_ric_company_data():
		"""Load FY-Q4 RIC company rows, normalize their values, and bulk-insert them.

		Side effects: reads from and writes to the database.
		"""
		df = db.pandas_read(sql.sql_bap_fact_ric_data_fyq4.value)
		df_frc = BapQuarterly.get_proper_values(df)
		# Age is not derivable from this extract; inserted as NULL.
		df_frc['Age'] = None
		values_list = COM.df_list(df_frc)

		db.bulk_insert(sql.sql_bap_fact_ric_company_insert.value, values_list)

	@staticmethod
	def split_funding_range(funding):
		"""Map a funding-range label (e.g. '$100-149k') to its lower-bound amount.

		Returns the amount as a string (matching the original behavior) for known
		labels, or the int 0 for unknown/None input.
		NOTE: '$2-5M' and '$2M-5M' are alternate spellings of the same bucket.
		"""
		lower_bounds = {
			'<$10k': '1000',
			'$10-24k': '10000',
			'$25-49k': '25000',
			'$50-99k': '50000',
			'$100-149k': '100000',
			'$150-249k': '150000',
			'$250-499k': '250000',
			'$500-999k': '500000',
			'$1M-1.9M': '1000000',
			'$2-5M': '2000000',
			'$2M-5M': '2000000',
			'>$5M': '5000000',
		}
		return lower_bounds.get(funding, 0)

	@staticmethod
	def update_month_year(df):
		"""Derive a mid-month 'Date of Incorporation' from year/month columns and print progress.

		NOTE(review): assignment into `row` from iterrows() mutates a copy -- the
		value is NOT written back into `df`; confirm whether that is intended.
		"""
		i = 0
		for index, row in df.iterrows():
			if row['Incorporate year (YYYY)'] is not None and row['Incorporation month (MM)'] is not None:
				row['Date of Incorporation'] = parser.parse('{}-{}-15'.format(row['Incorporate year (YYYY)'], row['Incorporation month (MM)']))
				i+=1
				print('{}. {}'.format(i, row['Date of Incorporation']))
		# One-off data-repair query generators kept below for reference.
		# for index, row in df.iterrows():
		# 	if row['Incorporate year (YYYY)'] is not None and len(row['Incorporate year (YYYY)']) > 4:
		# 		update = 'UPDATE BAP.QuarterlyCompanyData SET [Incorporate year (YYYY)] = {} WHERE ID = {} -- {}'.format(parser.parse(row['Incorporate year (YYYY)']).year, row['ID'], parser.parse(row['Incorporate year (YYYY)']))
		# 		print(update)
		# dfs = df[df['Incorporate year (YYYY)'].isnull()]
		# for index, row in dfs.iterrows():
		# 	if row['Incorporation month (MM)'] is not None and len(row['Incorporation month (MM)']) > 2:
		# 		update = 'UPDATE BAP.QuarterlyCompanyData SET [Incorporate year (YYYY)] = {} WHERE ID = {}'.format(parser.parse(row['Incorporation month (MM)']).year, row['ID'])
		# 		print(update)
		# i= 0
		# for index, row in df.iterrows():
		# 	if row['Incorporation month (MM)'] is not None and len(row['Incorporation month (MM)']) > 2:
		# 		i += 1
		# 		update = 'UPDATE BAP.QuarterlyCompanyData SET [Incorporation month (MM)] = {} WHERE ID = {}'.format(parser.parse(row['Incorporation month (MM)']).month, row['ID'])
		# 		print(update)
		print('')

	@staticmethod
	def _aggregation_rows(df, metric_ids, date_id):
		"""Build FactRICAggregation value rows from positional columns 7..7+len(metric_ids)-1.

		Each row yields one value list per metric: [DataSource, RICDateID, MetricID,
		BatchID, AggregateNumber, ModifiedDate, CreatedDate, Youth]. Missing-data
		sentinels ('no data', 'n\\a', '-', 'n/a', 'nan') become -1.0.
		"""
		missing_markers = ['no data', 'n\\a', '-', 'n/a', 'nan']
		rows = []
		for _, row in df.iterrows():
			for offset, metric_id in enumerate(metric_ids):
				col = 7 + offset
				val = []
				val.append(int(row['DataSource']))  # DataSource
				val.append(int(date_id))  # RICDateID
				val.append(int(metric_id))  # MetricID
				val.append(int(row['BatchID']))  # BatchID
				if str(row[col]) in missing_markers:
					val.append(-1.0)
					print(row[col])
				else:
					val.append(round(float(row[col]), 2))  # AggregateNumber
				now = str(datetime.datetime.today())[:23]
				val.append(now)  # ModifiedDate
				val.append(now)  # CreatedDate
				val.append(row['Youth'])  # Youth
				rows.append(val)
		return rows

	@staticmethod
	def transfer_fact_ric_aggregation():
		"""Aggregate program and program-youth metrics into FactRICAggregation.

		Refactor: the two near-identical row-building loops (13 program metrics,
		2 youth metrics) are deduplicated into _aggregation_rows().
		NOTE(review): fiscal year/quarter hard-coded to (2018, 4); the dynamic
		(BapQuarterly.year, BapQuarterly.quarter) form was already commented out.
		"""
		date_id = COM.get_dateid(datevalue=None)
		metric_prg = [130, 132, 133, 129, 134, 63, 77, 60, 68, 67, 135, 136, 137]
		metric_prg_youth = [134, 138]

		df_program = db.pandas_read(sql.sql_company_aggregate_program.value.format(2018, 4))
		df_program_youth = db.pandas_read(sql.sql_company_aggregate_program_youth.value.format(2018, 4))

		values = BapQuarterly._aggregation_rows(df_program, metric_prg, date_id)
		values.extend(BapQuarterly._aggregation_rows(df_program_youth, metric_prg_youth, date_id))

		for idx in range(len(values)):
			print('{}. {}'.format(idx, values[idx]))
		db.bulk_insert(sql.sql_bap_fact_ric_aggregation_insert.value, values)
	
	@staticmethod
	def generate_bap_rolled_up():
		"""Roll up FactRICCompanyData per company/data-source/quarter and save as a spreadsheet.

		For each (company, data source) pair, walks its reported quarters and builds
		one rolled-up row (YTD sums, current-quarter values, descriptive attributes).
		Per-company failures are swallowed, counted in `total`, and the company id
		collected in `company`. NOTE(review): `previous_quarter` can be referenced
		before assignment on some quarter paths -- the try/except masks this.
		"""
		company = []
		i = 0
		df_frcd = db.pandas_read(sql.sql_bap_fact_ric_company.value.format(BapQuarterly.year))
		print('Number of record to process {} '.format(len(df_frcd)))
		df_fact_ds_quarter = db.pandas_read(sql.sql_bap_report_company_ds_quarter.value.format(BapQuarterly.year))
		df_FactRICRolledUp = pd.DataFrame(columns=clm.clmn_fact_ric_rolled_up.value)
		df_industry = db.pandas_read(sql.sql_industry_list_table.value)
		cq = BapQuarterly.quarter
		total = 0
		if not df_frcd.empty:
			for _, row in df_fact_ds_quarter.iterrows():
				company_id = row['CompanyID']
				data_source_id = row['DataSourceID']
				
				i = i + 1
				print('{}. {}'.format(i, company_id))
				# ['Q1', 'Q2']
				ls_q = []
				ls_quarters = \
				df_fact_ds_quarter.query('CompanyID == {} & DataSourceID == {}'.format(company_id, data_source_id))[
					'MinFQ'].tolist()
				ls = df_frcd.query('CompanyID == {}'.format(company_id))['FiscalQuarter'].tolist()
				
				# Reduce 'Qn' labels to their digit for comparison against cq.
				for itm in ls_quarters:
					ls_q.append(itm[-1:])
				# if str(cq) not in ls_q:
				# 	ls_q.append(cq)
				df_agg = df_frcd.query('CompanyID == {} & DataSourceID == {}'.format(company_id, data_source_id))
				print(ls_q)
				for quarter in ls_q:
					if int(quarter) == cq and len(df_agg) > 1:
						current_quarter = 'FiscalQuarter == \'Q{}\''.format(quarter)
						previous_quarter = 'FiscalQuarter == \'Q{}\''.format(int(quarter) - 1)
					elif int(quarter) == cq and len(df_agg) == 1:
						current_quarter = 'FiscalQuarter == \'Q{}\''.format(quarter)
					elif int(quarter) < cq:
						current_quarter = 'FiscalQuarter == \'Q{}\''.format(quarter)
					
					try:
						batch_id = df_agg.query(current_quarter)['BatchID'].values[0] if not df_agg.query(
							current_quarter).empty else None
						# Placeholder date ids -- never populated from data here.
						min_date = -1
						current_date = -1
						vhs = df_agg.query(current_quarter)['VolunteerMentorHours'].values[0] if not df_agg.query(
							current_quarter).empty else None
						adv = df_agg.query(current_quarter)['AdvisoryServicesHours'].values[0] if not df_agg.query(
							current_quarter).empty else None
						
						# YTD aggregates: full sum for the current quarter, otherwise the
						# sum minus the current-quarter (or fallback prior-quarter) value.
						if int(quarter) == cq:
							vhs_agg = df_agg['VolunteerMentorHours'].sum()
							adv_agg = df_agg['AdvisoryServicesHours'].sum()
							funding_agg = df_agg['FundingCurrentQuarter'].sum()
						else:
							vhs_agg = float(df_agg['VolunteerMentorHours'].sum()) - float(
								df_agg.query('FiscalQuarter == \'Q{}\''.format(cq))['VolunteerMentorHours'].values[
									0]) if not df_agg.query('FiscalQuarter == \'Q{}\''.format(cq)).empty else float(
								df_agg.query('FiscalQuarter == \'Q{}\''.format(cq - 1))['VolunteerMentorHours'].values[
									0])
							adv_agg = float(df_agg['AdvisoryServicesHours'].sum()) - float(
								df_agg.query('FiscalQuarter == \'Q{}\''.format(cq))['AdvisoryServicesHours'].values[
									0]) if not df_agg.query('FiscalQuarter == \'Q{}\''.format(cq)).empty else float(
								df_agg.query('FiscalQuarter == \'Q{}\''.format(cq - 1))['AdvisoryServicesHours'].values[
									0])
							funding_agg = float(df_agg['FundingCurrentQuarter'].sum()) - float(
								df_agg.query('FiscalQuarter == \'Q{}\''.format(cq))['FundingCurrentQuarter'].values[
									0]) if not df_agg.query('FiscalQuarter == \'Q{}\''.format(cq)).empty else float(
								df_agg.query('FiscalQuarter == \'Q{}\''.format(cq - 1))['FundingCurrentQuarter'].values[
									0])
						
						modified_date = datetime.datetime.utcnow().__str__()[:23]
						
						# Descriptive attributes: current quarter's value, falling back to previous quarter.
						stage = df_agg.query(current_quarter)['Stage'].values[0] if not df_agg.query(
							current_quarter).empty else df_agg.query(previous_quarter)['Stage'].values[0]
						industry_sector = df_agg.query(current_quarter)['IndustrySector'].values[0] if not df_agg.query(
							current_quarter).empty else df_agg.query(previous_quarter)['IndustrySector'].values[0]
						socialEnterprise = df_agg.query(current_quarter)['SocialEnterprise'].values[
							0] if not df_agg.query(current_quarter).empty else \
						df_agg.query(previous_quarter)['SocialEnterprise'].values[0]
						highPotential = df_agg.query(current_quarter)['HighPotential'].values[0] if not df_agg.query(
							current_quarter).empty else df_agg.query(previous_quarter)['HighPotential'].values[0]
						youth = df_agg.query(current_quarter)['Youth'].values[0] if not df_agg.query(
							current_quarter).empty else df_agg.query(previous_quarter)['Youth'].values[0]
						dateOfIncorporation = df_agg.query(current_quarter)['DateOfIncorporation'].values[
							0] if not df_agg.query(current_quarter).empty else \
						df_agg.query(previous_quarter)['DateOfIncorporation'].values[0]
						
						annual_revenue = df_agg.query(current_quarter)['AnnualRevenue'].values[0] if not df_agg.query(
							current_quarter).empty else None
						funding_current_quarter = df_agg.query(current_quarter)['FundingCurrentQuarter'].values[
							0] if not df_agg.query(current_quarter).empty else None
						
						number_of_employees = df_agg.query(current_quarter)['NumberEmployees'].values[
							0] if not df_agg.query(current_quarter).empty else None
						intake_date = df_agg.query(current_quarter)['IntakeDate'].values[0] if not df_agg.query(
							current_quarter).empty else None
						lvl2_industry_name = df_industry.query('Industry_Sector == \'{}\''.format(industry_sector))[
							'Lvl2IndustryName'].values[0] if not df_industry.query(
							'Industry_Sector == \'{}\''.format(industry_sector)).empty else None
						dd = {'DataSourceID': data_source_id,
							  'CompanyID': company_id,
							  'MinDate': min_date,
							  'CurrentDate': current_date,
							  'VolunteerYTD': vhs_agg,
							  'AdvisoryHoursYTD': adv_agg,
							  'VolunteerThisQuarter': vhs,
							  'AdvisoryThisQuarter': adv,
							  'FiscalQuarter': quarter,
							  'BatchID': batch_id,
							  'ModifiedDate': modified_date,
							  'SocialEnterprise': socialEnterprise,
							  'Stage': stage,
							  'HighPotential': highPotential,
							  'Lvl2IndustryName': lvl2_industry_name,
							  'FiscalYear': BapQuarterly.year,
							  'Youth': youth,
							  'DateOfIncorporation': dateOfIncorporation,
							  'AnnualRevenue': annual_revenue,
							  'NumberEmployees': number_of_employees,
							  'FundingToDate': funding_current_quarter,
							  'IndustrySector': industry_sector,
							  'IntakeDate': intake_date,
							  'FundingCurrentQuarter': funding_agg
							  }
						print(dd.values())
						df = pd.DataFrame([dd], columns=clm.clmn_fact_ric_rolled_up.value)
						df_FactRICRolledUp = pd.concat([df_FactRICRolledUp, df])
					except Exception as ex:
						total = total + 1
						company.append(company_id)
						print(ex)
			df_FactRICRolledUp = df_FactRICRolledUp[clm.clmn_fact_ric_rolled_up.value]
			BapQuarterly.file.save_as_csv(df_FactRICRolledUp,
										  'BAP_Rolled_UP_{}.xlsx'.format(str(datetime.datetime.today())),
										  '/Users/mnadew/Box Sync/mnadew/IE/data/ETL/BAP')
			print(company)
			print('{} + {} = {}/ 6236 '.format(len(df_FactRICRolledUp), total, len(df_FactRICRolledUp) + total))
	
	# @staticmethod
	# def generate_bap_report():
	# 	pass
	
	@staticmethod
	def create_postal_code_list():
		"""Return a DataFrame of postal codes within 2500 (units per pcdb) of 'T3Z'.

		Bug fix: the original ended with a bare ``dfs`` expression (a no-op), so
		the assembled frame was discarded; it is now returned.
		"""
		pcdb = PostalCodeDatabase()
		results = pcdb.get_postalcodes_around_radius('T3Z', 2500)
		cols = ['postalcode', 'city', 'province', 'longitude', 'latitude', 'timezone', 'dst']
		frames = [pd.DataFrame([r.__dict__], columns=cols) for r in results]
		dfs = pd.concat(frames) if frames else pd.DataFrame(columns=cols)
		return dfs
		
	@staticmethod
	def read_postal_code():
		"""Read postal_code_utf8.csv and bulk-insert a 1000-row window into the DB.

		Bug fix: the original called ``BapQuarterly.insert``, which does not exist
		on this class -- the insert helper is ``bap_insert``.
		"""
		path = '/Users/mnadew/Box Sync/mnadew/PRD_DB_REVIEW'
		print(os.getcwd())
		os.chdir(path)
		print(os.getcwd())
		columns = ['FSALDU', 'LATITUDE', 'LONGITUDE', 'COMMNAME', 'CSDNAMEE', 'CSDNAMEF', 'CSDTYPENE', 'PRABB']
		df = pd.read_csv('postal_code_utf8.csv')
		df = df[columns]
		print(len(df))
		i = 846000
		j = 847000
		# Loads the single slice 846000:847000; widen the bound to load more windows.
		while j < 847001:
			print('From {} to {}'.format(i, j))
			df_ins = df.iloc[i:j]
			BapQuarterly.bap_insert(df_ins)
			print(len(df_ins))
			i, j = i + 1000, j + 1000
			print('From {} to {}'.format(i, j))

	@staticmethod
	def bap_insert(df):
		"""Bulk-insert postal-code rows from `df` into the database."""
		db.bulk_insert(sql.sql_postal_code_insert.value, COM.df_list(df))

	@staticmethod
	def main():
		"""Interactive menu loop for the BAP quarterly ETL (runs until interrupted)."""
		while True:
			fy, fq = COM.fiscal_year_quarter()
			print('_' * 100)
			print('| WELCOME TO BAP QUARTERLY ETL\n| FISCAL YEAR:     {}\n| FISCAL QUARTER:     {}'.format(fy, fq - 1))
			print('_' * 100)
			menu = '''
			1: Show Source File for BAP quarterly FY18-Q3
			1a: CHECK Columns Completeness
			2: QA spreadsheet by RIC
			3: Combine RICs BQ spreadsheet
			4: QA RICs BQ combined spreadsheet
			5: Push RICs data ro the database
			6: Generate Batch for RICs FY18 -Q3
			7: Match Company name
			8: Push Company data to DIM COMPANY and DIM COMPANY SOURCE
			9: Push quarterly company data to FACT RIC COMPANY DATA
			10: Push Annual company data to FACT RIC COMPANY DATA
			11: push Program and Program youth data to FACT RIC Aggregation
			'''
			print(menu)

			option = str(input('\nChoose your option:\t'))
			# Only options 1, 1a and 2 are implemented; 3-12 were no-op placeholders.
			actions = {
				'1': BapQuarterly.show_bap_quarterly_template,
				'1a': BapQuarterly.qa.check_columns_completeness,
				'2': BapQuarterly.qa.check_rics_file,
			}
			handler = actions.get(option)
			if handler is not None:
				handler()

	@staticmethod
	def tech_alliance_intake_date_TEMP():
		"""TEMP helper: print SELECT ... UNION statements for TechAlliance rows (DataSource 6).

		NOTE(review): `r[2] is not None or r[2] == 'nan'` looks inverted -- likely
		meant `... and r[2] != 'nan'`. Also `i` is clobbered by the iterrows() index,
		so the `i = i + 1` counter is ineffective. Left as-is (one-off script).
		"""
		# update = 'UPDATE BAP.QuarterlyCompanyData SET [Date of Intake] = \'{}\' WHERE [Company Name] = \'{}\' AND DataSource = 6'
		update = ' SELECT * FROM BAP.QuarterlyCompanyData WHERE [Company Name] = \'{}\' AND DataSource = 6 UNION'
		current_path = os.path.join(os.path.expanduser("~"), '/Users/mnadew/Box Sync/Workbench/BAP/BAP_FY18/FY18_Q3/for ETL/Missing data Reports')
		os.chdir(current_path)
		df = pd.read_excel('01 TechAlliance_BAP_qtrly_perCompany_MISSING DATA(2).xlsx', 'Quarterly Company data')
		# df['BasicName'] = df.apply(lambda dfs: COM.update_cb_basic_name(dfs['Company Name']), axis=1)
		i = 0
		for i, r in df.iterrows():
			if r[2] is not None or r[2]== 'nan':
				# print(r[2])
				# Reorders a DD-MM-YYYY-style string into YYYY-MM-DD.
				year = r[2][-4:]
				month = r[2][3:5]
				date = r[2][:2]
				i = i + 1
				# print('{}. {} ---> {}-{}-{}'.format(i, r[2], year, month, date))
				d = '{}-{}-{}'.format(year, month, date)
				# print(update.format(d, r[0]))
				print(update.format(r[0]))

	@staticmethod
	def combine_missing_data():
		"""Combine the per-RIC missing-data workbooks, add BasicName, and save the result."""
		quarterly_missing = BapQuarterly.file.combine_bap_missing_source_file(
			current_path=fp.path_missing_bap_etl.value)
		# Replace NaN with None so downstream DB inserts get NULLs.
		quarterly_missing = quarterly_missing.where(pd.notnull(quarterly_missing), None)
		quarterly_missing['BasicName'] = quarterly_missing.apply(lambda dfs: COM.get_basic_name(dfs.CompanyName),
																 axis=1)
		df = quarterly_missing.where(pd.notnull(quarterly_missing), None)
		print(df.columns)
		dfs = df[['CompanyName', 'BasicName', 'Website', 'AnnualRevenue', 'NumberOfEmployees', 'FundingToDate',
				  'DataSource']]
		BapQuarterly.file.save_as_csv(dfs, '00 BAP Missing data Combined.xlsx', os.getcwd(), 'BAP Missing data')
		print(dfs.head())

	@staticmethod
	def push_bap_missing_data_to_temp_table():
		"""Load the combined missing-data workbook and bulk-insert it into BAP.BAP_FY18Q3_Missing_Data."""
		folder = os.path.join(os.path.expanduser("~"), '/Users/mnadew/Box Sync/Workbench/BAP/BAP_FY18/FY18_Q3/for ETL/Missing data Reports')
		os.chdir(folder)
		frame = pd.read_excel('00 BAP Missing data Combined.xlsx', 'BAP Missing data')
		frame['CompanyID'] = 0
		ordered = frame[['CompanyID','CompanyName','BasicName','Website','AnnualRevenue','NumberOfEmployees','FundingToDate','DataSource']]
		insert_stmt = 'INSERT INTO BAP.BAP_FY18Q3_Missing_Data VALUES (?, ?, ?, ?, ?, ?, ?, ?)'
		db.bulk_insert(insert_stmt, COM.df_list(ordered))

	@staticmethod
	def bap_company_basic_name():
		"""Regenerate the BasicName column for BAP companies via the shared DB helper."""
		db.update_basic_name(sql.sql_bap_basic_name.value,
							 'ID',
							 'CompanyName',
							 sql.sql_bap_basic_name_update.value)
Пример #20
0
        #   run sql to delete old ans
        del_old_ans_sql = CM.get_config('config.ini', 'secondary_etl',
                                        'del_old_ans')
        DB.execute(del_old_ans_sql)

    def etl(self):
        """Run the ETL pipeline: clean the dataframe, then load it.

        Deleting old answers (DBInteractions.delete_old_ans) is currently disabled.
        """
        # clean
        self.clean_df()
        # delete old ans
        # DBInteractions.delete_old_ans()
        # load
        self.load()


if __name__ == '__main__':
    # SQL used downstream to select answers by question ids.
    select_qs = CM.get_config("config_sql.ini", "ann_survey_18",
                              "select_ans_by_qids")
    # Survey API coordinates (v4, survey 50021327; domain 'restapica' --
    # presumably SurveyGizmo/Alchemer, per the sg_* helpers elsewhere in this file).
    domain = 'restapica'
    v = '4'
    survey = 'survey'
    surveyid = '50021327'
    resp = 'surveyresponse'
    # 200 results per page; the long filter key excludes deleted responses.
    params = {
        'resultsperpage': 200,
        "filter[field][0]=status&filter[operator][0]=!=&filter[value][0]":
        'deleted',
        'page': 1
    }
    api = API(API_TOKEN, domain, v, survey, surveyid, resp, params)
    print('Fetching data from API')
    data = api.get_data(test=False)
    j = Json(data, surveyid)
Пример #21
0
def _main_():
    ''' PLEASE INSTALL CERTIFICATE AND REMOVE THIS, WHEREVER THE CERTIFICATE IS '''
    # urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    """Menu-selected actions for SGizmo API
    """

    menu_actions.write_survey_entries(API_TOKEN)

    menu = menu_actions.construct_menu()

    # 'w' is used throughout this function as a "not yet set" sentinel
    # (any non-int value would do; see the `while type(...) != int` loops).
    survey_id = 'w'
    selection = 0
    campaign_id = 'w'
    table_dict = 'w'
    surveys_df = None
    # Menu-item numbers already completed this session; used both to
    # strike through finished actions and to skip repeat downloads.
    session_variables = []

    print("\nYou must enter a survey ID for this session.")
    # sleep(1)

    print("Downloading list of surveys now.")
    # sleep(1)

    surveys_df = menu_actions.get_surveys(API_TOKEN, with_stats=True)

    if surveys_df is not None:
        session_variables.append(1)

        # while type(survey_id) != int:
        #     try:
    survey_id = input("Enter a survey ID from above to use for this session: ")
    survey_id = menu_actions.validate_survey_id(survey_id,
                                                session_variables,
                                                API_TOKEN,
                                                surveys_df,
                                                mandatory=True)
    # except ValueError:
    #     continue

    menu_title = "\nMenu\n Quit: 99\n Back to main menu: -1\n============================================"

    # write all surveys from SG to DB, including BatchID

    # Main event loop: selection is reset to 0 at the bottom of each pass,
    # so the loop only terminates via the explicit `break` on 99 below.
    while selection == 0:

        # main menu
        print(menu_title)

        # get survey title from survey_df
        survey_title = surveys_df.loc[surveys_df["id"] == str(survey_id),
                                      ["title"]]
        try:
            survey_title = survey_title.values[0][0]
        except IndexError:
            # No row matched the current survey_id; title stays None.
            survey_title = None

        for key in menu:
            if survey_id != 'w' and key == 17:
                # specify current session survey
                print(
                    str(key) + ".\t" + str(menu[key]) + " (Set as " +
                    str(survey_id) + " : " + str(survey_title) + ")")
            elif key in session_variables:
                # strike through if action has been completed this session
                # (U+0336 is the combining long-stroke-overlay character)
                print('\u0336'.join(str(key) + ". " + menu[key]) + '\u0336' +
                      "   DONE")
            else:
                print(str(key) + ".\t" + menu[key])

        # Prompt until a valid in-range option is entered; 99 breaks out
        # explicitly (it is outside range(1, len(menu)+1)).
        while type(selection) != int or selection not in range(
                1,
                len(menu) + 1):
            try:
                selection = int(input("\nEnter a valid option number: "))
                if selection == 99:
                    print("You entered 99.\nI can't believe you've done this.")
                    print('Farewell...')
                    break
            except ValueError:
                continue

        # get surveys
        if selection == 1:

            if 1 not in session_variables:
                surveys_df = menu_actions.get_surveys(API_TOKEN)
                if surveys_df is not None:
                    session_variables.append(1)

            elif 1 in session_variables:
                print(surveys_df)
                print("Surveys already downloaded from SGizmo API.")
                sleep(0.75)
                print("Returning to main menu.")
                sleep(1)

        # get campaigns
        elif selection == 3:

            campaigns_df = menu_actions.get_campaigns(API_TOKEN, survey_id,
                                                      session_variables,
                                                      surveys_df)
            session_variables.append(3)

        # get email msgs
        elif selection == 5:

            if 3 not in session_variables:
                print("You must download campaign data first.")
                get_campaigns = input("Get campaign data now? (y/n): ")
                if get_campaigns.lower() == "y":
                    campaigns_df = menu_actions.get_campaigns(
                        API_TOKEN, survey_id, session_variables, surveys_df)
                    if campaigns_df is not None:
                        session_variables.append(5)
                else:
                    print("Returning to main menu.")
                    sleep(1)

            # NOTE(review): this guard checks for 2, but campaign download
            # records 3 in session_variables — likely should be
            # `elif 3 in session_variables:`; confirm intent.
            elif 2 in session_variables:
                emails_df = menu_actions.get_emails(survey_id, API_TOKEN,
                                                    session_variables,
                                                    surveys_df)
                if emails_df is not None:
                    session_variables.append(5)

        # get contact lists
        elif selection == 9:

            if 9 not in session_variables:
                contact_list_df = menu_actions.get_contact_lists(
                    survey_id, API_TOKEN)
                # contact_list_df = sg_contact_lists.sg_contactlists_df(API_TOKEN)
                print(contact_list_df)
                if contact_list_df is not None:
                    session_variables.append(9)
            elif 9 in session_variables:
                print(contact_list_df)
                print(
                    "Contact lists already downloaded. Returning to main menu."
                )
                sleep(1.5)

        # get contacts on list
        elif selection == 10:

            list_id = 'w'
            while type(list_id) != int:
                try:
                    list_id = int(
                        input(
                            "Enter ID of contact list that you would like to retrieve: "
                        ))
                except ValueError:
                    continue
            # -1 is the "back to main menu" convention from menu_title.
            if list_id != -1:
                contacts_df = menu_actions.get_contacts(API_TOKEN, list_id)

        # get respondents
        elif selection == 11:

            if 11 not in session_variables:
                resps_df = menu_actions.get_resps(survey_id, API_TOKEN)
                if resps_df is not None:
                    session_variables.append(11)
            elif 11 in session_variables:
                print("Already downloaded responses. Returning to main menu")
                sleep(1.5)

        # get questions or options
        elif selection in [12, 13]:

            if 12 not in session_variables and 13 not in session_variables:
                qs_df, os_df = menu_actions.get_qsos(survey_id, API_TOKEN)
                if qs_df is not None:
                    session_variables.append(12)
                if os_df is not None:
                    session_variables.append(13)

            # NOTE(review): `selection == 12` alone already implies the
            # parenthesized clause, so the `or (...)` part is redundant
            # (same for the 13 branch below).
            if selection == 12 or (selection == 12
                                   and 12 in session_variables):
                print(qs_df)

            elif selection == 13 or (selection == 13
                                     and 13 in session_variables):
                print(os_df)

        # get answers
        elif selection == 14:

            if 14 not in session_variables:
                answers_df, resps_df = menu_actions.get_ans(
                    survey_id, API_TOKEN)
                if answers_df is not None:
                    session_variables.append(14)

            elif 14 in session_variables:
                print("Already downloaded answers. Returning to main menu.")
                sleep(1.5)

        # get response statuses
        elif selection == 6:

            try:
                reports_df, status_df = menu_actions.get_resp_stats(
                    survey_id, API_TOKEN)
            except TypeError:
                selection = 0
            # NOTE(review): survey_title can be None here (IndexError path
            # above), in which case `"JLAB" in survey_title` raises
            # TypeError — confirm this path cannot be reached with None.
            if "JLAB" in survey_title:
                path = "/Users/gcree/Box Sync/MaRS DataCatalyst 2017 CONFIDENTIAL/JLABS Toronto Annual Survey 2017/Response_Status_Reports/"
                misc.write_to_xl(status_df.drop("invite_link", axis=1),
                                 "ResponseStatuses",
                                 out_path=path,
                                 sheetname="response_statuses")
            elif "annual" in survey_title.lower() and "2018" in survey_title:
                path = CM.get_config("config.ini", "paths",
                                     "survey2018_response_stats")
                misc.write_to_xl(status_df,
                                 'ResponseStatuses' + survey_title,
                                 out_path=path,
                                 sheetname="response_statuses")

        # get resp stats for all campaigns
        elif selection == 19:

            campaigns_df = menu_actions.get_campaigns(API_TOKEN, survey_id,
                                                      session_variables,
                                                      surveys_df)
            campaigns_df = campaigns_df[[
                'id', 'campaign_name', 'link_type', 'campaign_status'
            ]]
            reports_list = []
            status_list = []
            # Skip deleted campaigns and plain share links.
            campaigns_df = campaigns_df[
                campaigns_df['campaign_status'] != 'Deleted']
            campaigns_df = campaigns_df[campaigns_df['link_type'] != 'link']
            for cid in campaigns_df["id"]:
                print('Downloading data for campaign id: {}'.format(cid))
                reports_df, status_df = menu_actions.get_resp_stats(
                    survey_id, API_TOKEN, campaign_id=int(cid))
                if len(reports_df) > 0:
                    reports_list.append(reports_df)
                    status_list.append(status_df)

            # concat all reports dfs and concat all status dfs
            if len(reports_list) == 0:
                pass
            elif len(reports_list) == 1:
                reports_df = reports_list[0]
                status_df = status_list[0]
            else:
                reports_df = pd.concat(reports_list)
                status_df = pd.concat(status_list)

            # left join campaigns <- reports <- statuses dfs
            df1 = pd.merge(campaigns_df,
                           reports_df,
                           how='inner',
                           left_on=["id"],
                           right_on=["campaign_id"])
            all_resp_stats = pd.merge(df1,
                                      status_df,
                                      how='left',
                                      left_on='id_y',
                                      right_on='report_id')
            # Drop the merge bookkeeping columns, then restore the
            # campaign id under its proper name.
            all_resp_stats = all_resp_stats.drop('id_y', axis=1).drop(
                'campaign_id',
                axis=1).drop('report_id',
                             axis=1).drop('primary_RIC',
                                          axis=1).drop('venture_id', axis=1)
            all_resp_stats = all_resp_stats.rename(
                columns={'id_x': "campaign_id"})

            path_ini = CM.get_config("config.ini", "paths", "sandbox")
            path = CM.change_working_directory(path_ini)
            print(path)
            misc.write_to_xl(all_resp_stats,
                             'ResponseStatuses - {}'.format(survey_title),
                             out_path=path,
                             sheetname="response_statuses")

            # stat_table = 'MDCReport.Fact_Response_Status'
            # print('Truncating and writing to ' + stat_table)
            # trunc_sql = CM.get_config('config.ini', 'sql_queries', 'trunc_stat_rep')
            # DB.execute(trunc_sql)
            # ins_sql = CM.get_config('config.ini', 'sql_queries', 'insert_stat_rep_f')
            # ins_sql = ins_sql.replace('(WHAT_HEADERS) ', '')
            # values = CM.df_list(all_resp_stats)
            # val_num = len(values)
            # for i in range(val_num):
            #     try:
            #         val = []
            #         for l, j in enumerate(values[i]):
            #             if isinstance(values[i][l], list):
            #                 val.append(''.join(str(x) for x in values[i][l]))
            #             elif isinstance(values[i][l], str):
            #                 val.append(CM.sql_compliant(values[i][l]))
            #             else:
            #                 val.append(values[i][l])
            #         tup = tuple(val)
            #         ins_sql_final = ins_sql.format(tup)
            #         ins_sql_final = ins_sql_final.replace('"', '\'')
            #         DB.execute(ins_sql_final)
            #         print("Record {} of {} : SUCCESS".format(i, val_num))
            #     except Exception as e:
            #         print("Record number {} of {} : ERROR: {}".format(i, val_num, e))
            #         print("ERROR VALUES : {}".format(ins_sql))
            #         continue

        # set survey ID
        elif selection == 17:

            # survey_id_choice = 1
            r_u_sure = 0
            # if type(survey_id) == int:
            while str(r_u_sure).lower() not in ['n', 'y']:
                try:
                    r_u_sure = input("""
                    Warning: changing the surveyID for this session will 
                    clear the data downloaded for the previous survey during this session. 
                    Do you still want to change the survey ID? (y/n): """)
                    if str(r_u_sure).lower() == 'y':
                        # Only survey-list (1) and schema-table (15)
                        # downloads survive a survey switch.
                        session_variables[:] = [
                            y for y in session_variables if y in [1, 15]
                        ]
                        survey_id_choice = input(
                            "Survey ID has been reset. Enter new ID: ")
                        survey_id_choice = menu_actions.validate_survey_id(
                            survey_id_choice, session_variables, API_TOKEN,
                            surveys_df)
                        if survey_id_choice is not None:
                            survey_id = survey_id_choice
                    elif str(r_u_sure).lower() in ['n', str(-1)]:
                        print('Returning to main menu')
                        sleep(0.75)
                        selection = 0
                        break

                except ValueError:
                    continue

        # get all tables from schema into dfs
        elif selection == 15:

            # schema = "JLABS"
            # schema = str(input("Enter name of schema for which you would like to load all tables into dataframes: "))
            #
            # table_dict = menu_actions.get_db_tables(schema, printout=True)
            #
            # session_variables.append(15)
            #
            # # ========= Dependency query and dict ==========
            #
            # dependency_dict = menu_actions.get_dependencies(schema, printout=True)
            #
            # load_ordered_tables = menu_actions.get_load_order(schema, printout=True)
            pass

        # test get dependencies
        elif selection == 16:

            schema = str(
                input(
                    "Enter name of schema you would like to get dependencies for: "
                ))

            skipped = False
            if menu_actions.return_to_main(schema) == 1:
                print("skipped")
                skipped = True
                sleep(0.5)

            if not skipped:
                dependencies = sg_get_tables.get_dependencies(schema)
                print("\n", dependencies)

            # NOTE(review): if `skipped` is True, `dependencies` is never
            # assigned and the loop below raises NameError — confirm this
            # branch is meant to fall through.
            dependency_dict = {}
            for i in range(0, len(dependencies)):
                fkt = dependencies.iloc[:, 0][i]
                reft = dependencies.iloc[:, 1][i]
                if fkt not in dependency_dict.keys():
                    dependency_dict[fkt] = []
                dependency_dict[fkt].append(reft)

            print("\nDependency dict: \n")
            for key in dependency_dict.keys():
                print(key, ":", dependency_dict[key])

            load_order = sg_get_tables.get_load_order(schema)
            print("\nLOAD ORDER:\n", load_order)

        # load survey into DB
        elif selection == 2:

            # if 12 not in session_variables:
            #     print("Pull in tables from DB before loading survey data into DB")
            #     print("Execute menu item 12.")
            #     sleep(1)
            #
            # elif 12 in session_variables:
            #     # check if surveyID selected is in survey DB table
            #     surveys_table = table_dict["Surveys"]
            #     survey_ids = []
            #     for id in surveys_table["id"]:
            #         survey_ids.append(id)

            # if survey_id in survey_ids:
            #     print("Survey already exists in database")
            # elif survey_id not in survey_ids:
            #     print("Survey does not exist in DB. Loading survey data now")

            print("\nLoading survey entry into DB")
            menu_actions.load_survey_entry(surveys_df, survey_id)

            print("\nLoading survey questions & options into DB")
            menu_actions.load_qsos(survey_id, API_TOKEN)

            session_variables.append(2)

        # load responses, answers, contacts, contact lists, and contacts__lists entries
        elif selection == 4:

            exist = menu_actions.check_qs_exist(survey_id)
            if exist:
                print(
                    "At least one question for this survey exists in DB. Proceeding to load answers into DB"
                )
                print("Loading Responses first...")

                menu_actions.load_resps_ans_contacts__lists(
                    survey_id, API_TOKEN)

            else:
                print(
                    "No questions for this survey exist in DB. Load questions before loading answers."
                )

        # write all current survey to DB
        elif selection == 7:

            menu_actions.write_all_survey_components_to_db(
                session_variables, surveys_df, survey_id, API_TOKEN)
            session_variables.append(7)

        # write all components of all surveys to DB
        elif selection == 8:

            menu_actions.do_everything_for_all_surveys(session_variables,
                                                       surveys_df, API_TOKEN)
            session_variables.append(8)

        elif selection == 18:

            sure = input(
                "\nAre you sure you wish to delete all components of current survey from the database? (y/n): "
            )
            if str(sure).lower() == "y":
                menu_actions.del_survey_components(survey_id)

        # quit program
        elif selection == 99:
            break

        # Reset so the next pass re-enters the option prompt.
        selection = 0
Пример #22
0
def _main_():
    # make the damn ric dict: ricname: datasourceID (except CII & OSVP, number is not datasourceid)
    rics = {
        'MaRS Discovery District': {
            'db_name': 'MaRS Discovery District',
            'code': 7
        },
        'RIC Centre': {
            'db_name': 'RIC Centre',
            'code': 9
        },
        'Innovation Factory': {
            'db_name': 'Innovation Factory',
            'code': 12
        },
        'NWOIC': {
            'db_name': 'NWO Innovation Centre',
            'code': 14
        },
        'Invest Ottawa': {
            'db_name': 'Invest Ottawa',
            'code': 16
        },
        'IION': {
            'db_name': 'IION',
            'code': 5
        },
        'CII': {
            'db_name': 'MaRS Centre for Impact Investing',
            'code': -1
        },
        'OSVP': {
            'db_name': 'Ontario Scale-Up Voucher Program',
            'code': -1
        },
        'Innovation Guelph': {
            'db_name': 'Innovation Guelph',
            'code': 15
        },
        'WEtech': {
            'db_name': 'WEtech',
            'code': 2
        },
        'SSMIC': {
            'db_name': 'SSMIC',
            'code': 3
        },
        'TechAlliance': {
            'db_name': 'TechAlliance',
            'code': 6
        },
        'Haltech': {
            'db_name': 'Haltech',
            'code': 8
        },
        'Spark Centre': {
            'db_name': 'Spark Centre',
            'code': 10
        },
        'NORCAT': {
            'db_name': 'NORCAT',
            'code': 1
        },
        'VentureLAB': {
            'db_name': 'ventureLAB',
            'code': 11
        },
        'Innovate Niagara': {
            'db_name': 'Innovate Niagara',
            'code': 17
        },
        'Launch Lab': {
            'db_name': 'Launch Lab',
            'code': 13
        }
        # ,'Communitech': {'db_name': 'Communitech', 'code': 4}
    }

    with shelve.open(q_meta_name, 'r') as qs_metadata:

        print("Creating ric_qs dict")
        ric_qs = {}
        for ric in rics:
            if ric in list(qs_metadata['addedby'].keys()):
                ric_qids = include_list(ric)
                ric_qs[ric] = ric_qids
            # elif ric.lower() == 'communitech':
            #     ric_qs[ric] = qs_metadata['which_survey']['COMMUNITECH']
            else:
                ric_qs[ric] = qs_metadata['core/noncore']['core']

    print("Reading qs_metadata.xlsx to df")
    cwd = os.getcwd()
    user_path = os.path.expanduser("~")
    filename = '/qs_metadata.xlsx'
    meta_dfs = CM.xl_to_dfs(cwd, filename)
    sheetname = 'Sheet1'
    meta_df = meta_dfs[sheetname]

    # create master data dict with qid: concatted name (i.e., <survey_section - readable_name>)
    print("Creating master data dict")
    meta_df = meta_df.sort_values(by=['q_num'], ascending=[True])
    meta_df['col_title'] = meta_df['survey_section'].astype(
        str) + ' - ' + meta_df['readable_name']
    data_dict = meta_df[['id', 'col_title', 'title', 'q_num']]

    # split master data dict into one for each ric
    print("Splitting master data dict into 1 per RIC")
    ric_data_dicts = {}
    for ric in ric_qs.keys():
        qids_df = pd.DataFrame(ric_qs[ric], columns=['id'])
        ric_data_dict = pd.merge(qids_df, data_dict, how='inner', on=['id'])
        ric_data_dict.sort_values(by='q_num', inplace=True)
        ric_data_dicts[ric] = ric_data_dict

    # read questions and options from DB
    print("Reading questions and options from DB into qsos df")
    qsos_sql = CM.get_config("config_sql.ini", "ann_survey_18", "all_qsos")
    qsos = DB.pandas_read(qsos_sql)

    # add col_title column to qsos df
    qsos = pd.merge(qsos,
                    meta_df[['id', 'col_title', 'q_num']],
                    how='left',
                    left_on='qid',
                    right_on='id')
    qsos.drop('id', inplace=True, axis=1)
    print("Transforming qsos df")

    # put flag on 'ESSAY', 'TABLE', 'TEXTBOX', 'MENU', 'RADIO' so that their col_title does not change in next step
    qsos['multi_options'] = qsos.q_type.apply(multi_options)

    # for options, make col_title = col_title + "Option: " + [o_label]
    qsos['col_title'] = qsos.apply(opt_col_title, axis=1)
    qsos = qsos[qsos['q_num'] > 0]

    # capture correct order for columns for use later in formatting pivoted datasheets
    col_title_order = pd.Series(qsos.q_num.values,
                                index=qsos.col_title).to_dict()

    # read answers from DB
    print("Reading answers from DB into ans df")
    ans_sql = CM.get_config("config_sql.ini", "ann_survey_18",
                            "sel_ann_survey_res")
    ans = DB.pandas_read(ans_sql)

    # separate process for Communitech shared ventures
    # 1. get list of Communitech shared client answers
    print("Reading Communitech shared clients")
    comm_sql = CM.get_config("config_sql.ini", "ann_survey_18",
                             "sel_communitech_shared")
    comm_ans = DB.pandas_read(comm_sql)
    # 2. concat with rest of answers (?)
    ans = pd.concat([ans, comm_ans])

    # clean ans
    print("Cleaning ans df")
    ans.dropna(subset=['Answer'], inplace=True)
    ans['Answer'] = ans.apply(replacements, axis=1)
    ans['page_pipe'] = ans['page_pipe'].fillna('')

    # for each RIC
    print("\nPer RIC df datasheet creation:")
    for ric in ric_qs:

        # if ric == 'MaRS Discovery District':
        # turn that RIC's qid list into df
        print("\nRIC: {}".format(ric))
        print("Creating df of questions for {}".format(ric))
        qs_df = pd.DataFrame(ric_qs[ric], columns=['qid'])
        qs_df['ric'] = rics[ric]['db_name']

        # left join that df with qsos df on qid
        qs_df = pd.merge(qs_df, qsos, how='left', on='qid')

        # left join resulting df with ans df
        print("Left join qs with ans")
        ric_survey_results = pd.merge(
            qs_df,
            ans,
            how='left',
            left_on=['qid', 'oid', 'ric'],
            right_on=['QuestionID', 'OptionID', 'RIC_Program'])

        # drop empty answers and sort
        print("Clean ans")
        ric_survey_results = ric_survey_results[pd.notnull(
            ric_survey_results['Answer'])]
        ric_survey_results.sort_values(by='q_num', inplace=True)

        # ric_survey_results.dropna(subset=['Answer'])
        print("Pivot into datasheet for {}".format(ric))
        ric_datasheet = ric_survey_results[[
            'resp_id', 'CompanyID', 'col_title', 'Answer', 'page_pipe'
        ]].drop_duplicates()
        ric_datasheet['col_title'] = ric_datasheet[
            'col_title'] + ' ' + ric_datasheet['page_pipe'].astype(str)
        ric_datasheet['rid_cid'] = ric_datasheet['resp_id'].astype(
            float).astype(str) + '-' + ric_datasheet['CompanyID'].astype(str)
        ric_datasheet = ric_datasheet[['rid_cid', 'col_title', 'Answer']]

        try:
            ric_datasheet = ric_datasheet.pivot(index='rid_cid',
                                                columns='col_title',
                                                values='Answer')
            # ric_datasheet = pd.pivot_table(ric_datasheet, values='Answer', columns='col_title', index='rid_cid')

            ric_datasheet.reset_index(inplace=True)

            ric_datasheet['resp_id'], ric_datasheet[
                'CompanyID'] = ric_datasheet['rid_cid'].str.split('-', 1).str
            ric_datasheet.drop('rid_cid', axis=1, inplace=True)
            ric_datasheet = ric_datasheet.apply(pd.to_numeric, errors='ignore')

            # remove non-consenting responses
            for val in list(ric_datasheet):
                if 'consent' in str(val.lower()):
                    consent_col = val
                    ric_datasheet[consent_col] = ric_datasheet[
                        consent_col].str.replace(u"\u2019", "'")
                    ric_datasheet = ric_datasheet[
                        ric_datasheet[consent_col] != "I don't give consent"]
                    consent_col = ''
                    break

            # re-order columns to reflect q_num ordering
            cols = list(ric_datasheet)
            rid_cid = cols[-2:]
            q_cols = cols[:-2]
            ordered_q_cols = []
            for q in q_cols:
                if q[-2:] == '.0':
                    ordered_q_cols.append([col_title_order[q[:-8]], q])
                else:
                    ordered_q_cols.append([col_title_order[q.strip()], q])
            ordered_q_cols.sort()
            for i in range(len(ordered_q_cols)):
                ordered_q_cols[i] = ordered_q_cols[i][1]
            cols = rid_cid + ordered_q_cols
            ric_datasheet = ric_datasheet[cols]

            save_path = path_xl(
                user_path=user_path,
                path_extension=
                "Box Sync/Workbench/BAP/Annual Survey FY2018/DEV - Results to RICs/",
                filename=ric + '.xlsx')

            # pull out social impact companies separately for use later in CII datasheet
            if ric == 'MaRS Discovery District':
                soc_imp_df = ric_datasheet[
                    ric_datasheet['social_impact - Motives '] == 'Yes']

            if ric != 'CII':
                # save to disc
                results_sheets = [ric_datasheet, ric_data_dicts[ric]]
                sheetnames = ['SurveyData', 'DataDictionary']
                save_xls(results_sheets, save_path, sheetnames)
                print("Wrote to {}".format(save_path))
            else:
                print('Add extra tabs to {} datasheet'.format(ric))
                results_sheets = [
                    ric_datasheet, soc_imp_df, ric_data_dicts[ric],
                    ric_data_dicts['MaRS Discovery District']
                ]
                sheetnames = [
                    'CII_SurveyData', 'All_RICs_SocialImpact_SurveyData',
                    'CII_DataDict', 'MaRS_DataDict'
                ]
                save_xls(results_sheets, save_path, sheetnames)
                print("Wrote to {}".format(save_path))

        except ValueError as ex:
            print("!\nERROR FOR {}: {}\n!\n".format(ric, ex))

            # save conflicting answer values when pivot fails
            save_path = path_xl(
                user_path=user_path,
                path_extension=
                "Box Sync/Workbench/BAP/Annual Survey FY2018/DEV - Results to RICs/__dupies/",
                filename=ric + '_dupies' + '.xlsx')
            save_xls([
                ric_datasheet[ric_datasheet.duplicated(
                    ['rid_cid', 'col_title'], keep=False)]
            ], save_path, ['dupies'])
            continue
        pass