Пример #1
0
def wake_up_omnisci():
    """Read the OmniSci write credentials and open a connection.

    The credentials are loaded with ``get_credentials`` from the key file
    named by the module-level ``omnisci_keyfile``. Their ``write_key_name``
    and ``write_key_secret`` entries are handed, together with the
    module-level ``mapdhost`` and ``mapddbname``, to
    ``wake_and_connect_to_mapd``.

    Returns:
        The value returned by ``wake_and_connect_to_mapd``, unchanged.
        NOTE(review): other code in this file compares that value with the
        string 'RETRY' to detect a failed wake-up -- callers of this
        function presumably need the same check; confirm.
    """
    # No placeholder DataFrame is created first: it was overwritten
    # immediately and never read.
    creds = get_credentials(omnisci_keyfile)
    # connect to OmniSci, allowing time for the instance to wake
    return wake_and_connect_to_mapd(creds['write_key_name'],
                                    creds['write_key_secret'],
                                    mapdhost, mapddbname)
def main():
    """Load every entry of ``tables_and_files`` into OmniSci (MapD).

    Connects with the write credentials read from ``omnisci_keyfile``. If
    the connection attempt yields the sentinel string 'RETRY', a message is
    printed and nothing is loaded. Otherwise ``load_new_table_mapd`` is
    called once per entry of the module-level ``tables_and_files`` and the
    connection is closed afterwards.

    Each entry of ``tables_and_files`` must unpack into five values:
    ``(table, file, dt, tformat, dropcols)``.
    """
    # connect to MapD
    dfcreds = get_credentials(omnisci_keyfile)
    connection = wake_and_connect_to_mapd(dfcreds['write_key_name'],
                                          dfcreds['write_key_secret'],
                                          mapdhost, mapddbname)
    if connection == 'RETRY':
        print('could not wake OmniSci; exiting')
        return

    # try/finally guarantees the disconnect even when one of the loads
    # raises; previously an exception left the connection open.
    try:
        # loop through tables
        for table, file, dt, tformat, dropcols in tables_and_files:
            load_new_table_mapd(connection, table, file, dt, tformat,
                                dropcols, mapdhost, mapduser)
    finally:
        # disconnect MapD
        disconnect_mapd(connection)
Пример #3
0
def main():
    """Export each table listed in ``tables_and_files`` to a CSV file.

    Connects with the write credentials read from ``omnisci_keyfile``. If
    the connection attempt yields the sentinel string 'RETRY', a message is
    printed and nothing is exported. Otherwise, for every ``(table, file)``
    pair, the table is fetched with ``get_table_mapd``, sorted by the
    ``repo`` and ``view_timestamp`` columns, previewed on stdout (first 10
    rows) and written to ``file`` without the index column.
    """
    # connect to MapD
    dfcreds = get_credentials(omnisci_keyfile)
    connection = wake_and_connect_to_mapd(dfcreds['write_key_name'],
                                          dfcreds['write_key_secret'],
                                          mapdhost, mapddbname)
    if connection == 'RETRY':
        print('could not wake OmniSci; exiting')
        return

    # try/finally guarantees the disconnect even when a fetch, sort or
    # write raises; previously an exception left the connection open.
    try:
        # loop through tables
        for table, file in tables_and_files:
            # No placeholder DataFrame: it was overwritten immediately.
            df = get_table_mapd(connection, table)
            df.sort_values(['repo', 'view_timestamp'], inplace=True)
            print(df.head(10))
            df.to_csv(file, index=False)
    finally:
        # disconnect MapD
        disconnect_mapd(connection)
Пример #4
0
def _concat_frames(frames):
    """Stack *frames* into one DataFrame with a fresh 0..n-1 index.

    Returns an empty DataFrame when *frames* is empty, because
    ``pd.concat`` raises ``ValueError`` on an empty sequence.
    """
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)


def main():
    """Collect stars, views and referrers for each repository into CSVs.

    Authenticates against GitHub with the ``access_token`` entry of the
    credentials read from ``keyfile``, then for every name in the
    module-level ``repositories`` fetches the repo ``gproject + repo`` and
    gathers the results of ``get_stars``, ``get_views`` and
    ``get_referrers``. The per-repository results are stacked and written
    to ``oss_git_stars.csv``, ``oss_git_views.csv`` and
    ``oss_git_referrers.csv`` under ``file_path`` (joined by plain string
    concatenation, so ``file_path`` must already end with a separator).

    NOTE(review): assumes the three ``get_*`` helpers return pandas
    DataFrames (they were previously passed to ``DataFrame.append``) --
    confirm.
    """
    # credentials
    dfcreds = get_credentials(keyfile)  # authentication info from keyfile
    auth_header = dfcreds['access_token']  # token used to authorize

    # connect to github
    g = Github(auth_header)  # instantiate a github object with the token

    # Per-repository frames are collected in lists and concatenated once.
    # DataFrame.append no longer exists in pandas 2.0+, and it copied the
    # whole accumulated frame on every iteration.
    star_frames = []
    view_frames = []
    referrer_frames = []

    # loop through the list of repos
    for repo in repositories:
        r = g.get_repo(gproject + repo)
        star_frames.append(get_stars(r))
        view_frames.append(get_views(r))
        referrer_frames.append(get_referrers(r))

    # write stars to file
    print("writing stars to file")
    _concat_frames(star_frames).to_csv(file_path + "oss_git_stars.csv",
                                       index=False)

    # write views to file
    print("writing views to file")
    _concat_frames(view_frames).to_csv(file_path + "oss_git_views.csv",
                                       index=False)

    # write referrers to file
    print("writing referrers to file")
    _concat_frames(referrer_frames).to_csv(
        file_path + "oss_git_referrers.csv", index=False)
def main():
    """Download community members and discussion posts and save them as CSV.

    Builds request headers from the ``key`` entry of the credentials read
    from ``keyfile`` (sent as ``HLIAMKey``), then calls three endpoints
    below ``repo_path``:

    * ``GetViewableCommunities`` (GET) -- parsed but not used further.
    * ``GetCommunityMembers`` (POST) for one hard-coded community key,
      records 1 through 1500. The result gets a ``cmtimestamp`` column,
      loses its ``Community`` column, and is written to
      ``techsup_hl_communitymembers.csv``.
    * ``GetDiscussionPosts`` (GET, ``maxToRetrieve=5000``). The result gets
      a ``dptimestamp`` column and is written to
      ``techsup_hl_discussionposts.csv``.

    Both CSVs are written under ``file_path`` (joined by plain string
    concatenation) without the index, with datetimes formatted as
    ``%Y-%m-%d``.
    """
    # Local import keeps this block self-contained; ``io`` is only needed
    # to hand the response bytes to pandas as a file-like object.
    import io

    # credentials
    dfcreds = get_credentials(keyfile)  # authentication info from keyfile
    headers = {'content-type': 'application/json', 'HLIAMKey': dfcreds['key']}

    # Viewable Communities
    # NOTE(review): this frame is never read afterwards; the request and
    # parse are kept so the function's observable behaviour is unchanged.
    rViewableCommunities = requests.get(
        repo_path + 'api/v2.0/Communities/GetViewableCommunities',
        headers=headers)
    # read_json is given a file-like buffer rather than the raw payload,
    # which is the input form its documentation describes.
    dfViewableCommunities = pd.read_json(
        io.BytesIO(rViewableCommunities.content))

    # Community Members
    payload = {
        "CommunityKey": 'd06df790-8ca4-4e54-91a0-244af0228ddc',
        "StartRecord": 1,
        "EndRecord": 1500
    }
    rCommunityMembers = requests.post(
        repo_path + 'api/v2.0/Communities/GetCommunityMembers',
        headers=headers,
        json=payload)
    dfCommunityMembers = pd.read_json(io.BytesIO(rCommunityMembers.content))
    # add a timestamp to the data
    dfCommunityMembers['cmtimestamp'] = dt.datetime.now()
    dfCommunityMembers.index.names = ['rowUID']
    # Remove the nested dictionary of community information. The column is
    # named by keyword because pandas 2.0+ rejects a positional ``axis``.
    dfCommunityMembers.drop(columns='Community', inplace=True)
    dfCommunityMembers.to_csv(file_path + 'techsup_hl_communitymembers.csv',
                              index=False,
                              date_format="%Y-%m-%d")

    # Discussion Posts
    rDiscussionPosts = requests.get(
        repo_path +
        'api/v2.0/Discussions/GetDiscussionPosts?maxToRetrieve=5000',
        headers=headers)
    dfDiscussionPosts = pd.read_json(io.BytesIO(rDiscussionPosts.content))
    # add a timestamp to the data
    dfDiscussionPosts['dptimestamp'] = dt.datetime.now()
    dfDiscussionPosts.to_csv(file_path + 'techsup_hl_discussionposts.csv',
                             index=False,
                             date_format="%Y-%m-%d")
Пример #6
0
def main():
    """Load or append every entry of ``tables_and_files`` into OmniSci.

    Connects with the write credentials read from ``omnisci_keyfile``. If
    the connection attempt yields the sentinel string 'RETRY', a message is
    printed and nothing is processed. Otherwise each entry of the
    module-level ``tables_and_files`` is unpacked into seven values
    ``(table, file, timestamp_cols, timestamp_format, timestamp_units,
    integer_cols, update_method)`` and dispatched on ``update_method``:

    * ``'load'``   -> ``load_new_table_mapd``
    * ``'append'`` -> ``append_new_table_mapd``
    * anything else prints a warning and the entry is skipped.

    The connection is closed afterwards.
    """
    # connect to MapD
    dfcreds = get_credentials(omnisci_keyfile)
    connection = wake_and_connect_to_mapd(dfcreds['write_key_name'],
                                          dfcreds['write_key_secret'],
                                          mapdhost, mapddbname)
    if connection == 'RETRY':
        print('could not wake OmniSci; exiting')
        return

    # try/finally guarantees the disconnect even when a load or append
    # raises; previously an exception left the connection open.
    try:
        # loop through tables
        for (table, file, timestamp_cols, timestamp_format, timestamp_units,
             integer_cols, update_method) in tables_and_files:
            if update_method == 'load':
                load_new_table_mapd(connection, table, file, timestamp_cols,
                                    timestamp_format, timestamp_units,
                                    integer_cols)
            elif update_method == 'append':
                append_new_table_mapd(connection, table, file, timestamp_cols,
                                      timestamp_format, timestamp_units,
                                      integer_cols)
            else:
                print('invalid or unspecified update method')
    finally:
        # disconnect MapD
        disconnect_mapd(connection)
Пример #7
0
def main():
    """Append the contents of each CSV in ``file_names`` to one table.

    Connects with the write credentials read from ``omnisci_keyfile``. If
    the connection attempt yields the sentinel string 'RETRY', a message is
    printed and nothing is processed. Otherwise each entry of the
    module-level ``file_names`` is unpacked into eight values
    ``(csv_file, renamed_cols, int8_cols, int32_cols, ts_cols, tf,
    str_cols, bool_cols)``; the CSV is read, passed through ``parse_cols``
    and appended to the existing table with ``connection.load_table``
    (``create=False``, index not preserved). The connection is closed
    afterwards.

    ``table_name`` is not assigned in this function; it is read from the
    enclosing (module) scope, so every file goes to the same table.

    NOTE(review): the return value of ``parse_cols`` is ignored, so it
    presumably modifies ``dfnew`` in place -- confirm.
    """
    # connect to MapD
    dfcreds = get_credentials(omnisci_keyfile)
    connection = wake_and_connect_to_mapd(dfcreds['write_key_name'],
                                          dfcreds['write_key_secret'],
                                          mapdhost, mapddbname)
    if connection == 'RETRY':
        print('could not wake OmniSci; exiting')
        return

    # try/finally guarantees the disconnect even when reading, parsing or
    # loading raises; previously an exception left the connection open.
    try:
        # loop through tables
        for (csv_file, renamed_cols, int8_cols, int32_cols, ts_cols, tf,
             str_cols, bool_cols) in file_names:
            # get the contents of the file and turn them into a dataframe
            print("reading from file " + csv_file)
            dfnew = pd.read_csv(csv_file, index_col=False)
            # rename and recast columns
            parse_cols(dfnew, renamed_cols, int8_cols, int32_cols, ts_cols,
                       tf, str_cols, bool_cols)
            # append the contents of this file to the existing table
            print("appending file contents to table " + table_name)
            connection.load_table(table_name, dfnew, preserve_index=False,
                                  create=False)
    finally:
        # disconnect MapD
        disconnect_mapd(connection)