Пример #1
0
def lambda_handler(params, context):
    """
    Entry point used when the code is invoked as an AWS Lambda function.

    params  -- the parameters passed in with the invocation; they are
               forwarded to dataset.organize_path_lambda and to algorithm,
               and are also stored as the 'config' output.
    context -- accepted as part of the handler signature; not used here.

    Returns a dict that maps 'config' and every key produced by algorithm
    to whatever dataset.save_remote_output returns for it (per the original
    author's note: the URL of the object uploaded to the S3 bucket).
    The 'uid' entry is the exception: it is copied through unchanged.
    """
    # work out where to read from and where to write to
    paths = dataset.organize_path_lambda(params)
    local_dir = paths['localSavePath']
    remote_dir = paths['remoteSavePath']

    # persist the invocation parameters first, so 'config' is always present
    result = {
        'config': dataset.save_remote_output(local_dir, remote_dir,
                                             'config', params),
    }

    # fetch the input data and run the algorithm on it
    frame = dataset.get_remote_input(paths['remoteReadPath'],
                                     paths['filename'],
                                     paths['localReadPath'])
    produced = algorithm(frame, params)

    # store every produced item remotely; 'uid' is a plain value, not an
    # object to be saved, so it is handed back as-is
    for name, payload in produced.items():
        if name == 'uid':
            result[name] = payload
            continue
        result[name] = dataset.save_remote_output(local_dir, remote_dir,
                                                  name, payload)

    return result
Пример #2
0
    # Entry point used when the code is invoked as a Batch job: parameters
    # come from the command line instead of a Lambda event.
    urls = {}

    # required command-line parameters
    parser = argparse.ArgumentParser(description="processing...")
    parser.add_argument('--remoteReadPath', required=True)
    parser.add_argument('--email', required=True)
    parser.add_argument('--sessionURL', required=True)
    params = vars(parser.parse_args())

    # arranging the paths
    path = dataset.organize_path_lambda(params)

    # prepare input dataset
    # NOTE(review): df is presumably a pandas DataFrame (it is used via
    # .columns / .dropna() / .tolist() below) -- confirm against
    # dataset.get_remote_input.
    df = dataset.get_remote_input(path['remoteReadPath'],
                                  path['filename'],
                                  path['localReadPath'])

    # Pick the column that holds image URLs. The data source is taken from
    # the third-from-last segment of the remote read path.
    img_urls = []
    source = path['remoteReadPath'].split('/')[-3]
    df_columns = list(df.columns)

    # The source alternatives are grouped explicitly so that the
    # column-existence check guards every one of them. Previously
    # "a or b or c and col in columns" parsed as "a or b or (c and ...)",
    # so most sources skipped the check and raised KeyError when the
    # column was absent; now img_urls simply stays empty in that case.
    if source in ("reddit-Search", "reddit-Post", "crimson-Hexagon",
                  "reddit-Historical-Post") and 'url' in df_columns:
        img_urls = df['url'].dropna().tolist()

    elif source in ("twitter-Tweet", "twitter-Timeline") \
            and 'entities.media.media_url' in df_columns:
        img_urls = df['entities.media.media_url'].dropna().tolist()

    elif source == 'flickr-Photo' and 'size.source' in df_columns:
        img_urls = df['size.source'].dropna().tolist()