Example #1
0
def lambda_handler(params, context):
    """AWS Lambda entry point.

    ``params`` carries the invocation parameters; ``context`` is the
    standard Lambda context object (unused here).

    Returns a dict mapping each output name to its uploaded remote URL
    (the 'uid' entry is passed through unchanged).
    """
    # arrange the local/remote paths for this job
    path = dataset.organize_path_lambda(params)

    # persist the configuration first so the run is reproducible
    urls = {
        'config': dataset.save_remote_output(path['localSavePath'],
                                             path['remoteSavePath'],
                                             'config', params)
    }

    # fetch the input dataframe from remote storage
    df = dataset.get_remote_input(path['remoteReadPath'],
                                  path['filename'],
                                  path['localReadPath'])

    # run the analysis
    output = algorithm(df, params)

    # upload each result object to the s3 bucket; 'uid' is kept as-is
    for name, payload in output.items():
        if name == 'uid':
            urls[name] = payload
        else:
            urls[name] = dataset.save_remote_output(path['localSavePath'],
                                                    path['remoteSavePath'],
                                                    name, payload)

    return urls
Example #2
0
    # Pick the column that holds image URLs based on the data source.
    # BUGFIX: the original chained `a or b or c and cond` — Python binds
    # `and` tighter than `or`, so the column-existence check only guarded
    # the LAST source in each chain; a matching source with a missing
    # column then raised KeyError instead of the intended ValueError.
    # Parenthesizing (via membership tests) restores the intended
    # "(source matches) and (column exists)" logic.
    img_urls = []
    source = path['remoteReadPath'].split('/')[-3]
    if (source in ("reddit-Search", "reddit-Post",
                   "crimson-Hexagon", "reddit-Historical-Post")
            and 'url' in list(df.columns)):
        img_urls = df['url'].dropna().tolist()

    elif (source in ("twitter-Tweet", "twitter-Timeline")
            and 'entities.media.media_url' in list(df.columns)):
        img_urls = df['entities.media.media_url'].dropna().tolist()

    elif source == 'flickr-Photo' and 'size.source' in list(df.columns):
        img_urls = df['size.source'].dropna().tolist()

    else:
        raise ValueError("This data source does not support image collection!")

    urls = {}
    # download every URL that really points at an image into the local path
    for img_url in img_urls:
        if ic.is_image(img_url):
            filename, binary = ic.crawler(img_url)
            dataset.save_local_output(path['localSavePath'], filename, binary)
    # zip the downloaded images and upload the archive, keeping its URL
    urls['images.zip'] = dataset.save_remote_output(
        path['localSavePath'], path['remoteSavePath'], 'images.zip')

    # push notification email
    notification(toaddr=params['email'], case=3,
                 filename=path['remoteSavePath'],
                 links=urls, sessionURL=params['sessionURL'])
Example #3
0
    # --sessionURL is required so downstream notifications can link back
    # to the user's session
    parser.add_argument('--sessionURL', required=True)

    # user specified parameters
    # Accept arbitrary extra "--name value" flags: register every unknown
    # "--" option as optional, then re-parse so they land in params too.
    parsed, unknown = parser.parse_known_args()
    for arg in unknown:
        if arg.startswith("--"):
            parser.add_argument(arg, required=False)

    params = vars(parser.parse_args())

    # arranging the paths
    path = dataset.organize_path_lambda(params)

    # save the config file
    # NOTE(review): `urls` is not defined in this excerpt — it is
    # presumably initialized (urls = {}) earlier in the function; confirm.
    urls['config'] = dataset.save_remote_output(path['localSavePath'],
                                                path['remoteSavePath'],
                                                'config', params)

    # prepare input dataset
    df = dataset.get_remote_input(path['remoteReadPath'], path['filename'],
                                  path['localReadPath'])

    # execute the algorithm
    output = algorithm(df, params)

    # upload object to s3 bucket and return the url
    # NOTE(review): this excerpt ends mid-loop — the lambda variant of
    # this code also stores the raw value for the 'uid' key and returns
    # `urls`; that tail appears truncated here. Verify against the full file.
    for key, value in output.items():
        if key != 'uid':
            urls[key] = dataset.save_remote_output(path['localSavePath'],
                                                   path['remoteSavePath'], key,
                                                   value)