示例#1
0
def run(devices,
        filters,
        round_interactions,
        last_timestamp_filter=None,
        days_back=None):
    """
    Query S3 for the interactions of the given devices and return them.

    :param devices: iterable of device ids to search for
    :param filters: list of filter dicts (each with a 'filter_id' key)
    :param round_interactions: rounding parameter forwarded to read_s3
    :param last_timestamp_filter: if set, skip data older than this timestamp
    :param days_back: days of history to scan; defaults to
        config.days_look_back() evaluated at call time. The original default
        was evaluated once at import time, and the parameter was then ignored
        in favour of a fresh config lookup.
    :return: tuple (filter_id_2_connections, unique_interactions) as
        produced by read_s3
    """
    if days_back is None:
        days_back = config.days_look_back()
    print('Gonna search for', devices)

    # Bug fix: forward the caller-supplied days_back instead of always
    # re-reading the config value (which silently discarded the argument).
    r, i = read_s3(devices,
                   filters,
                   round_interactions,
                   last_timestamp_filter=last_timestamp_filter,
                   days_back=days_back)

    print('read from s3, gonna send interactions', r, i)
    return r, i
示例#2
0
def get_paths_s3(device_id, days_back=None):
    """
    Return the list of S3 folder prefixes for :device_id:, one per day,
    covering today and the previous :days_back: days.

    :param device_id: device id
    :param days_back: number of days to go back; defaults to
        config.days_look_back() evaluated at call time. The original default
        was evaluated once, at function-definition time, so later config
        changes were ignored.
    :return: list of 'YYYY/MM/DD/<device path>' prefix strings
    """
    if days_back is None:
        days_back = config.days_look_back()
    today = datetime.now()
    paths = []
    # range(days_back + 1) so today itself is included in the scan.
    for i in range(days_back + 1):
        d = today - timedelta(days=i)
        paths.append(
            str(d.year) + '/' + padding_zeroes(d.month, 2) + '/' +
            padding_zeroes(d.day, 2) + '/' + build_path_device(device_id))
    return paths
示例#3
0
def periodic_new_infected_interactions_check(debug=False):
    """
    Scan the infected table in batches of LIMIT_BATCH and, for every batch,
    push a 'recurrent' message onto the SQS patients queue so those devices'
    interactions are re-analyzed, then stamp them with the current time.

    :param debug: when True, skip the SQS send and the DB update side effects
        and stop after the first batch
    """
    offset = 0
    today = datetime.now()
    # Only devices infected within the configured look-back window qualify.
    condition_infection = today - timedelta(config.days_look_back())

    sqs_client = boto3.resource('sqs', region_name=config.get_aws_region())
    queue_infection = sqs_client.Queue(config.get_sqs_patients_url())

    while True:
        res = mysql_handler.get_infected(condition_infection,
                                         offset=offset,
                                         limit=LIMIT_BATCH,
                                         debug=debug)
        print('ANALYZING INFECTED LATER', res)
        # Stop when the table is exhausted.  (The original kept a redundant
        # `continuation` flag that was set right before an unconditional
        # break and never re-checked.)
        if not res:
            break

        device_ids = []
        min_timestamp_analysis = today
        for entry in res:
            device_ids.append(str(entry['device_id']))
            # Track the oldest last-analysis time in the batch so the
            # consumer knows how far back it has to look.
            min_timestamp_analysis = min(min_timestamp_analysis,
                                         entry['last_analysis_timestamp'])

        recurrent_infected = {
            'recurrent': {
                'device_ids': device_ids,
                'timestamp_min_unix':
                datetime.timestamp(min_timestamp_analysis)
            }
        }
        print('sending new message:', recurrent_infected)
        if not debug:
            queue_infection.send_message(
                MessageBody=json.dumps(recurrent_infected))

        print('updating current devices last timestamp')
        mysql_handler.update_last_analysis_timestamp(device_ids,
                                                     today,
                                                     debug=debug)
        offset += LIMIT_BATCH
        if debug:
            break
示例#4
0
def read_s3(devices,
            filters,
            round_interactions=None,
            last_timestamp_filter=None,
            firstRun=True,
            days_back=None):
    """
    Query S3 in two passes for the interactions of :devices:: first the
    devices themselves, then the counterpart devices discovered in pass one
    (the "further" pass, used for iPhones per the original comment), merging
    into the result any second-pass interactions whose device is not already
    in the input set.

    :param devices: collection of device ids (compared via int())
    :param filters: list of filter dicts keyed by 'filter_id'
    :param round_interactions: rounding parameter forwarded to
        aggregate_result_select
    :param last_timestamp_filter: if set, skip files older than this timestamp
    :param firstRun: unused; kept only for backward compatibility with callers
    :param days_back: days of history to scan; defaults to
        config.days_look_back() evaluated at call time. The original default
        was evaluated once at import time.
    :return: tuple (filter_id_2_connections, unique_interactions)
    """
    if days_back is None:
        days_back = config.days_look_back()

    filter_id_2_filter = {fil['filter_id']: fil for fil in filters}

    # Pass 1: interactions recorded by the queried devices themselves.
    filter_id_2_device_id_2_interactions, unique_interactions = select_s3_interactions(
        devices,
        filters,
        interactions_further=None,
        last_timestamp_filter=last_timestamp_filter,
        days_back=days_back)

    filter_id_2_connections = aggregate_result_select(
        filter_id_2_device_id_2_interactions, filter_id_2_filter,
        round_interactions)

    # Pass 2 ("further"): query the devices discovered in pass 1.
    filter_id_2_device_id_2_interactions_further, unique_interactions_further = select_s3_interactions(
        list(unique_interactions),
        filters,
        interactions_further=filter_id_2_connections,
        last_timestamp_filter=last_timestamp_filter,
        days_back=days_back)
    filter_id_2_connections_further = aggregate_result_select(
        filter_id_2_device_id_2_interactions_further, filter_id_2_filter,
        round_interactions)

    # Merge pass-2 interactions that involve devices outside the input set.
    for filter_id, interactions in filter_id_2_connections_further.items():
        for inter in interactions:
            if int(inter['id']) not in devices:
                filter_id_2_connections.setdefault(filter_id,
                                                   []).append(inter)

    for uif in unique_interactions_further:
        if int(uif) not in devices:
            unique_interactions.append(uif)

    return filter_id_2_connections, unique_interactions
示例#5
0
def select_s3_interactions(devices,
                           filters,
                           interactions_further=None,
                           last_timestamp_filter=None,
                           days_back=None):
    """
    Return the interactions of :devices: for every filter in :filters:.

    For each filter a query is built and executed against every matching S3
    file via a multiprocessing pool, and the per-file partial results are
    merged.

    :param devices: collection of device ids
    :param filters: list of filter dicts keyed by 'filter_id'
    :param interactions_further: optional {filter_id: interactions} mapping
        from a previous pass, forwarded to build_query
    :param last_timestamp_filter: if set, skip files older than this timestamp
    :param days_back: days of history to scan; defaults to
        config.days_look_back() evaluated at call time. The original default
        was evaluated once at import time.
    :return: ({filter_id: {interaction_id: [interactions]}},
              list_of_unique_interactions)
    """
    if days_back is None:
        days_back = config.days_look_back()

    filter_id2connections = {}
    all_interactions = set()
    if len(devices) > 0:
        file_names = list_files(devices,
                                last_timestamp_filter=last_timestamp_filter,
                                days_back=days_back)
        # filter_id2connections layout:
        #   { filter_id: { interaction_id: [interactions] } }

        # One pool reused across all filters.  The original code created a
        # fresh Pool per filter and never called close()/join() on any of
        # them, leaking worker processes on every iteration.
        pool_size = config.get_number_processes()
        pool = multiprocessing.Pool(processes=pool_size)
        try:
            for fil in filters:
                interactions = None
                if interactions_further is not None and fil[
                        'filter_id'] in interactions_further:
                    interactions = interactions_further[fil['filter_id']]
                query = build_query(devices, fil, interactions)
                print('Iterating for filter', fil)
                print(query)
                interaction_id2_interactions = {}
                print('gonna query {} files'.format(len(file_names)))

                counter = 0
                processes = []

                # Pre-populate the pool with up to pool_size tasks.
                for _ in range(min(pool_size, len(file_names))):
                    processes.append(
                        pool.apply_async(query_file,
                                         args=(file_names[counter], query)))
                    counter += 1

                # Busy-poll for finished tasks; each completed task frees a
                # slot that is refilled immediately while files remain.
                while processes:
                    for ip in range(len(processes) - 1, -1, -1):
                        process = processes[ip]
                        if not process.ready():
                            continue
                        # todo: sometimes the next process.get arise an error
                        (interaction_id2_interactions_partial,
                         unique_interactions) = process.get()
                        all_interactions = all_interactions.union(
                            unique_interactions)
                        partial = interaction_id2_interactions_partial
                        for iid, its in partial.items():
                            interaction_id2_interactions.setdefault(
                                iid, []).extend(its)
                        processes.pop(ip)
                        if counter < len(file_names):
                            processes.append(
                                pool.apply_async(
                                    query_file,
                                    args=(file_names[counter], query)))
                            counter += 1

                filter_id2connections[
                    fil['filter_id']] = interaction_id2_interactions
        finally:
            # Always release the workers, even if a task raised.
            pool.close()
            pool.join()

    return filter_id2connections, list(all_interactions)
示例#6
0
def list_files(devices,
               last_timestamp_filter=None,
               days_back=None):
    """
    Return the list of unique S3 object keys for :devices: going back
    :days_back: days.

    :param devices: collection of device ids
    :param last_timestamp_filter: if set, files whose name starts with a
        parsable datetime older than this are skipped (already analyzed)
    :param days_back: days of history to scan; defaults to
        config.days_look_back() evaluated at call time. The original default
        was evaluated once at import time.
    :return: list of unique S3 keys
    """
    if days_back is None:
        days_back = config.days_look_back()
    client = boto3.client('s3')
    if last_timestamp_filter is not None:
        print('gonna filtering out files before', last_timestamp_filter)
    file_names = set()
    for device_id in devices:
        paths = get_paths_s3(device_id, days_back=days_back)
        for p in paths:
            continuation = None
            iterate = True
            while iterate:
                # Paginate with list_objects_v2; the continuation token only
                # exists from the second page onward.
                if continuation is not None:
                    response = client.list_objects_v2(
                        Bucket=BUCKET,
                        MaxKeys=1000,
                        Prefix=p,
                        ContinuationToken=continuation)
                else:
                    response = client.list_objects_v2(Bucket=BUCKET,
                                                      MaxKeys=1000,
                                                      Prefix=p)
                if response['ResponseMetadata']['HTTPStatusCode'] != 200:
                    print("Error", response)
                    break
                iterate = response['IsTruncated']
                for e in response.get('Contents', []):
                    file_name_to_add = e['Key']
                    if (last_timestamp_filter is not None
                            and '/' in file_name_to_add
                            and '_' in file_name_to_add):
                        # Keys look like '.../<datetime>_<rest>'; parse the
                        # leading datetime to decide whether the file
                        # predates the filter.
                        try:
                            csv_file_name = file_name_to_add.split('/')[-1]
                            date_file = parser.parse(
                                csv_file_name.split('_')[0])
                            if date_file < last_timestamp_filter:
                                # file to skip since already analyzed
                                print('skipping file', file_name_to_add)
                                continue
                        except Exception:
                            # Narrowed from a bare `except:`, which also
                            # swallowed KeyboardInterrupt / SystemExit.
                            # Unparsable names are kept, as before.
                            print(
                                'EXCEPTION on parsing datetime in filename '
                                + file_name_to_add)
                    file_names.add(file_name_to_add)
                if 'NextContinuationToken' in response:
                    print('CONTINATION TOKEN')
                    continuation = response['NextContinuationToken']
    return list(file_names)