Exemplo n.º 1
0
def process_measure(measure_id, data, session):
    """Flatten one measure into per-cell and per-wifi records.

    The measure-level fields (id, position, time, accuracy, altitude) are
    merged *in place* into every dict found in ``data['cell']`` and
    ``data['wifi']``.

    :param measure_id: identifier copied onto every flattened record.
    :param data: mapping providing ``lat``, ``lon``, ``time``, ``accuracy``,
        ``altitude``, ``altitude_accuracy`` and ``radio``, plus optional
        ``cell`` and ``wifi`` sequences of dicts.
    :param session: not used by this function.
    :returns: a ``(cell_measures, wifi_measures)`` tuple. Each element is
        the corresponding (mutated) input sequence, or an empty list when
        that sequence is missing/empty. The wifi element is also an empty
        list when any wifi key fails ``valid_wifi_pattern``.
    """
    shared_fields = {
        'measure_id': measure_id,
        'lat': to_precise_int(data['lat']),
        'lon': to_precise_int(data['lon']),
        'time': encode_datetime(data['time']),
        'accuracy': data['accuracy'],
        'altitude': data['altitude'],
        'altitude_accuracy': data['altitude_accuracy'],
    }
    fallback_radio = RADIO_TYPE.get(data['radio'], -1)

    cell_measures = []
    cells = data.get('cell')
    if cells:
        for cell in cells:
            # merge the measure-level fields into the cell record
            cell.update(shared_fields)
            # a cell-specific radio type wins; an empty one falls back to
            # the less precise measure-level radio
            own_radio = cell['radio']
            if own_radio == '':
                cell['radio'] = fallback_radio
            else:
                cell['radio'] = RADIO_TYPE.get(own_radio, -1)
        cell_measures = cells

    wifi_measures = []
    wifis = data.get('wifi')
    if wifis:
        for wifi in wifis:
            normalized = normalize_wifi_key(wifi['key'])
            wifi['key'] = normalized
            if not valid_wifi_pattern(normalized):
                # old-style sha1 hash: drop the wifi data of this measure
                break
        else:
            # every key passed validation, so flatten the wifi records
            for wifi in wifis:
                wifi.update(shared_fields)
            wifi_measures = wifis

    return (cell_measures, wifi_measures)
Exemplo n.º 2
0
def process_measure(measure_id, data, session):
    """Split a single measure into flattened cell and wifi record lists.

    Measure-wide values are copied onto each entry of ``data['cell']`` and
    ``data['wifi']``; the entries are updated in place and the original
    sequences are what gets returned.

    :param measure_id: value stored under ``measure_id`` on every record.
    :param data: mapping with the keys ``lat``, ``lon``, ``time``,
        ``accuracy``, ``altitude``, ``altitude_accuracy`` and ``radio``;
        ``cell`` and ``wifi`` are optional sequences of dicts.
    :param session: accepted but not used here.
    :returns: ``(cell_measures, wifi_measures)``. Wifi measures are empty
        as soon as one wifi key is rejected by ``valid_wifi_pattern``.
    """
    common = dict(
        measure_id=measure_id,
        lat=to_precise_int(data['lat']),
        lon=to_precise_int(data['lon']),
        time=encode_datetime(data['time']),
        accuracy=data['accuracy'],
        altitude=data['altitude'],
        altitude_accuracy=data['altitude_accuracy'],
    )
    measure_level_radio = RADIO_TYPE.get(data['radio'], -1)

    def _has_valid_key(entry):
        # normalize the key in place, then report whether it is acceptable
        entry['key'] = key = normalize_wifi_key(entry['key'])
        return valid_wifi_pattern(key)

    cell_entries = data.get('cell')
    if cell_entries:
        for entry in cell_entries:
            entry.update(common)
            # prefer the entry's own radio type, otherwise use the one
            # given for the measure as a whole
            specific = entry['radio']
            entry['radio'] = (
                RADIO_TYPE.get(specific, -1) if specific != ''
                else measure_level_radio
            )
    else:
        cell_entries = []

    wifi_entries = data.get('wifi')
    # all() stops at the first rejected key (old-style sha1 hashes), so
    # entries after it are left untouched
    if wifi_entries and all(_has_valid_key(entry) for entry in wifi_entries):
        for entry in wifi_entries:
            entry.update(common)
    else:
        wifi_entries = []

    return (cell_entries, wifi_entries)
Exemplo n.º 3
0
def load_file(session, source_file, batch_size=100, userid=None):
    """Import wifi measures from a tab-separated file in batches.

    Every row must provide at least nine columns, in this order: unix
    timestamp, wifi key, latitude, longitude, accuracy, altitude, altitude
    accuracy, channel and signal. Rows that are too short, contain
    unparsable numbers, carry a timestamp outside the platform's supported
    range, or whose key is rejected by ``valid_wifi_pattern`` are skipped.

    :param session: session object; ``flush()`` is called on it after
        every full batch and once more at the end.
    :param source_file: path of the file to read.
    :param batch_size: number of accepted rows passed to
        ``process_measures`` at a time.
    :param userid: forwarded unchanged to ``process_measures``.
    :returns: the number of accepted rows.
    """
    utcnow = util.utcnow()
    # timedelta's first positional argument is days: 120 days in the past
    utcmin = utcnow - datetime.timedelta(120)

    with open(source_file, 'r') as fd:
        reader = csv.reader(fd, delimiter='\t', quotechar=None)

        counter = 0
        items = []
        for fields in reader:
            try:
                time = int(fields[0])
                if time == 0:  # pragma: no cover
                    # unknown time gets an old date
                    time = utcmin
                else:
                    # convert from unixtime to utc
                    # NOTE(review): utcfromtimestamp() returns a naive
                    # datetime; confirm whether util.utcnow() is
                    # timezone-aware, otherwise the two branches yield
                    # mixed naive/aware values.
                    time = datetime.datetime.utcfromtimestamp(time)

                key = normalized_wifi_key(str(fields[1]))
                if not valid_wifi_pattern(key):  # pragma: no cover
                    continue

                lat = float(fields[2])
                lon = float(fields[3])
                accuracy = int(fields[4])
                altitude = int(fields[5])
                altitude_accuracy = int(fields[6])
                channel = int(fields[7])
                signal = int(fields[8])

                wifi = dict(
                    key=key,
                    channel=channel,
                    signal=signal,
                )
                data = dict(
                    lat=lat,
                    lon=lon,
                    time=time,
                    accuracy=accuracy,
                    altitude=altitude,
                    altitude_accuracy=altitude_accuracy,
                    radio='',
                    cell=(),
                    wifi=[wifi],

                    # not sure if the importer has an actual file
                    # specification anywhere
                    heading=-255,
                    speed=-255,
                )
            except (ValueError, IndexError, OverflowError, OSError):
                # OverflowError / OSError: utcfromtimestamp() raises these
                # (rather than ValueError) for timestamps outside the
                # platform's supported range; treat such rows like any
                # other malformed row instead of aborting the import.
                continue

            items.append(data)
            counter += 1

            # flush every batch_size records
            if counter % batch_size == 0:
                process_measures(items, session, userid=userid)
                items = []
                session.flush()
                print('Added %s records.' % counter)

    # process the remaining items
    process_measures(items, session, userid=userid)
    print('Added %s records.' % counter)

    session.flush()
    return counter
Exemplo n.º 4
0
def load_file(session, source_file, batch_size=100, userid=None):
    """Import wifi measures from a tab-separated file in batches.

    Every row must provide at least nine columns, in this order: unix
    timestamp, wifi key, latitude, longitude, accuracy, altitude, altitude
    accuracy, channel and signal. Latitude and longitude are passed on as
    the raw strings read from the file; the other numeric columns are
    converted with ``int``. Rows that are too short, contain unparsable
    integers, carry a timestamp outside the platform's supported range, or
    whose key is rejected by ``valid_wifi_pattern`` are skipped.

    :param session: session object; ``flush()`` is called on it after
        every full batch and once more at the end.
    :param source_file: path of the file to read.
    :param batch_size: number of accepted rows passed to
        ``process_measures`` at a time.
    :param userid: forwarded unchanged to ``process_measures``.
    :returns: the number of accepted rows.
    """
    utcnow = datetime.datetime.utcnow().replace(tzinfo=iso8601.UTC)
    # timedelta's first positional argument is days: 120 days in the past
    utcmin = utcnow - datetime.timedelta(120)

    with open(source_file, 'r') as fd:
        reader = csv.reader(fd, delimiter='\t', quotechar=None)

        counter = 0
        items = []
        for fields in reader:
            try:
                time = int(fields[0])
                if time == 0:  # pragma: no cover
                    # unknown time gets an old date
                    time = utcmin
                else:
                    # convert from unixtime to utc
                    # NOTE(review): utcfromtimestamp() returns a naive
                    # datetime while utcmin above is timezone-aware, so
                    # the two branches produce mixed naive/aware values;
                    # confirm that downstream code tolerates both.
                    time = datetime.datetime.utcfromtimestamp(time)

                key = normalize_wifi_key(str(fields[1]))
                if not valid_wifi_pattern(key):  # pragma: no cover
                    continue

                # lat/lon are intentionally left unconverted here
                lat = fields[2]
                lon = fields[3]
                accuracy = int(fields[4])
                altitude = int(fields[5])
                altitude_accuracy = int(fields[6])
                channel = int(fields[7])
                signal = int(fields[8])

                wifi = dict(
                    key=key,
                    channel=channel,
                    signal=signal,
                )
                data = dict(
                    lat=lat,
                    lon=lon,
                    time=time,
                    accuracy=accuracy,
                    altitude=altitude,
                    altitude_accuracy=altitude_accuracy,
                    radio='',
                    cell=(),
                    wifi=[wifi],
                )
            except (ValueError, IndexError, OverflowError, OSError):
                # OverflowError / OSError: utcfromtimestamp() raises these
                # (rather than ValueError) for timestamps outside the
                # platform's supported range; treat such rows like any
                # other malformed row instead of aborting the import.
                continue

            items.append(data)
            counter += 1

            # flush every batch_size records
            if counter % batch_size == 0:
                process_measures(items, session, userid=userid)
                items = []
                session.flush()
                print('Added %s records.' % counter)

    # process the remaining items
    process_measures(items, session, userid=userid)
    print('Added %s records.' % counter)

    session.flush()
    return counter