def _parse_measure_row(fields, utcmin):
    """Parse one tab-separated row into a measure data dict.

    Returns the measure dict, or None when the row is malformed
    (too few columns, non-integer numeric fields) or the wifi key
    normalizes to the all-zero placeholder.
    """
    try:
        time = int(fields[0])
        if time == 0:  # pragma: no cover
            # unknown time gets an old date
            time = utcmin
        else:
            # convert from unixtime to utc
            # NOTE(review): this datetime is naive while utcmin is tz-aware;
            # presumably process_measure tolerates both — confirm downstream.
            time = datetime.datetime.utcfromtimestamp(time)
        key = normalize_wifi_key(str(fields[5]))
        if key == '000000000000':  # pragma: no cover
            return None
        lat = fields[1]
        lon = fields[2]
        signal = int(fields[3])
        channel = int(fields[4])
        wifi = dict(
            key=key,
            channel=channel,
            signal=signal,
        )
        return dict(
            lat=lat,
            lon=lon,
            time=time,
            accuracy=0,
            altitude=0,
            altitude_accuracy=0,
            radio='',
            cell=(),
            wifi=[wifi],
        )
    except (ValueError, IndexError):
        # Skip rows we cannot parse rather than aborting the whole import.
        return None


def load_file(session, source_file, batch_size=10000):
    """Import wifi measures from a tab-separated *source_file*.

    Each valid row is turned into a measure dict and handed to
    process_measure (which schedules async tasks as a side effect).
    The session is flushed every *batch_size* records and once more
    at the end.

    :param session: database session providing ``flush()``
    :param source_file: path to the tab-separated input file
    :param batch_size: number of records between session flushes
    :returns: total number of rows read (including skipped ones)
    """
    utcnow = datetime.datetime.utcnow().replace(tzinfo=iso8601.UTC)
    # rows with an unknown (zero) timestamp get a date 120 days in the past
    utcmin = utcnow - datetime.timedelta(120)

    with open(source_file, 'r') as fd:
        reader = csv.reader(fd, delimiter='\t', quotechar=None)
        counter = 0
        for fields in reader:
            data = _parse_measure_row(fields, utcmin)
            if data is None:
                continue
            # side effect, schedules async tasks
            process_measure(data, utcnow, session)
            counter += 1
            # flush every batch_size new records
            if counter % batch_size == 0:
                session.flush()
                print('Added %s records.' % counter)
    # add the rest
    session.flush()
    return counter