def test_tarball_round_trip(named_temporary_file, fixture_db_session):
    """
    Check that VOEvent packets survive a database -> tarball -> parse trip.

    Fake heartbeat packets are added to the session, queried back as
    (ivorn, xml) rows, written out with ``filestore.write_tarball`` and
    re-read with ``filestore.tarfile_xml_generator``. The re-parsed packets
    must match the originals both by IVORN and by serialized content.
    """
    voevent_etrees = fake.heartbeat_packets()
    # NOTE: the non-ASCII packet fixture is currently disabled for this test:
    # with open(assasn_non_ascii_packet_filepath, 'rb') as f:
    #     voevent_etrees.append(vp.load(f))
    session = fixture_db_session
    for etree in voevent_etrees:
        session.add(models.Voevent.from_etree(etree))
    session.flush()

    voevent_dbrows = session.query(models.Voevent.ivorn,
                                   models.Voevent.xml).all()
    assert len(voevent_dbrows) == len(voevent_etrees)

    # Freshly built (never persisted) rows, for comparison against the rows
    # that have been through the database.
    voevent_rowgen = [models.Voevent.from_etree(tree)
                      for tree in voevent_etrees]
    assert voevent_dbrows[0].ivorn == voevent_rowgen[0].ivorn
    assert voevent_dbrows[0].xml == voevent_rowgen[0].xml
    # The xml payload must be a bytestring both before and after the
    # round-trip through the database.
    assert type(voevent_dbrows[0].xml) is type(voevent_rowgen[0].xml)
    assert type(voevent_rowgen[0].xml) is six.binary_type

    # Write the tarball from the rows actually returned by the database
    # ('voevent_dbrows'), so the test covers a genuinely round-tripped
    # dataset rather than freshly instantiated models.
    fname = named_temporary_file.name
    filestore.write_tarball(voevent_dbrows, fname)
    # Use a dedicated loop name: under Python 2 a list-comprehension variable
    # leaks into the enclosing scope and would otherwise clobber the session.
    loaded_voevents = [
        vp.loads(entry.xml)
        for entry in filestore.tarfile_xml_generator(fname)
    ]

    def to_strings(voeventlist):
        return [vp.dumps(v) for v in voeventlist]

    def to_ivorn(voeventlist):
        return [v.attrib['ivorn'] for v in voeventlist]

    assert to_ivorn(voevent_etrees) == to_ivorn(loaded_voevents)
    assert to_strings(voevent_etrees) == to_strings(loaded_voevents)
def load_from_tarfile(session, tarfile_path, check_for_duplicates,
                      pkts_per_commit=1000):
    """
    Iterate through xml files in a tarball and attempt to load into database.

    Packets which fail to parse, or fail to convert to a database row, are
    logged (with traceback) and skipped; they do not abort the load.

    .. warning::
        Very slow with duplicate checking enabled.

    Args:
        session: Database session that new rows are added to and committed
            through.
        tarfile_path: Path of the tarball to read packets from.
        check_for_duplicates (bool): If true, skip any packet whose IVORN
            is already reported present by ``ivorn_present``.
        pkts_per_commit (int): Commit the session each time the count of
            loaded packets reaches a multiple of this value.

    Returns:
        tuple: (n_parsed, n_loaded) - Total number of packets parsed from
        tarball, and number successfully loaded.
    """
    tf_stream = tarfile_xml_generator(tarfile_path)
    logger.info("Loading: %s", tarfile_path)

    n_parsed = 0
    n_loaded = 0
    for tarinf in tf_stream:
        # Stage 1: parse the raw XML. ``except Exception`` (rather than a
        # bare ``except``) keeps the skip-on-error behaviour while letting
        # KeyboardInterrupt / SystemExit stop a long-running load.
        try:
            v = vp.loads(tarinf.xml, check_version=False)
            if v.attrib['version'] != '2.0':
                logger.debug('Packet: %s is not VO-schema version 2.0.',
                             tarinf.name)
            n_parsed += 1
        except Exception:
            logger.exception('Error loading file %s, skipping', tarinf.name)
            continue

        # Stage 2: convert to a database row and queue it on the session.
        try:
            new_row = Voevent.from_etree(v)
            if check_for_duplicates and ivorn_present(session,
                                                      new_row.ivorn):
                logger.debug("Ignoring duplicate ivorn: %s in file %s",
                             new_row.ivorn, tarinf.name)
                continue
            session.add(new_row)
            n_loaded += 1
        except Exception:
            logger.exception(
                'Error converting file %s to database row, skipping',
                tarinf.name)
            continue

        # Only reached straight after a successful add, so n_loaded >= 1.
        if n_loaded % pkts_per_commit == 0:
            session.commit()

    # Flush whatever remains from the final, partially filled batch.
    session.commit()
    logger.info("Successfully parsed %s packets, of which loaded %s.",
                n_parsed, n_loaded)
    return n_parsed, n_loaded
def test_tarball_round_trip(named_temporary_file, fixture_db_session):
    """
    Check that VOEvent packets survive a database -> tarball -> parse trip.

    Fake heartbeat packets are added to the session, queried back as
    (ivorn, xml) rows, written out with ``filestore.write_tarball`` and
    re-read with ``filestore.tarfile_xml_generator``. The re-parsed packets
    must match the originals both by IVORN and by serialized content.

    The string-type assertions below are written against Python 2
    (``str`` versus ``unicode``).
    """
    voevent_etrees = fake.heartbeat_packets()
    # NOTE: the non-ASCII packet fixture is currently disabled for this test:
    # with open(assasn_non_ascii_packet_filepath, 'rb') as f:
    #     voevent_etrees.append(vp.load(f))
    session = fixture_db_session
    for etree in voevent_etrees:
        session.add(models.Voevent.from_etree(etree))
    session.flush()

    voevent_dbrows = session.query(models.Voevent.ivorn,
                                   models.Voevent.xml).all()
    assert len(voevent_dbrows) == len(voevent_etrees)

    # Freshly built (never persisted) rows, for comparison against the rows
    # that have been through the database.
    voevent_rowgen = [models.Voevent.from_etree(tree)
                      for tree in voevent_etrees]
    assert voevent_dbrows[0].ivorn == voevent_rowgen[0].ivorn
    assert voevent_dbrows[0].xml == voevent_rowgen[0].xml

    # Here's the crux:
    # A newly instantiated model stores whatever string type it was given,
    # as usual in Python 2; bytestring stays bytestring, unicode stays
    # unicode. However, after a round-trip to the database, proper typing
    # has been asserted and the bytestring is returned as unicode!
    assert type(voevent_dbrows[0].xml) is not type(voevent_rowgen[0].xml)
    assert type(voevent_rowgen[0].xml) is str
    assert type(voevent_dbrows[0].xml) is unicode

    # Therefore it's crucial to test with an actual round-tripped dataset,
    # the 'voevent_dbrows' from above:
    fname = named_temporary_file.name
    filestore.write_tarball(voevent_dbrows, fname)
    # Use a dedicated loop name: under Python 2 a list-comprehension variable
    # leaks into the enclosing scope and would otherwise clobber the session.
    loaded_voevents = [
        vp.loads(entry.xml)
        for entry in filestore.tarfile_xml_generator(fname)
    ]

    def to_strings(voeventlist):
        return [vp.dumps(v) for v in voeventlist]

    def to_ivorn(voeventlist):
        return [v.attrib['ivorn'] for v in voeventlist]

    assert to_ivorn(voevent_etrees) == to_ivorn(loaded_voevents)
    assert to_strings(voevent_etrees) == to_strings(loaded_voevents)