def test_simple_tarball_dump(named_temporary_file):
    """Write freshly-generated Voevent rows to a tarball and count the members."""
    packets = fake.heartbeat_packets()
    # Lazily build DB-model rows from the packet etrees:
    rows = (models.Voevent.from_etree(pkt) for pkt in packets)
    tarball_path = named_temporary_file.name
    filestore.write_tarball(rows, tarball_path)
    # One archive member is expected per input packet:
    archive = tarfile.open(tarball_path)
    assert len(archive.getmembers()) == len(packets)
def test_tarball_round_trip(named_temporary_file, fixture_db_session):
    """Round-trip packets through the database and a tarball dump.

    Checks that packets pulled back out of the DB, dumped to a tarball and
    reloaded are identical (ivorn and full XML text) to the originals.
    """
    packets = fake.heartbeat_packets()
    # with open(assasn_non_ascii_packet_filepath, 'rb') as f:
    #     packets.append(vp.load(f))
    session = fixture_db_session
    for tree in packets:
        session.add(models.Voevent.from_etree(tree))
    session.flush()
    db_rows = session.query(models.Voevent.ivorn, models.Voevent.xml).all()
    assert len(db_rows) == len(packets)
    # Freshly-instantiated rows (never stored) for comparison:
    fresh_rows = [models.Voevent.from_etree(tree) for tree in packets]
    assert db_rows[0].ivorn == fresh_rows[0].ivorn
    assert db_rows[0].xml == fresh_rows[0].xml
    assert type(db_rows[0].xml) == type(fresh_rows[0].xml)
    assert type(fresh_rows[0].xml) == six.binary_type
    # Therefore it's crucial to test with an actual round-tripped dataset,
    # the DB-fetched rows from above:
    tarball_path = named_temporary_file.name
    filestore.write_tarball(db_rows, tarball_path)
    loaded = [vp.loads(entry.xml)
              for entry in filestore.tarfile_xml_generator(tarball_path)]

    def as_strings(voevents):
        return [vp.dumps(v) for v in voevents]

    def as_ivorns(voevents):
        return [v.attrib['ivorn'] for v in voevents]

    assert as_ivorns(packets) == as_ivorns(loaded)
    assert as_strings(packets) == as_strings(loaded)
def test_unicode_voevent_tarball_dump(named_temporary_file):
    """Tarball dump still succeeds when a packet contains non-ASCII text."""
    packets = fake.heartbeat_packets()
    # Inject some unicode (euro-sign) characters into the first packet:
    vp.set_author(packets[0], contactName=u"\u20ac\u20ac\u20ac\u20ac")
    rows = (models.Voevent.from_etree(pkt) for pkt in packets)
    tarball_path = named_temporary_file.name
    filestore.write_tarball(rows, tarball_path)
    # One archive member is expected per input packet:
    archive = tarfile.open(tarball_path)
    assert len(archive.getmembers()) == len(packets)
def main():
    """Dump VOEvent packets from the database into bzip2'd tarball(s).

    Queries packets (optionally restricted to an author-datetime window),
    writes them in batches of ``args.nsplit`` to numbered tarballs (or a
    single tarball when no split is requested), and logs progress.

    Returns:
        0 on success.

    Raises:
        RuntimeError: if the target database does not exist.
    """
    args = handle_args()
    dburl = dbconfig.make_db_url(dbconfig.default_admin_db_params, args.dbname)
    if not db_utils.check_database_exists(dburl):
        raise RuntimeError("Database not found")

    filecount = 1
    n_packets_written = 0

    def get_tarfile_path():
        # When splitting into batches, number each output file.
        if args.nsplit:
            suffix = '.{0:03d}.tar.bz2'.format(filecount)
        else:
            suffix = '.tar.bz2'
        return args.tarfile_pathstem + suffix

    session = Session(bind=create_engine(dburl))
    if args.prefetch:
        # Fetch only the columns the dump needs, skipping full ORM objects.
        qry = session.query(Voevent.ivorn, Voevent.xml)
    else:
        qry = session.query(Voevent)

    if args.all:
        logger.info("Dumping **all** packets currently in database")
    else:
        qry = qry.filter(Voevent.author_datetime < args.end)
        if args.start is not None:
            qry = qry.filter(Voevent.author_datetime >= args.start)
            logger.info("Fetching packets from {}".format(args.start))
        else:
            logger.info("Fetching packets from beginning of time")
        logger.info("...until: {}".format(args.end))
    qry = qry.order_by(Voevent.id)

    n_matching = qry.count()
    logger.info("Dumping {} packets".format(n_matching))
    start_time = datetime.datetime.now()
    while n_packets_written < n_matching:
        logger.debug("Fetching batch of up to {} packets".format(args.nsplit))
        voevents = qry.limit(args.nsplit).offset(n_packets_written).all()
        n_packets_written += write_tarball(voevents, get_tarfile_path())
        elapsed = (datetime.datetime.now() - start_time).total_seconds()
        # BUGFIX: the rate was logged as 'kilopacket/s' but computed as
        # plain packets/s (off by 1000x); divide by 1000.  Also guard
        # against ZeroDivisionError when the first batch finishes within
        # clock resolution.
        rate_kps = (n_packets_written / elapsed / 1000.0) if elapsed else float('inf')
        logger.info(
            "{} packets dumped so far, in {} ({:.0f} kilopacket/s)".format(
                n_packets_written, elapsed, rate_kps
            ))
        filecount += 1

    session.close()
    logger.info("Wrote {} packets".format(n_packets_written))
    return 0
def test_tarball_round_trip(named_temporary_file, fixture_db_session):
    """Round-trip packets through the database and a tarball dump.

    Checks that packets pulled back out of the DB, dumped to a tarball and
    reloaded are identical (ivorn and full XML text) to the originals.
    """
    packets = fake.heartbeat_packets()
    # with open(assasn_non_ascii_packet_filepath, 'rb') as f:
    #     packets.append(vp.load(f))
    session = fixture_db_session
    for tree in packets:
        session.add(models.Voevent.from_etree(tree))
    session.flush()
    db_rows = session.query(models.Voevent.ivorn, models.Voevent.xml).all()
    assert len(db_rows) == len(packets)
    # Freshly-instantiated rows (never stored) for comparison:
    fresh_rows = [models.Voevent.from_etree(tree) for tree in packets]
    assert db_rows[0].ivorn == fresh_rows[0].ivorn
    assert db_rows[0].xml == fresh_rows[0].xml
    # Here's the crux
    # A newly instantiated model will store a string type same as Python 2;
    # bytestring stores as bytestring, unicode as unicode.
    # However, after a round-trip to the database, proper typing has been
    # asserted and the bytestring is returned as unicode!
    assert type(db_rows[0].xml) != type(fresh_rows[0].xml)
    assert type(fresh_rows[0].xml) == str
    assert type(db_rows[0].xml) == unicode
    # Therefore it's crucial to test with an actual round-tripped dataset,
    # the DB-fetched rows from above:
    tarball_path = named_temporary_file.name
    filestore.write_tarball(db_rows, tarball_path)
    loaded = [vp.loads(entry.xml)
              for entry in filestore.tarfile_xml_generator(tarball_path)]

    def as_strings(voevents):
        return [vp.dumps(v) for v in voevents]

    def as_ivorns(voevents):
        return [v.attrib['ivorn'] for v in voevents]

    assert as_ivorns(packets) == as_ivorns(loaded)
    assert as_strings(packets) == as_strings(loaded)
def main():
    """Dump VOEvent packets from the database into bzip2'd tarball(s).

    Queries packets (optionally restricted to an author-datetime window),
    writes them in batches of ``args.nsplit`` to numbered tarballs (or a
    single tarball when no split is requested), and logs progress.

    Returns:
        0 on success.

    Raises:
        RuntimeError: if the target database does not exist.
    """
    args = handle_args()
    dburl = dbconfig.make_db_url(dbconfig.default_admin_db_params, args.dbname)
    if not db_utils.check_database_exists(dburl):
        raise RuntimeError("Database not found")

    filecount = 1
    n_packets_written = 0

    def get_tarfile_path():
        # When splitting into batches, number each output file.
        if args.nsplit:
            suffix = '.{0:03d}.tar.bz2'.format(filecount)
        else:
            suffix = '.tar.bz2'
        return args.tarfile_pathstem + suffix

    session = Session(bind=create_engine(dburl))
    if args.prefetch:
        # Fetch only the columns the dump needs, skipping full ORM objects.
        qry = session.query(Voevent.ivorn, Voevent.xml)
    else:
        qry = session.query(Voevent)

    if args.all:
        logger.info("Dumping **all** packets currently in database")
    else:
        qry = qry.filter(Voevent.author_datetime < args.end)
        if args.start is not None:
            qry = qry.filter(Voevent.author_datetime >= args.start)
            logger.info("Fetching packets from {}".format(args.start))
        else:
            logger.info("Fetching packets from beginning of time")
        logger.info("...until: {}".format(args.end))
    qry = qry.order_by(Voevent.id)

    n_matching = qry.count()
    logger.info("Dumping {} packets".format(n_matching))
    start_time = datetime.datetime.now()
    while n_packets_written < n_matching:
        logger.debug("Fetching batch of up to {} packets".format(args.nsplit))
        voevents = qry.limit(args.nsplit).offset(n_packets_written).all()
        n_packets_written += write_tarball(voevents, get_tarfile_path())
        elapsed = (datetime.datetime.now() - start_time).total_seconds()
        # BUGFIX: the rate was logged as 'kilopacket/s' but computed as
        # plain packets/s (off by 1000x); divide by 1000.  Also guard
        # against ZeroDivisionError when the first batch finishes within
        # clock resolution.
        rate_kps = (n_packets_written / elapsed / 1000.0) if elapsed else float('inf')
        logger.info(
            "{} packets dumped so far, in {} ({:.0f} kilopacket/s)".format(
                n_packets_written, elapsed, rate_kps))
        filecount += 1

    session.close()
    logger.info("Wrote {} packets".format(n_packets_written))
    return 0