Exemplo n.º 1
0
def test_simple_tarball_dump(named_temporary_file):
    """Check that ``write_tarball`` stores one archive member per packet.

    The fake heartbeat packets are converted lazily (via a generator) into
    ``models.Voevent`` objects and written to the path given by
    ``named_temporary_file.name``. The archive is then reopened and its
    member count is compared with the number of input packets.
    """
    voevent_etrees = fake.heartbeat_packets()
    voevent_rowgen = (models.Voevent.from_etree(v) for v in voevent_etrees)
    fname = named_temporary_file.name
    filestore.write_tarball(voevent_rowgen, fname)
    # Use a context manager so the archive handle is always released,
    # including when reading the member list raises.
    with tarfile.open(fname) as tarball:
        n_members = len(tarball.getmembers())
    assert n_members == len(voevent_etrees)
Exemplo n.º 2
0
def test_tarball_round_trip(named_temporary_file, fixture_db_session):
    """Round-trip packets through the database and a tarball, then compare.

    Fake heartbeat packets are added to the session and queried back as
    ``(ivorn, xml)`` rows. Those queried rows are checked against freshly
    built ``models.Voevent`` objects (same ivorn, same xml, same xml type,
    which must be ``six.binary_type``), written to a tarball, reloaded, and
    finally compared with the original packets by ivorn and by serialised
    string.
    """
    packets = fake.heartbeat_packets()
    # with open(assasn_non_ascii_packet_filepath, 'rb') as f:
    #     packets.append(vp.load(f))
    session = fixture_db_session
    for packet in packets:
        session.add(models.Voevent.from_etree(packet))
    session.flush()

    db_rows = session.query(models.Voevent.ivorn, models.Voevent.xml).all()
    assert len(db_rows) == len(packets)

    fresh_rows = [models.Voevent.from_etree(packet) for packet in packets]
    assert db_rows[0].ivorn == fresh_rows[0].ivorn
    assert db_rows[0].xml == fresh_rows[0].xml

    # Both the queried row and the freshly built object must carry the XML
    # payload as a bytestring.
    assert type(db_rows[0].xml) == type(fresh_rows[0].xml)
    assert type(fresh_rows[0].xml) == six.binary_type

    # Write the rows that actually came back from the database, so the
    # tarball code is exercised with round-tripped data.
    tarball_path = named_temporary_file.name
    filestore.write_tarball(db_rows, tarball_path)

    loaded_voevents = []
    for entry in filestore.tarfile_xml_generator(tarball_path):
        loaded_voevents.append(vp.loads(entry.xml))

    # Compare identifiers first, then the full serialised packets.
    original_ivorns = [packet.attrib['ivorn'] for packet in packets]
    loaded_ivorns = [loaded.attrib['ivorn'] for loaded in loaded_voevents]
    assert original_ivorns == loaded_ivorns

    original_strings = [vp.dumps(packet) for packet in packets]
    loaded_strings = [vp.dumps(loaded) for loaded in loaded_voevents]
    assert original_strings == loaded_strings
Exemplo n.º 3
0
def test_simple_tarball_dump(named_temporary_file):
    """Check that ``write_tarball`` stores one archive member per packet.

    The fake heartbeat packets are converted lazily (via a generator) into
    ``models.Voevent`` objects and written to the path given by
    ``named_temporary_file.name``. The archive is then reopened and its
    member count is compared with the number of input packets.
    """
    voevent_etrees = fake.heartbeat_packets()
    voevent_rowgen = (models.Voevent.from_etree(v) for v in voevent_etrees)
    fname = named_temporary_file.name
    filestore.write_tarball(voevent_rowgen, fname)
    # Use a context manager so the archive handle is always released,
    # including when reading the member list raises.
    with tarfile.open(fname) as tarball:
        n_members = len(tarball.getmembers())
    assert n_members == len(voevent_etrees)
Exemplo n.º 4
0
def test_unicode_voevent_tarball_dump(named_temporary_file):
    """Check that a packet containing non-ASCII text can be dumped.

    The first fake heartbeat packet gets an author contact name made of
    euro signs before all packets are converted to ``models.Voevent``
    objects and written to the path given by ``named_temporary_file.name``.
    The archive must contain one member per input packet.
    """
    voevent_etrees = fake.heartbeat_packets()
    # Inject non-ASCII characters into one packet.
    vp.set_author(voevent_etrees[0], contactName=u"€€€€")
    voevent_rowgen = (models.Voevent.from_etree(v) for v in voevent_etrees)
    fname = named_temporary_file.name
    filestore.write_tarball(voevent_rowgen, fname)
    # Use a context manager so the archive handle is always released,
    # including when reading the member list raises.
    with tarfile.open(fname) as tarball:
        n_members = len(tarball.getmembers())
    assert n_members == len(voevent_etrees)
Exemplo n.º 5
0
def test_unicode_voevent_tarball_dump(named_temporary_file):
    """Check that a packet containing non-ASCII text can be dumped.

    The first fake heartbeat packet gets an author contact name made of
    euro signs before all packets are converted to ``models.Voevent``
    objects and written to the path given by ``named_temporary_file.name``.
    The archive must contain one member per input packet.
    """
    voevent_etrees = fake.heartbeat_packets()
    # Inject non-ASCII characters into one packet.
    vp.set_author(voevent_etrees[0], contactName=u"€€€€")
    voevent_rowgen = (models.Voevent.from_etree(v) for v in voevent_etrees)
    fname = named_temporary_file.name
    filestore.write_tarball(voevent_rowgen, fname)
    # Use a context manager so the archive handle is always released,
    # including when reading the member list raises.
    with tarfile.open(fname) as tarball:
        n_members = len(tarball.getmembers())
    assert n_members == len(voevent_etrees)
Exemplo n.º 6
0
def main():
    """Dump VOEvent packets from the database into one or more tarballs.

    Command-line options come from ``handle_args()``. Unless ``args.all`` is
    set, packets are restricted to ``author_datetime < args.end`` and, when
    ``args.start`` is given, ``author_datetime >= args.start``. Matching
    packets are fetched in ``Voevent.id`` order in batches of up to
    ``args.nsplit`` rows, and each batch is handed to ``write_tarball``.
    Output paths are ``args.tarfile_pathstem`` plus ``.NNN.tar.bz2`` (a
    1-based file counter) when ``args.nsplit`` is truthy, or plain
    ``.tar.bz2`` otherwise.

    Returns:
        int: 0 once the dump loop has finished.

    Raises:
        RuntimeError: If ``db_utils.check_database_exists`` reports that the
            database at the configured URL is missing.
    """
    args = handle_args()
    dburl = dbconfig.make_db_url(dbconfig.default_admin_db_params, args.dbname)
    if not db_utils.check_database_exists(dburl):
        raise RuntimeError("Database not found")

    filecount = 1
    n_packets_written = 0

    def get_tarfile_path():
        # Reads ``filecount`` at call time, so each batch gets a new suffix.
        if args.nsplit:
            suffix = '.{0:03d}.tar.bz2'.format(filecount)
        else:
            suffix = '.tar.bz2'
        return args.tarfile_pathstem + suffix

    session = Session(bind=create_engine(dburl))
    try:
        if args.prefetch:
            # Query only the two columns instead of full Voevent entities.
            qry = session.query(Voevent.ivorn, Voevent.xml)
        else:
            qry = session.query(Voevent)

        if args.all:
            logger.info("Dumping **all** packets currently in database")
        else:
            qry = qry.filter(Voevent.author_datetime < args.end)
            if args.start is not None:
                qry = qry.filter(Voevent.author_datetime >= args.start)
                logger.info("Fetching packets from {}".format(args.start))
            else:
                logger.info("Fetching packets from beginning of time")
            logger.info("...until: {}".format(args.end))
        # A stable ordering is required for limit/offset paging.
        qry = qry.order_by(Voevent.id)

        n_matching = qry.count()
        logger.info("Dumping {} packets".format(n_matching))
        start_time = datetime.datetime.now()
        while n_packets_written < n_matching:
            logger.debug(
                "Fetching batch of up to {} packets".format(args.nsplit))
            voevents = qry.limit(args.nsplit).offset(n_packets_written).all()
            if not voevents:
                # The row count was taken before the loop; if rows have
                # disappeared since (or the batch size is zero) no further
                # progress is possible, so stop rather than spin forever.
                logger.warning(
                    "No packets returned after {} of {}; stopping".format(
                        n_packets_written, n_matching))
                break

            n_packets_written += write_tarball(voevents, get_tarfile_path())
            elapsed = (datetime.datetime.now() - start_time).total_seconds()
            # Guard against a zero interval on coarse clocks.
            rate = n_packets_written / elapsed if elapsed > 0 else 0.0
            # The value is packets per second, so label it as such.
            logger.info(
                "{} packets dumped so far, in {} ({:.0f} packets/s)".format(
                    n_packets_written, elapsed, rate))
            filecount += 1
    finally:
        # Release the session even when a query or write fails.
        session.close()
    logger.info("Wrote {} packets".format(n_packets_written))
    return 0
Exemplo n.º 7
0
def test_tarball_round_trip(named_temporary_file, fixture_db_session):
    """Round-trip packets through the database and a tarball, then compare.

    Fake heartbeat packets are added to the session and queried back as
    ``(ivorn, xml)`` rows. The test pins the type difference between a
    freshly built model (``str`` xml) and a queried row (``unicode`` xml),
    writes the queried rows to a tarball, reloads them, and compares the
    result with the original packets by ivorn and by serialised string.
    """
    packets = fake.heartbeat_packets()
    # with open(assasn_non_ascii_packet_filepath, 'rb') as f:
    #     packets.append(vp.load(f))
    session = fixture_db_session
    for packet in packets:
        session.add(models.Voevent.from_etree(packet))
    session.flush()

    db_rows = session.query(models.Voevent.ivorn, models.Voevent.xml).all()
    assert len(db_rows) == len(packets)

    fresh_rows = [models.Voevent.from_etree(packet) for packet in packets]
    assert db_rows[0].ivorn == fresh_rows[0].ivorn
    assert db_rows[0].xml == fresh_rows[0].xml

    # The key point of this test:
    # a newly instantiated model keeps whatever string type it was given
    # (Python 2 behaviour: bytestring stays bytestring, unicode stays
    # unicode), whereas a value read back from the database comes out as
    # unicode even though a bytestring went in.
    assert type(db_rows[0].xml) != type(fresh_rows[0].xml)
    assert type(fresh_rows[0].xml) == str
    assert type(db_rows[0].xml) == unicode

    # Because of that difference, the tarball code must be exercised with
    # rows that really came back from the database.
    tarball_path = named_temporary_file.name
    filestore.write_tarball(db_rows, tarball_path)

    loaded_voevents = []
    for entry in filestore.tarfile_xml_generator(tarball_path):
        loaded_voevents.append(vp.loads(entry.xml))

    # Compare identifiers first, then the full serialised packets.
    original_ivorns = [packet.attrib['ivorn'] for packet in packets]
    loaded_ivorns = [loaded.attrib['ivorn'] for loaded in loaded_voevents]
    assert original_ivorns == loaded_ivorns

    original_strings = [vp.dumps(packet) for packet in packets]
    loaded_strings = [vp.dumps(loaded) for loaded in loaded_voevents]
    assert original_strings == loaded_strings
def main():
    """Dump VOEvent packets from the database into one or more tarballs.

    Command-line options come from ``handle_args()``. Unless ``args.all`` is
    set, packets are restricted to ``author_datetime < args.end`` and, when
    ``args.start`` is given, ``author_datetime >= args.start``. Matching
    packets are fetched in ``Voevent.id`` order in batches of up to
    ``args.nsplit`` rows, and each batch is handed to ``write_tarball``.
    Output paths are ``args.tarfile_pathstem`` plus ``.NNN.tar.bz2`` (a
    1-based file counter) when ``args.nsplit`` is truthy, or plain
    ``.tar.bz2`` otherwise.

    Returns:
        int: 0 once the dump loop has finished.

    Raises:
        RuntimeError: If ``db_utils.check_database_exists`` reports that the
            database at the configured URL is missing.
    """
    args = handle_args()
    dburl = dbconfig.make_db_url(dbconfig.default_admin_db_params, args.dbname)
    if not db_utils.check_database_exists(dburl):
        raise RuntimeError("Database not found")

    filecount = 1
    n_packets_written = 0

    def get_tarfile_path():
        # Reads ``filecount`` at call time, so each batch gets a new suffix.
        if args.nsplit:
            suffix = '.{0:03d}.tar.bz2'.format(filecount)
        else:
            suffix = '.tar.bz2'
        return args.tarfile_pathstem + suffix

    session = Session(bind=create_engine(dburl))
    try:
        if args.prefetch:
            # Query only the two columns instead of full Voevent entities.
            qry = session.query(Voevent.ivorn, Voevent.xml)
        else:
            qry = session.query(Voevent)

        if args.all:
            logger.info("Dumping **all** packets currently in database")
        else:
            qry = qry.filter(Voevent.author_datetime < args.end)
            if args.start is not None:
                qry = qry.filter(Voevent.author_datetime >= args.start)
                logger.info("Fetching packets from {}".format(args.start))
            else:
                logger.info("Fetching packets from beginning of time")
            logger.info("...until: {}".format(args.end))
        # A stable ordering is required for limit/offset paging.
        qry = qry.order_by(Voevent.id)

        n_matching = qry.count()
        logger.info("Dumping {} packets".format(n_matching))
        start_time = datetime.datetime.now()
        while n_packets_written < n_matching:
            logger.debug(
                "Fetching batch of up to {} packets".format(args.nsplit))
            voevents = qry.limit(args.nsplit).offset(n_packets_written).all()
            if not voevents:
                # The row count was taken before the loop; if rows have
                # disappeared since (or the batch size is zero) no further
                # progress is possible, so stop rather than spin forever.
                logger.warning(
                    "No packets returned after {} of {}; stopping".format(
                        n_packets_written, n_matching))
                break

            n_packets_written += write_tarball(voevents, get_tarfile_path())
            elapsed = (datetime.datetime.now() - start_time).total_seconds()
            # Guard against a zero interval on coarse clocks.
            rate = n_packets_written / elapsed if elapsed > 0 else 0.0
            # The value is packets per second, so label it as such.
            logger.info(
                "{} packets dumped so far, in {} ({:.0f} packets/s)".format(
                    n_packets_written, elapsed, rate))
            filecount += 1
    finally:
        # Release the session even when a query or write fails.
        session.close()
    logger.info("Wrote {} packets".format(n_packets_written))
    return 0