Example No. 1
def load_source(config, path, input_id):
    """Load a single model source from file, write to database.

    :param dict config: the configuration to use: database, host, user,
        password, upload ID.
    :param str path: the full path of the model source file
    :param int input_id: the database key of the associated input record
    :returns: `False` on failure, `True` on success
    """
    logger.info("path = %s" % path)
    logger.info("input_id = %s" % input_id)
    try:
        engine = db.create_engine(config["db"], config["user"],
                                  config["password"])
        src_loader = loader.SourceModelLoader(path, engine, input_id=input_id)
        results = src_loader.serialize()
        src_loader.close()
        logger.info("Total sources inserted: %s" % len(results))
    except Exception as e:
        logger.exception(e)
        return False
    return True
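
For context, a minimal sketch of how load_source might be called. The config keys come from the docstring above, but the concrete values, path, and input id are invented for illustration:

# Hypothetical call; all values are illustrative only.
config = {"db": "openquake", "user": "oq_admin", "password": "s3cret"}
if load_source(config, "/tmp/source_model.xml", input_id=42):
    print("sources loaded")
else:
    print("load failed; see the log for the traceback")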
Example No. 2
    def _serialize_test_helper(self, test_file, expected_tables):
        engine = db.create_engine(TEST_DB, TEST_DB_USER, host=TEST_DB_HOST)
        src_loader = db_loader.SourceModelLoader(test_file, engine)

        results = src_loader.serialize()

        # we should get a 3 item list of results
        self.assertEqual(3, len(results))

        # We expect there to have been 3 inserts.
        # The results are a list of dicts with a single key.
        # The key is the table name (including table space);
        # the value is the id (as an int) of the new record.

        # First, check that the results includes the 3 tables we expect:
        result_tables = [list(x)[0] for x in results]

        self.assertEqual(expected_tables, result_tables)

        # Everything appears to be fine, but let's query the database to make
        # sure the expected records are there.
        # At this point, we're not going to check every single value; we just
        # want to make sure the records made it into the database.
        tables = src_loader.meta.tables

        # list of tuples of (table name, id)
        table_id_pairs = [list(x.items())[0] for x in results]

        for table_name, record_id in table_id_pairs:
            table = tables[table_name]

            # run a query against the table object to get a ResultProxy
            result_proxy = table.select(table.c.id == record_id).execute()

            # there should be 1 record here
            self.assertEqual(1, result_proxy.rowcount)

        # clean up db resources
        src_loader.close()
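
To make the shape of results concrete, here is a hypothetical value consistent with the comments above; the table names and ids are made up for illustration:

# Illustrative only: one single-key dict per insert, mapping
# "tablespace.table" to the id of the newly inserted record.
results = [
    {'hzrdi.simple_fault': 1},
    {'hzrdi.mfd_evd': 2},
    {'hzrdi.source': 3},
]
result_tables = [list(x)[0] for x in results]
# result_tables == ['hzrdi.simple_fault', 'hzrdi.mfd_evd', 'hzrdi.source']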
Example No. 3
    def test_csv_to_db_loader_end_to_end(self):
        """
            * Serializes the csv into the database
            * Queries the database for the data just inserted
            * Verifies the data against the csv
            * Deletes the inserted records from the database
        """
        def _pop_date_fields(csv):
            date_fields = ['year', 'month', 'day', 'hour', 'minute', 'second']
            res = [csv.pop(csv.index(field)) for field in date_fields]
            return res

        def _prepare_date(csv_r, date_fields):
            return [int(csv_r[field]) for field in date_fields]

        def _pop_geometry_fields(csv):
            unused_fields = ['longitude', 'latitude']
            for field in unused_fields:
                csv.pop(csv.index(field))

        def _retrieve_db_data(soup_db):

            # doing some "trickery" with *properties and primary_key,
            # to adapt the code for sqlalchemy 0.7

            # surface join
            surf_join = soup_db.join(
                soup_db.catalog,
                soup_db.surface,
                properties={'id_surface': [soup_db.surface.c.id]},
                exclude_properties=[
                    soup_db.surface.c.id, soup_db.surface.c.last_update
                ],
                primary_key=[soup_db.surface.c.id])

            # magnitude join
            mag_join = soup_db.join(
                surf_join,
                soup_db.magnitude,
                properties={
                    'id_magnitude': [soup_db.magnitude.c.id],
                    'id_surface': [soup_db.surface.c.id]
                },
                exclude_properties=[
                    soup_db.magnitude.c.id, soup_db.magnitude.c.last_update,
                    soup_db.surface.c.last_update
                ],
                primary_key=[soup_db.magnitude.c.id, soup_db.surface.c.id])

            return mag_join.order_by(soup_db.catalog.eventid).all()

        def _verify_db_data(csv_loader, db_rows):
            # skip the header
            next(csv_loader.csv_reader)
            csv_els = list(csv_loader.csv_reader)
            for csv_row, db_row in zip(csv_els, db_rows):
                csv_keys = list(csv_row.keys())
                # pops 'longitude', 'latitude' which are used to populate
                # geometry_columns
                _pop_geometry_fields(csv_keys)

                timestamp = _prepare_date(csv_row, _pop_date_fields(csv_keys))
                csv_time = csv_loader._date_to_timestamp(*timestamp)
                # first we compare the timestamps
                self.assertEqual(str(db_row.time), csv_time)

                # then, we cycle through the csv keys and consider some special
                # cases
                for csv_key in csv_keys:
                    db_val = getattr(db_row, csv_key)
                    csv_val = csv_row[csv_key]
                    if not len(csv_val.strip()):
                        csv_val = None
                    if csv_key == 'agency':
                        self.assertEqual(str(db_val), str(csv_val))
                    else:
                        self.assertEqual(float(db_val), float(csv_val))

        def _delete_db_data(soup_db, db_rows):
            # cleaning the db
            for db_row in db_rows:
                soup_db.delete(db_row)

        engine = db.create_engine(dbname=TEST_DB, user=TEST_EQCAT_DB_USER,
                                  password=TEST_DB_PASSWORD)

        csv_loader = db_loader.CsvModelLoader(self.csv_path, engine, 'eqcat')
        csv_loader.serialize()
        db_rows = _retrieve_db_data(csv_loader.soup)

        # rewind the file
        csv_loader.csv_fd.seek(0)

        _verify_db_data(csv_loader, db_rows)

        _delete_db_data(csv_loader.soup, db_rows)

        csv_loader.soup.commit()
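
As a quick illustration of how the date helpers above fit together; the header list and row values here are invented (in the test they come from the csv reader):

# Illustrative only: a fake header list and row.
csv_keys = ['eventid', 'year', 'month', 'day', 'hour', 'minute',
            'second', 'depth']
csv_row = {'year': '2009', 'month': '4', 'day': '6',
           'hour': '1', 'minute': '32', 'second': '39'}
date_fields = _pop_date_fields(csv_keys)  # pops the six date keys
timestamp = _prepare_date(csv_row, date_fields)
# timestamp == [2009, 4, 6, 1, 32, 39]; csv_keys == ['eventid', 'depth']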