Exemplo n.º 1
0
 def insert_the_statements(self, input_tuples):
     """Copy statement tuples into the test database, then insert agents.

     The first element of every tuple in ``input_tuples`` and the first
     entry of the ``raw_statements`` column list are dropped before the
     rows are copied into the ``raw_statements`` table.
     """
     stmt_count = len(input_tuples)
     print("Loading %d statements..." % stmt_count)

     # Strip the leading element from each row and the matching column name.
     trimmed_rows = [row[1:] for row in input_tuples]
     trimmed_cols = self.test_data['raw_statements']['cols'][1:]
     self.test_db.copy('raw_statements', trimmed_rows, trimmed_cols)

     print("Inserting agents...")
     dbu.insert_agents(self.test_db, 'raw')
Exemplo n.º 2
0
def upload_statements(stmt_data_list, db=None):
    """Copy statement data into the ``statements`` table and add agents.

    Parameters
    ----------
    stmt_data_list : list
        Objects providing ``make_tuple()``, ``reading_id`` and ``statement``.
    db : database handle or None
        Database to upload to. When None, the result of
        ``get_primary_db()`` is used.
    """
    db = get_primary_db() if db is None else db

    stmt_count = len(stmt_data_list)
    logger.info("Uploading %d statements to the database." % stmt_count)
    rows = [stmt_data.make_tuple() for stmt_data in stmt_data_list]
    db.copy('statements', rows, StatementData.get_cols())

    logger.info("Uploading agents to the database.")
    reading_ids = {stmt_data.reading_id for stmt_data in stmt_data_list}
    if not reading_ids:
        # Nothing was uploaded, so there are no agents to insert.
        return

    stmts = [stmt_data.statement for stmt_data in stmt_data_list]
    insert_agents(db, stmts, db.Statements.reader_ref.in_(reading_ids))
Exemplo n.º 3
0
def upload_statements(stmt_data_list, db=None):
    """Copy statement data into ``raw_statements`` and add their agents.

    Parameters
    ----------
    stmt_data_list : list
        Objects providing ``make_tuple()``, ``reading_id`` and ``uuid``.
    db : database handle or None
        Database to upload to. When None, the result of
        ``get_primary_db()`` is used.
    """
    db = get_primary_db() if db is None else db

    stmt_count = len(stmt_data_list)
    logger.info("Uploading %d statements to the database." % stmt_count)
    rows = [stmt_data.make_tuple() for stmt_data in stmt_data_list]
    db.copy('raw_statements', rows, StatementData.get_cols())

    logger.info("Uploading agents to the database.")
    reading_ids = {stmt_data.reading_id for stmt_data in stmt_data_list}
    if not reading_ids:
        # Nothing was uploaded, so there are no agents to insert.
        return

    # Lazily look up the stored row for each uploaded statement by uuid;
    # the queries only run as insert_agents consumes the generator.
    stored_stmts = (
        db.select_one(db.RawStatements,
                      db.RawStatements.uuid.like(stmt_data.uuid))
        for stmt_data in stmt_data_list
    )
    insert_agents(db, 'raw', stored_stmts, verbose=True)
Exemplo n.º 4
0
    def add_statements(self, fraction=1, with_pa=False):
        """Add statements and agents to the database.

        A random sample of the available statement tuples (or all of them
        when ``fraction`` equals 1) is copied into ``raw_statements``,
        agents are inserted, and preassembly is optionally run.

        Parameters
        ----------
        fraction : float between 0 and 1
            Default is 1. The fraction of remaining statements to be added.
            The resulting count is truncated to an integer.
        with_pa : bool
            Default False. Choose to run pre-assembly/incremental-preassembly
            on the added statements.
        """
        available_tuples = self.get_available_stmt_tuples()
        # Compare by value, not identity: `1.0 is not 1` is True, which used
        # to send a "take everything" request down the sampling path.
        if fraction != 1:
            # random.sample requires an integer sample size; a float here
            # raises TypeError.
            num_stmts = int(fraction*len(available_tuples))
            input_tuples = random.sample(available_tuples, num_stmts)
        else:
            input_tuples = available_tuples

        print("Loading %d statements..." % len(input_tuples))
        if hasattr(self.test_db.RawStatements, 'id'):
            self.test_db.copy('raw_statements', input_tuples,
                               self.test_data['raw_statements']['cols'])
        else:
            # No `id` attribute on the table: drop the first element of each
            # tuple and the first column name (presumably the id -- confirm).
            self.test_db.copy('raw_statements', [t[1:] for t in input_tuples],
                              self.test_data['raw_statements']['cols'][1:])

        print("Inserting agents...")
        db_util.insert_agents(self.test_db, 'raw')

        if with_pa:
            print("Preassembling new statements...")
            if len(input_tuples) > 100:
                # For larger inputs, use a batch size of a tenth of the input.
                batch_size = len(input_tuples)//10
                pam = pm.PreassemblyManager(1, batch_size)
            else:
                pam = pm.PreassemblyManager()

            # Supplement when statements were already used by this instance;
            # otherwise build the corpus from scratch.
            if self.used_stmt_tuples:
                pam.supplement_corpus(self.test_db)
            else:
                pam.create_corpus(self.test_db)

        return
Exemplo n.º 5
0
def upload_statements(stmt_data_list, db=None):
    """Copy statement data into ``raw_statements`` and add their agents.

    Parameters
    ----------
    stmt_data_list : list
        Objects providing ``make_tuple()``, ``reading_id`` and a
        ``statement`` with a ``uuid``.
    db : database handle or None
        Database to upload to. When None, the result of
        ``get_primary_db()`` is used.
    """
    db = get_primary_db() if db is None else db

    stmt_count = len(stmt_data_list)
    logger.info("Uploading %d statements to the database." % stmt_count)
    rows = [stmt_data.make_tuple() for stmt_data in stmt_data_list]
    db.copy('raw_statements', rows, StatementData.get_cols())

    logger.info("Uploading agents to the database.")
    reading_ids = {stmt_data.reading_id for stmt_data in stmt_data_list}
    if not reading_ids:
        # Nothing was uploaded, so there are no agents to insert.
        return

    # Restrict agent insertion to the statements uploaded in this call.
    uploaded_uuids = {stmt_data.statement.uuid for stmt_data in stmt_data_list}
    uuid_filter = db.RawStatements.uuid.in_(uploaded_uuids)
    insert_agents(db, 'raw', uuid_filter, verbose=True,
                  override_default_query=True)
Exemplo n.º 6
0
def _get_loaded_db(num_stmts,
                   batch_size=None,
                   split=None,
                   with_init_corpus=False):
    """Build a test database filled with raw statements and agents.

    Parameters
    ----------
    num_stmts : int
        Passed to ``_get_input_stmt_tuples`` to obtain the statement tuples.
    batch_size : int or None
        Required when ``split`` is given; handed to the PreassemblyManager
        used for the initial corpus.
    split : float or None
        When None, all statements are loaded at once. Otherwise, the
        fraction of statements loaded first with a ``create_date`` two days
        in the past; the rest are loaded afterwards with the current time.
    with_init_corpus : bool
        If True, create a preassembled corpus after the first load.

    Returns
    -------
    The loaded database handle.
    """
    print("Creating and filling a test database:")
    db = _get_background_loaded_db()

    # Now load the statements. Much of this processing is the result of active
    # development, and once that is done, TODO: Format pickle to match
    copy_stmt_tuples, copy_col_names = _get_input_stmt_tuples(num_stmts)

    print("\tInserting the raw statements...")
    if split is None:
        # Single-shot load: everything goes in at once.
        db.copy('raw_statements', copy_stmt_tuples, copy_col_names)
        print("\tAdding agents...")
        db_util.insert_agents(db, 'raw')
        if with_init_corpus:
            print("\tAdding a preassembled corpus...")
            pm.PreassemblyManager().create_corpus(db)
        return db

    # Two-stage load: an "old" initial batch followed by a "new" remainder.
    assert batch_size is not None
    initial_count = int(split * len(copy_stmt_tuples))
    first_batch = random.sample(copy_stmt_tuples, initial_count)
    second_batch = list(set(copy_stmt_tuples) - set(first_batch))

    # Backdate the first batch by two days.
    backdated = datetime.now() - timedelta(days=2)
    dated_col_names = copy_col_names + ('create_date', )
    first_rows = [row + (backdated, ) for row in first_batch]
    db.copy('raw_statements', first_rows, dated_col_names)
    print("\tAdding agents...")
    db_util.insert_agents(db, 'raw')

    if with_init_corpus:
        print("\tAdding a preassembled corpus from first batch of raw "
              "stmts...")
        initial_manager = pm.PreassemblyManager(batch_size=batch_size)
        initial_manager.create_corpus(db)

    print("\tInserting the rest of the raw statements...")
    current = datetime.now()
    second_rows = [row + (current, ) for row in second_batch]
    db.copy('raw_statements', second_rows, dated_col_names)
    print("\tAdding agents...")
    db_util.insert_agents(db, 'raw')
    return db