Exemplo n.º 1
0
def db_load_models(articles):
    """Rebuild stored term models for ``articles`` from the database.

    Fetches the ``articleswithterms`` rows of every given article in a single
    query, groups them by article id, and builds an ``ArticleModel`` for each
    article whose ``has_been_counted`` attribute is truthy. The terms of every
    rebuilt model are also registered in a fresh ``InvertedIndex``.

    Args:
        articles: Sized iterable of article objects exposing ``id`` and
            ``has_been_counted``.

    Returns:
        A ``(models, inv_index)`` tuple. ``models`` maps article id to its
        ``ArticleModel``; ``inv_index`` is the ``InvertedIndex`` filled from
        those models. Both are empty when ``articles`` is empty.

    Raises:
        KeyError: If a counted article has no rows in ``articleswithterms``.
    """
    models = dict()
    inv_index = InvertedIndex()
    num_articles = len(articles)
    if num_articles == 0:
        # "IN ()" is not valid SQL, so there is nothing to query for.
        return models, inv_index

    # One "%s" placeholder per id: the driver escapes the values itself
    # instead of the ids being spliced into the SQL text by hand.
    placeholders = ",".join(["%s"] * num_articles)
    query = ("SELECT articleid,term,tf,count FROM articleswithterms "
             "WHERE articleid IN (" + placeholders + ")")
    article_ids = tuple([article.id for article in articles])

    db = database.connect_to_database()
    try:
        cur = db.cursor(cursorclass=MySQLdb.cursors.SSDictCursor)
        try:
            cur.execute(query, article_ids)
            rows = cur.fetchall()
        finally:
            cur.close()
    finally:
        # All rows are in memory by now; release the connection even when
        # the query failed.
        db.close()

    # Group the fetched rows by the article they belong to.
    rows_by_article = dict()
    for row in rows:
        rows_by_article.setdefault(row['articleid'], []).append(row)

    for position, article in enumerate(articles, 1):
        if not article.has_been_counted:
            continue
        print("Loading article " + str(position) + "/" + str(num_articles))
        new_model = ArticleModel(article)
        new_model.from_db_values(rows_by_article[article.id])
        all_terms = new_model.terms.all_terms()
        inv_index.add_term_occurences(all_terms, article.id)
        models[article.id] = new_model
    return models, inv_index
Exemplo n.º 2
0
def count_terms_and_store(articles, store=True, title_weight=19, print_steps=False, leading_weight=1, stoplist_file="../stop_words"):
    """Count the terms of every article and optionally persist the models.

    Builds an ``ArticleModel`` per article, records each term returned by
    ``count_terms()`` in a fresh ``InvertedIndex`` and, when ``store`` is
    true, saves the model through ``db_save`` on one shared connection.

    Args:
        articles: Sized iterable of article objects exposing ``id``.
        store: When true, open a database connection and save each model.
        title_weight: Passed through to ``ArticleModel``.
        print_steps: When true, print a progress line per article.
        leading_weight: Passed through to ``ArticleModel``.
        stoplist_file: Passed through to ``ArticleModel`` as
            ``stoplist_file``.

    Returns:
        A ``(models, inv_index)`` tuple. ``models`` maps article id to its
        ``ArticleModel``; ``inv_index`` is the populated ``InvertedIndex``.
    """
    db = database.connect_to_database() if store else None
    models = dict()
    inv_index = InvertedIndex()
    num_articles = len(articles)
    try:
        for position, art in enumerate(articles, 1):
            if print_steps:
                print("Counting terms of article " + str(position) + "/" + str(num_articles))
            model = ArticleModel(art, title_weight, leading_weight, stoplist_file=stoplist_file)
            for term in model.count_terms():
                inv_index.add_term_ocurrence(term, model.article.id)
            if store:
                model.db_save(db)
            models[art.id] = model
    finally:
        # Release the connection opened above, also when counting or
        # saving an article fails part-way through.
        if db is not None:
            db.close()
    return models, inv_index