Exemplo n.º 1
0
                        "<"
                        + str(id)
                        + ","
                        + str(int(matches.groups()[1]) + 1)
                        + ","
                        + str(in_title[term.lower()])
                        + ">",
                    )
                except Exception, e:
                    print "80:", e
                    return inverted_index.replace(
                        matches.groups()[0], "<" + str(id) + "," + str(int(matches.groups()[1]) + 1) + ",0>"
                    )
            else:
                try:
                    return uummuuWord.add_to_index(inverted_index, id, 1, in_title[term.lower()])
                except Exception, e:
                    print "86:", e
                    return uummuuWord.add_to_index(inverted_index, id, 1, 0)

    else:
        matches = re.search("(<" + str(id) + ",(\d)*,([0|1])>)", inverted_index)
        if len(matches.groups()) == 0:
            return inverted_index
        else:
            if int(matches.groups()[1]) > 1:
                try:
                    return inverted_index.replace(
                        matches.groups()[0],
                        "<"
                        + str(id)
Exemplo n.º 2
0
    styled_objs, no_style_objs = main_style_parse(parser.styled_objects, parser.full_style, True);
    
    for key in styled_objs.keys():
        print key + ' ==> ' + str(styled_objs[key]);
        
def insert_into_db(cursor, doc_id, word, weight, freq, in_title):
    """Record one word occurrence of a document in the index tables.

    Single quotes are stripped from ``word``; if nothing is left the function
    returns without touching the database.  The word is then capped at 124
    characters, its current ``invert_list`` is read from ``sites_index``,
    extended via ``add_to_index`` and written back (INSERT when the word has
    no row yet, UPDATE otherwise).  Finally a row is added to
    ``sites_worddoc``; a failure of that last statement is printed rather
    than raised.

    Args:
        cursor: Object providing ``execute(sql)`` and ``fetchall()``.
        doc_id: Document id; formatted into the SQL with ``%d``.
        word: Term to index.
        weight: Value forwarded to ``add_to_index`` and stored in
            ``sites_worddoc.weight`` (formatted with ``%d``).
        freq: Occurrence count; added to ``sites_index.freq`` and stored in
            ``sites_worddoc.occurrence``.
        in_title: Value forwarded to ``add_to_index`` and stored in
            ``sites_worddoc.in_title`` (formatted with ``%d``).

    Returns:
        None.
    """
    word = word.replace("'", "")
    if not word:
        return

    # Truncate BEFORE the lookup so the SELECT and the INSERT/UPDATE below all
    # use the same key.  Previously the SELECT ran with the untruncated word,
    # so an over-long word was never found and got re-inserted on every call.
    word = word[:124]

    # NOTE(review): the statements are still assembled with string formatting.
    # Switching to driver-side parameter binding is advisable, but the
    # placeholder style depends on the DB driver, which is not visible here.
    cursor.execute("SELECT invert_list FROM sites_index WHERE word = '%s';" % word)
    results = cursor.fetchall()

    try:
        entry = results[0][0]
        present = True
    except (IndexError, TypeError):
        # No row for this word yet (empty or missing result set).
        entry = ""
        present = False

    entry = add_to_index(entry, doc_id, weight, freq, in_title)

    if present:
        cursor.execute(
            "UPDATE sites_index set invert_list = '%s', num_docs = num_docs + 1, freq = freq + %d where word = '%s';"
            % (entry, freq, word)
        )
    else:
        cursor.execute(
            "INSERT into sites_index(word, invert_list, freq, length, num_docs) VALUES ('%s', '%s', %d, %d, 1);"
            % (word, entry, freq, len(word))
        )

    try:
        # The formatting stays inside the try so a non-numeric argument is
        # reported below instead of propagating, as before.
        cursor.execute(
            "INSERT INTO sites_worddoc(word, doc_id, occurrence, weight, in_title) VALUES ('%s', %d, %d, %d, %d);"
            % (word, doc_id, freq, weight, in_title)
        )
    except Exception as e:
        # Best-effort: report the failure and carry on.  Single-argument
        # print(...) yields the same output on Python 2 and Python 3.
        print("word was: %s length is: %s" % (word, len(word)))
        print("I quit because: %s" % (e,))