def create_indexes():
    """Populate the Redis indexes from every record returned by the database.

    For each record, ``rec[0]`` is used as the item id and ``rec[1]`` as the
    item name; the id -> name pair is stored in the id-to-item store.  The
    fields ``rec[1]``, ``rec[2]`` and ``rec[3]`` are then each parsed,
    normalized and filtered against ``STOP_LIST`` to form a bag of words.
    Every bag gets its own integer id, under which are stored:

    * the number of words left in the bag after filtering,
    * the raw (unparsed) field text,
    * membership in the set and the sorted set kept for each word of the bag.

    The database connection is released even when reading the data or
    writing to Redis raises.
    """
    db = connect_db()
    try:
        cursor = db.cursor()
        data = get_all_data_from_db(cursor)

        norm_redis = connect_word_to_norm_word()
        id_item_redis = connect_id_to_item()
        word_ids_redis = connect_word_to_bag_ids()
        word_ids_redis_quick = connect_word_to_bag_ids_quick()
        idBag_length_redis = connect_bag_id_to_length()
        idBag_bag = connect_bag_id_to_bag()

        for rec in data:
            item_id = rec[0]
            name = rec[1]
            # Single-argument parenthesised form prints the same text whether
            # ``print`` is a statement or a function.
            print(str(item_id) + " " + name)
            id_item_redis.set(item_id, name)

            for i in range(1, 4):
                # Three bags per item, numbered consecutively.
                # NOTE(review): ids start at 0 only if item ids start at 1 --
                # confirm against the table's id sequence.
                bag_id = (item_id - 1) * 3 + i - 1

                cur_bag_of_words = filter_bag_of_words(
                    normalize_bag_of_words_with_index(
                        parse_line(rec[i]), norm_redis),
                    STOP_LIST)

                idBag_length_redis.set(bag_id, len(cur_bag_of_words))
                # The stored bag text is the raw field, not the filtered words.
                idBag_bag.set(bag_id, rec[i])

                for word in cur_bag_of_words:
                    word_ids_redis.sadd(word, bag_id)
                    word_ids_redis_quick.zadd(word, 1, bag_id)
    finally:
        # Always release the connection, including on failure part-way through.
        disconnect_db(db)
def convert_tag_to_word_bag(tag, is_tag_normalized):
    """Turn a tag into a bag of words filtered against ``STOP_LIST``.

    When ``is_tag_normalized`` is truthy the tag is only lower-cased and
    split on single space characters; otherwise it is passed through
    ``normalize_tag``.  The resulting words are then handed to
    ``filter_bag_of_words`` and its result is returned.
    """
    words = tag.lower().split(" ") if is_tag_normalized else normalize_tag(tag)
    return filter_bag_of_words(words, STOP_LIST)