def dosearch(query):
    # Normalise the raw query and keep it as an extra term so an exact match
    # on the full query string is also considered.
    raw_query = lower_letters(query)
    query = query_parser(query) + [raw_query]
    id_list = []
    res_name = []
    weight = {}
    if query:
        # Accumulate a TF-IDF weight per document for every query term found
        # in the inverted index, and collect the matching document ids.
        for term in query:
            if term in t_inverted_index:
                for key in t_inverted_index[term]:
                    if key not in weight:
                        weight[key] = tdxidf_weighting(term, key)
                    else:
                        weight[key] += tdxidf_weighting(term, key)
                    if key not in id_list:
                        id_list.append(key)

        # Rank the candidate documents in the vector space model, then promote
        # documents whose title/singer/album string contains query terms.
        rank_list = calc_vector_space(query, id_list)
        rank_fin = []
        q = list(set(jieba.cut_for_search(raw_query)))
        if u" " in q:
            q.remove(u" ")
        cnt = []
        for key in reversed(rank_list):
            info_term = list(set(jieba.cut_for_search(id_info_list[key])))
            if u" " in info_term:
                info_term.remove(u" ")
            for term in q:
                if term in info_term:
                    cnt.append(key)

        # Documents whose metadata is hit by more query terms come first.
        freq_cnt = Counter(cnt)
        for item, freq in freq_cnt.most_common():
            rank_fin.append(item)

        # Append the remaining vector-space results that had no metadata hit.
        for item in rank_list:
            if item not in rank_fin:
                rank_fin.append(item)
        if id_list:
            for ids in rank_fin:
                # doc_id_list holds paths like "./data/<name>.json"; strip the
                # 7-character "./data/" prefix and the extension.
                res = os.path.splitext(doc_id_list[ids])[0]
                res_name.append(res[7:])
    return res_name
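
# Illustrative usage sketch (not part of the original module): a hypothetical
# demo_search() helper showing how dosearch() is meant to be called once the
# index globals below (t_inverted_index, doc_id_list, id_info_list) have been
# populated. The query string here is made up.
def demo_search():
    results = dosearch(u"some singer or song title")
    # dosearch returns document names (file names without the "./data/"
    # prefix and the ".json" extension), best match first.
    for name in results:
        print name
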
def build_dict_for_spell_check(t_inverted_index):
    # Map every lower-cased term to its total frequency across all documents
    # and pickle the mapping for the spell checker.
    data = {}
    for term in t_inverted_index:
        total = 0
        for docID in t_inverted_index[term]:
            total += t_inverted_index[term][docID]
        term = lower_letters(term)
        if isinstance(term, str):
            # Keep the dictionary keys as unicode.
            term = term.decode("utf-8")
        data[term] = total
    with open('./data/spell.dat', 'wb') as f:
        pickle.dump(data, f)
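
# Illustrative sketch (not part of the original module): one way the pickled
# frequency dictionary written by build_dict_for_spell_check() could be
# consumed. suggest_spelling() is a hypothetical helper; it takes difflib's
# close matches and prefers the terms that occur most often in the corpus.
def suggest_spelling(word, spell_path='./data/spell.dat', n=3):
    import difflib
    with open(spell_path, 'rb') as f:
        freq = pickle.load(f)
    # Compare against the lower-cased unicode keys stored in spell.dat.
    candidates = difflib.get_close_matches(lower_letters(word), freq.keys(), n=10)
    return sorted(candidates, key=lambda t: freq[t], reverse=True)[:n]
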
# Per-document metadata tables built while scanning the JSON files.
id_info_list = {}
doc_id_list = {}
play_and_share = {}
color = {}
auto_complete_list = []
doc_id = 0
# Scan every song document: record its metadata and build the token list that
# feeds the inverted index.
for file_name in glob.glob(ur'./data/*.json'):
    with codecs.open(file_name, 'r', 'utf-8') as f:
        j = json.load(f)
    # Searchable content: title, singer, album, lyrics and all tags.
    content = j["title"] + j["singer"] + j["album"] + j["lrc"]
    for tag in j["tag"]:
        content += tag
    color[j["title"]] = get_domi_color(j["title"])
    play_and_share[doc_id] = [j["play_count_num"], j["share"]]
    doc_id_list[doc_id] = file_name.encode("utf-8")
    # Segment the content and append the lower-cased metadata fields as whole
    # terms so exact matches on them are possible.
    seg_list = list(jieba.cut_for_search(content))
    seg_list = words_filter(seg_list)
    seg_list.append(lower_letters(j["title"]))
    seg_list.append(lower_letters(j["singer"]))
    seg_list.append(lower_letters(j["album"]))
    for tag in j["tag"]:
        seg_list.append(tag)
    # Title, singer and album also drive the search-box auto-completion.
    auto_complete_list.append(j["title"])
    auto_complete_list.append(j["singer"])
    auto_complete_list.append(j["album"])
    term = list(set(seg_list))
    # Punctuation-free, lower-cased "title singer album" string used by
    # dosearch() to promote documents whose metadata matches query terms.
    exclude = set(string.punctuation)
    info_str = j["title"] + " " + j["singer"] + " " + j["album"]
    info_str = lower_letters(info_str)
    info = ''.join(ch for ch in info_str if ch not in exclude)
    id_info_list[doc_id] = info
    if u" " in term:
        term.remove(u" ")