Пример #1
0
def psuedoRFSearch():
    """Run the pseudo-relevance-feedback model for the command-line query.

    The raw query string is read from ``sys.argv[1]`` and turned into query
    objects by ``TransformQuery.getQuries``. For every query the id and
    content are printed, the retrieval model is asked for results, and each
    result is recorded as a row with the columns ``Doc_no``, ``rank``,
    ``Product_title`` and ``Result_score``. ``rank`` restarts at 1 for every
    query. After each query the table accumulated so far is printed.

    Rows are gathered in a plain list and turned into a DataFrame from that
    list: ``DataFrame.append`` no longer exists in pandas 2.x and copied the
    whole frame on every call.

    Raises:
        IndexError: if no query was passed on the command line.
    """
    raw_query = sys.argv[1]
    columns = ['Doc_no', 'rank', 'Product_title', 'Result_score']

    index = MyIndexReader.MyIndexReader()
    pesudo_search = PseudoRFRetrievalModel.PseudoRFRetreivalModel(index)
    extractor = TransformQuery.TransformQuery()
    queries = extractor.getQuries(raw_query)

    rows = []
    for query in queries:
        print(query.queryId, "\t", query.queryContent)
        # NOTE(review): 20, 100 and 0.4 are passed through unchanged; their
        # meaning is defined by retrieveQuery, which is not visible here.
        results = pesudo_search.retrieveQuery(query, 20, 100, 0.4)
        rows.extend(
            {
                'Doc_no': result.getDocNo(),
                'rank': rank,
                'Product_title': result.getDocTitle(),
                'Result_score': result.getScore(),
            }
            for rank, result in enumerate(results, start=1)
        )
        # Rebuilt per query so the printed table stays cumulative, as before.
        df_psuedoRF = pd.DataFrame(rows, columns=columns)
        print(df_psuedoRF)
Пример #2
0
def indexRead(term):
    """Query the index for ``term`` and resolve every posting to a document number.

    A fresh ``MyIndexReader`` is created, the document frequency and the
    collection frequency of ``term`` are fetched, and, when the document
    frequency is positive, ``getDocNo`` is called for every entry of the
    term's posting list. Nothing is printed or returned.
    """
    reader = MyIndexReader.MyIndexReader()
    # Both frequencies are fetched; only the document frequency gates the walk.
    doc_freq = reader.DocFreq(term)
    collection_freq = reader.CollectionFreq(term)
    if not doc_freq > 0:
        return
    for doc_id in reader.getPostingList(term):
        doc_no = reader.getDocNo(doc_id)
def ReadIndex(type, token):
    """Print frequency statistics and the posting list of ``token``.

    ``type`` is handed to the ``MyIndexReader`` constructor. A summary line
    with the document frequency and collection frequency of ``token`` is
    always printed. When the document frequency is positive, one
    tab-separated line per posting follows: document number, document id and
    the value stored for that id in the posting list.
    """
    reader = MyIndexReader.MyIndexReader(type)
    doc_freq = reader.DocFreq(token)
    coll_freq = reader.CollectionFreq(token)

    summary = (" >> the token \"" + token + "\" appeared in " + str(doc_freq)
               + " documents and " + str(coll_freq) + " times in total")
    print(summary)

    if not doc_freq > 0:
        return

    postings = reader.getPostingList(token)
    for doc_id in postings:
        doc_no = reader.getDocNo(doc_id)
        fields = (doc_no, str(doc_id), str(postings[doc_id]))
        print("\t".join(fields))
Пример #4
0
def qrmSearch():
    """Run the query retrieval model for the command-line query.

    The raw query string is read from ``sys.argv[1]`` and turned into query
    objects by ``TransformQuery.getQuries``. Each query is passed to
    ``QueryRetrievalModel.retrieveQuery`` and every result becomes one row
    with the columns ``Doc_no``, ``rank``, ``Product_title`` and
    ``Result_score``. ``rank`` restarts at 1 for every query.

    Rows are gathered in a list and the DataFrame is built once at the end:
    ``DataFrame.append`` no longer exists in pandas 2.x and copied the whole
    frame on every call.

    Returns:
        pandas.DataFrame: the collected result rows. Earlier versions built
        this table and discarded it, so callers that ignore the return value
        are unaffected.

    Raises:
        IndexError: if no query was passed on the command line.
    """
    raw_query = sys.argv[1]
    columns = ['Doc_no', 'rank', 'Product_title', 'Result_score']

    index = MyIndexReader.MyIndexReader()
    search = QueryRetreivalModel.QueryRetrievalModel(index)
    extractor = TransformQuery.TransformQuery()
    queries = extractor.getQuries(raw_query)

    rows = []
    for query in queries:
        # NOTE(review): 20 is passed through unchanged; its meaning is
        # defined by retrieveQuery, which is not visible here.
        results = search.retrieveQuery(query, 20)
        rows.extend(
            {
                'Doc_no': result.getDocNo(),
                'rank': rank,
                'Product_title': result.getDocTitle(),
                'Result_score': result.getScore(),
            }
            for rank, result in enumerate(results, start=1)
        )

    df_qrm = pd.DataFrame(rows, columns=columns)
    return df_qrm
Пример #5
0
# File locations derived from the ``Path`` settings and the current ``user``.
processed_file = Path.ResultHM1 + 'result_' + user + '.txt'
names_file = Path.FilesDictDir + 'dict_' + user + '.txt'
dict_file = Path.IndexDir + "dictionary_" + user
postings_file = Path.IndexDir + "postings_" + user

# Fill doc_to_title_dict from the names file: the text before the first ':'
# is the integer key, the text between the first and second ':' is the value.
# The context manager closes the file even when a malformed line raises.
with open(names_file, "r", encoding='cp437') as dr:
    for line in dr:
        fields = line.split(":")
        # NOTE(review): the stored value keeps a trailing newline when the
        # line has no second ':', and anything after a second ':' is dropped.
        # Left as-is because retrieveQuery may rely on it; confirm.
        doc_to_title_dict[int(fields[0])] = fields[1]

index = MyIndexReader.MyIndexReader(user)
search = QueryRetreivalModel.QueryRetrievalModel(index, user)
#preprocessing and indexing ends

extractor = ExtractQuery.ExtractQuery(Query_string)
#query execution starts
query = extractor.getQuries()

results = search.retrieveQuery(query, 4, miu, doc_to_title_dict, user)
rank = 1

final_json = []
if len(results) == 0:
    print("[]")
else:
    for result in results: