Пример #1
0
#!/usr/bin/env python
'''
Command-line driver: runs lucene_index on a directory, then lucene_search
on the resulting index.

@note: to be removed
'''


import argparse
from lucenesearch.lucene_index import lucene_index
from lucenesearch.lucene_search import lucene_search

if __name__ == "__main__":
    # Command-line options, listed as (flag, add_argument keyword arguments).
    # They are registered in this order so the generated usage/help text
    # keeps the same layout.
    option_specs = (
        ("-d", dict(dest="input_folder", type=str,
                    help="The root directory to index and search",
                    required=True)),
        ("-o", dict(dest="output_folder", type=str,
                    help="Output directory of index",
                    default="/home/abhiramj/code/temp/index",
                    required=False)),
        ("-q", dict(dest="query_text", type=str,
                    help="Query to search for")),
        ("-l", dict(dest="limit", type=int,
                    help="Maximum number of results")),
    )

    cli = argparse.ArgumentParser(description='Lucene index and search')
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)
    options = cli.parse_args()

    # lucene_index is given the input and output folders first; lucene_search
    # is then run against that same output folder.
    lucene_index(options.input_folder, options.output_folder)
    # NOTE(review): the search result is stored but not used afterwards.
    responsive = lucene_search(options.output_folder,
                               options.limit,
                               options.query_text)
    
Пример #2
0
    query = raw_input('Enter query: ')  # 'Human computer interaction'
    limit = int(raw_input('Limit: '))
    
    
    if search_algorithm == 'LDA':
    
        None
        # Process the query 
        
#        responsive_docs, non_responsive_docs = process_query(query, dictionary, lda, index, doc_paths, limit)
#        nrd = np.array(non_responsive_docs)
#        nrd_paths = [os.path.join(dir_path, nrd[idx,2]) for idx, dir_path in enumerate(nrd[:,1])] # looks like i'm not getting full file paths
        
    elif search_algorithm == 'Lucene':
#        None 
        responsive_docs = lucene_search(lucene_index_file, limit, query)
        non_responsive_docs = []
        for file_name in find_files_in_folder(DATA_PATH):
            if os.path.dirname(file_name) is not lucene_index_file:       # skipping index directory
                if file_name not in responsive_docs:
                    non_responsive_docs.append(file_name)
        nrd_paths=non_responsive_docs 
    
    print 'Number of responsive documents:', len(responsive_docs)
    print 'Number of non responsive documents:', len(non_responsive_docs) 
    
    print 'The responsive files are: '
    for f in responsive_docs:
        print f