def irsolver(data_file, index):
    """Choose one answer letter per question by scoring options with Lucene.

    For every question loaded from ``data_file``, each candidate answer is
    scored with ``query(question, answer, analyzer, searcher)`` and the
    letter of the highest-scoring candidate is recorded.

    Args:
        data_file: Passed unchanged to ``questions.get_input_data``, which
            must return three parallel sequences: ids, questions, and
            per-question answer lists.
        index: Filesystem path of the Lucene index directory to search.

    Returns:
        A tuple ``(idx, pred)``: ``idx`` is the id sequence exactly as
        returned by ``get_input_data``; ``pred`` is a list with one letter
        from 'A'..'D' per question.

    Raises:
        KeyError: If a question has more than four candidate answers,
            because only positions 1-4 are mapped to letters.
    """
    from questions import get_input_data

    lucene.initVM()

    # Third constructor argument is Lucene's ``ignoreCase`` flag.
    stops = CharArraySet(Version.LUCENE_4_10_1, 0, True)
    for s in stopwords:
        stops.add(s)
    analyzer = StandardAnalyzer(Version.LUCENE_4_10_1, stops)

    reader = IndexReader.open(SimpleFSDirectory(File(index)))
    try:
        searcher = IndexSearcher(reader)
        pred = []
        mapp = {1: 'A', 2: 'B', 3: 'C', 4: 'D'}
        idx, ques, ans = get_input_data(data_file)
        for acm, (_, q, a) in enumerate(zip(idx, ques, ans)):
            # Floor value: a candidate must score strictly above it to win,
            # otherwise the default 'A' is kept.
            max_score = -1000000
            best_ans = 'A'
            for i, ai in enumerate(a):
                sc = query(q, ai, analyzer, searcher)
                # Trace output: question position, option position, score.
                print(acm, i, sc)
                # Strict '>' means the earliest option wins on ties.
                if sc > max_score:
                    max_score = sc
                    best_ans = mapp[i + 1]
            pred.append(best_ans)
        return idx, pred
    finally:
        # The original never closed the reader, leaking index file handles
        # on every call (and on any exception raised while scoring).
        reader.close()
def irsolver(data_file, index):
    """Answer multiple-choice questions using scores from a Lucene index.

    Loads ids, questions and candidate-answer lists via
    ``questions.get_input_data(data_file)``, scores every
    (question, candidate) pair with ``query`` against the index at
    ``index``, and keeps the letter of the best-scoring candidate.

    Args:
        data_file: Input handed straight to ``get_input_data``.
        index: Path to the Lucene index directory.

    Returns:
        ``(idx, pred)`` where ``idx`` is the first value returned by
        ``get_input_data`` and ``pred`` holds one answer letter
        ('A'-'D') per question, in input order.

    Raises:
        KeyError: When a question offers more than four candidates; only
            four letters are defined.
    """
    from questions import get_input_data

    lucene.initVM()

    # Build the stop-word set used by the analyzer (ignoreCase=True).
    stops = CharArraySet(Version.LUCENE_4_10_1, 0, True)
    for s in stopwords:
        stops.add(s)
    analyzer = StandardAnalyzer(Version.LUCENE_4_10_1, stops)

    reader = IndexReader.open(SimpleFSDirectory(File(index)))
    try:
        searcher = IndexSearcher(reader)
        pred = []
        # 1-based option position -> answer letter.
        mapp = {1: 'A', 2: 'B', 3: 'C', 4: 'D'}
        idx, ques, ans = get_input_data(data_file)
        for acm, (_, q, a) in enumerate(zip(idx, ques, ans)):
            max_score = -1000000
            # Default when no candidate beats the floor above.
            best_ans = 'A'
            for i, ai in enumerate(a):
                sc = query(q, ai, analyzer, searcher)
                print(acm, i, sc)
                # Only a strictly better score replaces the current best,
                # so the first of several equal scores is kept.
                if sc > max_score:
                    max_score = sc
                    best_ans = mapp[i + 1]
            pred.append(best_ans)
        return idx, pred
    finally:
        # Release the index files even if scoring or data loading fails;
        # previously the reader was left open.
        reader.close()
def get_wiki_docids(data_file, wikipedia_index):
    """Run ``generate_docids`` for a question file against a Lucene index.

    Loads the input with ``questions.get_input_data``, builds a
    ``StandardAnalyzer`` with the module's ``stopwords`` and an
    ``IndexSearcher`` over ``wikipedia_index``, then delegates to
    ``generate_docids(data, data_file, analyzer, searcher)``.

    Args:
        data_file: Passed to ``get_input_data`` and forwarded unchanged to
            ``generate_docids``.
        wikipedia_index: Path to the Lucene index directory to open.

    Returns:
        None. Any result is produced by ``generate_docids`` itself.
    """
    from questions import get_input_data

    # Input is loaded before the JVM starts, as in the original ordering.
    data = get_input_data(data_file)

    lucene.initVM()

    # Third constructor argument is Lucene's ``ignoreCase`` flag.
    stops = CharArraySet(Version.LUCENE_4_10_1, 0, True)
    for s in stopwords:
        stops.add(s)
    analyzer = StandardAnalyzer(Version.LUCENE_4_10_1, stops)

    reader = IndexReader.open(SimpleFSDirectory(File(wikipedia_index)))
    try:
        searcher = IndexSearcher(reader)
        generate_docids(data, data_file, analyzer, searcher)
    finally:
        # The reader was previously never closed; release the index files
        # whether or not generate_docids succeeds.
        reader.close()