def show_main_menu():
    """Display the main menu in a loop and dispatch the chosen action.

    Returns False when the user picks option 0 (exit), so the caller can
    stop its own loop.
    """
    while True:
        print(
            '\n'
            'What would you like to do:\n'
            ' (1) List statistics\n'
            ' (2) Display 3 cities with longest names\n'
            ' (3) Display county\'s name with the largest number of communities\n'
            ' (4) Display locations, that belong to more than one category\n'
            ' (5) Advanced search\n'
            ' (0) Exit program\n')
        option = input('Choose option: ')
        if option == '1':
            PTPrint.list_statistics_print(SearchEngine.list_statistics())
        elif option == '2':
            PTPrint.cities_with_longest_names_print(
                SearchEngine.cities_with_longest_names(3))
        elif option == '3':
            PTPrint.largest_number_of_communities_print(
                SearchEngine.largest_number_of_communities(4))
        elif option == '4':
            PTPrint.more_then_one_cat_loc_print(
                SearchEngine.more_than_one_category_locations())
        elif option == '5':
            Menu.advanced_search()
        elif option == '0':
            return False
        else:
            # BUG FIX: the original had a bare string expression here (a
            # no-op), so invalid options produced no feedback at all.
            print('There is no such option in menu')
def setUp(self):
    """Create the three fixture text files and a SearchEngine over a
    test database seeded from `idealdict`."""
    self.strr = 'sun window tree apple, juse border films 23 45good'
    # 01234567890123456789012345678901234567890123456789
    # 0         1         2         3         4
    self.strr2 = 'Мы тестируем нашу программу для работы с окнами. '
    # (character-offset ruler as above)
    self.strr3 = 'Первая строка для тестов.\n'
    self.strr4 = 'Вторая строка для тестов.'
    # IMPROVEMENT: context managers guarantee the files are closed even if
    # a write raises; `self.test_file` is still assigned as before.
    with open('test_window_one.txt', 'w') as self.test_file:
        self.test_file.write(self.strr)
    with open('test_window_two.txt', 'w') as self.test_file:
        self.test_file.write(self.strr2)
    with open('test_window_three.txt', 'w') as self.test_file:
        self.test_file.write(self.strr3)
        self.test_file.write(self.strr4)
    self.x = SearchEngine("test_db")
    self.x.database.update(idealdict)
def setUp(self):
    """Prepare a seeded SearchEngine and the two fixture files."""
    engine = SearchEngine('db_name')
    engine.database.update(database)
    self.engine = engine
    for name, content in (("test1.txt", test1), ("test2.txt", test2)):
        with open(name, 'w') as handle:
            handle.write(content)
class TestMySearchEngine(unittest.TestCase):
    """Tests for SearchEngine.search()."""

    def setUp(self):
        self.x = SearchEngine("database")
        self.x.database.update(idealdict)

    def test_searchengine_type(self):
        # search() must always hand back a dict.
        self.assertIsInstance(self.x.search("round"), dict)

    def test_MyError_type_number(self):
        # Non-string queries are rejected.
        with self.assertRaises(ValueError):
            self.x.search(15)

    def test_empty_string(self):
        outcome = self.x.search('')
        self.assertIsInstance(outcome, dict)
        self.assertEqual(outcome, {})

    def test_search_by_token(self):
        outcome = self.x.search('The')
        self.assertIsInstance(outcome, dict)
        self.assertEqual(outcome, idealdict['The'])

    def tearDown(self):
        # Drop the engine and every database shard it created.
        del self.x
        for entry in os.listdir('.'):
            if entry.startswith('database.'):
                os.remove(entry)
class MASLsTest(unittest.TestCase):
    """Tests for SearchEngine.merge_and_sort_lists."""

    def setUp(self):
        self.x = SearchEngine()

    def tearDown(self):
        pass

    def _verify(self, lists, wanted):
        # Helper: run the merge and compare the materialised result.
        self.assertEqual(list(self.x.merge_and_sort_lists(lists)), wanted)

    def test_list_empty(self):
        self._verify([], [])

    def test_list_empty2(self):
        self._verify([[], [], []], [])

    def test_1_list_int(self):
        self._verify([[9, 10, 11]], [9, 10, 11])

    def test_2_list_int(self):
        self._verify([[1, 2, 3], [9, 10, 11]], [1, 2, 3, 9, 10, 11])

    def test_3_list_int(self):
        self._verify([[1, 2, 3], [9, 10, 11], [4, 5, 6]],
                     [1, 2, 3, 4, 5, 6, 9, 10, 11])

    def test_3_list_int_dif_len(self):
        self._verify([[1, 2, 3], [9, 10, 11, 12, 13, 14, 15], [4, 5, 6, 7, 8]],
                     [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])

    def test_3_list_int_empty(self):
        self._verify([[1, 2, 3], [], [4, 5, 6, 7, 8]],
                     [1, 2, 3, 4, 5, 6, 7, 8])

    def test_1_list_abc(self):
        # A single input list is passed through without sorting.
        self._verify([['c', 'a', 'b']], ['c', 'a', 'b'])

    def test_3_list_abc_dif_len(self):
        self._verify([['a', 'b', 'c'], ['d', 'f', 'g', 'h']],
                     ['a', 'b', 'c', 'd', 'f', 'g', 'h'])
def main():
    """Interactive loop: read a word and print its supplemented windows
    per file until the user types "exit"."""
    engine = SearchEngine('database')
    while True:
        query = input("Слово для поиска: ")
        if query == "exit":
            break
        # 2 is the depth (context size) of each window.
        windows = engine.find_supplemented_window(query, 2)
        for filename in windows:
            print(filename)
            for window in windows[filename]:
                # Print the window itself, then its BB-markup rendering.
                print(window)
                print(window.get_BB_string())
def cli(method):
    """
    A search engine that searches tweets related to USA elections 2020 given a query.
    """
    # NOTE(review): `se` is never used after construction; presumably
    # SearchEngine.__init__ has side effects (e.g. starts an interactive
    # session) — confirm, otherwise this is dead code.
    se = SearchEngine(method)
    return 0
def advanced_search():
    """Advanced-search submenu loop: prompt for a search mode, then a
    search term, and print the matching results.

    Returns False when the user picks option 0 (exit).
    """
    while True:
        print('What would you like to do:\n'
              ' (1) Search by name\n'
              ' (2) Search by type\n'
              ' (3) Search by district\n'
              ' (0) Exit search\n')
        option = input('Choose option: ')
        if option == '0':
            return False
        # BUG FIX: previously an unknown option still prompted for a search
        # term and then silently discarded it; validate the option first.
        if option not in ('1', '2', '3'):
            print('There is no such option in menu')
            continue
        search = input('Search for:')
        if option == '1':
            PTPrint.advanced_search_print(
                SearchEngine.advanced_search_by_name(search))
        elif option == '2':
            PTPrint.advanced_search_print(
                SearchEngine.advanced_search_by_type(search))
        elif option == '3':
            PTPrint.advanced_search_print(
                SearchEngine.advanced_search_by_district(search))
class TestForMultiWordSearch(unittest.TestCase):
    """Tests for SearchEngine.multiple_search()."""

    def setUp(self):
        self.x = SearchEngine("database")
        self.x.database.update(idealdict)

    def tearDown(self):
        # Remove the engine and its database shards.
        del self.x
        for leftover in os.listdir('.'):
            if leftover.startswith('database.'):
                os.remove(leftover)

    def test_searchengine_type(self):
        self.assertIsInstance(self.x.multiple_search("round"), dict)

    def test_MyError_type_number(self):
        # Non-string queries must raise.
        with self.assertRaises(ValueError):
            self.x.multiple_search(15)

    def test_empty_string(self):
        outcome = self.x.multiple_search('')
        self.assertIsInstance(outcome, dict)
        self.assertEqual(outcome, {})

    def test_search_by_token(self):
        outcome = self.x.multiple_search('The')
        self.assertIsInstance(outcome, dict)
        self.assertEqual(outcome, idealdict['The'])

    def test_search_two_tokens(self):
        outcome = self.x.multiple_search('The sun')
        self.assertIsInstance(outcome, dict)
        self.assertEqual(outcome,
                         {'file2.txt': [Position(1, 4, 0),
                                        Position(5, 8, 0)]})
def __init__(self, data_dir, feature_type, survey):
    """Wire up the retrieval components from the files in *data_dir*.

    All index files are expected to share the directory's base name
    (e.g. ``<data_dir>/<name>.lex``).

    feature_type -- feature-extraction mode forwarded to StateMachine
    survey       -- forwarded to ActionManager (semantics defined there)
    """
    data_dir = os.path.normpath(data_dir)
    data_name = os.path.basename(data_dir)

    # Search Engine
    lex_file = os.path.join(data_dir,data_name + '.lex')
    background_file = os.path.join(data_dir,data_name + '.background')
    inv_index_file = os.path.join(data_dir, data_name + '.index')
    doclengs_file = os.path.join(data_dir, data_name + '.doclength')
    self.searchengine = SearchEngine(
        lex_file = lex_file,
        background_file = background_file,
        inv_index_file = inv_index_file,
        doclengs_file = doclengs_file
    )

    # State Machine: shares the engine's loaded statistics.
    self.statemachine = StateMachine(
        background = self.searchengine.background,
        inv_index = self.searchengine.inv_index,
        doclengs = self.searchengine.doclengs,
        data_dir = data_dir,
        feat = feature_type
    )

    # Action Manager
    self.actionmanager = ActionManager(
        background = self.searchengine.background,
        doclengs = self.searchengine.doclengs,
        data_dir = data_dir,
        survey = survey
    )

    # Training or Testing flag (True = testing).
    self.test_flag = True
def __init__(self, database, window_len, files=None, path=None):
    """Index every requested file, then initialise the search engine.

    files -- explicit list of text files to analyse (optional)
    path  -- directory whose files are all indexed as well (optional)
    """
    self.files = files
    self.path = path
    self.window_len = window_len
    self.db_name = database
    # Collect the explicit files plus everything found in `path`.
    targets = list(self.files) if self.files is not None else []
    if self.path is not None:
        targets.extend(self.path + name
                       for name in os.listdir(path=self.path))
    indexer = Indexer(self.db_name)
    for target in targets:
        print("Indexing file: ", target)
        indexer.prescribe_index(target)
    del indexer
    SearchEngine.__init__(self, database)
def _main():
    """Entry point: parse CLI arguments, solve the puzzle and report.

    Every anticipated failure mode is mapped to a human-readable message;
    a trailing blank line is always printed.
    """
    try:
        _setup_logging()
        command_line_arguments = _parse_command_line_arguments()
        cell_values = PuzzleParser.read_from_file(
            command_line_arguments.input_file)
        search_summary = SearchEngine.find_solution(
            cell_values, command_line_arguments.algorithm,
            command_line_arguments.timeout_sec)
        _print_search_summary(search_summary)
        if command_line_arguments.output_file is not None:
            GridFormatter.write_to_file(search_summary.final_grid,
                                        command_line_arguments.output_file)
    except InvalidInputError as e:
        print("Failed to parse the input file {0}: {1}".format(
            command_line_arguments.input_file, e))
    except (InvalidPuzzleError, NoSuchAlgorithmError) as e:
        print("Puzzle rejected by the search engine: {0}".format(e))
    except (FileNotFoundError, IsADirectoryError, PermissionError) as e:
        print("I/O error: {0}".format(e))
    except (ValueError, RuntimeError) as e:
        # BUG FIX: corrected the misspelling "occured" in the message.
        print("Unexpected error has occurred: {0}".format(e))
    finally:
        print()
# Build the search/statistics configuration from the parsed CLI arguments.
regex = args.regex
# Defaults: match everything, sort alphabetically ascending, stat by count.
method_search_state = MethodSearch.all
method_sorting = SortMethod.abc
sortOrder = SortOrder.asc
stat_method_state = MethodStat.count
if args.count_mathces_arg and args.unique_mathces_arg:
    # Both flags together mean "count of unique matches".
    method_search_state = MethodSearch.unique_count
else:
    if args.unique_mathces_arg:
        method_search_state = MethodSearch.unique
    if args.count_mathces_arg:
        method_search_state = MethodSearch.count
# Per-line counting overrides the modes above when requested.
if args.count_line_mathces_arg:
    method_search_state = MethodSearch.line
if args.sort_method_arg:
    method_sorting = sort_method_map[args.sort_method_arg]
if args.sort_order_arg:
    sortOrder = sort_order_map[args.sort_order_arg]
sorter = Sorter(method_sorting, sortOrder)
if args.stat_arg:
    # Statistics mode: aggregate matches instead of listing them.
    stat_engine = StatEngine(regex, stat_method_map[args.stat_arg], sorter)
    print(stat_engine.begin(data_strings, args.num_print_rows_arg))
else:
    search_engine = SearchEngine(regex, method_search_state, sorter)
    print(search_engine.begin(data_strings, args.num_print_rows_arg))
# Map each command-line option onto its target file name.
for x, y in opts:
    if x == '-d':
        dictionary_file = y
    elif x == '-p':
        postings_file = y
    elif x == '-q':
        query_file = y
    elif x == '-o':
        results_file = y
    else:
        raise AssertionError('unhandled option')
# BUG FIX: compare against None with `is`, not `==` (PEP 8); `==` can be
# overridden by operand types and is less explicit.
if any(f is None for f in
       (dictionary_file, postings_file, query_file, results_file)):
    print(f'usage: {sys.argv[0]} -d dictionary-file -p postings-file -q file-of-queries -o output-file-of-results')
    sys.exit(2)
document_file = 'document.txt'
dictionary = load_dictionary(dictionary_file)
documents = load_documents(document_file)
search_engine = SearchEngine(dictionary, documents, postings_file)
query, relevant_doc_ids = read_query(query_file)
# Write the ranked doc ids space-separated, or a parse-error message.
with open(results_file, 'w') as f:
    f.seek(0)
    try:
        result = search_engine.search(query, relevant_doc_ids)
        f.write(' '.join([str(i) for i in result]) + '\n')
    except ParseError as e:
        f.write(f'parse error encountered: {e}')
class TestSearchEngine(unittest.TestCase):
    """Tests for SearchEngine search_one/search_many and the limit/offset
    variants, against two small fixture files."""

    def setUp(self):
        # Seed the engine from the module-level `database` dict and create
        # the fixture files the expected Position_with_lines refer to.
        self.engine = SearchEngine('db_name')
        self.engine.database.update(database)
        with open("test1.txt", 'w') as file:
            file.write(test1)
        with open("test2.txt", 'w') as file:
            file.write(test2)

    def test_empty(self):
        result = self.engine.search_one('')
        self.assertEqual(result, {})

    def test_search_one(self):
        result = self.engine.search_one('test')
        self.assertEqual(
            result, {
                'test1.txt': [Position_with_lines(11, 15, 0)],
                'test2.txt': [Position_with_lines(3, 7, 0)]
            })

    def test_search_many_one(self):
        # A single-word query behaves like search_one.
        result = self.engine.search_many('test')
        self.assertEqual(
            result, {
                'test1.txt': [Position_with_lines(11, 15, 0)],
                'test2.txt': [Position_with_lines(3, 7, 0)]
            })

    def test_search_many_two(self):
        result = self.engine.search_many('my test')
        self.assertEqual(
            result, {
                'test1.txt': [
                    Position_with_lines(8, 10, 0),
                    Position_with_lines(11, 15, 0)
                ],
                'test2.txt': [Position_with_lines(0, 2, 0),
                              Position_with_lines(3, 7, 0)]
            })

    def test_search_limit_offset_default(self):
        # Default limits/offsets: documents listed but no quote excerpts.
        result = self.engine.search_limit_offset('test')
        self.assertEqual(result, {'test1.txt': [], 'test2.txt': []})

    def test_search_limit_offset_all(self):
        result = self.engine.search_limit_offset('test',
                                                 doclimit=2,
                                                 docoffset=0,
                                                 limits=[2, 2],
                                                 offsets=[0, 0])
        self.assertEqual(
            result, {
                'test1.txt': ['this is my <strong>test</strong>'],
                'test2.txt': ['my <strong>test</strong>']
            })

    def test_search_limit_offset_one(self):
        result = self.engine.search_limit_offset('test',
                                                 doclimit=1,
                                                 docoffset=0,
                                                 limits=[2, 2],
                                                 offsets=[0, 0])
        self.assertEqual(result, {
            'test1.txt': ['this is my <strong>test</strong>'],
            'test2.txt': []
        })

    def test_search_limit_offset_shift(self):
        result = self.engine.search_limit_offset('test',
                                                 doclimit=2,
                                                 docoffset=1,
                                                 limits=[2, 2],
                                                 offsets=[0, 0])
        self.assertEqual(result, {
            'test1.txt': [],
            'test2.txt': ['my <strong>test</strong>']
        })

    def test_search_many_limit_offset_one(self):
        result = self.engine.search_many_limit_offset('test',
                                                      limit=1,
                                                      offset=0,
                                                      limits=[2, 2],
                                                      offsets=[0, 0])
        self.assertEqual(result,
                         {'test1.txt': [Position_with_lines(11, 15, 0)]})

    def test_search_many_limit_offset_shift(self):
        result = self.engine.search_many_limit_offset('test',
                                                      limit=1,
                                                      offset=1,
                                                      limits=[2, 2],
                                                      offsets=[0, 0])
        self.assertEqual(result,
                         {'test2.txt': [Position_with_lines(3, 7, 0)]})

    def test_search_many_limit_offset_all(self):
        result = self.engine.search_many_limit_offset('test',
                                                      limit=2,
                                                      offset=0,
                                                      limits=[2, 2],
                                                      offsets=[0, 0])
        self.assertEqual(
            result, {
                'test1.txt': [Position_with_lines(11, 15, 0)],
                'test2.txt': [Position_with_lines(3, 7, 0)]
            })

    def test_generator(self):
        # generator() yields the merged position lists, skipping empties.
        result = self.engine.generator(
            [[Position_with_lines(12, 13, 1),
              Position_with_lines(3, 7, 0)],
             [Position_with_lines(11, 15, 0),
              Position_with_lines(3, 7, 0)], []])
        a = []
        for r in result:
            a.append(r)
        self.assertEqual(a, [
            Position_with_lines(11, 15, 0),
            Position_with_lines(3, 7, 0),
            Position_with_lines(12, 13, 1),
            Position_with_lines(3, 7, 0)
        ])

    def test_search_many_limit_offset_gen_one(self):
        result = self.engine.search_many_limit_offset_gen('test',
                                                          limit=1,
                                                          offset=0,
                                                          limits=[2, 2],
                                                          offsets=[0, 0])
        result_keys = list(result.keys())
        self.assertEqual(result_keys, ['test1.txt'])
        for key in result.keys():
            for data in result[key]:
                self.assertEqual(data, Position_with_lines(11, 15, 0))

    def test_search_many_limit_offset_gen_shift(self):
        result = self.engine.search_many_limit_offset_gen('test',
                                                          limit=1,
                                                          offset=1,
                                                          limits=[2, 2],
                                                          offsets=[0, 0])
        result_keys = list(result.keys())
        self.assertEqual(result_keys, ['test2.txt'])
        for key in result.keys():
            for data in result[key]:
                self.assertEqual(data, Position_with_lines(3, 7, 0))

    def test_search_many_limit_offset_gen_all(self):
        result = self.engine.search_many_limit_offset_gen('test',
                                                          limit=2,
                                                          offset=0,
                                                          limits=[2, 2],
                                                          offsets=[0, 0])
        result_keys = list(result.keys())
        self.assertEqual(result_keys, ['test1.txt', 'test2.txt'])
        for key in result.keys():
            for data in result[key]:
                self.assertEqual(data,
                                 database['test'][key][0])

    def tearDown(self):
        # Remove the database files and the fixtures created in setUp.
        del self.engine
        for filename in os.listdir(os.getcwd()):
            if filename == 'db_name' or filename.startswith('db_name'):
                os.remove(filename)
        if 'test1.txt' in os.listdir(os.getcwd()):
            os.remove('test1.txt')
        if 'test2.txt' in os.listdir(os.getcwd()):
            os.remove('test2.txt')
from searchengine import SearchEngine
from tokenstore import TokenStore

# Todo: reformat command line interface


def command_line(arg=None):
    """Prompt until a valid query is entered.

    Returns (list of query tokens, optional argument given after " --").
    """
    while True:
        raw_query = input("Please type in what do you want to search: ").lower()
        if " --" in raw_query and not arg:
            raw_query, arg = raw_query.split(" --")
        # Strip everything except letters, digits and spaces.
        query = re.sub("[^A-Za-z0-9 ]+", "", raw_query)
        if len(query) <= 32:
            return query.split(), arg
        print("the query is too long, please shorten the search word")


if __name__ == '__main__':
    search_engine = SearchEngine(TokenStore())
    # Exit cleanly on Ctrl-C.
    signal.signal(signal.SIGINT, lambda _s, _f: exit(0))
    while True:
        search_queries, arg = command_line()
        call_args = [search_queries, False] if arg == "all" else [search_queries]
        search_engine.search(*call_args)
class WindowsTest(unittest.TestCase):
    """Tests for SearchEngine.find_supplemented_window_lim_v3."""

    def setUp(self):
        # Fixture strings; the digit rulers give character offsets at a glance.
        self.strr = 'sun window tree apple, juse border films 23 45good'
        # 01234567890123456789012345678901234567890123456789
        # 0         1         2         3         4
        self.strr2 = 'Мы тестируем нашу программу для работы с окнами. '
        # (same ruler applies)
        self.strr3 = 'Первая строка для тестов.\n'
        self.strr4 = 'Вторая строка для тестов.'
        self.test_file = open('test_window_one.txt', 'w')
        self.test_file.write(self.strr)
        self.test_file.close()
        self.test_file = open('test_window_two.txt', 'w')
        self.test_file.write(self.strr2)
        self.test_file.close()
        # Third file holds two lines (strr3 ends with a newline).
        self.test_file = open('test_window_three.txt', 'w')
        self.test_file.write(self.strr3)
        self.test_file.write(self.strr4)
        self.test_file.close()
        self.x = SearchEngine("test_db")
        self.x.database.update(idealdict)

    def tearDown(self):
        # Drop fixture files and any database shards the engine created.
        del self.x
        file_list = os.listdir(path=".")
        for i in file_list:
            if i == 'test_window_one.txt' or i == 'test_window_two.txt' or i == 'test_window_three.txt':
                os.remove(i)
            if i.startswith('test_db.'):
                os.remove(i)

    def test_wrong_input_error(self):
        # A non-string query (list) must raise ValueError.
        with self.assertRaises(ValueError):
            files = ['test_window_one.txt']
            win = self.x.find_supplemented_window_lim_v3(
                files, 3, 0, 1, [(0, 1)])

    def test_absent_key(self):
        result = self.x.find_supplemented_window_lim_v3(
            'zzzz', 1, 0, 1, [(0, 1)])
        self.assertEqual(result, {})

    def test_empty_string(self):
        result = self.x.find_supplemented_window_lim_v3('', 1, 0, 1, [(0, 1)])
        self.assertIsInstance(result, dict)
        self.assertEqual(result, {})

    def test_get_window_begin(self):
        # Token at the very start of the file.
        result = self.x.find_supplemented_window_lim_v3(
            'sun', 1, 0, 1, [(0, 1)])
        res = result['test_window_one.txt'][0]
        win = TokenWindow(self.strr, [Position(0, 3, 0)], 0, 50)
        ideal = {'test_window_one.txt': [win]}
        self.assertEqual(res.allString, win.allString)
        self.assertEqual(res, win)
        self.assertEqual(result, ideal)

    def test_get_window_simple(self):
        result = self.x.find_supplemented_window_lim_v3(
            'tree', 2, 0, 1, [(0, 1)])
        res = result['test_window_one.txt'][0]
        win = TokenWindow(self.strr, [Position(11, 15, 0)], 0, 50)
        ideal = {'test_window_one.txt': [win]}
        self.assertEqual(res.allString, win.allString)
        self.assertEqual(res, win)
        self.assertEqual(result, ideal)

    def test_get_window_end(self):
        # Token at the very end of the file.
        result = self.x.find_supplemented_window_lim_v3(
            'good', 1, 0, 1, [(0, 1)])
        res = result['test_window_one.txt'][0]
        win = TokenWindow(self.strr, [Position(46, 50, 0)], 0, 50)
        ideal = {'test_window_one.txt': [win]}
        self.assertEqual(res.allString, win.allString)
        self.assertEqual(res, win)
        self.assertEqual(result, ideal)

    def test_get_window_simple2(self):
        result = self.x.find_supplemented_window_lim_v3(
            'нашу', 2, 0, 1, [(0, 1)])
        res = result['test_window_two.txt'][0]
        win = TokenWindow(self.strr2, [Position(13, 17, 0)], 0, 49)
        ideal = {'test_window_two.txt': [win]}
        self.assertEqual(res.allString, win.allString)
        self.assertEqual(res, win)
        self.assertEqual(result, ideal)

    def test_get_window_simple_two_line(self):
        # Match on the second line of the two-line file.
        result = self.x.find_supplemented_window_lim_v3(
            'Вторая', 1, 0, 1, [(0, 1)])
        res = result['test_window_three.txt'][0]
        win = TokenWindow(self.strr4, [Position(0, 6, 1)], 0, 25)
        ideal = {'test_window_three.txt': [win]}
        self.assertEqual(res.allString, win.allString)
        self.assertEqual(res, win)
        self.assertEqual(result, ideal)

    def test_get_window_two_result(self):
        # Same token on both lines -> two windows for the file.
        result = self.x.find_supplemented_window_lim_v3(
            'тестов', 1, 0, 1, [(0, 2)])
        res1 = result['test_window_three.txt'][0]
        res2 = result['test_window_three.txt'][1]
        win1 = TokenWindow(self.strr3.replace('\n', ''),
                           [Position(18, 24, 0)], 0, 25)
        win2 = TokenWindow(self.strr4, [Position(18, 24, 1)], 0, 25)
        ideal = {'test_window_three.txt': [win1, win2]}
        self.assertEqual(res1.allString, win1.allString)
        self.assertEqual(res1, win1)
        self.assertEqual(res2.allString, win2.allString)
        self.assertEqual(res2, win2)
        self.assertEqual(result,
                         ideal)

    def test_get_window_two_result2(self):
        # Per-file (offset, limit)=(0, 1) keeps only the first window.
        result = self.x.find_supplemented_window_lim_v3(
            'тестов', 1, 0, 1, [(0, 1)])
        res1 = result['test_window_three.txt'][0]
        win1 = TokenWindow(self.strr3.replace('\n', ''),
                           [Position(18, 24, 0)], 0, 25)
        ideal = {'test_window_three.txt': [win1]}
        self.assertEqual(res1.allString, win1.allString)
        self.assertEqual(res1, win1)
        self.assertEqual(result, ideal)

    def test_get_window_two_result3(self):
        # (offset, limit)=(1, 2) skips the first window, keeps the second.
        result = self.x.find_supplemented_window_lim_v3(
            'тестов', 1, 0, 1, [(1, 2)])
        res1 = result['test_window_three.txt'][0]
        win1 = TokenWindow(self.strr4.replace('\n', ''),
                           [Position(18, 24, 1)], 0, 25)
        ideal = {'test_window_three.txt': [win1]}
        self.assertEqual(res1.allString, win1.allString)
        self.assertEqual(res1, win1)
        self.assertEqual(result, ideal)

    def test_get_window_two_word(self):
        # Two query words landing in one merged window.
        result = self.x.find_supplemented_window_lim_v3(
            'Мы работы', 2, 0, 1, [(0, 2)])
        res = result['test_window_two.txt'][0]
        win = TokenWindow(
            self.strr2, [Position(0, 2, 0), Position(32, 38, 0)], 0, 49)
        ideal = {'test_window_two.txt': [win]}
        self.assertEqual(res.allString, win.allString)
        self.assertEqual(res, win)
        self.assertEqual(result, ideal)

    def test_get_window_wrong_offset(self):
        # Offset past the limit -> empty window list for the file.
        result = self.x.find_supplemented_window_lim_v3(
            'tree', 2, 0, 1, [(2, 1)])
        res = result['test_window_one.txt']
        ideal = {'test_window_one.txt': []}
        self.assertEqual(res, [])
        self.assertEqual(result, ideal)
    'D:/9th semester/Information Retrieval Lab/package/scrapper/data/corpus1.pkl'
) + p.get_corpus(
    'D:/9th semester/Information Retrieval Lab/package/scrapper/data/corpus2.pkl'
) + p.get_corpus(
    'D:/9th semester/Information Retrieval Lab/package/scrapper/data/corpus3.pkl'
) + p.get_corpus(
    'D:/9th semester/Information Retrieval Lab/package/scrapper/data/corpus4.pkl'
) + p.get_corpus(
    'D:/9th semester/Information Retrieval Lab/package/scrapper/data/corpus5.pkl'
)
# NOTE(review): the expression above continues an assignment that begins
# before this chunk (presumably `corpus = p.get_corpus(...)`).  The
# hard-coded absolute Windows paths make this script machine-specific;
# consider a base-directory constant.
cat_data = pd.DataFrame(
    columns=['id', 'tokens', 'category', 'category_code', 'pred_category'])
# Accumulate the five pre-categorised pickles into a single frame.
cat_data = t.get_categorised_data(
    'D:/9th semester/Information Retrieval Lab/package/scrapper/data/data_categorisation1_final.pkl',
    cat_data)
cat_data = t.get_categorised_data(
    'D:/9th semester/Information Retrieval Lab/package/scrapper/data/data_categorisation2_final.pkl',
    cat_data)
cat_data = t.get_categorised_data(
    'D:/9th semester/Information Retrieval Lab/package/scrapper/data/data_categorisation3_final.pkl',
    cat_data)
cat_data = t.get_categorised_data(
    'D:/9th semester/Information Retrieval Lab/package/scrapper/data/data_categorisation4_final.pkl',
    cat_data)
cat_data = t.get_categorised_data(
    'D:/9th semester/Information Retrieval Lab/package/scrapper/data/data_categorisation5_final.pkl',
    cat_data)
engine = SearchEngine(model, corpus, cat_data)
print('Done!')
application.run()
class DialogueManager(object):
    """Coordinates the search engine, state machine and action manager for
    one interactive retrieval dialogue session."""

    def __init__(self, data_dir, feature_type, survey):
        """Load the index files from *data_dir* and wire up the components.

        All index files share the directory's base name
        (e.g. ``<data_dir>/<name>.lex``).
        """
        data_dir = os.path.normpath(data_dir)
        data_name = os.path.basename(data_dir)

        # Search Engine
        lex_file = os.path.join(data_dir, data_name + '.lex')
        background_file = os.path.join(data_dir, data_name + '.background')
        inv_index_file = os.path.join(data_dir, data_name + '.index')
        doclengs_file = os.path.join(data_dir, data_name + '.doclength')
        self.searchengine = SearchEngine(
            lex_file=lex_file,
            background_file=background_file,
            inv_index_file=inv_index_file,
            doclengs_file=doclengs_file
        )

        # State Machine shares the engine's loaded statistics.
        self.statemachine = StateMachine(
            background=self.searchengine.background,
            inv_index=self.searchengine.inv_index,
            doclengs=self.searchengine.doclengs,
            data_dir=data_dir,
            feat=feature_type
        )

        # Action Manager
        self.actionmanager = ActionManager(
            background=self.searchengine.background,
            doclengs=self.searchengine.doclengs,
            data_dir=data_dir,
            survey=survey
        )

        # Training or Testing
        self.test_flag = True

    def __call__(self, query, ans, test_flag = False):
        """
        Set inital parameters for every session

        Return: state(firstpass): 1 dim vector
        """
        self.query = query
        #self.ans = ( None if test_flag else ans )
        self.ans = ans

        # Interaction Parameters, action and current turn number
        self.cur_action = -1  # Action None
        self.curtHorizon = 0

        # Previous retrieved results and MAP
        self.ret = None
        self.lastMAP = 0.
        self.MAP = 0.

        # Termination indicator
        self.terminal = False

        # Training or Testing
        self.test_flag = test_flag

    def gen_state_feature(self):
        """Retrieve with the current models, advance the turn counter and
        extract the state feature vector; updates MAP bookkeeping."""
        # BUG FIX: use `is not None`, not `!= None` (PEP 8).
        assert self.actionmanager.posmodel is not None

        # Search Engine Retrieves Result
        self.ret = self.searchengine.retrieve(self.actionmanager.posmodel,
                                              self.actionmanager.negmodel)
        self.curtHorizon += 1

        # Feature Extraction
        feature = self.statemachine(
            ret=self.ret,
            action_type=self.cur_action,
            curtHorizon=self.curtHorizon,
            posmodel=self.actionmanager.posprior,
            negmodel=self.actionmanager.negprior,
            posprior=self.actionmanager.posprior,
            negprior=self.actionmanager.negprior
        )

        # Record mean average precision
        self.lastMAP = self.MAP
        self.MAP = self.evalAP(self.ret, self.ans)
        return feature

    def request(self, action_type):
        '''
        Sends request to simulator for more query info
        '''
        self.cur_action = action_type
        request = {}
        request['ret'] = self.ret
        request['action'] = self.actionmanager.actionTable[action_type]
        return request

    def expand_query(self, response):
        '''
        Passes response to action manager for query expansion
        '''
        posmodel, negmodel = self.actionmanager.expand_query(response)
        self.posmodel = posmodel
        self.negmodel = negmodel
        return posmodel, negmodel

    def evalAP(self, ret, ans):
        """Average precision of the ranked list *ret* against the answer
        dict *ans*; returns 0. for an empty answer set."""
        # BUG FIX: dict.has_key() was removed in Python 3 -- use `in`
        # (the file already uses Python 3 print()-function syntax).
        tp = [float(docID in ans) for docID, val in ret]
        atp = np.cumsum(tp)
        precision = [atp[idx] / (idx + 1) * tp[idx]
                     for idx, (docID, val) in enumerate(ret)]
        return (sum(precision) / len(ans) if len(ans) else 0.)

    def evalMAP(self, ret, ans):
        """Mean average precision over parallel rankings/answer dicts."""
        # BUG FIX: evalAP is a method (call via self) and xrange is
        # Python-2-only -- use range().
        APs = [self.evalAP(ret[i], ans[i]) for i in range(len(ret))]
        print("warning!! MAP")
        return sum(APs) / len(APs)

    def calculate_reward(self):
        """Cost of the last action plus lambda-weighted MAP improvement;
        a fixed cost (index 4) applies once the session is terminal."""
        if self.terminal:
            reward = self.actionmanager.costTable[4]
        else:
            reward = self.actionmanager.costTable[self.cur_action] +\
                self.actionmanager.costTable['lambda'] * (self.MAP - self.lastMAP)
        return reward

    def show(self):
        """Mark the session terminal and expose the final retrieval result."""
        self.terminal = True
        params = {'ret': self.ret}
        return params

    def game_over(self):
        """End the session after show() or 5 turns; return (done, MAP)."""
        if self.terminal or self.curtHorizon >= 5:
            self.query = None
            self.ans = None
            self.actionmanager.posmodel = None
            return True, self.MAP
        return False, self.MAP
# NOTE(review): fragment of a larger HTTP response builder -- the enclosing
# function definition and the loop over result keys (`k`, `i`) begin before
# this chunk; indentation below is reconstructed.
disabled_f_doc = ''
# Disable "forward" paging for this document once the limit covers all hits.
if limit_doc >= len(res[k]):
    disabled_f_doc = 'disabled'
disabled_b_doc = ''
# Disable "back" paging when already on the first page.
if offset_doc == 0:
    disabled_b_doc = 'disabled'
for j, v in enumerate(res[k]):
    if j == limit_doc:
        break
    # NOTE(review): `re` is a string accumulator here -- it shadows the
    # stdlib `re` module if that is imported at file level; consider renaming.
    re += '<li>' + v.get_BB_string() + '</li>'
re += '</ul>'
result += form_limit.format(k, str(i), str(offset_doc), str(limit_doc), re,
                            disabled_f_doc, disabled_b_doc)
ret_result = body.format(postvars['findstr'][0], str(offset), str(limit),
                         result, disabled_f, disabled_b)
return ret_result


if __name__ == '__main__':
    # Serve the search UI on localhost:80 with one shared engine instance.
    httpd = HTTPServer(('localhost', 80), custom_handler)
    httpd.search_engine = SearchEngine('database')
    print('Start server.')
    httpd.serve_forever()
def setUp(self):
    # A fresh engine per test; no database seeding needed here.
    self.x = SearchEngine()
def find_solution(puzzle, algorithm, timeout_sec = 10):
    """Parse *puzzle*, run the search, and return the pair
    (search summary, formatted final grid)."""
    parsed = PuzzleParser.read_from_string(puzzle)
    summary = SearchEngine.find_solution(parsed, algorithm, timeout_sec)
    grid_text = GridFormatter.write_to_string(summary.final_grid)
    return (summary, grid_text)
import sys
from flask import request
from flask_api import FlaskAPI

# Make the indexer package importable before pulling in its modules.
# NOTE(review): `os` is used here but its import is not visible in this
# chunk -- confirm it is imported earlier in the file.
sys.path.append(os.path.abspath("../indexer/src"))
from searchengine import SearchEngine
from tokenstore import TokenStore

app = FlaskAPI(__name__)


@app.route("/")
def index():
    # Serve the static single-page UI.
    return app.send_static_file('html/index.html')


@app.route("/api/search", methods=["POST"])
def search() -> list:
    """
    curl -X POST http://127.0.0.1:5000/api/search -d queries="hello world"
    """
    # NOTE(review): `search_engine` is only bound under __main__ below, so
    # this handler works only when the module is run directly.
    return search_engine.search(request.data.get("queries").lower().split(","))


if __name__ == "__main__":
    store = TokenStore()
    search_engine = SearchEngine(store)
    app.run(debug=True, host="0.0.0.0")
def do_POST(self):
    """ POST handler for query """
    # Parses the form body, runs a limit/offset search and renders an HTML
    # page of paged per-document results.
    try:
        content_length = int(self.headers['Content-Length'])
        body = str(self.rfile.read(content_length))
        print("body = " + body)
        query, limit, offset, limits, offsets, action, action_doc, action_exists = self.parse_url(
            body)
        print("query = " + query)
        print("doclimit = " + limit)
        print("docoffset = " + offset)
        print("action = " + action)
        print("actiondoc = " + action_doc)
        # A paging button press shifts the per-document offsets.
        if action_exists:
            offsets = self.get_new_offset_limit(action, action_doc, offsets,
                                                limits)
        print('limits = ' + str(limits))
        print('offsets = ' + str(offsets))
        search_engine = SearchEngine('database')
        r = search_engine.search_limit_offset(query, 4, limit, offset,
                                              limits, offsets)
        # Build the result form: global limit/offset inputs first, then one
        # <ol> section per matching document.
        myresp = ''
        myresp += 'Documents Limit<br><input type="text" name="limit" value="' + str(
            limit) + '"><br>'
        myresp += 'Documents Offset<br><input type="text" name="offset" value="' + str(
            offset) + '"><br>'
        key_list = list(r.keys())
        key_list.sort()
        j = 0
        for key in key_list:
            myresp += '<ol>\n'
            myresp += '<li>' + key + '</li>\n<ul>'
            myresp += 'Limit<br><input type="text" name="doc' + str(
                j) + 'limit" value="' + limits[j] + '"><br>'
            # NOTE(review): the `name=doc...offset"` attribute below is
            # missing its opening quote (unlike the limit field above) --
            # likely a markup bug; left as-is in this documentation pass.
            myresp += 'Offset<br><input type="text" name=doc' + str(
                j) + 'offset" value="' + offsets[j] + '"><br>'
            # NOTE(review): "perv" is presumably a typo for "prev".
            myresp += '<input type="submit" name=action' + str(
                j) + ' value="perv">'
            myresp += '<input type="submit" name=action' + str(
                j) + ' value="back">'
            myresp += '<input type="submit" name=action' + str(
                j) + ' value="next"> <br>'
            for val in r[key]:
                myresp += '<li>' + val + '</li>'
            myresp += '</ul>'
            j = j + 1
        myresp += '</ol>'
        self.send_response(200)
        self.send_header("Content-type", "text/html; charset=utf-8")
        self.end_headers()
        self.wfile.write(
            bytes((resp + data.format(query, myresp)), "utf-8"))
    except TypeError:
        # Raised downstream for invalid limit/offset values.
        response = 'fields "limit" and "offset" can not take a negative or fractional values'
        self.wfile.write(bytes((resp + data.format('', response)), "utf-8"))
    except Exception as ex:
        # Fallback: report the error and list the available .txt documents
        # with zeroed paging controls.
        response = '<br>Uuups. Something went wrong. Error message: ' + str(
            ex) + '<br>'
        self.send_response(200)
        self.send_header("Content-type", "text/html; charset=utf-8")
        self.end_headers()
        files = os.listdir(".\\")
        i = 0
        response += 'Documents Limit<br><input type="text" name="limit" value="0"><br>'
        response += 'Documents Offset<br><input type="text" name="offset" value="0"><br>'
        for f in files:
            if re.match(".*\.txt", f):
                response += (f + "<br>")
                response += 'Limit<br><input type="text" name=doc' + str(
                    i) + 'limit value="0"><br>'
                response += 'Offset<br><input type="text" name=doc' + str(
                    i) + 'offset value="0"><br>'
                response += '<input type="submit" name=action' + str(
                    i) + ' value="perv">'
                response += '<input type="submit" name=action' + str(
                    i) + ' value="back">'
                response += '<input type="submit" name=action' + str(
                    i) + ' value="next"> <br>'
                i = i + 1
        self.wfile.write(
            bytes((resp + data.format('', 'Not Found<br>' + response)),
                  "utf-8"))
class WindowsTest(unittest.TestCase):
    """Tests for SearchEngine.multiple_search_lim (limit/offset search)."""

    def setUp(self):
        # Fixture strings; the digit rulers give character offsets.
        self.strr = 'sun window tree apple, juse border films 23 45good'
        # 01234567890123456789012345678901234567890123456789
        # 0         1         2         3         4
        self.strr2 = 'Мы тестируем нашу программу для работы с окнами. '
        # (same ruler applies)
        self.strr3 = 'Первая строка для тестов.\n'
        self.strr4 = 'Вторая строка для тестов.'
        self.test_file = open('test_window_one.txt', 'w')
        self.test_file.write(self.strr)
        self.test_file.close()
        self.test_file = open('test_window_two.txt', 'w')
        self.test_file.write(self.strr2)
        self.test_file.close()
        # Third file holds two lines (strr3 ends with a newline).
        self.test_file = open('test_window_three.txt', 'w')
        self.test_file.write(self.strr3)
        self.test_file.write(self.strr4)
        self.test_file.close()
        self.x = SearchEngine("test_db")
        self.x.database.update(idealdict)

    def tearDown(self):
        # Drop fixture files and any database shards the engine created.
        del self.x
        file_list = os.listdir(path=".")
        for i in file_list:
            if i == 'test_window_one.txt' or i == 'test_window_two.txt' or i == 'test_window_three.txt':
                os.remove(i)
            if i.startswith('test_db.'):
                os.remove(i)

    def test_wrong_input_error(self):
        # A non-string query must raise ValueError.
        with self.assertRaises(ValueError):
            files = ['test_window_one.txt']
            win = self.x.multiple_search_lim(3, 0, 1)

    def test_absent_key(self):
        result = self.x.multiple_search_lim('zzzz', 0, 1)
        self.assertEqual(result, {})

    def test_empty_string(self):
        result = self.x.multiple_search_lim('', 0, 1)
        self.assertIsInstance(result, dict)
        self.assertEqual(result, {})

    def test_get_window_begin(self):
        # Token at the very start of the file.
        result = self.x.multiple_search_lim('sun', 0, 1)
        res = result['test_window_one.txt'][0]
        t = [Position(0, 3, 0)]
        ideal = {'test_window_one.txt': t}
        self.assertEqual(result, ideal)

    def test_get_window_simple(self):
        result = self.x.multiple_search_lim('tree', 0, 1)
        res = result['test_window_one.txt'][0]
        t = [Position(11, 15, 0)]
        ideal = {'test_window_one.txt': t}
        self.assertEqual(result, ideal)

    def test_get_window_end(self):
        # Token at the very end of the file.
        result = self.x.multiple_search_lim('good', 0, 1)
        res = result['test_window_one.txt'][0]
        t = [Position(46, 50, 0)]
        ideal = {'test_window_one.txt': t}
        self.assertEqual(result, ideal)

    def test_get_window_simple2(self):
        result = self.x.multiple_search_lim('нашу', 0, 1)
        res = result['test_window_two.txt'][0]
        t = [Position(13, 17, 0)]
        ideal = {'test_window_two.txt': t}
        self.assertEqual(result, ideal)

    def test_get_window_simple_two_line(self):
        # Match on the second line of the two-line file.
        result = self.x.multiple_search_lim('Вторая', 0, 1)
        res = result['test_window_three.txt'][0]
        t = [Position(0, 6, 1)]
        ideal = {'test_window_three.txt': t}
        self.assertEqual(result, ideal)

    def test_get_window_two_result(self):
        # Same token on both lines -> two positions for the file.
        result = self.x.multiple_search_lim('тестов', 0, 1)
        res1 = result['test_window_three.txt'][0]
        res2 = result['test_window_three.txt'][1]
        t = [Position(18, 24, 0), Position(18, 24, 1)]
        ideal = {'test_window_three.txt': t}
        self.assertEqual(result, ideal)

    def test_get_window_two_result3(self):
        result = self.x.multiple_search_lim('тестов', 0, 1)
        res1 = result['test_window_three.txt'][0]
        t = [Position(18, 24, 0), Position(18, 24, 1)]
        ideal = {'test_window_three.txt': t}
        self.assertEqual(result, ideal)

    def test_get_window_wrong_offset(self):
        # Offset beyond the available matches -> empty result dict.
        result = self.x.multiple_search_lim('tree', 5, 2)
        ideal = {}
        self.assertEqual(result, ideal)
def setUp(self):
    # Engine over the "database" store, seeded from the module-level
    # `idealdict` fixture.
    self.x = SearchEngine("database")
    self.x.database.update(idealdict)
from flask import Flask, request
from flask import jsonify
from flask_cors import CORS
from io import BytesIO
import json
from searchengine import SearchEngine
from retriever import Retriever
from transformers import BertTokenizer, BertModel
from ranker import BertRanker, Word2vecRanker

app = Flask(__name__)
# Allow cross-origin requests on the /get endpoint only.
cors = CORS(app, resources={r'/get': {"origins": "*"}})
se = SearchEngine()
# Precomputed caches: word -> POS counts, and POS tag <-> hanzi label maps.
with open('../cache/word_to_pos_count2.json') as f:
    word_to_pos = json.load(f)
with open('../cache/pos_hanzi.json') as f:
    pos_to_hanzi = json.load(f)
hanzi_to_pos = {hanzi:pos for pos, hanzi in pos_to_hanzi.items()}
# The "no restriction" label maps to the wildcard tag.
hanzi_to_pos['不限'] = 'all'
print('finish load...')
# Part-of-speech labels offered in the UI (Chinese).
initialPos = ['名词', '人名', '地名', '机构名', '其它专名', '数词', '量词', '数量词',
              '时间词', '方位词', '处所词', '动词', '形容词', '副词', '前接成分',
              '后接成分', '习语', '简称', '代词', '连词', '介词', '助词', '语气助词',
              '叹词', '拟声词', '语素', '标点', '其它']
# Rankers: BERT embeddings plus a word2vec model loaded from an absolute
# path — NOTE(review): machine-specific; consider making it configurable.
bert = BertModel.from_pretrained("hfl/chinese-bert-wwm-ext")
tokenizer = BertTokenizer.from_pretrained("hfl/chinese-bert-wwm-ext")
bertranker = BertRanker()
word2vecranker = Word2vecRanker("/data/disk2/private/hujinyi/IRHomework/cache/sgns.renmin.word")


@app.route('/')
def hello_world():
    # Liveness probe.
    return 'Hello World!'