class TestDictionary(unittest.TestCase):
    """Tests for ``Dictionary`` construction and word validation."""

    def __init__(self, *args, **kwargs):
        """Create the test case and load the dictionary used by the tests."""
        super(TestDictionary, self).__init__(*args, **kwargs)
        self.dictionary = Dictionary('doc/dictionary.json')

    def test_initializer_with_invalid_dictionary_path_must_create_empty_dictionary(
            self):
        """A dictionary built from the invalid path must report being empty."""
        self.assertTrue(Dictionary('doc/invalid_dictionary.json').is_empty())

    def test_initializer_with_valid_dictionary_path_must_initialize_words_dictionary(
            self):
        """The dictionary loaded in ``__init__`` must not be empty."""
        self.assertFalse(self.dictionary.is_empty())

    @parameterized.expand(['Three', 'valid', 'words'])
    def test_is_valid_word_with_actual_word_must_return_true(self, word):
        """``is_valid_word`` must accept each of the listed words."""
        self.assertTrue(self.dictionary.is_valid_word(word))

    @parameterized.expand(['Thr33', 'in-valid', '$words', ''])
    def test_is_valid_word_with_not_wrong_word_must_return_false(
            self, invalid_word):
        """``is_valid_word`` must reject each of the listed strings."""
        self.assertFalse(self.dictionary.is_valid_word(invalid_word))

    @parameterized.expand(['I', 'a'])
    def test_is_valid_scrabble_word_with_actual_one_letter_word_must_return_false(
            self, word):
        """``is_valid_scrabble_word`` must reject one-letter words."""
        self.assertFalse(self.dictionary.is_valid_scrabble_word(word))
def Initialize(credentials="persistent", opt_url=None):
    """Initialize the EE library.

    If this hasn't been called by the time any object constructor is used,
    it will be called then.  If this is called a second time with a different
    URL, this doesn't do an un-initialization of e.g.: the previously loaded
    Algorithms, but will overwrite them and let point at alternate servers.

    Args:
      credentials: OAuth2 credentials.  'persistent' (default) means use
          credentials already stored in the filesystem, or raise an
          explanatory exception guiding the user to create those credentials.
      opt_url: The base url for the EarthEngine REST API to connect to.
    """
    if credentials == "persistent":
        credentials = _GetPersistentCredentials()

    api_url = opt_url + "/api" if opt_url else None
    data.initialize(credentials, api_url, opt_url)

    # Initialize the dynamically loaded functions on the objects that want
    # them, in the same order as before.
    dynamic_classes = (
        ApiFunction, Element, Image, Feature, Collection, ImageCollection,
        FeatureCollection, Filter, Geometry, List, Number, String, Date,
        Dictionary, Terrain,
    )
    for dynamic_class in dynamic_classes:
        dynamic_class.initialize()

    _InitializeGeneratedClasses()
    _InitializeUnboundMethods()
def deactivate(self):
    """Write the pending arrays to disk and report where the data went.

    The text-format arrays are appended to ``self.data_file_name`` and
    ``self.result_file_name``; the numpy arrays are stored with ``np.save``
    under ``self.data_file_name_np`` and ``self.result_file_name_np``.
    """
    # Close the last array, even though it might not be complete: drop the
    # last two characters (the trailing delimiter) and add proper array ends.
    self.data_arr_string = self.data_arr_string[:-2] + ']]\n'

    # Write the last arrays.  The ``with`` statement closes each file on exit,
    # so no explicit close() call is needed.
    with open(self.data_file_name, 'a') as f:
        f.write(self.data_arr_string)
    with open(self.result_file_name, 'a') as f:
        f.write(self.result_arr_string)

    # The same data is also saved in numpy format.
    np.save(self.data_file_name_np, self.data_arr_np)
    np.save(self.result_file_name_np, self.result_arr_np)

    print(dict.get_string('plugclose') + self.data_file_name)
    print(dict.get_string('checkarray'))
def test_get_slot():
    """``get_slot`` must return the node holding the stored key/value pair."""
    buckets = Dictionary()
    # Both calls are kept from the original test; their results are not used.
    buckets.get_bucket('9.0')
    buckets.set_key_to_value('9.0', 'Tesla')

    _, node = buckets.get_slot('9.0')

    assert node.value[1] == 'Tesla'
    assert node.value[0] == '9.0'
def build_index():
    """Run every document through the SNRM model and store an inverted index.

    Reads its settings from the module-level ``args`` (dictionary file, model
    file, index file, ...).  Progress messages go to stderr.
    """
    print('build index..', file=sys.stderr)

    # 1. read dictionary
    dictionary = Dictionary()
    dictionary.load_from_galago_dump(args.dict_file, args.dict_min_freq)

    # 2. make snrm instance & load weight
    device = torch.device('cpu')
    snrm = SNRM(args).to(device)
    # map_location keeps a checkpoint that was saved from a GPU loadable on a
    # CPU-only machine; the model lives on `device` anyway.
    snrm.load_state_dict(torch.load(args.model_file, map_location=device))
    snrm.eval()  # set inference mode

    # 3. read train data
    doc_data = Triplet('doc', args, dictionary)

    # 4. make index
    db_loader = DataLoader(dataset=doc_data, batch_size=1, shuffle=False,
                           num_workers=0)
    # last channel is output representation
    inverted_index = InMemoryInvertedIndex(args.conv3_channel)
    with torch.no_grad():
        for i, (doc_id, doc) in enumerate(db_loader):
            doc_repr = snrm(doc.float())
            inverted_index.add(doc_id.numpy(), doc_repr.numpy())
            if i % 10 == 0:
                print(i, ' document inferenced \r', file=sys.stderr, end='')

    inverted_index.store(args.index_file)
    print('>save index: ', args.index_file, file=sys.stderr)
def reload(path, params):
    """Create a sentence embedder from a pretrained model.

    Args:
        path: Location of the saved model, passed to ``torch.load``.
        params: Object whose ``max_batch_size`` attribute is set to 0.

    Returns:
        A ``SentenceEmbedder`` wrapping the reloaded model, its dictionary and
        the parameters stored with the model.
    """
    # reload model
    checkpoint = torch.load(path)
    weights = checkpoint['model']

    # handle models from multi-GPU checkpoints: drop the 'module.' prefix
    if 'checkpoint' in path:
        stripped = {}
        for name, tensor in weights.items():
            key = name[7:] if name.startswith('module.') else name
            stripped[key] = tensor
        weights = stripped

    # reload dictionary and model parameters
    dico = Dictionary(checkpoint['dico_id2word'],
                      checkpoint['dico_word2id'],
                      checkpoint['dico_counts'])
    pretrain_params = AttrDict(checkpoint['params'])
    pretrain_params.n_words = len(dico)
    special_tokens = (
        ('bos_index', BOS_WORD),
        ('eos_index', EOS_WORD),
        ('pad_index', PAD_WORD),
        ('unk_index', UNK_WORD),
        ('mask_index', MASK_WORD),
    )
    for attribute, token in special_tokens:
        setattr(pretrain_params, attribute, dico.index(token))

    # build model and reload weights
    model = Trained_Model(pretrain_params, dico)
    model.load_state_dict(weights)
    model.eval()

    # adding missing parameters
    params.max_batch_size = 0

    return SentenceEmbedder(model, dico, pretrain_params)
class MarkovChainWalker(object):
    """Generate word sequences by walking the table held by a ``Dictionary``."""

    def __init__(self):
        """Create the walker with an empty ``Dictionary``."""
        self.dictionary = Dictionary()

    def parse(self, filename):
        """Feed ``filename`` to the dictionary's ``update`` method."""
        self.dictionary.update(filename)

    def process(self):
        """Run the dictionary's ``process`` step."""
        self.dictionary.process()

    def pick(self, hash):
        """Pick a random word from ``hash``.

        A number in ``[0, 1)`` is drawn, the entries are visited in ascending
        ``(value, key)`` order, and the first key whose value exceeds the drawn
        number is returned.

        Args:
            hash: Mapping of word to numeric threshold.
                NOTE(review): presumably cumulative probabilities - confirm.

        Returns:
            The chosen key, or ``None`` when no value exceeds the drawn number.
        """
        random_pick = random.random()
        # items() and an indexing key function work on both Python 2 and 3;
        # tuple-parameter lambdas and iteritems() are Python 2 only.
        ordered = sorted(hash.items(), key=lambda item: (item[1], item[0]))
        for key, value in ordered:
            if random_pick < value:
                return key
        return None

    def generate(self, start_word, number_of_words):
        """Return ``number_of_words`` words, each followed by a space.

        Args:
            start_word: First word of the output.
            number_of_words: How many words to emit.

        Raises:
            KeyError: If a word has no entry in ``self.dictionary.dictionary``.
        """
        pieces = []
        word = start_word
        for _ in range(number_of_words):
            pieces.append('%s ' % word)
            # The follower table is looked up after every word, including the
            # last one, exactly as before.
            word = self.pick(self.dictionary.dictionary[word])
        return ''.join(pieces)
def main():
    """Interactive anagram finder.

    Loads the word list from ``words.txt``, then repeatedly asks for a set of
    letters and prints every match returned by ``Dictionary.match_anagram``.

    Raises:
        Exception: When run on a Python version older than 3.
    """
    if sys.version_info[0] < 3:
        raise Exception("Must be using Python 3+")

    print("Ingesting words from words.txt...")
    with open("words.txt", 'r') as word_file:
        words = [line.strip() for line in word_file]
    dictionary = Dictionary(words)

    while True:
        print("Please enter letters you want to use:")
        letters = input()
        # The range must be A-Z plus a-z: the previous "A-z" also accepted the
        # punctuation that sits between 'Z' and 'a' in ASCII ([ \ ] ^ _ `).
        if not re.match("^[A-Za-z]+$", letters):
            print(
                "%s is considered invalid input, please only use english alphabets a-z."
                % letters)
            continue

        matches = dictionary.match_anagram(letters)
        print("")
        for word in sorted(matches):
            print(word)
        print("Found %d matches." % len(matches))
def setup_module(module):
    """Generate dictionary configs and create the ClickHouse test cluster.

    Removes every file in ``configs/dictionaries``, writes one config per
    compatible (layout, source) pair, and sets the module-level ``cluster``
    and ``node`` globals.  Generated dictionaries are appended to
    ``DICTIONARIES``.
    """
    global DICTIONARIES
    global cluster
    global node

    dict_configs_path = os.path.join(SCRIPT_DIR, 'configs/dictionaries')
    # Start from a clean directory so stale configs are not picked up.
    for f in os.listdir(dict_configs_path):
        os.remove(os.path.join(dict_configs_path, f))

    for layout in LAYOUTS:
        for source in SOURCES:
            if source.compatible_with_layout(layout):
                structure = DictionaryStructure(layout, FIELDS[layout.layout_type])
                dict_name = source.name + "_" + layout.name
                dict_path = os.path.join(dict_configs_path, dict_name + '.xml')
                dictionary = Dictionary(dict_name, structure, source, dict_path,
                                        "table_" + dict_name)
                dictionary.generate_config()
                DICTIONARIES.append(dictionary)
            else:
                # One formatted string prints the same text on Python 2 and 3.
                print("Source %s incompatible with layout %s"
                      % (source.name, layout.name))

    main_configs = [os.path.join(dict_configs_path, fname)
                    for fname in os.listdir(dict_configs_path)]

    cluster = ClickHouseCluster(__file__, base_configs_dir=os.path.join(
        SCRIPT_DIR, 'configs'))
    node = cluster.add_instance('node', main_configs=main_configs,
                                with_mysql=True, with_mongo=True)
    cluster.add_instance('clickhouse1')
class Bot():
    """Chat bot that answers through a randomly chosen responder."""

    def __init__(self):
        """Create the dictionary, the analyzer and one responder of each kind."""
        self.dictionary = Dictionary()
        self.morph = Morph()
        self.resp_what = responder.WhatResponder(self.dictionary)
        self.resp_random = responder.RandomResponder(self.dictionary)
        self.resp_pattern = responder.PatternResponder(self.dictionary)
        self.resp_template = responder.TemplateResponder(self.dictionary)
        self.responder = self.resp_pattern

    def dialogue(self, input_text):
        """Answer ``input_text`` and let the dictionary learn from it.

        A number from 0 to 100 (inclusive) selects the responder:
        below 40 pattern, below 70 template, below 90 random, otherwise what.

        Args:
            input_text: The user's input.

        Returns:
            The chosen responder's response, also stored in ``self.response``.
        """
        parts = self.morph.analyze(input_text)
        roll = random.randint(0, 100)
        if roll < 40:
            self.responder = self.resp_pattern
        elif roll < 70:
            # Previously assigned to self.response, so the template responder
            # was never selected and the prior responder was reused instead.
            self.responder = self.resp_template
        elif roll < 90:
            self.responder = self.resp_random
        else:
            self.responder = self.resp_what
        self.response = self.responder.response(input_text, parts)
        # Learn from the input.
        self.dictionary.study(input_text, parts)
        return self.response

    def save(self):
        """Ask the dictionary to save itself."""
        self.dictionary.save()
def reset(self):
    """Drop the model and training data and start with a fresh ``Dictionary``."""
    self.model = None
    self.x_train_list, self.y_train_list = [], []
    self.x_train = self.y_train = None
    self.dictionary = Dictionary()
def run_search(dict_file, postings_file, queries_file, results_file):
    """
    using the given dictionary file and postings file,
    perform searching on the given queries file and output the results to a file

    Each non-blank query line yields one formatted result line; a blank query
    line yields an empty line.  Results are joined with newlines, without a
    trailing newline.
    """
    dictionary = Dictionary(dict_file)
    postings = PostingsFile(postings_file)
    dictionary.load()  # Load dictionary into memory

    # Both files are closed by the with statement; no explicit close() needed.
    with open(queries_file, 'r') as query_file:
        with open(results_file, 'w') as output_file:
            complete_result = []
            for query in query_file:
                if query.strip():
                    result = util.eval_query(query, dictionary, postings)
                    complete_result.append(util.format_result(result))
                else:
                    complete_result.append("")
            output_file.write("\n".join(complete_result))
class TextCorpus(interfaces.CorpusABC):
    """
    Helper class to simplify the pipeline of getting bag-of-words vectors (= a
    gensim corpus) from plain text.

    This is an abstract base class: override the `get_texts()` method to match
    your particular input.

    Given a filename (or a file-like object) in constructor, the corpus object
    will be automatically initialized with a dictionary in `self.dictionary` and
    will support the `iter` corpus method. You must only provide a correct
    `get_texts` implementation.
    """

    def __init__(self, input=None):
        """Store `input` and, when it is given, build the dictionary from it."""
        super(TextCorpus, self).__init__()
        self.input = input
        self.dictionary = Dictionary()
        if input is None:
            logger.warning("No input document stream provided; assuming "
                           "dictionary will be initialized some other way.")
        else:
            self.dictionary.add_documents(self.get_texts())

    def __iter__(self):
        """
        The function that defines a corpus.

        Iterating over the corpus must yield sparse vectors, one for each
        document.
        """
        to_bow = self.dictionary.doc2bow
        for document in self.get_texts():
            yield to_bow(document, allow_update=False)

    def getstream(self):
        """Return the module-level `getstream` applied to `self.input`."""
        return getstream(self.input)

    def get_texts(self):
        """
        Iterate over the collection, yielding one document at a time. A document
        is a sequence of words (strings) that can be fed into
        `Dictionary.doc2bow`.

        Override this function to match your input (parse input files, do any
        text preprocessing, lowercasing, tokenizing etc.). There will be no
        further preprocessing of the words coming out of this function.
        """
        # Sample implementation instead of raising NotImplementedError:
        # documents are lines in a single file (one document per line), each
        # yielded as lowercase tokens via `utils.tokenize`.
        count = 0
        for line in getstream(self.input):
            count += 1
            yield utils.tokenize(line, lowercase=True)
        # Only reached once the generator has been fully consumed.
        self.length = count

    def __len__(self):
        """Return the document count recorded by a completed `get_texts` pass."""
        return self.length  # will throw if corpus not initialized
def load_dictionaries(path, src_lang, dst_lang):
    """Load dictionaries for a given language pair.

    Reads ``dict.<lang>.txt`` from ``path`` for the source language first and
    the target language second.

    Returns:
        A ``(src_dict, dst_dict)`` tuple.
    """
    src_path = os.path.join(path, 'dict.{}.txt'.format(src_lang))
    src_dict = Dictionary.load(src_path)
    dst_path = os.path.join(path, 'dict.{}.txt'.format(dst_lang))
    dst_dict = Dictionary.load(dst_path)
    return src_dict, dst_dict
class Unmo:
    """Chatbot core class.

    Properties:
    name -- name of the chatbot core
    responder_name -- name of the responder currently in use
    """

    def __init__(self, name):
        """Take a string and use it as the name of this core instance.

        Creates and keeps one responder of each kind (What, Random, Pattern,
        Template, Markov), a Dictionary instance and a Tokenizer instance.
        """
        self._tokenizer = Tokenizer()
        self._dictionary = Dictionary()
        self._responders = {
            'what': WhatResponder('What', self._dictionary),
            'random': RandomResponder('Random', self._dictionary),
            'pattern': PatternResponder('Pattern', self._dictionary),
            'template': TemplateResponder('Template', self._dictionary),
            'markov': MarkovResponder('Markov', self._dictionary),
        }
        self._name = name
        self._responder = self._responders['pattern']

    def dialogue(self, text):
        """Take the user's input and return the responder's answer.

        The responder is switched at random on every call:
        pattern 30%, template 20%, random 20%, markov 20%, what 10%.
        The input is then studied by the Dictionary.
        """
        chance = randrange(0, 100)
        # Half-open thresholds: the former range(0, 29) / range(30, 49) / ...
        # tests skipped 29, 49, 69 and 89, which fell through to 'what'.
        if chance < 30:
            self._responder = self._responders['pattern']
        elif chance < 50:
            self._responder = self._responders['template']
        elif chance < 70:
            self._responder = self._responders['random']
        elif chance < 90:
            self._responder = self._responders['markov']
        else:
            self._responder = self._responders['what']

        parts = morph.analyze(text)
        response = self._responder.response(text, parts)
        self._dictionary.study(text, parts)
        return response

    def save(self):
        """Save the Dictionary."""
        self._dictionary.save()

    @property
    def name(self):
        """Name of this chatbot instance."""
        return self._name

    @property
    def responder_name(self):
        """Name of the responder currently held."""
        return self._responder.name
def find_port(self):
    """Find the serial port an OpenBCI board answers on.

    Candidate port names depend on the platform.  Each candidate is opened,
    sent ``b'v'`` and checked with ``self.openbci_id``.

    Returns:
        The last candidate port for which ``self.openbci_id`` was truthy.

    Raises:
        EnvironmentError: If the platform is not recognized.
        OSError: With the message ``'noport'`` if no candidate matched.
    """
    # The port addresses are different on different platforms.
    if sys.platform.startswith('win'):
        # sys.platform is 'win32' on Windows; the former 'study_window' prefix
        # could never match, so Windows always raised EnvironmentError.
        self.gui.log_mess(dict.get_string(self, 'checkwin'))
        ports = ['COM%s' % (i + 1) for i in range(256)]
    elif sys.platform.startswith('linux') or sys.platform.startswith('cygwin'):
        self.gui.log_mess(dict.get_string(self, 'checklin'))
        ports = glob.glob('/dev/ttyUSB*')
    elif sys.platform.startswith('darwin'):
        self.gui.log_mess(dict.get_string(self, 'checkmac'))
        ports = glob.glob('/dev/tty.usbserial*')
    else:
        raise EnvironmentError('Error finding ports on your operating system')

    openbci_port = ''
    for port in ports:
        try:
            # Probe the candidate itself; opening cfg.portUsed here made every
            # iteration test the same port regardless of `port`.
            s = serial.Serial(port=port, baudrate=cfg.baudrate,
                              timeout=cfg.timeoutt)
            try:
                s.write(b'v')
                openbci_serial = self.openbci_id(s)
            finally:
                # Release the handle even if the probe raised.
                s.close()
            if openbci_serial:
                openbci_port = port
        except (OSError, serial.SerialException):
            # Best effort: a port that cannot be opened is simply skipped.
            pass

    if openbci_port == '':
        raise OSError('noport')
    return openbci_port
def test_set_get(self):
    """A value stored with ``set`` must come back from ``get``."""
    store = Dictionary()
    store.set(key=1, value=2)
    self.assertEqual(2, store.get(1),
                     "set_get value 1 did not have the right value")
def main():
    """Train a Q-learning strategy, replay it once, and optionally save it."""
    # Init
    configuration = Dictionary()
    environment = Environment(configuration)
    learner = QLearning(configuration)

    # Learn
    configuration._debug = True
    strategy = learner.q_learn(environment, do_plot=True)
    configuration._debug = False

    # Test: run one episode with the learned strategy.
    total_reward = 0.
    configuration._debug = True
    state = environment.reset()
    while True:
        action = environment.decide_next_action(state, strategy)
        state, reward, done, _ = environment.step(action)
        total_reward += reward
        if done:
            break
    configuration.display.results(environment.portfolio_, do_plot=True)

    # Save the model?
    if configuration.save_model is True:
        learner.nn.save_model(learner.model)
def setup_module(module):
    """Generate dictionary configs and create the ClickHouse test cluster.

    Removes every file in ``configs/dictionaries``, writes one config per
    compatible (layout, source) pair, and sets the module-level ``cluster``
    and ``node`` globals.  Generated dictionaries are appended to
    ``DICTIONARIES``.
    """
    global DICTIONARIES
    global cluster
    global node

    dict_configs_path = os.path.join(SCRIPT_DIR, 'configs/dictionaries')
    # Start from a clean directory so stale configs are not picked up.
    for f in os.listdir(dict_configs_path):
        os.remove(os.path.join(dict_configs_path, f))

    for layout in LAYOUTS:
        for source in SOURCES:
            if source.compatible_with_layout(layout):
                structure = DictionaryStructure(layout, FIELDS[layout.layout_type])
                dict_name = source.name + "_" + layout.name
                dict_path = os.path.join(dict_configs_path, dict_name + '.xml')
                dictionary = Dictionary(dict_name, structure, source, dict_path,
                                        "table_" + dict_name)
                dictionary.generate_config()
                DICTIONARIES.append(dictionary)
            else:
                # One formatted string prints the same text on Python 2 and 3.
                print("Source %s incompatible with layout %s"
                      % (source.name, layout.name))

    main_configs = [os.path.join(dict_configs_path, fname)
                    for fname in os.listdir(dict_configs_path)]

    cluster = ClickHouseCluster(__file__,
                                base_configs_dir=os.path.join(SCRIPT_DIR, 'configs'))
    node = cluster.add_instance('node', main_configs=main_configs,
                                with_mysql=True, with_mongo=True)
    cluster.add_instance('clickhouse1')
def checkFile(file_name, dictionary_file="words.dat"):
    """Spell-check ``file_name`` and write the result to ``<file_name>.out``.

    Runs of characters found in the dictionary's ``ALLOWED_LETTERS`` form
    words.  Each word is verified against the dictionary; for a word that is
    not found the user is asked what to do and the dictionary is updated.
    All other characters are copied through unchanged.

    Args:
        file_name: Path of the text file to check.
        dictionary_file: Path of the word list backing the dictionary.
    """
    # Set up dictionary based on words.dat
    d = Dictionary(file_name=dictionary_file)

    def _checked(word):
        """Return `word` after verification and, if needed, user correction."""
        resp, word = d.verify(word)
        if not resp:
            # Word was not found in dictionary
            resp, new_word = getUserResponse(word)
            d.update(resp, word, new_word)
            word = new_word
        return word

    # `with` closes both files even when verification or the prompt raises.
    with open(file_name, 'r') as file_in:
        with open("{}.out".format(file_name), 'w') as file_out:
            current_word = ""
            # Read one character at a time until read() returns "" at EOF.
            for next_char in iter(lambda: file_in.read(1), ""):
                if next_char in d.ALLOWED_LETTERS:
                    current_word += next_char
                else:
                    file_out.write(_checked(current_word))
                    current_word = ""
                    file_out.write(next_char)
            # A file that ends in a letter leaves a pending word; it used to be
            # dropped from the output without being checked.
            if current_word:
                file_out.write(_checked(current_word))

    print("Spellchecked file written to {}.out.".format(file_name))
class Anagram(object):
    """Find the dictionary words that are anagrams of an input string."""

    def __init__(self):
        """Create empty result sets and load the dictionary from 'Unabr.dict'."""
        self.possible_words = set()
        self.output = set()
        self.dict = Dictionary()
        # `with` closes the file even if get_dict raises.
        with open('Unabr.dict', 'r') as fich:
            self.dict.get_dict(fich)

    def set_input(self, string):
        """Store the string whose anagrams are wanted."""
        self.string = string

    def get_output(self):
        """Return the set of arrangements of the input found in the dictionary.

        Every arrangement of the input's characters is generated first, so the
        cost grows factorially with the input length.
        """
        self.process('', list(self.string))
        self.output.update(
            word for word in self.possible_words
            if self.dict.is_in_dict(word))
        return self.output

    def process(self, string, l):
        """Add to ``possible_words`` every ``string`` + arrangement of ``l``.

        Args:
            string: Prefix built so far.
            l: List of the characters still to be placed.
        """
        if not l:
            self.possible_words.add(string)
            return
        for index, elem in enumerate(l):
            self.process(string + elem, l[:index] + l[index + 1:])
def test_valid_query(self):
    """Tests to see if querying the definition is implemented correctly"""
    data = 'Children word for "father".'
    dictionary = Dictionary('../data.json')
    value = dictionary.query_definition("dad")
    print(value)
    # assertEquals is a deprecated alias that newer unittest versions removed.
    self.assertEqual(value, data)
def Initialize(credentials=None, opt_url=None):
    """Initialize the EE library.

    If this hasn't been called by the time any object constructor is used,
    it will be called then.  If this is called a second time with a different
    URL, this doesn't do an un-initialization of e.g.: the previously loaded
    Algorithms, but will overwrite them and let point at alternate servers.

    Args:
      credentials: OAuth2 credentials.
      opt_url: The base url for the EarthEngine REST API to connect to.
    """
    api_url = opt_url + '/api' if opt_url else None
    data.initialize(credentials, api_url, opt_url)

    # Initialize the dynamically loaded functions on the objects that want
    # them, in the same order as before.
    dynamic_classes = (
        ApiFunction, Element, Image, Feature, Collection, ImageCollection,
        FeatureCollection, Filter, Geometry, List, Number, String, Date,
        Dictionary,
    )
    for dynamic_class in dynamic_classes:
        dynamic_class.initialize()

    _InitializeGeneratedClasses()
    _InitializeUnboundMethods()
class Solver:
    """Solve a problem by pivoting its dictionary until no pivot is left."""

    problem = None
    dictionary = None

    def __init__(self, problem):
        """Keep the problem and build its dictionary."""
        self.problem = problem
        self.dictionary = Dictionary(problem)

    def solve(self):
        """Pivot until finished and return the solution.

        Raises:
            SolverError: If the dictionary reports the problem as unbounded.
        """
        # pivot until solution found
        while self.dictionary.canPivot():
            self.dictionary.pivot()

        if not self.dictionary.unbounded:
            return self.__getSolution()
        # unbounded problem
        raise SolverError("problem is unbounded")

    def __getSolution(self):
        """Return the solution, always in primal form."""
        if not self.problem.dual:
            return self.dictionary.getSolution()
        # convert dual solutions: dictionary -> dual problem -> primal problem
        primal = self.dictionary.toProblem().getDual()
        return Dictionary(primal).getSolution()
def test_delete_get(self):
    """After ``delete``, ``get`` for the same key must return ``None``."""
    # Build and fill in two steps: chaining on set()'s return value only works
    # if set() returns the dictionary, which nothing here guarantees.
    dictionary = Dictionary()
    dictionary.set(key=1, value=2)
    dictionary.delete(key=1)
    value = dictionary.get(key=1)
    self.assertIsNone(value, "delete_get did not have the right value")
def main():
    """Add the words of every file named on the command line to 'words.dat'."""
    d = Dictionary()
    for path in sys.argv[1:]:
        for word in parseWords(path):
            d.add_word(word)
    d.save("words.dat")
def main():
    """Search a honeycomb grid for words and write them to 'output.txt'.

    ``sys.argv[1]`` names the grid file: a first line holding the row count,
    followed by that many rows of characters.  ``sys.argv[2]`` names the word
    list, one word per line.  Found words are written sorted, one per line.
    """
    graph = sys.argv[1]
    wordsList = sys.argv[2]

    with open(graph, 'r') as f:
        n = int(f.readline().strip())
        data = [list(f.readline().strip()) for _ in range(n)]

    g = HoneyGraph()
    g.setup(n, data)

    with open(wordsList, 'r') as f:
        words = [line.strip() for line in f]

    d = Dictionary()
    d.setup(words)

    # The unused `bound = max(words, key=len)` was removed: it raised
    # ValueError whenever the word list was empty.
    out = set()
    for key in d.tree.keys():
        # `node` rather than `n`, so the row count above is not shadowed.
        for node in g.comb[key]:
            recurseSearch(node, d.tree, '', [], out)

    with open('output.txt', 'w') as f:
        for word in sorted(out):
            f.write(word + "\n")
def __init__(self, data_dir, min_occurance=None, size=None, load_from=None):
    """Load, or build and cache, the dictionary and the dataset.

    Args:
        data_dir: Directory holding 'dataset/review.json' and the cached
            'dict.json' and 'data.json' files.
        min_occurance: Passed to ``Dictionary`` when it has to be built;
            required (asserted) only in that case.
        size: Stored on the instance and passed to ``Dictionary`` when it has
            to be built; required (asserted) only in that case.
        load_from: If given, the data is loaded from here instead of from
            'data.json'.
    """
    self.size = size
    data_file = os.path.join(data_dir, 'dataset/review.json')
    dictionary_file = os.path.join(data_dir, 'dict.json')

    if os.path.exists(dictionary_file):
        self.dictionary = Dictionary.load(dictionary_file)
    else:
        assert min_occurance is not None
        assert size is not None
        self.dictionary = Dictionary(data_file, min_occurance, size)
        self.dictionary.save(dictionary_file)

    if load_from is not None:
        self.data = self.load(load_from)
        return

    dataset_file = os.path.join(data_dir, 'data.json')
    if not os.path.exists(dataset_file):
        # Build the dataset once and cache it on disk.
        self.data = self.create_dataset(data_file)
        self.save(dataset_file)
    self.data = self.load(dataset_file)
def test_load_dictionary(self) -> None:
    """
    Reading a dictionary and ensuring the number of lines matches the number
    of words.
    """
    for path in TestDictionary.FILENAMES:
        self.dictionary = Dictionary(TestDictionary.DEFAULT_HASH_BASE,
                                     TestDictionary.DEFAULT_TABLE_SIZE)
        word_count = self.dictionary.load_dictionary(path)
        line_count = file_len(path)
        self.assertEqual(word_count, line_count,
                         "Number of words should match number of lines")
def test_load_dictionary_not_existing_filepath(self):
    """Loading a missing file must print the expected messages to stdout."""
    dictionary = Dictionary()
    # NOTE(review): the literal is reproduced as it appears in this view; any
    # line breaks it originally contained cannot be recovered - confirm
    # against the output of load_dictionary.
    expected = """!!! The file does not exist. -- loading dictionary finished. """
    # Capture stdout while the load runs so the printed text can be compared.
    with patch('sys.stdout', new=StringIO()) as mock:
        dictionary.load_dictionary("notExist.txt")
    self.assertEqual(mock.getvalue(), expected)
def test_delete():
    """A deleted key must no longer be returned by ``get``."""
    cars = Dictionary()
    cars.set('WRX', 'Subaru')
    cars.set('Cherokee', 'Jeep')
    cars.set('Tacoma', 'Toyota')
    assert cars.get('Cherokee') == 'Jeep'
    cars.delete('Cherokee')
    # Identity check: `== None` can be satisfied by a custom __eq__.
    assert cars.get('Cherokee') is None
def test_load_data(self):
    """
    Unit test for loading data to a dictionary.
    """
    loaded = Dictionary()
    loaded.load_dictionary("data.json")
    # NOTE(review): the assertion compares True with True, so this test only
    # fails if load_dictionary raises.
    outcome = True
    self.assertEqual(outcome, True)
def create_dictionary_obj(cls):
    """
    Create a ``Dictionary`` and load "data.json" into it.

    :return: the loaded Dictionary
    """
    loaded = Dictionary()
    loaded.load_dictionary("data.json")
    return loaded
def test_query_definition_query_a_word(self):
    """Querying a known entry must return its list of definitions."""
    lookup = Dictionary()
    lookup.load_dictionary("data.json")
    expected = [
        "Site that cannot be used for any purpose, being contaminated by pollutants."
    ]
    actual = lookup.query_definition("abandoned industrial site")
    self.assertEqual(actual, expected)
def test_word_not_found(self):
    """
    Querying the word "xxxx" must raise the WordNotFound exception.
    """
    lookup = Dictionary()
    lookup.load_dictionary("data.json")
    with self.assertRaises(WordNotFound):
        lookup.query_definition("xxxx")
def test_is_data_loaded(self):
    """
    After loading "data.json", ``is_data_loaded`` must be truthy.
    """
    loaded = Dictionary()
    loaded.load_dictionary("data.json")
    # NOTE(review): if is_data_loaded is a method rather than a property, the
    # attribute itself is always truthy and this check cannot fail - confirm.
    flag = loaded.is_data_loaded
    self.assertTrue(flag, "Dictionary is empty!")
class Wordplay:
    """Word-puzzle helpers built on the pattern search of a ``Dictionary``.

    Patterns passed to the dictionary use ``_`` for each unknown letter.
    """

    def __init__(self):
        """Create the underlying dictionary; call ``open`` before searching."""
        self.dico = Dictionary()

    def __del__(self):
        """Close the dictionary when the object is collected."""
        # __del__ also runs when __init__ failed before `dico` was assigned.
        dico = getattr(self, 'dico', None)
        if dico is not None:
            dico.close()

    def open(self, dictionary_path):
        """Open the dictionary stored at ``dictionary_path``."""
        self.dico.open(dictionary_path)

    def close(self):
        """Close the dictionary."""
        self.dico.close()

    def search_words(self, pattern):
        """Yield the words the dictionary returns for ``pattern``."""
        for word in self.dico.search_words(pattern):
            yield word

    def letters_for_three_words(self, word1_begin, word2_begin, word3_begin,
                                word_end_len):
        """Yield, sorted, the endings shared by all three word beginnings.

        Args:
            word1_begin, word2_begin, word3_begin: The known beginnings.
            word_end_len: Number of unknown letters after each beginning.
        """
        common_suffixes = (
            set(self._search_suffixes(word1_begin, word_end_len))
            & set(self._search_suffixes(word2_begin, word_end_len))
            & set(self._search_suffixes(word3_begin, word_end_len)))
        for common_suffix in sorted(common_suffixes):
            yield common_suffix

    def quatro(self, prefix1, suffix1, prefix2, suffix2, middleLength):
        """Yield, sorted, the middles that complete both prefix/suffix pairs.

        Args:
            prefix1, suffix1: Known start and end of the first word.
            prefix2, suffix2: Known start and end of the second word.
            middleLength: Number of unknown letters in the middle.
        """
        common_middles = (
            set(self._search_middle(prefix1, suffix1, middleLength))
            & set(self._search_middle(prefix2, suffix2, middleLength)))
        for common_middle in sorted(common_middles):
            yield common_middle

    def _search_middle(self, prefix, suffix, middleLength):
        """Yield the part between ``prefix`` and ``suffix`` of each match."""
        for word in self.dico.search_words(prefix + "_" * middleLength + suffix):
            # An explicit end index: word[a:-len(suffix)] becomes word[a:-0],
            # i.e. an empty string, when the suffix is empty.
            yield word[len(prefix):len(word) - len(suffix)]

    def _search_suffixes(self, word_begin, word_end_len):
        """Yield the last ``word_end_len`` letters of each match."""
        for word in self.dico.search_words(word_begin + "_" * word_end_len):
            # An explicit start index: word[-0:] is the whole word, not ''.
            yield word[len(word) - word_end_len:]

    def search_anagrams(self, word):
        """Yield the anagrams the dictionary returns for ``word``."""
        for anagram in self.dico.search_anagrams(word):
            yield anagram
def __getSolution(self):
    """Return the solution, always in primal form."""
    if not self.problem.dual:
        return self.dictionary.getSolution()
    # convert dual solutions: dictionary -> dual problem -> primal problem
    primal = self.dictionary.toProblem().getDual()
    return Dictionary(primal).getSolution()
def test2():
    """The best solution for rack "KISSEDQ" on an empty board must score 32."""
    dictionary = Dictionary()
    dictionary.set_words(["KISSED"])
    board = Board()
    rack = "KISSEDQ"

    solutions = board.generate_solutions(rack, dictionary)
    solution = board.find_best_solution(solutions, dictionary)
    if solution:
        # print() with one argument prints the same text on Python 2 and 3.
        print("Winner: %s" % solution)
        board.add_solution(solution)
        print(board)
    assert solution and solution.score == 32
def test_cross_with_blank(self):
    """'SA' crossing 'JET' must score 13, or 11 with index 0 in the last argument."""
    words = Dictionary()
    words.set_words(["SA", "JETS"])
    board = Board()
    board.add_word('JET', 5, 4, VERTICAL)

    plain = Solution(8, 4, HORIZONTAL, 'SA', [])
    plain.determine_score(board, words)
    self.assertEqual(plain.score, 13)

    with_blank = Solution(8, 4, HORIZONTAL, 'SA', [0])
    with_blank.determine_score(board, words)
    self.assertEqual(with_blank.score, 11)
    board.add_solution(with_blank)
def file_to_dict(path):
    """Build a ``Dictionary`` from the lowercase-only lines of a word file.

    Args:
        path: Path of a text file with one candidate word per line.

    Returns:
        The filled Dictionary, after ``update_word_count`` has been called.
    """
    dictionary = Dictionary()
    # `with` closes the file even if adding a word raises.
    with open(path, 'r') as word_file:
        for counter, line in enumerate(word_file):
            # Only lines made up entirely of a-z are accepted.
            if re.match('^[a-z]+$', line) is not None:
                dictionary.add_word(line.strip())
            # Progress message every 25000 lines; print() with a single
            # argument behaves the same on Python 2 and 3.
            if counter % 25000 == 0:
                print("Loading Dictionary...")
    dictionary.update_word_count()
    return dictionary
def test_blanks_with_same_letter(self):
    """Rack '?BA' with word 'ABA' must yield exactly the six listed solutions."""
    dic = Dictionary()
    dic.set_words(["ABA"])
    board = Board()
    found = []
    board.generate_solutions_in_line('?BA', dic, 7, HORIZONTAL, found)

    expected = {
        'ABa (7,7,H)', 'aBA (7,7,H)',
        'ABa (7,5,H)', 'aBA (7,5,H)',
        'ABa (7,6,H)', 'aBA (7,6,H)',
    }
    self.assertEqual({str(s) for s in found}, expected)
def test_two_letter_one_blank(self):
    """The best solution for rack 'RUIAG?E' across 'DUCE' must score 80."""
    dic = Dictionary()
    dic.set_words(["DUCE", "EGRUGEAI"])
    board = Board()
    board.add_word('DUCE', 7, 4, HORIZONTAL)

    # Score the word with index 1, then index 4, as the last argument.
    for blank_index in (1, 4):
        sol = Solution(7, 7, VERTICAL, 'EGRUGEAI', [blank_index])
        sol.determine_score(board, dic)

    candidates = board.generate_solutions('RUIAG?E', dic)
    best = board.find_best_solution(candidates, dic)
    self.assertEqual(best.score, 80)
def search(dictionary_file, postings_file, queries_file, output_file):
    """Run every query in ``queries_file`` and write one result line each.

    Args:
        dictionary_file: JSON file the in-memory dictionary is built from.
        postings_file: Postings file opened for reading.
        queries_file: Text file with one infix query per line.
        output_file: File receiving the space-separated results per query.
    """
    # Build in memory dict from dictionary_file.
    with open(dictionary_file) as dict_file:
        dictionary = Dictionary.from_json(dict_file.read())

    # Process queries.
    with open(output_file, 'w+') as output:
        with open(queries_file) as qfile:
            with PostingsFile(postings_file, mode='r') as pfile:
                for line in qfile:
                    # Strip newline characters.
                    text = line.replace('\n', '').replace('\r', '')
                    prefix_notation = parse_query.infix_to_prefix(text)

                    # Process all words in the query here; operators are
                    # passed through untouched.
                    processed = [
                        process_word(token)
                        if parse_query.is_operand(token) else token
                        for token in prefix_notation
                    ]

                    query = parse_query.process_infix_query(processed)
                    result = execute_query(query, dictionary, pfile)
                    output.write('%s\n' % ' '.join(map(str, result)))
def __init__(self, max_links_allowed, compress_status):
    """Create the helpers held by this object and record the start time.

    Args:
        max_links_allowed: Passed to the ``Dictionary`` of unique links.
        compress_status: Stored as the compress setting.
    """
    self.__html_parser = Parser()
    self.__bfs_tree = Queue()
    self.__unique_links = Dictionary(max_links_allowed)
    self.__compress = compress_status
    self.__pyurlopener = lib.PyURLOpener()
    # Taken last, after all helpers have been constructed.
    self.__start_time = datetime.now()
def __init__(self):
    """Create empty result sets and load the dictionary from 'Unabr.dict'."""
    self.possible_words = set()
    self.output = set()
    self.dict = Dictionary()
    # `with` closes the file even if get_dict raises.
    with open('Unabr.dict', 'r') as fich:
        self.dict.get_dict(fich)
def search(dictionary_file, postings_file, queries_file, output_file):
    """Run every query in ``queries_file`` and write one result line each.

    Args:
        dictionary_file: JSON file the in-memory dictionary is built from.
        postings_file: Postings file (entries with frequencies), read-only.
        queries_file: Text file with one query per line.
        output_file: File receiving the space-separated results per query.
    """
    # Build in memory dict from dictionary_file.
    with open(dictionary_file) as dict_file:
        dictionary = Dictionary.from_json(dict_file.read())

    # Process queries.
    with open(output_file, 'w+') as output:
        with open(queries_file) as qfile:
            with PostingsFile(
                    postings_file, mode='r',
                    entry_cls=PostingsFileEntryWithFrequencies) as pfile:
                for line in qfile:
                    # Strip surrounding whitespace, then process all words.
                    tokens = process_query(line.strip())
                    term_counts = collections.Counter(tokens)
                    query_terms = sorted(set(tokens))

                    # Calculate query vector: log-tf weights scaled by
                    # unit_vector.
                    weights = [logtf(term_counts[term]) for term in query_terms]
                    query_vector = list(unit_vector(weights))

                    # Execute query
                    results = execute_query(
                        query_terms, query_vector, dictionary, pfile)

                    # Write doc_ids to output file.
                    output.write('%s\n' % ' '.join(str(x) for x in results))
def __init__(self, request=None, response=None):
    """Initialize the handler and try to create the default dictionary.

    A failure to create the dictionary is not raised; its text is appended to
    ``self.error_msg`` and ``self.dictionary`` is left unset.
    """
    self.initialize(request, response)
    self.error_msg = ''
    try:
        self.dictionary = Dictionary.create_default()
    # "except X as e" is valid on Python 2.6+ and 3; "except X, e" is a
    # syntax error on Python 3.
    except Exception as e:
        self.error_msg += Exceptions.print_exception(e)
def __init__(self, request=None, response=None):
    """Initialize the handler and try to create the default dictionary.

    A failure to create the dictionary is not raised; the formatted exception
    is printed and ``self.dictionary`` is left unset.
    """
    self.initialize(request, response)
    self.errorMsg = ''
    try:
        self.dictionary = Dictionary.create_default()
    except Exception as e:
        # print() with one argument prints the same text on Python 2 and 3.
        print(Exceptions.format_exception(e))
def test1():
    """Generate and print the best solution for rack "PROSAIC" next to "OOZ"/"CC"."""
    dictionary = Dictionary()
    dictionary.set_words(["OOZ", "OOZS", "PROSAIC", "PROC", "CC"])
    board = Board()

    # With this bug we'll get "PROC" but we want "PROSAIC" (where the S is the
    # plural of "OOS"), which is longer.
    # Floor division keeps the coordinates integers on Python 3 as well; on
    # Python 2 it equals the former "/" for integer operands.
    middle = Board.SIZE // 2
    board.add_word("OOZ", middle, middle - 2, HORIZONTAL)
    board.add_word("CC", middle + 1, middle - 2, HORIZONTAL)

    rack = "PROSAIC"
    solutions = board.generate_solutions(rack, dictionary)
    solution = board.find_best_solution(solutions, dictionary)
    if solution:
        # print() with one argument prints the same text on Python 2 and 3.
        print("Winner: %s" % solution)
        board.add_solution(solution)
        print(board)
def __init__(self, input=None):
    """Store `input` and, when it is given, build the dictionary from it."""
    super(TextCorpus, self).__init__()
    self.input = input
    self.dictionary = Dictionary()
    if input is None:
        logger.warning("No input document stream provided; assuming "
                       "dictionary will be initialized some other way.")
    else:
        self.dictionary.addDocuments(self.get_texts())
def __init__(self, dictionary_file_name='', dictionary=None):
    """Take the dictionary from a file or from an existing object.

    Args:
        dictionary_file_name: If non-empty, the dictionary is loaded from this
            file and ``dictionary`` is ignored.
        dictionary: An existing dictionary object to use.

    Raises:
        AIException: If neither argument is supplied.
    """
    if dictionary_file_name:
        self.dictionary = Dictionary.load_from_file(dictionary_file_name)
    # Presence check rather than truthiness: a supplied dictionary that
    # evaluates as false (e.g. an empty one) is still a valid argument.
    elif dictionary is not None:
        self.dictionary = dictionary
    else:
        raise AIException(
            "Вы должны указать один из параметров dictionary_file_name или dictionary"
        )
def __init__(self, document_generator, stop_words):
    """Build the dictionary and TF-IDF model, then filter unwanted tokens.

    Tokens listed in ``stop_words`` and tokens with a document frequency of 1
    are passed to the dictionary's ``filter_tokens``.

    Args:
        document_generator: Callable returning the documents.
        stop_words: Words to filter out.
    """
    self.document_generator = document_generator
    self.stop_list = stop_words
    self.dictionary = Dictionary(document_generator())
    self.tfidf_model = TfidfModel(self.dictionary)

    # Ids of the stop words that are present in the dictionary.
    stop_ids = []
    for stop_word in self.stop_list:
        if stop_word in self.dictionary.token_to_id:
            stop_ids.append(self.dictionary.token_to_id[stop_word])

    # Ids of the tokens that appear in exactly one document.
    once_ids = []
    for token_id, doc_freq in self.dictionary.doc_freqs.iteritems():
        if doc_freq == 1:
            once_ids.append(token_id)

    self.dictionary.filter_tokens(stop_ids + once_ids)
def build(training_dir, dict_file, postings_file):
    """Index ``training_dir`` into a dictionary file and a postings file.

    File names in ``training_dir`` are converted with ``int`` and used as
    document ids; documents are processed in ascending id order.

    Args:
        training_dir: Directory with one file per document.
        dict_file: Path the dictionary JSON is written to.
        postings_file: Path of the postings file to create.
    """
    dictionary = Dictionary()

    # Read each file in the training dir, sorted according to doc_id.
    filepaths = sorted(
        (os.path.join(training_dir, name) for name in os.listdir(training_dir)),
        key=lambda path: int(os.path.basename(path)))

    # NOTE(michael): for testing.
    # filepaths = filepaths[:10]

    with PostingsFile(postings_file, mode='w+',
                      entry_cls=PostingsFileEntryWithFrequencies) as postings:
        for filepath in filepaths:
            # TODO(michael): Making assumption that document is an int.
            doc_id = int(os.path.basename(filepath))

            for term in process_file(filepath):
                # Create postings file entry if entry does not exist for the
                # `(term, doc_id)` pair.
                if not dictionary.has_entry(term, doc_id):
                    # Point the term's previous tail entry at the entry that
                    # is about to be added.
                    if dictionary.get_frequency(term) != 0:
                        tail_location = dictionary.get_tail(term)
                        tail_entry = postings.get_entry(tail_location)
                        tail_entry.next_pointer = postings.pointer
                        postings.write_entry(tail_entry)

                    # Add new postings file entry for the `(term, doc_id)` pair.
                    dictionary.add_term(term, doc_id, postings.pointer)
                    postings.write_entry(
                        PostingsFileEntryWithFrequencies(doc_id))

                # Update postings file entry term frequency. (Increment).
                # NOTE(michael): We can safely use the tail pointer since we
                # process documents in order and not at random.
                current_location = dictionary.get_tail(term)
                current_entry = postings.get_entry(current_location)
                current_entry.term_freq += 1
                postings.write_entry(current_entry)

    # Write dictionary to file.
    with open(dict_file, 'w') as dictionary_file:
        dictionary_file.write(dictionary.to_json())
def __init__(self):
    """Create the helper objects and reset the cache and candidate state."""
    self.dict = Dictionary()
    self.spliter = PinyinSpliter()
    self.fitter = Fitter()
    self.picker = Picker(self.dict)
    # self.picker.set( [], [], True )
    # The cache starts with a single entry: [0, [], ""].
    self.cache = [[0, [], ""]]
    self.candCacheIndex = self.candStartIndex = 0
    self.candList = []
def Reset():
    """Reset the library. Useful for re-initializing to a different server."""
    global Algorithms

    data.reset()
    # Reset each class in the same order as before.
    resettable_classes = (
        ApiFunction, Element, Image, Feature, Collection, ImageCollection,
        FeatureCollection, Filter, Geometry, List, Number, String, Date,
        Dictionary,
    )
    for resettable_class in resettable_classes:
        resettable_class.reset()
    _ResetGeneratedClasses()
    Algorithms = _AlgorithmsContainer()
def test_dictionary_has_entry():
    """``has_entry`` must be true only for a (term, doc_id) pair that was added."""
    index = Dictionary()
    assert not index.has_entry('asdf', 1)

    index.add_term('asdf', 1, 10)

    assert index.has_entry('asdf', 1)
    assert not index.has_entry('qwer', 1)