Example #1
class TestDictionary(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        super(TestDictionary, self).__init__(*args, **kwargs)
        self.dictionary = Dictionary('doc/dictionary.json')

    def test_initializer_with_invalid_dictionary_path_must_create_empty_dictionary(
            self):
        dictionary = Dictionary('doc/invalid_dictionary.json')
        is_empty = dictionary.is_empty()
        self.assertTrue(is_empty)

    def test_initializer_with_valid_dictionary_path_must_initialize_words_dictionary(
            self):
        is_empty = self.dictionary.is_empty()
        self.assertFalse(is_empty)

    @parameterized.expand(['Three', 'valid', 'words'])
    def test_is_valid_word_with_actual_word_must_return_true(self, word):
        result = self.dictionary.is_valid_word(word)
        self.assertTrue(result)

    @parameterized.expand(['Thr33', 'in-valid', '$words', ''])
    def test_is_valid_word_with_invalid_word_must_return_false(
            self, invalid_word):
        result = self.dictionary.is_valid_word(invalid_word)
        self.assertFalse(result)

    @parameterized.expand(['I', 'a'])
    def test_is_valid_scrabble_word_with_actual_one_letter_word_must_return_false(
            self, word):
        result = self.dictionary.is_valid_scrabble_word(word)
        self.assertFalse(result)
Example #2
def Initialize(credentials="persistent", opt_url=None):
    """Initialize the EE library.

  If this hasn't been called by the time any object constructor is used,
  it will be called then.  If this is called a second time with a different
  URL, this doesn't do an un-initialization of e.g.: the previously loaded
  Algorithms, but will overwrite them and let point at alternate servers.

  Args:
    credentials: OAuth2 credentials.  'persistent' (default) means use
        credentials already stored in the filesystem, or raise an explanatory
        exception guiding the user to create those credentials.
    opt_url: The base url for the EarthEngine REST API to connect to.
  """
    if credentials == "persistent":
        credentials = _GetPersistentCredentials()
    data.initialize(credentials, (opt_url + "/api" if opt_url else None), opt_url)
    # Initialize the dynamically loaded functions on the objects that want them.
    ApiFunction.initialize()
    Element.initialize()
    Image.initialize()
    Feature.initialize()
    Collection.initialize()
    ImageCollection.initialize()
    FeatureCollection.initialize()
    Filter.initialize()
    Geometry.initialize()
    List.initialize()
    Number.initialize()
    String.initialize()
    Date.initialize()
    Dictionary.initialize()
    Terrain.initialize()
    _InitializeGeneratedClasses()
    _InitializeUnboundMethods()
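A minimal usage sketch, assuming the function above is exposed as ee.Initialize, as in the Earth Engine Python client; the alternate server URL is a hypothetical placeholder:

import ee

# Uses the default 'persistent' credentials stored on the filesystem;
# raises an explanatory exception if none have been created yet.
ee.Initialize()

# Re-initialize, pointing the client at an alternate server.
ee.Initialize(credentials=None, opt_url='https://example.com/earthengine')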
Example #3
    def deactivate(self):

        # close the last array, even though it might not be complete...
        #
        # Adjust the last delimiting commas, and add proper array ends.
        #
        self.data_arr_string = self.data_arr_string[:-2] + ']]\n'

        #
        # Write the last arrays; the context managers close the files. If
        # written in TEXT format, the result is a string; otherwise it is
        # saved in numpy format.
        #
        with open(self.data_file_name, 'a') as f:
            f.write(self.data_arr_string)

        with open(self.result_file_name, 'a') as f:
            f.write(self.result_arr_string)

        np.save(self.data_file_name_np, self.data_arr_np)
        np.save(self.result_file_name_np, self.result_arr_np)

        print(dict.get_string('plugclose') + self.data_file_name)
        print(dict.get_string('checkarray'))
Example #4
def test_get_slot():
    map_buckets = Dictionary()
    map_buckets.set_key_to_value('9.0', 'Tesla')
    bucket_object, node = map_buckets.get_slot('9.0')
    assert node.value[0] == '9.0'
    assert node.value[1] == 'Tesla'
Example #5
def build_index():
    print('build index..', file=sys.stderr)
    #1. read dictionary
    dictionary = Dictionary()
    dictionary.load_from_galago_dump(args.dict_file, args.dict_min_freq)

    #2. make snrm instance & load weight
    device = torch.device('cpu')
    snrm = SNRM(args).to(device)
    snrm.load_state_dict(torch.load(args.model_file))  ## load model
    snrm.eval()  ## set inference mode

    #3. read train data
    doc_data = Triplet('doc', args, dictionary)

    #4. make index
    db_loader = DataLoader(dataset=doc_data,
                           batch_size=1,
                           shuffle=False,
                           num_workers=0)

    inverted_index = InMemoryInvertedIndex(
        args.conv3_channel)  ## last channel is output representation
    with torch.no_grad():
        for i, (doc_id, doc) in enumerate(db_loader):
            doc_repr = snrm(doc.float())
            inverted_index.add(doc_id.numpy(), doc_repr.numpy())
            if i % 10 == 0:
                print(i, 'documents processed\r', file=sys.stderr, end='')

    inverted_index.store(args.index_file)
    print('>save index: ', args.index_file, file=sys.stderr)
Example #6
    @staticmethod
    def reload(path, params):
        """
        Create a sentence embedder from a pretrained model.
        """
        # reload model
        reloaded = torch.load(path)
        state_dict = reloaded['model']

        # handle models from multi-GPU checkpoints
        if 'checkpoint' in path:
            state_dict = {(k[7:] if k.startswith('module.') else k): v
                          for k, v in state_dict.items()}

        # reload dictionary and model parameters
        dico = Dictionary(reloaded['dico_id2word'], reloaded['dico_word2id'],
                          reloaded['dico_counts'])
        pretrain_params = AttrDict(reloaded['params'])
        pretrain_params.n_words = len(dico)
        pretrain_params.bos_index = dico.index(BOS_WORD)
        pretrain_params.eos_index = dico.index(EOS_WORD)
        pretrain_params.pad_index = dico.index(PAD_WORD)
        pretrain_params.unk_index = dico.index(UNK_WORD)
        pretrain_params.mask_index = dico.index(MASK_WORD)

        # build model and reload weights
        model = Trained_Model(pretrain_params, dico)
        model.load_state_dict(state_dict)
        model.eval()

        # add missing parameters
        params.max_batch_size = 0

        return SentenceEmbedder(model, dico, pretrain_params)
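A hedged usage sketch; the checkpoint path is a hypothetical placeholder, and params may be any attribute-style options object, since reload() only adds max_batch_size to it:

params = AttrDict({})
embedder = SentenceEmbedder.reload('checkpoints/mlm_model.pth', params)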
Example #7
class MarkovChainWalker(object):
    def __init__(self):
        self.dictionary = Dictionary()

    def parse(self, filename):
        self.dictionary.update(filename)

    def process(self):
        self.dictionary.process()

    # pick a random word according to the probabilities in `hash`
    def pick(self, hash):
        random_pick = random.random()
        for key, value in sorted(hash.items(), key=lambda kv: (kv[1], kv[0])):
            if random_pick < value:
                return key

    # generate
    def generate(self, start_word, number_of_words):
        output = io.StringIO()
        word = start_word
        for n in range(number_of_words):
            output.write('%s ' % word)
            secondary = self.dictionary.dictionary[word]
            word = self.pick(secondary)
        contents = output.getvalue()
        output.close()
        return contents
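A short usage sketch; the corpus file and the start word are hypothetical, and the start word must occur in the parsed corpus:

walker = MarkovChainWalker()
walker.parse('corpus.txt')  # feed one input file into the dictionary
walker.process()            # presumably builds the probabilities pick() expects
print(walker.generate('the', 50))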
Example #8
def main():
    if sys.version_info[0] < 3:
        raise Exception("Must be using Python 3+")

    words = []
    print("Ingesting words from words.txt...")
    with open("words.txt", 'r') as file:
        words = [word.strip() for word in file.readlines()]

    dictionary = Dictionary(words)

    while True:
        print("Please enter letters you want to use:")
        letters = input()
        if not re.match("^[A-Za-z]+$", letters):
            print(
                "%s is invalid input; please use only English letters a-z."
                % letters)
            continue

        matches = dictionary.match_anagram(letters)
        print("")
        for word in sorted(matches):
            print(word)

        print("Found %d matches." % len(matches))
Example #9
def setup_module(module):
    global DICTIONARIES
    global cluster
    global node

    dict_configs_path = os.path.join(SCRIPT_DIR, 'configs/dictionaries')
    for f in os.listdir(dict_configs_path):
        os.remove(os.path.join(dict_configs_path, f))

    for layout in LAYOUTS:
        for source in SOURCES:
            if source.compatible_with_layout(layout):
                structure = DictionaryStructure(layout,
                                                FIELDS[layout.layout_type])
                dict_name = source.name + "_" + layout.name
                dict_path = os.path.join(dict_configs_path, dict_name + '.xml')
                dictionary = Dictionary(dict_name, structure, source,
                                        dict_path, "table_" + dict_name)
                dictionary.generate_config()
                DICTIONARIES.append(dictionary)
            else:
                print "Source", source.name, "incompatible with layout", layout.name

    main_configs = []
    for fname in os.listdir(dict_configs_path):
        main_configs.append(os.path.join(dict_configs_path, fname))
    cluster = ClickHouseCluster(__file__,
                                base_configs_dir=os.path.join(
                                    SCRIPT_DIR, 'configs'))
    node = cluster.add_instance('node',
                                main_configs=main_configs,
                                with_mysql=True,
                                with_mongo=True)
    cluster.add_instance('clickhouse1')
Example #10
class Bot():
    def __init__(self):
        self.dictionary = Dictionary()
        self.morph = Morph()

        self.resp_what = responder.WhatResponder(self.dictionary)
        self.resp_random = responder.RandomResponder(self.dictionary)
        self.resp_pattern = responder.PatternResponder(self.dictionary)
        self.resp_template = responder.TemplateResponder(self.dictionary)
        self.responder = self.resp_pattern

    def dialogue(self, input_text):
        parts = self.morph.analyze(input_text)

        i = random.randint(0, 100)
        if 0 <= i < 40:
            self.responder = self.resp_pattern
        elif 40 <= i < 70:
            self.responder = self.resp_template
        elif 70 <= i < 90:
            self.responder = self.resp_random
        else:
            self.responder = self.resp_what

        self.response = self.responder.response(input_text, parts)

        # have the dictionary learn from the input
        self.dictionary.study(input_text, parts)

        return self.response

    def save(self):
        self.dictionary.save()
Example #11
    def reset(self):
        self.model = None
        self.x_train_list = []
        self.y_train_list = []
        self.x_train = None
        self.y_train = None
        self.dictionary = Dictionary()
Example #12
def run_search(dict_file, postings_file, queries_file, results_file):
    """
    using the given dictionary file and postings file,
    perform searching on the given queries file and output the results to a file
    """

    dictionary = Dictionary(dict_file)
    postings = PostingsFile(postings_file)

    dictionary.load()  # Load dictionary into memory

    with open(queries_file, 'r') as query_file:
        with open(results_file, 'w') as output_file:
            complete_result = []
            for query in query_file:
                if query.strip():
                    result = util.eval_query(query, dictionary, postings)
                    result = util.format_result(result)
                    complete_result.append(result)
                else:
                    complete_result.append("")

            write_data = "\n".join(complete_result)
            output_file.write(write_data)

Example #13
class TextCorpus(interfaces.CorpusABC):
    """
    Helper class to simplify the pipeline of getting bag-of-words vectors (= a
    gensim corpus) from plain text.

    This is an abstract base class: override the `get_texts()` method to match
    your particular input.

    Given a filename (or a file-like object) in the constructor, the corpus
    object will be automatically initialized with a dictionary in
    `self.dictionary` and will support the `iter` corpus method. You only need
    to provide a correct `get_texts` implementation.

    """
    def __init__(self, input=None):
        super(TextCorpus, self).__init__()
        self.input = input
        self.dictionary = Dictionary()
        if input is not None:
            self.dictionary.add_documents(self.get_texts())
        else:
            logger.warning("No input document stream provided; assuming "
                           "dictionary will be initialized some other way.")


    def __iter__(self):
        """
        The function that defines a corpus.

        Iterating over the corpus must yield sparse vectors, one for each document.
        """
        for text in self.get_texts():
            yield self.dictionary.doc2bow(text, allow_update=False)


    def getstream(self):
        return getstream(self.input)


    def get_texts(self):
        """
        Iterate over the collection, yielding one document at a time. A document
        is a sequence of words (strings) that can be fed into `Dictionary.doc2bow`.

        Override this function to match your input (parse input files, do any
        text preprocessing, lowercasing, tokenizing etc.). There will be no further
        preprocessing of the words coming out of this function.
        """
        # Instead of raising NotImplementedError, let's provide a sample implementation:
        # assume documents are lines in a single file (one document per line).
        # Yield each document as a list of lowercase tokens, via `utils.tokenize`.
        length = 0
        for lineno, line in enumerate(getstream(self.input)):
            length += 1
            yield utils.tokenize(line, lowercase=True)
        self.length = length


    def __len__(self):
        return self.length  # raises AttributeError if the corpus has not been iterated yet
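Since the class is explicitly meant to be subclassed, a minimal concrete corpus might look like the following sketch, assuming one document per line of a plain-text file (documents.txt is a hypothetical name):

class LineCorpus(TextCorpus):
    # One document per line; tokens are lowercased words.
    def get_texts(self):
        length = 0
        for line in getstream(self.input):
            length += 1
            yield utils.tokenize(line, lowercase=True)
        self.length = length

corpus = LineCorpus('documents.txt')
for bow in corpus:  # one sparse bag-of-words vector per document
    print(bow)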
Example #14
def load_dictionaries(path, src_lang, dst_lang):
    """Load dictionaries for a given language pair."""
    src_dict = Dictionary.load(
        os.path.join(path, 'dict.{}.txt'.format(src_lang)))
    dst_dict = Dictionary.load(
        os.path.join(path, 'dict.{}.txt'.format(dst_lang)))
    return src_dict, dst_dict
Example #15
File: unmo.py  Project: karsay/2018
class Unmo:
    """人工無脳コアクラス。

    プロパティ:
    name -- 人工無脳コアの名前
    responder_name -- 現在の応答クラスの名前
    """
    def __init__(self, name):
        """文字列を受け取り、コアインスタンスの名前に設定する。
        Responder(What, Random, Pattern)インスタンスを作成し、保持する。
        Dictionaryインスタンスを作成し、保持する。
        Tokenizerインスタンスを作成し、保持する。
        """
        self._tokenizer = Tokenizer()
        self._dictionary = Dictionary()

        self._responders = {
            'what': WhatResponder('What', self._dictionary),
            'random': RandomResponder('Random', self._dictionary),
            'pattern': PatternResponder('Pattern', self._dictionary),
            'template': TemplateResponder('Template', self._dictionary),
            'markov': MarkovResponder('Markov', self._dictionary),
        }
        self._name = name
        self._responder = self._responders['pattern']

    def dialogue(self, text):
        """ユーザーからの入力を受け取り、Responderに処理させた結果を返す。
        呼び出されるたびにランダムでResponderを切り替える。
        入力をDictionaryに学習させる。"""
        chance = randrange(0, 100)
        if chance in range(0, 29):
            self._responder = self._responders['pattern']
        elif chance in range(30, 49):
            self._responder = self._responders['template']
        elif chance in range(50, 69):
            self._responder = self._responders['random']
        elif chance in range(70, 89):
            self._responder = self._responders['markov']
        else:
            self._responder = self._responders['what']

        parts = morph.analyze(text)
        response = self._responder.response(text, parts)
        self._dictionary.study(text, parts)
        return response

    def save(self):
        """Dictionaryへの保存を行う。"""
        self._dictionary.save()

    @property
    def name(self):
        """人工無脳インスタンスの名前"""
        return self._name

    @property
    def responder_name(self):
        """保持しているResponderの名前"""
        return self._responder.name
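A hedged interactive-loop sketch built only on the methods shown above; the bot name and prompt are placeholders:

bot = Unmo('proto')
while True:
    text = input('> ')
    if not text:
        break
    print('{}: {}'.format(bot.responder_name, bot.dialogue(text)))
bot.save()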
Example #16
    def find_port(self):
        #
        # Finds the serial port names. The port addresses are different on different
        # platforms.
        #
        if sys.platform.startswith('win'):
            self.gui.log_mess(dict.get_string(self, 'checkwin'))
            ports = ['COM%s' % (i + 1) for i in range(256)]
        elif sys.platform.startswith('linux') or sys.platform.startswith('cygwin'):
            self.gui.log_mess(dict.get_string(self, 'checklin'))
            ports = glob.glob('/dev/ttyUSB*')
        elif sys.platform.startswith('darwin'):
            self.gui.log_mess(dict.get_string(self, 'checkmac'))
            ports = glob.glob('/dev/tty.usbserial*')
        else:
            raise EnvironmentError('Error finding ports on your operating system')

        openbci_port = ''
        for port in ports:
            try:
                s = serial.Serial(port=port,
                                  baudrate=cfg.baudrate,
                                  timeout=cfg.timeoutt)
                s.write(b'v')
                openbci_serial = self.openbci_id(s)
                s.close()
                if openbci_serial:
                    openbci_port = port
            except (OSError, serial.SerialException):
                pass
        if openbci_port == '':
            raise OSError('noport')
        else:
            return openbci_port
Example #17
    def test_set_get(self):
        dictionary = Dictionary()
        dictionary.set(key=1, value=2)
        value1 = dictionary.get(1)

        self.assertEqual(2, value1,
                         "set_get value 1 did not have the right value")
Example #18
File: trader.py  Project: rcostu/trader
def main():
    # Init
    configuration = Dictionary()
    environment = Environment(configuration)
    learner = QLearning(configuration)

    # Learn
    configuration._debug = True
    strategy = learner.q_learn(environment, do_plot=True)
    configuration._debug = False

    # Test
    done = False
    total_reward = 0.
    configuration._debug = True
    state = environment.reset()
    while not done:
        action = environment.decide_next_action(state, strategy)
        state, reward, done, _ = environment.step(action)
        total_reward += reward

    configuration.display.results(environment.portfolio_, do_plot=True)

    # Save the model?
    if configuration.save_model:
        learner.nn.save_model(learner.model)
Example #19
def setup_module(module):
    global DICTIONARIES
    global cluster
    global node

    dict_configs_path = os.path.join(SCRIPT_DIR, 'configs/dictionaries')
    for f in os.listdir(dict_configs_path):
        os.remove(os.path.join(dict_configs_path, f))

    for layout in LAYOUTS:
        for source in SOURCES:
            if source.compatible_with_layout(layout):
                structure = DictionaryStructure(layout, FIELDS[layout.layout_type])
                dict_name = source.name + "_" + layout.name
                dict_path = os.path.join(dict_configs_path, dict_name + '.xml')
                dictionary = Dictionary(dict_name, structure, source, dict_path, "table_" + dict_name)
                dictionary.generate_config()
                DICTIONARIES.append(dictionary)
            else:
                print "Source", source.name, "incompatible with layout", layout.name

    main_configs = []
    for fname in os.listdir(dict_configs_path):
        main_configs.append(os.path.join(dict_configs_path, fname))
    cluster = ClickHouseCluster(__file__, base_configs_dir=os.path.join(SCRIPT_DIR, 'configs'))
    node = cluster.add_instance('node', main_configs=main_configs, with_mysql=True, with_mongo=True)
    cluster.add_instance('clickhouse1')
Example #20
def checkFile(file_name, dictionary_file="words.dat"):
    # Set up dictionary based on words.dat
    d = Dictionary(file_name=dictionary_file)

    file_in = open(file_name, 'r')
    file_out = open("{}.out".format(file_name), 'w')

    current_word = ""

    while True:
        # Read one character at a time from the input file
        next_char = file_in.read(1)
        # Exit the loop when there's nothing else to read
        if not next_char:
            break

        if next_char in d.ALLOWED_LETTERS:
            current_word += next_char
        else:
            # Verify the current_word with the dictionary
            resp, current_word = d.verify(current_word)
            if not resp:  # Word was not found in dictionary
                resp, new_word = getUserResponse(current_word)
                d.update(resp, current_word, new_word)
                current_word = new_word
            file_out.write(current_word)
            current_word = ""
            file_out.write(next_char)

    file_in.close()
    file_out.close()
    print("Spellchecked file written to {}.out.".format(file_name))
Example #21
class Anagram(object):
    
    def __init__(self):
        self.possible_words = set()
        self.output = set()
        self.dict = Dictionary()
        with open('Unabr.dict', 'r') as fich:
            self.dict.get_dict(fich)
        
    def set_input(self, string):
        self.string = string
        
    def get_output(self):
        self.process('', list(self.string))
        for word in self.possible_words:
            if self.dict.is_in_dict(word):
                self.output.add(word)
        return self.output
    
    def process(self, string, l):
        if len(l) == 0:
            self.possible_words.add(string)
            return
        for index in range(len(l)):
            new_list = l[:]
            elem = new_list.pop(index)
            self.process(string + elem, new_list)            
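A usage sketch; it requires the project's Unabr.dict word list, and since process() enumerates every permutation of the input, it is only practical for short strings:

anagram = Anagram()
anagram.set_input('stop')
print(anagram.get_output())  # the permutations found in the dictionary, e.g. 'stop', 'spot', 'tops', ...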
Example #22
    def test_valid_query(self):
        """Tests that querying a word's definition returns the right value."""
        data = 'Children word for "father".'
        dictionary = Dictionary('../data.json')
        value = dictionary.query_definition("dad")
        print(value)
        self.assertEqual(value, data)
Example #23
def Initialize(credentials=None, opt_url=None):
    """Initialize the EE library.

  If this hasn't been called by the time any object constructor is used,
  it will be called then.  If this is called a second time with a different
  URL, this doesn't do an un-initialization of e.g.: the previously loaded
  Algorithms, but will overwrite them and let point at alternate servers.

  Args:
    credentials: OAuth2 credentials.
    opt_url: The base url for the EarthEngine REST API to connect to.
  """
    data.initialize(credentials, (opt_url + '/api' if opt_url else None),
                    opt_url)
    # Initialize the dynamically loaded functions on the objects that want them.
    ApiFunction.initialize()
    Element.initialize()
    Image.initialize()
    Feature.initialize()
    Collection.initialize()
    ImageCollection.initialize()
    FeatureCollection.initialize()
    Filter.initialize()
    Geometry.initialize()
    List.initialize()
    Number.initialize()
    String.initialize()
    Date.initialize()
    Dictionary.initialize()
    _InitializeGeneratedClasses()
    _InitializeUnboundMethods()
Example #24
File: solver.py  Project: 3gx/ilp-solver
class Solver:

    problem = None
    dictionary = None

    def __init__(self, problem):
        self.problem = problem
        self.dictionary = Dictionary(problem)

    def solve(self):
        # pivot until solution found
        while self.dictionary.canPivot():
            self.dictionary.pivot()

        # unbounded problem
        if self.dictionary.unbounded:
            raise SolverError("problem is unbounded")

        return self.__getSolution()

    # returns solution only in primal form
    def __getSolution(self):
        dic = self.dictionary

        # convert dual solutions
        if self.problem.dual:
            dual = dic.toProblem()
            primal = dual.getDual()
            dic = Dictionary(primal)

        return dic.getSolution()
Example #25
    def test_delete_get(self):
        dictionary = Dictionary().set(key=1, value=2)
        dictionary.delete(key=1)
        value = dictionary.get(key=1)

        self.assertEqual(None, value,
                         "delete_get did not have the right value")
Example #26
def Initialize(credentials=None, opt_url=None):
  """Initialize the EE library.

  If this hasn't been called by the time any object constructor is used,
  it will be called then.  If this is called a second time with a different
  URL, this doesn't un-initialize the previously loaded Algorithms, for
  example, but will overwrite them and point at the alternate servers.

  Args:
    credentials: OAuth2 credentials.
    opt_url: The base url for the EarthEngine REST API to connect to.
  """
  data.initialize(credentials, (opt_url + '/api' if opt_url else None), opt_url)
  # Initialize the dynamically loaded functions on the objects that want them.
  ApiFunction.initialize()
  Element.initialize()
  Image.initialize()
  Feature.initialize()
  Collection.initialize()
  ImageCollection.initialize()
  FeatureCollection.initialize()
  Filter.initialize()
  Geometry.initialize()
  List.initialize()
  Number.initialize()
  String.initialize()
  Date.initialize()
  Dictionary.initialize()
  _InitializeGeneratedClasses()
  _InitializeUnboundMethods()
Example #27
def main():
    files = sys.argv[1:]
    d = Dictionary()
    for f in files:
        for word in parseWords(f):
            d.add_word(word)
    d.save("words.dat")
Example #28
def main():
  graph = sys.argv[1]
  wordsList = sys.argv[2]

  with open(graph, 'r') as f:
    n = int(f.readline().strip())
    data = []
    for i in range(0,n):
      l = list(f.readline().strip())
      data.append(l)

  g = HoneyGraph()
  g.setup(n, data)

  words = []
  with open(wordsList, 'r') as f:
    words = [line.strip() for line in f.readlines()]

  d = Dictionary()
  d.setup(words)
  bound = max(words, key=len)

  out = set()
  for key in d.tree.keys():
    for n in g.comb[key]:
      recurseSearch(n, d.tree, '', [], out)
  out = sorted(out)

  with open('output.txt', 'w') as f:
    for i in out:
      f.write(i + "\n")
Example #29
    def __init__(self,
                 data_dir,
                 min_occurance=None,
                 size=None,
                 load_from=None):

        self.size = size

        data_file = os.path.join(data_dir, 'dataset/review.json')

        dictionary_file = os.path.join(data_dir, 'dict.json')
        if not os.path.exists(dictionary_file):
            assert min_occurance is not None
            assert size is not None
            self.dictionary = Dictionary(data_file, min_occurance, size)
            self.dictionary.save(dictionary_file)
        else:
            self.dictionary = Dictionary.load(dictionary_file)

        if load_from is not None:
            self.data = self.load(load_from)
        else:
            dataset_file = os.path.join(data_dir, 'data.json')
            if not os.path.exists(dataset_file):
                self.data = self.create_dataset(data_file)
                self.save(dataset_file)
            self.data = self.load(dataset_file)
Example #30
class Solver:

    problem = None
    dictionary = None

    def __init__(self, problem):
        self.problem = problem
        self.dictionary = Dictionary(problem)

    def solve(self):
        # pivot until solution found
        while self.dictionary.canPivot():
            self.dictionary.pivot()

        # unbounded problem
        if self.dictionary.unbounded:
            raise SolverError("problem is unbounded")

        return self.__getSolution()

    # returns solution only in primal form
    def __getSolution(self):
        dic = self.dictionary

        # convert dual solutions
        if self.problem.dual:
            dual = dic.toProblem()
            primal = dual.getDual()
            dic = Dictionary(primal)
            
        return dic.getSolution()
Example #31
    def test_load_dictionary(self) -> None:
        """Read a dictionary and ensure the number of words matches the number
        of lines; also test that the various exceptions are raised correctly."""
        for filename in TestDictionary.FILENAMES:
            self.dictionary = Dictionary(TestDictionary.DEFAULT_HASH_BASE, TestDictionary.DEFAULT_TABLE_SIZE)
            words = self.dictionary.load_dictionary(filename)
            lines = file_len(filename)
            self.assertEqual(words, lines, "Number of words should match number of lines")
Example #32
    def test_load_dictionary_not_existing_filepath(self):
        dictionary = Dictionary()
        expected = """!!! The file does not exist.
-- loading dictionary finished.
"""
        with patch('sys.stdout', new=StringIO()) as mock:
            dictionary.load_dictionary("notExist.txt")
            self.assertEqual(mock.getvalue(), expected)
Example #33
def test_delete():
    cars = Dictionary()
    cars.set('WRX', 'Subaru')
    cars.set('Cherokee', 'Jeep')
    cars.set('Tacoma', 'Toyota')
    assert cars.get('Cherokee') == 'Jeep'
    cars.delete('Cherokee')
    assert cars.get('Cherokee') is None
Example #34
    def test_load_data(self):
        """
        Unit test for loading data to a dictionary.
        """
        diction = Dictionary()
        diction.load_dictionary("data.json")
        result = True
        self.assertEqual(result, True)
Example #35
    @classmethod
    def create_dictionary_obj(cls):
        """
        Creates a dictionary object, and loads the file into it.
        :return: Dictionary
        """
        my_dictionary = Dictionary()
        my_dictionary.load_dictionary("data.json")
        return my_dictionary
Example #36
    def test_query_definition_query_a_word(self):
        dictionary = Dictionary()
        dictionary.load_dictionary("data.json")
        actual = dictionary.query_definition("abandoned industrial site")
        expected = [
            "Site that cannot be used for any purpose, being contaminated by pollutants."
        ]
        self.assertEqual(actual, expected)
Example #37
    def test_word_not_found(self):
        """
        Tests that querying an unknown word raises the WordNotFound exception.
        """
        d = Dictionary()
        d.load_dictionary("data.json")
        self.assertRaises(WordNotFound, d.query_definition, "xxxx")
Example #38
    def test_is_data_loaded(self):
        """
        Tests that the is_data_loaded function works correctly.
        """
        d = Dictionary()
        d.load_dictionary("data.json")
        self.assertTrue(d.is_data_loaded(), "Dictionary is empty!")
Example #39
class Wordplay:
    def __init__(self):
        self.dico = Dictionary()

    def __del__(self):
        self.dico.close()


    def open(self, dictionary_path):
        self.dico.open(dictionary_path)


    def close(self):
        self.dico.close()


    def search_words(self, pattern):
        for word in self.dico.search_words(pattern):
            yield word


    def letters_for_three_words(self, word1_begin, word2_begin, word3_begin, word_end_len):
        word1_suffixes = set(self._search_suffixes(word1_begin, word_end_len))
        word2_suffixes = set(self._search_suffixes(word2_begin, word_end_len))
        word3_suffixes = set(self._search_suffixes(word3_begin, word_end_len))

        common_suffixes = word1_suffixes & word2_suffixes & word3_suffixes
        for common_suffix in sorted(common_suffixes):
            yield common_suffix


    def quatro(self, prefix1, suffix1, prefix2, suffix2, middleLength):
        word1_middle = set(self._search_middle(prefix1, suffix1, middleLength))
        word2_middle = set(self._search_middle(prefix2, suffix2, middleLength))

        common_middles = word1_middle & word2_middle
        for common_middle in sorted(common_middles):
            yield common_middle


    def _search_middle(self, prefix, suffix, middleLength):
        for word in self.dico.search_words(prefix + "_" * middleLength + suffix):
            middle = word[len(prefix):-len(suffix)]

            yield middle


    def _search_suffixes(self, word_begin, word_end_len):
        for word in self.dico.search_words(word_begin + "_" * word_end_len):
            suffix = word[-word_end_len:]

            yield suffix

    def search_anagrams(self, word):
        for anagram in self.dico.search_anagrams(word):
            yield anagram
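A usage sketch; the dictionary path is a hypothetical placeholder, and '_' is assumed to be the one-letter wildcard, as the _search_* helpers suggest:

wp = Wordplay()
wp.open('words.db')
for word in wp.search_words('c_t'):  # three-letter words matching C?T
    print(word)
for anagram in wp.search_anagrams('listen'):
    print(anagram)
wp.close()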
Example #40
    def __getSolution(self):
        dic = self.dictionary

        # convert dual solutions
        if self.problem.dual:
            dual = dic.toProblem()
            primal = dual.getDual()
            dic = Dictionary(primal)
            
        return dic.getSolution()
Example #41
def test2():
    dictionary = Dictionary()
    dictionary.set_words(["KISSED"])
    board = Board()
    rack = "KISSEDQ"
    solutions = board.generate_solutions(rack, dictionary)
    solution = board.find_best_solution(solutions, dictionary)
    if solution:
        print("Winner: %s" % solution)
        board.add_solution(solution)
    print(board)
    assert solution and solution.score == 32
Example #42
    def test_cross_with_blank(self):
        dic = Dictionary()
        dic.set_words(["SA","JETS"])

        board = Board()
        board.add_word('JET', 5, 4, VERTICAL)
        sol= Solution(8, 4, HORIZONTAL, 'SA', []) 
        sol.determine_score(board, dic)
        self.assertEqual(sol.score, 13)
        sol= Solution(8, 4, HORIZONTAL, 'SA', [0]) 
        sol.determine_score(board, dic)
        self.assertEqual(sol.score, 11)
        board.add_solution(sol)
Example #43
def file_to_dict(path):
    word_file = open(path, 'r')
    dictionary = Dictionary()

    counter = 0
    for line in word_file:
        if re.match('^[a-z]+$', line) is not None:
            dictionary.add_word(line.strip())
        if counter % 25000 == 0:
            print("Loading Dictionary...")
        counter += 1
    dictionary.update_word_count()
    word_file.close()
    return dictionary
Example #44
    def test_blanks_with_same_letter(self):

        dic = Dictionary()
        dic.set_words(["ABA"])
        board = Board()
        solutions = []
        board.generate_solutions_in_line('?BA', dic, 7, HORIZONTAL, solutions)
        words = set([(str(s)) for s in solutions])
        self.assertEqual(words, set(['ABa (7,7,H)', 
            'aBA (7,7,H)', 
            'ABa (7,5,H)', 
            'aBA (7,5,H)', 
            'ABa (7,6,H)', 
            'aBA (7,6,H)']))
Example #45
    def test_two_letter_one_blank(self):
        dic = Dictionary()
        dic.set_words(["DUCE","EGRUGEAI"])

        board = Board()
        board.add_word('DUCE', 7, 4, HORIZONTAL)
        sol = Solution(7, 7, VERTICAL, 'EGRUGEAI', [1])
        sol.determine_score(board, dic)
        sol = Solution(7, 7, VERTICAL, 'EGRUGEAI', [4])
        sol.determine_score(board, dic)

        solutions = board.generate_solutions('RUIAG?E', dic)
        solution = board.find_best_solution(solutions, dic)
        self.assertEqual(solution.score, 80)
Example #46
def search(dictionary_file, postings_file, queries_file, output_file):
    # Build in memory dict from dictionary_file.
    with open(dictionary_file) as dict_file:
        dictionary = Dictionary.from_json(dict_file.read())

    # Process queries.
    with open(output_file, 'w+') as output:
        with open(queries_file) as qfile:
            with PostingsFile(postings_file, mode='r') as pfile:
                for query in qfile:
                    # Strip newline character.
                    query = query.replace('\n', '')
                    query = query.replace('\r', '')
                    prefix_notation = parse_query.infix_to_prefix(query)

                    # Process all words in the query here.
                    processed = []
                    for token in prefix_notation:
                        if parse_query.is_operand(token):
                            token = process_word(token)
                        processed.append(token)

                    query = parse_query.process_infix_query(processed)
                    result = execute_query(query, dictionary, pfile)

                    output.write('%s\n' % ' '.join([str(x) for x in result]))
Example #47
    def __init__(self, max_links_allowed, compress_status):
        self.__html_parser = Parser()
        self.__bfs_tree = Queue()
        self.__unique_links = Dictionary(max_links_allowed)
        self.__compress = compress_status
        self.__pyurlopener = lib.PyURLOpener()
        self.__start_time = datetime.now()
Example #48
    def __init__(self):
        self.possible_words = set()
        self.output = set()
        self.dict = Dictionary()
        with open('Unabr.dict', 'r') as fich:
            self.dict.get_dict(fich)
Example #49
def search(dictionary_file, postings_file, queries_file, output_file):
    # Build in memory dict from dictionary_file.
    with open(dictionary_file) as dict_file:
        dictionary = Dictionary.from_json(dict_file.read())

    # Process queries.
    with open(output_file, 'w+') as output:
        with open(queries_file) as qfile:
            with PostingsFile(postings_file, mode='r',
                    entry_cls=PostingsFileEntryWithFrequencies) as pfile:
                for query in qfile:
                    # Strip newline character.
                    query = query.strip()

                    # Process all words in the query here.
                    query_tokens = process_query(query)
                    query_tf = collections.Counter(query_tokens)
                    query_terms = sorted(set(query_tokens))

                    # Calculate query vector
                    query_vector = \
                        [logtf(query_tf[term]) for term in query_terms]
                    query_vector = list(unit_vector(query_vector))

                    # Execute query
                    results = execute_query(
                        query_terms, query_vector, dictionary, pfile)

                    # Write doc_ids to output file.
                    results = [str(x) for x in results]
                    output.write('%s\n' % ' '.join(results))
Example #50
    def __init__(self, request=None, response=None):
        self.initialize(request, response)
        self.error_msg = ''
        try:
            self.dictionary = Dictionary.create_default()
        except Exception as e:
            self.error_msg += Exceptions.print_exception(e)
Example #51
    def __init__(self, request=None, response=None):
        self.initialize(request, response)
        self.errorMsg = ''
        try:
            self.dictionary = Dictionary.create_default()
        except Exception as e:
            print(Exceptions.format_exception(e))
Example #52
def test1():
    dictionary = Dictionary()
    dictionary.set_words(["OOZ", "OOZS", "PROSAIC", "PROC", "CC"])
    board = Board()

    # With this bug we'll get "PROC" but we want "PROSAIC" (where the S is the
    # plural of "OOZ"), which is longer.
    board.add_word("OOZ", Board.SIZE // 2, Board.SIZE // 2 - 2, HORIZONTAL)
    board.add_word("CC", Board.SIZE // 2 + 1, Board.SIZE // 2 - 2, HORIZONTAL)

    rack = "PROSAIC"
    solutions = board.generate_solutions(rack, dictionary)
    solution = board.find_best_solution(solutions, dictionary)
    if solution:
        print("Winner: %s" % solution)
        board.add_solution(solution)
    print(board)
Example #53
    def __init__(self, input=None):
        super(TextCorpus, self).__init__()
        self.input = input
        self.dictionary = Dictionary()
        if input is not None:
            self.dictionary.addDocuments(self.get_texts())
        else:
            logger.warning("No input document stream provided; assuming "
                           "dictionary will be initialized some other way.")
Example #54
    def __init__(self, dictionary_file_name='', dictionary=None):
        if dictionary_file_name:
            self.dictionary = Dictionary.load_from_file(dictionary_file_name)
        elif dictionary:
            self.dictionary = dictionary
        else:
            raise AIException(
                "You must specify one of the parameters dictionary_file_name or dictionary"
            )
Example #55
    def __init__(self, document_generator, stop_words):
        self.document_generator = document_generator
        self.stop_list = stop_words
        self.dictionary = Dictionary(document_generator())
        self.tfidf_model = TfidfModel(self.dictionary)
        stop_ids = [self.dictionary.token_to_id[stop_word] for stop_word in self.stop_list
                    if stop_word in self.dictionary.token_to_id]
        once_ids = [token_id for token_id, doc_freq in self.dictionary.doc_freqs.items() if doc_freq == 1]
        self.dictionary.filter_tokens(stop_ids + once_ids)
Example #56
def build(training_dir, dict_file, postings_file):
    dictionary = Dictionary()

    # Read each file in the training dir.
    filepaths = []
    for filename in os.listdir(training_dir):
        filepaths.append(os.path.join(training_dir, filename))

    # Sort the filepaths according to doc_id
    filepaths = sorted(filepaths, key=lambda x: int(os.path.basename(x)))

    # Two loops here to have control over the size of the loop.
    # NOTE(michael): for testing.
    # filepaths = filepaths[:10]

    with PostingsFile(postings_file, mode='w+',
            entry_cls=PostingsFileEntryWithFrequencies) as postings_file:
        for filepath in filepaths:
            # TODO(michael): Making assumption that document is an int.
            doc_id = int(os.path.basename(filepath))
            terms = process_file(filepath)
            for term in terms:
                # Create postings file entry if entry does not exist for
                # `(term, doc_id)` pair.
                if not dictionary.has_entry(term, doc_id):
                    # Update the postings file entry for the current term's
                    # previous `(term, doc_id)` pair to point to the entry we
                    # are about to add.
                    if dictionary.get_frequency(term) != 0:
                        previous_node_location = dictionary.get_tail(term)
                        previous_entry = \
                            postings_file.get_entry(previous_node_location)
                        previous_entry.next_pointer = postings_file.pointer
                        postings_file.write_entry(previous_entry)

                    # Add new postings file entry for the `(term, doc_id)` pair.
                    dictionary.add_term(term, doc_id, postings_file.pointer)
                    new_entry = PostingsFileEntryWithFrequencies(doc_id)
                    postings_file.write_entry(new_entry)

                # Update postings file entry term frequency. (Increment).
                # NOTE(michael): We can safely use the tail pointer since we
                # process documents in order and not at random.
                current_term_location = dictionary.get_tail(term)
                current_term_entry = \
                    postings_file.get_entry(current_term_location)
                current_term_entry.term_freq += 1
                postings_file.write_entry(current_term_entry)

    # Write dictionary to file.
    with open(dict_file, 'w') as dictionary_file:
        dictionary_file.write(dictionary.to_json())
Example #57
    def __init__(self):
        self.dict = Dictionary()
        self.spliter = PinyinSpliter()
        self.fitter = Fitter()
        self.picker = Picker(self.dict)
        # self.picker.set([], [], True)

        self.cache = [[0, [], ""]]
        self.candCacheIndex = 0
        self.candStartIndex = 0
        self.candList = []
Example #58
def Reset():
  """Reset the library. Useful for re-initializing to a different server."""
  data.reset()
  ApiFunction.reset()
  Element.reset()
  Image.reset()
  Feature.reset()
  Collection.reset()
  ImageCollection.reset()
  FeatureCollection.reset()
  Filter.reset()
  Geometry.reset()
  List.reset()
  Number.reset()
  String.reset()
  Date.reset()
  Dictionary.reset()
  _ResetGeneratedClasses()
  global Algorithms
  Algorithms = _AlgorithmsContainer()
Example #59
def test_dictionary_has_entry():
    d = Dictionary()
    assert not d.has_entry('asdf', 1)

    d.add_term('asdf', 1, 10)
    assert d.has_entry('asdf', 1)
    assert not d.has_entry('qwer', 1)