Пример #1
0
 def __init__(self, filename='word.vec', dict=None):
     """Set up an empty word container.

     filename -- accepted but not read anywhere in this method.
     dict     -- dictionary object to reuse.  Any falsy value (``None`` or
                 an object that evaluates as empty) is replaced by a new
                 ``dictionary.Dictionary()``.
     """
     # NOTE: the ``dict`` parameter hides the builtin of the same name
     # inside this method; it is part of the call signature.
     self.ncols = 10  # a word is converted to a list of 10 items
     self.dict = dict if dict else dictionary.Dictionary()
     self.data = []
Пример #2
0
    def buildDictionary(self):
        """
        Build ``self.dictionary`` from all documents in ``self.documents``.

        Every document is fetched from its source, tokenized and normalized
        by that same source, and then passed to ``doc2bow`` with
        ``allowUpdate=True`` so that unseen tokens receive new ids.  A
        progress line is logged every 1000 documents and a summary at the
        end.
        """
        logging.info(
            "creating dictionary from %i articles" % len(self.documents))
        self.dictionary = dictionary.Dictionary()
        numPositions = 0
        for docNo, (sourceId, docUri) in enumerate(self.documents):
            if not docNo % 1000:
                progress = (docNo, len(self.documents), sourceId, docUri)
                logging.info(
                    "PROGRESS: at document #%i/%i (%s, %s)" % progress)
            source = self.config.sources[sourceId]
            tokens = source.tokenize(source.getContent(docUri))
            words = [source.normalizeWord(token) for token in tokens]
            numPositions += len(words)

            # the bag-of-words result is discarded on purpose: the call is
            # made only for its side effect of registering token ids
            self.dictionary.doc2bow(words, allowUpdate=True)
        summary = (self.dictionary, len(self.documents), numPositions)
        logging.info(
            "built %s from %i documents (total %i corpus positions)" % summary)
Пример #3
0
        def create_dictionary(direction):
            """Build a Dictionary from the tokenizer vocabulary and save it.

            Uses ``tok`` and ``path`` from the enclosing scope.  The result
            is written to ``dict.<direction>.txt`` inside ``path``.
            """
            vocab = dictionary.Dictionary()
            sp_model = tok.sp_model
            pieces = [
                [sp_model.id_to_piece(idx), idx]
                for idx in range(sp_model.get_piece_size())
            ]
            # The first three pieces must be these specials, in this order;
            # they are removed and not added through add_symbol below.
            for special in ("<pad>", "</s>", "<unk>"):
                assert pieces.pop(0)[0] == special
            for piece, _ in pieces:
                vocab.add_symbol(piece)

            # Additional special tokens are appended in ascending id order.
            extra_tokens = sorted(
                zip(tok.additional_special_tokens,
                    tok.additional_special_tokens_ids),
                key=lambda pair: pair[1],
            )
            for piece, _ in extra_tokens:
                vocab.add_symbol(piece)

            tokenizer_ids = {piece: idx for piece, idx in pieces}
            tokenizer_ids.update(
                zip(tok.additional_special_tokens,
                    tok.additional_special_tokens_ids)
            )

            # Sanity check: every non-reserved symbol sits exactly one
            # position above its tokenizer id.
            reserved = {"<Lua heritage>", "<pad>", "</s>", "<unk>"}
            for piece, idx in vocab.indices.items():
                if piece in reserved:
                    continue
                assert idx == tokenizer_ids[piece] + 1

            vocab.finalize()
            vocab.save(os.path.join(path, f"dict.{direction}.txt"))
Пример #4
0
def main():
    """Run an interactive prompt that exercises a ``dictionary.Dictionary``.

    Each input line is split on single spaces:

    * one token -- ``display`` toggles the flag passed to ``batch_lookup``,
      ``print`` prints the stored words, ``exit`` ends the loop; any other
      single token is ignored.
    * two tokens -- ``<command> <argument>``, where the command is one of
      ``insert``, ``delete``, ``batch_load``, ``batch_delete``,
      ``batch_lookup`` or ``lookup``; unknown commands are ignored.
    * more than two tokens -- the line is ignored.

    End of input (closed stdin / Ctrl-D) ends the session the same way
    ``exit`` does instead of propagating ``EOFError``.
    """
    # Local renamed so that it no longer hides the builtin ``dict``.
    word_dict = dictionary.Dictionary()
    display = False
    # NOTE(review): ``w`` is not defined inside this function; presumably a
    # module-level banner string -- confirm.
    print(w)
    mutating_commands = ('insert', 'delete', 'batch_load', 'batch_delete')
    command = ""
    while command != "exit":
        try:
            line = input(">>> ").split(' ')
        except EOFError:
            break
        # str.split(' ') always yields at least one element, so only the
        # "too many tokens" case has to be rejected here.
        if len(line) > 2:
            continue
        if len(line) == 1:
            command = line[0]
            if command == 'display':
                display = not display
            elif command == 'print':
                word_dict.print_words()
            continue

        command, args = line
        if command in mutating_commands:
            # Show the dictionary state before and after the change.
            word_dict.print_info()
            print(getattr(word_dict, command)(args))
            word_dict.print_info()
        elif command == 'batch_lookup':
            print("Found: ", word_dict.batch_lookup(args, display))
        elif command == 'lookup':
            found, _ = word_dict.lookup(args)
            print("Word is in the Dictionary"
                  if found else "Word is not in the Dictionary")
Пример #5
0
 def calculate(self):
     """Validate the three entry fields and show the outcome in ``self.txt``.

     ``self.ent1`` and ``self.ent2`` supply the two values to check and
     ``self.ent3`` an integer (presumably the numeral-system base --
     confirm) used to select the translation dictionary.  Each value is
     passed through a ``translator.Translator``; if either one yields no
     collection, or the integer field does not parse, the "recheck the
     entered data" message is shown.  Otherwise the result of
     ``count(self.choose.get())`` is displayed.  If any field is empty the
     "not all fields are filled" message is shown.
     """
     def show(message):
         # Replace the whole content of the output widget.
         self.txt.delete(0.0, END)
         self.txt.insert(0.0, message)

     if not (self.ent1.get() and self.ent2.get() and self.ent3.get()):
         show("Не все поля заполнены!")
         return

     first = str(self.ent1.get())
     second = str(self.ent2.get())
     try:
         count_system = int(self.ent3.get())
     except ValueError:
         # A non-numeric value is invalid input, not a program error.
         show("Перепроверьте введенные данные!")
         return

     # creating the dictionary for translating
     self.dictionary = dictionary.Dictionary("base.csv", "r", ",")
     for value in (first, second):
         # creating the translator for this value; the last one built
         # stays available as self.translator
         self.translator = translator.Translator(
             self.dictionary.get_dictionary(count_system), value)
         if self.translator.get_collection() is None:
             show("Перепроверьте введенные данные!")
             return

     show(count(self.choose.get()))
Пример #6
0
 def initFromFile(self):
     """Load the disk image stored in ``disk.txt`` into this object.

     The file is consumed strictly in this order:

     * 1 line     -> ``self.super_block``
     * 128 lines  -> block bitmap, joined and handed to ``fat.FAT``
     * 4 lines    -> FCB bitmap
     * 512 records of 2 lines each -> the FCB list
     * 2048 lines -> appended (stripped) to ``self.data``
     """
     setup.init()
     with open('disk.txt') as disk:
         self.super_block = super_block.SuperBlock(disk.readline())
         block_bits = "".join(disk.readline().strip() for _ in range(128))
         self.fat = fat.FAT(block_bits)
         fcb_bits = "".join(disk.readline().strip() for _ in range(4))

         fcbs = []
         for index in range(512):
             fields = disk.readline().split(" ")
             second_line = disk.readline()
             fields += second_line.split(" ")
             # field order:
             # name file_type pos create_time update_time
             # size first_block write_user read_user delete_able is_able
             if second_line == "-1\n":
                 # placeholder record
                 fcbs.append(FCB('-1', '-1', -1, '-1', '-1', -1,
                                 '-1', '-1', '-1', -1, -1))
                 continue
             # NOTE(review): evalPro is assumed to be a pure conversion, so
             # each field is converted once and the value reused -- confirm.
             pos = evalPro(fields[2])
             size = evalPro(fields[5])
             delete_able = evalPro(fields[9])
             is_able = evalPro(fields[10])
             fcbs.append(
                 FCB(fields[0], fields[1], pos, fields[3], fields[4],
                     size, fields[6], fields[7], fields[8], delete_able,
                     is_able))
             if is_able == 1 and pos >= 0:
                 # register this record with the record stored at index pos
                 fcbs[pos].child_list.append(index)
         self.dictionary = dictionary.Dictionary(fcb_bits, fcbs)

         self.data.extend(disk.readline().strip() for _ in range(2048))
Пример #7
0
    def initialize(self, length_count, length_to_word):
        """Prepare a new round: blank state, secret word and solver.

        length_count   -- mapping from word length to a count; used only to
                          decide whether the candidate list is truncated to
                          50 entries when printed.
        length_to_word -- mapping from word length to the words of that
                          length.

        In multiplayer mode (``self.multiplayer`` is 'y', case-insensitive)
        the word is read from the keyboard until it has the right length
        and is a known word.  Otherwise a level is read and an
        ``AI.AI_Computer`` picks the word.
        """
        size = self.word_length
        self.guessed_letters = set()
        # initialize game state: one blank per letter
        self.game_state.extend('_' for _ in range(size))

        if self.multiplayer.lower() == 'y':
            # show the candidate words (at most 50 of them)
            print("Here are possible words of such length: ")
            shown = (length_to_word[size][:50]
                     if length_count[size] >= 50 else length_to_word[size])
            print(shown)
            word = str(
                input("Please choose a word of length " + str(size) + ": "))
            while not (len(word) == size and word in length_to_word[size]):
                word = str(
                    input("Invalid choice. Please choose a word of length " +
                          str(size) + ":"))
        else:
            # let an AI pick a word according to the chosen level
            self.level = str(
                input(
                    "Please choose a level (easy,intermediate,medium,hard): "))
            while self.level not in self.set_levels:
                self.level = str(
                    input(
                        "Please choose a proper level (easy,intermediate,medium,hard): "
                    ))
            self.computer = AI.AI_Computer(
                self.level, dictionary.Dictionary(length_to_word[size]))
            word = self.computer.choose_word()
        # the comment in the original notes this may be a bag of words at
        # the hard level
        self.word = word

        # NOTE(review): self.level is assigned here only in the AI branch;
        # in multiplayer mode it must already exist -- confirm.
        solver_factory = (AI.AI_solver
                          if self.level != "hard" else AI.AI_hard_solver)
        self.solver = solver_factory()
        # the solver gets its own Dictionary over the same candidate words
        self.solver.dic = dictionary.Dictionary(length_to_word[size])
Пример #8
0
def main(solve, board):
	"""Return the words found by ``solve`` that are at least MIN_LENGTH long.

	``solve`` is called as ``solve(board, d, SIZE)`` where ``d`` is a
	``dictionary.Dictionary`` built from ``DICTS``; its results are kept
	in the order they are produced.
	"""
	lexicon = dictionary.Dictionary(DICTS)
	return [
		found for found in solve(board, lexicon, SIZE)
		if len(found) >= MIN_LENGTH
	]
Пример #9
0
def test_dictionary():
    """A Dictionary built from a list must equal the index -> value mapping."""
    # Arrange
    values = [6, 2, 84, 9]
    expected = dict(enumerate(values))

    # Act
    actual = dictionary.Dictionary(values)

    # Assert
    assert expected == actual
Пример #10
0
 def __init__(self, node_id, port, num_dc):
     """Create the per-node state.

     node_id -- identifier of this node; also passed to the time table and
                to the dictionary.
     port    -- stored in ``self.port``.
     num_dc  -- passed to ``timeTable.TimeTable`` together with node_id.
     """
     # plain bookkeeping values
     self.node_id, self.port = node_id, port
     self.addr = ''
     self.c = None
     self.threads = []

     # objects created for this node (the socket is only created here)
     self.s = socket.socket()
     self.timeTable = timeTable.TimeTable(num_dc, node_id)
     self.hostname = socket.gethostname()  # local machine name
     self.log = log.Log()
     self.dictionary = dictionary.Dictionary(node_id)
Пример #11
0
 def test_add_first_word_to_board(self):
     """
     Adding the first word away from the center
     of the board must raise ValueError
     """
     board = sbd.Board(5)
     lexicon = sdi.Dictionary(['aa'])
     # NOTE(review): the tests in this file that expect success pass 'd' or
     # 'r' as direction; confirm 'down' is accepted, otherwise the error
     # raised here may come from the direction and not from the position.
     with self.assertRaises(ValueError):
         board.add_word([0, 0], 'down', 'aa', lexicon)
Пример #12
0
 def test_wrong_direction(self):
     """
     An unknown direction string must be
     rejected with ValueError
     """
     board = sbd.Board(5)
     lexicon = sdi.Dictionary(['aa'])
     with self.assertRaises(ValueError):
         board.add_word([0, 0], 'asdf', 'aa', lexicon)
Пример #13
0
 def test_add_word_out_of_board(self):
     """
     A start position outside the board
     must be rejected with ValueError
     """
     board = sbd.Board(5)
     lexicon = sdi.Dictionary(['aa'])
     # NOTE(review): the tests in this file that expect success pass 'd' or
     # 'r' as direction; confirm 'down' is accepted, otherwise the error
     # raised here may come from the direction and not from the position.
     with self.assertRaises(ValueError):
         board.add_word([-1, 2], 'down', 'aa', lexicon)
Пример #14
0
 def test_add_too_large_word(self):
     """
     A word longer than the board
     must be rejected with ValueError
     """
     board = sbd.Board(5)
     lexicon = sdi.Dictionary(['aa'])
     # NOTE(review): the tests in this file that expect success pass 'd' or
     # 'r' as direction; confirm 'down' is accepted, otherwise the error
     # raised here may come from the direction and not from the word size.
     with self.assertRaises(ValueError):
         board.add_word([0, 2], 'down', '123456789', lexicon)
def init_dictionary():
    """Return a Dictionary holding a unigram table and a label table.

    The unigram table gets ``constants.UNK_SYMBOL`` as its unknown symbol;
    the segmentation-label table is filled with every label in
    ``constants.SEG_LABELS``.
    """
    dic = dictionary.Dictionary()

    # unigram table
    dic.create_table(constants.UNIGRAM)
    unigram_table = dic.tables[constants.UNIGRAM]
    unigram_table.set_unk(constants.UNK_SYMBOL)

    # segmentation-label table, registering every known label
    dic.create_table(constants.SEG_LABEL)
    label_table = dic.tables[constants.SEG_LABEL]
    for seg_label in constants.SEG_LABELS:
        label_table.get_id(seg_label, update=True)

    return dic
Пример #16
0
def init_dictionary(num_attrs=0):
    """Return a Dictionary with a unigram table and ``num_attrs`` attribute tables.

    The unigram table gets ``constants.UNK_SYMBOL`` as its unknown symbol.
    Attribute tables are named ``constants.ATTR_LABEL(i)`` for
    ``i`` in ``range(num_attrs)``; no unknown symbol is set on them.
    """
    dic = dictionary.Dictionary()

    # unigram table
    dic.create_table(constants.UNIGRAM)
    unigram_table = dic.tables[constants.UNIGRAM]
    unigram_table.set_unk(constants.UNK_SYMBOL)

    # one table per attribute
    for attr_index in range(num_attrs):
        dic.create_table(constants.ATTR_LABEL(attr_index))

    return dic
Пример #17
0
def main():
    """Look up the command-line search terms and write them to ``tabterms.txt``.

    Command line:
      search_terms   one or more terms to look up
      -l / --lang    key into ``dictionaries`` (default ``'en'``)
      -t / --trans   0: from Danish, 1: to Danish, 2: both ways

    Output file layout: the search terms joined by spaces on the first
    line, the prefix returned by ``tab`` on the second, then one matching
    term per line.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('search_terms', nargs="+")
    # NOTE(review): the default 'en' is not among the languages named in
    # the help text, and argparse does not check defaults against
    # ``choices`` -- confirm 'en' is a key of ``dictionaries``.
    parser.add_argument('-l',
                        '--lang',
                        nargs='?',
                        default='en',
                        dest='lang',
                        choices=dictionaries.keys(),
                        help=u'language (enda, daen, de, fr)')
    parser.add_argument('-t',
                        '--trans',
                        nargs='?',
                        default=0,
                        dest="translate",
                        choices=['0', '1', '2'],
                        help='0: from Danish, 1: to Danish, 2: both ways')
    args = parser.parse_args()

    translate = int(args.translate)
    language = args.lang
    search_terms = [term.strip(' ') for term in args.search_terms]
    language_name = dictionaries[language]['name']
    directions = [('fromDanish', 'Dansk-%s' % language_name),
                  ('toDanish', '%s-Dansk' % language_name)]
    # Keep only the requested direction; dictionaries whose 'doubflag' is
    # below 2 are always searched from Danish only.
    if dictionaries[language]['doubflag'] < 2 or translate == 0:
        del directions[1]
    elif translate == 1:
        del directions[0]

    dic = dictionary.Dictionary(dictionaries)
    tables = [('lookup', 'Artikler')]
    if len(search_terms) > 1:
        tables.append(('collocation_lookup', 'Ordforbindelser'))
    prefix, tab_terms = tab(dic, search_terms, directions, tables, language)

    # A single term without article hits is retried as a collocation.
    if not tab_terms and len(search_terms) == 1:
        tables = [('collocation_lookup', 'Ordforbindelser')]
        prefix, tab_terms = tab(dic, search_terms, directions, tables,
                                language)

    # The context manager closes the file even when a write fails.
    with open('tabterms.txt', 'w') as ofile:
        ofile.write("%s\n" % " ".join(search_terms))
        ofile.write("%s\n" % prefix)
        for term in tab_terms:
            ofile.write("%s\n" % (term))
Пример #18
0
 def __init__(self):
     """Set the configuration constants and create the DB and dictionary helpers."""
     # field / key names
     self.WORD_NAME = 'word'
     self.PLUS_NAME = 'plus'
     self.MINUS_NAME = 'minus'
     self.START_NAME = 'start'
     self.FINAL_NAME = 'final'
     self.DATE_NAME = 'date'
     self.FINANCE_NAME = 'finance'

     # numeric limits
     self.LIMIT_COUNT = 5
     self.SPLIT_COUNT = 1000
     self.LIMIT_YEAR_SEPERATOR = 5
     self.INTERVAL_YEAR_SEPERATOR = 73
     self.THREAD_LIMIT_COUNT = 4

     # collaborators
     self.dbm = dbmanager.DBManager()
     self.dic = dictionary.Dictionary()
Пример #19
0
    def testRadiusServerStart(self):
        """Start the RADIUS server, authenticate against it, then stop it.

        Steps:
          1. start a ``radserver.Radserver`` with this host registered as a
             user;
          2. send an Access-Request and require an Access-Accept reply;
          3. stop the server;
          4. resend the request and require that sending now fails.
        """
        ipaddress = socket.gethostbyname(socket.gethostname())
        ipaddress_port = (ipaddress, 6000)
        try:
            ## Start the Radius Server
            RadiusSvr = radserver.Radserver('10.0.28.131')
            RadiusSvr.AddUser(ipaddress, 'passwd')
            RadiusSvr.Start()
        except Exception:
            # ``except Exception`` (not a bare except) so that
            # KeyboardInterrupt / SystemExit are not reported as failures.
            self.fail("Failed to Start RADIUS server")

        ## Test whether the RADIUS server responds by sending an authentication request
        srv = client.Client(
            server="radiusprimary",
            secret="passwd",
            dict=dictionary.Dictionary(
                "/usr/lib/python2.3/site-packages/pyrad/dictionary"))
        try:
            srv.bind(ipaddress_port)

            req = srv.CreateAuthPacket(code=pyrad.packet.AccessRequest,
                                       User_Name="test",
                                       NAS_Identifier=socket.gethostbyname(
                                           socket.gethostname()))
            req["User-Password"] = req.PwCrypt("passwd")

            reply = srv.SendPacket(req)
        except Exception:
            self.fail(
                "Failed to create and send Authentication request packet")
        print("Authentication Request packet sent")
        if reply.code == pyrad.packet.AccessAccept:
            print(
                "RADIUS server is running and has Accepted the AccessRequest packet"
            )
        else:
            self.fail("RADIUS server failed to Accept the request")

        try:
            RadiusSvr.Stop()
        except Exception:
            self.fail("Failed to STOP the RADIUS server")

        try:
            srv.SendPacket(req)
        except Exception:
            # Expected outcome: the stopped server no longer answers, so
            # the start/stop cycle succeeded.
            pass
        else:
            self.fail("Failed to Stop RADIUS server")
Пример #20
0
 def test_add_second_word_not_in_di(self):
     """
     Adding a second word that forms a word missing
     from the dict/lexicon must raise ValueError
     """
     board = sbd.Board(5)
     lexicon = sdi.Dictionary(['aa', 'ab'])
     # first word, placed at [2, 2]
     board.add_word([2, 2], 'd', 'aa', lexicon)
     # second word, placed at [2, 1] with the same direction
     with self.assertRaises(ValueError):
         board.add_word([2, 1], 'd', 'ab', lexicon)
Пример #21
0
 def test_add_second_word_not_ol(self):
     """
     A second word that does not overlap the words
     already on the board must raise ValueError
     """
     board = sbd.Board(5)
     lexicon = sdi.Dictionary(['aa'])
     # first word, placed at [2, 2]
     board.add_word([2, 2], 'd', 'aa', lexicon)
     # same word again at [0, 0], away from the first one
     with self.assertRaises(ValueError):
         board.add_word([0, 0], 'd', 'aa', lexicon)
Пример #22
0
 def test_add_first_word(self):
     """
     A first word added correctly shows up on the board
     """
     board = sbd.Board(5)
     lexicon = sdi.Dictionary(['aa'])
     board.add_word([2, 2], 'r', 'aa', lexicon)
     # one string per board row, expanded to a list of single characters
     expected_rows = ('.....',
                      '.....',
                      '..aa.',
                      '.....',
                      '.....')
     self.assertEqual(board.board, [list(row) for row in expected_rows])
Пример #23
0
def translated_data():
    """Return ``[vectorized validation reviews, polarity array]``.

    The vocabulary is built from the preprocessed training reviews found
    under a hard-coded directory.  The validation reviews and their
    polarities are read from two text files in the working directory.
    """
    # NOTE: ``dict`` below is whatever the module bound to that name (it
    # must provide ``Dictionary``), not the builtin type.
    directory = 'C:\\Users\\olive\\Desktop\\Datasets_for_thesis\\Prisjakt\\training_data'
    extracted = extractor.json_extract(directory)
    reviews, polarities = extracted[0], extracted[1]
    vocabulary = dict.Dictionary(preprocessor.preprocess(reviews)).dictionary
    # review_translator.translate_reviews(preprocessed_reviews, polarities)
    with open('untranslated_reviews validation combined.txt', 'r') as review_file:
        cleaned = preprocessor.preprocess(review_file.readlines())
        vectors = vectorizer.vectorize_data(cleaned, vocabulary, 300)
        untranslated_reviews = np.concatenate(vectors)
    with open('translated_polarities validation combined.txt', 'r') as polarity_file:
        # one integer polarity per line
        labels = [int(row) for row in polarity_file]
    return [untranslated_reviews, np.array(labels)]
Пример #24
0
 def __init__(self, path=None, raw=None):
     """Open an existing store or start a new one.

     path -- JSON file written earlier; when given, its 'logs' and
             'dictionary' entries are loaded and ``raw`` is ignored.
     raw  -- JSON file passed through ``self.convert``; used only when
             ``path`` is None.
     """
     self._path = path
     self._dict = dic.Dictionary()

     if self._path is not None:
         # existing store: take everything from the file
         with codecs.open(self._path, 'r', encoding='utf-8') as f:
             self._data = json.load(f)
             self._logs = self._data['logs']
             self._dict.set(self._data['dictionary'])
         return

     # new store: empty log list shared between self._logs and self._data
     self._logs = []
     self._data = {'logs': self._logs, 'dictionary': None}
     if raw is None:
         return
     with codecs.open(raw, 'r', encoding='utf-8') as f:
         self.convert(json.load(f))
Пример #25
0
def main():
    """Check that every dictionary's data files exist, then start the GUI.

    A message is printed for each language whose 'gddfile' or 'datfile' is
    missing; if any is missing the GUI is not started.
    """
    # check whether the languages are available
    any_missing = False
    for lang in dictionaries.values():
        if os.path.exists(lang['gddfile']) and os.path.exists(lang['datfile']):
            continue
        folder = os.path.dirname(os.path.abspath(lang['gddfile']))
        print('Ordbogsfiler for %s kan ikke findes i mappen %s' % (
            lang['name'], folder))
        any_missing = True
    if any_missing:
        return

    dic = dictionary.Dictionary(dictionaries)
    gui.DictionaryGUI(dic).run()
Пример #26
0
 def __init__(self):
     """Create the game state, the screen and the word source, then start a game."""
     # plain state values
     self.prevKey = ''
     self.should_keep_going = True
     self.paused = False
     self.hp = 30
     self.words = []
     self.wordsOnScreen = 10
     self.pos_in_word = 0
     self.probableWords = []

     # helpers; the screen is built from the stats object
     self.wordDictionary = dictionary.Dictionary("dictionary_testfile")
     self.stat = stats.Stats()
     self.scrn = screen.Screen(self.stat)
     self.scrn.scr.nodelay(1)
     self.last_time = datetime.datetime.now()
     # candidate values 1 .. screen height - 11
     self.poss_vals = list(range(1, self.scrn.height - 10))

     debug.init()
     self.new_game()
Пример #27
0
def initLanguage():
    """Build the Translator from the deinflection rules and dictionary database.

    The data directory is this module's own directory when the interpreter
    lives in a folder named "Python27"; otherwise it is
    ``<directory of sys.executable>/yomi_base/japanese`` (the original
    comment calls this the fix for --onefile).
    """
    # NOTE(review): the check matches only an interpreter located in a
    # folder literally named "Python27"; any other unpackaged interpreter
    # takes the executable-relative branch -- confirm this is intended.
    exe_dir = os.path.dirname(sys.executable)
    if os.path.basename(exe_dir) == "Python27":
        directory = os.path.dirname(__file__)
    else:
        directory = exe_dir + "/yomi_base/japanese"

    deinflector = deinflect.Deinflector(
        os.path.join(directory, 'deinflect.json'))
    word_db = dictionary.Dictionary(os.path.join(directory, 'dictionary.db'))
    return translate.Translator(deinflector, word_db)
Пример #28
0
def get_text_utts(indir, compilexpath):
    """Load the texts in ``indir`` and turn them into utterances.

    indir        -- directory passed to ``load_txt_dir``.
    compilexpath -- path passed to ``dictionary.Dictionary``.

    Raises SiReError when any word reported by ``get_oov_words`` is
    missing from the dictionary; those words are printed first.  As a side
    effect, ``dictionary`` and ``intype`` are set on the ``args`` object
    that this function reads from outside its own scope.
    """
    txt = load_txt_dir(indir)
    dct = dictionary.Dictionary(compilexpath)

    oov = get_oov_words(txt, dct)
    if len(oov) > 0:
        print("Please remove all OOV word containing sents or add the words to dictonary before proceeding.")
        for missing_word in oov:
            print(missing_word)
        raise SiReError("OOV words present, cannot continue.")

    args.dictionary = dct
    args.intype = "txt"
    return get_utts(txt, args)
    def __init__(self):
        """Load the word list, build the quiz window and enter the Tk main loop.

        This call does not return until the window is closed, because it
        ends with ``self.root.mainloop()``.
        """
        self.dictionary = dictionary.Dictionary()
        self.dictionary.load_dictionary('japonais.csv')

        self.show_furigana = True
        self.show_roumaji = True

        # main window; <Return> triggers the same handler as the button
        self.root = tkinter.Tk()
        self.root.title('Words Quiz')
        self.root.config(padx=20, pady=20)
        self.root.bind('<Return>', self.check)

        # "Select" menu with its two commands
        self.menubar = tkinter.Menu(self.root)
        self.menu = tkinter.Menu(self.menubar, tearoff=0)
        menu_entries = (("Furigana", self.set_furigana),
                        ("Roumaji", self.set_roumaji))
        for entry_label, handler in menu_entries:
            self.menu.add_command(label=entry_label, command=handler)
        self.menubar.add_cascade(label="Select", menu=self.menu)
        self.root.config(menu=self.menubar)

        # three labels, packed top to bottom
        self.native = tkinter.Label(self.root, font=('Arial', 20))
        self.native.pack()

        self.alternate = tkinter.Label(self.root, font=('Arial', 10))
        self.alternate.pack()

        self.latin = tkinter.Label(self.root, font=('Arial', 10))
        self.latin.pack()

        # answer field and check button
        self.entry = tkinter.Entry(self.root, justify='center')
        self.entry.pack()

        self.button = tkinter.Button(self.root, text='Check',
                                     command=self.check)
        self.button.pack_configure(pady=10)
        self.button.pack()

        self.new_random_word()

        self.root.mainloop()
Пример #30
0
 def test_add_second_word_neigh(self):
     """
     A second word that neighbors the first one
     without overlapping it is placed on the board
     """
     board = sbd.Board(5)
     lexicon = sdi.Dictionary(['aa', 'aaa'])
     # the same word is added twice, at [2, 2] and then at [2, 3]
     for start in ([2, 2], [2, 3]):
         board.add_word(start, 'd', 'aa', lexicon)
     # one string per board row, expanded to a list of single characters
     expected_rows = ('.....',
                      '.....',
                      '..aa.',
                      '..aa.',
                      '.....')
     self.assertEqual(board.board, [list(row) for row in expected_rows])