def main(args):
  logging.basicConfig(level=LOGGING_LEVEL, format="DEBUG: %(message)s")

  if len(args) < 3 or len(args) > 4:
    print('usage: %s training-file dev-file [output-dir]' % args[0])
    print('       output-dir is optional, default is "%s"' % OUTPUT_DIR_DEFAULT)
    sys.exit(1)

  training_filename = args[1]
  dev_filename = args[2]
  output_dir = args[3] if len(args) == 4 else OUTPUT_DIR_DEFAULT

  logging.debug('Training models...')

  # train all the models!
  unigram_model = Unigram(training_filename)
  logging.debug('Done training unigram model')
  bigram_model = Bigram(training_filename)
  logging.debug('Done training bigram model')
  trigram_model = Trigram(training_filename)
  logging.debug('Done training trigram model')

  with open(dev_filename, 'r') as dev_file:
    dev_words = [line.strip() for line in dev_file]

  # write predictions out to disk
  unigram_model.write_probability_list(dev_words, get_output_filename(output_dir, dev_filename, 'unigram'))
  logging.debug('Wrote dev set predictions using unigram model')
  bigram_model.write_probability_list(dev_words, get_output_filename(output_dir, dev_filename, 'bigram'))
  logging.debug('Wrote dev set predictions using bigram model')
  trigram_model.write_probability_list(dev_words, get_output_filename(output_dir, dev_filename, 'trigram'))
  logging.debug('Wrote dev set predictions using trigram model')
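# Note: main() above calls a helper get_output_filename() that is not shown in
# this snippet. A minimal sketch, assuming it simply joins the output directory,
# the dev-file basename, and the model name (the naming scheme is an assumption):
import os

def get_output_filename(output_dir, dev_filename, model_name):
  base = os.path.splitext(os.path.basename(dev_filename))[0]
  return os.path.join(output_dir, '%s.%s.probs' % (base, model_name))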
    def find_unigrams(self):
        print("finding unigrams")
        print("input path", self.input_path)
        u = Unigram()
        u.set_input_path(self.input_path)
        u.set_output_path(self.unigram_output_path)
        u.find_unigram()
        self.no_of_words = u.no_of_unigrams
        print("self.no_of_words", self.no_of_words)
        self.words = u.ranked_list
        print("self.words", len(self.words))
Example #3
class Author(object):

    __name = ""
    __unigram = Unigram()
    __bigram = Bigram()
    __trigram = Trigram()

    # Constructor.
    def __init__(self, name):
        self.__name = name
        self.__unigram = Unigram()
        self.__bigram = Bigram()
        self.__trigram = Trigram()

    # Getters.
    def getUnigram(self):
        return self.__unigram

    def getBigram(self):
        return self.__bigram

    def getTrigram(self):
        return self.__trigram

    def getName(self):
        return self.__name

    # Caller method: updates the frequency counts in the unigram, bigram, and trigram models.
    def counterCaller(self, separated_line):
        self.__unigram.counter(separated_line)
        self.__bigram.counter(separated_line)
        self.__trigram.counter(separated_line)

    # Caller method: generates new text from the unigram, bigram, and trigram models.
    def generatorCaller(self, uni_list, bi_list, tri_list):
        self.__unigram.generator(uni_list)
        self.__bigram.generator(bi_list)
        self.__trigram.generator(tri_list)
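# Hedged usage sketch (not from the original source): feed tokenised lines to an
# Author instance to accumulate its n-gram counts. The corpus file name and the
# whitespace tokenisation are assumptions.
author = Author("sample_author")
with open("sample_author.txt") as corpus:
    for line in corpus:
        author.counterCaller(line.split())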
Example #4
def unigram(train_sentences, test_sentences):
    #  convert the sentences to tagged samples, since sentence structure is not needed here
    train = sentences_to_samples(train_sentences)
    test = sentences_to_samples(test_sentences)

    unigram_HMM = Unigram(train)
    unigram_HMM.train()

    #  initialisation of lists of samples containing known and unknown words
    test_known_words, test_unknown_words = divide_test_to_known_and_unknown_samples(
        train_sentences, test_sentences)

    #  evaluation of the accuracy for each case
    print("Accuracy rate for unknown words: ",
          unigram_HMM.get_accuracy_rate(np.array(test_unknown_words)))
    print("Accuracy rate for known words: ",
          unigram_HMM.get_accuracy_rate(np.array(test_known_words)))
    print("Total accuracy rate: ",
          unigram_HMM.get_accuracy_rate(np.array(test)))
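# unigram() above depends on sentences_to_samples(), which is not shown. A
# minimal sketch, assuming each sentence is already a sequence of (word, tag)
# pairs that only needs to be flattened into one list of samples:
def sentences_to_samples(sentences):
    return [sample for sentence in sentences for sample in sentence]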
Example #5
    def __init__(self, name):
        self.__name = name
        self.__unigram = Unigram()
        self.__bigram = Bigram()
        self.__trigram = Trigram()
Example #6
print(os.cpu_count())
Config.num_threads = os.cpu_count()

Config.epsilon = args.epsilon
Config.learning_rate = args.lr
Config.lamb = args.lamb
Config.t = args.t

data = IOModule()
data_set = data.read_file(Config.train_data)
valid_set = data.read_file(Config.validate_data)
test_set = data.read_file(Config.test_data)

if args.model == 'unigram':
    model = Unigram(data_set, valid_set, test_set)
elif args.model == 'ngram':
    model = BiTrigram(data_set, valid_set, test_set)
elif args.model == 'custom':
    model = CustomModel(data_set, valid_set, test_set)
elif args.model == 'best':
    model1 = BiTrigram(data_set, valid_set, test_set)
    model2 = CustomModel(data_set, valid_set, test_set)
    model = Model(data_set, valid_set, test_set)
    model.combine_features_from_models(model1, model2)

model.generate_input_matrix()

model.gradient_ascent()

model.plot_output(args.model)
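# This script reads its settings from an args object that is not defined in the
# excerpt. A hedged sketch of a matching argparse setup; the flag names, types,
# and defaults are assumptions:
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--model', choices=['unigram', 'ngram', 'custom', 'best'], default='unigram')
parser.add_argument('--epsilon', type=float, default=1e-4)
parser.add_argument('--lr', type=float, default=0.1)
parser.add_argument('--lamb', type=float, default=0.0)
parser.add_argument('--t', type=int, default=1)
args = parser.parse_args()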
Example #7
# Strip the error annotations: delete every token from a "targ=" marker
# through the closing "</ERR>" tag.
i = 0
while i < len(origWords):
    if origWords[i].startswith("targ="):
        while origWords[i] != "</ERR>":
            del origWords[i]
        del origWords[i]
        i += 1
    else:
        i += 1




### Build the unigram and bigram models.
unigram = Unigram(correctWords)
bigram = Bigram(correctWords, unigram)
# lettersMap counts character unigrams and bigrams over the correct words,
# plus a word-boundary marker and boundary+first-letter pairs.
lettersMap = {}

lettersMap[EditDistance.WORDBOUNDARY] = len(correctWords)
for word in correctWords:
    for i in range(len(word)):
        lettersMap[word[i]] = lettersMap.get(word[i], 0) + 1
    lettersMap[EditDistance.WORDBOUNDARY + word[0]] = lettersMap.get(EditDistance.WORDBOUNDARY + word[0], 0) + 1
    for i in range(len(word) - 1):
        lettersMap[word[i] + word[i + 1]] = lettersMap.get(word[i] + word[i + 1], 0) + 1

# Build the edit-distance counts: they are stored in a hashmap keyed by tuples such as ('ins', 'a', 'ab') -> 22
changeMap = {}
wrongWordsSet = {origWords[i] for i in correctionIndexes}
start = time.time()
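# Illustrative sketch of the changeMap layout described above (the exact
# semantics are an assumption): a key such as ('ins', 'a', 'ab') would count how
# often the context 'a' in the correct word was typed as 'ab', i.e. a 'b' was
# inserted after 'a'.
def record_change(change_map, kind, correct, typed):
    key = (kind, correct, typed)
    change_map[key] = change_map.get(key, 0) + 1

record_change(changeMap, 'ins', 'a', 'ab')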