import re

import nltk
from nltk.stem.porter import PorterStemmer

# Assumption: `porter` is a module-level stemmer; `Sentence` is defined
# elsewhere in the project.
porter = PorterStemmer()

# Tokens dropped from stemmed sentences: punctuation and possessive markers.
PUNCT_TOKENS = {'.', '`', ',', '_', ';', '(', ')', '?', "'", '!', '"', '``',
                '--', ':', "''", "'s"}


def processFile(self, file_path_and_name):
    try:
        with open(file_path_and_name, 'r') as f:
            text_0 = f.read()

        # Keep only the body between <TEXT> tags, then strip the SGML markup.
        text_1 = re.search(r"<TEXT>.*</TEXT>", text_0, re.DOTALL)
        text_1 = re.sub("<TEXT>\n", "", text_1.group(0))
        text_1 = re.sub("\n</TEXT>", "", text_1)
        text_1 = re.sub("<P>", "", text_1)
        text_1 = re.sub("</P>", "", text_1)
        text_1 = re.sub("\n", " ", text_1)

        # Normalize quoting conventions and newswire artifacts.
        text_1 = re.sub("''", '"', text_1)
        text_1 = re.sub("``", '"', text_1)
        text_1 = re.sub(" +", " ", text_1)
        text_1 = re.sub(" _ ", "", text_1)
        text_1 = re.sub(r"\(AP\) _", " ", text_1)
        text_1 = re.sub(r"&\w+;", " ", text_1)  # drop HTML entities

        sent_tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
        lines = sent_tokenizer.tokenize(text_1.strip())

        # Trim the dateline prefix from the first sentence, if present.
        index = lines[0].find("--")
        if index != -1:
            lines[0] = lines[0][index + 2:]
        index = lines[0].find(" _ ")
        if index != -1:
            lines[0] = lines[0][index + 3:]

        sentences = []
        for sent in lines:
            sent = sent.strip()
            OG_sent = sent[:]
            sent = sent.lower()
            line = nltk.word_tokenize(sent)

            stemmed_sentence = [porter.stem(word) for word in line]
            stemmed_sentence = [x for x in stemmed_sentence
                                if x not in PUNCT_TOKENS and '&' not in x]
            # stemmed_sentence = [word for word in stemmed_sentence
            #                     if word not in stopwords.words('english')]

            # Skip very short sentences; they rarely carry summary content.
            if len(stemmed_sentence) <= 4:
                continue
            sentences.append(Sentence(file_path_and_name, stemmed_sentence, OG_sent))
        return sentences

    except IOError:
        print('Oops! File not found:', file_path_and_name)
        return [Sentence(file_path_and_name, [], [])]
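# A minimal usage sketch (hypothetical): assuming `processFile` belongs to a
# summarizer class and the input is a DUC/TAC-style SGML document with a
# <TEXT> body, the call below would yield one Sentence per usable sentence.
# The class name and file path are illustrative, not from the original code.
#
#     summarizer = Summarizer()
#     doc_sentences = summarizer.processFile("docs/d30001t/APW19981001.0001")
#     for s in doc_sentences[:3]:
#         print(s)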
def _read_file(src_file):
    """Parse a CoNLL-style file into Sentence objects, counting lemmas."""
    data = []
    lemma_count = {}
    total_samples = 0

    with open(src_file, "rt", encoding="utf-8") as src:
        # Per-sentence accumulators.
        words = []
        lemma_words = []
        is_prep = []
        tree = []
        for row in src:
            if row == "\n":
                # A blank line ends the current sentence.
                data.append(Sentence(words, lemma_words, is_prep, tree))
                words = []
                lemma_words = []
                is_prep = []
                tree = []
                continue
            # One token per row, ten whitespace-separated columns.
            tree_id, word, lemma, _, pos, _, parent_idx, context, _, _ = row.split()
            total_samples += 1
            lemma_count[lemma] = lemma_count.get(lemma, 0) + 1

            words.append(word)
            lemma_words.append(lemma)
            is_prep.append(pos in PREP)
            # Store the zero-based parent index and the dependency label.
            tree.append((int(parent_idx) - 1, context))

    return total_samples, lemma_count, data
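# Expected input format (an assumption inferred from row.split() above): a
# CoNLL-like file with ten whitespace-separated columns per token and a blank
# line between sentences, e.g.:
#
#     1   The   the   _   DT    _   2   det     _   _
#     2   cat   cat   _   NN    _   3   nsubj   _   _
#     3   sat   sit   _   VBD   _   0   root    _   _
#     4   on    on    _   IN    _   3   prep    _   _
#
# Column 5 is the POS tag checked against PREP, column 7 the 1-based parent
# index (converted to 0-based, so the root becomes -1), column 8 the label.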
import inspect
from json import loads

import processors  # module whose classes implement the individual processors


def run_processors(self):
    results_target = []
    results_reference = []
    sentences_target = []
    sentences_reference = []
    selected_names = loads(self.config.get('Processors', 'processors'))

    # Map every member of the processors module by name, then keep only the
    # ones selected in the config, preserving the configured order.
    existing_processors = dict(inspect.getmembers(processors))
    selected_processors = [(name, existing_processors[name]) for name in selected_names]

    processors_with_output = []
    for name, my_class in selected_processors:
        instance = my_class()

        # A processor listed under 'from_file' loads cached results instead
        # of recomputing them.
        from_file = False
        if self.config.has_option('Processors', 'from_file'):
            if instance.__class__.__name__ in loads(self.config.get('Processors', 'from_file')):
                from_file = True

        print('Running ' + instance.get_name())
        instance.run(self.config, from_file=from_file)
        print('Getting ' + instance.get_name())
        instance.get(self.config, from_file=from_file)
        print(instance.get_name() + ' finished!')

        if instance.get_output() is not None:
            processors_with_output.append((name, my_class))
            results_target.append(instance.get_result_tgt())
            results_reference.append(instance.get_result_ref())

    # Regroup the results per sentence: one Sentence object per input line,
    # carrying the output of every processor that produced one.
    for i in range(len(results_target[0])):
        my_sentence_tgt = Sentence()
        my_sentence_ref = Sentence()
        for k, (name, my_class) in enumerate(processors_with_output):
            instance = my_class()
            if instance.get_output() is not None:
                my_sentence_tgt.add_data(instance.get_name(), results_target[k][i])
                my_sentence_ref.add_data(instance.get_name(), results_reference[k][i])
        sentences_target.append(my_sentence_tgt)
        sentences_reference.append(my_sentence_ref)

    return [sentences_target, sentences_reference]
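# A sketch of the config section this method expects (option names come from
# the calls above; the values are illustrative). Both options hold JSON
# lists, parsed with json.loads:
#
#     [Processors]
#     processors = ["Tokenizer", "Parser", "Aligner"]
#     from_file = ["Parser"]
#
# Every name in `processors` must match a class in the `processors` module;
# classes listed in `from_file` read cached output instead of recomputing.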
def buildQuery(self, sentences, TF_IDF_w, n):
    """Build a query Sentence from the n words with the highest TF-IDF scores."""
    # TF_IDF_w maps a score to the list of words that share it; walk the
    # scores in descending order until n words have been collected.
    scores = sorted(TF_IDF_w.keys(), reverse=True)
    i = 0
    j = 0
    queryWords = []
    while i < n and j < len(scores):
        for word in TF_IDF_w[scores[j]]:
            queryWords.append(word)
            i += 1
            if i >= n:
                break
        j += 1
    return Sentence("query", queryWords, queryWords)
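# A hedged usage sketch: the shape of TF_IDF_w is inferred from the lookups
# above, and the values here are purely illustrative.
#
#     TF_IDF_w = {0.9: ["storm"], 0.7: ["coast", "wind"], 0.2: ["the"]}
#     query = self.buildQuery(sentences, TF_IDF_w, 3)
#     # -> Sentence("query", ["storm", "coast", "wind"], ["storm", "coast", "wind"])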
def get_clean_sentence(self, punct_cand, punct_ref, cand, ref):
    clean_cand = Sentence()
    clean_ref = Sentence()
    for method in sorted(cand.keys()):
        if method == 'alignments':
            alignments = self.get_clean_alignments(punct_cand, punct_ref, cand, ref)
            clean_data_cand = alignments
            clean_data_ref = alignments
        else:
            clean_data_cand = self.get_clean_data(cand[method], punct_cand)
            clean_data_ref = self.get_clean_data(ref[method], punct_ref)
        clean_cand.add_data(method, clean_data_cand)
        clean_ref.add_data(method, clean_data_ref)
    return clean_cand, clean_ref
def sentence2vec(self, sentence):
    sentence = Sentence(sentence, self.seg)
    vec_bow = self.dictionary.doc2bow(sentence.get_cuted_sentence())
    return self.model[vec_bow]
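# A minimal sketch of how sentence2vec is typically wired up, assuming
# gensim-style Dictionary/model objects (doc2bow and model[bow] are standard
# gensim calls; the surrounding setup below is illustrative, not from the
# original code):
#
#     from gensim import corpora, models
#     self.dictionary = corpora.Dictionary(cut_corpus)
#     corpus = [self.dictionary.doc2bow(tokens) for tokens in cut_corpus]
#     self.model = models.TfidfModel(corpus)
#     vec = self.sentence2vec("some input sentence")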
def set_sentences(self, sentences):
    self.sentences = []
    for i, sent in enumerate(sentences):
        self.sentences.append(Sentence(sent, self.seg, i))