Python split_words примеры использования

Язык программирования: Python

Пространство имен/Пакет: utility.stringUtil

Метод/Функция: split_words

Примеров на hotexamples.com: 3

Python split_words — найдено 3 примера. Это лучшие примеры кода на Python для utility.stringUtil.split_words, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Пример #1

Показать файл

    def handle_data(self):
        """Index the words of the corpus and record each document's length.

        Reads ``self.corpus`` line by line. Every line for which
        ``match_docheader`` returns a truthy match starts a new document:
        ``self.doc_id`` is incremented, ``self.doc_offset`` is reset to 0 and
        "<first match group> <doc_id> " is written to ``self.doc_id_output``.
        The word count gathered for a document is written when the next
        header is met, and once more after the last line.

        Each word returned by ``split_words`` is located in its line, passed
        through ``stem`` and lower-cased. Non-empty results that are not in
        ``self.stop_word`` are handed to ``self.__add_word_index`` together
        with their offset measured from the start of the current document.
        """
        with open(self.doc_id_output, 'w+') as doc_file, \
                open(self.corpus, 'r') as corpus:
            word_total = 0
            for line in corpus:
                header = match_docheader(line)
                if header:
                    self.doc_id += 1
                    self.doc_offset = 0
                    # Finish the previous document's record before starting
                    # the new one.
                    if self.doc_id > 1 and word_total > 0:
                        doc_file.write('%d\n' % word_total)
                        word_total = 0
                    doc_name = header.groups()[0]
                    doc_file.write(doc_name + ' ' + str(self.doc_id) + ' ')

                tokens = split_words(line)
                word_total += len(tokens)

                # Search resumes just past the previous hit so that repeated
                # words map to successive positions in the line.
                resume_at = 0
                for token in tokens:
                    position = line.find(token, resume_at)
                    resume_at = position + len(token)
                    # Stem reduction
                    term = stem(token).lower()
                    if term in self.stop_word or not term:
                        continue
                    self.__add_word_index(term, self.doc_offset + position)
                self.doc_offset += len(line)

            doc_file.write(str(word_total))

Пример #2

Показать файл

Файл: bking.py Проект: Riozhcd/BKing

def main():
    """Parse the command line from ``sys.argv`` and run a ``BKing`` query.

    Recognised flags:
        -w <file>    required; passed to ``BKing`` as its first argument.
        -r <count>   optional; converted with ``int`` (default 10) and
                     passed to ``BKing`` as its third argument.
        -ql <words>  every argument after the flag is given to
                     ``stem_query`` as a list.
        -qs <words>  every argument after the flag is joined with spaces,
                     split with ``split_words`` and then given to
                     ``stem_query``.

    Exactly one of ``-ql`` / ``-qs`` must be present. Both take everything
    up to the end of the command line, so they should come last.

    On a malformed command line ``usage()`` is called and the function
    returns without calling ``BKing``.
    """
    args = sys.argv
    has_list_query = "-ql" in args
    has_string_query = "-qs" in args

    # NOTE(review): usage() is defined elsewhere and it is not visible here
    # whether it terminates the process, so return explicitly after calling
    # it instead of falling through to BKing() with unbound variables.
    if len(args) < 3 or "-w" not in args:
        usage()
        return
    if has_list_query == has_string_query:
        # Either no query flag at all or both of them at once.
        usage()
        return

    return_count = 10
    try:
        file_name = args[args.index("-w") + 1]
        if "-r" in args:
            return_count = int(args[args.index("-r") + 1])
    except (IndexError, ValueError):
        # A flag without a value, or a non-integer result count.
        usage()
        return

    if has_list_query:
        query_strlist = stem_query(args[args.index("-ql") + 1:])
    else:
        # Join the remaining arguments into one string; str() on the list
        # would produce its repr, brackets and quotes included.
        query_string = " ".join(args[args.index("-qs") + 1:])
        query_strlist = stem_query(split_words(query_string))

    BKing(file_name, query_strlist, return_count)

Пример #3

Показать файл

Файл: corpusParser.py Проект: Riozhcd/BKing

	def handle_data(self):
		"""Walk the corpus, index its words and log per-document word counts.

		The corpus file ``self.corpus`` is read one line at a time. A line on
		which ``match_docheader`` succeeds opens a new document: ``self.doc_id``
		goes up by one, ``self.doc_offset`` restarts at 0, and the first match
		group plus the new id are written to ``self.doc_id_output``. The number
		of words counted for a document is appended when the following header
		appears, and again after the final line of the corpus.

		For every word that ``split_words`` yields, its position in the line is
		looked up, the word is run through ``stem`` and lower-cased, and, unless
		the result is empty or listed in ``self.stop_word``, it is passed to
		``self.__add_word_index`` with its offset inside the current document.
		"""
		with open(self.doc_id_output, 'w+') as doc_file, \
			open(self.corpus, 'r') as corpus:
			words_in_doc = 0
			for line in corpus:
				header_match = match_docheader(line)
				if header_match:
					self.doc_id += 1
					self.doc_offset = 0
					# Close the record of the document that just ended.
					if self.doc_id > 1 and words_in_doc > 0:
						doc_file.write(''.join((str(words_in_doc), '\n')))
						words_in_doc = 0
					doc_file.write(' '.join((header_match.groups()[0], str(self.doc_id), '')))

				words = split_words(line)
				words_in_doc += len(words)

				# cursor is where the next lookup starts: right after the
				# previously located word.
				cursor = 0
				for raw_word in words:
					found_at = line.find(raw_word, cursor)
					cursor = found_at + len(raw_word)
					# Stem reduction
					stemmed = stem(raw_word).lower()
					if stemmed in self.stop_word or not stemmed:
						continue
					self.__add_word_index(stemmed, self.doc_offset + found_at)
				self.doc_offset += len(line)

			doc_file.write(str(words_in_doc))