Example #1
    def __init__(self, vocab, data_path, history_len, batch_size=1, overlap=False, progress=False, fixed_length=True,
                 target_vector=False, _just_test=False, shuffle=True, max_words=999, min_words=0):
        """
        Generate data for training with RNN
        :type vocab: nlpy.lm.Vocab
        :type data_path: str
        :param history_len: if this value is -1, then one chunk is a sentence
        :type history_len: int
        :type target_vector: bool
        """
        self._vocab = vocab
        self._target_vector = target_vector
        self._just_test = _just_test
        self.history_len = history_len
        self.batch_size = batch_size
        self.minibatch_mode = not (batch_size == 1)
        self.fixed_length = fixed_length
        self.progress = progress
        self.overlap = overlap
        self.shuffle = shuffle

        self.sentences = []

        # Treat each sentence as a chunk, wrapped with sentence-boundary indices
        for line in LineIterator(data_path):
            sequence = [vocab.sent_index]
            # Approximate word count via spaces; skip sentences outside [min_words, max_words]
            wc = line.count(" ") + 1
            if wc < min_words or wc > max_words:
                continue
            for w in line.split(" "):
                sequence.append(vocab.index(w))
            sequence.append(vocab.sent_index)
            self.sentences.append(sequence)
        logging.info("%d sentences loaded from %s" % (len(self.sentences), data_path))
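A minimal usage sketch of the constructor above. The class name RNNDataGenerator and the file name train.txt are assumptions (the snippet only shows __init__); Vocab comes from the nlpy.lm type named in the docstring, and a load method like the one in Example #5 below is assumed for building it.

# Hypothetical usage sketch: class name and file paths are assumptions;
# only the keyword arguments come from the signature above.
from nlpy.lm import Vocab

vocab = Vocab()
vocab.load("train.txt")            # build the vocabulary first (see Example #5)
generator = RNNDataGenerator(      # assumed name for the class defining this __init__
    vocab, "train.txt",
    history_len=-1,                # -1: treat each whole sentence as one chunk
    batch_size=1,
    shuffle=True)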
Example #2
    def _load_source(self):
        tokenizer = NLTKEnglishTokenizer()
        counter = Counter()
        # Count lowercased token frequencies over the source file
        for l in LineIterator(self.source):
            counter.update(map(str.lower, tokenizer.tokenize(l)))

        self._freqmap = dict(counter.items())
        # Reference frequency: twice the mean token frequency
        self._maxfreq = sum(self._freqmap.values()) * 2 / len(self._freqmap)
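As a quick check on that last line with made-up numbers: if the counter held {"the": 100, "cat": 20, "sat": 30}, the sum is 150 over 3 entries, so _maxfreq = 150 * 2 / 3 = 100, i.e. twice the mean frequency (integer division, since the surrounding code is Python 2).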
Example #3
 def _build_data(self, path):
     data = []
     for l in LineIterator(path):
         l = l.lower()
         # Convert the lowercased line into vocabulary indices; skip lines that convert to nothing
         word_data = self.vocab.convert(l)
         if len(word_data) == 0:
             continue
         data.append([word_data])
     return data
Example #4
 def _load_frequency(self):
     self._maxfreq = 3000
     self._freqmap = {}
     # Each line is "frequency<TAB>word"; keep only entries at or below the cap
     for line in LineIterator(_FREQ_DATA_PATH):
         freq, word = line.split("\t")
         freq = int(freq)
         if freq > self._maxfreq:
             continue
         self._freqmap[word] = freq
Example #5
 def load(self, path, fixed_size=-1):
     logging.info("load data from %s" % path)
     if fixed_size > 0:
         self._load_fixed_size(path, fixed_size)
         return
     for line in LineIterator(path):
         words = line.split(" ")
         for w in words:
             self.add(w)
     logging.info("vocab size: %d" % self.size)
Example #6
 def _load_fixed_size(self, path, fixed_size):
     from collections import Counter
     logging.info("fixed size: %d" % fixed_size)
     counter = Counter()
     for line in LineIterator(path):
         words = line.split(" ")
         counter.update(words)
     # Keep only the fixed_size most frequent words
     for w, _ in counter.most_common(fixed_size):
         self.add(w)
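For reference, Counter.most_common(n) returns the n highest-count (word, count) pairs, which is what limits the vocabulary to fixed_size entries. A standalone standard-library check, unrelated to nlpy:

from collections import Counter
c = Counter("a a b a b c".split())
print c.most_common(2)   # [('a', 3), ('b', 2)]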
Example #7
File: recase.py  Project: Satssuki/nlpy
 def __init__(self):
     """
     Initialize recase map.
     """
     self._recase_map = {}
     # Scan the frequency file; the first cased form seen for each lowercased word wins
     for line in LineIterator(_FREQ_DATA_PATH):
         _, word = line.split("\t")
         low_word = word.lower()
         if low_word not in self._recase_map:
             self._recase_map[low_word] = word
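A small illustration of what the resulting map is used for; the entries are made up, since the real contents depend on _FREQ_DATA_PATH:

# Hypothetical contents mirroring the shape of self._recase_map
recase_map = {"london": "London", "nlp": "NLP"}
print recase_map.get("london", "london")   # -> London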
Example #8
 def _build_data(self, path):
     data = []
     for l in LineIterator(path):
         l = l.lower()
         # Keep only characters that appear in the known character set
         chars = filter(lambda x: x in self.chat_set, l)
         if not chars:
             continue
         # Map characters to ids, reserving 0 as the boundary marker added on both ends
         char_ids = [self.chat_set.index(c) + 1 for c in chars]
         char_ids = [0] + char_ids + [0]
         # One-hot encode each id as a vector of length input_size
         word_data = [
             np.eye(1, M=self.input_size, k=c)[0] for c in char_ids
         ]
         data.append([word_data])
     return data
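The np.eye(1, M=..., k=c)[0] expression above takes the single row of a 1 x M matrix whose 1 sits in column c, i.e. a one-hot vector of length M. A standalone check using only numpy, nothing nlpy-specific:

import numpy as np
print np.eye(1, M=5, k=2)[0]   # [ 0.  0.  1.  0.  0.]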
Example #9
    def serve(param):
        from nlpy.util import external_resource
        from nlpy.util import LineIterator
        import urllib2
        global semantic_searcher
        # Lazily build the searcher on the first request and keep it as a module-level global
        if "semantic_searcher" not in globals():
            print "Loading searcher ..."
            data = LineIterator(
                external_resource("general/elementary_questions.txt"))
            semantic_searcher = SemanticSearcher()
            semantic_searcher.load_data(data)

        # Results the client has already seen, passed URL-encoded and " ||| "-separated
        caches = set()
        if "caches" in param:
            caches = set(urllib2.unquote(param["caches"]).split(" ||| "))
        print caches
        output = ""
        # Return the first search result the client has not already seen
        for _, result in semantic_searcher.searchMany(
                param['input'].decode('utf-8')):
            if result not in caches:
                output = result
                break
        return {"output": output}
Example #10
 def _load_ranking(self):
     self._rank_list = []
     for l in LineIterator(_MASSIVE_WORD_LIST):
         self._rank_list.append(l)
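All ten examples drive LineIterator(path) the same way: iterate over a text file and get back one line per step, apparently with the trailing newline already stripped (the bare split(" ") / split("\t") calls would otherwise leave "\n" inside words). A rough stand-in with that behaviour, written here as a sketch and not the actual nlpy.util implementation:

class LineIterator(object):
    """Sketch only: yield each line of a text file, newline stripped."""
    def __init__(self, path):
        self._path = path

    def __iter__(self):
        with open(self._path) as f:
            for line in f:
                yield line.rstrip("\n")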