def fileStart(self, file_name):
    """Open the output files derived from *file_name*.

    One file, ``<file_name>.kept``, receives the sentences that are kept.
    In addition one file per entry of ``self.filters`` is opened, named
    ``<file_name>.f<i>`` where ``i`` is the position of the filter in the
    list; it receives the sentences filtered by that particular filter.

    The opened handles are stored in ``self.kept_file`` and
    ``self.filtered_files`` (same order as ``self.filters``).
    """
    self.kept_file = FileWriter(file_name + '.kept').open()
    # range() instead of the Python-2-only xrange(): the filter list is
    # short, and range() behaves the same on Python 2 and Python 3.
    self.filtered_files = [FileWriter(file_name + '.f' + str(i)).open()
                           for i in range(len(self.filters))]
Пример #2
0
def export_model(model_file, out_file):
    """Load an LSI model from *model_file* and dump it as text to *out_file*.

    Output layout:

    * first line: ``numTerms<TAB>numTopics``;
    * then, for every term id in ``0 .. numTerms - 1``, two lines:
      the term's token with trailing non-alphanumeric characters removed
      (possibly an empty line), followed by the tab-separated values of
      column ``term`` of ``model.projection.u.T``.

    The file is written through ``FileWriter``; per the original author the
    output is UTF-8 encoded (presumably handled by FileWriter -- confirm).
    """
    # NOTE: only LSI models are supported; the model is always loaded with
    # LsiModel regardless of how it was trained.
    model = LsiModel.load(model_file)
    with FileWriter(out_file, 'w').open() as out:
        out.write(u"{0}\t{1}\n".format(model.numTerms, model.numTopics))
        # Loop invariants: look these up once instead of once per term.
        id2token = model.id2word.id2token
        u_transposed = model.projection.u.T
        for term in range(model.numTerms):
            word = id2token[term]
            # Decode only byte strings. Calling .decode() on text fails on
            # Python 3 and triggers an implicit ASCII encode on Python 2.
            if isinstance(word, bytes):
                word = word.decode("utf-8")
            # Strip trailing non-alphanumeric characters with one slice.
            end = len(word)
            while end > 0 and not word[end - 1].isalnum():
                end -= 1
            out.write(u"{0}\n".format(word[:end]))
            weights = numpy.asarray(u_transposed[:, term]).flatten()
            out.write(u"{0}\n".format(u"\t".join(str(f) for f in weights)))
class SentenceFilterCallback(ConllCallback):
    """Splits the sentences of a file into "kept" and "filtered" outputs.

    Sentences are collected word by word; at the end of each sentence the
    registered filters are consulted in registration order. A sentence for
    which every filter's ``filter(sentence)`` returns a true value goes to
    the ``.kept`` file; otherwise it goes to the file of the first filter
    that rejected it.

    Per the original author the intended use is dropping incomplete
    sentences (no final period, question mark, etc., or no verb); the actual
    criteria are whatever the filters added via addFilter() implement.
    """
    def __init__(self):
        self.sentence = []
        self.filters = []
        # Output handles are created by fileStart(). These defaults make
        # fileEnd() safe to call even if no file was ever started.
        self.kept_file = None
        self.filtered_files = []

    def addFilter(self, filter):
        """Adds a SentenceFilter to the filter list.

        ``None`` and filters that are already registered are ignored.
        """
        if filter is not None and filter not in self.filters:
            self.filters.append(filter)

    def fileStart(self, file_name):
        """Opens output files for the sentences kept, as well as one for each
        filter, where the sentences filtered by that particular filter are
        written.

        The kept file is ``<file_name>.kept``; the file for the filter at
        position ``i`` is ``<file_name>.f<i>``.
        """
        self.kept_file = FileWriter(file_name + '.kept').open()
        # range() rather than the Python-2-only xrange().
        self.filtered_files = [FileWriter(file_name + '.f' + str(i)).open()
                               for i in range(len(self.filters))]

    def sentenceStart(self):
        """Starts collecting a new sentence."""
        self.sentence = []

    def word(self, attributes):
        """Appends one word (its sequence of attribute strings) to the
        current sentence."""
        self.sentence.append(attributes)

    def sentenceEnd(self):
        """Writes the collected sentence to the appropriate output file.

        Empty sentences are skipped. Only the first rejecting filter
        receives the sentence; later filters are not consulted.
        """
        if not self.sentence:
            return
        for i, sentence_filter in enumerate(self.filters):
            if not sentence_filter.filter(self.sentence):
                self._write_sentence(self.filtered_files[i])
                return
        self._write_sentence(self.kept_file)

    def _write_sentence(self, out):
        """Writes the current sentence to *out*: one word per line with
        tab-separated attributes, followed by an empty line."""
        out.write(u"\n".join(
            u"\t".join(word) for word in self.sentence))
        out.write(u"\n\n")

    def fileEnd(self):
        """Closes all the files."""
        try:
            if self.kept_file is not None:
                self.kept_file.close()
        finally:
            # Close the per-filter files even if closing the kept file
            # raised, so their handles are not leaked.
            for ff in self.filtered_files:
                ff.close()