Example #1
0
    def export(self, models_fname, dictionary_fname, config_fname):
        print("exporting Classifier for Caesar to read")
        print("models to be saved in", models_fname)
        print("dictionary to be saved in", dictionary_fname)
        print("config to be saved in", config_fname)

        if self.type != "svm":
            print("Only know how to export SVMs")
            return
        lines = []
        for this_tuple in self.classifiers:
            if self.classifiers[this_tuple] is not None:
                t = this_tuple
                if Tuples.is_generic(this_tuple[-1]):
                    t = this_tuple[:-1] + ("<generic_value>", )
                lines += ['(' + ','.join(t) + ')']
                lines += utils.svm_to_libsvm(
                    self.classifiers[this_tuple].model)
                lines += [".", ""]
        models_savefile = open(models_fname, "w")
        for line in lines:
            models_savefile.write(line + "\n")
        models_savefile.close()

        # save dictionary
        json_dictionary = []
        dictionary_items = sorted(self.dictionary.items(), key=lambda x: x[1])
        assert [x[1] for x in dictionary_items] == list(range(len(self.dictionary)))
        keys = [list(x[0]) for x in dictionary_items]

        json.dump(keys, open(dictionary_fname, "w"))

        # save config
        config_savefile = open(config_fname, "w")
        config_savefile.write(
            "# Automatically generated by CNetTrain scripts\n")
        options = {
            "FEATURES": json.dumps(self.features),
            "MAX_ACTIVE_TUPLES": str(self.tuples.max_active),
            "TAIL_CUTOFF": str(self.tuples.tail_cutoff),
            "MODELS": os.path.join(os.getcwd(), models_fname),
            "DICTIONARY": os.path.join(os.getcwd(), dictionary_fname),
        }
        if "cnet" in self.features:
            index = self.features.index("cnet")
            cnf = self.feature_extractors[index]
            options["MAX_NGRAM_LENGTH"] = str(cnf.max_length)
            options["MAX_NGRAMS"] = str(cnf.max_ngrams)
        for key in options:
            this_line = "CNET   : %s" % key
            this_line = this_line.ljust(30)
            this_line += "= " + options[key]
            config_savefile.write("\t" + this_line + "\n")
        config_savefile.close()
        print("exported Classifier.")
Example #2
0
    def tuple_calculate(self, this_tuple, log_turn, log_input_key="batch"):
        final_ngrams = self.final_ngrams
        # do we need to add generic ngrams?
        new_ngrams = []
        if Tuples.is_generic(this_tuple[-1]):
            gvalue = this_tuple[-1]
            for ngram in final_ngrams:
                new_ngram = cn_ngram_replaced(ngram, gvalue.value.lower(), "<generic_value>")
                if new_ngram is not False:
                    new_ngrams.append(new_ngram)

        return {ng.string_repn(): ng.score() for ng in new_ngrams}
Example #3
0
    def tuple_calculate(self, this_tuple, log_turn, log_input_key="batch"):
        final_ngrams = self.final_ngrams
        # do we need to add generic ngrams?
        new_ngrams = []

        if Tuples.is_generic(this_tuple[-1]):
            gvalue = this_tuple[-1]
            for ngram, score in final_ngrams:
                if gvalue.value is not None and gvalue.value.lower() in ngram:
                    new_ngram = ngram.replace(gvalue.value.lower(), "<generic_value>")
                    new_ngrams.append((new_ngram, score))

        return dict(new_ngrams)
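
The substring substitution in Example #3 can also be exercised in isolation; the ngram text, score, and slot value below are made up for the demonstration.

# Hedged sketch of the generic-value rewriting from Example #3; only the
# .replace()-based substitution mirrors the method above.
value = "chinese"
ngram, score = "serves chinese food", 0.8
if value in ngram:
    new_ngram = ngram.replace(value, "<generic_value>")
    print({new_ngram: score})  # {'serves <generic_value> food': 0.8}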
Example #4
0
    def tuple_calculate(self, this_tuple, log_turn, log_input_key="batch"):
        if Tuples.is_generic(this_tuple[-1]):
            return {"<generic_value=" + this_tuple[-1].value + ">": 1}
        else:
            return {}