示例#1
0
    def write(self, filename="-", weights=None):
        '''Write the templates and all non-negligible feature weights.

        filename -- output path; "-" (the default) means sys.stdout.
        weights  -- mapping from feature string to numeric weight;
                    defaults to self.weights.

        The output consists of, in order: whatever self.print_templates()
        emits; one "feature<TAB>weight" line (weight with 5 decimals) for
        every feature whose absolute weight exceeds 1e-3, in sorted feature
        order; and, when self.unk > 0, one final line that starts with a
        space and lists the sorted known words.  Progress and timing go to
        `logs`.  A file opened here is closed before returning, also when
        writing fails; sys.stdout is left open.
        '''

        if weights is None:
            weights = self.weights

        to_stdout = filename == "-"
        if to_stdout:
            outfile = sys.stdout
            filename = "STDOUT"  # from here on only used in the log message
        else:
            outfile = open(filename, "wt")

        try:
            self.print_templates(outfile)

            mytime = Mytime()

            nonzero = 0
            print >> logs, "sorting %d features..." % len(weights),
            # Sort before the loop so that the "done" line is emitted even
            # when there are no features at all.
            feats = sorted(weights)
            print >> logs, "done in %.2lf seconds." % mytime.period()
            print >> logs, "writing features to %s..." % filename

            for f in feats:
                v = weights[f]
                if math.fabs(v) > 1e-3:  # drop (near-)zero weights
                    print >> outfile, "%s\t%.5lf" % (f, v)
                    nonzero += 1

            if self.unk > 0:  # print known words
                # the leading " " marks this line as the known-words line
                print >> outfile, " " + " ".join(sorted(self.knowns))

            # nonzero counts written lines, not the number of features seen
            print >> logs, "%d nonzero feature instances written in %.2lf seconds." % \
                  (nonzero, mytime.period())
        finally:
            if not to_stdout:
                outfile.close()
示例#2
0
    def read_weights(self, filename, infertemplates=False):
        '''Load feature weights and the known-words line from `filename`.

        Instances are like "s0t-q0t=LRB-</s>=>LEFT     3.8234".

        filename       -- handed to self.read_templates(); the object it
                          returns is iterated line by line (None means
                          there is nothing to read).
        infertemplates -- ignored: the value is always recomputed below as
                          "no templates were loaded".

        Every "feature weight" line is stored as
        self.weights[feature] = float(weight).  A line starting with a space
        is the known-words line: its words become self.knowns, self.unk is
        set to 1 and reading stops.  Blank lines are skipped.  When no
        templates were loaded, each feature's template (the text before the
        first "=") is registered through self.add_template().  Finishes by
        calling self.print_autoevals().
        '''

        infile = self.read_templates(filename)

        # The argument is overridden unconditionally.
        infertemplates = len(self.templates) < 1
        if infertemplates:
            print >> logs, "will infer templates from weights..."

        mytime = Mytime()
        i = 0
        if infile is not None:
            print >> logs, "reading feature weights from %s\t" % filename,
            for i, line in enumerate(infile, 1):
                if i % 200000 == 0:
                    print >> logs, "%d lines read..." % i,

                if line[0] == " ":
                    # TODO: separate known words line (last line)
                    self.knowns = set(line.split())
                    print >> logs, "\n%d known words read." % len(self.knowns)
                    self.unk = 1  # in case you forgot to say it; doesn't matter 1 or x
                    break

                fields = line.split()
                if not fields:
                    # A blank line carries no weight; skip it instead of
                    # failing on the tuple unpack below.
                    continue

                feat, weight = fields
                self.weights[feat] = float(weight)

                if infertemplates:
                    self.add_template(feat.split("=", 1)[0],
                                      1)  ## one occurrence

        print >> logs, "\n%d feature instances (%d lines) read in %.2lf seconds." % \
              (len(self.weights), i, mytime.period())

        self.print_autoevals()
示例#3
0
    def write(self, filename="-", weights=None):
        '''Write the templates and all non-negligible feature weights.

        filename -- output path; "-" (the default) means sys.stdout.
        weights  -- mapping from action to a mapping from feature key to
                    weight, iterated with iteritems(); defaults to
                    self.weights.  Feature keys look like "tid=instance".

        After whatever self.print_templates() emits, every feature whose
        absolute weight exceeds 1e-3 is written as
        "template=instance=>action<TAB>weight" (weight with 5 decimals),
        where template is self.list_templates[int(tid)][0].  Features are
        sorted within each action.  A summary line goes to `logs`.  A file
        opened here is closed before returning, also when writing fails;
        sys.stdout is left open.
        '''

        if weights is None:
            weights = self.weights

        to_stdout = filename == "-"
        outfile = sys.stdout if to_stdout else open(filename, "wt")

        try:
            self.print_templates(outfile)

            mytime = Mytime()

            nonzero = 0
            # my wvector
            for action, feats in weights.iteritems():
                for f in sorted(feats):
                    v = float(feats[f])  # convert once; reused for test and output
                    if math.fabs(v) > 1e-3:  # TODO
                        tid, feat = f.split("=", 1)
                        print >> outfile, "%s=%s=>%s\t%.5lf" % (
                            self.list_templates[int(tid)][0], feat, action, v)
                        nonzero += 1

            # nonzero counts written lines, not the number of features seen
            print >> logs, "%d nonzero feature instances written in %.2lf seconds." % \
                  (nonzero, mytime.period())
        finally:
            if not to_stdout:
                outfile.close()
示例#4
0
    def read_weights(self, filename, infertemplates=False):
        '''Load feature weights from `filename` into self.weights.

        Instances are like "s0t-q0t=LRB-</s>=>LEFT     3.8234".

        filename       -- handed to self.read_templates(); the object it
                          returns is iterated line by line (None means
                          there is nothing to read).
        infertemplates -- ignored: the value is always recomputed below as
                          "at most one template is known".

        Where a weight is stored depends on the configuration:
          Model.doublehash == 1 with FLAGS.tuplefeats:
              self.weights[action][(tid, part, ...)], the parts being the
              "|"-separated pieces of the instance (mapped through
              Vocab.str2id when FLAGS.integerize is set)
          Model.doublehash == 1 otherwise:
              self.weights[action][feature]
          Model.doublehash == 2:
              self.weights[Model.mapnames[action]][tid][feature]
          any other value:
              self.weights[feature], action suffix included
        With FLAGS.use_template_id the template name at the front of the
        feature string is replaced by its id from self.templates.  Blank
        lines are skipped.  Finishes by calling self.print_autoevals().
        '''

        infile = self.read_templates(filename)

        # The argument is overridden unconditionally.
        infertemplates = len(self.templates) <= 1
        if infertemplates:
            print >> logs, "will infer templates from weights..."

        mytime = Mytime()
        i = 0
        if infile is not None:
            # The tuple layout and the doublehash == 2 layout are keyed by
            # template id, so the feature must be split for them even when
            # FLAGS.use_template_id is off.  Without this, `instance` and
            # `tid` would be unbound and the first line would raise
            # NameError.
            keyed_by_tid = (Model.doublehash == 2 or
                            (Model.doublehash == 1 and FLAGS.tuplefeats))
            need_tid = FLAGS.use_template_id or keyed_by_tid

            print >> logs, "reading feature weights from %s\t" % filename,
            for i, line in enumerate(infile, 1):
                if i % 200000 == 0:
                    print >> logs, "%d lines read..." % i,

                fields = line.split()
                if not fields:
                    # A blank line carries no weight; skip it instead of
                    # failing on the tuple unpack below.
                    continue

                feat, weight = fields
                weight = WVector.value_class(
                    float(weight))  # in case of mydouble

                if need_tid:
                    template, instance = feat.split("=", 1)
                    tid = self.templates[template]
                    if FLAGS.use_template_id:
                        feat = "%d=%s" % (tid, instance)

                if Model.doublehash == 1:
                    if FLAGS.tuplefeats:
                        f, action = instance.rsplit("=>", 1)
                        #action = Model.mapnames[action]
                        fs = tuple(f.split("|"))
                        if FLAGS.integerize:
                            fs = tuple(map(Vocab.str2id, fs))
                        self.weights[action][(tid, ) + fs] = weight
                    else:
                        f, action = feat.rsplit("=>", 1)
                        #action = Model.mapnames[action]
                        self.weights[action][f] = weight

                elif Model.doublehash == 2:
                    f, action = instance.rsplit("=>", 1)
                    action = Model.mapnames[action]
                    self.weights[action][tid][f] = weight
                else:
                    self.weights[feat] = weight

                if infertemplates:
                    # NOTE(review): with FLAGS.use_template_id set, `feat`
                    # already starts with the numeric id here, not the
                    # template name -- confirm this is what add_template
                    # expects.
                    self.add_template(feat.split("=", 1)[0],
                                      1)  ## one occurrence

        # NOTE(review): for the doublehash layouts len(self.weights) is the
        # number of top-level keys, not of feature instances.
        print >> logs, "\n%d feature instances (%d lines) read in %.2lf seconds." % \
              (len(self.weights), i, mytime.period())

        self.print_autoevals()
示例#5
0
    def read_weights_and_insert_different_noise(self,
                                                filename,
                                                noise_info,
                                                infertemplates=False):
        '''Load feature weights from `filename`, perturbing each with noise.

        Instances are like "s0t-q0t=LRB-</s>=>LEFT     3.8234".

        filename       -- handed to self.read_templates(); the object it
                          returns is iterated line by line (None means
                          there is nothing to read).
        noise_info     -- dict, e.g.
                              noise_info = {
                                  'method': FLAGS.noise_method,
                                  'mu': FLAGS.mu,
                                  'sigma': FLAGS.sigma,
                                  'noise_file_path': FLAGS.noise_file_path,
                              }
                          Only 'method', 'mu' and 'noise_file_path' are
                          read; 'sigma' is not used because the scale comes
                          per line from the noise file.
        infertemplates -- ignored: the value is always recomputed below as
                          "at most one template is known".

        The array loaded with numpy.load(noise_file_path) is paired with the
        file line by line.  For each pair
            noise = coeff * numpy.random.randn() + mu
        and the weight becomes weight + noise for method 'a', weight * noise
        for method 'm', and is left unchanged for any other method.  Reading
        stops at the shorter of the two inputs; a warning is logged when
        lines of the weights file are left over.  Blank lines are skipped
        (they still use up their noise entry).  Weights are stored as in the
        plain reader: by Model.doublehash / FLAGS.tuplefeats layout, with
        the template name replaced by its id under FLAGS.use_template_id.
        Finishes by calling self.print_autoevals().
        '''
        # Python 2 `zip` would first build a list of every (line, noise)
        # pair; izip pairs them lazily.
        from itertools import izip

        method = noise_info['method']
        mu = noise_info['mu']
        noise_file_path = noise_info['noise_file_path']
        noises_vector = numpy.load(noise_file_path)

        infile = self.read_templates(filename)

        # The argument is overridden unconditionally.
        infertemplates = len(self.templates) <= 1
        if infertemplates:
            print >> logs, "will infer templates from weights..."

        mytime = Mytime()
        i = 0
        if infile is not None:
            # The tuple layout and the doublehash == 2 layout are keyed by
            # template id, so the feature must be split for them even when
            # FLAGS.use_template_id is off.  Without this, `instance` and
            # `tid` would be unbound and the first line would raise
            # NameError.
            keyed_by_tid = (Model.doublehash == 2 or
                            (Model.doublehash == 1 and FLAGS.tuplefeats))
            need_tid = FLAGS.use_template_id or keyed_by_tid

            print >> logs, "reading feature weights from %s\t" % filename,
            lines = iter(infile)
            # The noise vector comes first so that, when it runs out before
            # the file does, no line has been consumed yet and the leftover
            # check after the loop can see it.
            for i, (noises_vector_coeff, line) in enumerate(
                    izip(noises_vector, lines), 1):

                if i % 200000 == 0:
                    print >> logs, "%d lines read..." % i,

                fields = line.split()
                if not fields:
                    # A blank line carries no weight; skip it instead of
                    # failing on the tuple unpack below.
                    continue

                feat, weight = fields
                weight = float(
                    weight
                )  # WVector.value_class(float(weight)) # in case of mydouble
                noise_ = float(
                    noises_vector_coeff) * numpy.random.randn() + float(mu)

                if method == 'a':  # additive noise
                    weight = weight + noise_
                elif method == 'm':  # multiplicative noise
                    weight = weight * noise_

                if need_tid:
                    template, instance = feat.split("=", 1)
                    tid = self.templates[template]
                    if FLAGS.use_template_id:
                        feat = "%d=%s" % (tid, instance)

                if Model.doublehash == 1:
                    if FLAGS.tuplefeats:
                        f, action = instance.rsplit("=>", 1)
                        # action = Model.mapnames[action]
                        fs = tuple(f.split("|"))
                        self.weights[action][(tid, ) + fs] = weight
                    else:
                        f, action = feat.rsplit("=>", 1)
                        # action = Model.mapnames[action]
                        self.weights[action][f] = weight

                elif Model.doublehash == 2:
                    f, action = instance.rsplit("=>", 1)
                    action = Model.mapnames[action]
                    self.weights[action][tid][f] = weight
                else:
                    self.weights[feat] = weight

                if infertemplates:
                    self.add_template(feat.split("=", 1)[0],
                                      1)  ## one occurrence

            if next(lines, None) is not None:
                # Same truncation as before, but no longer silent.
                print >> logs, "\nWARNING: noise vector %s has fewer entries than the weights file; remaining lines were ignored." % noise_file_path

        print >> logs, "\n%d feature instances (%d lines) read in %.2lf seconds." % \
                       (len(self.weights), i, mytime.period())

        self.print_autoevals()