def write(self, filename="-", weights=None):
    '''Write the templates and all non-negligible feature weights to a file.

    filename -- output path; "-" (the default) writes to sys.stdout.
    weights  -- mapping from feature string to numeric weight; defaults to
                self.weights.

    Output, in order: whatever self.print_templates(outfile) emits, then
    one "feature<TAB>weight" line per feature (in sorted feature order)
    whose absolute weight exceeds 1e-3, then, if self.unk > 0, a final
    line that starts with a space and lists the sorted known words.
    Progress messages go to the module-level `logs` stream.

    A file opened here is always closed before returning, even on error;
    sys.stdout is never closed.
    '''
    if weights is None:
        weights = self.weights

    to_stdout = (filename == "-")
    if to_stdout:
        outfile = sys.stdout
        filename = "STDOUT"  # from here on used only as a display name
    else:
        outfile = open(filename, "wt")

    try:
        self.print_templates(outfile)

        mytime = Mytime()
        nonzero = 0

        # Sort up front so the timing message is reported even when the
        # weight table is empty.
        print >> logs, "sorting %d features..." % len(weights),
        ordered = sorted(weights)
        print >> logs, "done in %.2lf seconds." % mytime.period()
        print >> logs, "writing features to %s..." % filename

        for f in ordered:
            v = weights[f]
            # near-zero weights are not written
            if math.fabs(v) > 1e-3:
                print >> outfile, "%s\t%.5lf" % (f, v)
                nonzero += 1

        if self.unk > 0:
            # known-words line; the leading " " marks it for read_weights
            print >> outfile, " " + " ".join(sorted(self.knowns))

        # nonzero counts written features, not the total number of features
        print >> logs, "%d nonzero feature instances written in %.2lf seconds." % (
            nonzero, mytime.period())
    finally:
        if not to_stdout:
            outfile.close()
def read_weights(self, filename, infertemplates=False):
    '''Load feature weights from a model file into self.weights.

    Each weight line looks like "s0t-q0t=LRB-</s>=>LEFT 3.8234": a
    feature string and a float separated by whitespace.  A line whose
    first character is a space is treated as the known-words line: its
    tokens become self.knowns, self.unk is set to 1, and reading stops.

    filename       -- passed to self.read_templates, which returns the
                      stream to read the weights from (or None).
    infertemplates -- NOTE: the value passed in is ignored; the flag is
                      recomputed as "no templates are loaded".

    When templates are being inferred, the part of every feature before
    the first "=" is registered through self.add_template(name, 1).
    Progress and a final summary are printed to `logs`, then
    self.print_autoevals() is called.
    '''
    source = self.read_templates(filename)

    # Overrides the argument: infer exactly when no template is known.
    infertemplates = len(self.templates) == 0
    if infertemplates:
        print >> logs, "will infer templates from weights..."

    timer = Mytime()
    lineno = 0  # stays 0 if there is no stream or the stream is empty
    if source is not None:
        print >> logs, "reading feature weights from %s\t" % filename,
        for lineno, row in enumerate(source, 1):
            if not lineno % 200000:
                print >> logs, "%d lines read..." % lineno,

            if row[0] == " ":
                # known-words line (expected to be the last line)
                self.knowns = set(row.split())
                print >> logs, "\n%d known words read." % len(self.knowns)
                self.unk = 1  # any positive value would do
                break

            name, value = row.split()
            self.weights[name] = float(value)

            if infertemplates:
                template = name.split("=", 1)[0]
                self.add_template(template, 1)  # one occurrence

    summary = (len(self.weights), lineno, timer.period())
    print >> logs, "\n%d feature instances (%d lines) read in %.2lf seconds." % summary

    self.print_autoevals()
def write(self, filename="-", weights=None):
    '''Write the templates and all non-negligible feature weights to a file.

    filename -- output path; "-" (the default) writes to sys.stdout.
    weights  -- mapping from action to a per-action mapping of
                "tid=feature" keys to weights; defaults to self.weights.

    Output, in order: whatever self.print_templates(outfile) emits, then
    one line "template=feature=>action<TAB>weight" for every entry whose
    absolute weight exceeds 1e-3.  The template text is looked up as
    self.list_templates[int(tid)][0].  Within an action the features are
    written in sorted key order; actions follow the mapping's own
    iteration order.

    A file opened here is always closed before returning, even on error;
    sys.stdout is never closed.
    '''
    if weights is None:
        weights = self.weights

    to_stdout = (filename == "-")
    if to_stdout:
        outfile = sys.stdout
        filename = "STDOUT"  # display name only
    else:
        outfile = open(filename, "wt")

    try:
        self.print_templates(outfile)

        mytime = Mytime()
        nonzero = 0

        for action, feats in weights.iteritems():
            for f in sorted(feats):
                # values may be a wrapper type; convert once and reuse
                v = float(feats[f])
                if math.fabs(v) > 1e-3:
                    tid, feat = f.split("=", 1)
                    print >> outfile, "%s=%s=>%s\t%.5lf" % (
                        self.list_templates[int(tid)][0], feat, action, v)
                    nonzero += 1

        # nonzero counts written entries, not the total number of entries
        print >> logs, "%d nonzero feature instances written in %.2lf seconds." % (
            nonzero, mytime.period())
    finally:
        if not to_stdout:
            outfile.close()
def read_weights(self, filename, infertemplates=False):
    '''Load feature weights from a model file into self.weights.

    Each line looks like "s0t-q0t=LRB-</s>=>LEFT 3.8234": a feature
    string and a float separated by whitespace.  The float is wrapped
    with WVector.value_class before being stored.

    filename       -- passed to self.read_templates, which returns the
                      stream to read the weights from (or None).
    infertemplates -- NOTE: the value passed in is ignored; the flag is
                      recomputed as "at most one template is loaded".

    Storage layout depends on Model.doublehash:
      1 -- self.weights[action][key]; key is the feature text before
           "=>", or with FLAGS.tuplefeats the tuple (tid, part, ...)
           built from its "|"-separated parts (mapped through
           Vocab.str2id when FLAGS.integerize is set).
      2 -- self.weights[Model.mapnames[action]][tid][feature].
      otherwise -- self.weights[feature_string].
    `tid` and `instance` exist only when FLAGS.use_template_id is set,
    so the tuplefeats and doublehash == 2 layouts depend on that flag.

    Progress and a final summary are printed to `logs`, then
    self.print_autoevals() is called.
    '''
    source = self.read_templates(filename)

    # Overrides the argument: infer when fewer than two templates exist.
    infertemplates = len(self.templates) < 2
    if infertemplates:
        print >> logs, "will infer templates from weights..."

    timer = Mytime()
    lineno = 0  # stays 0 if there is no stream or the stream is empty
    if source is not None:
        print >> logs, "reading feature weights from %s\t" % filename,
        for lineno, row in enumerate(source, 1):
            if not lineno % 200000:
                print >> logs, "%d lines read..." % lineno,

            feat, raw = row.split()
            weight = WVector.value_class(float(raw))  # in case of mydouble

            if FLAGS.use_template_id:
                # replace the template text by its numeric id
                template, instance = feat.split("=", 1)
                tid = self.templates[template]
                feat = "%d=%s" % (tid, instance)

            layout = Model.doublehash
            if layout == 1:
                # the action name is kept as written (no mapnames lookup)
                if not FLAGS.tuplefeats:
                    f, action = feat.rsplit("=>", 1)
                    self.weights[action][f] = weight
                else:
                    f, action = instance.rsplit("=>", 1)
                    fs = tuple(f.split("|"))
                    if FLAGS.integerize:
                        fs = tuple(Vocab.str2id(part) for part in fs)
                    self.weights[action][(tid, ) + fs] = weight
            elif layout == 2:
                f, action = instance.rsplit("=>", 1)
                action = Model.mapnames[action]
                self.weights[action][tid][f] = weight
            else:
                self.weights[feat] = weight

            if infertemplates:
                prefix = feat.split("=", 1)[0]
                self.add_template(prefix, 1)  # one occurrence

    summary = (len(self.weights), lineno, timer.period())
    print >> logs, "\n%d feature instances (%d lines) read in %.2lf seconds." % summary

    self.print_autoevals()
def read_weights_and_insert_different_noise(self, filename, noise_info, infertemplates=False):
    '''Load feature weights from a model file, perturbing each with noise.

    Each line looks like "s0t-q0t=LRB-</s>=>LEFT 3.8234": a feature
    string and a float separated by whitespace.  Line k of the weights
    is paired with entry k of the array loaded from the noise file, and
    a sample

        noise = float(entry) * numpy.random.randn() + float(mu)

    is drawn for it.  The weight then becomes weight + noise for method
    'a', weight * noise for method 'm', and is left unchanged for any
    other method (the sample is still drawn).

    filename       -- passed to self.read_templates, which returns the
                      stream to read the weights from (or None).
    noise_info     -- dict with the keys
                        'method'          : 'a' or 'm' (see above)
                        'mu'              : offset added to every sample
                        'noise_file_path' : file for numpy.load, holding
                                            one coefficient per line
                      A 'sigma' entry may be present but is not used.
    infertemplates -- NOTE: the value passed in is ignored; the flag is
                      recomputed as "at most one template is loaded".

    Weights are stored as plain floats; the layout under Model.doublehash
    is the same as in read_weights, except that FLAGS.integerize is not
    applied here.
    If the noise array has fewer entries than the file has lines, the
    remaining lines are skipped and a warning is logged.
    '''
    method = noise_info['method']
    noise_file_path = noise_info['noise_file_path']
    noises_vector = numpy.load(noise_file_path)

    infile = self.read_templates(filename)

    # Overrides the argument: infer when at most one template exists.
    infertemplates = len(self.templates) <= 1
    if infertemplates:
        print >> logs, "will infer templates from weights..."

    mytime = Mytime()
    i = 0  # number of weight lines actually processed
    if infile is not None:
        print >> logs, "reading feature weights from %s\t" % filename,

        mu = float(noise_info['mu'])  # loop-invariant

        # Pair lines with coefficients lazily; zip() would build the
        # whole list of pairs in memory before the first line is handled.
        exhausted = object()
        coeffs = iter(noises_vector)
        for line in infile:
            noises_vector_coeff = next(coeffs, exhausted)
            if noises_vector_coeff is exhausted:
                print >> logs, "\nWARNING: noise vector %s has only %d entries; remaining weight lines are skipped." % (
                    noise_file_path, i)
                break
            i += 1

            if i % 200000 == 0:
                print >> logs, "%d lines read..." % i,

            feat, weight = line.split()
            weight = float(weight)  # plain float, not WVector.value_class

            noise_ = float(noises_vector_coeff) * numpy.random.randn() + mu
            if method == 'a':
                weight = weight + noise_
            elif method == 'm':
                weight = weight * noise_

            if FLAGS.use_template_id:
                # replace the template text by its numeric id
                template, instance = feat.split("=", 1)
                tid = self.templates[template]
                feat = "%d=%s" % (tid, instance)

            if Model.doublehash == 1:
                if FLAGS.tuplefeats:
                    f, action = instance.rsplit("=>", 1)
                    # NOTE(review): read_weights maps fs through
                    # Vocab.str2id when FLAGS.integerize is set; this
                    # variant does not -- confirm that is intended.
                    fs = tuple(f.split("|"))
                    self.weights[action][(tid, ) + fs] = weight
                else:
                    f, action = feat.rsplit("=>", 1)
                    self.weights[action][f] = weight
            elif Model.doublehash == 2:
                f, action = instance.rsplit("=>", 1)
                action = Model.mapnames[action]
                self.weights[action][tid][f] = weight
            else:
                self.weights[feat] = weight

            if infertemplates:
                self.add_template(feat.split("=", 1)[0], 1)  # one occurrence

    print >> logs, "\n%d feature instances (%d lines) read in %.2lf seconds." % (
        len(self.weights), i, mytime.period())

    self.print_autoevals()