# Python 2 / Theano snippet (note xrange below).
from __future__ import print_function

import sys
from itertools import chain

import theano
import theano.tensor as T


def predict(test_data, true_labels, batch_size, model, model_file=None):
    # Optionally restore trained weights before evaluating.
    if model_file is not None:
        print("loading model from {}".format(model_file), file=sys.stderr)
        model.load_from_file(file_path=model_file, careful=True)

    n_test_batches = test_data.shape[0] // batch_size  # floor division: whole batches only
    y = T.ivector('y')

    # Compiled Theano functions: class probabilities and per-batch error rates.
    prob_fcn = theano.function(inputs=[model.input], outputs=model.output)
    error_fcn = theano.function(inputs=[model.input, y], outputs=model.errors(y))

    errors = [error_fcn(test_data[x * batch_size: (x + 1) * batch_size],
                        true_labels[x * batch_size: (x + 1) * batch_size])
              for x in xrange(n_test_batches)]
    probs = [prob_fcn(test_data[x * batch_size: (x + 1) * batch_size])
             for x in xrange(n_test_batches)]
    probs = list(chain(*probs))  # flatten per-batch probability arrays
    return errors, probs
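# A minimal sketch (toy values, not from this codebase) of the flattening
# idiom above: chain(*batches) splices per-batch outputs into one flat list.
from itertools import chain

batches = [[0.1, 0.9], [0.8, 0.2]]
assert list(chain(*batches)) == [0.1, 0.9, 0.8, 0.2]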
def init_optimizers(self, lr):
    self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), lr=lr)
    self.critic_optimizer = torch.optim.Adam(self.critic.parameters(), lr=lr)
    self.proto_optimizer = torch.optim.Adam(utils.chain(
        self.encoder.parameters(), self.proto.parameters()), lr=lr)
    self.log_alpha_optimizer = torch.optim.Adam([self.log_alpha], lr=lr)
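# Hedged sketch, assuming utils.chain behaves like itertools.chain: joining
# two parameter iterators lets one Adam instance update both modules. The
# Linear modules here are stand-ins, not the project's encoder/prototype nets.
from itertools import chain

import torch

encoder = torch.nn.Linear(8, 4)
proto = torch.nn.Linear(4, 2)
optimizer = torch.optim.Adam(
    chain(encoder.parameters(), proto.parameters()), lr=1e-3)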
def make_word_to_id(vocab_size):
    word_freqs = Counter()  # word -> # occurrences in corpus
    for words, label in utils.chain(utils.stormfront_gen, utils.twitter_gen)():
        for word in words:
            word_freqs[word] += 1
    id_to_word_and_freq = word_freqs.most_common(
        min(vocab_size, len(word_freqs)))
    return {word: i for i, (word, _) in enumerate(id_to_word_and_freq)}
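# Self-contained illustration with a toy corpus (not the Stormfront/Twitter
# data): the most frequent word gets id 0, the next id 1, and so on.
from collections import Counter

freqs = Counter("the cat sat on the mat the end".split())
top = freqs.most_common(3)                           # [('the', 3), ('cat', 1), ('sat', 1)]
word_to_id = {w: i for i, (w, _) in enumerate(top)}  # {'the': 0, 'cat': 1, 'sat': 2}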
# Python 2 code (uses iteritems, unicode/long, and "except Exception, e" syntax).
def exe(node):
    """
    node[0]  - operator name
    node[1:] - params
    """
    if D: self.start("executing node '%s'", node)
    type_node = type(node)
    if node is None or type_node in (str, int, float, long, bool, generator, chain):
        return node
    elif type_node is list:
        return map(exe, node)
    elif type_node is dict:
        ret = {}
        for i in node.iteritems():
            ret[exe(i[0])] = exe(i[1])
        return ret
    op = node[0]
    if op == "or":
        if D: self.debug("%s or %s", node[1], node[2])
        return exe(node[1]) or exe(node[2])
    elif op == "and":
        if D: self.debug("%s and %s", node[1], node[2])
        return exe(node[1]) and exe(node[2])
    elif op == "+":
        if len(node) > 2:
            fst = exe(node[1])
            snd = exe(node[2])
            if fst is None:
                return snd
            if snd is None:
                return fst
            typefst = type(fst)
            if typefst is dict:
                fst.update(snd)
                return fst
            typesnd = type(snd)
            if typefst is list and typesnd is list:
                if D: self.debug("both sides are lists, returning '%s'", fst + snd)
                return fst + snd
            if typefst in ITER_TYPES or typesnd in ITER_TYPES:
                if typefst not in ITER_TYPES:
                    fst = [fst]
                elif typesnd not in ITER_TYPES:
                    snd = [snd]
                if D: self.debug("at least one side is a generator and the other is iterable, returning chain")
                return chain(fst, snd)
            if typefst in (int, float):
                try:
                    return fst + snd
                except:
                    return fst + float(snd)
            if typefst in STR_TYPES or typesnd in STR_TYPES:
                if D: self.info("doing string concatenation '%s' + '%s'", fst, snd)
                if typefst is unicode:
                    fst = fst.encode("utf-8")
                if typesnd is unicode:
                    snd = snd.encode("utf-8")
                return str(fst) + str(snd)
            try:
                timeType = timeutils.datetime.time
                if typefst is timeType and typesnd is timeType:
                    return timeutils.addTimes(fst, snd)
            except:
                pass
            if D: self.debug("standard addition, returning '%s'", fst + snd)
            return fst + snd
        else:
            return exe(node[1])
    elif op == "-":
        # TODO move -N to tree builder!
        if len(node) > 2:
            fst = exe(node[1])
            snd = exe(node[2])
            try:
                return fst - snd
            except:
                typefst = type(fst)
                typesnd = type(snd)
                timeType = timeutils.datetime.time
                if typefst is timeType and typesnd is timeType:
                    return timeutils.subTimes(fst, snd)
        else:
            return -exe(node[1])
    elif op == "*":
        return exe(node[1]) * exe(node[2])
    elif op == "%":
        return exe(node[1]) % exe(node[2])
    elif op == "/":
        return exe(node[1]) / float(exe(node[2]))
    elif op == ">":
        if D: self.debug("%s > %s", node[1], node[2])
        return exe(node[1]) > exe(node[2])
    elif op == "<":
        return exe(node[1]) < exe(node[2])
    elif op == ">=":
        return exe(node[1]) >= exe(node[2])
    elif op == "<=":
        return exe(node[1]) <= exe(node[2])
    # TODO this algorithm produces 3 for 1<2<3 and should be true
    # elif op in "<=>=":
    #     fst = exe(node[1])
    #     snd = exe(node[2])
    #     if op == ">":
    #         return fst > snd and snd or False
    #     elif op == "<":
    #         return fst < snd and snd or False
    #     elif op == ">=":
    #         return fst >= snd and snd or False
    #     elif op == "<=":
    #         return fst <= snd and snd or False
    elif op == "not":
        fst = exe(node[1])
        if D: self.debug("doing not '%s'", fst)
        return not fst
    elif op == "in":
        if D: self.debug("doing '%s' in '%s'", node[1], node[2])
        return exe(node[1]) in exe(node[2])
    elif op == "not in":
        return exe(node[1]) not in exe(node[2])
    elif op in ("is", "is not"):
        if D: self.debug("found operator '%s'", op)
        # Fall back to the raw value when a side cannot be executed.
        try:
            fst = exe(node[1])
        except Exception, e:
            if D: self.debug("NOT ERROR! Can't execute node[1] '%s', error: '%s'. Falling back to original value.", node[1], str(e))
            fst = node[1]
        try:
            snd = exe(node[2])
        except Exception, e:
            if D: self.debug("NOT ERROR! Can't execute node[2] '%s', error: '%s'. Falling back to original value.", node[2], str(e))
            snd = node[2]
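# Hedged illustration of the node format the docstring describes: operator
# nodes appear to be tuples of (operator, param1, param2), since plain lists
# are mapped element-wise above. The example values are assumptions:
#     exe(("+", 1, ("*", 2, 3)))        # -> 7
#     exe(("and", ("<", 1, 2), True))   # -> True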
def eval_node(
    filter_box: FilterBoxState
) -> Fun[[List[Signal]], Maybe[List[Signal]]]:
    return chain(lambda signals: signals[0],
                 transformation(filter_box.filter_state),
                 lambda signal: Just([signal]))
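# In this snippet `chain` composes functions left to right rather than
# concatenating iterables. A minimal stand-in with that behavior (an
# assumption, not the project's implementation):
from functools import reduce

def chain(*fns):
    return lambda x: reduce(lambda acc, f: f(acc), fns, x)

assert chain(lambda x: x + 1, lambda x: x * 2)(3) == 8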
    return histos


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("--vocab_size", type=int, default=20000)
    # argparse's type=bool is a known gotcha (bool("False") is True, since any
    # non-empty string is truthy), so the switches are store_true flags instead.
    parser.add_argument("--tf_idf", action="store_true")
    parser.add_argument("--merge", action="store_true")
    FLAGS = parser.parse_args()

    word_to_id = make_word_to_id(FLAGS.vocab_size)
    x_values = list(range(len(word_to_id)))
    if FLAGS.merge:
        # Plot merged-corpus histograms side by side on a shared y scale.
        histos = histograms(
            utils.chain(utils.stormfront_gen, utils.twitter_gen),
            word_to_id,
            tf_idf=FLAGS.tf_idf,
        )
        ymax = np.amax(histos)
        plt.subplot(2, 2, 1)
        plt.plot(x_values, histos[0], "g-")
        plt.ylim(0, ymax)
        plt.subplot(2, 2, 2)
        plt.plot(x_values, histos[1], "r-")
        plt.ylim(0, ymax)
        plt.show()
    else:
        stormfront_histos = histograms(utils.stormfront_gen, word_to_id,
                                       tf_idf=FLAGS.tf_idf)