Example #1
def repeat_worker(*args, **kwargs):
    if max_time is not None:
        deadline = time() + max_time
    else:
        deadline = None
    if max_tries is None:
        r = itercount()
    else:
        r = range(0, max_tries)
    for i in r:
        start_time = time()
        threw = False
        try:
            ret = callback(*args, **kwargs)
        except Exception as e:
            # An exception was caught, so we failed.
            if catch_predicate(e):
                # This exception was expected, so we failed but may retry.
                threw = True
            else:
                # This exception was unexpected; re-raise it.
                raise
        if not threw and predicate(ret):
            # We didn't throw and got a success, so exit.
            return True
        if deadline is not None and time() > deadline:
            return False
        end_time = time()
        sleep_time = max(0.0, period - (end_time - start_time))
        sleep(sleep_time)
    # We fell through to here, so fail.
    return False
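The snippet above is the inner worker of a retry helper; the names it closes over (`callback`, `predicate`, `catch_predicate`, `period`, `max_tries`, `max_time`) are defined in an enclosing scope that is not shown. A minimal sketch of how such a wrapper might look, assuming `itercount` is `itertools.count`; the signature is an assumption, not the original project's API:

# Hypothetical outer wrapper for the worker above.
from itertools import count as itercount
from time import sleep, time

def repeat(callback, predicate=bool, catch_predicate=lambda e: False,
           period=1.0, max_tries=None, max_time=None):
    def repeat_worker(*args, **kwargs):
        deadline = time() + max_time if max_time is not None else None
        tries = itercount() if max_tries is None else range(max_tries)
        for _ in tries:
            start_time = time()
            try:
                if predicate(callback(*args, **kwargs)):
                    return True              # success, stop retrying
            except Exception as e:
                if not catch_predicate(e):
                    raise                    # unexpected exception, re-raise
            if deadline is not None and time() > deadline:
                return False                 # out of time
            sleep(max(0.0, period - (time() - start_time)))
        return False                         # out of tries
    return repeat_worker

# e.g. poll a flaky check every 2 s for at most 10 s:
# repeat(check_service, period=2.0, max_time=10.0)()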
Example #2
def make_key_times(year_count: int) -> typing.List[str]:
    """
    year_count: year run date count
    return: list of key times points

    should append `1` because the svg keyTimes rule
    """
    s = list(takewhile(lambda n: n < 1, itercount(0, 1 / year_count)))
    s.append(1)
    return [str(round(i, 2)) for i in s]
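A quick usage check for the helper above, assuming `itercount` is `itertools.count` and the definition above is in scope:

from itertools import count as itercount, takewhile
import typing

# Four equal key-time steps, closed at 1 as the SVG keyTimes rule requires.
print(make_key_times(4))  # ['0', '0.25', '0.5', '0.75', '1']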
Example #3
File: filebox.py Project: kybin/h
def incFromLastFile(filepath):
	'''
	If the file exists, return a new path with an incrementing numeric suffix;
	otherwise return the path unchanged.
	'''
	if ospath.exists(filepath):
		base, ext = ospath.splitext(filepath)
		for v in itercount(1):
			newpath = base+'_'+str(v)+ext
			if not ospath.exists(newpath):
				return newpath
	return filepath
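A usage sketch for incFromLastFile, assuming `ospath` is `os.path` and `itercount` is `itertools.count`; the file name below is a hypothetical example:

from os import path as ospath
from itertools import count as itercount

# Returns 'render.png' unchanged if it does not exist yet; otherwise tries
# 'render_1.png', 'render_2.png', ... until a free name is found.
print(incFromLastFile('render.png'))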
Example #4
def GetIfaceStaticRoutes(self, ifname):
    rtlist = []
    for i in itercount():
        address = self.routes[ifname]['ADDRESS%u' % i]
        if not address:
            break
        netmask = self.routes[ifname]['NETMASK%u' % i]
        if not netmask:
            continue
        gateway = self.routes[ifname]['GATEWAY%u' % i]
        rtlist += [(address, netmask, gateway)]
    return rtlist
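The loop above expects `self.routes[ifname]` to behave like a mapping of numbered ADDRESS/NETMASK/GATEWAY entries where a missing or empty ADDRESS ends the walk. A standalone sketch with hypothetical data, using `dict.get` so a missing key terminates the walk instead of raising:

from itertools import count as itercount

# Hypothetical route table in the shape the method expects.
routes = {
    'eth0': {
        'ADDRESS0': '10.0.0.0', 'NETMASK0': '255.0.0.0', 'GATEWAY0': '192.168.1.1',
    },
}

def iface_static_routes(ifname):
    rtlist = []
    for i in itercount():
        address = routes[ifname].get('ADDRESS%u' % i, '')
        if not address:
            break
        netmask = routes[ifname].get('NETMASK%u' % i, '')
        if not netmask:
            continue
        gateway = routes[ifname].get('GATEWAY%u' % i, '')
        rtlist.append((address, netmask, gateway))
    return rtlist

print(iface_static_routes('eth0'))  # [('10.0.0.0', '255.0.0.0', '192.168.1.1')]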
Example #5
class DataHolder(object):

	_holdercount = itercount(1)

	def __init__(self, x, y, dx=0, dy=0, name=None):
		self.num = next(self._holdercount)
		self.datakw = dict(
			fmt='none',
			ecolor='black',
			elinewidth=0.8,
			capthick=0.8,
			label='data'
		)
		self.subpars = dict(hspace=0.3)
		self.pts = None
		self.title = ""
		if name:
			self.name = name
		else:
			self.name = "dataset #{}".format(self.num)

		self.x = MeasureObj(x, dx)
		self.x.edge_padding = 1/20
		self.y = MeasureObj(y, dy)
		self.y.edge_padding = 3/20

		for z in (self.x, self.y):
			z.lims = None
			z.type = 'linear'
			z.re = 1
			z.label = ""

	def sort(self, **kwargs):
		srt = np.argsort(self.x.val, **kwargs)
		# Rebind the sorted arrays on the measure objects; reassigning the
		# loop variable (z = z[srt]) would not modify the stored data.
		for z in (self.x, self.y):
			z.val = z.val[srt]
			z.err = z.err[srt]

	def fit_generic(self, *funcs, verbose=True, **kwargs):

		if verbose:
			print("Working on {}\n".format(self.name))
			fitmsg = "The fit of {funname} on {dataname} gave the following parameters:"
		for f in funcs:
			mask = getattr(f, 'mask', np.ones(len(self.x.val), dtype=bool))
			x = self.x.val[mask]
			y = self.y.val[mask]
			dx = self.x.err[mask]
			dy = self.y.err[mask]
			p0 = getattr(f, 'pars', None)
			df = getattr(f, 'deriv', _nullfunc)
			pars, pcov = fit_generic(f, x, y, dx, dy, p0=p0, **kwargs)
			f.pars = pars
			f.cov = pcov
			f.sigmas = np.sqrt(np.diag(pcov))
			f.resd = (y - f(x, *pars)) / np.sqrt(dy**2 + dx**2 * df(x, *pars)**2)
			if verbose:
				print(fitmsg.format(dataname=self.name, funname=f.__name__))
				# getargspec was removed in Python 3.11; use getfullargspec.
				argnames = inspect.getfullargspec(f).args[1:]
				for name, par, err in zip(argnames, pars, f.sigmas):
					print(r"{0} = {1:.4f} \pm {2:.4f}".format(name, par, err))
				print(tell_chi2(f.resd, len(x) - len(pars), style='latex'))
				print("")
				print("{} complete\n\n".format(self.name))

	def _set_edges(self, var, type=None):
		if var == 'x':
			z = self.x
		elif var == 'y':
			z = self.y
		if not type:
			type = z.type
		top = np.amax(z.val)
		bot = np.amin(z.val)
		if type == 'log':
			top = np.log10(top)
			bot = np.log10(bot)
		width = top - bot
		high = top + width * z.edge_padding
		low = bot - width * z.edge_padding
		if type == 'log':
			high = 10**high
			low = 10**low
		z.lims = np.array([low, high])

	def _getpts(self, type=None):
		if not type:
			type = self.x.type
		low = self.x.lims[0]
		high = self.x.lims[1]
		if type == 'log':
			self.pts = np.logspace(np.log10(low), np.log10(high), num=max(len(self.x.val)*10, 200))
		elif type == 'linear':
			self.pts = np.linspace(low, high, num=max(len(self.x.val)*10, 200))

	def _graph_setup(self, resid=False):
		if self.x.lims is None:
			self._set_edges('x')
		if self.y.lims is None:
			self._set_edges('y')
		if self.pts is None:
			self._getpts()
		if not resid:
			main_ax = self.fig.add_subplot(1, 1, 1)
			main_ax.set_xlabel(self.x.label)
			resid = ()
		else:
			sub_gs = mpl.gridspec.GridSpec(5, 1)		# TODO: make better
			main_ax = self.fig.add_subplot(sub_gs[:4])
			resd_ax = self.fig.add_subplot(sub_gs[4:])
			resd_ax.axhline(y=0, color='black')
			resd_ax.set_xlabel(self.x.label)
			resd_ax.set_ylabel('Norm. res.')
			resd_ax.set_xscale(self.x.type)
			resd_ax.set_xlim(*(self.x.lims * self.x.re))
			self.resd_ax = resd_ax

		main_ax.set_xlim(*(self.x.lims * self.x.re))
		main_ax.set_ylim(*(self.y.lims * self.y.re))
		main_ax.set_ylabel(self.y.label)
		main_ax.set_title(self.title)
		main_ax.set_xscale(self.x.type)
		main_ax.set_yscale(self.y.type)
		self.main_ax = main_ax

	def draw(self, *funcs, resid=False, data=True, legend=True):
		self.fig = plt.figure(self.num)
		self.fig.subplots_adjust(**self.subpars)
		self.fig.clf()
		self._graph_setup(resid)
		main_ax = self.main_ax
		if not resid:
			resid = ()
		else:
			resd_ax = self.resd_ax
			if resid is True:
				resid = funcs

		x = self.x.val * self.x.re
		y = self.y.val * self.y.re
		dx = self.x.err * self.x.re
		dy = self.y.err * self.y.re
		if data:
			main_ax.errorbar(x, y, dy, dx, **self.datakw)

		for fun in funcs:
			if callable(fun):
				mask = np.zeros(len(self.pts), dtype=bool)
				for lowest, highest in getattr(fun, 'bounds', [(-np.inf, np.inf)]):
					mask |= (self.pts > lowest) & (self.pts < highest)
				points = self.pts[mask]
				try:
					linekw = fun.linekw
				except AttributeError:
					linekw = fun.linekw = {}
				g, = main_ax.plot(points * self.x.re, fun(points, *fun.pars)*self.y.re, **linekw)
				if 'color' not in linekw:
					linekw['color'] = g.get_color()
			else:
				pass

		if legend:
			main_ax.legend(loc='best')

		for fun in resid:
			mask = getattr(fun, 'mask', np.ones(len(x), dtype=bool))
			resdkw = dict(marker='o', markeredgecolor='k', markeredgewidth=.5)
			if hasattr(fun, 'linekw'):
				resdkw.update(fun.linekw)
			resdkw.update(ls='none')
			if hasattr(fun, 'resd'):
				res = fun.resd
			else:
				df = getattr(fun, 'deriv', _nullfunc)
				delta = self.y.val - fun(self.x.val, *fun.pars)
				variance = self.y.err**2 + self.x.err**2 * df(self.x.val, *fun.pars)**2
				fun.resd = res = delta / np.sqrt(variance)
			resd_ax.plot(x[mask], res, **resdkw)
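A hypothetical usage sketch for the class above; it assumes the project's MeasureObj, module-level fit_generic, tell_chi2 and _nullfunc are importable alongside numpy and matplotlib, and that MeasureObj broadcasts scalar uncertainties. Every name introduced below (the sample data, `line`) is an example, not part of the project:

import numpy as np

# Example data and model for illustration only.
x = np.linspace(1.0, 10.0, 20)
y = 2.0 * x + 1.0

def line(x, m, q):
    return m * x + q

line.pars = [1.0, 0.0]              # optional initial guess, picked up as p0

data = DataHolder(x, y, dx=0.05, dy=0.1, name="linear test")
data.x.label = "x [a.u.]"
data.y.label = "y [a.u.]"
data.fit_generic(line)              # fits `line`, stores pars/cov/sigmas/resd on it
data.draw(line, resid=True)         # main plot plus normalised residuals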
Example #6
def run_random_or_not_nn(
        data_package_path,
        log_path=None,
        meta_graph_path=None,
        epochs=None,
        batches=None,
        val_period=200,  # how often to validate, in batches per validation
        cp_period=500,  # how often to save checkpoints, in batches per checkpoint
        # nn hyper params
        batch_size=100,  # number of training cases per batch
        val_size=50,  # validation set size in batches
        vocab_size=10000,  # vocabulary size
        seq_size=5,  # sub-sequence size
        h1_size=200,  # 1st hidden layer size
        h2_size=100,  # 2nd hidden layer size
        learning_rate=0.003,  # learning rate
        start_batch=0):
    """
    DESCRIPTION:
        Constructs and runs a neural net that learns word embeddings by trying
        to guess whether or not the word in the middle is random or not.

    ARGUMENTS:
        data_package_path   The path to the data package
        log_path            The path to the directory for logging
        meta_graph_path     (optional) If specified will restore the session
        from this file path
        epochs              (optional) The number of epochs the nn should run
        batches             (optional) The number of batches the nn should run

        val_period          (optional) How often to test validation sets in
                            batches per validation
        cp_period           (optional) How often to save checkpoints in
                            batches per checkpoint

        batch_size          (optional) The number of training cases per batch
        test_size           (optional) The test set size in batches
        val_size            (optional) The validation set size in batches
        vocab_size          (optional) The vocabulary size
        seq_size            (optional) The sub sequence size
        h1_size             (optional) The 1st hidden layer size
        h2_size             (optional) The 2nd hidden layer size
        learning_rate       (optional) The learning rate
    """
    RESTORE_SESSION = meta_graph_path is not None

    # paths
    vocab_path = os.path.join(data_package_path, VOCABULARY_FILENAME)
    log_path = log_path if log_path else os.path.join(data_package_path, "log")
    print("LOG DIRECTORY: {}".format(log_path))

    epochscount = range(epochs) if epochs is not None else itercount()

    timestamp = str(math.trunc(time.time()))
    PREFIX = "b_{}-l_{}-w_{}-h1_{}-h2_{}-s_{}-{}"\
        .format(batch_size,learning_rate,vocab_size,h1_size,h2_size,seq_size, timestamp)

    ################################################################################
    #                                                                              #
    #                         NEURAL NET CODE STARTS HERE                          #
    #                                                                              #
    ################################################################################

    with tf.name_scope('Input_Layer') as scope:
        # current batch size
        n = tf.placeholder(tf.int32)
        # [n, seq_size]
        indices = tf.placeholder(tf.int32,
                                 shape=[None, seq_size],
                                 name="Indices")
        # [n, 2] correct answers will go here. 2 classifications T/F
        Y_ = tf.placeholder(tf.float32, [None, 2])
        # [n * seq_size]
        flat_indices = tf.reshape(indices,
                                  shape=[n * seq_size],
                                  name="Reshaped_Indices")

    with tf.name_scope('H1_Layer') as scope:
        # [vocab_size, h1_size]
        word_embeddings = tf.Variable(tf.random_uniform([vocab_size, h1_size],
                                                        -1.0, 1.0),
                                      name="H1_Word_Embeddings")
        # [n * seq_size, h1_size]
        Y1 = tf.nn.embedding_lookup(word_embeddings,
                                    flat_indices,
                                    name="H1_Activations")
        # [n, seq_size * h1_size]
        Y1 = tf.reshape(Y1,
                        shape=[n, seq_size * h1_size],
                        name="Reshaped_H1_Activations")

    with tf.name_scope('H2_Layer') as scope:
        # [seq_size * h1_size, h2_size]
        W2 = tf.Variable(tf.truncated_normal([seq_size * h1_size, h2_size],
                                             stddev=0.1),
                         name="H2_Weights")
        # [h2_size]
        B2 = tf.Variable(tf.zeros([h2_size]), name="H2_Bias")
        # [n, h2_size]
        Y2 = tf.nn.relu(tf.matmul(Y1, W2) + B2, name="H2_Activations")

    with tf.name_scope('Output_Layer') as scope:
        # [h2_size, 2]
        W3 = tf.Variable(tf.truncated_normal([h2_size, 2], stddev=0.1),
                         name="Output_Weights")
        # [2]
        B3 = tf.Variable(tf.zeros([2]), name="Output_Bias")
        # [n,2]
        Ylogits = tf.matmul(Y2, W3) + B3
        # [n,2]
        Y = tf.nn.softmax(Ylogits, name="Output")

    with tf.name_scope('Stats') as scope:
        # cross entropy (scalar)
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            logits=Ylogits, labels=Y_, name="Cross_Entropy")
        cross_entropy = tf.reduce_mean(cross_entropy, name="Ave_Cross_Entropy")
        # accuracy of the trained model, between 0 (worst) and 1 (best)
        correct_prediction = tf.equal(tf.argmax(Y, 1),
                                      tf.argmax(Y_, 1),
                                      name="Accuracy")
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32),
                                  name="Ave_Accuracy")

    with tf.name_scope('Training_Step') as scope:
        # training step
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(
            cross_entropy)

    print("STARTING SESSION...")
    with tf.Session() as sess:
        # tensorboard stuff
        train_path = os.path.join(log_path, "{}-training".format(PREFIX))
        val_path = os.path.join(log_path, "{}-validation".format(PREFIX))
        summary_writer = tf.summary.FileWriter(train_path)
        validation_writer = tf.summary.FileWriter(val_path, sess.graph)

        # histograms
        word_embeddings_summary = tf.summary.histogram("word_embeddings",
                                                       word_embeddings)
        W2_summary = tf.summary.histogram("W2", W2)
        B2_summary = tf.summary.histogram("B2", B2)
        W3_summary = tf.summary.histogram("W3", W3)
        B3_summary = tf.summary.histogram("B3", B3)

        # scalars
        loss_summary = tf.summary.scalar("batch_loss", cross_entropy)
        acc_summary = tf.summary.scalar("batch_accuracy", accuracy)

        # test and validation summaries
        summaries = tf.summary.merge([loss_summary, acc_summary])
        val_summaries = tf.summary.merge([
            loss_summary, acc_summary, word_embeddings_summary, W2_summary,
            B2_summary, W3_summary, B3_summary
        ])

        # initialize
        init = tf.global_variables_initializer()

        # projector
        config = projector.ProjectorConfig()

        # You can add multiple embeddings. Here we add only one.
        embedding = config.embeddings.add()
        embedding.tensor_name = word_embeddings.name
        # Link this tensor to its metadata file (e.g. labels).
        embedding.metadata_path = vocab_path

        # The next line writes a projector_config.pbtxt in the LOG_DIR. TensorBoard will
        # read this file during startup.
        projector.visualize_embeddings(summary_writer, config)
        projector.visualize_embeddings(validation_writer, config)

        saver = tf.train.Saver()
        if RESTORE_SESSION:
            # saver = tf.train.import_meta_graph(meta_graph_path)
            saver.restore(sess, meta_graph_path)
            print("RESTORED:", meta_graph_path)
        else:
            sess.run(init)

        global_step = 0
        batch_count = 0

        for epoch in epochscount:
            with read_data_package(data_package_path) as (seqs_reader, vocab,
                                                          probs):
                for i in range(start_batch - batch_count):
                    print("SKIPPING ", i, end="\r")
                    for j in range(batch_size):
                        next(seqs_reader)  # skip to start_batch
                    batch_count += 1
                print()

                batch_gen = batch_generator(seqs_reader, probs, batch_size,
                                            vocab_size, seq_size)

                # create validation sets
                validation_set = [next(batch_gen) for i in range(val_size)]
                validation_set = {
                    "indices":
                    np.vstack([b["indices"] for b in validation_set]),
                    "Y_": np.vstack([b["Y_"] for b in validation_set]),
                    "n": sum([b["n"] for b in validation_set])
                }

                for i, batch in enumerate(batch_gen):
                    if batches and batch_count >= batches:
                        print("REACHED {} BATCHES".format(batch_count))
                        return
                    global_step += batch["n"]
                    batch_count += 1
                    # validation
                    if i % val_period == 0:
                        feed_dict = {
                            indices: validation_set["indices"],
                            Y_: validation_set["Y_"],
                            n: validation_set["n"]
                        }

                        a, c, smm, y = sess.run(
                            [accuracy, cross_entropy, val_summaries, Y],
                            feed_dict=feed_dict)
                        validation_writer.add_summary(smm, global_step)

                        print("OUTPUT SAMPLE:")
                        for j in range(min(validation_set["n"], 50)):
                            seq = validation_set["indices"].tolist()[j]
                            actual_value = validation_set["Y_"].tolist()[j]
                            actual_value = (bool(actual_value[0])
                                            and not bool(actual_value[1])
                                            )  # T:[1,0] F:[0,1]
                            guess = [round(g) for g in y[j]]
                            guess = (guess[0] > guess[1])  # T:[1,0] F:[0,1]
                            confidence = (float(y[j][0])
                                          if guess else float(y[j][1])) * 100
                            text = ""
                            for k, l in enumerate(seq):
                                text += " " if k > 0 else ""
                                if k == seq_size // 2:
                                    text += (ANSI.GREEN if actual_value else
                                             ANSI.RED) + vocab[l] + ANSI.ENDC
                                else:
                                    text += vocab[l]
                            print_message = "TEXT: {0:50s} ACTUAL VALUE: {1:6s} GUESS: {2:6s} CONFIDENCE: {3:3.2f}%"\
                                .format(text, str(actual_value), str(guess), confidence)
                            print(print_message)

                        print("VALIDATION: ACCURACY:{0:7.4f} LOSS:{1:7.4f}"\
                                .format(a,c))

                    # forward pass
                    feed_dict = {
                        indices: batch["indices"],
                        Y_: batch["Y_"],
                        n: batch["n"]
                    }

                    a, c, smm = sess.run([accuracy, cross_entropy, summaries],
                                         feed_dict=feed_dict)
                    summary_writer.add_summary(smm, global_step)
                    print("EPOCH:{0:3d} BATCH:{1:10d} ACCURACY:{2:8.4f} LOSS:{3:8.4f}"\
                        .format(epoch, i, a, c))

                    # backprop
                    sess.run(train_step, feed_dict=feed_dict)

                    # save checkpoint
                    if i % cp_period == 0:
                        # sess.run(assign_word_embedding)
                        save_path = saver.save(
                            sess,
                            os.path.join(log_path, "{}.ckpt".format(
                                PREFIX)))  #, global_step=global_step)
                        print(save_path)
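A minimal invocation sketch; the data-package path is a placeholder, and everything else relies on the surrounding module (VOCABULARY_FILENAME, read_data_package, batch_generator, ANSI):

# Hypothetical call; "data/package" is a placeholder path, not from the project.
run_random_or_not_nn(
    data_package_path="data/package",
    epochs=1,          # a single pass over the data package
    batches=2000,      # hard stop after 2000 batches
)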