from itertools import count as itercount
from time import time, sleep


def repeat_worker(*args, **kwargs):
    # Note: callback, predicate, catch_predicate, max_time, max_tries and
    # period are free variables, supplied by an enclosing scope.
    if max_time is not None:
        deadline = time() + max_time
    else:
        deadline = None
    if max_tries is None:
        r = itercount()
    else:
        r = range(0, max_tries)
    for i in r:
        start_time = time()
        threw = False
        try:
            ret = callback(*args, **kwargs)
        except Exception as e:
            # An exception was caught, so we failed.
            if catch_predicate(e):
                # This exception was expected, so we failed but may retry.
                threw = True
            else:
                # This exception was unexpected; re-raise it.
                raise
        if not threw and predicate(ret):
            # We didn't throw and got a success: exit.
            return True
        if deadline is not None and time() > deadline:
            return False
        end_time = time()
        sleep_time = max(0.0, period - (end_time - start_time))
        sleep(sleep_time)
    # We fell through without a success: fail.
    return False
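# A minimal sketch (our assumption, not from the source) of the factory that
# `repeat_worker` presumably closes over; the name `make_repeater` and its
# defaults are hypothetical.
def make_repeater(callback, predicate=bool, catch_predicate=lambda e: False,
                  max_time=None, max_tries=None, period=1.0):
    def repeat_worker(*args, **kwargs):
        # Same retry logic as above, in compact form.
        deadline = time() + max_time if max_time is not None else None
        tries = itercount() if max_tries is None else range(max_tries)
        for _ in tries:
            start = time()
            try:
                if predicate(callback(*args, **kwargs)):
                    return True
            except Exception as e:
                if not catch_predicate(e):
                    raise
            if deadline is not None and time() > deadline:
                return False
            sleep(max(0.0, period - (time() - start)))
        return False
    return repeat_worker

# Hypothetical usage: retry a flaky call up to 5 times, roughly 2 s apart
# (ping_server is made up for illustration).
# wait_for_server = make_repeater(ping_server,
#                                 catch_predicate=lambda e: isinstance(e, OSError),
#                                 max_tries=5, period=2.0)
# ok = wait_for_server("example.com")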
import typing
from itertools import count as itercount, takewhile


def make_key_times(year_count: int) -> typing.List[str]:
    """
    year_count: number of years covered by the run dates
    return: list of key-time points as strings

    A trailing `1` is appended because the SVG keyTimes rule requires the
    list to end at 1.
    """
    s = list(takewhile(lambda n: n < 1, itercount(0, 1 / year_count)))
    s.append(1)
    return [str(round(i, 2)) for i in s]
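# Usage sketch: with year_count=4 the step is 0.25, so takewhile collects
# [0, 0.25, 0.5, 0.75] and the trailing 1 is appended.
# >>> make_key_times(4)
# ['0', '0.25', '0.5', '0.75', '1']
# Caveat (our observation, not from the source): count() builds each value by
# repeated float addition, so for some year_count values the last collected
# point can land just below 1 and round to '1.0' right before the appended '1'.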
from itertools import count as itercount
from os import path as ospath


def incFromLastFile(filepath):
    '''If the file already exists, append an incrementing numeric suffix to
    the filename until a free one is found.'''
    if ospath.exists(filepath):
        base, ext = ospath.splitext(filepath)
        for v in itercount(1):
            newpath = base + '_' + str(v) + ext
            if not ospath.exists(newpath):
                return newpath
    return filepath
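# Hypothetical usage (paths made up): if 'report.txt' exists but
# 'report_1.txt' does not,
# >>> incFromLastFile('report.txt')
# 'report_1.txt'
# and a path that does not exist yet is returned unchanged:
# >>> incFromLastFile('missing.txt')
# 'missing.txt'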
def GetIfaceStaticRoutes(self, ifname):
    rtlist = []
    for i in itercount():
        address = self.routes[ifname]['ADDRESS%u' % i]
        if not address:
            break
        netmask = self.routes[ifname]['NETMASK%u' % i]
        if not netmask:
            continue
        gateway = self.routes[ifname]['GATEWAY%u' % i]
        rtlist += [(address, netmask, gateway)]
    return rtlist
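# A minimal sketch (our assumption, not from the source) of the shape of
# self.routes that this method expects: indexed ADDRESSn/NETMASKn/GATEWAYn
# keys in the style of ifcfg route files. A missing key must read as '' so
# that `if not address: break` can terminate the count() loop, hence the
# defaulting dict below (the helper name is made up).
class _DefaultingDict(dict):
    def __missing__(self, key):
        return ''

routes = {
    'eth0': _DefaultingDict(
        ADDRESS0='10.0.0.0', NETMASK0='255.0.0.0', GATEWAY0='192.168.1.1',
        ADDRESS1='172.16.0.0', NETMASK1='255.240.0.0', GATEWAY1='192.168.1.1',
    ),
}
# With self.routes set to this mapping, GetIfaceStaticRoutes('eth0') returns
# both (address, netmask, gateway) triples and stops at the empty ADDRESS2.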
import inspect
from itertools import count as itercount

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np

# External helpers assumed in scope: MeasureObj, fit_generic, tell_chi2, _nullfunc.


class DataHolder(object):
    _holdercount = itercount(1)

    def __init__(self, x, y, dx=0, dy=0, name=None):
        self.num = next(self._holdercount)
        self.datakw = dict(fmt='none', ecolor='black', elinewidth=0.8,
                           capthick=0.8, label='data')
        self.subpars = dict(hspace=0.3)
        self.pts = None
        self.title = ""
        if name:
            self.name = name
        else:
            self.name = "dataset #{}".format(self.num)
        self.x = MeasureObj(x, dx)
        self.x.edge_padding = 1/20
        self.y = MeasureObj(y, dy)
        self.y.edge_padding = 3/20
        for z in (self.x, self.y):
            z.lims = None
            z.type = 'linear'
            z.re = 1
            z.label = ""

    def sort(self, **kwargs):
        srt = np.argsort(self.x.val, **kwargs)
        # Reassign the attributes: rebinding a loop variable (`z = z[srt]`)
        # would leave the stored arrays untouched.
        self.x.val = self.x.val[srt]
        self.x.err = self.x.err[srt]
        self.y.val = self.y.val[srt]
        self.y.err = self.y.err[srt]

    def fit_generic(self, *funcs, verbose=True, **kwargs):
        if verbose:
            print("Working on {}\n".format(self.name))
        fitmsg = "The fit of {funname} on {dataname} gave the following parameters:"
        for f in funcs:
            mask = getattr(f, 'mask', np.ones(len(self.x.val), dtype=bool))
            x = self.x.val[mask]
            y = self.y.val[mask]
            dx = self.x.err[mask]
            dy = self.y.err[mask]
            p0 = getattr(f, 'pars', None)
            df = getattr(f, 'deriv', _nullfunc)
            # `fit_generic` here resolves to the module-level fitting routine,
            # not to this method.
            pars, pcov = fit_generic(f, x, y, dx, dy, p0=p0, **kwargs)
            f.pars = pars
            f.cov = pcov
            f.sigmas = np.sqrt(np.diag(pcov))
            f.resd = (y - f(x, *pars)) / np.sqrt(dy**2 + dx**2 * df(x, *pars)**2)
            if verbose:
                print(fitmsg.format(dataname=self.name, funname=f.__name__))
                argnames = inspect.getfullargspec(f).args[1:]
                for name, par, err in zip(argnames, pars, f.sigmas):
                    print("{0} = {1:.4f} \\pm {2:.4f}".format(name, par, err))
                print(tell_chi2(f.resd, len(x) - len(pars), style='latex'))
                print("")
        if verbose:
            print("{} complete\n\n".format(self.name))

    def _set_edges(self, var, type=None):
        if var == 'x':
            z = self.x
        elif var == 'y':
            z = self.y
        if not type:
            type = z.type
        top = np.amax(z.val)
        bot = np.amin(z.val)
        if type == 'log':
            top = np.log10(top)
            bot = np.log10(bot)
        width = top - bot
        high = top + width * z.edge_padding
        low = bot - width * z.edge_padding
        if type == 'log':
            high = 10**high
            low = 10**low
        z.lims = np.array([low, high])

    def _getpts(self, type=None):
        if not type:
            type = self.x.type
        low = self.x.lims[0]
        high = self.x.lims[1]
        if type == 'log':
            self.pts = np.logspace(np.log10(low), np.log10(high),
                                   num=max(len(self.x.val) * 10, 200))
        elif type == 'linear':
            self.pts = np.linspace(low, high,
                                   num=max(len(self.x.val) * 10, 200))

    def _graph_setup(self, resid=False):
        if self.x.lims is None:
            self._set_edges('x')
        if self.y.lims is None:
            self._set_edges('y')
        if self.pts is None:
            self._getpts()
        if not resid:
            main_ax = self.fig.add_subplot(1, 1, 1)
            main_ax.set_xlabel(self.x.label)
            resid = ()
        else:
            sub_gs = mpl.gridspec.GridSpec(5, 1)  # TODO: make better
            main_ax = self.fig.add_subplot(sub_gs[:4])
            resd_ax = self.fig.add_subplot(sub_gs[4:])
            resd_ax.axhline(y=0, color='black')
            resd_ax.set_xlabel(self.x.label)
            resd_ax.set_ylabel('Norm.\nres.')
            resd_ax.set_xscale(self.x.type)
            resd_ax.set_xlim(*(self.x.lims * self.x.re))
            self.resd_ax = resd_ax
        main_ax.set_xlim(*(self.x.lims * self.x.re))
        main_ax.set_ylim(*(self.y.lims * self.y.re))
        main_ax.set_ylabel(self.y.label)
        main_ax.set_title(self.title)
        main_ax.set_xscale(self.x.type)
        main_ax.set_yscale(self.y.type)
        self.main_ax = main_ax

    def draw(self, *funcs, resid=False, data=True, legend=True):
        self.fig = plt.figure(self.num)
        self.fig.subplots_adjust(**self.subpars)
        self.fig.clf()
        self._graph_setup(resid)
        main_ax = self.main_ax
        if not resid:
            resid = ()
        else:
            resd_ax = self.resd_ax
            if resid is True:
                resid = funcs
        x = self.x.val * self.x.re
        y = self.y.val * self.y.re
        dx = self.x.err * self.x.re
        dy = self.y.err * self.y.re
        if data:
            main_ax.errorbar(x, y, dy, dx, **self.datakw)
        for fun in funcs:
            if callable(fun):
                mask = np.zeros(len(self.pts), dtype=bool)
                for lowest, highest in getattr(fun, 'bounds',
                                               [(-np.inf, np.inf)]):
                    mask |= (self.pts > lowest) & (self.pts < highest)
                points = self.pts[mask]
                try:
                    linekw = fun.linekw
                except AttributeError:
                    linekw = fun.linekw = {}
                g, = main_ax.plot(points * self.x.re,
                                  fun(points, *fun.pars) * self.y.re,
                                  **linekw)
                if 'color' not in linekw:
                    linekw['color'] = g.get_color()
        if legend:
            main_ax.legend(loc='best')
        for fun in resid:
            mask = getattr(fun, 'mask', np.ones(len(x), dtype=bool))
            resdkw = dict(marker='o', markeredgecolor='k', markeredgewidth=.5)
            if hasattr(fun, 'linekw'):
                resdkw.update(fun.linekw)
            resdkw.update(ls='none')
            if hasattr(fun, 'resd'):
                res = fun.resd
            else:
                df = getattr(fun, 'deriv', _nullfunc)
                delta = self.y.val - fun(self.x.val, *fun.pars)
                variance = (self.y.err**2
                            + self.x.err**2 * df(self.x.val, *fun.pars)**2)
                fun.resd = res = delta / np.sqrt(variance)
            resd_ax.plot(x[mask], res, **resdkw)
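# Hypothetical usage sketch, commented out because MeasureObj and the
# module-level fit_generic helper are not part of this excerpt:
# import numpy as np
# xs = np.linspace(0, 10, 30)
# ys = 2.5 * xs + 1.0 + np.random.normal(scale=0.5, size=30)
# data = DataHolder(xs, ys, dx=0.05, dy=0.5, name="linear test")
# def line(x, m, q):
#     return m * x + q
# data.fit_generic(line)       # stores line.pars, line.cov, line.sigmas
# data.draw(line, resid=True)  # main plot plus normalized-residuals panel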
import math
import os
import time
from itertools import count as itercount

import numpy as np
import tensorflow as tf
# TF 1.x-era API assumed for this excerpt, including the projector import:
from tensorflow.contrib.tensorboard.plugins import projector


def run_random_or_not_nn(
        data_package_path,
        log_path=None,
        meta_graph_path=None,
        epochs=None,
        batches=None,
        val_period=200,      # run validation every val_period batches
        cp_period=500,       # save a checkpoint every cp_period batches
        # nn hyperparameters
        batch_size=100,      # number of training cases per batch
        val_size=50,         # validation set size in batches
        vocab_size=10000,    # vocabulary size
        seq_size=5,          # sub-sequence size
        h1_size=200,         # 1st hidden layer size
        h2_size=100,         # 2nd hidden layer size
        learning_rate=0.003, # learning rate
        start_batch=0):
    """
    DESCRIPTION:
        Constructs and runs a neural net that learns word embeddings by
        guessing whether the word in the middle of each sequence is random.

    ARGUMENTS:
        data_package_path          The path to the data package
        log_path                   The path to the directory for logging
        meta_graph_path (optional) If specified, restores the session from this file path
        epochs (optional)          The number of epochs the nn should run
        batches (optional)         The number of batches the nn should run
        val_period (optional)      How often to run validation, in batches per validation
        cp_period (optional)       How often to save checkpoints, in batches per checkpoint
        batch_size (optional)      The number of training cases per batch
        val_size (optional)        The validation set size in batches
        vocab_size (optional)      The vocabulary size
        seq_size (optional)        The sub-sequence size
        h1_size (optional)         The 1st hidden layer size
        h2_size (optional)         The 2nd hidden layer size
        learning_rate (optional)   The learning rate
    """
    RESTORE_SESSION = (meta_graph_path is not None)

    # paths
    vocab_path = os.path.join(data_package_path, VOCABULARY_FILENAME)
    log_path = log_path if log_path else os.path.join(data_package_path, "log")
    print("LOG DIRECTORY: {}".format(log_path))
    epochscount = range(epochs) if epochs is not None else itercount()
    timestamp = str(math.trunc(time.time()))
    PREFIX = "b_{}-l_{}-w_{}-h1_{}-h2_{}-s_{}-{}"\
        .format(batch_size, learning_rate, vocab_size, h1_size, h2_size,
                seq_size, timestamp)

    ############################################################################
    #                                                                          #
    #                       NEURAL NET CODE STARTS HERE                        #
    #                                                                          #
    ############################################################################

    with tf.name_scope('Input_Layer') as scope:
        # current batch size
        n = tf.placeholder(tf.int32)
        # [n, seq_size]
        indices = tf.placeholder(tf.int32, shape=[None, seq_size],
                                 name="Indices")
        # [n, 2] correct answers go here: 2 classifications, T/F
        Y_ = tf.placeholder(tf.float32, [None, 2])
        # [n * seq_size]
        flat_indices = tf.reshape(indices, shape=[n * seq_size],
                                  name="Reshaped_Indices")

    with tf.name_scope('H1_Layer') as scope:
        # [vocab_size, h1_size]
        word_embeddings = tf.Variable(
            tf.random_uniform([vocab_size, h1_size], -1.0, 1.0),
            name="H1_Word_Embeddings")
        # [n * seq_size, h1_size]
        Y1 = tf.nn.embedding_lookup(word_embeddings, flat_indices,
                                    name="H1_Activations")
        # [n, seq_size * h1_size]
        Y1 = tf.reshape(Y1, shape=[n, seq_size * h1_size],
                        name="Reshaped_H1_Activations")

    with tf.name_scope('H2_Layer') as scope:
        # [seq_size * h1_size, h2_size]
        W2 = tf.Variable(
            tf.truncated_normal([seq_size * h1_size, h2_size], stddev=0.1),
            name="H2_Weights")
        # [h2_size]
        B2 = tf.Variable(tf.zeros([h2_size]), name="H2_Bias")
        # [n, h2_size]
        Y2 = tf.nn.relu(tf.matmul(Y1, W2) + B2, name="H2_Activations")

    with tf.name_scope('Output_Layer') as scope:
        # [h2_size, 2]
        W3 = tf.Variable(tf.truncated_normal([h2_size, 2], stddev=0.1),
                         name="Output_Weights")
        # [2]
        B3 = tf.Variable(tf.zeros([2]), name="Output_Bias")
        # [n, 2]
        Ylogits = tf.matmul(Y2, W3) + B3
        # [n, 2]
        Y = tf.nn.softmax(Ylogits, name="Output")

    with tf.name_scope('Stats') as scope:
        # cross entropy (scalar)
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            logits=Ylogits, labels=Y_, name="Cross_Entropy")
        cross_entropy = tf.reduce_mean(cross_entropy, name="Ave_Cross_Entropy")
        # accuracy of the trained model, between 0 (worst) and 1 (best)
        correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1),
                                      name="Accuracy")
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32),
                                  name="Ave_Accuracy")

    with tf.name_scope('Training_Step') as scope:
        # training step
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(
            cross_entropy)

    print("STARTING SESSION...")
    with tf.Session() as sess:
        # tensorboard writers
        train_path = os.path.join(log_path, "{}-training".format(PREFIX))
        val_path = os.path.join(log_path, "{}-validation".format(PREFIX))
        summary_writer = tf.summary.FileWriter(train_path)
        validation_writer = tf.summary.FileWriter(val_path, sess.graph)

        # histograms
        word_embeddings_summary = tf.summary.histogram("word_embeddings",
                                                       word_embeddings)
        W2_summary = tf.summary.histogram("W2", W2)
        B2_summary = tf.summary.histogram("B2", B2)
        W3_summary = tf.summary.histogram("W3", W3)
        B3_summary = tf.summary.histogram("B3", B3)

        # scalars
        loss_summary = tf.summary.scalar("batch_loss", cross_entropy)
        acc_summary = tf.summary.scalar("batch_accuracy", accuracy)

        # training and validation summaries
        summaries = tf.summary.merge([loss_summary, acc_summary])
        val_summaries = tf.summary.merge([
            loss_summary, acc_summary, word_embeddings_summary,
            W2_summary, B2_summary, W3_summary, B3_summary
        ])

        # initialize
        init = tf.global_variables_initializer()

        # projector
        config = projector.ProjectorConfig()
        # You can add multiple embeddings. Here we add only one.
        embedding = config.embeddings.add()
        embedding.tensor_name = word_embeddings.name
        # Link this tensor to its metadata file (e.g. labels).
        embedding.metadata_path = vocab_path
        # The next call writes a projector_config.pbtxt in the log directory;
        # TensorBoard reads this file during startup.
        projector.visualize_embeddings(summary_writer, config)
        projector.visualize_embeddings(validation_writer, config)

        saver = tf.train.Saver()
        if RESTORE_SESSION:
            # saver = tf.train.import_meta_graph(meta_graph_path)
            saver.restore(sess, meta_graph_path)
            print("RESTORED:", meta_graph_path)
        else:
            sess.run(init)

        global_step = 0
        batch_count = 0
        for epoch in epochscount:
            with read_data_package(data_package_path) as (seqs_reader, vocab,
                                                          probs):
                for i in range(start_batch - batch_count):
                    print("SKIPPING ", i, end="\r")
                    for j in range(batch_size):
                        next(seqs_reader)  # skip to start_batch
                    batch_count += 1
                print()
                batch_gen = batch_generator(seqs_reader, probs, batch_size,
                                            vocab_size, seq_size)
                # create validation sets
                validation_set = [next(batch_gen) for i in range(val_size)]
                validation_set = {
                    "indices": np.vstack([b["indices"]
                                          for b in validation_set]),
                    "Y_": np.vstack([b["Y_"] for b in validation_set]),
                    "n": sum([b["n"] for b in validation_set])
                }
                for i, batch in enumerate(batch_gen):
                    if batches and batch_count >= batches:
                        print("REACHED {} BATCHES".format(batch_count))
                        return
                    global_step += batch["n"]
                    batch_count += 1

                    # validation
                    if i % val_period == 0:
                        feed_dict = {
                            indices: validation_set["indices"],
                            Y_: validation_set["Y_"],
                            n: validation_set["n"]
                        }
                        a, c, smm, y = sess.run(
                            [accuracy, cross_entropy, val_summaries, Y],
                            feed_dict=feed_dict)
                        validation_writer.add_summary(smm, global_step)
                        print("OUTPUT SAMPLE:")
                        for j in range(min(validation_set["n"], 50)):
                            seq = validation_set["indices"].tolist()[j]
                            actual_value = validation_set["Y_"].tolist()[j]
                            # T: [1, 0], F: [0, 1]
                            actual_value = (bool(actual_value[0])
                                            and not bool(actual_value[1]))
                            guess = [round(g) for g in y[j]]
                            guess = (guess[0] > guess[1])  # T: [1, 0], F: [0, 1]
                            confidence = (float(y[j][0]) if guess
                                          else float(y[j][1])) * 100
                            text = ""
                            for k, l in enumerate(seq):
                                text += " " if k > 0 else ""
                                if k == seq_size // 2:
                                    text += ((ANSI.GREEN if actual_value
                                              else ANSI.RED)
                                             + vocab[l] + ANSI.ENDC)
                                else:
                                    text += vocab[l]
                            print_message = ("TEXT: {0:50s} ACTUAL VALUE: {1:6s} "
                                             "GUESS: {2:6s} CONFIDENCE: {3:3.2f}%")\
                                .format(text, str(actual_value), str(guess),
                                        confidence)
                            print(print_message)
                        print("VALIDATION: ACCURACY:{0:7.4f} LOSS:{1:7.4f}"
                              .format(a, c))

                    # forward pass
                    feed_dict = {
                        indices: batch["indices"],
                        Y_: batch["Y_"],
                        n: batch["n"]
                    }
                    a, c, smm = sess.run([accuracy, cross_entropy, summaries],
                                         feed_dict=feed_dict)
                    summary_writer.add_summary(smm, global_step)
                    print("EPOCH:{0:3d} BATCH:{1:10d} ACCURACY:{2:8.4f} "
                          "LOSS:{3:8.4f}".format(epoch, i, a, c))

                    # backprop
                    sess.run(train_step, feed_dict=feed_dict)

                    # save checkpoint
                    if i % cp_period == 0:
                        # sess.run(assign_word_embedding)
                        save_path = saver.save(
                            sess,
                            os.path.join(log_path, "{}.ckpt".format(PREFIX)))
                        # , global_step=global_step)
                        print(save_path)
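# Hypothetical invocation (paths are placeholders; VOCABULARY_FILENAME,
# read_data_package, batch_generator, and ANSI are assumed to be defined
# elsewhere in this module):
# run_random_or_not_nn(
#     "data/package",      # produced by the data-packaging step
#     log_path="log",
#     epochs=1,
#     batches=2000,
#     batch_size=100,
#     learning_rate=0.003)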