def initialize(self, examples):
    '''initialize the NMF model and create the dictionaries

    args:
        examples: the training examples as a list of pairs containing the
            inputs and reference tasks

    returns:
        the initial activations
    '''

    _, tasks = zip(*examples)

    #read all the tasks
    tasks = [read_task(task) for task in tasks]

    #encode the tasks
    vs_full = np.array([self.coder.encode(t) for t in tasks])
    self.knownlabels = np.where(vs_full.sum(0) > 0)[0]
    vs_full = vs_full[:, self.knownlabels]

    #initialize the activations
    h = vs_full
    h = np.concatenate([h] * int(self.conf['numwords_per_label']), axis=1)

    # add noise to the content word activations
    # If set to 0, the activations of content words remain 'pure', i.e.
    # content words that are not present according to the (weak) training
    # supervision will remain absent.
    h = (h + np.random.uniform(0, float(self.conf['activation_scale']),
                               h.shape))

    garbage = np.zeros(
        [h.shape[0], int(h.shape[1] * float(self.conf['garbage_words']))])

    # add noise to the garbage word activations
    if 'garbage_scale' in self.conf.keys():
        gbg_scale = float(self.conf['garbage_scale'])
    else:
        gbg_scale = float(self.conf['activation_scale'])
    garbage = garbage + np.random.uniform(0, gbg_scale, garbage.shape)

    h = np.concatenate([h, garbage], 1)

    numlabels = self.coder.numlabels

    #initialize the semantic dictionary
    self.ws = np.identity(numlabels)
    self.ws = self.ws[self.knownlabels, :]
    self.ws = np.concatenate(
        [self.ws] * int(self.conf['numwords_per_label']))

    #add noise to the semantic dictionary
    self.ws = (self.ws + np.random.uniform(
        0, float(self.conf['semantic_scale']), self.ws.shape))

    #initialize the acoustic dictionary
    self.wa = np.random.uniform(0, float(self.conf['acoustic_scale']),
                                [h.shape[1], self.knownobs.size])

    return h
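# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the toolkit): the matrix shapes that
# initialize() produces, with made-up config values and sizes. All names and
# numbers below are assumptions for illustration only.
# ---------------------------------------------------------------------------
import numpy as np

num_utts = 4            # number of training utterances
num_known = 6           # labels that actually occur in the training tasks
words_per_label = 3     # stands in for conf['numwords_per_label']
garbage_frac = 0.5      # stands in for conf['garbage_words']
num_acoustic = 100      # number of retained acoustic co-occurrences

num_content = num_known * words_per_label
num_garbage = int(num_content * garbage_frac)

# activations: one row per utterance, one column per (content + garbage) word
h = np.random.uniform(0, 1, [num_utts, num_content + num_garbage])

# semantic dictionary: content words -> known labels
ws = np.random.uniform(0, 1, [num_content, num_known])

# acoustic dictionary: all words -> acoustic co-occurrences
wa = np.random.uniform(0, 1, [num_content + num_garbage, num_acoustic])

# the model then approximates the label matrix as vs ~ h[:, :num_content].dot(ws)
# and the acoustic matrix as va ~ h.dot(wa)
print('%s %s %s' % (h.shape, ws.shape, wa.shape))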
def main(expdir):
    '''main function'''

    #read the coder config file
    coderconf = ConfigParser()
    coderconf.read(os.path.join(expdir, 'coder.cfg'))

    #create a task structure file
    structure = Structure(os.path.join(expdir, 'structure.xml'))

    #create a coder
    coder = coder_factory.factory(coderconf.get('coder', 'name'))(structure,
                                                                  coderconf)

    #read the traintasks
    with open(os.path.join(expdir, 'traintasks')) as f:
        lines = f.readlines()

    for line in lines:
        splitline = line.strip().split(' ')
        taskstring = ' '.join(splitline[1:])
        print(splitline[0] + " : " + taskstring)
        task = read_task(taskstring)
        vs = coder.encode(task).astype(int)
        print("%d : [" % len(vs))
        sys.stdout.write(" ".join(str(x) for x in vs))
        print("]\n")
def main(expdir, name):
    '''main function'''

    colorlist = ['black']
    linestyles = ['-', '--', ':', '-.']

    #read the alignment
    alignment = np.load(os.path.join(expdir, 'alignment', '%s.npy' % name))

    #read the decoded tasks
    taskstrings = dict()
    for line in open(os.path.join(expdir, 'dectasks')):
        splitline = line.strip().split(' ')
        taskstrings[splitline[0]] = ' '.join(splitline[1:])

    #read the coder config file
    coderconf = ConfigParser()
    coderconf.read(os.path.join(expdir, 'coder.cfg'))

    #create a task structure file
    structure = Structure(os.path.join(expdir, 'structure.xml'))

    #create a coder
    coder = coder_factory.factory(coderconf.get('coder', 'name'))(structure,
                                                                  coderconf)

    #encode the decoded task
    labelvec = coder.encode(read_task(taskstrings[name]))

    #create the legend and only keep the labels that are active
    legend = coder.labelids
    alignment = [
        alignment[:, l] for l in range(coder.numlabels) if labelvec[l]
    ]
    legend = [legend[l] for l in range(coder.numlabels) if labelvec[l]]

    for i, ali in enumerate(alignment):
        plt.plot(ali,
                 color=colorlist[i % len(colorlist)],
                 linestyle=linestyles[i % len(linestyles)],
                 label=legend[i])

    plt.legend()
    plt.show()
def main(expdir, recipe, computing):
    '''main function'''

    overwrite = False
    if os.path.isdir(expdir):
        text = ''
        while text not in ('o', 'r'):
            text = raw_input('%s already exists, do you want to '
                             'resume the experiment (r) or overwrite it (o) '
                             '(respond with o or r)' % expdir)
        if text == 'o':
            overwrite = True
    else:
        #create the experiments directory
        os.makedirs(expdir)

    #copy the config files
    if overwrite:
        shutil.copyfile(os.path.join(recipe, 'acquisition.cfg'),
                        os.path.join(expdir, 'acquisition.cfg'))
    else:
        tools.safecopy(os.path.join(recipe, 'acquisition.cfg'),
                       os.path.join(expdir, 'acquisition.cfg'))

    shutil.copyfile(os.path.join(recipe, 'coder.cfg'),
                    os.path.join(expdir, 'coder.cfg'))
    shutil.copyfile(os.path.join(recipe, 'structure.xml'),
                    os.path.join(expdir, 'structure.xml'))
    shutil.copyfile(os.path.join(recipe, 'database.cfg'),
                    os.path.join(expdir, 'database.cfg'))
    shutil.copyfile(os.path.join(recipe, 'cross_validation_ppall.cfg'),
                    os.path.join(expdir, 'cross_validation_ppall.cfg'))

    acquisitionconf = ConfigParser()
    acquisitionconf.read(os.path.join(recipe, 'acquisition.cfg'))
    modelname = acquisitionconf.get('acquisition', 'name')
    shutil.copyfile(
        os.path.join(os.getcwd(), 'assist', 'acquisition', 'defaults',
                     modelname + '.cfg'),
        os.path.join(expdir, modelname + '.cfg'))

    #read the cross_validation config file
    expconf = ConfigParser()
    expconf.read(os.path.join(recipe, 'cross_validation.cfg'))

    #default conf file
    default = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                           'defaults', 'cross_validation.cfg')

    #apply the defaults
    if os.path.exists(default):
        tools.default_conf(expconf, default)

    expconf = dict(expconf.items('cross_validation'))

    #read the data config file
    if not os.path.exists(os.path.join(recipe, 'database.cfg')):
        raise Exception('cannot find database.cfg in %s' % recipe)

    dataconf = ConfigParser()
    dataconf.read(os.path.join(recipe, 'database.cfg'))

    #read the coder config file
    coderconf = ConfigParser()
    coderconf.read(os.path.join(expdir, 'coder.cfg'))

    # filter out all speakers with less than 100 examples
    # (in the FluentSpeechCommands dataset (~20%))
    bad_spks = []
    if os.path.exists(os.path.join(recipe, 'FS_linecounts.txt')):
        for l in open(os.path.join(recipe, 'FS_linecounts.txt')):
            splitline = l.strip().split(' ')
            if int(splitline[1]) < 100:
                bad_spks.append(splitline[0])
        print bad_spks

    for speaker in dataconf.sections():

        if speaker in bad_spks:
            continue

        print 'speaker: %s' % speaker

        #create the speaker directory
        if os.path.isdir(os.path.join(expdir, speaker)):
            if overwrite:
                shutil.rmtree(os.path.join(expdir, speaker))
                os.makedirs(os.path.join(expdir, speaker))
        else:
            os.makedirs(os.path.join(expdir, speaker))

        #create a task structure file
        structure = Structure(os.path.join(expdir, 'structure.xml'))

        #create a coder
        coder = coder_factory.factory(coderconf.get('coder', 'name'))(
            structure, coderconf)

        #read and code all the tasks
        labelvecs = []
        names = []
        taskstrings = dict()
        for line in open(dataconf.get(speaker, 'tasks')):
            splitline = line.strip().split(' ')
            name = speaker + '_' + splitline[0]
            names.append(name)
            taskstring = ' '.join(splitline[1:])
            taskstrings[name] = taskstring
            task = read_task(taskstring)
            labelvecs.append(coder.encode(task))

        #divide the data into blocks
        blocksfile = os.path.join(expdir, speaker, 'blocks.pkl')
        if os.path.exists(blocksfile):
            with open(blocksfile, 'rb') as fid:
                blocks = pickle.load(fid)
        else:
            blocks = make_blocks(np.array(labelvecs), expconf,
                                 dataconf.get(speaker, 'features'))
            with open(blocksfile, 'wb') as fid:
                pickle.dump(blocks, fid)

        #create train-test sets for all experiments

        #seed the random number generator
        random.seed(3105)
        trainids = [None] * (len(blocks) - 1)
        testids = [None] * (len(blocks) - 1)
        for b in range(len(blocks) - 1):
            trainids[b] = [None] * int(expconf['numexp'])
            testids[b] = [None] * int(expconf['numexp'])
            for e in range(int(expconf['numexp'])):
                trainids[b][e] = list(
                    itertools.chain.from_iterable(
                        random.sample(blocks, b + 1)))
                testids[b][e] = [
                    x for x in range(len(names)) if x not in trainids[b][e]
                ]

        #read the feature files
        features = dict()
        for l in open(os.path.join(dataconf.get(speaker, 'features'),
                                   'feats')):
            splitline = l.strip().split(' ')
            featname = speaker + '_' + splitline[0]
            features[featname] = ' '.join(splitline[1:])

        #create an expdir for each experiment
        b = int(expconf['startblocks']) - 1
        while True:

            for e in range(int(expconf['numexp'])):

                print '    train blocks: %d, experiment %s' % (b + 1, e)

                #create the directory
                subexpdir = os.path.join(expdir, speaker,
                                         '%dblocks_exp%d' % (b + 1, e))

                if os.path.exists(os.path.join(subexpdir, 'f1')):
                    continue

                if not os.path.isdir(subexpdir):
                    os.makedirs(subexpdir)

                #create pointers to the config files
                tools.symlink(os.path.join(expdir, 'acquisition.cfg'),
                              os.path.join(subexpdir, 'acquisition.cfg'))
                tools.symlink(os.path.join(expdir, 'coder.cfg'),
                              os.path.join(subexpdir, 'coder.cfg'))
                tools.symlink(os.path.join(expdir, 'structure.xml'),
                              os.path.join(subexpdir, 'structure.xml'))
                tools.symlink(os.path.join(expdir, 'database.cfg'),
                              os.path.join(subexpdir, 'database.cfg'))

                if not os.path.exists(os.path.join(subexpdir, 'trainfeats')):
                    trainutts = [names[i] for i in trainids[b][e]]
                    print 'number of examples: %d' % len(trainutts)
                    testutts = [names[i] for i in testids[b][e]]

                    #create the train and test sets
                    tools.writefile(os.path.join(subexpdir, 'trainfeats'),
                                    {utt: features[utt] for utt in trainutts})
                    tools.writefile(os.path.join(subexpdir, 'traintasks'),
                                    {utt: taskstrings[utt]
                                     for utt in trainutts})
                    tools.writefile(os.path.join(subexpdir, 'testfeats'),
                                    {utt: features[utt] for utt in testutts})
                    tools.writefile(os.path.join(subexpdir, 'testtasks'),
                                    {utt: taskstrings[utt]
                                     for utt in testutts})

                if computing in ('condor', 'condor_gpu'):
                    #create the outputs directory
                    if not os.path.isdir(os.path.join(subexpdir, 'outputs')):
                        os.makedirs(os.path.join(subexpdir, 'outputs'))

                    if computing == 'condor_gpu':
                        jobfile = 'run_script_GPU.job'
                    else:
                        jobfile = 'run_script.job'

                    #only submit the job if it is not in the queue yet
                    in_queue = os.popen(
                        'if condor_q -nobatch -wide | grep -q %s; '
                        'then echo true; else echo false; fi' %
                        subexpdir).read().strip() == 'true'

                    #submit the condor job
                    if not in_queue:
                        os.system('condor_submit expdir=%s script=train_test'
                                  ' assist/condor/%s' % (subexpdir, jobfile))
                else:
                    train_test.main(subexpdir)

            newb = (b + 1) * int(expconf['scale']) + int(
                expconf['increment']) - 1
            newb = min(newb, len(blocks) - 2)
            if b == newb:
                break
            else:
                b = newb
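# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the toolkit) of the train/test split logic
# above: each experiment samples b+1 whole blocks as the training set and uses
# everything else as the test set, and the block count then grows via the
# scale/increment schedule. The block contents and config values below are
# made up for illustration.
# ---------------------------------------------------------------------------
import itertools
import random

blocks = [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11], [12, 13, 14],
          [15, 16, 17, 18, 19]]
num_utts = 20
numexp = 2      # stands in for expconf['numexp']
scale = 2.0     # stands in for expconf['scale']
increment = 1   # stands in for expconf['increment']

random.seed(3105)
b = 0
while True:
    for e in range(numexp):
        # train on b+1 randomly chosen blocks, test on the rest
        trainids = list(
            itertools.chain.from_iterable(random.sample(blocks, b + 1)))
        testids = [x for x in range(num_utts) if x not in trainids]
        print('blocks %d, exp %d: %d train / %d test' %
              (b + 1, e, len(trainids), len(testids)))

    # grow the number of training blocks for the next point on the curve
    newb = min(int((b + 1) * scale + increment - 1), len(blocks) - 2)
    if b == newb:
        break
    b = newb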
def fit(self, examples, h, parameters='ash'):
    '''fit the model parameters to the data

    Args:
        examples: the training examples as a list of pairs containing the
            inputs and reference tasks
        h: the initial value for the activations
        parameters: a string with the parameters to be updated, a for the
            acoustic dictionary, s for the semantic dictionary and h for the
            activations

    returns:
        the final activations h
    '''

    features, tasks = zip(*examples)

    #read all the tasks
    tasks = [read_task(task) for task in tasks]

    #encode the tasks
    vs_full = np.array([self.coder.encode(t) for t in tasks])
    self.knownlabels = np.where(vs_full.sum(0) > 0)[0]
    vs_full = vs_full[:, self.knownlabels]

    # apply weighting
    z = np.ones([1, vs_full.shape[0]])
    if 'label_weight_train' in self.conf.keys():
        weightingstrategy = self.conf['label_weight_train']
    else:
        weightingstrategy = "none"
        print('Warning: "acquisition" config key "label_weight_train" set '
              'to "none"')

    if weightingstrategy == "frobNMF":
        # find nonnegative utterance weights z such that vs*z is
        # approximately constant over the labels.
        # We want a maximally flat z, so add ||z||^4 to the cost.
        frobregweight = float(self.conf['frob_nmf_regular'])
        #num = vs_full.dot(labeloccurrence)
        num = vs_full.sum(1, keepdims=True).transpose()
        crit = vs_full.sum(0)
        print 'Prior to weighting: labelratio %f' % (crit.max() /
                                                     (crit.min() + 1e-10))
        #num = np.ones(num.shape)
        for _ in range(10):
            y = z.dot(vs_full)
            den = vs_full.dot(np.transpose(y)).transpose()
            den = den + frobregweight * np.power(z, 3)
            z = z * num / (den + 1e-10)
        vs_full *= z.transpose()
        crit = vs_full.sum(0)
        print 'After weighting: labelratio %f' % (crit.max() /
                                                  (crit.min() + 1e-10))
    # do nothing on "none"

    #use the acoustic model on the features
    events = [self.acoustic(f) for f in features]

    #compute the hacs
    va_full = np.array([
        hac.hac(e, self.delays, int(self.conf['numkeep'])) for e in events
    ])
    va_full *= z.transpose()

    #only keep the acoustics that actually occur
    self.knownobs = np.where(va_full.sum(0) > 0)[0]
    va_full = va_full[:, self.knownobs]

    #make sure the semantics and the inputs sum to the same value
    self.ac_scale = vs_full.sum() / va_full.sum()
    va_full = va_full * self.ac_scale

    #convert the data matrices to sparse matrices
    va = sparse.csr_matrix(va_full)
    vs = sparse.csr_matrix(vs_full)

    #only retain the known observations and labels
    ws = self.ws[:, self.knownlabels]
    wa = self.wa[:, self.knownobs]

    sv = np.array(va.sum(1) + vs.sum(1))

    #get the number of content words
    nc = self.ws.shape[0]

    #normalize
    h = h.clip(float(self.conf['floor']))
    h *= sv / (2 * h[:, :nc].sum(1, keepdims=True) +
               h[:, nc:].sum(1, keepdims=True))
    ws = ws.clip(float(self.conf['floor']))
    wa = wa.clip(float(self.conf['floor']))
    ws /= ws.sum(1, keepdims=True)
    wa /= wa.sum(1, keepdims=True)

    #start iterating
    for _ in range(int(self.conf['numiters_train'])):

        xs = h[:, :nc].dot(ws)
        cs = kld(vs, xs)
        xa = h.dot(wa)
        ca = kld(va, xa)
        print 'nmf cost %f = %f + %f' % (ca + cs, ca, cs)

        #update the semantic dictionary
        if 's' in parameters:
            qs = vs.multiply(1 / (h[:, :nc].dot(ws)))
            num = qs.transpose().dot(h[:, :nc]).transpose()
            den = h[:, :nc].sum(0)[:, np.newaxis]
            ws *= num / den
            ws /= ws.sum(1, keepdims=True)
            ws = ws.clip(float(self.conf['floor']))

        #update the acoustic dictionary
        if 'a' in parameters:
            qa = va.multiply(1 / (h.dot(wa)))
            num = qa.transpose().dot(h).transpose()
            den = h.sum(0)[:, np.newaxis]
            wa *= num / den
            wa /= wa.sum(1, keepdims=True)
            wa = wa.clip(float(self.conf['floor']))

        #update the activations
        if 'h' in parameters:
            qs = vs.multiply(1 / (h[:, :nc].dot(ws)))
            qa = va.multiply(1 / (h.dot(wa)))
            h[:, :nc] *= (qs.dot(ws.transpose()) +
                          qa.dot(wa[:nc].transpose()))
            h[:, :nc] /= wa[:nc].sum(1) + ws.sum(1)
            h[:, nc:] *= qa.dot(wa[nc:].transpose())
            h[:, nc:] /= wa[nc:].sum(1)
            h = h.clip(float(self.conf['floor']))

    self.ws = np.zeros(self.ws.shape)
    self.ws[:, self.knownlabels] = ws
    self.wa = np.zeros(self.wa.shape)
    self.wa[:, self.knownobs] = wa

    return h
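# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the toolkit): the multiplicative updates in
# fit() follow the standard KL-divergence NMF rules. Below is a minimal,
# self-contained version of that update scheme on a single dense matrix; the
# data and sizes are made up, and the joint semantic/acoustic bookkeeping of
# fit() is left out.
# ---------------------------------------------------------------------------
import numpy as np

np.random.seed(0)

V = np.random.rand(20, 30)   # toy nonnegative data matrix (utterances x observations)
rank = 5

# random nonnegative initialization of activations H and dictionary W
H = np.random.rand(20, rank)
W = np.random.rand(rank, 30)

for it in range(50):
    # KL-divergence multiplicative updates (Lee & Seung style)
    X = H.dot(W)
    H *= (V / X).dot(W.T) / W.sum(1)          # update the activations
    X = H.dot(W)
    W *= H.T.dot(V / X) / H.sum(0)[:, None]   # update the dictionary

    # KL divergence between V and its reconstruction
    X = H.dot(W)
    cost = np.sum(V * np.log(V / X) - V + X)
    if it % 10 == 0:
        print('iteration %d: KL cost %f' % (it, cost))

# In fit() above, the same rule is applied to two couplings per iteration:
# vs against h[:, :nc].dot(ws) and va against h.dot(wa), with the activation
# update combining both numerators.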
def main(expdir):
    '''main function'''

    #check if this experiment has been completed
    if os.path.exists(os.path.join(expdir, 'f1')):
        print 'result found %s' % expdir
        return

    #read the acquisition config file
    acquisitionconf = ConfigParser()
    acquisitionconf.read(os.path.join(expdir, 'acquisition.cfg'))

    #read the coder config file
    coderconf = ConfigParser()
    coderconf.read(os.path.join(expdir, 'coder.cfg'))

    #create a task structure file
    structure = Structure(os.path.join(expdir, 'structure.xml'))

    #create a coder
    coder = coder_factory.factory(coderconf.get('coder', 'name'))(structure,
                                                                  coderconf)

    #create an acquisition model
    model = model_factory.factory(
        acquisitionconf.get('acquisition', 'name'))(acquisitionconf, coder,
                                                    expdir)

    print 'loading model'
    model.load(os.path.join(expdir, 'model'))

    print 'prepping testing data'

    #load the testing features
    features = dict()
    for line in open(os.path.join(expdir, 'testfeats')):
        splitline = line.strip().split(' ')
        featsfile = ' '.join(splitline[1:])
        features[splitline[0]] = np.load(featsfile)

    #read the testtasks
    references = dict()
    for line in open(os.path.join(expdir, 'testtasks')):
        splitline = line.strip().split(' ')
        references[splitline[0]] = read_task.read_task(
            ' '.join(splitline[1:]))

    print 'testing the model'

    #decode the test utterances
    decoded = model.decode(features)

    #write the decoded tasks to disc
    with open(os.path.join(expdir, 'dectasks'), 'w') as fid:
        for name, task in decoded.items():
            fid.write('%s %s\n' % (name, read_task.to_string(task)))

    (precision, recal, f1, macroprec, macrorecall, macrof1), scores = \
        score.score(decoded, references)

    print 'precision: %f' % precision
    print 'recall: %f' % recal
    print 'f1: %f' % f1
    print 'macro precision: %f' % macroprec
    print 'macro recall: %f' % macrorecall
    print 'macro f1: %f' % macrof1

    with open(os.path.join(expdir, 'precision'), 'w') as fid:
        fid.write(str(precision))
    with open(os.path.join(expdir, 'recal'), 'w') as fid:
        fid.write(str(recal))
    with open(os.path.join(expdir, 'f1'), 'w') as fid:
        fid.write(str(f1))
    with open(os.path.join(expdir, 'macroprecision'), 'w') as fid:
        fid.write(str(macroprec))
    with open(os.path.join(expdir, 'macrorecal'), 'w') as fid:
        fid.write(str(macrorecall))
    with open(os.path.join(expdir, 'macrof1'), 'w') as fid:
        fid.write(str(macrof1))

    score.write_scores(scores, expdir)
def train(self, examples):
    '''train the model

    Args:
        examples: the training examples as a dictionary of pairs containing
            the inputs and reference tasks
    '''

    self.is_training = True

    #create the graph
    graph = tf.Graph()

    features, tasks = zip(*examples.values())

    #read all the tasks
    tasks = [read_task(task) for task in tasks]

    #encode the tasks
    vs = np.array([self.coder.encode(t) for t in tasks])

    if self.conf['batch_size'] == 'None':
        #use all examples in a single batch
        batch_size = len(features)
    else:
        batch_size = min(int(self.conf['batch_size']), len(features))

    with graph.as_default():
        #create a placeholder for the features
        inputs = tf.placeholder(
            dtype=tf.float32,
            shape=[batch_size, None, features[0].shape[-1]],
            name='inputs')

        seq_length = tf.placeholder(dtype=tf.int32,
                                    shape=[batch_size],
                                    name='seq_length')

        #create a placeholder for the targets
        targets = tf.placeholder(dtype=tf.float32,
                                 shape=[batch_size, vs.shape[-1]],
                                 name='targets')

        #apply the model
        probs = self.model(inputs, seq_length)
        loss = self.loss(targets, probs)

        #count the number of parameters
        num_params = 0
        for var in tf.trainable_variables():
            num_params += reduce(mul, var.get_shape().as_list())
        print 'number of parameters: %d' % num_params

        #create an optimizer
        optimizer = tf.train.AdamOptimizer(float(self.conf['learning_rate']))

        #compute the gradients
        grads_and_vars = optimizer.compute_gradients(loss=loss)

        with tf.variable_scope('clip'):
            #clip the gradients
            grads_and_vars = [(tf.clip_by_value(grad, -1., 1.), var)
                              for grad, var in grads_and_vars]

        #operation to apply the gradients
        apply_gradients_op = optimizer.apply_gradients(
            grads_and_vars=grads_and_vars, name='apply_gradients')

        #all remaining operations with the UPDATE_OPS GraphKeys
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

        #create an operation to update the model
        update_op = tf.group(*([apply_gradients_op] + update_ops),
                             name='update')

        #create the init op
        init_op = tf.variables_initializer(tf.global_variables())

        #create a saver
        saver = tf.train.Saver()

        #create a summary
        for var in tf.trainable_variables():
            tf.summary.histogram(var.name, var)
        tf.summary.scalar('loss', loss)
        if self.conf['images'] == 'True':
            images = tf.get_collection('image')
            for image in images:
                tf.summary.image(image.name, image)
        summary = tf.summary.merge_all()

    #create a session
    session_conf = tf.ConfigProto(inter_op_parallelism_threads=1,
                                  intra_op_parallelism_threads=4)
    sess = tf.Session(graph=graph, config=session_conf)

    #create a summary writer
    writer = tf.summary.FileWriter(os.path.join(self.expdir, 'logdir'),
                                   graph)

    #initialize the model
    sess.run(init_op)

    #create an index queue
    index_queue = []
    for _ in range(int(self.conf['numiters'])):
        i = range(len(tasks))
        shuffle(i)
        index_queue += i

    #iteratively train the model
    i = 0
    while len(index_queue) > batch_size:
        indices = index_queue[:batch_size]
        index_queue = index_queue[batch_size:]
        batch_inputs = [features[j] for j in indices]
        batch_lengths = np.array([f.shape[0] for f in batch_inputs])
        ml = np.max(batch_lengths)
        batch_inputs = np.array([
            np.pad(f, ((0, ml - f.shape[0]), (0, 0)), 'constant')
            for f in batch_inputs
        ])

        #full tracing is disabled (i is an int, so this is never true)
        if i == 'a':
            run_options = tf.RunOptions(
                trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()
        else:
            run_options = run_metadata = None

        _, s, l = sess.run(
            (update_op, summary, loss),
            feed_dict={
                inputs: batch_inputs,
                seq_length: batch_lengths,
                targets: vs[indices]
            },
            options=run_options,
            run_metadata=run_metadata)

        print 'step %d: loss = %f' % (i, l)

        #early stopping
        if i == 0:
            base_l = l
        if l <= base_l * float(self.conf['early_stop']):
            break

        writer.add_summary(s, i)
        if i == 'a':
            writer.add_run_metadata(run_metadata, 'statistics')
        i += 1

    #save the final model
    saver.save(sess, os.path.join(self.expdir, 'logdir', 'model.ckpt'))
    sess.close()

    self.is_training = False
def main(expdir, recipe, computing):
    '''main function'''

    overwrite = False
    if os.path.isdir(expdir):
        text = ''
        while text not in ('o', 'r'):
            text = raw_input('%s already exists, do you want to '
                             'resume the experiment (r) or overwrite it (o) '
                             '(respond with o or r)' % expdir)
        if text == 'o':
            overwrite = True
    else:
        #create the experiments directory
        os.makedirs(expdir)

    #copy the config files
    if overwrite:
        shutil.copyfile(os.path.join(recipe, 'acquisition.cfg'),
                        os.path.join(expdir, 'acquisition.cfg'))
    else:
        tools.safecopy(os.path.join(recipe, 'acquisition.cfg'),
                       os.path.join(expdir, 'acquisition.cfg'))

    shutil.copyfile(os.path.join(recipe, 'coder.cfg'),
                    os.path.join(expdir, 'coder.cfg'))
    shutil.copyfile(os.path.join(recipe, 'structure.xml'),
                    os.path.join(expdir, 'structure.xml'))
    shutil.copyfile(os.path.join(recipe, 'database.cfg'),
                    os.path.join(expdir, 'database.cfg'))
    shutil.copyfile(os.path.join(recipe, 'cross_validation_ppall.cfg'),
                    os.path.join(expdir, 'cross_validation_ppall.cfg'))

    acquisitionconf = ConfigParser()
    acquisitionconf.read(os.path.join(recipe, 'acquisition.cfg'))
    modelname = acquisitionconf.get('acquisition', 'name')
    shutil.copyfile(
        os.path.join(os.getcwd(), 'assist', 'acquisition', 'defaults',
                     modelname + '.cfg'),
        os.path.join(expdir, modelname + '.cfg'))

    #read the cross_validation config file
    expconf = ConfigParser()
    expconf.read(os.path.join(recipe, 'cross_validation_ppall.cfg'))

    #default conf file
    default = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                           'defaults', 'cross_validation_ppall.cfg')

    #apply the defaults
    if os.path.exists(default):
        tools.default_conf(expconf, default)

    expconf = dict(expconf.items('cross_validation_ppall'))

    #read the data config file
    if not os.path.exists(os.path.join(recipe, 'database.cfg')):
        raise Exception('cannot find database.cfg in %s' % recipe)

    dataconf = ConfigParser()
    dataconf.read(os.path.join(recipe, 'database.cfg'))

    #read the coder config file
    coderconf = ConfigParser()
    coderconf.read(os.path.join(expdir, 'coder.cfg'))

    # for word specific thresholds (not used anymore)
    #if os.path.isfile(os.path.join(recipe, 'word_thresholds.pkl')):
    #    print('File with word thresholds found in recipe')
    #    shutil.copyfile(os.path.join(recipe, 'word_thresholds.pkl'),
    #                    os.path.join(expdir, 'word_thresholds.pkl'))
    #    thresholdsarepresent = True
    #else:
    #    print('No file found with word thresholds, using a fixed one')
    #    thresholdsarepresent = False

    labelvecs = []
    names = []
    taskstrings = dict()
    features = dict()

    print 'Searching for all speakers...'
    for speaker in dataconf.sections():

        print '    speaker: %s' % speaker

        #create a task structure file
        structure = Structure(os.path.join(expdir, 'structure.xml'))

        #create a coder
        coder = coder_factory.factory(coderconf.get('coder', 'name'))(
            structure, coderconf)
        #see typesplit_coder.py (around line 51) for all labels and the
        #corresponding output capsule numbers

        #read and code all the tasks, one line per utterance, e.g.
        #'recording1_Voice_10 <move_rel direction="forward" distance="little"
        # throttle="fast"/>'
        for line in open(dataconf.get(speaker, 'tasks')):
            splitline = line.strip().split(' ')
            name = speaker + '_' + splitline[0]
            names.append(name)
            taskstring = ' '.join(splitline[1:])
            taskstrings[name] = taskstring
            task = read_task(taskstring)
            labelvecs.append(coder.encode(task))

        #read the feature files, one line per utterance, e.g.
        #'recording1_Voice_10 /path/to/grabo_features/pp2/recording1_Voice_10.npy'
        for l in open(os.path.join(dataconf.get(speaker, 'features'),
                                   'feats')):
            splitline = l.strip().split(' ')
            featname = speaker + '_' + splitline[0]
            features[featname] = ' '.join(splitline[1:])

    print 'Divide the data into blocks...'
    #divide the data into blocks; look for an existing blocks file in the
    #recipe because making one takes a very long time
    blocksfile = os.path.join(recipe, 'blocks.pkl')
    if os.path.exists(blocksfile):
        print 'Loading found blocks file (check that the number of blocks is still the same)'
        with open(blocksfile, 'rb') as fid:
            blocks = pickle.load(fid)
    else:
        print 'No blocks file found in recipe, making a new one'
        #blocks is a list of blocks, each block being a list of utterance
        #indices
        blocks = make_blocks(np.array(labelvecs), expconf, expdir)
        with open(blocksfile, 'wb') as fid:
            pickle.dump(blocks, fid)

    print 'Shuffle speakers...'
    #look for existing train and test sets in the recipe ('saved_ids') and
    #load them, because making them takes a very long time
    sets_properties = {}
    if os.path.isdir(os.path.join(recipe, 'saved_ids')):
        saved_ids = ConfigParser()
        saved_ids.read(
            os.path.join(recipe, 'saved_ids', 'cross_validation_ppall.cfg'))
        sets_properties = dict(saved_ids.items('cross_validation_ppall'))
    else:
        sets_properties['numblocks'] = 0
        sets_properties['numexp'] = 0

    if (sets_properties['numblocks'] == expconf['numblocks']) and \
            (sets_properties['numexp'] == expconf['numexp']):
        print '    Loading found test recipe'
        trainids_saved = os.path.join(recipe, 'saved_ids', 'trainids.pkl')
        with open(trainids_saved, 'rb') as fid:
            trainids = pickle.load(fid)
        testids_saved = os.path.join(recipe, 'saved_ids', 'testids.pkl')
        with open(testids_saved, 'rb') as fid:
            testids = pickle.load(fid)
    else:
        print('    No saved test and train sets found with the same '
              'cross-validation configuration in the recipe')

        #seed the random number generator
        random.seed(3105)
        trainids = [None] * (len(blocks) - 1)
        testids = [None] * (len(blocks) - 1)
        print '    Number of blocks: %d' % len(blocks)
        b = 0
        while b < (len(blocks) - 1):
            print '    block %d' % b
            trainids[b] = [None] * int(expconf['numexp'])
            testids[b] = [None] * int(expconf['numexp'])
            for e in range(int(expconf['numexp'])):
                trainids[b][e] = list(
                    itertools.chain.from_iterable(
                        random.sample(blocks, b + 1)))
                testids[b][e] = [
                    x for x in range(len(names)) if x not in trainids[b][e]
                ]

            #scale factor to use more small blocks and fewer big blocks
            #(the learning curve saturates)
            newb = int(
                np.floor((b + 1) * float(expconf['scale']) +
                         int(expconf['increment']) - 1))
            newb = min(newb, len(blocks) - 2)
            if b == newb:
                break
            else:
                b = newb

        os.makedirs(os.path.join(expdir, 'saved_ids'))
        trainids_saved = os.path.join(expdir, 'saved_ids', 'trainids.pkl')
        testids_saved = os.path.join(expdir, 'saved_ids', 'testids.pkl')
        with open(trainids_saved, 'wb') as fid:
            pickle.dump(trainids, fid)
        with open(testids_saved, 'wb') as fid:
            pickle.dump(testids, fid)
        shutil.copyfile(
            os.path.join(recipe, 'cross_validation_ppall.cfg'),
            os.path.join(expdir, 'saved_ids', 'cross_validation_ppall.cfg'))

    #create an expdir for each experiment
    b = int(expconf['startblocks']) - 1

    print 'Launch the experiments...'
    while True:

        for e in range(int(expconf['numexp'])):

            print '    train blocks: %d, experiment %s' % (b + 1, e)

            #create the directory
            subexpdir = os.path.join(expdir, '%dblocks_exp%d' % (b + 1, e))

            if os.path.exists(os.path.join(subexpdir, 'f1')):
                continue

            if not os.path.isdir(subexpdir):
                os.makedirs(subexpdir)

            #create pointers to the config files
            tools.symlink(os.path.join(expdir, 'acquisition.cfg'),
                          os.path.join(subexpdir, 'acquisition.cfg'))
            tools.symlink(os.path.join(expdir, 'coder.cfg'),
                          os.path.join(subexpdir, 'coder.cfg'))
            tools.symlink(os.path.join(expdir, 'structure.xml'),
                          os.path.join(subexpdir, 'structure.xml'))
            tools.symlink(os.path.join(expdir, 'database.cfg'),
                          os.path.join(subexpdir, 'database.cfg'))
            #if thresholdsarepresent:
            #    tools.symlink(os.path.join(expdir, 'word_thresholds.pkl'),
            #                  os.path.join(subexpdir, 'word_thresholds.pkl'))

            if not os.path.exists(os.path.join(subexpdir, 'trainfeats')):
                trainutts = [names[i] for i in trainids[b][e]]
                print 'number of examples: %d' % len(trainutts)
                testutts = [names[i] for i in testids[b][e]]

                #create the train and test sets
                tools.writefile(os.path.join(subexpdir, 'trainfeats'),
                                {utt: features[utt] for utt in trainutts})
                tools.writefile(os.path.join(subexpdir, 'traintasks'),
                                {utt: taskstrings[utt] for utt in trainutts})
                tools.writefile(os.path.join(subexpdir, 'testfeats'),
                                {utt: features[utt] for utt in testutts})
                tools.writefile(os.path.join(subexpdir, 'testtasks'),
                                {utt: taskstrings[utt] for utt in testutts})

            if computing in ('condor', 'condor_gpu'):
                #create the outputs directory
                if not os.path.isdir(os.path.join(subexpdir, 'outputs')):
                    os.makedirs(os.path.join(subexpdir, 'outputs'))

                if computing == 'condor_gpu':
                    jobfile = 'run_script_GPU.job'
                else:
                    jobfile = 'run_script.job'

                #only submit the job if it is not in the queue yet
                in_queue = os.popen(
                    'if condor_q -nobatch -wide | grep -q %s; '
                    'then echo true; else echo false; fi' %
                    subexpdir).read().strip() == 'true'

                #submit the condor job
                if not in_queue:
                    os.system('condor_submit expdir=%s script=train_test'
                              ' assist/condor/%s' % (subexpdir, jobfile))
            else:
                train_test.main(subexpdir)

        newb = int(
            np.floor((b + 1) * float(expconf['scale']) +
                     int(expconf['increment']) - 1))
        newb = min(newb, len(blocks) - 2)
        if b == newb:
            break
        else:
            b = newb
def main(expdir):
    '''main function'''

    #check if this experiment has been completed
    if os.path.exists(os.path.join(expdir, 'f1')):
        print 'result found %s' % expdir
        return

    #read the acquisition config file
    acquisitionconf = ConfigParser()
    acquisitionconf.read(os.path.join(expdir, 'acquisition.cfg'))

    #read the coder config file
    coderconf = ConfigParser()
    coderconf.read(os.path.join(expdir, 'coder.cfg'))

    #create a task structure file
    structure = Structure(os.path.join(expdir, 'structure.xml'))

    #create a coder
    coder = coder_factory.factory(coderconf.get('coder', 'name'))(structure,
                                                                  coderconf)

    #create an acquisition model
    model = model_factory.factory(
        acquisitionconf.get('acquisition', 'name'))(acquisitionconf, coder,
                                                    expdir)

    print 'loading model'
    model.load(os.path.join(expdir, 'model'))

    print 'prepping testing data'

    #load the testing features
    features = dict()
    for line in open(os.path.join(expdir, 'testfeats')):
        splitline = line.strip().split(' ')
        featsfile = ' '.join(splitline[1:])
        features[splitline[0]] = np.load(featsfile)

    #read the testtasks
    references = dict()
    tasklabels = dict()
    for line in open(os.path.join(expdir, 'testtasks')):
        splitline = line.strip().split(' ')
        task = read_task.read_task(' '.join(splitline[1:]))
        references[splitline[0]] = task
        tasklabels[splitline[0]] = coder.encode(task)

    #find all words said by the speakers and save the spoken sentences of the
    #test utterances
    wordcount = {}
    testsentences = {}
    dataconf = ConfigParser()
    dataconf.read(os.path.join(expdir, 'database.cfg'))
    for speaker in dataconf.sections():
        taskloc = dataconf.get(speaker, 'tasks')
        textloc = taskloc[:-5] + str('text')
        with open(textloc) as fp:
            line = (fp.readline())[:-1]
            while line:
                sentence = (line.split(" "))[1:]
                voice = str(speaker) + '_' + (line.split(" "))[0]
                for word in sentence:
                    if word in wordcount:
                        wordcount[word] += 1
                    else:
                        wordcount[word] = 1
                if voice in references:
                    testsentences[voice] = " ".join(sentence)
                line = (fp.readline())[:-1]

    all_words = sorted(wordcount.keys())  #ordered alphabetically

    #read the single best word for each label
    singlebest = []
    with open(os.path.join(expdir, 'singlebestwords'), 'r') as fp:
        line = (fp.readline())[:-1]
        while line:
            word = (line.split(" "))[1]
            singlebest.append(word)
            line = (fp.readline())[:-1]

    #get the single best predictions for each utterance
    singlebest_sentences = {}
    for voice in tasklabels:
        label = tasklabels[voice]
        indices = np.nonzero(label)
        sentence = ''
        for i in indices[0]:
            word = singlebest[i]
            sentence = sentence + ' ' + str(word)
        singlebest_sentences[voice] = sentence

    with open(os.path.join(expdir, 'singlebestsentences'), 'w') as fid:
        for name, sentence in sorted(singlebest_sentences.items()):
            fid.write('%s %s\n' % (name, sentence))

    print 'testing the model'

    word_thresholds = None
    #if you would want word specific thresholds ...
    threshfile = os.path.join(expdir, 'word_thresholds.pkl')
    if os.path.isfile(threshfile):
        with open(threshfile, 'r') as fid:
            word_dict = pickle.load(fid)
        word_thresholds = []
        for word, thresh in sorted(word_dict.items()):
            word_thresholds.append(thresh)

    print 'LAUNCHING MODEL.DECODE'
    decoded_speakers = dict()
    decoded_words = dict()

    #filter out features that are too short
    #(+ the corresponding references, so score.score does not raise errors)
    for k in features.keys():
        if features[k].shape[0] <= 5:
            del features[k]
            del references[k]

    #decode the test utterances
    if acquisitionconf.get('acquisition', 'name') == 'nmf':
        decoded = model.decode(features)
    else:
        decoded, decoded_speakers, decoded_words = model.decode(
            features, all_words, word_thresholds)

    #write the decoded tasks to disc
    with open(os.path.join(expdir, 'dectasks'), 'w') as fid:
        for name, task in decoded.items():
            fid.write('%s %s\n' % (name, read_task.to_string(task)))

    if acquisitionconf.get('acquisition', 'name') != 'nmf':

        with open(os.path.join(expdir, 'decspeakers'), 'w') as fid:
            for name, spk in decoded_speakers.items():
                fid.write('%s %s\n' % (name, spk))

        speakerperformance = score.spk_score(decoded_speakers)
        print 'speakerperformance: %f' % speakerperformance
        with open(os.path.join(expdir, 'speakerperformance'), 'w') as fid:
            fid.write(str(speakerperformance))

        decoded_sentences = {}
        with open(os.path.join(expdir, 'decwords'), 'w') as fid:
            for name, wordslist in sorted(decoded_words.items()):
                sentence = ''
                #indices of all words that were decoded as present
                wordindices = np.nonzero(wordslist)
                for wordindex in wordindices[0]:
                    word = all_words[wordindex]
                    sentence = sentence + ' ' + str(word)
                fid.write('%s %s\n' % (name, sentence))
                decoded_sentences[name] = sentence

        with open(os.path.join(expdir, 'testwords'), 'w') as fid:
            for name, sentence in sorted(testsentences.items()):
                fid.write('%s %s\n' % (name, sentence))

        word_f1, word_precision, word_recal = score.wordscore(
            decoded_sentences, testsentences)
        singlebest_f1, singlebest_precision, singlebest_recal = \
            score.wordscore(singlebest_sentences, testsentences)

        print 'word_f1: %f' % word_f1
        print 'word_precision: %f' % word_precision
        print 'word_recal: %f' % word_recal
        with open(os.path.join(expdir, 'word_f1'), 'w') as fid:
            fid.write(str(word_f1))
        with open(os.path.join(expdir, 'word_precision'), 'w') as fid:
            fid.write(str(word_precision))
        with open(os.path.join(expdir, 'word_recal'), 'w') as fid:
            fid.write(str(word_recal))

        print 'singlebest_f1: %f' % singlebest_f1
        print 'singlebest_precision: %f' % singlebest_precision
        print 'singlebest_recal: %f' % singlebest_recal
        with open(os.path.join(expdir, 'singlebest_f1'), 'w') as fid:
            fid.write(str(singlebest_f1))
        with open(os.path.join(expdir, 'singlebest_precision'), 'w') as fid:
            fid.write(str(singlebest_precision))
        with open(os.path.join(expdir, 'singlebest_recal'), 'w') as fid:
            fid.write(str(singlebest_recal))

    (precision, recal, f1, macroprec, macrorecall, macrof1), scores = \
        score.score(decoded, references)
    fluent_accuracy = score.fluentscore(decoded, references)

    print 'fluent_accuracy: %f' % fluent_accuracy
    with open(os.path.join(expdir, 'fluent_accuracy'), 'w') as fid:
        fid.write(str(fluent_accuracy))

    print 'precision: %f' % precision
    print 'recall: %f' % recal
    print 'f1: %f' % f1
    print 'macro precision: %f' % macroprec
    print 'macro recall: %f' % macrorecall
    print 'macro f1: %f' % macrof1

    with open(os.path.join(expdir, 'precision'), 'w') as fid:
        fid.write(str(precision))
    with open(os.path.join(expdir, 'recal'), 'w') as fid:
        fid.write(str(recal))
    with open(os.path.join(expdir, 'f1'), 'w') as fid:
        fid.write(str(f1))
    with open(os.path.join(expdir, 'macroprecision'), 'w') as fid:
        fid.write(str(macroprec))
    with open(os.path.join(expdir, 'macrorecal'), 'w') as fid:
        fid.write(str(macrorecall))
    with open(os.path.join(expdir, 'macrof1'), 'w') as fid:
        fid.write(str(macrof1))

    score.write_scores(scores, expdir)

    #clean up the tensorflow log directories
    for f in glob.glob('%s*' % os.path.join(expdir, 'logdir')):
        shutil.rmtree(f, ignore_errors=True)
    for f in glob.glob('%s*' % os.path.join(expdir, 'logdir-decode')):
        shutil.rmtree(f, ignore_errors=True)
def main(expdir):
    '''main function'''

    #check if this experiment has been completed
    if os.path.isdir(os.path.join(expdir, 'model')):
        return

    #read the acquisition config file
    acquisitionconf = ConfigParser()
    acquisitionconf.read(os.path.join(expdir, 'acquisition.cfg'))

    #read the coder config file
    coderconf = ConfigParser()
    coderconf.read(os.path.join(expdir, 'coder.cfg'))

    #create a task structure file
    structure = Structure(os.path.join(expdir, 'structure.xml'))

    #create a coder
    coder = coder_factory.factory(coderconf.get('coder', 'name'))(
        structure, coderconf)

    #create an acquisition model
    model = model_factory.factory(
        acquisitionconf.get('acquisition', 'name'))(acquisitionconf, coder,
                                                    expdir)

    print 'prepping training data'

    #load the training features
    features = dict()
    for line in open(os.path.join(expdir, 'trainfeats')):
        splitline = line.strip().split(' ')
        featsfile = ' '.join(splitline[1:])
        features[splitline[0]] = np.load(featsfile)

    #read the traintasks
    taskstrings = dict()
    for line in open(os.path.join(expdir, 'traintasks')):
        splitline = line.strip().split(' ')
        taskstrings[splitline[0]] = ' '.join(splitline[1:])
        task = read_task(taskstrings[splitline[0]])
        label = coder.encode(task)
    num_labels = len(label)
    print 'num_labels: %d' % num_labels

    #find all words said by the speakers and count the number of occurrences
    wordcount = {}
    wordcount_in_traintasks = {}
    sentencecount = 0
    sentencecount_in_traintasks = 0

    #save the transcriptions of the traintasks
    trainsentences = {}
    unique_sentencecount = {}
    dataconf = ConfigParser()
    dataconf.read(os.path.join(expdir, 'database.cfg'))
    for speaker in dataconf.sections():
        taskloc = dataconf.get(speaker, 'tasks')
        textloc = taskloc[:-5] + str('text')
        with open(textloc) as fp:
            line = (fp.readline())[:-1]
            while line:
                sentencecount += 1
                voice = str(speaker) + '_' + (line.split(" "))[0]
                sentence = (line.split(" "))[1:]
                for word in sentence:
                    if word in wordcount:
                        wordcount[word] += 1
                    else:
                        wordcount[word] = 1
                if voice in taskstrings:
                    trainsentences[voice] = " ".join(sentence)
                    sentencecount_in_traintasks += 1
                    for word in sentence:
                        if word in wordcount_in_traintasks:
                            wordcount_in_traintasks[word] += 1
                        else:
                            wordcount_in_traintasks[word] = 1
                    uniquewords = list(dict.fromkeys(sentence))
                    for word in uniquewords:
                        if word in unique_sentencecount:
                            unique_sentencecount[word] += 1
                        else:
                            unique_sentencecount[word] = 1
                line = (fp.readline())[:-1]

    all_words = sorted(wordcount.keys())  #ordered alphabetically

    #class balancing factors for each word: row 0 for present, row 1 for absent
    wordfactors = np.zeros((2, len(all_words)), dtype=np.float32)
    for i in range(0, len(all_words)):
        word = all_words[i]
        if word not in wordcount_in_traintasks:
            continue
        count = wordcount_in_traintasks[word]
        if count == sentencecount_in_traintasks:
            count -= 1
        #use float division so the factors are not truncated to integers
        wordfactors[0, i] = float(sentencecount_in_traintasks) / (2 * count)
        wordfactors[1, i] = float(sentencecount_in_traintasks) / (
            2 * (sentencecount_in_traintasks - count))
        print('Word: ', word, ' Wordfactors (present vs absent): ',
              wordfactors[0, i], ' and ', wordfactors[1, i])

    #create lists of features and training tasks
    examples = {utt: (features[utt], taskstrings[utt])
                for utt in taskstrings}

    #calculate the TF: for every task label, count the frequency of the words
    #in the trainsentences that carry that label
    wordfreq_matrix = np.zeros((num_labels, len(all_words)))
    for voice in taskstrings:
        taskstring = taskstrings[voice]
        task = read_task(taskstring)
        labels = coder.encode(task)
        sentence = trainsentences[voice]
        for word in sentence.split(" "):
            wordind = all_words.index(word)
            wordfreq_matrix[:, wordind] += labels  #term frequency TF

    #calculate the IDF
    #total number of sentences in the training set
    N = sentencecount_in_traintasks
    #for each word, the number of training sentences in which it occurs
    D = np.zeros(len(all_words))
    for word in unique_sentencecount:
        wordind = all_words.index(word)
        count = unique_sentencecount[word]
        D[wordind] = count
    IDF = np.array([np.log(N / (1 + x)) for x in D])  #inverse document frequency

    TFIDF = wordfreq_matrix * IDF  #TF-IDF matrix

    #get all labels and the corresponding capsule numbers
    print(coder.argindices.items())

    #save the single best word chosen for each label to a file
    with open(os.path.join(expdir, 'singlebestwords'), 'w') as fid:
        for i in range(0, num_labels):
            singlebest_ind = np.argmax(TFIDF[i, :])
            singlebest = all_words[singlebest_ind]
            #TODO: also include the label name here
            fid.write('%i %s\n' % (i, singlebest))

    #save the trainsentences to a file
    with open(os.path.join(expdir, 'trainwords'), 'w') as fid:
        for name, sentence in sorted(trainsentences.items()):
            fid.write('%s %s\n' % (name, sentence))

    if acquisitionconf.get('acquisition', 'name') == 'nmf':
        model.train(examples)
    else:
        print 'training acquisition model (LAUNCHING MODEL.TRAIN)'
        model.train(examples, all_words, wordfactors)

    #save the trained model
    model.save(os.path.join(expdir, 'model'))
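# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the toolkit) of the TF-IDF selection above,
# with a made-up toy vocabulary and label assignment: for each label, the word
# with the highest TF-IDF score over the sentences carrying that label is
# picked as its 'single best' word. All names and numbers are illustrative.
# ---------------------------------------------------------------------------
import numpy as np

num_labels = 2
data = [
    (np.array([1, 0]), "turn the light on please"),
    (np.array([1, 0]), "please turn on the light"),
    (np.array([0, 1]), "turn the heating off"),
    (np.array([0, 1]), "switch the heating off please"),
]

all_words = sorted({w for _, s in data for w in s.split()})

# term frequency per label: how often each word co-occurs with each label
TF = np.zeros((num_labels, len(all_words)))
# document frequency: in how many sentences each word occurs
D = np.zeros(len(all_words))
for labels, sentence in data:
    for word in set(sentence.split()):
        D[all_words.index(word)] += 1
    for word in sentence.split():
        TF[:, all_words.index(word)] += labels

# inverse document frequency and the resulting TF-IDF scores
IDF = np.log(len(data) / (1.0 + D))
TFIDF = TF * IDF

# the single best word for each label is the TF-IDF argmax of its row
for i in range(num_labels):
    print('label %d -> %s' % (i, all_words[int(np.argmax(TFIDF[i]))]))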
def train(self, examples, all_words, wordfactors):
    '''train the model

    Args:
        examples: the training examples as a dictionary of pairs containing
            the inputs and reference tasks
        all_words: list with all words said by any speaker, ordered
            alphabetically
        wordfactors: per-word weighting factors, row 0 for present words and
            row 1 for absent words
    '''

    self.is_training = True

    #create the graph
    graph = tf.Graph()

    #remove examples that are too short or contain non-finite features
    for k in examples.keys():
        if (examples[k][0].shape[0] <= 5) or not np.isfinite(
                examples[k][0]).all():
            del examples[k]

    features, tasks = zip(*examples.values())
    voices = examples.keys()

    #encode the speakers
    dataconf = ConfigParser()
    dataconf.read(os.path.join(self.expdir, 'database.cfg'))
    nr_spk = len(dataconf.sections())
    print 'nr_spk: %d' % nr_spk
    speakers_ordered = dataconf.sections()
    print speakers_ordered

    #speaker index for every utterance
    encoded_spk = np.zeros(len(tasks))
    cnt = 0
    for voice in voices:
        spk_id = '_'.join(voice.split('_')[:1])
        encoded_spk[cnt] = speakers_ordered.index(spk_id)
        cnt += 1

    #encode all the words
    encoded_words = np.zeros((len(all_words), len(tasks)))
    voicestemp = list(voices)
    for speaker in speakers_ordered:
        result = sorted(
            [x for x in voicestemp if x.startswith(speaker + '_')])
        voicestemp = [x for x in voicestemp if x not in result]
        if result:  #check if not empty
            #folder of the database with the text written out
            taskloc = dataconf.get(speaker, 'tasks')
            textloc = taskloc[:-5] + str('text')
            with open(textloc) as fp:
                line = (fp.readline())[:-1]
                while line:
                    voice = speaker + '_' + str((line.split(" "))[0])
                    sentence = (line.split(" "))[1:]
                    if voice in result:
                        corr_index = voices.index(voice)
                        for word in sentence:
                            pos = all_words.index(word)
                            encoded_words[pos, corr_index] = 1
                    line = (fp.readline())[:-1]
    encoded_words = np.transpose(encoded_words)

    #read all the tasks
    tasks = [read_task(task) for task in tasks]

    #encode the tasks
    vs = np.array([self.coder.encode(t) for t in tasks])

    if self.conf['batch_size'] == 'None':
        #use all examples in a single batch
        batch_size = len(features)
    else:
        batch_size = min(int(self.conf['batch_size']), len(features))

    with graph.as_default():
        #create a placeholder for the features
        inputs = tf.placeholder(
            dtype=tf.float32,
            shape=[batch_size, None, features[0].shape[-1]],
            name='inputs')

        seq_length = tf.placeholder(dtype=tf.int32,
                                    shape=[batch_size],
                                    name='seq_length')

        #create a placeholder for the targets
        targets = tf.placeholder(dtype=tf.float32,
                                 shape=[batch_size, vs.shape[-1]],
                                 name='targets')

        #create placeholders for the speaker and word labels
        speakers = tf.placeholder(dtype=tf.int32,
                                  shape=[batch_size],
                                  name='speakers')
        correct_words = tf.placeholder(dtype=tf.float32,
                                       shape=[batch_size, len(all_words)],
                                       name='correct_words')

        #expand the word factors to the batch dimension
        expanded_wordfactors_present = np.zeros(
            (batch_size, len(all_words)), np.float32)
        expanded_wordfactors_absent = np.zeros(
            (batch_size, len(all_words)), np.float32)
        for i in range(0, batch_size):
            expanded_wordfactors_present[i, :] = wordfactors[0, :]
            expanded_wordfactors_absent[i, :] = wordfactors[1, :]
        expanded_wordfactors_present = tf.convert_to_tensor(
            expanded_wordfactors_present, dtype=tf.float32)
        expanded_wordfactors_absent = tf.convert_to_tensor(
            expanded_wordfactors_absent, dtype=tf.float32)

        #apply the model
        labelprobs, spklogits, wordlogits = self.model(
            inputs, seq_length, nr_spk, all_words, targets)
        loss, labelloss, spkloss, wordloss = self.loss(
            targets, speakers, correct_words, labelprobs, spklogits,
            wordlogits, expanded_wordfactors_present,
            expanded_wordfactors_absent)

        #count the number of parameters
        num_params = 0
        for var in tf.trainable_variables():
            num_params += reduce(mul, var.get_shape().as_list())
        print 'number of parameters: %d' % num_params

        #create an optimizer
        optimizer = tf.train.AdamOptimizer(
            learning_rate=float(self.conf['learning_rate']), epsilon=1e-03)

        #compute the gradients
        grads_and_vars = optimizer.compute_gradients(loss=loss)

        with tf.variable_scope('clip'):
            #clip the gradients
            grads_and_vars = [(tf.clip_by_value(grad, -1., 1.), var)
                              for grad, var in grads_and_vars]

        #operation to apply the gradients
        apply_gradients_op = optimizer.apply_gradients(
            grads_and_vars=grads_and_vars, name='apply_gradients')

        #all remaining operations with the UPDATE_OPS GraphKeys
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

        #create an operation to update the model
        update_op = tf.group(*([apply_gradients_op] + update_ops),
                             name='update')

        #create the init op
        init_op = tf.variables_initializer(tf.global_variables())

        #create a saver
        saver = tf.train.Saver()

        #create a summary
        for var in tf.trainable_variables():
            tf.summary.histogram(var.name, var)
        tf.summary.scalar('loss', loss)
        if self.conf['images'] == 'True':
            images = tf.get_collection('image')
            for image in images:
                tf.summary.image(image.name, image)
        summary = tf.summary.merge_all()

    #create a session
    session_conf = tf.ConfigProto(inter_op_parallelism_threads=0,
                                  intra_op_parallelism_threads=0)
    sess = tf.Session(graph=graph, config=session_conf)

    #create a summary writer
    writer = tf.summary.FileWriter(os.path.join(self.expdir, 'logdir'),
                                   graph)

    #initialize the model
    sess.run(init_op)

    #create an index queue
    index_queue = []
    for _ in range(int(self.conf['numiters'])):
        i = range(len(tasks))
        shuffle(i)
        index_queue += i

    #iteratively train the model
    i = 0
    while len(index_queue) > batch_size:
        indices = index_queue[:batch_size]
        index_queue = index_queue[batch_size:]
        batch_inputs = [features[j] for j in indices]
        batch_lengths = np.array([f.shape[0] for f in batch_inputs])
        ml = np.max(batch_lengths)
        batch_inputs = np.array([
            np.pad(f, ((0, ml - f.shape[0]), (0, 0)), 'constant')
            for f in batch_inputs
        ])

        #full tracing is disabled (i is an int, so this is never true)
        if i == 'a':
            run_options = tf.RunOptions(
                trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()
        else:
            run_options = run_metadata = None

        _, s, l, lbl, spkl, spklgts, wrdl, wrdlgts = sess.run(
            (update_op, summary, loss, labelloss, spkloss, spklogits,
             wordloss, wordlogits),
            feed_dict={
                inputs: batch_inputs,
                seq_length: batch_lengths,
                targets: vs[indices],
                speakers: encoded_spk[indices],
                correct_words: encoded_words[indices]
            },
            options=run_options,
            run_metadata=run_metadata)

        print 'step %d: loss = %f' % (i, l)
        print '    labelloss = %f' % lbl
        print '    spkloss = %f' % spkl
        print '    wordloss = %f' % wrdl

        writer.add_summary(s, i)
        if i == 'a':
            writer.add_run_metadata(run_metadata, 'statistics')
        i += 1

    #save the final model
    saver.save(sess, os.path.join(self.expdir, 'logdir', 'model.ckpt'))
    sess.close()

    self.is_training = False
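# ---------------------------------------------------------------------------
# The loss itself is defined elsewhere (self.loss), so the following is only a
# hedged, illustrative sketch of how per-word present/absent factors like
# wordfactors could enter a weighted binary cross-entropy over the word
# targets. The function name and the exact loss composition are assumptions,
# not the repository's definition.
# ---------------------------------------------------------------------------
import tensorflow as tf

def weighted_word_loss(correct_words, wordlogits, factors_present,
                       factors_absent):
    '''sketch of a per-word weighted binary cross-entropy

    correct_words: [batch_size, num_words] 0/1 targets
    wordlogits: [batch_size, num_words] unnormalised predictions
    factors_present/factors_absent: [batch_size, num_words] weights that
        up-weight rare present words and down-weight frequent ones
    '''
    #elementwise cross-entropy per word
    xent = tf.nn.sigmoid_cross_entropy_with_logits(labels=correct_words,
                                                   logits=wordlogits)
    #weight positive and negative targets with their own factors
    weights = (correct_words * factors_present +
               (1.0 - correct_words) * factors_absent)
    return tf.reduce_mean(weights * xent)

# With count occurrences of a word in N training sentences, wordfactors holds
# N/(2*count) for the present case and N/(2*(N-count)) for the absent case, so
# present and absent targets contribute roughly equally for every word.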
structure = Structure(os.path.join(expdir, 'structure.xml'))

#create a coder
coder = coder_factory.factory(coderconf.get('coder', 'name'))(structure,
                                                              coderconf)

#read the reference tasks
with open(os.path.join(expdir, 'testtasks')) as f:
    ref = dict([l.split(' ', 1) for l in f])

#read the decoded tasks
with open(os.path.join(expdir, 'dectasks')) as f:
    hyp = dict([l.split(' ', 1) for l in f])

#encode the reference and hypothesis tasks
tasks = [(read_task(ref[k]), read_task(hyp[k])) for k in hyp.keys()]
vs_ref = np.array([coder.encode(t[0], None, 0.0) for t in tasks])
vs_hyp = np.array([coder.encode(t[1], None, 0.0) for t in tasks])

#a task is correct only if all of its labels match the reference
eq = vs_ref == vs_hyp
eqtasks = eq.all(axis=1)
corr = np.sum(eqtasks)
pct = 100.0 * float(corr) / float(len(eqtasks))
print('%10.3g\n' % pct)