def update_coefficient_statistics(self):
    for i in range(self.patches_per_node):
        l0_norm = functools.partial(np.linalg.norm, ord=0)
        self.nodebufs.a_l0_norm[i] = np.apply_along_axis(
            l0_norm, 1, self.nodebufs.a[i])
        self.nodebufs.a_l0_norm[i] /= self.nodebufs.a[i].shape[1]

        l1_norm = functools.partial(np.linalg.norm, ord=1)
        self.nodebufs.a_l1_norm[i] = np.apply_along_axis(
            l1_norm, 1, self.nodebufs.a[i])
        self.nodebufs.a_l1_norm[i] /= np.max(self.nodebufs.a_l1_norm[i])

        l2_norm = functools.partial(np.linalg.norm, ord=2)
        self.nodebufs.a_l2_norm[i] = np.apply_along_axis(
            l2_norm, 1, self.nodebufs.a[i])
        self.nodebufs.a_l2_norm[i] /= np.max(self.nodebufs.a_l2_norm[i])

        self.nodebufs.a_variance[i] = np.apply_along_axis(
            np.var, 1, self.nodebufs.a[i])
        self.nodebufs.a_variance[i] /= np.max(self.nodebufs.a_variance[i])

    # Average each statistic over patches, then gather the per-node means
    # into the root buffers. (The original list repeated 'a_l1_norm' and
    # never gathered 'a_l2_norm'.)
    for stat in ['a_l0_norm', 'a_l1_norm', 'a_l2_norm', 'a_variance']:
        setattr(self.nodebufs.mean, stat,
                np.mean(getattr(self.nodebufs, stat), axis=0))
        mpi.gather(getattr(self.nodebufs.mean, stat),
                   getattr(self.rootbufs, stat))
def main():
    myrank, size = mpi.init()

    # Split the problem into equal chunks; bail out if it does not divide
    # evenly. (Without the return, execution would fall through with
    # blocksize undefined.)
    if problemlength % size == 0:
        blocksize = problemlength / size
    else:
        print "Sorry, I don't know how to split up the problem, aborting!"
        mpi.finalize()
        return

    if myrank == 0:
        data = range(1, problemlength + 1)  # create a toy dataset...
        random.shuffle(data)                # ...modifies data in place
        mydata = data[0:blocksize]          # get some data for me...
        # ...and communicate the rest to the other ranks
        for host in range(1, size):
            hisdata = data[blocksize * host:blocksize * (host + 1)]
            mpi.send(hisdata, blocksize, mpi.MPI_INT, host, 0, mpi.MPI_COMM_WORLD)
    else:
        mydata = mpi.recv(blocksize, mpi.MPI_INT, 0, 0, mpi.MPI_COMM_WORLD)

    mymax = max(mydata)
    maximums = mpi.gather(mymax, 1, mpi.MPI_INT, size, mpi.MPI_INT, 0,
                          mpi.MPI_COMM_WORLD)
    if myrank == 0:
        mymax = max(maximums)
        print "The maximum value is:", mymax
    mpi.finalize()
def main():
    # Start MPI
    myrank, size = mpi.init()

    # Create a toy dataset; we already know what the max will be :-)
    # Seed identically on every rank: each process shuffles its own copy,
    # so without a fixed seed the ranks would slice differently shuffled
    # lists and the true maximum could fall outside every slice.
    random.seed(0)
    data = range(1, 1001)
    random.shuffle(data)  # modifies data in place

    # Divide up the problem (only if it divides evenly)
    if len(data) % size == 0:
        blocksize = len(data) / size
        start = blocksize * myrank
        end = start + blocksize
        mydata = data[start:end]

        # Avoid shadowing the builtin max().
        localmax = -1
        for i in mydata:
            if i > localmax:
                localmax = i

        maximums = mpi.gather(localmax, 1, mpi.MPI_INT, size, mpi.MPI_INT, 0,
                              mpi.MPI_COMM_WORLD)
        if myrank == 0:
            globalmax = -1
            for i in maximums:
                if i > globalmax:
                    globalmax = i
            print "The maximum value is:", globalmax
        mpi.finalize()
    else:
        print "Sorry, I don't know how to split up the problem, aborting!"
        mpi.finalize()
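# The gather-then-scan pattern above can be collapsed into a single
# reduction. A minimal sketch, assuming the pyMPI-style mpi.allreduce /
# mpi.MAX API that appears in other snippets in this collection (the
# low-level mpi.init binding used above may not expose it):
import random
import mpi  # pyMPI-style binding assumed

def toy_global_max(problemlength=1000):
    # Every rank builds the same dataset and takes its own slice.
    random.seed(0)
    data = range(1, problemlength + 1)
    random.shuffle(data)
    blocksize = problemlength // mpi.procs
    mydata = data[blocksize * mpi.rank:blocksize * (mpi.rank + 1)]
    # One collective replaces gather plus a manual scan on the root.
    return mpi.allreduce(max(mydata), mpi.MAX)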
def parallelRunTest(self):
    # Decide on targets.
    targets = [0, 0, 0]
    #targets = [int(mpi.procs / 3), 0, int(mpi.procs / 2)]

    # Values to be gathered.
    list1 = [mpi.rank + 1]
    list2 = [0, mpi.rank, mpi.rank * mpi.rank, mpi.rank + 20]
    string1 = "S" + str(mpi.rank % 4) + "FOO"
    tuple1 = ("t")  # note: ("t") is just the string "t", not a tuple
    tuple2 = ("fOo", [1, mpi.rank, 3], (0, 1))

    # Test gather where each process passes in a different count.
    list3 = []
    for x in range(mpi.rank + 2):
        list3 += [x]
    longList = range(256)

    # Do gathers.
    results = [0] * 8
    results[0] = mpi.gather(list1, 1, targets[0])
    results[1] = mpi.gather(list2, 3, targets[1])
    results[2] = mpi.gather(string1, 3, targets[2])
    results[3] = mpi.gather(tuple1, 1, targets[0])
    results[4] = mpi.gather(tuple2, 1, targets[1])
    results[5] = mpi.gather(tuple2, 3, targets[2])
    results[6] = mpi.gather(list3, mpi.rank + 2, targets[0])
    results[7] = mpi.gather(longList, 256, targets[0])

    # Correct answers.
    correctAnswers = [[] for _ in range(8)]
    for x in range(mpi.procs):
        correctAnswers[0] += [x + 1]
        correctAnswers[1] += [0, x, x * x]
        correctAnswers[2] += ["S", str(x % 4), "F"]
        correctAnswers[3] += ["t"]
        correctAnswers[4] += ["fOo"]
        correctAnswers[5] += ["fOo", [1, x, 3], (0, 1)]
        for i in range(x + 2):
            correctAnswers[6] += [i]
    correctAnswers[7] = range(256) * mpi.procs

    for x in range(8):
        if mpi.rank == targets[x % 3] and results[x] != correctAnswers[x]:
            self.fail("gather failed on test " + str(x))
        elif mpi.rank != targets[x % 3] and results[x] != None:
            errstr = "gather failed on off-target "
            errstr += "process on test " + str(x)
            self.fail(errstr)
    return
def wrapped_method(self, *args, **kwargs):
    # Tell every worker rank which method to run, then run it locally too.
    mpi.bcast((expression, args, kwargs))
    # Local execution.
    ret = method(self, *args, **kwargs)
    # Get return values from all CPUs.
    ret = mpi.gather(ret)
    if not init:
        # Filter object creation, which must not return a value.
        return ret
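# For context, a sketch of the decorator factory that could produce
# wrapped_method above. This is hypothetical scaffolding: the names
# mpi_synced, expression, and init are assumptions inferred from this
# snippet and from the worker loop in the run_server snippet further down.
import mpi

def mpi_synced(method, init=False):
    """Hypothetical decorator: broadcast the call to the workers, run it
    locally on the master, then gather every rank's return value."""
    expression = 'ret = pod.%s' % method.__name__  # stub executed on workers
    def wrapped_method(self, *args, **kwargs):
        mpi.bcast((expression, args, kwargs))  # wake the worker loop
        ret = method(self, *args, **kwargs)    # local execution on the master
        ret = mpi.gather(ret)                  # collect all ranks' results
        if not init:
            return ret
    return wrapped_method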
def testGatherInt(self):
    x = 10 * mpi.rank
    ans = range(0, 10 * mpi.procs, 10)
    for root in xrange(mpi.procs):
        globalx = mpi.gather(x, root=root)
        if mpi.rank == root:
            self.check(globalx == ans)
        else:
            self.check(globalx == None)
def gather_test(comm, generator, kind, root):
    if comm.rank == root:
        print "Gathering %s to root %d..." % (kind, root),
    my_value = generator(comm.rank)
    result = mpi.gather(comm, my_value, root)
    if comm.rank == root:
        for p in range(0, comm.size):
            assert result[p] == generator(p)
        print "OK."
    else:
        assert result == None
    return
def runTest(self):
    mpi.barrier()
    name = self.__class__.__name__
    if name[:5] == 'PyMPI':
        name = name[5:]
    if name[-8:] == 'TestCase':
        name = name[:-8]
    mpi.trace(name + '<')
    try:
        try:
            self.parallelRunTest()
            any_errors = mpi.gather([])
        except:
            any_errors = mpi.gather([self.__message()])
        if mpi.rank == 0 and any_errors:
            import string
            raise self.failureException, string.join(any_errors, '')
    finally:
        mpi.barrier()
        mpi.traceln('>')
    return
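# The gather-of-error-lists idiom in runTest above is worth isolating:
# every rank contributes an empty list on success or a one-element list on
# failure, so the root ends up with exactly the failing ranks' messages.
# A minimal sketch of the same idiom, assuming a pyMPI-style mpi.gather
# that concatenates each rank's list at rank 0:
import mpi

def collective_check(work):
    """Run work() on every rank; raise on rank 0 if any rank failed."""
    try:
        work()
        errors = mpi.gather([])  # success: contribute nothing
    except Exception, e:
        errors = mpi.gather(["rank %d: %s" % (mpi.rank, e)])
    if mpi.rank == 0 and errors:
        raise RuntimeError("".join(errors))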
# USE FIRST SYSTEM TO GET M
# USE SECOND SYSTEM TO OBTAIN PHI AND PSI DIFFERENCES FROM TARGETS
zp0 = z_p[0]
z_p[0] -= (kappa / gamma) * dt * (FTSM.M(x, PHI, PHI) * (z_p[0] - y.angle(PHI)) +
                                  FTSM.M(x, PHI, PSI) * (z_p[1] - y.angle(PSI)))
z_p[1] -= (kappa / gamma) * dt * (FTSM.M(x, PSI, PHI) * (zp0 - y.angle(PHI)) +
                                  FTSM.M(x, PSI, PSI) * (z_p[1] - y.angle(PSI)))

# UPDATE CARTESIAN
# Dr. Izaguirre: I have checked and this constraint is correct. The energy
# is harmonic, but the force (the gradient) is not harmonic. In fact it is
# exactly what is in the paper.
prop[0].propagate(scheme="velocityscale", steps=1, dt=dt, forcefield=ff,
                  params={'T0': 300})
prop[1].propagate(scheme="velocityscale", steps=1, dt=dt, forcefield=ff,
                  params={'T0': 300})
mpi.barrier()
z = mpi.gather([z_p])
if mpi.rank == 0:
    z = FTSM.reparamTrevor(z)
    if iter >= 110000:
        for g in range(0, 8):
            for h in range(0, 2):
                avg[g][h] += z[g][h]
    print "\nI" + str(iter + 1) + ": ", z
    if iter + 1 == 200000:
        for g in range(0, 8):
            for h in range(0, 2):
                avg[g][h] /= 90000.
        print "\nAVG: ", avg
z_p = mpi.scatter(z)[0]
def perceptron_parallel(epoch, indices, blob, weights=None, valid_feature_names=None):
    """
    Implements parallelized version of perceptron training for structured
    outputs (Collins, 2002; McDonald, 2010).
    """
    # Which processor am I?
    myRank = mpi.rank
    # File for this rank's decoding-path output.
    decodingPath = None
    decodingPathFile = robustWrite("%s/%s%s" % (tmpdir, FLAGS.decoding_path_out, str(myRank)))
    # Let processor 0 be the master.
    masterRank = 0
    # How many processors are there?
    nProcs = mpi.size

    # Keep track of time to train this epoch.
    startTime = time.time()

    # Restart with weights from the last epoch, or 0.
    # Any weights passed during the function call are ignored.
    weights_restart_filename = '%s/training-restart.%s' % (tmpdir, str(mpi.rank))
    if os.path.isfile(weights_restart_filename):
        weights_restart_file = open(weights_restart_filename, 'r')
        weights = cPickle.load(weights_restart_file)
        weights_restart_file.close()
    else:
        # If the weights passed during the function call are None, start empty.
        if weights is None or len(weights) == 0:
            weights = svector.Vector()

    # Restart with the previous running weight sum, too.
    weights_sum_filename = '%s/training.%s' % (tmpdir, str(mpi.rank))
    if os.path.isfile(weights_sum_filename):
        weights_sum_file = open(weights_sum_filename, 'r')
        weights_sum = cPickle.load(weights_sum_file)
        weights_sum_file.close()
    else:
        weights_sum = svector.Vector()

    numChanged = 0
    done = False
    for i, instanceID in enumerate(indices[:FLAGS.subset]):
        if myRank == i % nProcs:
            # Assign the current instance we will look at.
            f = blob['f_instances'][instanceID]
            e = blob['e_instances'][instanceID]
            etree = blob['etree_instances'][instanceID]
            gold_str = blob['gold_instances'][instanceID]
            inverse = None
            if FLAGS.inverse_a is not None:
                inverse = blob['inverse_instances'][instanceID]
            a1 = None
            if FLAGS.a1 is not None:
                a1 = blob['a1_instances'][instanceID]
            a2 = None
            if FLAGS.a2 is not None:
                a2 = blob['a2_instances'][instanceID]
            ftree = None
            if FLAGS.ftrees is not None:
                ftree = blob['ftree_instances'][instanceID]

            # Preprocess input data: f, e are sequences of words.
            f = f.split()
            e = e.split()
            # gold is a sequence of f-e link pairs.
            gold = Alignment.Alignment(gold_str, FLAGS.inverse)

            # Initialize the model for this instance.
            model = GridAlign.Model(f, e, etree, ftree, instanceID, weights,
                                    a1, a2, inverse,
                                    LOCAL_FEATURES=blob['localFeatures'],
                                    NONLOCAL_FEATURES=blob['nonlocalFeatures'],
                                    FLAGS=FLAGS)
            model.gold = gold
            # Initialize the model with data tables.
            model.pef = blob['pef']
            model.pfe = blob['pfe']
            # Load the language model.
            model.lm = blob['lm']
            # Align the current training instance.
            model.align()
            if FLAGS.decoding_path_out is not None:
                cPickle.dump(model.decodingPath, decodingPathFile,
                             protocol=cPickle.HIGHEST_PROTOCOL)

            ##################################################################
            # Weight updating
            ##################################################################
            LEARNING_RATE = FLAGS.learningrate

            # Set the oracle item.
            oracle = None
            if FLAGS.oracle in ['gold', 'hope']:
                oracle = model.oracle
            else:
                sys.stderr.write("ERROR: Unknown oracle class: %s\n" % (FLAGS.oracle))
            # Set the hypothesis item.
            hyp = None
            if FLAGS.hyp in ['1best', 'fear']:
                hyp = model.hyp
            else:
                sys.stderr.write("ERROR: Unknown hyp class: %s\n" % (FLAGS.hyp))
            # Debiasing.
            if FLAGS.debiasing:
                validate_features(oracle.scoreVector, valid_feature_names)
                validate_features(hyp.scoreVector, valid_feature_names)

            deltas = None
            if set(hyp.links) != set(oracle.links):
                numChanged += 1
                ##############################################################
                # WEIGHT UPDATES
                ##############################################################
                deltas = oracle.scoreVector - hyp.scoreVector
                weights = weights + LEARNING_RATE * deltas
            # Even if we didn't update, the current weight vector counts
            # towards the sum.
            weights_sum += weights

            # L1 projection step:
            # if w in [-tau, tau], w -> 0; else, move w closer to 0 by tau.
            if FLAGS.tau is not None:
                for index, w in weights_sum.iteritems():
                    if w == 0:
                        del weights_sum[index]
                        continue
                    if index[-3:] == '_nb':
                        continue
                    if w > 0 and w <= FLAGS.tau and not FLAGS.negreg:
                        del weights_sum[index]
                    elif w < 0 and w >= (FLAGS.tau * -1):
                        del weights_sum[index]
                    elif w > 0 and w > FLAGS.tau and not FLAGS.negreg:
                        weights_sum[index] -= FLAGS.tau
                    elif w < 0 and w < (FLAGS.tau * -1):
                        weights_sum[index] += FLAGS.tau

    # Set a unique pickled output file for this process; it holds the sum of
    # weights over each iteration for this process.
    output_filename = "%s/training.%s" % (tmpdir, str(mpi.rank))
    output_file = open(output_filename, 'w')
    # Dump all weights used during this node's run, to be averaged by the
    # master along with the others.
    cPickle.dump(weights_sum, output_file, protocol=cPickle.HIGHEST_PROTOCOL)
    output_file.close()

    # Remember just the last weights used by this process; start here next epoch.
    output_filename_last_weights = "%s/training-restart.%s" % (tmpdir, str(mpi.rank))
    output_file_last_weights = open(output_filename_last_weights, 'w')
    cPickle.dump(weights, output_file_last_weights, protocol=cPickle.HIGHEST_PROTOCOL)
    output_file_last_weights.close()
    decodingPathFile.close()

    # Gather "done" messages from the workers (synchronize).
    done = mpi.gather(value=True, root=0)

    # Average weights over all processes.
    masterWeights = svector.Vector()
    if myRank == masterRank:
        decodePathFiles = {}
        # Read pickled output.
        for rank in range(nProcs):
            input_filename = tmpdir + '/training.' + str(rank)
            input_file = open(input_filename, 'r')
            masterWeights += cPickle.load(input_file)
            input_file.close()
            decodePathFiles[rank] = robustRead("%s/%s%s" % (tmpdir, FLAGS.decoding_path_out, str(rank)))
        sys.stderr.write("Done reading data.\n")
        sys.stderr.write("len(masterWeights)= %d\n" % (len(masterWeights)))
        sys.stderr.flush()

        # AVERAGED WEIGHTS
        sys.stderr.write("[%d] Averaging weights.\n" % (mpi.rank))
        sys.stderr.flush()
        masterWeights = masterWeights / (len(indices) * (epoch + 1))
        # Dump the master weights to file; there is only one weight vector in
        # this file at a time.
        mw = robustWrite(tmpdir + '/weights')
        cPickle.dump(masterWeights, mw, protocol=cPickle.HIGHEST_PROTOCOL)
        mw.close()

        # Write the decoding path.
        decodingPathList = []
        if FLAGS.decoding_path_out is not None:
            path_out = robustWrite(FLAGS.decoding_path_out, encoding="utf-8")
            for i, instanceID in enumerate(indices[:FLAGS.subset]):
                node = i % nProcs
                chosenTree = cPickle.load(decodePathFiles[node])
                heappush(decodingPathList, (instanceID, chosenTree))
            orderedList = [heappop(decodingPathList)[1] for _ in xrange(len(decodingPathList))]
            path_out.write(u"\n".join(orderedList))
            path_out.close()
        # Clean up.
        for i in range(nProcs):
            decodePathFiles[i].close()

    # All processes read and load the new averaged weights, but make sure the
    # worker nodes don't attempt to read from the weights file before the
    # root node has written it. Sync up with a blocking broadcast call.
    ready = mpi.broadcast(value=True, root=0)
    mw = robustRead(tmpdir + '/weights')
    masterWeights = cPickle.load(mw)
    mw.close()

    # Print the report for this iteration.
    elapsedTime = time.time() - startTime
    if myRank == masterRank:
        # masterRank prints the elapsed time; it may differ at each node.
        sys.stderr.write("Time: %0.2f\n" % (elapsedTime))
        sys.stderr.write("[%d] Finished training.\n" % (mpi.rank))
    return masterWeights
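# The tau projection above is a soft-threshold on the running weight sum.
# A standalone sketch of the same rule on a plain dict (the dict stands in
# for svector.Vector; the function name is illustrative). Snapshotting the
# items also avoids mutating the mapping while iterating over it:
def l1_project(weights_sum, tau, negreg=False):
    """Soft-threshold each weight toward zero by tau; '_nb' features are
    exempt, and negreg leaves positive weights unregularized."""
    for index, w in list(weights_sum.items()):
        if w == 0:
            del weights_sum[index]
        elif index.endswith('_nb'):
            continue
        elif 0 < w <= tau and not negreg:
            del weights_sum[index]
        elif -tau <= w < 0:
            del weights_sum[index]
        elif w > tau and not negreg:
            weights_sum[index] -= tau
        elif w < -tau:
            weights_sum[index] += tau
    return weights_sum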
def decode_parallel(weights, indices, blob, name="", out=sys.stdout, score_out=None):
    """
    Align some input data in blob with a given weight vector. Report accuracy.
    """
    myRank = mpi.rank
    decodingPath = None
    decodingPathFile = robustWrite("%s/%s%s" % (tmpdir, FLAGS.decoding_path_out, str(myRank)))
    masterRank = 0
    # How many processors are there?
    nProcs = mpi.size
    results = []
    allResults = None
    fmeasure = 0.0

    # Keep track of time to decode.
    startTime = time.time()
    result_file = robustWrite(tmpdir + '/results.' + str(mpi.rank))
    for i, instanceID in enumerate(indices[:FLAGS.subset]):
        if myRank == i % nProcs:
            # Assign the current instance we will look at.
            f = blob['f_instances'][instanceID]
            e = blob['e_instances'][instanceID]
            etree = blob['etree_instances'][instanceID]
            if FLAGS.train:
                gold_str = blob['gold_instances'][instanceID]
                gold = Alignment.Alignment(gold_str, FLAGS.inverse)
            ftree = None
            if FLAGS.ftrees is not None:
                ftree = blob['ftree_instances'][instanceID]
            inverse = None
            if FLAGS.inverse_a is not None:
                inverse = blob['inverse_instances'][instanceID]
            a1 = None
            if FLAGS.a1 is not None:
                a1 = blob['a1_instances'][instanceID]
            a2 = None
            if FLAGS.a2 is not None:
                a2 = blob['a2_instances'][instanceID]

            # Prepare input data: f, e are sequences of words.
            f = f.split()
            e = e.split()
            # Initialize the model for this instance.
            model = GridAlign.Model(f, e, etree, ftree, instanceID, weights,
                                    a1, a2, inverse, DECODING=True,
                                    LOCAL_FEATURES=blob['localFeatures'],
                                    NONLOCAL_FEATURES=blob['nonlocalFeatures'],
                                    FLAGS=FLAGS)
            if FLAGS.train:
                model.gold = gold
            # Initialize the model with data tables.
            model.pef = blob['pef']
            model.pfe = blob['pfe']
            # Load the language model.
            model.lm = blob['lm']
            # Align the current instance.
            # FOR PROFILING: cProfile.run('model.align(1)', 'profile.out')
            model.align()
            decodingPath = model.decodingPath

            # Dump the intermediate chunk to disk; reassemble later.
            if FLAGS.train:
                cPickle.dump((model.hyp.links, model.gold.links_dict),
                             result_file, protocol=cPickle.HIGHEST_PROTOCOL)
            elif FLAGS.align:
                cPickle.dump((model.hyp.links, model.hyp.score),
                             result_file, protocol=cPickle.HIGHEST_PROTOCOL)
            if FLAGS.decoding_path_out is not None:
                cPickle.dump(decodingPath, decodingPathFile,
                             protocol=cPickle.HIGHEST_PROTOCOL)
    result_file.close()
    decodingPathFile.close()
    done = mpi.gather(value=True, root=0)

    # Reduce here.
    if myRank == masterRank:
        # Open result files for reading.
        resultFiles = {}
        decodePathFiles = {}
        for i in range(nProcs):
            resultFiles[i] = open(tmpdir + '/results.' + str(i), 'r')
            decodePathFiles[i] = robustRead("%s/%s%s" % (tmpdir, FLAGS.decoding_path_out, str(i)))
        if FLAGS.train:
            ##################################################################
            # Compute f-measure over all alignments
            ##################################################################
            numCorrect = 0
            numModelLinks = 0
            numGoldLinks = 0
            for i, instanceID in enumerate(indices[:FLAGS.subset]):
                # Which node stored instance i?
                node = i % nProcs
                # Retrieve the result for instance i.
                resultTuple = cPickle.load(resultFiles[node])
                modelBest = resultTuple[0]
                gold = resultTuple[1]
                # Update the F-score counts.
                numCorrect_, numModelLinks_, numGoldLinks_ = f1accumulator(modelBest, gold)
                numCorrect += numCorrect_
                numModelLinks += numModelLinks_
                numGoldLinks += numGoldLinks_
            # Compute F-measure, precision, and recall.
            fmeasure, precision, recall = f1score(numCorrect, numModelLinks, numGoldLinks)
            elapsedTime = time.time() - startTime

            # Print the report for this run.
            sys.stderr.write("Time: " + str(elapsedTime) + "\n")
            sys.stderr.write("\n")
            sys.stderr.write('F-score-%s: %1.5f\n' % (name, fmeasure))
            sys.stderr.write('Precision-%s: %1.5f\n' % (name, precision))
            sys.stderr.write('Recall-%s: %1.5f\n' % (name, recall))
            sys.stderr.write('# Correct: %d\n' % (numCorrect))
            sys.stderr.write('# Me Total: %d\n' % (numModelLinks))
            sys.stderr.write('# Gold Total: %d\n' % (numGoldLinks))
            sys.stderr.write("[%d] Finished decoding.\n" % (myRank))
        else:
            if score_out != None:
                sout = open(score_out, "w")
            for i, instanceID in enumerate(indices):
                node = i % nProcs
                resultTuple = cPickle.load(resultFiles[node])
                modelBestLinks = resultTuple[0]
                score = resultTuple[1]
                if FLAGS.inverse:
                    if FLAGS.joint:
                        out.write("%s\n" % (" ".join(map(lambda link: "%s-%s[%s]" % (link[1], link[0], link.linkTag.name), modelBestLinks))))
                    else:
                        out.write("%s\n" % (" ".join(map(lambda link: "%s-%s" % (link[1], link[0]), modelBestLinks))))
                else:
                    if FLAGS.joint:
                        out.write("%s\n" % (" ".join(map(lambda link: "%s-%s[%s]" % (link[0], link[1], link.linkTag.name), modelBestLinks))))
                    else:
                        out.write("%s\n" % (" ".join(map(lambda link: "%s-%s" % (link[0], link[1]), modelBestLinks))))
                if score_out != None:
                    sout.write("%s\n" % (score))

        # Write the decoding path.
        decodingPathList = []
        if FLAGS.decoding_path_out is not None:
            path_out = robustWrite(FLAGS.decoding_path_out, True, encoding="utf-8")
            for i, instanceID in enumerate(indices):
                node = i % nProcs
                chosenTree = cPickle.load(decodePathFiles[node])
                heappush(decodingPathList, (instanceID, chosenTree))
            orderedList = [heappop(decodingPathList)[1] for _ in xrange(len(decodingPathList))]
            path_out.write(u"\n".join(orderedList))
            path_out.close()
        # Clean up.
        for i in range(nProcs):
            decodePathFiles[i].close()
            resultFiles[i].close()
    return
def plotVectorField2d(dataBase, fieldList, plotGhosts=False,
                      vectorMultiplier=1.0, colorNodeLists=False,
                      colorDomains=False, title=""):
    assert colorNodeLists + colorDomains <= 1

    # Gather the node positions and vectors across all domains.
    # Loop over all the NodeLists.
    localNumNodes = []
    xNodes = []
    yNodes = []
    vxNodes = []
    vyNodes = []
    for i in xrange(dataBase.numNodeLists):
        nodeList = dataBase.nodeLists()[i]
        assert i < fieldList.numFields
        vectorField = fieldList[i]
        if plotGhosts:
            n = nodeList.numNodes
        else:
            n = nodeList.numInternalNodes
        localNumNodes.append(n)
        xNodes += numpy.array(map(lambda x: x.x, list(nodeList.positions())[:n]))
        yNodes += numpy.array(map(lambda x: x.y, list(nodeList.positions())[:n]))
        vxNodes += numpy.array(map(lambda x: x.x, list(vectorField)[:n])) * vectorMultiplier
        vyNodes += numpy.array(map(lambda x: x.y, list(vectorField)[:n])) * vectorMultiplier
    assert len(xNodes) == len(yNodes) == len(vxNodes) == len(vyNodes)

    numDomainNodes = [len(xNodes)]
    numNodesPerDomain = mpi.gather(numDomainNodes)
    globalNumNodes = mpi.gather(localNumNodes)
    globalXNodes = mpi.gather(xNodes)
    globalYNodes = mpi.gather(yNodes)
    globalVxNodes = mpi.gather(vxNodes)
    globalVyNodes = mpi.gather(vyNodes)

    if mpi.rank == 0:
        plot = generateNewGnuPlot()
        plot("set size square")
        plot.title = title
        if colorDomains:
            cumulativeN = 0
            for domain in xrange(len(numNodesPerDomain)):
                n = numNodesPerDomain[domain]
                x = numpy.array(globalXNodes[cumulativeN:cumulativeN + n])
                y = numpy.array(globalYNodes[cumulativeN:cumulativeN + n])
                vx = numpy.array(globalVxNodes[cumulativeN:cumulativeN + n])
                vy = numpy.array(globalVyNodes[cumulativeN:cumulativeN + n])
                cumulativeN += n
                data = Gnuplot.Data(x, y, vx, vy,
                                    with_="vector ls %i" % (domain + 1),
                                    inline=True)
                plot.replot(data)
                SpheralGnuPlotCache.append(data)
        elif colorNodeLists:
            cumulativeN = 0
            for i in xrange(len(globalNumNodes)):
                n = globalNumNodes[i]
                if n > 0:
                    iNodeList = i % dataBase.numNodeLists
                    x = numpy.array(globalXNodes[cumulativeN:cumulativeN + n])
                    y = numpy.array(globalYNodes[cumulativeN:cumulativeN + n])
                    vx = numpy.array(globalVxNodes[cumulativeN:cumulativeN + n])
                    vy = numpy.array(globalVyNodes[cumulativeN:cumulativeN + n])
                    cumulativeN += n
                    data = Gnuplot.Data(x, y, vx, vy,
                                        with_="vector ls %i" % (iNodeList + 1),
                                        inline=True)
                    plot.replot(data)
                    SpheralGnuPlotCache.append(data)
        else:
            x = numpy.array(globalXNodes)
            y = numpy.array(globalYNodes)
            vx = numpy.array(globalVxNodes)
            vy = numpy.array(globalVyNodes)
            data = Gnuplot.Data(x, y, vx, vy, with_="vector", inline=True)
            plot.replot(data)
            SpheralGnuPlotCache.append(data)
        return plot
    else:
        # Non-root ranks gathered nothing and have nothing to plot. (The
        # original appended an undefined 'data' here, raising a NameError.)
        return None
def gridSample(fieldList, zFunction="%s", nx=100, ny=100,
               xmin=None, xmax=None, ymin=None, ymax=None):
    assert nx > 0 and ny > 0

    # Set up our return value arrays.
    xValues = np.array([[0.0] * nx] * ny)
    yValues = np.array([[0.0] * nx] * ny)
    zValues = np.array([[0.0] * nx] * ny)

    # Gather the fieldList info across all processors to process 0.
    localNumNodes = []
    localX = []
    localY = []
    for ifield in xrange(fieldList.numFields):
        field = fieldList[ifield]
        n = field.nodeList().numNodes
        localNumNodes.append(n)
        for r in field.nodeList().positions():
            localX.append(r.x)
            localY.append(r.y)
    globalNumNodes = mpi.gather(localNumNodes)
    globalX = mpi.gather(localX)
    globalY = mpi.gather(localY)

    # If the user did not specify the sampling volume, find the min and max
    # node positions.
    if xmin is None:
        xmin = min(localX)
    if ymin is None:
        ymin = min(localY)
    if xmax is None:
        xmax = max(localX)
    if ymax is None:
        ymax = max(localY)
    xmin = mpi.allreduce(xmin, mpi.MIN)
    ymin = mpi.allreduce(ymin, mpi.MIN)
    xmax = mpi.allreduce(xmax, mpi.MAX)
    ymax = mpi.allreduce(ymax, mpi.MAX)
    assert xmax > xmin
    assert ymax > ymin

    # Figure out the sizes of the bins we're going to sample in.
    dx = (xmax - xmin) / nx
    dy = (ymax - ymin) / ny

    # Loop over all the grid sampling positions, and figure out this
    # processor's contribution.
    for iy in xrange(ny):
        for ix in xrange(nx):
            xValues[iy][ix] = xmin + (ix + 0.5) * dx
            yValues[iy][ix] = ymin + (iy + 0.5) * dy
            r = Vector2d(xValues[iy][ix], yValues[iy][ix])
            z = fieldList.sample(r)
            localZ = eval(zFunction % "z")
            globalZ = mpi.reduce(localZ, mpi.SUM)
            if mpi.rank == 0:
                print "%i %i %i %s %g %g" % (mpi.rank, ix, iy, r, z, localZ)
                print "%i %g" % (mpi.rank, globalZ)
                zValues[iy][ix] = globalZ

    return xValues, yValues, zValues
def learn_basis1(self):
    self.compute_patch_objectives(self.nodebufs)
    self.average_patch_objectives(self.nodebufs)
    mpi.gather(self.nodebufs.mean.E, self.rootbufs.E)
    mpi.gather(self.nodebufs.mean.dphi, self.rootbufs.dphi)
def run_server(port, settings):
    server = None
    if mpi.size == 1:
        class PodService(rpyc.Service):
            def on_disconnect(self):
                logger.info('Client is disconnected: closing server')
                server.close()

            class exposed_Pod(Pod):
                pass

        logger.info('Creating poder on %s:%i', socket.gethostname(), port)
        server = ThreadedServer(PodService, port=port, auto_register=False,
                                protocol_config={'allow_public_attrs': True})
        server.start()
    else:
        if mpi.myid == 0:
            # The master serves; it holds a PodMPI instance, as opposed to
            # the workers.
            class PodService(rpyc.Service):
                def on_disconnect(self):
                    logger.info('Client is disconnected: closing server')
                    server.close()
                    mpi.bcast(('return', None, None))

                class exposed_Pod(PodMPI):
                    pass

            logger.info('Creating poder on %s:%i', socket.gethostname(), port)
            server = ThreadedServer(PodService, port=port, auto_register=False,
                                    protocol_config={'allow_public_attrs': True})
            server.start()
        else:
            # Workers operate on instances of Pod.
            #
            # Worker scenario:
            #   * on the first loop occurrence, they create a Pod object
            #     named 'pod',
            #   * on subsequent loop occurrences, they call a pod method and
            #     bind its return value to the variable 'ret'.
            #
            # In the body of the loop, a Python expression is dynamically
            # executed. The expression is formed of a stub and the function's
            # arguments. The stub contains a function name with a variable
            # assignment (like 'ret = pod.dump'). The positional and optional
            # arguments are passed to the named function in the stub.
            #
            # One loop occurrence goes as follows:
            #   * wait for an expression from the master node,
            #   * execute the expression,
            #   * return the expression's left-hand side (if any) to the
            #     master.

            # Local namespace scope for exec; it keeps track of the variables
            # created over multiple loop iterations.
            scope = {}
            while True:
                (expression, args, kwargs) = mpi.bcast()
                if expression == 'return':
                    # TODO: how to tell exec it's in a function?
                    return
                scope.update(locals())
                exec expression in globals(), scope
                mpi.gather(scope.get('ret'))
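# A stripped-down sketch of the master/worker loop above, isolating the
# protocol: broadcast a stub expression, exec it on every worker, gather
# 'ret'. The function names here are illustrative, the pyMPI-style
# mpi.bcast/mpi.gather calls mirror the snippet, and the exact mechanics of
# how args/kwargs reach the stub are an assumption:
import mpi

def worker_loop():
    """Worker side: wait for expressions from rank 0 until told to return."""
    scope = {}
    while True:
        expression, args, kwargs = mpi.bcast()   # blocks until master sends
        if expression == 'return':
            return
        scope.update({'args': args, 'kwargs': kwargs})
        exec expression in globals(), scope      # e.g. 'ret = pod.dump(*args, **kwargs)'
        mpi.gather(scope.get('ret'))             # send the result back

def master_call(expression, *args, **kwargs):
    """Master side: broadcast one call and collect every rank's result."""
    mpi.bcast((expression, args, kwargs))
    return mpi.gather(None)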
def perceptron_parallel(epoch, indices, blob, weights=None, valid_feature_names=None):
    """
    Implements parallelized version of perceptron training for structured
    outputs (Collins, 2002; McDonald, 2010).
    """
    # Which processor am I?
    myRank = mpi.rank
    # Let processor 0 be the master.
    masterRank = 0
    # How many processors are there?
    nProcs = mpi.size

    # Keep track of time to train this epoch.
    startTime = time.time()

    # Restart with weights from the last epoch, or 0.
    # Any weights passed during the function call are ignored.
    weights_restart_filename = '%s/training-restart.%s' % (tmpdir, str(mpi.rank))
    if os.path.isfile(weights_restart_filename):
        weights_restart_file = open(weights_restart_filename, 'r')
        weights = cPickle.load(weights_restart_file)
        weights_restart_file.close()
    else:
        # If the weights passed during the function call are None, start empty.
        if weights is None or len(weights) == 0:
            weights = svector.Vector()

    # Restart with the previous running weight sum, too.
    weights_sum_filename = '%s/training.%s' % (tmpdir, str(mpi.rank))
    if os.path.isfile(weights_sum_filename):
        weights_sum_file = open(weights_sum_filename, 'r')
        weights_sum = cPickle.load(weights_sum_file)
        weights_sum_file.close()
    else:
        weights_sum = svector.Vector()

    numChanged = 0
    done = False
    for i, instanceID in enumerate(indices[:FLAGS.subset]):
        if myRank == i % nProcs:
            # Assign the current instance we will look at.
            f = blob['f_instances'][instanceID]
            e = blob['e_instances'][instanceID]
            etree = blob['etree_instances'][instanceID]
            gold_str = blob['gold_instances'][instanceID]
            inverse = None
            if FLAGS.inverse is not None:
                inverse = blob['inverse_instances'][instanceID]
            a1 = None
            if FLAGS.a1 is not None:
                a1 = blob['a1_instances'][instanceID]
            a2 = None
            if FLAGS.a2 is not None:
                a2 = blob['a2_instances'][instanceID]
            ftree = None
            if FLAGS.ftrees is not None:
                ftree = blob['ftree_instances'][instanceID]

            # Preprocess input data: f, e are sequences of words.
            f = f.split()
            e = e.split()
            # gold is a sequence of f-e link pairs.
            gold = Alignment.Alignment(gold_str)

            # Initialize the model for this instance.
            model = GridAlign.Model(f, e, etree, ftree, instanceID, weights,
                                    a1, a2, inverse,
                                    LOCAL_FEATURES=blob['localFeatures'],
                                    NONLOCAL_FEATURES=blob['nonlocalFeatures'],
                                    FLAGS=FLAGS)
            model.gold = gold
            # Initialize the model with data tables.
            model.pef = blob['pef']
            model.pfe = blob['pfe']
            # Align the current training instance.
            model.align()

            ##################################################################
            # Weight updating
            ##################################################################
            LEARNING_RATE = FLAGS.learningrate

            # Set the oracle item.
            oracle = None
            if FLAGS.oracle == 'gold':
                oracle = model.oracle
            elif FLAGS.oracle == 'hope':
                oracle = model.hope
            else:
                sys.stderr.write("ERROR: Unknown oracle class: %s\n" % (FLAGS.oracle))
            # Set the hypothesis item.
            hyp = None
            if FLAGS.hyp == '1best':
                hyp = model.modelBest
            elif FLAGS.hyp == 'fear':
                hyp = model.fear
            else:
                sys.stderr.write("ERROR: Unknown hyp class: %s\n" % (FLAGS.hyp))
            # Debiasing.
            if FLAGS.debiasing:
                validate_features(oracle.scoreVector, valid_feature_names)
                validate_features(hyp.scoreVector, valid_feature_names)

            deltas = None
            if set(hyp.links) != set(oracle.links):
                numChanged += 1
                ##############################################################
                # WEIGHT UPDATES
                ##############################################################
                deltas = oracle.scoreVector - hyp.scoreVector
                weights = weights + LEARNING_RATE * deltas
            # Even if we didn't update, the current weight vector counts
            # towards the sum.
            weights_sum += weights

            # L1 projection step:
            # if w in [-tau, tau], w -> 0; else, move w closer to 0 by tau.
            if FLAGS.tau is not None:
                for index, w in weights_sum.iteritems():
                    if w == 0:
                        del weights_sum[index]
                        continue
                    if index[-3:] == '_nb':
                        continue
                    if w > 0 and w <= FLAGS.tau and not FLAGS.negreg:
                        del weights_sum[index]
                    elif w < 0 and w >= (FLAGS.tau * -1):
                        del weights_sum[index]
                    elif w > 0 and w > FLAGS.tau and not FLAGS.negreg:
                        weights_sum[index] -= FLAGS.tau
                    elif w < 0 and w < (FLAGS.tau * -1):
                        weights_sum[index] += FLAGS.tau

    # Set a unique pickled output file for this process; it holds the sum of
    # weights over each iteration for this process.
    output_filename = "%s/training.%s" % (tmpdir, str(mpi.rank))
    output_file = open(output_filename, 'w')
    # Dump all weights used during this node's run, to be averaged by the
    # master along with the others.
    cPickle.dump(weights_sum, output_file, protocol=cPickle.HIGHEST_PROTOCOL)
    output_file.close()

    # Remember just the last weights used by this process; start here next epoch.
    output_filename_last_weights = "%s/training-restart.%s" % (tmpdir, str(mpi.rank))
    output_file_last_weights = open(output_filename_last_weights, 'w')
    cPickle.dump(weights, output_file_last_weights, protocol=cPickle.HIGHEST_PROTOCOL)
    output_file_last_weights.close()

    # Gather "done" messages from the workers (synchronize).
    done = mpi.gather(value=True, root=0)

    # Average weights over all processes.
    masterWeights = svector.Vector()
    if myRank == masterRank:
        # Read pickled output.
        for rank in range(nProcs):
            input_filename = tmpdir + '/training.' + str(rank)
            input_file = open(input_filename, 'r')
            masterWeights += cPickle.load(input_file)
            input_file.close()
        sys.stderr.write("Done reading data.\n")
        sys.stderr.write("len(masterWeights)= %d\n" % (len(masterWeights)))
        sys.stderr.flush()

        # AVERAGED WEIGHTS
        sys.stderr.write("[%d] Averaging weights.\n" % (mpi.rank))
        sys.stderr.flush()
        masterWeights = masterWeights / (len(indices) * (epoch + 1))
        # Dump the master weights to file; there is only one weight vector in
        # this file at a time.
        mw = robustWrite(tmpdir + '/weights')
        cPickle.dump(masterWeights, mw, protocol=cPickle.HIGHEST_PROTOCOL)
        mw.close()

    # All processes read and load the new averaged weights, but make sure the
    # worker nodes don't attempt to read from the weights file before the
    # root node has written it. Sync up with a blocking broadcast call.
    ready = mpi.broadcast(value=True, root=0)
    mw = robustRead(tmpdir + '/weights')
    masterWeights = cPickle.load(mw)
    mw.close()

    # Print the report for this iteration.
    elapsedTime = time.time() - startTime
    if myRank == masterRank:
        # masterRank prints the elapsed time; it may differ at each node.
        sys.stderr.write("Time: %0.2f\n" % (elapsedTime))
        sys.stderr.write("[%d] Finished training.\n" % (mpi.rank))
    return masterWeights
def learn_basis2(self):
    mpi.gather(self.nodebufs.a, self.rootbufs.a, mpi.root)
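# learn_basis1/learn_basis2 gather node buffers into preallocated root
# buffers, a buffer-style signature closer to mpi4py's Gather than to
# pyMPI's value-returning gather. A minimal sketch of that round trip under
# that assumption; the buffer shapes and the broadcast of the update back
# out are illustrative, not the project's actual code:
import numpy as np
from mpi4py import MPI  # assumption: the wrapper resembles mpi4py

comm = MPI.COMM_WORLD

def gather_means(node_mean, root_buf):
    """Gather each rank's mean statistics into a preallocated root buffer.
    node_mean: (k,) float64 on every rank; root_buf: (size, k) on rank 0."""
    comm.Gather(node_mean, root_buf if comm.rank == 0 else None, root=0)

# Usage sketch: the root averages across ranks, then broadcasts the update.
# node_mean = np.zeros(16); root_buf = np.zeros((comm.size, 16))
# gather_means(node_mean, root_buf)
# update = root_buf.mean(axis=0) if comm.rank == 0 else np.empty(16)
# comm.Bcast(update, root=0)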
""" allgather.py Quick test of how the 'allgather' function works """ import mpi rank,size=mpi.init() data = mpi.gather( rank, 1, mpi.MPI_INT, 1, mpi.MPI_INT, 0, mpi.MPI_COMM_WORLD ) print type(data) print data mpi.finalize()
def plotNodePositions2d(thingy, xFunction="%s.x", yFunction="%s.y",
                        plotGhosts=False, colorNodeLists=True,
                        colorDomains=False, title="", plotStyle="ro",
                        markerSize=4):
    assert colorNodeLists + colorDomains <= 1

    if isinstance(thingy, DataBase2d):
        nodeLists = thingy.nodeLists()
    else:
        nodeLists = thingy

    # Gather the node positions across all domains.
    # Loop over all the NodeLists.
    xNodes = []
    yNodes = []
    for nodeList in nodeLists:
        if plotGhosts:
            pos = nodeList.positions().allValues()
        else:
            pos = nodeList.positions().internalValues()
        xNodes.append([eval(xFunction % "x") for x in pos])
        yNodes.append([eval(yFunction % "x") for x in pos])
    assert len(xNodes) == len(nodeLists)
    assert len(xNodes) == len(yNodes)

    globalXNodes = mpi.gather(xNodes)
    globalYNodes = mpi.gather(yNodes)

    plot = newFigure()
    if mpi.rank == 0:
        assert len(globalXNodes) == mpi.procs
        assert len(globalYNodes) == mpi.procs
        xlist, ylist = [], []
        if colorDomains:
            for xDomain, yDomain in zip(globalXNodes, globalYNodes):
                assert len(xDomain) == len(nodeLists)
                assert len(yDomain) == len(nodeLists)
                xlist.append([])
                ylist.append([])
                for xx in xDomain:
                    xlist[-1].extend(xx)
                for yy in yDomain:
                    ylist[-1].extend(yy)
            assert len(xlist) == mpi.procs
            assert len(ylist) == mpi.procs
        elif colorNodeLists:
            for i in xrange(len(nodeLists)):
                xlist.append([])
                ylist.append([])
            for xDomain, yDomain in zip(globalXNodes, globalYNodes):
                assert len(xDomain) == len(nodeLists)
                assert len(yDomain) == len(nodeLists)
                for i in xrange(len(nodeLists)):
                    xlist[i].extend(xDomain[i])
                    ylist[i].extend(yDomain[i])
            assert len(xlist) == len(nodeLists)
            assert len(ylist) == len(nodeLists)
        else:
            xlist, ylist = [[]], [[]]
            for xDomain, yDomain in zip(globalXNodes, globalYNodes):
                print len(xDomain), len(nodeLists)
                assert len(xDomain) == len(nodeLists)
                assert len(yDomain) == len(nodeLists)
                for i in xrange(len(nodeLists)):
                    xlist[0].extend(xDomain[i])
                    ylist[0].extend(yDomain[i])

        plt.title(title)
        color = iter(pltcm.rainbow(np.linspace(0, 1, len(xlist))))
        for x, y in zip(xlist, ylist):
            c = next(color)
            plot.plot(x, y, "o", color=c, ms=markerSize)
        plot.axes.set_aspect("equal", "datalim")
    return plot
import mpi, sys, os, string

# Rank 0 reads the sequences from the command line; the other ranks start
# with an empty list and receive their share via scatter.
if mpi.rank == 0:
    sequences = sys.argv[1:]
else:
    sequences = []

local_sequences = mpi.scatter(sequences)

result = []
for sequence in local_sequences:
    cmd = ("java -Djava.awt.headless=true -cp FoldingServer.jar "
           "foldingServer.FoldingServer -c ") + sequence
    for line in os.popen(cmd).readlines():
        if line.find(":") >= 0:
            message = str(mpi.rank) + ";" + sequence + ";" + line.rstrip("\n")
            result.append(message)

# Avoid shadowing the builtin all().
all_results = mpi.gather(result)
if mpi.rank == 0:
    for message in all_results:
        print message
def plotVectorField2d(dataBase, fieldList, plotGhosts=False,
                      vectorMultiplier=1.0, colorNodeLists=False,
                      colorDomains=False, title=""):
    assert colorNodeLists + colorDomains <= 1

    # Gather the node positions and vectors across all domains.
    # Loop over all the NodeLists.
    localNumNodes = []
    xNodes = []
    yNodes = []
    vxNodes = []
    vyNodes = []
    for i in xrange(dataBase.numNodeLists):
        nodeList = dataBase.nodeLists()[i]
        assert i < fieldList.numFields
        vectorField = fieldList[i]
        if plotGhosts:
            n = nodeList.numNodes
        else:
            n = nodeList.numInternalNodes
        localNumNodes.append(n)
        xNodes += np.array([x.x for x in nodeList.positions()[:n]])
        yNodes += np.array([x.y for x in nodeList.positions()[:n]])
        # Apply vectorMultiplier as in the Gnuplot variant above (the
        # original dropped it here despite accepting the parameter).
        vxNodes += np.array([x.x for x in vectorField[:n]]) * vectorMultiplier
        vyNodes += np.array([x.y for x in vectorField[:n]]) * vectorMultiplier
    assert len(xNodes) == len(yNodes) == len(vxNodes) == len(vyNodes)

    numDomainNodes = [len(xNodes)]
    numNodesPerDomain = mpi.gather(numDomainNodes)
    globalNumNodes = mpi.gather(localNumNodes)
    globalXNodes = mpi.gather(xNodes)
    globalYNodes = mpi.gather(yNodes)
    globalVxNodes = mpi.gather(vxNodes)
    globalVyNodes = mpi.gather(vyNodes)

    plot = newFigure()
    if mpi.rank == 0:
        plot.axes().set_aspect("equal", "datalim")
        plot.title(title)
        if colorDomains:
            # One color per domain (the original sized the color map with an
            # undefined 'xlist').
            color = iter(pltcm.rainbow(np.linspace(0, 1, len(numNodesPerDomain))))
            cumulativeN = 0
            for domain in xrange(len(numNodesPerDomain)):
                c = next(color)
                n = numNodesPerDomain[domain]
                x = np.array(globalXNodes[cumulativeN:cumulativeN + n])
                y = np.array(globalYNodes[cumulativeN:cumulativeN + n])
                vx = np.array(globalVxNodes[cumulativeN:cumulativeN + n])
                vy = np.array(globalVyNodes[cumulativeN:cumulativeN + n])
                cumulativeN += n
                plot.quiver(x, y, vx, vy, color=c)
        elif colorNodeLists:
            # One color per (domain, NodeList) chunk.
            color = iter(pltcm.rainbow(np.linspace(0, 1, len(globalNumNodes))))
            cumulativeN = 0
            for i in xrange(len(globalNumNodes)):
                c = next(color)
                n = globalNumNodes[i]
                if n > 0:
                    iNodeList = i % dataBase.numNodeLists
                    x = np.array(globalXNodes[cumulativeN:cumulativeN + n])
                    y = np.array(globalYNodes[cumulativeN:cumulativeN + n])
                    vx = np.array(globalVxNodes[cumulativeN:cumulativeN + n])
                    vy = np.array(globalVyNodes[cumulativeN:cumulativeN + n])
                    cumulativeN += n
                    plot.quiver(x, y, vx, vy, color=c)
        else:
            x = np.array(globalXNodes)
            y = np.array(globalYNodes)
            vx = np.array(globalVxNodes)
            vy = np.array(globalVyNodes)
            plot.quiver(x, y, vx, vy)
    return plot
def plotNodePositions2d(thingy, xFunction="%s.x", yFunction="%s.y",
                        plotGhosts=False, colorNodeLists=True,
                        colorDomains=False, title="", style="points",
                        persist=None):
    assert colorNodeLists + colorDomains <= 1

    if isinstance(thingy, DataBase2d):
        nodeLists = thingy.nodeLists()
    else:
        nodeLists = thingy

    # Gather the node positions across all domains.
    # Loop over all the NodeLists.
    xNodes = []
    yNodes = []
    for nodeList in nodeLists:
        if plotGhosts:
            pos = nodeList.positions().allValues()
        else:
            pos = nodeList.positions().internalValues()
        xNodes.append([eval(xFunction % "x") for x in pos])
        yNodes.append([eval(yFunction % "x") for x in pos])
    assert len(xNodes) == len(nodeLists)
    assert len(xNodes) == len(yNodes)

    globalXNodes = mpi.gather(xNodes)
    globalYNodes = mpi.gather(yNodes)

    if mpi.rank == 0:
        assert len(globalXNodes) == mpi.procs
        assert len(globalYNodes) == mpi.procs
        xlist, ylist = [], []
        if colorDomains:
            for xDomain, yDomain in zip(globalXNodes, globalYNodes):
                assert len(xDomain) == len(nodeLists)
                assert len(yDomain) == len(nodeLists)
                xlist.append([])
                ylist.append([])
                for xx in xDomain:
                    xlist[-1].extend(xx)
                for yy in yDomain:
                    ylist[-1].extend(yy)
            assert len(xlist) == mpi.procs
            assert len(ylist) == mpi.procs
        elif colorNodeLists:
            for i in xrange(len(nodeLists)):
                xlist.append([])
                ylist.append([])
            for xDomain, yDomain in zip(globalXNodes, globalYNodes):
                assert len(xDomain) == len(nodeLists)
                assert len(yDomain) == len(nodeLists)
                for i in xrange(len(nodeLists)):
                    xlist[i].extend(xDomain[i])
                    ylist[i].extend(yDomain[i])
            assert len(xlist) == len(nodeLists)
            assert len(ylist) == len(nodeLists)
        else:
            xlist, ylist = [[]], [[]]
            for xDomain, yDomain in zip(globalXNodes, globalYNodes):
                print len(xDomain), len(nodeLists)
                assert len(xDomain) == len(nodeLists)
                assert len(yDomain) == len(nodeLists)
                for i in xrange(len(nodeLists)):
                    xlist[0].extend(xDomain[i])
                    ylist[0].extend(yDomain[i])

        plot = generateNewGnuPlot(persist=persist)
        plot("set size square")
        plot.title = title
        assert len(xlist) == len(ylist)
        for x, y in zip(xlist, ylist):
            data = Gnuplot.Data(x, y, with_=style, inline=True)
            plot.replot(data)
            SpheralGnuPlotCache.append(data)
        return plot
    else:
        return fakeGnuplot()
# Average weights over iterations and nodes.
outweights = svector.Vector()
if not opts.parallel or parallel.rank != parallel.master:
    if online_learning:
        outweights = float(nweights) * thedecoder.weights - sumweights_helper
    else:
        outweights = thedecoder.weights
        nweights = 1
    remove_zeros(outweights)
    log.write("summed feature weights: %s n=%d\n" % (outweights * watch_features, nweights))
if opts.parallel:
    all_outweights = mpi.gather(mpi.world, outweights, parallel.master)
    all_nweights = mpi.gather(mpi.world, nweights, parallel.master)
    if parallel.rank == parallel.master:
        sumweights = sum(all_outweights, svector.Vector())
        outweights = sumweights / float(sum(all_nweights))
        log.write("summed feature weights: %s n=%d\n" % (sumweights * watch_features, sum(all_nweights)))
        log.write("averaged feature weights: %s\n" % (outweights * watch_features))
if opts.outweightfilename:
    if not opts.parallel or parallel.rank == parallel.master:
        outweightfile.write("%s\n" % outweights)
        outweightfile.flush()
if opts.parallel:
    outweights = mpi.broadcast(mpi.world, outweights, parallel.master)
import Numeric as nm
import mpi

mpi.init()
rank = mpi.comm_rank(mpi.MPI_COMM_WORLD)
size = mpi.comm_size(mpi.MPI_COMM_WORLD)
root = 0

# Each rank sends a different number of elements, so gatherv needs the
# per-rank counts and the displacements into the receive buffer.
message = [rank] * (size + rank)
print "Sending:", message

recvcounts = mpi.gather(len(message), 1, mpi.MPI_INT,
                        1, mpi.MPI_INT, root, mpi.MPI_COMM_WORLD)

# Displacements are the running sum of the counts, not the raw counts (the
# original appended each count directly, which misplaces every rank's data
# after the second).
displacements = [0]
for count in recvcounts[:-1]:
    displacements.append(displacements[-1] + count)

result = mpi.gatherv(message, len(message), mpi.MPI_INT,
                     recvcounts, displacements, mpi.MPI_INT,
                     root, mpi.MPI_COMM_WORLD)
if rank == root:
    print "Received:", result
mpi.finalize()
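# Worked example: with 3 ranks, the messages are [0]*3, [1]*4, [2]*5, so
# recvcounts == [3, 4, 5], displacements == [0, 3, 7], and the root
# receives [0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2].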
def decode_parallel(weights, indices, blob, name="", out=sys.stdout, score_out=None):
    """
    Align some input data in blob with a given weight vector. Report accuracy.
    """
    myRank = mpi.rank
    masterRank = 0
    # How many processors are there?
    nProcs = mpi.size
    results = []
    allResults = None
    fmeasure = 0.0

    # Keep track of time to decode.
    startTime = time.time()
    result_file = robustWrite(tmpdir + '/results.' + str(mpi.rank))
    for i, instanceID in enumerate(indices[:FLAGS.subset]):
        if myRank == i % nProcs:
            # Assign the current instance we will look at.
            f = blob['f_instances'][instanceID]
            e = blob['e_instances'][instanceID]
            etree = blob['etree_instances'][instanceID]
            if FLAGS.train:
                gold_str = blob['gold_instances'][instanceID]
                gold = Alignment.Alignment(gold_str)
            ftree = None
            if FLAGS.ftrees is not None:
                ftree = blob['ftree_instances'][instanceID]
            inverse = None
            if FLAGS.inverse is not None:
                inverse = blob['inverse_instances'][instanceID]
            a1 = None
            if FLAGS.a1 is not None:
                a1 = blob['a1_instances'][instanceID]
            a2 = None
            if FLAGS.a2 is not None:
                a2 = blob['a2_instances'][instanceID]

            # Prepare input data: f, e are sequences of words.
            f = f.split()
            e = e.split()
            # Initialize the model for this instance.
            model = GridAlign.Model(f, e, etree, ftree, instanceID, weights,
                                    a1, a2, inverse, DECODING=True,
                                    LOCAL_FEATURES=blob['localFeatures'],
                                    NONLOCAL_FEATURES=blob['nonlocalFeatures'],
                                    FLAGS=FLAGS)
            if FLAGS.train:
                model.gold = gold
            # Initialize the model with data tables.
            model.pef = blob['pef']
            model.pfe = blob['pfe']
            # Align the current instance.
            # FOR PROFILING: cProfile.run('model.align(1)', 'profile.out')
            model.align()
            # Dump the intermediate chunk to disk; reassemble later.
            if FLAGS.train:
                cPickle.dump((model.modelBest.links, model.gold.links_dict),
                             result_file, protocol=cPickle.HIGHEST_PROTOCOL)
            elif FLAGS.align:
                cPickle.dump((model.modelBest.links, model.modelBest.score),
                             result_file, protocol=cPickle.HIGHEST_PROTOCOL)
    result_file.close()
    done = mpi.gather(value=True, root=0)

    # Reduce here.
    if myRank == masterRank:
        # Open result files for reading.
        resultFiles = {}
        for i in range(nProcs):
            resultFiles[i] = open(tmpdir + '/results.' + str(i), 'r')
        if FLAGS.train:
            ##################################################################
            # Compute f-measure over all alignments
            ##################################################################
            numCorrect = 0
            numModelLinks = 0
            numGoldLinks = 0
            for i, instanceID in enumerate(indices[:FLAGS.subset]):
                # Which node stored instance i?
                node = i % nProcs
                # Retrieve the result for instance i.
                resultTuple = cPickle.load(resultFiles[node])
                modelBest = resultTuple[0]
                gold = resultTuple[1]
                # Update the F-score counts.
                numCorrect_, numModelLinks_, numGoldLinks_ = f1accumulator(modelBest, gold)
                numCorrect += numCorrect_
                numModelLinks += numModelLinks_
                numGoldLinks += numGoldLinks_
            # Compute F-measure, precision, and recall.
            fmeasure, precision, recall = f1score(numCorrect, numModelLinks, numGoldLinks)
            elapsedTime = time.time() - startTime

            # Print the report for this run.
            sys.stderr.write("Time: " + str(elapsedTime) + "\n")
            sys.stderr.write("\n")
            sys.stderr.write('F-score-%s: %1.5f\n' % (name, fmeasure))
            sys.stderr.write('Precision-%s: %1.5f\n' % (name, precision))
            sys.stderr.write('Recall-%s: %1.5f\n' % (name, recall))
            sys.stderr.write('# Correct: %d\n' % (numCorrect))
            sys.stderr.write('# Me Total: %d\n' % (numModelLinks))
            sys.stderr.write('# Gold Total: %d\n' % (numGoldLinks))
            sys.stderr.write("[%d] Finished decoding.\n" % (myRank))
        else:
            if score_out != None:
                sout = open(score_out, "w")
            for i, instanceID in enumerate(indices):
                node = i % nProcs
                resultTuple = cPickle.load(resultFiles[node])
                modelBestLinks = resultTuple[0]
                score = resultTuple[1]
                out.write("%s\n" % (" ".join(map(lambda link: "%s-%s" % (link[0], link[1]), modelBestLinks))))
                if score_out != None:
                    sout.write("%s\n" % (score))
        # Clean up.
        for i in range(nProcs):
            resultFiles[i].close()
    return