import pandas as pd
# `pmap` (the parallel map helper) is assumed to be imported/defined elsewhere in this module.

def pgroupby(df, groups, f, **kwargs):
    '''Mirror the groupby order (group, then aggregate), but apply f to each group in parallel.

    replace:
        results = df.groupby(['col1', 'col2']).apply(f)
    with:
        results = df.pgroupby(['col1', 'col2'], f)
    '''
    # split into group names and group frames
    names, df_split = zip(*[(n, g) for n, g in df.groupby(groups)])
    # apply f to each group in parallel
    out = pmap(f, df_split, **kwargs)
    # reassemble and return, keyed by the group names
    groups = [groups] if isinstance(groups, str) else groups
    return pd.concat(
        [pd.concat({k: v}, names=groups) for k, v in zip(names, out)])
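# A minimal usage sketch (hypothetical column names and data; assumes a working
# `pmap` is in scope). Binding the helper onto DataFrame, as the docstring
# suggests, is optional.
import pandas as pd

pd.DataFrame.pgroupby = pgroupby          # optional monkey-patch so df.pgroupby(...) works

df = pd.DataFrame({'col1': ['a', 'a', 'b'],
                   'col2': ['x', 'y', 'y'],
                   'val':  [1, 2, 3]})

# each group's sub-frame is reduced in its own pmap task
results = df.pgroupby(['col1', 'col2'], lambda g: g[['val']].sum())
# roughly equivalent (apart from the parallelism) to:
# results = df.groupby(['col1', 'col2']).apply(lambda g: g[['val']].sum())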
        # inside process(): the decoder raised, so log the traceback and skip this
        # sentence (abort the whole run after 5 decoder failures)
        import traceback
        log.writeln("decoder raised exception: %s" % "".join(traceback.format_exception(*sys.exc_info())))
        global decoder_errors
        decoder_errors += 1
        if decoder_errors >= 5:
            raise
        else:
            return

    bestv, best = decoder.get_nbest(goal, 1)[0]
    log.write("done decoding\n")

    # Collect hypotheses that will be used for learning
    sent.hyps = get_hyps(sent, goal, thedecoder.weights)
    log.write("done rescoring\n")

    return sent

if opts.parallel:
    outsents = parallel.pmap(process, insents, tag=0, verbose=1)
else:
    outsents = (process(sent) for sent in insents)

if not opts.parallel or parallel.rank == parallel.master:
    bleu_comps = svector.Vector()
    for outsent in outsents:
        if outsent:
            for hyp in outsent.hyps:
                visualization_output(output_file, outsent, hyp)
            output_file.flush()
    (bestv, best) = outputs[0]

    if french_parse_file:
        french_parse_file.write("%s ||| %s\n" % (sent.id, goal.viterbi_deriv().french_tree()))
        french_parse_file.flush()
    if english_parse_file:
        english_parse_file.write("%s ||| %s\n" % (sent.id, goal.viterbi_deriv().english_tree()))
        english_parse_file.flush()

    if log.level >= 1:
        gc.collect()
        log.write(" done decoding, memory=%s\n" % monitor.memory())
        log.write(" features: %s; %s\n" % (bestv, thedecoder.weights.dot(bestv)))

    sent.ewords = best
    return sent

if opts.parallel:
    outsents = parallel.pmap(process, insents, tag=0, verbose=1)
else:
    outsents = (process(sent) for sent in insents)

if not opts.parallel or parallel.rank == parallel.master:
    for outsent in outsents:
        if outsent is None:
            output_file.write("\n")
        else:
            output_file.write("%s\n" % " ".join(outsent.ewords))
        output_file.flush()
if not opts.parallel or parallel.rank == parallel.master:
    outweightfile.write("%s\n" % outweights)
    outweightfile.flush()

if opts.parallel:
    outweights = MPI.COMM_WORLD.bcast(outweights, root=parallel.master)

# Process heldout data
if not opts.parallel or parallel.rank != parallel.master:
    saveweights = thedecoder.weights
    thedecoder.weights = outweights

if opts.parallel:
    outsents = parallel.pmap(process_heldout, heldoutsents, tag=0, verbose=1)
else:
    outsents = (process_heldout(sent) for sent in heldoutsents)

if not opts.parallel or parallel.rank == parallel.master:
    heldout_score_comps = svector.Vector()
    for outsent in outsents:
        if outsent:
            output_file.write("%s\n" % " ".join(outsent.ewords))
            heldout_score_comps += outsent.score_comps
        else:
            output_file.write("\n")  # dummy output for decoder failure
        output_file.flush()

if not opts.parallel or parallel.rank == parallel.master:
if loop_forever:
    iterations = itertools.count()
else:
    iterations = xrange(1)

for iteration in iterations:
    log.writeln("epoch %d" % iteration)

    # Process training data
    if shuffle_sentences and (not opts.parallel or parallel.rank == parallel.master):
        random.shuffle(trainsents)

    if opts.parallel:
        outsents = parallel.pmap(lambda (si, sent): (si, process(sent)), trainsents, tag=0, verbose=1)
        if parallel.rank == parallel.master:
            outsents = list(outsents)
    else:
        outsents = [(si, process(sent)) for (si, sent) in trainsents]

    if not opts.parallel or parallel.rank == parallel.master:
        outsents.sort()
        train_score_comps = svector.Vector()
        for _, outsent in outsents:
            if outsent:
                output_file.write("%s\n" % " ".join(outsent.ewords))
                train_score_comps += outsent.score_comps
            else:
                output_file.write("\n")  # dummy output for decoder failure
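# The training loop above tags each sentence with its index before the parallel
# map, then sorts on the master so outputs line up with the original corpus
# order. A minimal, self-contained sketch of that index-then-sort pattern,
# using plain map as a stand-in for parallel.pmap (hypothetical data; not the
# trainer's actual code):
def process_indexed(pair):
    si, sent = pair
    return si, sent.upper()      # placeholder for (si, process(sent))

indexed = list(enumerate(["source sentence one", "source sentence two"]))
results = list(map(process_indexed, indexed))   # a real pmap may yield these out of order
results.sort()                                  # restore corpus order by index
outputs = [out for _, out in results]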