class CorpusIteratorFuncHead_V():
    """Corpus iterator that converts every sentence to a function-head
    representation.

    Thin wrapper around CorpusIterator_V: all corpus bookkeeping
    (permutation, length, random access) is delegated to the underlying
    iterator, and reverse_content_head is applied to each sentence on the
    way out.
    """

    def __init__(self, language, partition="train", storeMorph=False,
                 splitLemmas=False, shuffleDataSeed=None):
        self.basis = CorpusIterator_V(
            language,
            partition=partition,
            storeMorph=storeMorph,
            splitLemmas=splitLemmas,
            shuffleDataSeed=shuffleDataSeed,
        )

    def permute(self):
        """Reshuffle the underlying corpus in place."""
        self.basis.permute()

    def length(self):
        """Number of sentences in the underlying corpus."""
        return self.basis.length()

    def iterator(self, rejectShortSentences=False):
        """Yield each sentence after rewriting it with reverse_content_head.

        Note: reverse_content_head mutates the sentence in place; its return
        value is not used here.
        """
        for sent in self.basis.iterator(rejectShortSentences=rejectShortSentences):
            reverse_content_head(sent)
            yield sent

    def getSentence(self, index):
        """Return sentence *index*, rewritten by reverse_content_head."""
        return reverse_content_head(self.basis.getSentence(index))
class CorpusIteratorFuncHeadFraction_V():
    """Function-head corpus iterator restricted to a fixed fraction of the
    data.

    Same contract as CorpusIteratorFuncHead_V, except that the underlying
    corpus is truncated to the first ``fraction`` of its sentences (after a
    fixed-seed shuffle, so the subset is reproducible across runs).
    """

    def __init__(self, language, partition="train", fraction=1.0,
                 storeMorph=False, splitLemmas=False):
        # shuffleDataSeed is pinned to 4 so that the fraction taken below is
        # always the same subset of sentences.
        self.basis = CorpusIterator_V(
            language,
            partition=partition,
            storeMorph=storeMorph,
            splitLemmas=splitLemmas,
            shuffleDataSeed=4,
        )
        cutoff = int(fraction * len(self.basis.data))
        self.basis.data = self.basis.data[:cutoff]
        self.permute()
        self.fraction = fraction

    def permute(self):
        """Reshuffle the (already truncated) underlying corpus in place."""
        self.basis.permute()

    def length(self):
        """Number of sentences in the truncated corpus."""
        return self.basis.length()

    def iterator(self, rejectShortSentences=False):
        """Yield each sentence after rewriting it with reverse_content_head."""
        counter = 0  # vestigial: the original fraction-based early-exit was commented out
        print("Actual length", self.length())
        for sentence in self.basis.iterator(rejectShortSentences=rejectShortSentences):
            reverse_content_head(sentence)
            yield sentence

    def getSentence(self, index):
        """Return sentence *index*, rewritten by reverse_content_head."""
        return reverse_content_head(self.basis.getSentence(index))
    # Tail of a function whose `def` lies outside this view: it returns the
    # accumulated dependency-length statistics.
    return (totalDepLength, numberOfWords, byType)

# --- top-level driver script (Python 2: uses `print >>` and list-returning map) ---
# NOTE(review): the original source was collapsed onto one line; the nesting
# below (everything under the `with` block) is a reconstruction — confirm
# against the original script.
assert batchSize == 1
depLengths = []
#while True:
outpath = "/u/scr/mhahn/japanese/"+str(myID)
with open(outpath, "w") as outFile:
  # Python 2 print-chevron syntax: writes a TSV header line to outFile.
  print >> outFile, "\t".join(["Sent", "Length"])
  counter = 0
  if True:
    corpus = CorpusIterator_V(language,"train", shuffleDataSeed=40)
    corpusIterator = corpus.iterator()
    if corpus.length() == 0:
      quit()
    while True:
      try:
        # Python 2 semantics: `10*range(batchSize)` repeats the list 10 times,
        # so `map` draws 10*batchSize sentences (10, since batchSize == 1).
        # StopIteration escapes the lambda and ends the loop at corpus end.
        # NOTE(review): under Python 3 this would fail (`map` is lazy and
        # range cannot be multiplied) — this fragment is Python-2-only.
        batch = map(lambda x:next(corpusIterator), 10*range(batchSize))
      except StopIteration:
        break
      # Sort the fetched sentences by length so each partition below holds
      # similarly-sized sentences.
      batch = sorted(batch, key=len)
      partitions = range(10)
      for partition in partitions:
        counter += 1
        printHere = (counter % 100 == 0)
        # Slice out one batchSize-sized chunk of the sorted batch.
        current = batch[partition*batchSize:(partition+1)*batchSize]
        depLength = doForwardPass(current)
    # Tail of a function whose `def` lies outside this view: it records the
    # word count and returns the accumulated dependency-length statistics.
    numberOfWords = wordNum
    return (totalDepLength, numberOfWords, byType)

# --- top-level driver script (Python 2) ---
# Near-duplicate of the preceding fragment; see the reviewer notes there.
# NOTE(review): nesting below is reconstructed from a collapsed line — confirm.
assert batchSize == 1
depLengths = []
#while True:
outpath = "/u/scr/mhahn/japanese/" + str(myID)
with open(outpath, "w") as outFile:
  # Python 2 print-chevron syntax: writes a TSV header line to outFile.
  print >> outFile, "\t".join(["Sent", "Length"])
  counter = 0
  if True:
    corpus = CorpusIterator_V(language, "train", shuffleDataSeed=40)
    corpusIterator = corpus.iterator()
    if corpus.length() == 0:
      quit()
    while True:
      try:
        # Python 2: fetches 10*batchSize sentences (list-repetition of range,
        # eager map); StopIteration from next() terminates the loop.
        batch = map(lambda x: next(corpusIterator), 10 * range(batchSize))
      except StopIteration:
        break
      # Group similarly-long sentences together before partitioning.
      batch = sorted(batch, key=len)
      partitions = range(10)
      for partition in partitions:
        counter += 1
        printHere = (counter % 100 == 0)
        # One batchSize-sized chunk of the length-sorted batch.
        current = batch[partition * batchSize:(partition + 1) * batchSize]
# --- top-level script (Python 3): document dating + pyro training loop ---
print(docs)
# Map each Old French document identifier to its (approximate) year of
# composition.
times = {'Graal_1225_prose': 1225, 'Aucassin_early13_verse-prose': 1210, 'QuatreLivresReis_late12_prose': 1180, 'TroyesYvain_1180_verse': 1180, 'Roland_1100_verse': 1100, 'BeroulTristan_late12_verse': 1180, 'StLegier_1000_verse': 1000, 'StAlexis_1050_verse': 1050, 'Strasbourg_842_prose': 842, 'Lapidaire_mid12_prose': 1150}
# One entry per document, in document-index order, rescaled to centuries.
timesVector = (torch.FloatTensor([times[itos_docs[i]] for i in range(len(docs))])/100.0)
#covarianceMatrix =
print(timesVector)
print(times)
# NOTE(review): debug exit — everything below `quit()` is unreachable as
# written. Presumably left in while inspecting timesVector; remove to train.
quit()
for epoch in range(100):
  corpus.permute()
  # One optimization step per sentence in the corpus.
  for q in range(corpus.length()):
    loss = forward(corpus, q)
    backward(loss)
  # `epoch % 1 == 0` is always true: parameters are dumped every epoch.
  if epoch % 1 == 0:
    print("Saving")
    save_path = "../raw-results/"
    #save_path = "/afs/cs.stanford.edu/u/mhahn/scr/deps/"
    with open(save_path+"/manual_output_ground_coarse/"+args.language+"_"+__file__+"_model_"+str(myID)+".tsv", "w") as outFile:
      # TSV header for the per-dependency posterior summary written below.
      print("\t".join(list(map(str,["Epochs", "DH_Mean_NoPunct", "DH_Sigma_NoPunct", "Distance_Mean_NoPunct", "Distance_Sigma_NoPunct", "Dependency"]))), file=outFile)
      # Pull current variational parameters out of pyro's global param store.
      dh_numpy = pyro.get_param_store().get_param("mu_DH").data.numpy()
      dh_sigma_numpy = pyro.get_param_store().get_param("sigma_DH").data.numpy()
      dist_numpy = pyro.get_param_store().get_param("mu_Dist").data.numpy()
      dist_sigma_numpy = pyro.get_param_store().get_param("sigma_Dist").data.numpy()