def align_years(years, rep_type, in_dir, out_dir, count_dir, min_count, **rep_args):
    """Align each year's embedding to the previous aligned year and save it.

    Years are processed in iteration order. The first year is written out
    unchanged; every later year is passed through
    ``alignment.smart_procrustes_align`` together with the previous year's
    *aligned* embedding, so alignments are chained year to year.

    For each year two files are written, both prefixed by
    ``out_dir + str(year)``: ``-w.npy`` (the ``.m`` attribute, via ``np.save``)
    and ``-vocab.pkl`` (the ``.iw`` attribute, via ``write_pickle``).

    Args:
        years: Iterable of year identifiers; ``str(year)`` is appended to the
            directory prefixes to build paths.
        rep_type: Representation type forwarded to ``create_representation``.
        in_dir: Input path prefix. It is concatenated directly with the year,
            so it must already end with a separator if one is wanted.
        out_dir: Output path prefix, concatenated the same way as ``in_dir``.
        count_dir: Forwarded to ``words_above_count``.
        min_count: Forwarded to ``words_above_count``.
        **rep_args: Extra keyword arguments forwarded to
            ``create_representation``.

    Returns:
        None. Results are only written to disk.
    """
    first_iter = True
    base_embed = None
    for year in years:
        # Formatted print(...) calls emit exactly the text the old print
        # statements did, and parse on both Python 2 and Python 3.
        print("Loading year: %s" % (year,))
        # Load this year's embedding.
        year_embed = create_representation(rep_type, in_dir + str(year), **rep_args)
        # Per the original author: the words whose count exceeds min_count.
        year_words = words_above_count(count_dir, year, min_count)
        # Intended to restrict the embedding to year_words.
        # NOTE(review): the return value is discarded. If get_subembed returns
        # a new embedding instead of filtering in place, this call has no
        # effect on what gets aligned and saved -- confirm against its
        # implementation before relying on the min_count filter.
        year_embed.get_subembed(year_words)
        print("Aligning year: %s" % (year,))
        if first_iter:
            # Nothing to align the first year against: it becomes the base.
            aligned_embed = year_embed
            first_iter = False
        else:
            aligned_embed = alignment.smart_procrustes_align(base_embed, year_embed)
        # The next year is aligned against this year's aligned embedding.
        base_embed = aligned_embed
        print("Writing year: %s" % (year,))
        foutname = out_dir + str(year)
        np.save(foutname + "-w.npy", aligned_embed.m)
        write_pickle(aligned_embed.iw, foutname + "-vocab.pkl")
def get_aligned(self, normalize=False):
    """Return a sequential embedding whose entries are aligned in a chain.

    Iterates ``self.embeds`` in its own order. The first entry is kept as-is;
    each later entry is passed through ``smart_procrustes_align`` (with
    ``post_normalize=False``) together with the previous entry's *aligned*
    result. ``self.embeds`` itself is not modified.

    Args:
        normalize: Accepted for interface compatibility; this method does not
            currently read it.

    Returns:
        The value of ``SequentialEmbedding.from_ordered_dict`` applied to an
        ``OrderedDict`` mapping each key of ``self.embeds`` to its aligned
        embedding.
    """
    year_aligned_embeds = collections.OrderedDict()
    first_iter = True
    base_embed = None
    # .items() is available on both Python 2 and Python 3 mappings, whereas
    # .iteritems() exists only on Python 2. The mapping is not mutated inside
    # the loop, so both yield the same (year, embed) pairs.
    for year, embed in self.embeds.items():
        if first_iter:
            # The first entry has nothing to be aligned against.
            year_aligned_embeds[year] = embed
            first_iter = False
        else:
            year_aligned_embeds[year] = smart_procrustes_align(
                base_embed, embed, post_normalize=False)
        # Chain: the next entry is aligned to this entry's aligned result.
        base_embed = year_aligned_embeds[year]
    return SequentialEmbedding.from_ordered_dict(year_aligned_embeds)
def align_years(years):
    """Align each year's embedding to the previous aligned year and save it.

    For every year (in iteration order) the embedding is loaded with
    ``embedding.Embedding.load`` from ``INPUT_FILE.format(year=year)``. The
    first year is kept unchanged; every later year is passed through
    ``alignment.smart_procrustes_align`` together with the previous year's
    *aligned* embedding.

    Two files are written per year, both prefixed by
    ``OUTPUT_FILE.format(year=year)``: ``.npy`` (the ``.m`` attribute, via
    ``np.save``) and ``.vocab`` (the entries of ``.iw`` joined by single
    spaces on one line).

    Args:
        years: Iterable of year identifiers substituted into the
            ``INPUT_FILE`` / ``OUTPUT_FILE`` templates.

    Returns:
        None. Results are only written to disk.
    """
    first_iter = True
    base_embed = None
    for year in years:
        # Formatted print(...) calls produce the same output as the former
        # print statements and are valid on both Python 2 and Python 3.
        print("Loading year: %s" % (year,))
        year_embed = embedding.Embedding.load(INPUT_FILE.format(year=year))
        print("Aligning year: %s" % (year,))
        if first_iter:
            # The first year has nothing to be aligned against.
            aligned_embed = year_embed
            first_iter = False
        else:
            aligned_embed = alignment.smart_procrustes_align(base_embed, year_embed)
        # The next year is aligned against this year's aligned embedding.
        base_embed = aligned_embed
        print("Writing year: %s" % (year,))
        foutname = OUTPUT_FILE.format(year=year)
        np.save(foutname + ".npy", aligned_embed.m)
        # open() replaces the Python-2-only file() builtin; write() plus an
        # explicit newline replaces `print >> outf`, giving the same bytes.
        with open(foutname + ".vocab", "w") as outf:
            outf.write(" ".join(aligned_embed.iw) + "\n")
def align_years(years):
    """Chain-align yearly embeddings and write each aligned result to disk.

    Each year's embedding is read from ``INPUT_FILE.format(year=year)`` using
    ``embedding.Embedding.load``. The first year is written as loaded; each
    following year is the result of ``alignment.smart_procrustes_align``
    called with the previous year's aligned embedding and the newly loaded
    one.

    Output for a year goes to ``OUTPUT_FILE.format(year=year)`` plus:
      * ``.npy``   -- ``aligned_embed.m`` saved with ``np.save``
      * ``.vocab`` -- ``aligned_embed.iw`` as one space-separated line

    Args:
        years: Iterable of year identifiers used to fill the ``{year}`` field
            of the ``INPUT_FILE`` and ``OUTPUT_FILE`` templates.

    Returns:
        None. The function is used for its file output.
    """
    base_embed = None
    first_iter = True
    for year in years:
        # Progress messages use a single formatted argument so the text is
        # identical whether print is a statement (Py2) or a function (Py3).
        print("Loading year: %s" % (year,))
        year_embed = embedding.Embedding.load(INPUT_FILE.format(year=year))

        print("Aligning year: %s" % (year,))
        if first_iter:
            # No earlier year exists, so the first embedding is the reference.
            aligned_embed = year_embed
            first_iter = False
        else:
            aligned_embed = alignment.smart_procrustes_align(
                base_embed, year_embed)
        # Carry the aligned result forward as the next year's reference.
        base_embed = aligned_embed

        print("Writing year: %s" % (year,))
        foutname = OUTPUT_FILE.format(year=year)
        np.save(foutname + ".npy", aligned_embed.m)
        # file() no longer exists on Python 3; open() works on both versions.
        # The trailing "\n" reproduces what `print >> outf` used to append.
        with open(foutname + ".vocab", "w") as outf:
            outf.write(" ".join(aligned_embed.iw) + "\n")
def align_years(years, rep_type, in_dir, out_dir, count_dir, min_count, **rep_args):
    """Sequentially align yearly embeddings and save each aligned result.

    The first year is saved as loaded. Every subsequent year is replaced by
    the result of ``alignment.smart_procrustes_align`` called with the
    previous year's aligned embedding and the current year's embedding.

    Per year, ``np.save`` writes ``.m`` to ``out_dir + str(year) + "-w.npy"``
    and ``write_pickle`` writes ``.iw`` to
    ``out_dir + str(year) + "-vocab.pkl"``.

    Args:
        years: Iterable of year identifiers, processed in order.
        rep_type: Forwarded to ``create_representation``.
        in_dir: Input path prefix; the year is appended without a separator.
        out_dir: Output path prefix; the year is appended without a separator.
        count_dir: Forwarded to ``words_above_count``.
        min_count: Forwarded to ``words_above_count``.
        **rep_args: Extra keyword arguments for ``create_representation``.

    Returns:
        None.
    """
    previous_aligned = None
    for position, year in enumerate(years):
        print("Loading year:", year)
        current = create_representation(rep_type, in_dir + str(year), **rep_args)
        frequent_words = words_above_count(count_dir, year, min_count)
        # NOTE(review): the result of get_subembed is not used; whether the
        # embedding is filtered in place cannot be told from here -- confirm.
        current.get_subembed(frequent_words)

        print("Aligning year:", year)
        if position == 0:
            # The first year is the reference and is kept untouched.
            aligned = current
        else:
            aligned = alignment.smart_procrustes_align(previous_aligned, current)
        previous_aligned = aligned

        print("Writing year:", year)
        out_prefix = out_dir + str(year)
        np.save(out_prefix + "-w.npy", aligned.m)
        write_pickle(aligned.iw, out_prefix + "-vocab.pkl")
def align_years(years, rep_type, in_dir, out_dir, count_dir, min_count, **rep_args):
    """Chain-align yearly representations and persist every aligned year.

    Processing order follows ``years``. The first year's representation is
    saved unchanged. For each later year, ``alignment.smart_procrustes_align``
    is called with the previously aligned year and the current year, and its
    result becomes both the saved output and the reference for the next year.

    Files written per year (prefix ``out_dir + str(year)``):
      * ``-w.npy``     -- ``aligned_embed.m`` via ``np.save``
      * ``-vocab.pkl`` -- ``aligned_embed.iw`` via ``write_pickle``

    Args:
        years: Iterable of year identifiers.
        rep_type: Representation type passed to ``create_representation``.
        in_dir: Prefix the year is appended to when loading (plain string
            concatenation; include a trailing separator if needed).
        out_dir: Prefix the year is appended to when writing (same rule).
        count_dir: Passed to ``words_above_count``.
        min_count: Passed to ``words_above_count``.
        **rep_args: Additional keyword arguments for
            ``create_representation``.

    Returns:
        None.
    """
    first_iter = True
    base_embed = None
    for year in years:
        # Single-argument print(...) is valid and prints identically on both
        # Python 2 and Python 3, unlike the former print statements.
        print("Loading year: %s" % (year,))
        year_embed = create_representation(rep_type, in_dir + str(year), **rep_args)
        year_words = words_above_count(count_dir, year, min_count)
        # NOTE(review): get_subembed's return value is dropped. Unless it
        # filters year_embed in place, the count threshold does not influence
        # the aligned output -- verify against its implementation.
        year_embed.get_subembed(year_words)

        print("Aligning year: %s" % (year,))
        if first_iter:
            # First year: used directly as the alignment reference.
            aligned_embed = year_embed
            first_iter = False
        else:
            aligned_embed = alignment.smart_procrustes_align(base_embed, year_embed)
        base_embed = aligned_embed

        print("Writing year: %s" % (year,))
        foutname = out_dir + str(year)
        np.save(foutname + "-w.npy", aligned_embed.m)
        write_pickle(aligned_embed.iw, foutname + "-vocab.pkl")
def align_years(years, rep_type, in_dir, out_dir, **rep_args):
    """Align each year's embedding to the previous aligned year and save it.

    Iterates through ``years`` in order. The first year's embedding is saved
    as loaded. Each later year is passed to
    ``alignment.smart_procrustes_align`` (with ``post_normalize=False``)
    together with the previous year's *aligned* embedding; per the original
    author this rotates the year onto the previous year's embedding.

    Files written per year (prefix ``out_dir + str(year)``):
      * ``-w.npy``     -- ``aligned_embed.m`` via ``np.save``
      * ``-vocab.pkl`` -- ``aligned_embed.iw`` via ``write_pickle``

    Args:
        years: Iterable of year identifiers.
        rep_type: Representation type forwarded to ``create_representation``.
        in_dir: Input path prefix; ``str(year)`` is appended with no separator.
        out_dir: Output path prefix; ``str(year)`` is appended with no
            separator.
        **rep_args: Extra keyword arguments forwarded to
            ``create_representation``.

    Returns:
        None. Results are only written to disk.
    """
    first_iter = True
    base_embed = None
    for year in years:
        # Formatted print(...) calls keep the output text unchanged while
        # being valid syntax on both Python 2 and Python 3.
        print("Loading year: %s" % (year,))
        # Load the individual year's embedding.
        year_embed = create_representation(
            rep_type, in_dir + str(year), **rep_args)
        print("Aligning year: %s" % (year,))
        if first_iter:
            # The first year has no predecessor and is used unchanged.
            aligned_embed = year_embed
            first_iter = False
        else:
            aligned_embed = alignment.smart_procrustes_align(
                base_embed, year_embed, post_normalize=False)
        # The aligned result is the reference for the following year.
        base_embed = aligned_embed
        print("Writing year: %s" % (year,))
        foutname = out_dir + str(year)
        np.save(foutname + "-w.npy", aligned_embed.m)
        write_pickle(aligned_embed.iw, foutname + "-vocab.pkl")