# print 'error'


def memory_usage_psutil():
    """Return the memory usage of the current process in MB.

    Reads the first field of psutil's per-process memory info (documented by
    psutil as the resident set size, in bytes) and scales it to megabytes.

    Returns:
        float: memory usage in MB (bytes / 2**20).
    """
    # Local import: psutil is only needed when this helper is actually called.
    import psutil

    process = psutil.Process(os.getpid())
    # psutil 2.0 renamed Process.get_memory_info() to memory_info() and 3.0
    # removed the old name; prefer the current API, fall back for old installs.
    try:
        read_memory_info = process.memory_info
    except AttributeError:
        read_memory_info = process.get_memory_info
    return read_memory_info()[0] / float(2 ** 20)


# --- Debug run: vocabulary coverage of the small (25d/50d) GloVe files ---
# Single-argument print(...) produces the same output under the Python 2
# print statement and the Python 3 print function.
print(len(words))
# words = {'car':0, 'flower':0, 'truck':0, 'train':0, 'glove':0}

twitter_reader = GloveEmbeddingReader('H:/Embeddings/Glove twitter/glove.twitter.27B.25d.txt')
twitter_reader.read_embedding(words=words)

glove_reader = GloveEmbeddingReader('H:/Embeddings/Glove wiki/glove.6B.50d.txt')
glove_reader.read_embedding(words=words)

# NOTE(review): this rebinds glove_reader, so the wiki reader loaded just above
# is discarded and its vocabulary is not part of the union computed below.
# The file name also repeats the wiki one (glove.6B.50d.txt) -- confirm intent.
glove_reader = GloveEmbeddingReader('H:/Embeddings/Glove Common Crawl/glove.6B.50d.txt')
glove_reader.read_embedding(words=words)

print("Memory usage %1.1f MB" % memory_usage_psutil())

# Words found by either of the two readers that are still bound.
words2 = set(twitter_reader.word2index.keys()).union(glove_reader.word2index.keys())
print(len(words2))

# Requested words that none of those readers returned; show a sample of 100.
diff = set(words.keys()).difference(words2)
words2 = {w: 0 for w in words2}
print(list(diff)[0:100])

# Stops the script here when this runs at module level.
exit()
# twitter_reader.save_as_csv(join(EMBEDDINGS_DIR, 'Glove twitter/glove.twitter.27B.25d_small.csv'))
#
#
# print "Reading %s" % 'glove_reader_wiki'
# glove_reader_wiki = GloveEmbeddingReader(join(EMBEDDINGS_DIR, 'Glove wiki/glove.6B.50d.txt'))
# glove_reader_wiki.read_embedding(words=words)
# glove_reader_wiki.save_as_csv(join(EMBEDDINGS_DIR, 'Glove wiki/glove.6B.50d_small.csv'))

# For each embedding source below: read the embeddings restricted to `words`,
# then write the result next to the source file as a "*_small.csv".

# Google News word2vec (binary format).
print("Reading %s" % 'Google news W2V')
w2v_reader = W2VEmbeddingReader(
    join(EMBEDDINGS_DIR, 'Google news w2v/GoogleNews-vectors-negative300.bin')
)
w2v_reader.read_embedding(words=words)
w2v_reader.save_as_csv(
    join(EMBEDDINGS_DIR, 'Google news w2v/GoogleNews-vectors-negative300_small.csv')
)

# GloVe twitter, 200d (its progress message is disabled).
# print "Reading %s" % 'twitter_reader'
twitter_reader = GloveEmbeddingReader(
    join(EMBEDDINGS_DIR, 'Glove twitter/glove.twitter.27B.200d.txt')
)
twitter_reader.read_embedding(words=words)
twitter_reader.save_as_csv(
    join(EMBEDDINGS_DIR, 'Glove twitter/glove.twitter.27B.200d_small.csv')
)

# GloVe common crawl, 300d.
print("Reading %s" % 'glove_reader_common_crawl')
glove_reader_common_crawl = GloveEmbeddingReader(
    join(EMBEDDINGS_DIR, 'Glove Common Crawl/glove.840B.300d.txt')
)
glove_reader_common_crawl.read_embedding(words=words)
glove_reader_common_crawl.save_as_csv(
    join(EMBEDDINGS_DIR, 'Glove Common Crawl/glove.840B.300d_small.csv')
)

# GloVe wiki, 300d.
print("Reading %s" % 'glove_reader_wiki')
glove_reader_wiki = GloveEmbeddingReader(
    join(EMBEDDINGS_DIR, 'Glove wiki/glove.6B.300d.txt')
)
glove_reader_wiki.read_embedding(words=words)
glove_reader_wiki.save_as_csv(
    join(EMBEDDINGS_DIR, 'Glove wiki/glove.6B.300d_small.csv')
)