def create_representation(args):
    """Build the representation object selected by the parsed CLI arguments.

    Reads the following keys from ``args`` (all five are read up front, so a
    missing key or an unparsable number fails before any object is built):

    * ``'<representation>'``      -- 'PPMI', 'SVD', 'GLOVE'; anything else
      falls through to the generic ``Embedding`` branch.
    * ``'<representation_path>'`` -- path (or path prefix) handed to the
      representation constructor.
    * ``'--neg'``                 -- converted with ``int``; only used for PPMI.
    * ``'--w+c'``                 -- truthy to request the two-part ensemble.
    * ``'--eig'``                 -- converted with ``float``; only used for SVD.

    Returns the constructed representation object.

    Raises ``Exception`` when ``--w+c`` is requested together with PPMI.
    """
    kind = args['<representation>']
    base_path = args['<representation_path>']
    neg_value = int(args['--neg'])
    use_ensemble = args['--w+c']
    eig_value = float(args['--eig'])

    if kind == 'PPMI':
        if use_ensemble:
            raise Exception('w+c is not implemented for PPMI.')
        return PositiveExplicit(base_path, True, neg_value)

    if kind == 'SVD':
        if not use_ensemble:
            return SVDEmbedding(base_path, True, eig_value)
        # The two halves differ only in the last positional flag.
        first_half = SVDEmbedding(base_path, False, eig_value, False)
        second_half = SVDEmbedding(base_path, False, eig_value, True)
        return EnsembleEmbedding(first_half, second_half, True)

    if kind == 'GLOVE':
        # NOTE: --w+c is ignored for GLOVE.
        return GLOVEEmbedding(base_path, True)

    # Any other type: generic embeddings stored as <path>.words / <path>.contexts.
    if not use_ensemble:
        return Embedding(base_path + '.words', True)
    first_half = Embedding(base_path + '.words', False)
    second_half = Embedding(base_path + '.contexts', False)
    return EnsembleEmbedding(first_half, second_half, True)
def main():
    """Export an SVD embedding to a text file, one word per line.

    Command-line arguments are parsed by docopt from the usage text below.
    With ``--w+c`` the output comes from an ``EnsembleEmbedding`` built from
    two ``SVDEmbedding`` objects over the same path; otherwise from a single
    ``SVDEmbedding``. Each output line is the word followed by the
    space-separated components of its row in ``svd.m``.
    """
    # docopt relies on the line layout of this text (one option per line,
    # description separated from the option by two or more spaces).
    args = docopt("""
    Usage:
        svd2text.py [options] <svd_path> <output_path>

    Options:
        --w+c        Use ensemble of word and context vectors
        --eig NUM    Weighted exponent of the eigenvalue matrix [default: 0.5]
    """)

    svd_path = args['<svd_path>']
    output_path = args['<output_path>']
    w_c = args['--w+c']
    eig = float(args['--eig'])

    if w_c:
        svd = EnsembleEmbedding(SVDEmbedding(svd_path, False, eig, False),
                                SVDEmbedding(svd_path, False, eig, True),
                                True)
    else:
        svd = SVDEmbedding(svd_path, True, eig)

    with open(output_path, 'w') as f:
        for i, w in enumerate(svd.iw):
            vector_text = ' '.join([str(x) for x in svd.m[i]])
            # f.write instead of the Python-2-only ``print >> f`` form, which
            # raises TypeError at runtime on Python 3. Output is unchanged:
            # word, single space, vector components, newline.
            f.write('%s %s\n' % (w, vector_text))
def __init__(self, path, years, **kwargs):
    """Load one SVDEmbedding per year into ``self.embeds``.

    path   -- directory prefix; each embedding is loaded from
              ``path + "/" + str(year)``.
    years  -- iterable of year keys; ``self.embeds`` keeps them in
              iteration order (it is an OrderedDict).
    kwargs -- forwarded unchanged to every ``SVDEmbedding`` constructor.
    """
    by_year = self.embeds = collections.OrderedDict()
    for yr in years:
        year_location = path + "/" + str(yr)
        by_year[yr] = SVDEmbedding(year_location, **kwargs)
# Remaining command-line arguments; ``parser`` (and the ``vec_path`` argument
# read below) are set up earlier in the script.
parser.add_argument("test_path", help="Path to test data")
parser.add_argument("--word-path", help="Path to sorted list of context words", default="")
parser.add_argument("--num-context", type=int, help="Number context words to use", default=-1)
parser.add_argument("--type", default="PPMI")
args = parser.parse_args()

# Load the representation selected by --type.
if args.type == "PPMI":
    # The year is the integer before the first "." in the last path component
    # of vec_path (e.g. ".../1990.bin" -> 1990).
    year = int(args.vec_path.split("/")[-1].split(".")[0])
    if args.num_context != -1 and args.word_path == "":
        # A context-word limit is meaningless without the word list to cut.
        raise Exception(
            "Must specify path to context word file if the context words are to be restricted!"
        )
    elif args.word_path != "":
        _, context_words = ioutils.load_target_context_words(
            [year], args.word_path, -1, args.num_context)
        context_words = context_words[year]
    else:
        # No word list given: leave the context unrestricted.
        context_words = None
    rep = Explicit.load(args.vec_path, restricted_context=context_words)
elif args.type == "SVD":
    rep = SVDEmbedding(args.vec_path, eig=0.0)
else:
    rep = Embedding.load(args.vec_path, add_context=False)

# Evaluate the representation on the test set and report the result.
data = read_test_set(args.test_path)
correlation = evaluate(rep, data)
# Parenthesised single-argument print: identical output on Python 2, and not
# a SyntaxError on Python 3 like the bare print statement.
print("Correlation: " + str(correlation))
def folder2svd(folder, raw=False):
    """Load the SVD embedding stored under ``<folder>/svd_pmi``.

    Returns the ``SVDEmbedding`` object itself when ``raw`` is truthy;
    otherwise returns its ``similarity`` attribute.
    """
    embedding = SVDEmbedding(join(folder, "svd_pmi"))
    return embedding if raw else embedding.similarity