Пример #1
0
def create_representation(args):
    """Build the representation object selected by a docopt-style mapping.

    Keys read from ``args``:
        '<representation>'      -- 'PPMI', 'SVD', 'GLOVE'; any other value
                                   selects the plain ``Embedding`` branch.
        '<representation_path>' -- base path handed to the constructors.
        '--neg'                 -- converted with ``int``; only passed on to
                                   ``PositiveExplicit``.
        '--w+c'                 -- truthy to request an ``EnsembleEmbedding``
                                   built from two component embeddings.
        '--eig'                 -- converted with ``float``; only passed on to
                                   ``SVDEmbedding``.

    All five keys are read (and '--neg' / '--eig' converted) up front, so a
    missing key or an unparsable number fails before any branch is chosen.

    Raises:
        Exception: when '--w+c' is requested together with 'PPMI'.
    """
    kind = args['<representation>']
    location = args['<representation_path>']
    shift = int(args['--neg'])
    use_ensemble = args['--w+c']
    exponent = float(args['--eig'])

    if kind == 'PPMI':
        if use_ensemble:
            raise Exception('w+c is not implemented for PPMI.')
        return PositiveExplicit(location, True, shift)

    if kind == 'SVD':
        if not use_ensemble:
            return SVDEmbedding(location, True, exponent)
        # NOTE(review): the two components differ only in the final boolean;
        # presumably it selects the word vs. context side -- confirm against
        # SVDEmbedding's signature.
        first = SVDEmbedding(location, False, exponent, False)
        second = SVDEmbedding(location, False, exponent, True)
        return EnsembleEmbedding(first, second, True)

    if kind == 'GLOVE':
        return GLOVEEmbedding(location, True)

    # Fallback: any other representation name is loaded through Embedding,
    # using the '.words' (and, for the ensemble, '.contexts') suffixed paths.
    if not use_ensemble:
        return Embedding(location + '.words', True)
    words = Embedding(location + '.words', False)
    contexts = Embedding(location + '.contexts', False)
    return EnsembleEmbedding(words, contexts, True)
Пример #2
0
def main():
    """Dump an SVD embedding to a text file, one word per line.

    Command-line arguments are parsed with docopt from the usage string
    below.  Each output line is the word followed by the space-separated
    components of its row in ``svd.m``.
    """
    args = docopt("""
    Usage:
        svd2text.py [options] <svd_path> <output_path>
    
    Options:
        --w+c        Use ensemble of word and context vectors
        --eig NUM    Weighted exponent of the eigenvalue matrix [default: 0.5]
    """)

    svd_path = args['<svd_path>']
    output_path = args['<output_path>']
    w_c = args['--w+c']
    eig = float(args['--eig'])

    if w_c:
        svd = EnsembleEmbedding(SVDEmbedding(svd_path, False, eig, False),
                                SVDEmbedding(svd_path, False, eig, True), True)
    else:
        svd = SVDEmbedding(svd_path, True, eig)

    with open(output_path, 'w') as f:
        for i, w in enumerate(svd.iw):
            row = ' '.join([str(x) for x in svd.m[i]])
            # f.write replaces the Python 2-only `print >> f, ...` form, which
            # raises TypeError under Python 3.  The emitted text is the same:
            # word, single space, joined row, newline.
            f.write('{0} {1}\n'.format(w, row))
Пример #3
0
 def __init__(self, path, years, **kwargs):
     """Load one SVDEmbedding per year into ``self.embeds``.

     ``self.embeds`` is an OrderedDict keyed by the items of ``years`` in
     iteration order.  Each embedding is built from ``path + "/" + str(year)``
     and every extra keyword argument is forwarded to ``SVDEmbedding``.
     """
     per_year = collections.OrderedDict()
     # Bind the attribute before filling it so that a failure part-way through
     # leaves the already-loaded years reachable on the instance.
     self.embeds = per_year
     for yr in years:
         location = path + "/" + str(yr)
         per_year[yr] = SVDEmbedding(location, **kwargs)
Пример #4
0
    # Positional argument: location of the test data (read by read_test_set below).
    parser.add_argument("test_path", help="Path to test data")
    # Optional context-word list; the empty-string default means "not supplied".
    parser.add_argument("--word-path",
                        help="Path to sorted list of context words",
                        default="")
    # Optional cap on the number of context words; -1 means no restriction requested.
    parser.add_argument("--num-context",
                        type=int,
                        help="Number context words to use",
                        default=-1)
    # Representation type: "PPMI" and "SVD" are handled explicitly, any other
    # value falls through to Embedding.load.
    parser.add_argument("--type", default="PPMI")
    args = parser.parse_args()
    if args.type == "PPMI":
        # The year is parsed from the final "/"-separated component of vec_path,
        # taking the text before its first ".".
        # NOTE(review): vec_path is declared outside this excerpt -- confirm.
        year = int(args.vec_path.split("/")[-1].split(".")[0])
        # Restricting the number of context words requires a word list to pick from.
        if args.num_context != -1 and args.word_path == "":
            raise Exception(
                "Must specify path to context word file if the context words are to be restricted!"
            )
        elif args.word_path != "":
            # Only the second returned value (context words) is used; it is
            # indexed by year on the next line.
            _, context_words = ioutils.load_target_context_words(
                [year], args.word_path, -1, args.num_context)
            context_words = context_words[year]
        else:
            # No word list given: load the explicit representation unrestricted.
            context_words = None
        rep = Explicit.load(args.vec_path, restricted_context=context_words)
    elif args.type == "SVD":
        rep = SVDEmbedding(args.vec_path, eig=0.0)
    else:
        rep = Embedding.load(args.vec_path, add_context=False)
    # Score the chosen representation on the test set and report the result.
    data = read_test_set(args.test_path)
    correlation = evaluate(rep, data)
    print "Correlation: " + str(correlation)
Пример #5
0
def folder2svd(folder, raw=False):
    """Load the SVD embedding stored under ``folder``.

    The embedding is built from ``join(folder, "svd_pmi")``.

    Args:
        folder: directory passed as the first argument to ``join``.
        raw: when truthy, return the ``SVDEmbedding`` object itself;
            otherwise return its ``similarity`` attribute.
    """
    embedding = SVDEmbedding(join(folder, "svd_pmi"))
    return embedding if raw else embedding.similarity