def init(args):
    """
        Set up a run: vocab dicts, structured data, model, optimizer and the
        parameter dict.

        Returns the tuple (args, model, optimizer, params, dicts, X, y), where
        X and y are whatever load_svmlight_file yields for args.struc_data_path.
    """
    # Lift the csv field size cap to sys.maxsize so very long text fields parse
    csv.field_size_limit(sys.maxsize)

    # Vocabulary lookups
    vocab_dicts = datasets.load_vocab_dict(args.vocab_path)

    # Structured data; the loading strategy is still an open question
    struc_feats, labels = load_svmlight_file(args.struc_data_path)
    print("struc data loaded")

    # The model also receives the size of the second dimension of the
    # structured feature matrix
    model = tools.pick_model(args, vocab_dicts, struc_feats.shape[1])
    print(model)

    gpu_setting = str(args.gpu)
    print("\nGPU: " + gpu_setting)

    trainable = model.params_to_optimize()
    optimizer = optim.Adam(trainable,
                           weight_decay=args.weight_decay,
                           lr=args.lr)

    param_dict = tools.make_param_dict(args)

    return args, model, optimizer, param_dict, vocab_dicts, struc_feats, labels
def init(args):
    """
        Set up a run: vocab dicts, model, optimizer and the parameter dict.

        Returns the tuple (args, model, optimizer, params, dicts).
    """
    # Lift the csv field size cap to sys.maxsize so very long text fields parse
    csv.field_size_limit(sys.maxsize)

    # Vocabulary lookups
    vocab_dicts = datasets.load_vocab_dict(args.vocab_path)

    model = tools.pick_model(args, vocab_dicts)
    print(model)

    gpu_setting = str(args.gpu)
    print("\nGPU: " + gpu_setting)

    trainable = model.params_to_optimize()
    optimizer = optim.Adam(trainable,
                           weight_decay=args.weight_decay,
                           lr=args.lr)

    decay_setting = str(args.weight_decay)
    print("Weight decay: " + decay_setting)

    param_dict = tools.make_param_dict(args)

    return args, model, optimizer, param_dict, vocab_dicts
# Example #3
0
def gensim_to_embeddings(wv_file, vocab_file, Y, outfile=None):
    """
        Export the word vectors of a saved gensim Word2Vec model to an
        embedding file.

        wv_file: path of the saved Word2Vec model to load.
        vocab_file: path handed to datasets.load_vocab_dict to obtain the vocab.
        Y: not used by this function; kept so existing callers keep working.
        outfile: destination path. When None it is derived from wv_file by
            swapping a trailing '.w2v' for '.embed', or by appending '.embed'
            when wv_file does not end in '.w2v'.
    """
    model = gensim.models.Word2Vec.load(wv_file)
    wv = model.wv
    #free up memory: only the word vectors are needed from here on
    del model

    # only the first value returned by load_vocab_dict is used here
    ind2w, _ = datasets.load_vocab_dict(vocab_file)

    W, words = build_matrix(ind2w, wv)

    if outfile is None:
        # Rewrite the extension only. A blanket str.replace would also alter
        # '.w2v' inside directory names, and would return wv_file unchanged when
        # the suffix is missing, so save_embeddings would be handed the model's
        # own path.
        w2v_suffix = '.w2v'
        if wv_file.endswith(w2v_suffix):
            outfile = wv_file[:-len(w2v_suffix)] + '.embed'
        else:
            outfile = wv_file + '.embed'

    #smash that save button
    save_embeddings(W, words, outfile)