def init(args):
    """
    Load data, build model, create optimizer, create vars to hold metrics, etc.
    """
    # need to handle really large text fields
    csv.field_size_limit(sys.maxsize)  # sets field size to max available for strings

    # LOAD VOCAB DICTS
    dicts = datasets.load_vocab_dict(args.vocab_path)

    ## Loading structured data --> need to figure out best way to do this
    X, y = load_svmlight_file(args.struc_data_path)
    print("struc data loaded")
    num_struc_feats = X.shape[1]

    model = tools.pick_model(args, dicts, num_struc_feats)
    print(model)
    print("\nGPU: " + str(args.gpu))

    optimizer = optim.Adam(model.params_to_optimize(), weight_decay=args.weight_decay, lr=args.lr)

    params = tools.make_param_dict(args)

    return args, model, optimizer, params, dicts, X, y
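# Note: load_svmlight_file returns the structured features X as a scipy.sparse CSR
# matrix, so rows typically need to be densified per batch before being fed to a
# torch model. A minimal sketch of that step follows; the names struc_batch and
# batch_idx are illustrative assumptions, not part of the original code.
import torch

def struc_batch(X, batch_idx):
    # slice the sparse matrix by row indices, then convert to a dense float tensor
    return torch.tensor(X[batch_idx].toarray(), dtype=torch.float)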
def init(args):
    """
    Load data, build model, create optimizer, create vars to hold metrics, etc.
    """
    # need to handle really large text fields
    csv.field_size_limit(sys.maxsize)  # sets field size to max available for strings

    # LOAD VOCAB DICTS
    dicts = datasets.load_vocab_dict(args.vocab_path)

    model = tools.pick_model(args, dicts)
    print(model)
    print("\nGPU: " + str(args.gpu))

    optimizer = optim.Adam(model.params_to_optimize(), weight_decay=args.weight_decay, lr=args.lr)
    print("Weight decay: " + str(args.weight_decay))

    params = tools.make_param_dict(args)

    return args, model, optimizer, params, dicts
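# Hedged sketch of how either init variant might be driven from the command line.
# The argument names mirror the attributes accessed above; the parser itself and its
# defaults are assumptions, not part of the original code.
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="build model and optimizer from vocab (and optional structured) data")
    parser.add_argument("--vocab_path", type=str, required=True)
    parser.add_argument("--struc_data_path", type=str, help="svmlight file of structured features (structured variant only)")
    parser.add_argument("--gpu", action="store_true")
    parser.add_argument("--lr", type=float, default=1e-3)
    parser.add_argument("--weight_decay", type=float, default=0.0)
    args = parser.parse_args()
    init(args)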
def gensim_to_embeddings(wv_file, vocab_file, Y, outfile=None):
    model = gensim.models.Word2Vec.load(wv_file)
    wv = model.wv
    # free up memory
    del model

    ind2w, _ = datasets.load_vocab_dict(vocab_file)

    W, words = build_matrix(ind2w, wv)

    if outfile is None:
        outfile = wv_file.replace('.w2v', '.embed')

    # smash that save button
    save_embeddings(W, words, outfile)
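# build_matrix and save_embeddings are called above but not shown in this section.
# A minimal sketch of what they might look like, assuming ind2w maps integer indices
# to tokens and missing words fall back to zero vectors -- these details are
# assumptions, not taken from the original code.
import numpy as np

def build_matrix(ind2w, wv):
    # stack one word2vec row per vocabulary word, in index order
    words = [ind2w[i] for i in sorted(ind2w.keys())]
    W = np.zeros((len(words), wv.vector_size))
    for row, word in enumerate(words):
        if word in wv:
            W[row] = wv[word]
    return W, words

def save_embeddings(W, words, outfile):
    # one line per word: the token followed by its space-separated vector components
    with open(outfile, 'w') as f:
        for word, vec in zip(words, W):
            f.write(word + " " + " ".join(str(v) for v in vec) + "\n")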