import numpy as np
from tensorflow.keras.preprocessing.text import tokenizer_from_json


def generate_sentences(model_path, seed_path, num_words):
    model = getmodel(31, 10000, 32, model_path)
    # The tokenizer is stored next to the model weights.
    tokenizer_path = model_path + '.tokenizer.json'
    with open(tokenizer_path, 'r') as f:
        tokenizer = tokenizer_from_json(f.read())
    # Build the seed token sequence from the seed file.
    seed_seq = list(create_indexes_tape(seed_path, tokenizer))
    out_seq = []
    for _ in range(num_words):
        # Feed the last 31 tokens and greedily pick the most likely next one.
        seq_input = np.expand_dims(seed_seq[-31:], 0)
        res = model.predict(seq_input, batch_size=1)
        pred = int(res.squeeze().argmax())
        out_seq.append(pred)
        seed_seq.append(pred)
    # Index 0 is the padding token; skip it when decoding.
    words = [tokenizer.index_word[w] for w in out_seq if w != 0]
    sentence = ' '.join(words).replace('<eom>', '\n')
    print(sentence)
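
# Greedy argmax decoding (above) always emits the single most likely token and
# can settle into repetitive loops. A minimal temperature-sampling alternative,
# assuming `probs` is the same squeezed softmax vector produced by
# model.predict above (this sketch is not part of the original project):
def sample_token(probs, temperature=1.0):
    # Flatter distribution for temperature > 1, sharper for < 1.
    logits = np.log(probs + 1e-9) / temperature
    exp = np.exp(logits - logits.max())
    return int(np.random.choice(len(probs), p=exp / exp.sum()))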

def trainmodel():
    sentiment = []
    text = []
    # Each training line is '<sentiment>|<text>'.
    with open("/home/venky/Sem1/Machine Learning/Project/Data/training.csv") as file:
        for line in file:
            linedata = line.split("|")
            sentiment.append(linedata[0])
            text.append(linedata[1].strip())
    # Binary labels: 1 for positive, 0 for everything else.
    label = [1 if s == "positive" else 0 for s in sentiment]
    preprocessedText = preprocess(text)
    features = getVector(preprocessedText)  # avoid shadowing the builtin `input`
    model = getmodel(features, label)
    return model
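
# The naive '|' split above assumes the separator never occurs inside the text
# field. If it can, the csv module's quoting support is the usual fix -- a
# sketch under that assumption, not code from this project:
import csv

def read_labeled_lines(path):
    with open(path, newline='') as f:
        for sentiment, text in csv.reader(f, delimiter='|'):
            yield sentiment, text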

import os

from tqdm import tqdm
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.vgg16 import preprocess_input


def extract_features(directory):
    # Maps image id (filename without extension) -> feature vector.
    features = dict()
    print("[INFO] loading model.....")
    model = mm.getmodel()
    print("[INFO] Model loaded.....")
    for name in tqdm(os.listdir(directory)):
        image_id = name.split('.')[0]
        filename = os.path.join(directory, name)
        # Load and resize to the VGG input size, then convert to an array.
        image = load_img(filename, target_size=(224, 224))
        image = img_to_array(image)
        # Add a batch dimension: (1, 224, 224, 3).
        image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
        # VGG-style preprocessing (mean subtraction, channel ordering).
        image = preprocess_input(image)
        feature = model.predict(image, verbose=0)
        features[image_id] = feature
    return features
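
# Running VGG over a whole image directory is slow, so the extracted features
# are typically computed once and cached. A sketch -- the directory and the
# 'features.pkl' filename are placeholders, not paths from this project:
import pickle

features = extract_features('data/images')
with open('features.pkl', 'wb') as f:
    pickle.dump(features, f)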

def train(dataset_path, run_hash, seq_len=32, vocab_size=10000, emb_dim=32,
          batch_size=128, epochs=20, train_split=0.8, val_split=0.2):
    logs_path, ckp_path, tok_path = check_dirs(run_hash)
    ckp_cb = tf.keras.callbacks.ModelCheckpoint(
        ckp_path, monitor='val_accuracy',
        save_best_only=False, save_weights_only=True)
    lr_cb = tf.keras.callbacks.LearningRateScheduler(
        create_lr_sched(epochs / 2, epochs), verbose=True)
    tb_cb = tf.keras.callbacks.TensorBoard(
        logs_path, histogram_freq=10, write_graph=True, write_images=True,
        embeddings_freq=10, embeddings_metadata=logs_path + '/meta.tsv')
    with open(tok_path, 'r') as f:
        tokenizer = tokenizer_from_json(f.read())
    indexes_tape = create_indexes_tape(dataset_path, tokenizer)
    train_nbatches = int((len(indexes_tape) - seq_len) * train_split / batch_size)
    val_nbatches = int((len(indexes_tape) - seq_len) * val_split / batch_size)
    train_ds, val_ds = create_datasets(
        indexes_tape, train_nbatches, val_nbatches, batch_size, seq_len, vocab_size)
    model = getmodel(seq_len - 1, vocab_size, emb_dim, ckp_path)
    # Export the (possibly restored) embedding matrix for the TensorBoard projector.
    embeddings = model.layers[0].weights[0].numpy()
    export_vocabulary(vocab_size, tokenizer.word_index, logs_path)
    export_embeddings(embeddings, logs_path)
    # batch_size is omitted here: the datasets are already batched.
    hist = model.fit(
        train_ds, epochs=epochs, steps_per_epoch=train_nbatches,
        validation_data=val_ds, callbacks=[ckp_cb, lr_cb, tb_cb])
    return hist
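
# create_lr_sched is defined elsewhere in the project. The call above,
# create_lr_sched(epochs / 2, epochs), suggests a hold-then-decay schedule; a
# plausible sketch with assumed constants (base_lr and min_lr are not from the
# original code):
import math

def create_lr_sched(hold_epochs, total_epochs, base_lr=1e-3, min_lr=1e-5):
    def sched(epoch, lr):
        if epoch < hold_epochs:
            return base_lr
        # Cosine decay from base_lr down to min_lr over the remaining epochs.
        t = (epoch - hold_epochs) / max(1.0, total_epochs - hold_epochs)
        return min_lr + (base_lr - min_lr) * 0.5 * (1 + math.cos(math.pi * t))
    return sched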

print("Training data before split: {}\nTest data: {}\n"
      "Training labels before split: {}\nTest labels: {}".format(
          Xall.shape, Xtest.shape, Yall.shape, Ytest.shape))
# Scale pixel intensities from [0, 255], then normalize.
Xall, Xtest = mapValues(Xall, 0, 255, save=True), mapValues(Xtest, 0, 255)
Xall, Xtest = normalize(Xall), normalize(Xtest)
print("Intensities after scaling: min={}, max={}, mean={}, std={}".format(
    np.min(Xall), np.max(Xall), np.mean(Xall), np.std(Xall)))
Xtrain, Xval, Ytrain, Yval = train_test_split(
    Xall, Yall, test_size=0.1, random_state=SEED)
print("Train data: {}, Validation data: {}".format(Xtrain.shape, Xval.shape))

model = getmodel()
if DO_TRAIN:
    # fit accepts generators directly in TF2; fit_generator is deprecated.
    history = model.fit(
        getgen().flow(Xtrain, Ytrain, batch_size=BS),
        steps_per_epoch=Xtrain.shape[0] // BS,
        epochs=totalepochs(SCHEDULE),
        validation_data=(Xval, Yval),
        verbose=2,
        callbacks=getcallbacks(Xtrain))
else:
    model.load_weights(WEIGHTS_NAME)

print("Model performance on the validation set:")
model.evaluate(Xval, Yval, verbose=2)

# Predict on the test set and take the most probable class per sample.
results = tf.nn.softmax(model.predict(Xtest)).numpy()
results = np.argmax(results, axis=1)
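
# `results` now holds one predicted class id per test sample. A sketch of
# writing a Kaggle-style submission file (the column names and filename are
# assumptions, not from the original script):
import pandas as pd

submission = pd.DataFrame({'ImageId': np.arange(1, len(results) + 1),
                           'Label': results})
submission.to_csv('submission.csv', index=False)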