Example No. 1
import numpy as np
from tensorflow.keras.preprocessing.text import tokenizer_from_json

def generate_sentences(model_path, seed_path, num_words):
    # Rebuild the model and load the tokenizer saved alongside it.
    model = getmodel(31, 10000, 32, model_path)
    tokenizer_path = model_path + '.tokenizer.json'
    with open(tokenizer_path, 'r') as f:
        tokenizer = tokenizer_from_json(f.read())

    # Turn the seed text into a list of token indexes.
    seed_seq = list(create_indexes_tape(seed_path, tokenizer))
    out_seq = []

    # Autoregressive generation: feed the last 31 tokens back into the
    # model and take the argmax as the next token.
    for _ in range(num_words):
        seq_input = np.expand_dims(seed_seq[-31:], 0)
        pred = model.predict(seq_input, verbose=0).squeeze().argmax()
        seed_seq.append(pred)
        out_seq.append(pred)

    # Map indexes back to words, skipping the padding index 0.
    words = [tokenizer.index_word[w] for w in out_seq if w != 0]
    sentence = ' '.join(words).replace('<eom>', '\n')
    print(sentence)
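
A minimal call sketch; the checkpoint prefix and seed path below are hypothetical placeholders, not from the source:

# Hypothetical paths; generate 50 words of text from the seed file.
generate_sentences('checkpoints/run_001', 'data/seed.txt', num_words=50)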
Example No. 2
def trainmodel():
    # Read pipe-delimited training data: "<sentiment>|<text>" per line.
    sentiment = []
    text = []
    with open("/home/venky/Sem1/Machine Learning/Project/Data/training.csv") as file:
        for line in file:
            linedata = line.split("|")
            sentiment.append(linedata[0])
            text.append(linedata[1])

    # Binary labels: 1 for "positive", 0 for anything else.
    label = [1 if s == "positive" else 0 for s in sentiment]

    # Clean the text, vectorize it, and fit the model.
    preprocessedText = preprocess(text)
    features = getVector(preprocessedText)
    model = getmodel(features, label)

    return model
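
A usage sketch, assuming the helpers above are importable and that getmodel returns an object with a scikit-learn-style predict (an assumption, not confirmed by the source):

# Train on the hard-coded CSV, then score a hypothetical unseen review.
model = trainmodel()
unseen = getVector(preprocess(["great product, would buy again"]))
print(model.predict(unseen))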
Example No. 3
import os
from tqdm import tqdm
from tensorflow.keras.preprocessing.image import load_img, img_to_array
# VGG16 assumed from the comments below; adjust if mm uses another backbone.
from tensorflow.keras.applications.vgg16 import preprocess_input

def extract_features(directory):
    # Map image id -> extracted feature vector.
    features = dict()
    print("[INFO] loading model.....")
    model = mm.getmodel()
    print("[INFO] Model loaded.....")
    for name in tqdm(os.listdir(directory)):
        # The image id is the filename without its extension.
        image_id = name.split('.')[0]
        filename = os.path.join(directory, name)
        # Load and resize the image to the VGG input size.
        image = load_img(filename, target_size=(224, 224))
        image = img_to_array(image)
        # Add a batch dimension: (1, height, width, channels).
        image = image.reshape(
            (1, image.shape[0], image.shape[1], image.shape[2]))
        # Apply the VGG-specific preprocessing.
        image = preprocess_input(image)
        # Extract and store the feature vector.
        feature = model.predict(image, verbose=0)
        features[image_id] = feature

    return features
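
Since the returned dict is keyed by image id, it can be serialized for a later stage such as caption training; a sketch assuming a hypothetical directory name:

import pickle

features = extract_features('Flickr8k_Dataset')  # hypothetical image folder
with open('features.pkl', 'wb') as f:
    pickle.dump(features, f)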
Example No. 4
def train(dataset_path,
          run_hash,
          seq_len=32,
          vocab_size=10000,
          emb_dim=32,
          batch_size=128,
          epochs=20,
          train_split=0.8,
          val_split=0.2):

    logs_path, ckp_path, tok_path = check_dirs(run_hash)

    # Save weights every epoch, tracking validation accuracy.
    ckp_cb = tf.keras.callbacks.ModelCheckpoint(
        ckp_path,
        monitor='val_accuracy',
        save_best_only=False,
        save_weights_only=True)

    # Start decaying the learning rate halfway through training.
    lr_cb = tf.keras.callbacks.LearningRateScheduler(
        create_lr_sched(epochs / 2, epochs), verbose=True)

    tb_cb = tf.keras.callbacks.TensorBoard(
        logs_path,
        histogram_freq=10,
        write_graph=True,
        write_images=True,
        embeddings_freq=10,
        embeddings_metadata=logs_path + '/meta.tsv')

    with open(tok_path, 'r') as f:
        tokenizer = tokenizer_from_json(f.read())

    # Size the train/validation splits in whole batches.
    indexes_tape = create_indexes_tape(dataset_path, tokenizer)
    train_nbatches = int((len(indexes_tape) - seq_len) * train_split / batch_size)
    val_nbatches = int((len(indexes_tape) - seq_len) * val_split / batch_size)

    train_ds, val_ds = create_datasets(
        indexes_tape,
        train_nbatches,
        val_nbatches,
        batch_size,
        seq_len,
        vocab_size)

    model = getmodel(seq_len - 1, vocab_size, emb_dim, ckp_path)

    # Export the initial embedding matrix and vocabulary for the
    # TensorBoard embedding projector.
    embeddings = model.layers[0].weights[0].numpy()
    export_vocabulary(vocab_size, tokenizer.word_index, logs_path)
    export_embeddings(embeddings, logs_path)

    # batch_size is not passed to fit: the datasets are already batched,
    # and tf.keras rejects batch_size when x is a tf.data.Dataset.
    hist = model.fit(
        train_ds,
        epochs=epochs,
        steps_per_epoch=train_nbatches,
        validation_data=val_ds,
        callbacks=[ckp_cb, lr_cb, tb_cb])

    return hist
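
A call sketch; the corpus path and run identifier are illustrative placeholders:

# Train with the defaults: 32-token window, 10k vocabulary, 20 epochs.
hist = train('data/corpus.txt', 'run_001')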
Example No. 5
print(
    "Training data before split: {}\nTest data: {}\nTraining labels before split: {}\nTest labels: {}"
    .format(Xall.shape, Xtest.shape, Yall.shape, Ytest.shape))
Xall, Xtest = mapValues(Xall, 0, 255, save=True), mapValues(Xtest, 0, 255)
Xall, Xtest = normalize(Xall), normalize(Xtest)

print("Intensities after scaling: min={}, max={}, mean={}, std={}".format(
    np.min(Xall.flatten()), np.max(Xall.flatten()), np.mean(Xall.flatten()),
    np.std(Xall.flatten())))

Xtrain, Xval, Ytrain, Yval = train_test_split(Xall,
                                              Yall,
                                              test_size=0.1,
                                              random_state=SEED)
print("Train data: {}, Validation data: {}".format(Xtrain.shape, Xval.shape))
model = getmodel()
if DO_TRAIN:
    # Model.fit accepts generators directly (fit_generator is deprecated in TF2).
    history = model.fit(getgen().flow(Xtrain, Ytrain, batch_size=BS),
                        steps_per_epoch=int(Xtrain.shape[0] / BS),
                        epochs=totalepochs(SCHEDULE),
                        validation_data=(Xval, Yval),
                        verbose=2,
                        callbacks=getcallbacks(Xtrain))
else:
    model.load_weights(WEIGHTS_NAME)
print("Model performance on the validation set:")
model.evaluate(Xval, Yval, verbose=2)
# Softmax the logits, then take the most likely class for each test sample.
results = tf.nn.softmax(model.predict(Xtest)).numpy()
results = np.argmax(results, axis=1)
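
A sketch of persisting the predicted classes as a Kaggle-style CSV; the column names and filename are assumptions:

import pandas as pd

# One row per test sample: 1-based id plus predicted class label.
pd.DataFrame({'ImageId': np.arange(1, len(results) + 1),
              'Label': results}).to_csv('submission.csv', index=False)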