import time

import preproc
# For standalone Keras, use `from keras.callbacks import ...` instead.
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint


def train(model, dist, undist, args):
    """Train `model` on the distorted/undistorted data and report test metrics."""
    X_train, X_test, y_train, y_test = preproc.load_data(
        dist, undist, args.input_shape, 0.25)

    # The model is constructed by the caller (e.g. models.blur_model(tuple(args.input_shape))).
    # The timestamp uses hyphens instead of colons so the checkpoint filename
    # is filesystem-safe on all platforms.
    current_time = time.strftime("%H-%M-%S", time.localtime())
    callbacks = [
        # Stop early once validation loss has not improved for 2 epochs.
        EarlyStopping(monitor='val_loss', patience=2),
        # Keep only the weights with the best validation loss so far.
        ModelCheckpoint(filepath='weights/{}_{}.h5'.format(args.model, current_time),
                        monitor='val_loss',
                        save_best_only=True,
                        mode='auto'),
    ]

    model.fit(X_train, y_train,
              callbacks=callbacks,
              batch_size=args.batch_size,
              epochs=args.num_epochs,
              verbose=2,
              validation_data=(X_test, y_test))

    score = model.evaluate(X_test, y_test, verbose=2)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
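# Hedged usage sketch (not in the original file): one way train() might be
# wired up. The argparse flags mirror the attributes train() reads off `args`,
# and `models.blur_model` comes from the constructor mentioned above; the
# default values here are assumptions, not from the source.
def _demo_train(dist, undist):
    import argparse

    import models  # assumed local module providing model constructors

    parser = argparse.ArgumentParser()
    parser.add_argument('--model', default='blur')
    parser.add_argument('--input-shape', dest='input_shape',
                        nargs=3, type=int, default=[64, 64, 1])
    parser.add_argument('--batch-size', dest='batch_size', type=int, default=32)
    parser.add_argument('--num-epochs', dest='num_epochs', type=int, default=10)
    args = parser.parse_args([])  # use the defaults above

    model = models.blur_model(tuple(args.input_shape))
    train(model, dist, undist, args)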
from collections import Counter, defaultdict

# START_TAG, END_TAG, UNK, and load_data are assumed to be provided elsewhere
# in this module/package (shared constants and the dataset loader).


def get_tag_trans_counts(trainfile):
    """Compute a dict of counters for tag transitions.

    :param trainfile: name of file containing training data
    :returns: dict in which keys are tags and values are counters of
        succeeding tags
    :rtype: dict
    """
    tags_appeared = {START_TAG, END_TAG}
    tot_counts = defaultdict(Counter)
    total_transitions = defaultdict(float)

    title, X, Y = load_data(trainfile)
    for k in range(len(X)):
        t = Y[k]
        # Count the transition from START_TAG into the first tag, every
        # adjacent tag pair inside the sequence, and the transition from
        # the last tag into END_TAG.
        total_transitions[(START_TAG, t[0])] += 1
        for i in range(len(t) - 1):
            total_transitions[(t[i], t[i + 1])] += 1
        tags_appeared = tags_appeared.union(set(t))
        total_transitions[(t[-1], END_TAG)] += 1

    # END_TAG never starts a transition, so it gets no counter of its own.
    for tag_1 in tags_appeared:
        if tag_1 != END_TAG:
            for tag_2 in tags_appeared:
                tot_counts[tag_1][tag_2] = total_transitions[(tag_1, tag_2)]

    return dict(tot_counts)
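# Hypothetical usage sketch (not in the original module), assuming `trainfile`
# is a path load_data can parse: each value in the returned dict is a Counter
# over successor tags, so the START_TAG entry counts which tags begin sentences.
def _demo_tag_trans_counts(trainfile):
    trans = get_tag_trans_counts(trainfile)
    # Three most frequent sentence-initial tags and their counts.
    print(trans[START_TAG].most_common(3))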
def get_tag_word_counts(trainfile):
    """Produce a Counter of word occurrences for each tag.

    :param trainfile: the filename to be passed to load_data
    :returns: a defaultdict of Counters, where the keys are tags
    """
    all_counters = defaultdict(Counter)
    title, X, Y = load_data(trainfile)
    # X[i] is the word sequence and Y[i] the tag sequence for sentence i.
    for i in range(len(X)):
        for j in range(len(X[i])):
            all_counters[Y[i][j]][X[i][j]] += 1
    return all_counters
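# Hypothetical usage sketch (not in the original module): the returned mapping
# gives, per tag, how often each word was emitted with that tag.
def _demo_tag_word_counts(trainfile, tag):
    counters = get_tag_word_counts(trainfile)
    # Five most common words observed with `tag`.
    print(counters[tag].most_common(5))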
def get_word_to_ix(input_file, max_size=100000):
    """Build a vocabulary of the most frequently occurring words, capped so
    that len(vocab) <= max_size, append an UNK token, and create a dictionary
    mapping each word to a unique index.

    :returns: vocab, word_to_ix
        vocab: list of words in the vocabulary
        word_to_ix: dict mapping each word to a unique index
    """
    vocab_counter = Counter()
    # load_data returns (title, X, Y), as in the other helpers above.
    title, X, Y = load_data(input_file)
    for word_list in X:
        for word in word_list:
            vocab_counter[word] += 1

    # Reserve one slot for UNK so the total vocabulary size stays <= max_size.
    vocab = [word for word, val in vocab_counter.most_common(max_size - 1)]
    vocab.append(UNK)

    word_to_ix = {word: ix for ix, word in enumerate(vocab)}
    return vocab, word_to_ix
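# Hypothetical usage sketch (not in the original module): UNK always receives
# the last index, so unseen words can be looked up with a .get() fallback.
def _demo_word_lookup(input_file, word):
    vocab, word_to_ix = get_word_to_ix(input_file)
    return word_to_ix.get(word, word_to_ix[UNK])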
def get_tag_to_ix(input_file):
    """Create a dictionary that maps each tag to a unique index, and the
    inverse dictionary that maps each index back to its tag.

    Note: START_TAG and END_TAG are not added here; only tags observed in
    the training data receive indices.

    :returns: tag_to_ix, ix_to_tag
        tag_to_ix: dict mapping each tag to a unique index
        ix_to_tag: dict mapping each unique index to its tag
    """
    tag_to_ix = {}
    title, X, Y = load_data(input_file)
    for tag_list in Y:
        for tag in tag_list:
            if tag not in tag_to_ix:
                tag_to_ix[tag] = len(tag_to_ix)

    ix_to_tag = {v: k for k, v in tag_to_ix.items()}
    return tag_to_ix, ix_to_tag
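# Hypothetical usage sketch (not in the original module): the two returned
# dictionaries are exact inverses, so any tag round-trips through them.
def _demo_tag_roundtrip(input_file):
    tag_to_ix, ix_to_tag = get_tag_to_ix(input_file)
    for tag, ix in tag_to_ix.items():
        assert ix_to_tag[ix] == tag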
results_path = Path('{}_results'.format(prefix))
log_path = results_path / '{}_out_{}.txt'.format(prefix, timestamp)
model_path = results_path / '{}_model_{}.ckpt'.format(prefix, timestamp)
img_path = results_path / '{}_cross_entropy_{}.png'.format(prefix, timestamp)

# Init logger
print(log_path, end='\r\n')
logger = MyLogger(log_path)

# Disable TF C++ logging (note: this env var only takes effect if it is set
# before TensorFlow is imported)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Load data and standardize features to zero mean and unit variance
logger.debug('Loading data... ')
t0 = datetime.now()
Xtrain, Ytrain, Xtest, Ytest = load_data()
Xtrain = flatten(Xtrain)
Xtest = flatten(Xtest)
Xtrain = (Xtrain - Xtrain.mean()) / Xtrain.std()
Xtest = (Xtest - Xtest.mean()) / Xtest.std()
dt = datetime.now() - t0
logger.debug('Done. [Elapsed {}]\r\n'.format(dt))

# Define and fit model
logger.debug('Model fitting...\r\n')
t0 = datetime.now()

# Printing period for cost of test set and accuracy
print_period = 1
# Number of samples to take from test set each time it computes cost
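# Hedged sketch (not the original implementation): one way the knobs above are
# typically used -- every `print_period` iterations, draw a random subsample of
# the test set and compute cost/accuracy on it, which keeps evaluation cheap.
# `n_eval_samples`, `compute_cost`, and `compute_accuracy` are hypothetical
# names, and numpy is assumed to be imported as np elsewhere in this script.
def eval_on_test_subsample(Xtest, Ytest, n_eval_samples,
                           compute_cost, compute_accuracy):
    idx = np.random.choice(len(Xtest), size=n_eval_samples, replace=False)
    return (compute_cost(Xtest[idx], Ytest[idx]),
            compute_accuracy(Xtest[idx], Ytest[idx]))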
r, c = 5, 5
noise = np.random.normal(0, 1, (r * c, latent_dim))
gen_imgs = model.predict(noise)


def normal(x):
    """Min-max scale x into [0, 1] (available for optional rescaling of the samples)."""
    return (x - np.min(x)) / (np.max(x) - np.min(x))


# np.save returns None, so don't reassign gen_imgs to its result.
np.save("gen_imgs.npy", gen_imgs)

plt.close()
plt.imshow(gen_imgs[0, :, :, 0])
plt.show()

X_train = preproc.load_data()

# Compare summary statistics of generated vs. real data
gen_data = gen_imgs[:, :, :, 0].ravel()
data = X_train[:, :, :].ravel()
print("average_gen: {}".format(np.average(gen_data)))
print("average_21: {}".format(np.average(data)))
print("max_gen: {}".format(max(gen_data)))
print("min_gen: {}".format(min(gen_data)))
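# Hedged sketch (assumption, not from the source): the statistics above suggest
# a visual comparison; overlaid matplotlib histograms show how closely the
# generated pixel distribution matches the real one.
plt.hist(data, bins=50, alpha=0.5, density=True, label='real')
plt.hist(gen_data, bins=50, alpha=0.5, density=True, label='generated')
plt.legend()
plt.title('Generated vs. real pixel value distributions')
plt.show()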
import argparse

import numpy as np
from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (accuracy_score, classification_report,
                             confusion_matrix)
from sklearn.svm import SVC

from preproc import load_data, tokenize

parser = argparse.ArgumentParser()
parser.add_argument("--classifier", type=str, default='lr')

if __name__ == "__main__":
    train_df = load_data('train')
    valid_df = load_data('valid')
    x_train = train_df['text']
    y_train = train_df['stars']
    x_valid = valid_df['text']
    y_valid = valid_df['stars']

    # Fit the TF-IDF vocabulary on the training text only
    tfidf = TfidfVectorizer(tokenizer=tokenize)
    tfidf.fit(x_train)

    args = parser.parse_args()
    if 'lr' == args.classifier:
        print("using single logistic regression")
        clf = LogisticRegression()
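    # Hedged sketch (not from the source): the fragment ends right after
    # constructing the classifier, and the AdaBoost/Bagging/SVC imports above
    # suggest other --classifier branches are elided here. A typical
    # continuation transforms both splits with the fitted vectorizer, fits the
    # classifier, and reports the metrics that are already imported.
    X_train_vec = tfidf.transform(x_train)
    X_valid_vec = tfidf.transform(x_valid)
    clf.fit(X_train_vec, y_train)
    y_pred = clf.predict(X_valid_vec)
    print("accuracy:", accuracy_score(y_valid, y_pred))
    print(classification_report(y_valid, y_pred))
    print(confusion_matrix(y_valid, y_pred))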