def test_model(self):
    # Rebuild the graph and define the evaluation cost.
    pred = self.build()
    cost = tf.nn.softmax_cross_entropy_with_logits(labels=self.snt_wt, logits=pred)
    saver = tf.train.Saver()
    my_data = prep_data(mode='testing')
    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        # Restore the trained parameters saved by train_model().
        saver.restore(sess, "./params/model.ckpt")
        print('[+] restored parameters from disk.')
        self.test_loss = []
        for i in tqdm(range(100)):
            p2v, p_wt, q2v = my_data.get_vectors()
            self.test_loss.append(
                sess.run(cost, feed_dict={self.Passage: p2v,
                                          self.Question: q2v,
                                          self.snt_wt: p_wt}))
        plt.plot(self.test_loss, 'r--')
        plt.show()

# def train_model_eager():
#     tf.enable_eager_execution()
#     print('executing eagerly? {}'.format(tf.executing_eagerly()))
def split_scale():
    # 80/20 split, then fit the scaler on the training set only so no
    # information from the test set leaks into the scaling.
    train, test = train_test_split(prep_data(), train_size=.8, random_state=123)
    scaler = MinMaxScaler(copy=True, feature_range=(0, 1)).fit(train)
    train_scaled = pd.DataFrame(scaler.transform(train),
                                columns=train.columns.values).set_index([train.index.values])
    test_scaled = pd.DataFrame(scaler.transform(test),
                               columns=test.columns.values).set_index([test.index.values])
    return train_scaled, test_scaled, scaler
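# A minimal, hypothetical usage sketch for the scaler returned by split_scale():
# the same fitted MinMaxScaler can map scaled values (for example, model
# predictions) back to original units with inverse_transform. The toy DataFrame
# below is illustrative only, not the project's data.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

toy = pd.DataFrame({'sqft': [800, 1200, 1500, 2200, 3100],
                    'taxvalue': [90000, 150000, 210000, 320000, 450000]})
train, test = train_test_split(toy, train_size=.8, random_state=123)
scaler = MinMaxScaler(feature_range=(0, 1)).fit(train)   # fit on train only
test_scaled = scaler.transform(test)                      # apply to test
print(scaler.inverse_transform(test_scaled))              # back to original units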
def train_model(self):
    print('[+] started training')
    pred = self.build_1()
    print(len(pred))
    print(self.snt_wt.shape)
    print('[+] model is built')
    # Cross-entropy between the sentence-weight targets and the predicted logits.
    cost = tf.nn.softmax_cross_entropy_with_logits(labels=self.snt_wt, logits=pred)  # tf.nn.softmax(self.snt_wt) - pred
    print('[+] cost')
    init_op = tf.global_variables_initializer()
    print('[+] init_op')
    optimizer = tf.train.GradientDescentOptimizer(self.learning_rate).minimize(cost)
    print('[+] optimizer')
    saver = tf.train.Saver()
    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        print('[+] session opened')
        # sess.run(init_op)
        saver.restore(sess, "./params/model.ckpt")
        print('[+] params loaded from disk')
        my_data = prep_data(mode='training')
        self.MAX_P_WORDS = 50
        self.MAX_Q_WORDS = 30
        # q_list = list(self.df_qas['q_no'])
        self.train_loss = []
        for i in range(self.no_epoch):
            print('epoch {}'.format(i))
            avg_loss = 0.
            # Inner counter renamed from `i` so it no longer shadows the epoch counter.
            for step in tqdm(range(1000)):
                p2v, p_wt, q2v = my_data.get_vectors()
                _, loss_i = sess.run([optimizer, cost],
                                     feed_dict={self.Passage: p2v,
                                                self.Question: q2v,
                                                self.snt_wt: p_wt})
                avg_loss += loss_i
            self.train_loss.append(avg_loss / 1000.)
        save_path = saver.save(sess, "./params/model.ckpt")
        print('[+] saved parameters to disk.')
        plt.plot(self.train_loss, 'r--')
        plt.show()
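# The cost above uses tf.nn.softmax_cross_entropy_with_logits, which expects
# `labels` to be a probability distribution over classes (hence the commented-out
# tf.nn.softmax(self.snt_wt) alternative). A minimal NumPy sketch of what the op
# computes per example; the arrays are illustrative values only.
import numpy as np

def softmax(x):
    e = np.exp(x - x.max(axis=-1, keepdims=True))   # shift for numerical stability
    return e / e.sum(axis=-1, keepdims=True)

labels = np.array([[0.7, 0.2, 0.1]])   # already a valid distribution per row
logits = np.array([[2.0, 1.0, 0.1]])
loss = -np.sum(labels * np.log(softmax(logits)), axis=-1)
print(loss)   # one loss value per row, matching the TF op's output shape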
import prep as p
import score as s
import match as m
import analyze_evaluate as ae

track = "python"

print("before __name__ guard")

if __name__ == '__main__':
    print("importing data")
    data, email_ids, fdf, idx_dict = p.prep_data(track=track)

    print("scoring participants")
    df_matrix = s.scoring_alg(fdf, data, idx_dict, track=track)

    print("matching participants")
    df_matched = m.pair_participants(data, df_matrix, email_ids, idx_dict)

    print("analyze matches")
    ae.evaluate_matches(df_matched)

    print("saving matches to db or .csv")
    # m.save_matches(df_matched, track=track)

    print("create and save .csv for email sending")
    m.csv_announcement_email(df_matched)

print("after __name__ guard")
import sys


def get_params():
    try:
        name, epochs, batches = sys.argv[1:4]
    except ValueError:
        print('Usage: %s model_name epochs batch_size %s' % (script, xa))
        exit(1)
    try:
        plot = sys.argv[4]
    except IndexError:
        plot = False
    return name, int(epochs), int(batches), plot


if __name__ == '__main__':
    X, Y = get_data()
    train_x, train_y, test_x, test_y = prep_data(X, Y)

    # Getting our command line parameters
    name, epochs, batches, plot = get_params()

    # Do the training
    model, name, mp, history = train_model(name, train_x, train_y, epochs,
                                           batches, test_x, test_y)

    # Save models and the training history for later use
    mname = 'models/model-%s-%d-%d' % (name, epochs, batches)
    model.save(mname + '.h5')

    title = '%s (epochs=%d, batch_size=%d)' % (name, epochs, batches)

    # Test our model on both data that has been seen
    # (training data set) and unseen (test data set)
    print('Scores for %s' % title)

    # Notice that we need to specify batch_size in evaluate when we're
    # using LSTM.
    train_score = model.evaluate(train_x,
from acquire import get_data
from prep import prep_data

# Get the raw data from .csv or MySQL query
raw = get_data()

# Remove nulls
df = prep_data(raw)

# Milestones before Friday:
# 2. Scale
# 3. Super basic Model

df.info()
df.describe()

# First pass for outlier detection:
# Do the value counts and distribution make sense?
# Is there anything way out of line here?
df.bedrooms.value_counts()    # encode as discrete
df.bathrooms.value_counts()   # encode as discrete
df.sqft.value_counts()        # can bin or scale
df.taxvalue.value_counts()    # scale this (also our target variable)
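# A hypothetical sketch of the follow-up steps hinted at in the comments above:
# treating bedroom counts as a discrete (categorical) feature and binning sqft
# instead of scaling it. The toy DataFrame stands in for df; the column names
# match the real data, the values and bin edges do not.
import pandas as pd

toy = pd.DataFrame({'bedrooms': [2, 3, 3, 4, 5],
                    'sqft': [900, 1400, 1600, 2400, 3800]})

# Encode bedroom counts as a categorical feature.
toy['bedrooms'] = toy['bedrooms'].astype('category')
print(toy['bedrooms'].value_counts())

# Bin square footage into a handful of ranges.
toy['sqft_bin'] = pd.cut(toy['sqft'], bins=[0, 1200, 2000, 3000, 5000],
                         labels=['small', 'medium', 'large', 'xl'])
print(toy[['sqft', 'sqft_bin']])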
TEST_TAR_PATH = args['test_tar_path']
DATASET_PATH = args['dataset']
TEST_PATH = args['test']
INPUT_LENGTH = args['x_length']
OUTPUT_LENGTH = args['y_length']
HIDDEN_SIZE = args['hidden_size']
NUM_LAYERS = args['num_layers']
MODEL_WEIGHTS = args['model_weights']

if TAR_PATH is not None:
    untar(TAR_PATH)
if TEST_TAR_PATH is not None:
    untar(TEST_TAR_PATH)

X, Y, input_vocab, output_vocab, input_dict, output_dict = prep_data(
    DATASET_PATH, INPUT_LENGTH, OUTPUT_LENGTH)
X_test, Y_test, test_input_vocab, test_output_vocab, _, _ = prep_data(
    TEST_PATH, INPUT_LENGTH, OUTPUT_LENGTH)

auto_encoder = get_model(len(input_vocab), INPUT_LENGTH,
                         len(output_vocab), OUTPUT_LENGTH,
                         HIDDEN_SIZE, NUM_LAYERS)
auto_encoder.load_weights(MODEL_WEIGHTS)
auto_encoder.summary()

expected_codes = vec_to_words(Y_test, test_output_vocab)
tasks = input_vec_to_words(X_test, test_input_vocab)
codes = predict_codes(auto_encoder, X_test, input_dict, INPUT_LENGTH, output_vocab)
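# load_weights above only restores parameter values; the architecture has to be
# rebuilt first with exactly the same hyperparameters (vocab sizes, lengths,
# HIDDEN_SIZE, NUM_LAYERS) used when the weights were saved, or the weight
# shapes will not match. A minimal generic Keras sketch of that pattern; the
# tiny Dense model and file name are illustrative, not the project's get_model().
from keras.models import Sequential
from keras.layers import Dense

def build_model():
    return Sequential([Dense(8, activation='relu', input_shape=(4,)),
                       Dense(2, activation='softmax')])

trained = build_model()
trained.save_weights('demo_weights.h5')    # after training, persist weights only

restored = build_model()                   # identical architecture
restored.load_weights('demo_weights.h5')   # shapes match, weights load cleanly
restored.summary()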
def split_data():
    train, test = train_test_split(prep_data(), train_size=.8, random_state=123)
    return train, test
import argparse

from keras.utils import plot_model

from prep import prep_data
from model import get_model

ap = argparse.ArgumentParser()
ap.add_argument('--dataset', type=str, default='./data')
ap.add_argument('--x_length', type=int, default=15)
ap.add_argument('--y_length', type=int, default=20)
ap.add_argument('--hidden_size', type=int, default=128)
ap.add_argument('--num_layers', type=int, default=2)
ap.add_argument('--show_shapes', type=bool, default=False)
args = vars(ap.parse_args())

DATASET_PATH = args['dataset']
INPUT_LENGTH = args['x_length']
OUTPUT_LENGTH = args['y_length']
HIDDEN_SIZE = args['hidden_size']
NUM_LAYERS = args['num_layers']
SHOW_SHAPES = args['show_shapes']

_, _, input_vocab, output_vocab, _, _ = prep_data(DATASET_PATH, INPUT_LENGTH, OUTPUT_LENGTH)

auto_encoder = get_model(len(input_vocab), INPUT_LENGTH,
                         len(output_vocab), OUTPUT_LENGTH,
                         HIDDEN_SIZE, NUM_LAYERS)

plot_model(auto_encoder, to_file='model.png', show_shapes=SHOW_SHAPES)
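# plot_model needs the pydot package and a system Graphviz install to render the
# PNG; without them it fails with an import error. A small self-contained sketch
# of the same call on a throwaway model; the two-layer Dense network and the
# 'demo_model.png' file name are illustrative only.
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import plot_model

demo = Sequential([Dense(16, activation='relu', input_shape=(10,)),
                   Dense(1, activation='sigmoid')])
plot_model(demo, to_file='demo_model.png', show_shapes=True, show_layer_names=True)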