Example #1
for i in range(num_epochs):
  # get this minibatch
  rand_x = x_vals_train[batchIndxs[i],:]
  rand_y = y_vals_train[batchIndxs[i],:]
  # train on this batch, and record loss on it
  xy_dict = {x_data:rand_x, y_trgt:rand_y}
  
  # train, and get the loss and predictions for this batch
  _,tmp_loss,pred = sess.run([train_step,loss,tf.round(tf.sigmoid(layerActivs[-1]))],\
      feed_dict = xy_dict)
  xy_dict[yhat] = pred
  # compute the evaluation metrics
  tmp_acc_train,tmp_pre_train,tmp_rec_train,tmp_f1s_train = sess.run(\
      [accuracy,precision,recall,F1], xy_dict)
  # smooth it all
  loss_ma = kl.smooth(loss_ma, tmp_loss, theta, (0 == i))
  loss_vec[i]=loss_ma
  train_ma[0] = kl.smooth(train_ma[0], tmp_acc_train, theta, (0 == i))
  train_ma[1] = kl.smooth(train_ma[1], tmp_pre_train, theta, (0 == i))
  train_ma[2] = kl.smooth(train_ma[2], tmp_rec_train, theta, (0 == i))
  train_ma[3] = kl.smooth(train_ma[3], tmp_f1s_train, theta, (0 == i))
  train_vec[i] = train_ma.copy()
       
  if (0 == i % print_freq): 
    print("Smoothed step %u/%u, train batch loss: %0.4f\n\ttrain batch perf: acc=%0.4f,pre=%0.4f,rec=%0.4f,F1=%0.4f"% 
        (i, num_epochs, loss_ma, *train_ma))
    # checkpoint progress
    saver.save(sess, os.getcwd()+'/log/'+log_file_name+'_ckpt', global_step=i)
  if i % save_freq == 0:
    # save summary stats
    l,p = sess.run([losSummary,perfSumm], xy_dict)
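All of these training loops report exponentially smoothed values via kl.smooth. A minimal sketch of what that helper is assumed to do here, an exponential moving average with weight theta on the new value, re-initialized on the first step (the (0 == i) flag above):

def smooth(ma, new_value, theta, first=False):
    # exponential moving average; theta is the weight given to the new value
    if first:
        return new_value
    return (1.0 - theta) * ma + theta * new_value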
Example #2
    rand_y = y_vals_train[rand_indices]

    #print("rand_x shape: %s" % str(rand_x.shape))
    #print("rand_y shape: %s" % str(rand_y.shape))

    #print("x_data shape: %s" % str(x_data.shape))
    #print("y_trgt shape: %s" % str(y_trgt.shape))

    # train on this batch, and record loss on it
    xy_dict = {x_data: rand_x, y_trgt: rand_y}

    # Note that the first report is after the first training step
    sess.run(train_step, feed_dict=xy_dict)

    tmp_loss = sess.run(loss, feed_dict=xy_dict)
    loss_ma = kl.smooth(loss_ma, tmp_loss, theta, (0 == i))
    loss_vec.append(loss_ma)

    # record accuracy over current training set
    tmp_acc_train = sess.run(accuracy, feed_dict=xy_dict)
    tmp_acc_train = kl.log_odds(tmp_acc_train)
    train_ma = kl.smooth(train_ma, tmp_acc_train, theta, (0 == i))
    train_vec.append(train_ma)

    # record accuracy over random part of test set
    num_tmp_test = int(num_test_rows / 100.0)
    rand_indices = random.sample(range(num_test_rows), num_tmp_test)
    rand_x = x_vals_test[rand_indices]
    rand_y = y_vals_test[rand_indices]
    tmp_acc_test = sess.run(accuracy,
                            feed_dict={
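Example #2 also passes the batch accuracy through kl.log_odds before smoothing. A sketch of that transform, assuming it is the usual logit function (an assumption, not confirmed by the snippet):

import numpy as np

def log_odds(p, eps=1e-7):
    # map a value in (0, 1) to log(p / (1 - p)); eps keeps p away from exactly 0 or 1
    p = np.clip(p, eps, 1.0 - eps)
    return np.log(p / (1.0 - p))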
Example #3
train_vec = [[0.0]*4 for _ in range(num_epochs)] # will record accuracy, precision, recall, F1

print_freq = num_epochs//10   # control console print & checkpoint frequency
save_freq = num_epochs//5     # control tensorboard save frequency
for i in range(num_epochs):
  # get this minibatch
  rand_x = x_vals_train[batchIndxs[i],:]
  rand_y = y_vals_train[batchIndxs[i],:]
  # train on this batch, and record loss on it
  xy_dict = {x_data:rand_x, y_trgt:rand_y}
    
  # Note that the first report is after the first training step
  sess.run(train_step, feed_dict=xy_dict)
  
  tmp_loss = sess.run(loss, feed_dict=xy_dict)
  loss_ma = kl.smooth(loss_ma, tmp_loss, theta, (0 == i))
  loss_vec[i]=loss_ma
    
  # record performance metrics over current minibatch
  pred = sess.run(tf.round(tf.sigmoid(out2)), feed_dict = xy_dict)
  yyhat_dict = {y_trgt:rand_y, yhat:pred}
  tmp_acc_train = sess.run(accuracy, feed_dict=yyhat_dict)
  train_ma[0] = kl.smooth(train_ma[0], tmp_acc_train, theta, (0 == i))
  tmp_pre_train = sess.run(precision, feed_dict=yyhat_dict)
  train_ma[1] = kl.smooth(train_ma[1], tmp_pre_train, theta, (0 == i))
  tmp_rec_train = sess.run(recall, feed_dict=yyhat_dict)
  train_ma[2] = kl.smooth(train_ma[2], tmp_rec_train, theta, (0 == i))
  tmp_f1s_train = sess.run(F1,feed_dict=yyhat_dict)
  train_ma[3] = kl.smooth(train_ma[3], tmp_f1s_train, theta, (0 == i))
  #tmp_acc_train = kl.log_odds(tmp_acc_train) 
  train_vec[i] = train_ma.copy()
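The accuracy, precision, recall, and F1 nodes evaluated above work on integer (here 0/1) labels and predictions. A plain-NumPy sketch of the same metrics for the binary case, with a small epsilon guarding the denominators as the graph-side F1 does (the epsilons on precision and recall are an extra safeguard in this sketch):

import numpy as np

def binary_metrics(y_true, y_pred, eps=1e-4):
    # accuracy, precision, recall and F1 for 0/1 label and prediction vectors
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()
    tp = np.sum((y_pred == 1) & (y_true == 1))
    fp = np.sum((y_pred == 1) & (y_true == 0))
    fn = np.sum((y_pred == 0) & (y_true == 1))
    acc = np.mean(y_pred == y_true)
    pre = tp / (tp + fp + eps)
    rec = tp / (tp + fn + eps)
    f1 = 2.0 * pre * rec / (pre + rec + eps)
    return acc, pre, rec, f1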
Example #4
 
 #print("rand_x shape: %s" % str(rand_x.shape))
 #print("rand_y shape: %s" % str(rand_y.shape))
 
 
 #print("x_data shape: %s" % str(x_data.shape))
 #print("y_trgt shape: %s" % str(y_trgt.shape))
 
 # train on this batch, and record loss on it
 xy_dict = {x_data:rand_x, y_trgt:rand_y}
 
 # Note that the first report is after the first training step
 sess.run(train_step, feed_dict=xy_dict)
 
 tmp_loss = sess.run(loss, feed_dict=xy_dict)
 loss_ma = kl.smooth(loss_ma, tmp_loss, theta, (0 == i))
 loss_vec.append(loss_ma)
 
 # record accuracy over current training set
 tmp_acc_train = sess.run(accuracy, feed_dict=xy_dict)
 tmp_acc_train = kl.log_odds(tmp_acc_train) 
 train_ma = kl.smooth(train_ma, tmp_acc_train, theta, (0 == i))
 train_vec.append(train_ma)
 
 
 # record accuracy over random part of test set
 # NOTE: as noted above, this currently must be the same batch_size
 # TODO: allow variable size inputs to Zeta
 num_tmp_test = batch_size
 rand_indices = random.sample(range(num_test_rows), num_tmp_test)
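 # (sketch note: random.sample draws indices without replacement and raises
 #  ValueError if num_test_rows < batch_size; np.random.choice(num_test_rows,
 #  num_tmp_test, replace=True) would be an alternative in that case)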
 rand_x = x_vals_test[rand_indices]
Example #5
def RunNN(x_data, y_data, epochs, prng_seed = 0, trainPerc = 0.95, devePerc = 0.05,\
          learn_rate = 1.0, learn_rate_decay = 0.0, regulRate = 0.5, batch_size = 100, num_cdmp_actors = 4,\
          hiddenLayerWs = [1.5,1.0], optimMeth = 'GD', optimBetas=[0.0,0.0], dropoutKeep = 1.0, scenName = 'noname'):
  # create the integer decoded version of y - this is only needed for
  # calculation of the performance metrics
  y_decode = np.argmax(y_data,axis=1)
  # variable counts
  num_data_col = x_data.shape[1]
  num_choice_col = y_data.shape[1]
  # ---------------------------------------------
  #%%
  sys.stdout.flush()
  stime = datetime.datetime.now()
  logging.info("RunNN start time: " + datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
  logging.info('Scenario: '+scenName)
  sys.stdout.flush()
  sys.stdout.flush()
  
  prng_seed = kl.set_prngs(prng_seed)
  
  ops.reset_default_graph()
  sess = tf.Session()
  
  # prepare log dir, if necessary
  try:
    os.mkdir(os.getcwd()+'/log')
  except FileExistsError:
    pass
  
  # ---------------------------------------------
  #%%
  # use sklearn to perform stratified randomized partitioning into training and dev sets
  # this is necessary because the vehicle choice dataset is very unbalanced
  sss = StratifiedShuffleSplit(n_splits=1, train_size=trainPerc, test_size = devePerc)
  train_indices,deve_indices = next(sss.split(x_data, y_data))
  num_train_rows = len(train_indices) # need this later on
  # create the partitions
  x_vals_train = x_data[train_indices,:]
  y_vals_train = y_data[train_indices,:]
  y_dec_train = y_decode[train_indices]
  
  x_vals_deve = x_data[deve_indices,:]
  y_vals_deve = y_data[deve_indices,:]
  y_dec_deve = y_decode[deve_indices]
  
  logging.info("num_train_rows: %u, num_deve_rows: %u" %(num_train_rows, len(deve_indices)))
  
  # ---------------------------------------------
  #%%
  # setup training
  a_stdv = 0.1          # standard dev. for initialization of node weights
  modelType = ['NN','KT'][num_cdmp_actors > 0]
  logging.info("num_cdmp_actors: %u" %  num_cdmp_actors)
  
  # nodes per layer - first is the number of features, last is always the number
  # of categories C being predicted
  layerWidths = [num_data_col]
  layerWidths.extend([int(round(num_data_col*i,0)) for i in hiddenLayerWs])
  layerWidths.append(num_choice_col)
  hidden_layers = len(layerWidths)-2     # number hidden layers
  keepProb = tf.placeholder(tf.float32)  # placeholder for dropout
  learnRate = tf.placeholder(tf.float32) # placeholder for adaptive learning rate
  
  with tf.name_scope('HyperParms'):
    lr = tf.constant(learn_rate)
    ld = tf.constant(learn_rate_decay)
    la = tf.constant(regulRate)
    bs = tf.constant(batch_size)
    ne = tf.constant(epochs['epochMax'])
    dp = tf.constant(devePerc)
    hl = tf.constant(hidden_layers)
    mt = tf.constant(num_cdmp_actors)
    om = tf.constant(['GD','RMSPROP','ADAM'].index(optimMeth))
    b1 = tf.constant(optimBetas[0])
    b2 = tf.constant(optimBetas[1])
    dk = tf.constant(dropoutKeep)
    # summaries
    parmSumm = tf.summary.merge([tf.summary.scalar('learn_rate', lr),\
              tf.summary.scalar('learn_rate_decay',ld),tf.summary.scalar('regul_rate', la),\
              tf.summary.scalar('minibatch_size',bs),tf.summary.scalar('epochs',ne),\
              tf.summary.scalar('dev_set_size',dp),tf.summary.scalar('hidden_layers',hl),\
              tf.summary.scalar('num_cdmp_actors',mt),tf.summary.scalar('optim_method',om),\
              tf.summary.scalar('optim_beta1',b1),tf.summary.scalar('optim_beta2',b2),\
              tf.summary.scalar('dropout_keep',dk)])
  
  # talk some
  logging.info('Hyperparameters\n\tlearning rate: %0.2f(%0.2f)\n\tregularization rate: %0.2f\n\tminibatch size: %u\n\tnumber epochs: %d\n\thidden layers: %d\n\tCDMP actors: %d\n\toptim. method: %s(%0.2f,%0.2f)\n\tdropout keep: %0.2f'\
        %(learn_rate,learn_rate_decay,regulRate,batch_size,epochs['epochMax'],hidden_layers,num_cdmp_actors,optimMeth,optimBetas[0],optimBetas[1],dropoutKeep))
  
  # create the logfile prefix
  log_file_name = 'log_%s%04d_%s_%s_%05d_%s_%d'%(modelType,hidden_layers,scenName,optimMeth,epochs['epochMax'],stime.strftime('%Y%m%d_%H%M%S'),prng_seed)
  
  # ---------------------------------------------
  #%%
  # number of rows could be batch_size, num_rows_train, or num_rows_deve.
  # So we mark it as None, which really means variable-size
  with tf.name_scope('Input'):
    x_data = tf.placeholder(shape=[None, num_data_col], dtype=tf.float32,name='X')
    y_trgt = tf.placeholder(shape=[None, num_choice_col], dtype=tf.float32,name='Y')
    y_dec = tf.placeholder(shape=[None,], dtype=tf.float32,name='Ydecode')
    yhat_dec = tf.placeholder(shape=[None, ], dtype=tf.float32,name='Yhatdecode')    
  
  logging.info("x_data shape: %s" % str(x_data.shape))
  logging.info("y_trgt shape: %s" % str(y_trgt.shape))
  
  # ---------------------------------------------
  #%%
  # build each of the layers
  lRng = range(1,len(layerWidths))
  ids = dict.fromkeys(lRng,0.0)
  layerParams = {'A':ids,'b':ids.copy()}
  layerActivs = [None]*(hidden_layers+2) #[0] is an unused placeholder for the data
  for i in lRng:
    # define the scope name
    if (i == (hidden_layers+1)) and (num_cdmp_actors > 0):
      nam = 'CDMP'
    else:
      nam = 'Layer%d'%i
      
    with tf.name_scope(nam):
      # create the variables for the node parameters
      if (i == (hidden_layers+1)) and (num_cdmp_actors > 0):
        # map final hidden layer to the influences vector
        Aw  = tf.Variable(tf.random_normal(shape=[layerWidths[i-1], num_cdmp_actors],\
                                           mean=0.0, stddev=a_stdv))
        logging.info("Aw shape: %s" % str(Aw.shape))
        # NOTE WELL: we do not "learn" on wghts.
        wghts = tf.matmul(layerActivs[i-1], Aw)
        logging.info("wghts shape: %s" % str(wghts.shape))
        # wghts have shape [batch_size, num_cdmp_actors], so there is one vector
        # (1 dim) for each household in this batch
        
        # map final hidden layer to the utilities matrix
        Au =  tf.Variable(tf.random_normal(shape=[layerWidths[i-1], num_cdmp_actors,\
          num_choice_col], mean=0.0, stddev=a_stdv))
        logging.info("Au shape: %s" % str(Au.shape))      
        # NOTE WELL: we do not "learn" on utils.
        utils = tf.tensordot(layerActivs[i-1], Au, axes = [[1], [0]])
        logging.info("utils shape: %s" % str(utils.shape))      
        # utils have shape [batch_size, num_cdmp_actors, num_choice_col], so
        # there is one matrix (2 dim) for each household in this batch
      else:
        layerParams['A'][i] = tf.Variable(tf.random_normal(shape=[layerWidths[i-1],\
                    layerWidths[i]], mean=0.0, stddev=a_stdv),name='A')
        layerParams['b'][i] = tf.Variable(tf.zeros(shape=[layerWidths[i]]),name='b') # a 0-rank array?
        logging.info('A%d shape: %s, b%d shape: %s'%(i,layerParams['A'][i].shape,i,layerParams['b'][i].shape))
      # create the 'variable' for the layer output
      if i == 1:
        # first layer, so input is the data
        layerActivs[i] = tf.nn.relu(tf.add(tf.matmul(x_data,layerParams['A'][i]),\
                   layerParams['b'][i]))
      elif i == (hidden_layers+1):
        # last layer, so no activation function
        if num_cdmp_actors > 0:
          # put the influences and utilities together
          zeta_0 = tf.tensordot(wghts, utils, axes = [[1], [1]])
          logging.info("zeta_0 shape: %s" % str(zeta_0.shape))
          # because it is a *tensor* product, zeta_0 is a tensor of shape (BS, BS, NCC)
          # and includes all the mismatches where weight-vector(i) was paired with
          # util-matrix(j), i.e. every combination of one household's weights with every
          # other household's utilities. But we only want the ones where they match.
          # this slice, gather, and reshape picks out the zeta vectors along the 
          # "bi == bj" diagonal.
          slice_ndx = tf.constant([[i,j,k] for i in range(batch_size) \
                                   for j in range(batch_size) for k in \
                                   range(num_choice_col) if i==j])
          # gather the matching entries of the large zeta tensor into a plain vector
          zeta_v = tf.gather_nd(zeta_0, slice_ndx) 
          # NOTE WELL: we do not "learn" on zeta.
          # reshape that vector into the desired [batch_size, num_choice_col] shape
          layerActivs[i] = tf.reshape(zeta_v, [batch_size, num_choice_col])
          logging.info("After contraction-by-selection, shape of Zeta: %s" % (layerActivs[i].shape))
        else:
          layerActivs[i] =  tf.add(tf.matmul(layerActivs[i-1], layerParams['A'][i]),\
                     layerParams['b'][i])
      else:
        # hidden layer, so apply the activation function ...
        layerActivs[i] = tf.nn.relu(tf.add(tf.matmul(layerActivs[i-1],layerParams['A'][i]),\
                   layerParams['b'][i]))
        # ... then add dropout to the hidden layer
        layerActivs[i] = tf.nn.dropout(layerActivs[i], keepProb)
      logging.info('Activ%d shape: %s'%(i,layerActivs[i].shape))
  
  # ---------------------------------------------
  #%%
  # note that 'labels' are real numbers in the [0,1] interval,
  # logits are in (-inf, +inf)
  with tf.name_scope('Loss'):
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = y_trgt, logits = layerActivs[-1]))
    losSummary = tf.summary.scalar('Loss', loss)
  
  with tf.name_scope('Train'):
    regularizer =  tf.add_n([tf.nn.l2_loss(A) for A in layerParams['A'].values()])
    if optimMeth == 'GD':
      my_opt = tf.train.GradientDescentOptimizer(learnRate)
    elif optimMeth == 'RMSPROP':
      my_opt = tf.train.RMSPropOptimizer(learnRate,momentum=optimBetas[0])
    elif optimMeth == 'ADAM':
      my_opt = tf.train.AdamOptimizer(learnRate,beta1=optimBetas[0],beta2=optimBetas[1])      
    else:
      raise ValueError('Optimizer can only be "GD", "RMSPROP", or "ADAM"!')
    train_step = my_opt.minimize(loss + regulRate*regularizer/(2*batch_size))
  
  # ---------------------------------------------
  #%%
  # record standard classification performance metrics
  # note that these require integer labels *not* OHE!
  with tf.name_scope('Eval'):
    _,accuracy = tf.metrics.accuracy(y_dec,yhat_dec)
    _,precision = tf.metrics.precision(y_dec,yhat_dec)
    _,recall = tf.metrics.recall(y_dec,yhat_dec)
    F1 = 2*tf.divide(tf.multiply(precision,recall),0.0001+tf.add(precision,recall))
    # define the writer summaries
    accSummary = tf.summary.scalar('Accuracy', accuracy)
    preSummary = tf.summary.scalar('Precision', precision)
    recSummary = tf.summary.scalar('Recall', recall)
    f1sSummary = tf.summary.scalar('F1',F1)
    perfSumm = tf.summary.merge([accSummary,preSummary,recSummary,f1sSummary])
  
  # ---------------------------------------------
  #%%
  # finally perform the training simulation
  # setup for results & model serialization
  writer = tf.summary.FileWriter(os.getcwd()+'/log/train/'+log_file_name, sess.graph)
  writer.add_summary(parmSumm.eval(session=sess))
  devWriter = tf.summary.FileWriter(os.getcwd()+'/log/dev/'+log_file_name,sess.graph)
  devWriter.add_summary(parmSumm.eval(session=sess))
  
  sess.run([tf.global_variables_initializer(),tf.local_variables_initializer()])
  # The saver object will save all the variables
  saver = tf.train.Saver()
  
  # randomly generate the minibatches all at once
  batchIndxs = np.random.randint(0,num_train_rows,(epochs['epochMax'],batch_size))
  
  # set up for moving averages
  theta = 0.010 # weight on new value  - only for printing/plotting MAs
  loss_ma = 0.0; train_ma = [0.0]*4 # accuracy, precision, recall, F1
  
  loss_vec = [0.0]*epochs['epochMax']
  train_vec = [[0.0]*4 for _ in range(epochs['epochMax'])] # will record accuracy, precision, recall, F1
  
  print_freq = epochs['epochMax']//10   # control console print & checkpoint frequency
  save_freq = epochs['epochMax']//5     # control tensorboard save frequency
  for i in range(epochs['epochMax']):
    # get this minibatch
    rand_x = x_vals_train[batchIndxs[i],:]
    rand_y = y_vals_train[batchIndxs[i],:]
    rand_y_dec = y_dec_train[batchIndxs[i]]
    # compute the adaptive learning rate
    learnRateAdapt = learn_rate*(1+learn_rate_decay*i)
    # train on this batch, and record loss on it
    xy_dict = {x_data:rand_x, y_trgt:rand_y, learnRate:learnRateAdapt,\
               keepProb:dropoutKeep, y_dec:rand_y_dec}
    
    # train, and get the loss and class probabilities for this batch
    _,tmp_loss,classProbs = sess.run([train_step,loss,tf.nn.softmax(layerActivs[-1])],\
        feed_dict = xy_dict)
    pred = kl.predFromProb(classProbs)
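    # (kl.predFromProb is assumed to reduce the softmax class probabilities to
    #  integer class labels, e.g. np.argmax(classProbs, axis=1))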
    
    # need to handle nan loss
    if np.isnan(tmp_loss):
      loss_vec[i] = np.inf
      train_vec[i] = [0.0,0.0,0.0,0.0]
      # save summary stats
      xy_dict[yhat_dec] = pred
      l,p = sess.run([losSummary,perfSumm], xy_dict)
      writer.add_summary(l,i)
      writer.add_summary(p,i)
      logging.info('Early termination due to nan loss on epoch %u!'%i)
      break
    
    xy_dict[yhat_dec] = pred
    # compute the evaluation metrics
    tmp_acc_train,tmp_pre_train,tmp_rec_train,tmp_f1s_train = sess.run(\
        [accuracy,precision,recall,F1], xy_dict)
    # smooth it all
    loss_ma = kl.smooth(loss_ma, tmp_loss, theta, (0 == i))
    loss_vec[i]=loss_ma
    train_ma[0] = kl.smooth(train_ma[0], tmp_acc_train, theta, (0 == i))
    train_ma[1] = kl.smooth(train_ma[1], tmp_pre_train, theta, (0 == i))
    train_ma[2] = kl.smooth(train_ma[2], tmp_rec_train, theta, (0 == i))
    train_ma[3] = kl.smooth(train_ma[3], tmp_f1s_train, theta, (0 == i))
    train_vec[i] = train_ma.copy()
         
    if (0 == i % print_freq): 
      logging.info("Smoothed step %u/%u, train batch loss: %0.4f\n\ttrain batch perf: acc=%0.4f,pre=%0.4f,rec=%0.4f,F1=%0.4f"% 
          (i, epochs['epochMax'], loss_ma, *train_ma))
      # checkpoint progress
      saver.save(sess, os.getcwd()+'/log/'+log_file_name+'_ckpt', global_step=i)
    if i % save_freq == 0:
      # save summary stats
      l,p = sess.run([losSummary,perfSumm], xy_dict)
      writer.add_summary(l,i)
      writer.add_summary(p,i)
      
    # check for early termination
    if i > epochs['epochMin']:
      if abs(loss_vec[i]/loss_vec[i-epochs['epochLB']]-1) <= epochs['convgCrit']:
        logging.info('Early termination on epoch %d: relative smoothed loss diff. between %0.6f and %0.6f < %0.6f'%\
              (i,loss_vec[i],loss_vec[i-epochs['epochLB']],epochs['convgCrit']))
        # if terminating early, save the last status
        if i % save_freq != 0:
          l,p = sess.run([losSummary,perfSumm], xy_dict)
          writer.add_summary(l,i)
          writer.add_summary(p,i)
        break
  
  # ---------------------------------------------
  # compute and display the performance metrics for the dev set
  logging.info('Computing Dev Set Performance Metrics...')
  if num_cdmp_actors > 0:
    # get the number of rows short of an even number of batches
    rowsToAdd = (1+x_vals_deve.shape[0]//batch_size)*batch_size - x_vals_deve.shape[0]
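    # e.g. with 1,230 dev rows and batch_size = 100, rowsToAdd = 13*100 - 1230 = 70;
    # when the dev set already divides evenly, rowsToAdd == batch_size and no padding is added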
    if (rowsToAdd > 0) and (rowsToAdd != batch_size):
      batchesX = np.r_[x_vals_deve,x_vals_deve[-rowsToAdd:,:]]
      batchesY = np.r_[y_vals_deve,y_vals_deve[-rowsToAdd:,:]]
      batchesYD = np.r_[y_dec_deve,y_dec_deve[-rowsToAdd:]]
    else:
      batchesX = x_vals_deve; batchesY = y_vals_deve; batchesYD = y_dec_deve
    # create the minibatches
    bX = np.split(batchesX,batchesX.shape[0]/batch_size,axis=0)
    bY = np.split(batchesY,batchesX.shape[0]/batch_size,axis=0)
    bYD = np.split(batchesYD,batchesX.shape[0]/batch_size,axis=0)
    # process the minibatches
    classProbs = []
    for x,y,yd in zip(bX,bY,bYD):
      xy_dict = {x_data:x, y_trgt:y, keepProb:1.0, y_dec:yd}
      classProbs.extend(sess.run(tf.nn.softmax(layerActivs[-1]), xy_dict))
    classProbs = np.r_[classProbs]
    # now remove the extra repeated observations at the end
    if (rowsToAdd > 0) and (rowsToAdd != batch_size):
      classProbs = classProbs[:(x_vals_deve.shape[0])]
    # finally build the new dictionary for model evaluation
    xy_dict = {x_data:x_vals_deve, y_trgt:y_vals_deve, y_dec:y_dec_deve,\
               keepProb:1.0}
    xy_dict[yhat_dec] = kl.predFromProb(classProbs)
  else:
    xy_dict = {x_data:x_vals_deve, y_trgt:y_vals_deve, keepProb:1.0, y_dec:y_dec_deve}
    classProbs = sess.run(tf.nn.softmax(layerActivs[-1]), xy_dict)
    xy_dict[yhat_dec] = kl.predFromProb(classProbs)
  acc_deve,pre_deve,rec_deve,f1s_deve,summ = sess.run([accuracy,precision,\
                                                       recall,F1,perfSumm],xy_dict)
  
  # convert integer class labels to OHE matrix
  prd = tf.placeholder(shape=xy_dict[yhat_dec].shape, dtype=tf.int32)
  OHE = tf.one_hot(prd,num_choice_col)
  predOHE = sess.run(OHE,feed_dict = {prd:xy_dict[yhat_dec]})
  
  # save the summary stats
  devWriter.add_summary(summ,epochs['epochMax']+1)
  
  #%%
  # display final training set performance metrics
  logging.info('Final Training Set Performance')
  logging.info('\tAccuracy = %0.4f'%train_vec[i][0])
  logging.info('\tPrecision = %0.4f'%train_vec[i][1])
  logging.info('\tRecall = %0.4f'%train_vec[i][2])
  logging.info('\tF1 = %0.4f'%train_vec[i][3])
  
  # display final dev set performance metrics
  logging.info('Final Dev Set Performance')
  logging.info('\tAccuracy = %0.4f'%acc_deve)
  logging.info('\tPrecision = %0.4f'%pre_deve)
  logging.info('\tRecall = %0.4f'%rec_deve)
  logging.info('\tF1 = %0.4f'%f1s_deve)
  
  logging.info('Actl. Counts by Class: %r'%np.sum(y_vals_deve,axis=0,dtype=int).tolist())
  logging.info('Pred. Counts by Class: %r'%np.sum(predOHE,axis=0,dtype=int).tolist())
  # save dev set actuals & preds - might not want to do this permanently
  np.savetxt(os.getcwd()+'/out/'+log_file_name+'_actu_pred.csv',np.c_[y_vals_deve,predOHE],'%d',\
             header='first %d columns are actuals, the rest are predictions'%num_choice_col)
  
  # ---------------------------------------------  
  #%%
  # Display performance plots
  # loss
  plt.figure()
  plt.plot(loss_vec[:(i+1)], 'k-')
  plt.title('Smoothed Cross Entropy Loss')
  plt.xlabel('Generation')
  plt.ylabel('Cross Entropy Loss, EMA')
  plt.show()
  plt.savefig(os.getcwd()+'/out/'+log_file_name+'_loss'+'.png')
  
  # classification performances
  plt.figure()
  plt.plot(train_vec[:(i+1)])
  plt.title('Smoothed Performance Metrics')
  plt.xlabel('Generation')
  plt.ylabel('Metrics, EMA')
  plt.legend(['Accuracy','Precision','Recall','F1'],loc='lower right')
  plt.show()
  plt.savefig(os.getcwd()+'/out/'+log_file_name+'_perf'+'.png')
  # ---------------------------------------------
  #%%
  # close out the session and save the event-files
  writer.close(); devWriter.close()
  # serialize final model results
  saver.save(sess, os.getcwd()+'/log/'+log_file_name+'_final')
  sess.close()
  
  sys.stdout.flush()
  logging.info('')
  etime = datetime.datetime.now()
  dtime = etime - stime
  logging.info("RunNN end time: " + etime.strftime("%Y-%m-%d %H:%M:%S") )
  logging.info("RunNN elapsed time: " + str(dtime))
  logging.info('Outputs saved with prefix: %s'%log_file_name)
  sys.stdout.flush()
  
  return [loss_vec[:(i+1)],acc_deve, pre_deve, rec_deve, f1s_deve]
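A hypothetical call, for illustration only (the hyperparameter values are made up; x_data is assumed to be a feature matrix and y_data a one-hot label matrix, and the epochs dict uses the keys the function reads):

epochs = {'epochMax': 20000, 'epochMin': 2000, 'epochLB': 500, 'convgCrit': 1e-4}
loss_hist, acc, pre, rec, f1 = RunNN(x_data, y_data, epochs, batch_size=100,
                                     num_cdmp_actors=4, optimMeth='ADAM',
                                     optimBetas=[0.9, 0.999], scenName='demo')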
Example #6
def RunNN(x_data, y_data, epochs, prng_seed = 0, trainPerc = 0.95, devePerc = 0.05,\
          learn_rate = 1.0, learn_rate_decay = 0.0, regulRate = 0.5, batch_size = 100, num_cdmp_actors = 4,\
          hiddenLayerWs = [1.5,1.0], optimMeth = 'GD', optimBetas=[0.0,0.0], dropoutKeep = 1.0, scenName = 'noname'):
    # create the integer decoded version of y - this is only needed for
    # calculation of the performance metrics
    y_decode = np.argmax(y_data, axis=1)
    # variable counts
    num_data_col = x_data.shape[1]
    num_choice_col = y_data.shape[1]
    # ---------------------------------------------
    #%%
    sys.stdout.flush()
    stime = datetime.datetime.now()
    logging.info("RunNN start time: " +
                 datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
    logging.info('Scenario: ' + scenName)
    sys.stdout.flush()
    sys.stdout.flush()

    prng_seed = kl.set_prngs(prng_seed)

    ops.reset_default_graph()
    sess = tf.Session()

    # prepare log dir, if necessary
    try:
        os.mkdir(os.getcwd() + '/log')
    except FileExistsError:
        pass

    # ---------------------------------------------
    #%%
    # use sklearn to perform stratified randomized partitioning into training and dev sets
    # this is necessary because the vehicle choice dataset is very unbalanced
    sss = StratifiedShuffleSplit(n_splits=1,
                                 train_size=trainPerc,
                                 test_size=devePerc)
    train_indices, deve_indices = next(sss.split(x_data, y_data))
    num_train_rows = len(train_indices)  # need this later on
    # create the partitions
    x_vals_train = x_data[train_indices, :]
    y_vals_train = y_data[train_indices, :]
    y_dec_train = y_decode[train_indices]

    x_vals_deve = x_data[deve_indices, :]
    y_vals_deve = y_data[deve_indices, :]
    y_dec_deve = y_decode[deve_indices]

    logging.info("num_train_rows: %u, num_deve_rows: %u" %
                 (num_train_rows, len(deve_indices)))

    # ---------------------------------------------
    #%%
    # setup training
    a_stdv = 0.1  # standard dev. for initialization of node weights
    modelType = ['NN', 'KT'][num_cdmp_actors > 0]
    logging.info("num_cdmp_actors: %u" % num_cdmp_actors)

    # nodes per layer - first is the number of features, last is always the number
    # of categories C being predicted
    layerWidths = [num_data_col]
    layerWidths.extend(
        [int(round(num_data_col * i, 0)) for i in hiddenLayerWs])
    layerWidths.append(num_choice_col)
    hidden_layers = len(layerWidths) - 2  # number hidden layers
    keepProb = tf.placeholder(tf.float32)  # placeholder for dropout
    learnRate = tf.placeholder(
        tf.float32)  # placeholder for adaptive learning rate

    with tf.name_scope('HyperParms'):
        lr = tf.constant(learn_rate)
        ld = tf.constant(learn_rate_decay)
        la = tf.constant(regulRate)
        bs = tf.constant(batch_size)
        ne = tf.constant(epochs['epochMax'])
        dp = tf.constant(devePerc)
        hl = tf.constant(hidden_layers)
        mt = tf.constant(num_cdmp_actors)
        om = tf.constant(['GD', 'RMSPROP', 'ADAM'].index(optimMeth))
        b1 = tf.constant(optimBetas[0])
        b2 = tf.constant(optimBetas[1])
        dk = tf.constant(dropoutKeep)
        # summaries
        parmSumm = tf.summary.merge([tf.summary.scalar('learn_rate', lr),\
                  tf.summary.scalar('learn_rate_decay',ld),tf.summary.scalar('regul_rate', la),\
                  tf.summary.scalar('minibatch_size',bs),tf.summary.scalar('epochs',ne),\
                  tf.summary.scalar('dev_set_size',dp),tf.summary.scalar('hidden_layers',hl),\
                  tf.summary.scalar('num_cdmp_actors',mt),tf.summary.scalar('optim_method',om),\
                  tf.summary.scalar('optim_beta1',b1),tf.summary.scalar('optim_beta2',b2),\
                  tf.summary.scalar('dropout_keep',dk)])

    # talk some
    logging.info('Hyperparameters\n\tlearning rate: %0.2f(%0.2f)\n\tregularization rate: %0.2f\n\tminibatch size: %u\n\tnumber epochs: %d\n\thidden layers: %d\n\tCDMP actors: %d\n\toptim. method: %s(%0.2f,%0.2f)\n\tdropout keep: %0.2f'\
          %(learn_rate,learn_rate_decay,regulRate,batch_size,epochs['epochMax'],hidden_layers,num_cdmp_actors,optimMeth,optimBetas[0],optimBetas[1],dropoutKeep))

    # create the logfile prefix
    log_file_name = 'log_%s%04d_%s_%s_%05d_%s_%d' % (
        modelType, hidden_layers, scenName, optimMeth, epochs['epochMax'],
        stime.strftime('%Y%m%d_%H%M%S'), prng_seed)

    # ---------------------------------------------
    #%%
    # number of rows could be batch_size, num_rows_train, or num_rows_deve.
    # So we mark it as None, which really means variable-size
    with tf.name_scope('Input'):
        x_data = tf.placeholder(shape=[None, num_data_col],
                                dtype=tf.float32,
                                name='X')
        y_trgt = tf.placeholder(shape=[None, num_choice_col],
                                dtype=tf.float32,
                                name='Y')
        y_dec = tf.placeholder(shape=[
            None,
        ],
                               dtype=tf.float32,
                               name='Ydecode')
        yhat_dec = tf.placeholder(shape=[
            None,
        ],
                                  dtype=tf.float32,
                                  name='Yhatdecode')

    logging.info("x_data shape: %s" % str(x_data.shape))
    logging.info("y_trgt shape: %s" % str(y_trgt.shape))

    # ---------------------------------------------
    #%%
    # build each of the layers
    lRng = range(1, len(layerWidths))
    ids = dict.fromkeys(lRng, 0.0)
    layerParams = {'A': ids, 'b': ids.copy()}
    layerActivs = [None] * (hidden_layers + 2
                            )  #[0] is an unused placeholder for the data
    for i in lRng:
        # define the scope name
        if (i == (hidden_layers + 1)) and (num_cdmp_actors > 0):
            nam = 'CDMP'
        else:
            nam = 'Layer%d' % i

        with tf.name_scope(nam):
            # create the variables for the node parameters
            if (i == (hidden_layers + 1)) and (num_cdmp_actors > 0):
                # map final hidden layer to the influences vector
                Aw  = tf.Variable(tf.random_normal(shape=[layerWidths[i-1], num_cdmp_actors],\
                                                   mean=0.0, stddev=a_stdv))
                logging.info("Aw shape: %s" % str(Aw.shape))
                # NOTE WELL: we do not "learn" on wghts.
                wghts = tf.matmul(layerActivs[i - 1], Aw)
                logging.info("wghts shape: %s" % str(wghts.shape))
                # wghts have shape [batch_size, num_cdmp_actors], so there is one vector
                # (1 dim) for each household in this batch

                # map final hidden layer to the utilities matrix
                Au =  tf.Variable(tf.random_normal(shape=[layerWidths[i-1], num_cdmp_actors,\
                  num_choice_col], mean=0.0, stddev=a_stdv))
                logging.info("Au shape: %s" % str(Au.shape))
                # NOTE WELL: we do not "learn" on utils.
                utils = tf.tensordot(layerActivs[i - 1], Au, axes=[[1], [0]])
                logging.info("utils shape: %s" % str(utils.shape))
                # utils have shape [batch_size, num_cdmp_actors, num_choice_col], so
                # there is one matrix (2 dim) for each household in this batch
            else:
                layerParams['A'][i] = tf.Variable(tf.random_normal(shape=[layerWidths[i-1],\
                            layerWidths[i]], mean=0.0, stddev=a_stdv),name='A')
                layerParams['b'][i] = tf.Variable(
                    tf.zeros(shape=[layerWidths[i]]),
                    name='b')  # a 0-rank array?
                logging.info('A%d shape: %s, b%d shape: %s' %
                             (i, layerParams['A'][i].shape, i,
                              layerParams['b'][i].shape))
            # create the 'variable' for the layer output
            if i == 1:
                # first layer, so input is the data
                layerActivs[i] = tf.nn.relu(tf.add(tf.matmul(x_data,layerParams['A'][i]),\
                           layerParams['b'][i]))
            elif i == (hidden_layers + 1):
                # last layer, so no activation function
                if num_cdmp_actors > 0:
                    # put the influences and utilities together
                    zeta_0 = tf.tensordot(wghts, utils, axes=[[1], [1]])
                    logging.info("zeta_0 shape: %s" % str(zeta_0.shape))
                    # because it is a *tensor* product, zeta_0 is a tensor of shape (BS, BS, NCC)
                    # and includes all the mismatches where weight-vector(i) was paired with
                    # util-matrix(j), i.e. every combination of one household's weights with every
                    # other household's utilities. But we only want the ones where they match.
                    # this slice, gather, and reshape picks out the zeta vectors along the
                    # "bi == bj" diagonal.
                    slice_ndx = tf.constant([[i,j,k] for i in range(batch_size) \
                                             for j in range(batch_size) for k in \
                                             range(num_choice_col) if i==j])
                    # gather the matching entries of the large zeta tensor into a plain vector
                    zeta_v = tf.gather_nd(zeta_0, slice_ndx)
                    # NOTE WELL: we do not "learn" on zeta.
                    # reshape that vector into the desired [batch_size, num_choice_col] shape
                    layerActivs[i] = tf.reshape(zeta_v,
                                                [batch_size, num_choice_col])
                    logging.info(
                        "After contraction-by-selection, shape of Zeta: %s" %
                        (layerActivs[i].shape))
                else:
                    layerActivs[i] =  tf.add(tf.matmul(layerActivs[i-1], layerParams['A'][i]),\
                               layerParams['b'][i])
            else:
                # hidden layer, so apply the activation function ...
                layerActivs[i] = tf.nn.relu(tf.add(tf.matmul(layerActivs[i-1],layerParams['A'][i]),\
                           layerParams['b'][i]))
                # ... then add dropout to the hidden layer
                layerActivs[i] = tf.nn.dropout(layerActivs[i], keepProb)
            logging.info('Activ%d shape: %s' % (i, layerActivs[i].shape))

    # ---------------------------------------------
    #%%
    # note that 'labels' are real numbers in the [0,1] interval,
    # logits are in (-inf, +inf)
    with tf.name_scope('Loss'):
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=y_trgt,
                                                    logits=layerActivs[-1]))
        losSummary = tf.summary.scalar('Loss', loss)

    with tf.name_scope('Train'):
        regularizer = tf.add_n(
            [tf.nn.l2_loss(A) for A in layerParams['A'].values()])
        if optimMeth == 'GD':
            my_opt = tf.train.GradientDescentOptimizer(learnRate)
        elif optimMeth == 'RMSPROP':
            my_opt = tf.train.RMSPropOptimizer(learnRate,
                                               momentum=optimBetas[0])
        elif optimMeth == 'ADAM':
            my_opt = tf.train.AdamOptimizer(learnRate,
                                            beta1=optimBetas[0],
                                            beta2=optimBetas[1])
        else:
            raise ValueError(
                'Optimizer can only be "GD", "RMSPROP", or "ADAM"!')
        train_step = my_opt.minimize(loss + regulRate * regularizer /
                                     (2 * batch_size))

    # ---------------------------------------------
    #%%
    # record standard classification performance metrics
    # note that these require integer labels *not* OHE!
    with tf.name_scope('Eval'):
        _, accuracy = tf.metrics.accuracy(y_dec, yhat_dec)
        _, precision = tf.metrics.precision(y_dec, yhat_dec)
        _, recall = tf.metrics.recall(y_dec, yhat_dec)
        F1 = 2 * tf.divide(tf.multiply(precision, recall),
                           0.0001 + tf.add(precision, recall))
        # define the writer summaries
        accSummary = tf.summary.scalar('Accuracy', accuracy)
        preSummary = tf.summary.scalar('Precision', precision)
        recSummary = tf.summary.scalar('Recall', recall)
        f1sSummary = tf.summary.scalar('F1', F1)
        perfSumm = tf.summary.merge(
            [accSummary, preSummary, recSummary, f1sSummary])

    # ---------------------------------------------
    #%%
    # finally perform the training simulation
    # setup for results & model serialization
    writer = tf.summary.FileWriter(os.getcwd() + '/log/train/' + log_file_name,
                                   sess.graph)
    writer.add_summary(parmSumm.eval(session=sess))
    devWriter = tf.summary.FileWriter(
        os.getcwd() + '/log/dev/' + log_file_name, sess.graph)
    devWriter.add_summary(parmSumm.eval(session=sess))

    sess.run(
        [tf.global_variables_initializer(),
         tf.local_variables_initializer()])
    # The saver object will save all the variables
    saver = tf.train.Saver()

    # randomly generate the minibatches all at once
    batchIndxs = np.random.randint(0, num_train_rows,
                                   (epochs['epochMax'], batch_size))

    # set up for moving averages
    theta = 0.010  # weight on new value  - only for printing/plotting MAs
    loss_ma = 0.0
    train_ma = [0.0] * 4  # accuracy, precision, recall, F1

    loss_vec = [0.0] * epochs['epochMax']
    train_vec = [[0.0] * 4 for _ in range(epochs['epochMax'])
                 ]  # will record accuracy, precision, recall, F1

    print_freq = epochs[
        'epochMax'] // 10  # control console print & checkpoint frequency
    save_freq = epochs['epochMax'] // 5  # control tensorboard save frequency
    for i in range(epochs['epochMax']):
        # get this minibatch
        rand_x = x_vals_train[batchIndxs[i], :]
        rand_y = y_vals_train[batchIndxs[i], :]
        rand_y_dec = y_dec_train[batchIndxs[i]]
        # compute the adaptive learning rate
        learnRateAdapt = learn_rate * (1 + learn_rate_decay * i)
        # train on this batch, and record loss on it
        xy_dict = {x_data:rand_x, y_trgt:rand_y, learnRate:learnRateAdapt,\
                   keepProb:dropoutKeep, y_dec:rand_y_dec}

        # train, and get the loss and class probabilities for this batch
        _,tmp_loss,classProbs = sess.run([train_step,loss,tf.nn.softmax(layerActivs[-1])],\
            feed_dict = xy_dict)
        pred = kl.predFromProb(classProbs)

        # need to handle nan loss
        if np.isnan(tmp_loss):
            loss_vec[i] = np.inf
            train_vec[i] = [0.0, 0.0, 0.0, 0.0]
            # save summary stats
            xy_dict[yhat_dec] = pred
            l, p = sess.run([losSummary, perfSumm], xy_dict)
            writer.add_summary(l, i)
            writer.add_summary(p, i)
            logging.info('Early termination due to nan loss on epoch %u!' % i)
            break

        xy_dict[yhat_dec] = pred
        # compute the evaluation metrics
        tmp_acc_train,tmp_pre_train,tmp_rec_train,tmp_f1s_train = sess.run(\
            [accuracy,precision,recall,F1], xy_dict)
        # smooth it all
        loss_ma = kl.smooth(loss_ma, tmp_loss, theta, (0 == i))
        loss_vec[i] = loss_ma
        train_ma[0] = kl.smooth(train_ma[0], tmp_acc_train, theta, (0 == i))
        train_ma[1] = kl.smooth(train_ma[1], tmp_pre_train, theta, (0 == i))
        train_ma[2] = kl.smooth(train_ma[2], tmp_rec_train, theta, (0 == i))
        train_ma[3] = kl.smooth(train_ma[3], tmp_f1s_train, theta, (0 == i))
        train_vec[i] = train_ma.copy()

        if (0 == i % print_freq):
            logging.info(
                "Smoothed step %u/%u, train batch loss: %0.4f\n\ttrain batch perf: acc=%0.4f,pre=%0.4f,rec=%0.4f,F1=%0.4f"
                % (i, epochs['epochMax'], loss_ma, *train_ma))
            # checkpoint progress
            saver.save(sess,
                       os.getcwd() + '/log/' + log_file_name + '_ckpt',
                       global_step=i)
        if i % save_freq == 0:
            # save summary stats
            l, p = sess.run([losSummary, perfSumm], xy_dict)
            writer.add_summary(l, i)
            writer.add_summary(p, i)

        # check for early termination
        if i > epochs['epochMin']:
            if abs(loss_vec[i] / loss_vec[i - epochs['epochLB']] -
                   1) <= epochs['convgCrit']:
                logging.info('Early termination on epoch %d: relative smoothed loss diff. between %0.6f and %0.6f < %0.6f'%\
                      (i,loss_vec[i],loss_vec[i-epochs['epochLB']],epochs['convgCrit']))
                # if terminating early, save the last status
                if i % save_freq != 0:
                    l, p = sess.run([losSummary, perfSumm], xy_dict)
                    writer.add_summary(l, i)
                    writer.add_summary(p, i)
                break

    # ---------------------------------------------
    # compute and display the performance metrics for the dev set
    logging.info('Computing Dev Set Performance Metrics...')
    if num_cdmp_actors > 0:
        # get the number of rows short of an even number of batches
        rowsToAdd = (1 + x_vals_deve.shape[0] //
                     batch_size) * batch_size - x_vals_deve.shape[0]
        if (rowsToAdd > 0) and (rowsToAdd != batch_size):
            batchesX = np.r_[x_vals_deve, x_vals_deve[-rowsToAdd:, :]]
            batchesY = np.r_[y_vals_deve, y_vals_deve[-rowsToAdd:, :]]
            batchesYD = np.r_[y_dec_deve, y_dec_deve[-rowsToAdd:]]
        else:
            batchesX = x_vals_deve
            batchesY = y_vals_deve
            batchesYD = y_dec_deve
        # create the minibatches
        bX = np.split(batchesX, batchesX.shape[0] / batch_size, axis=0)
        bY = np.split(batchesY, batchesX.shape[0] / batch_size, axis=0)
        bYD = np.split(batchesYD, batchesX.shape[0] / batch_size, axis=0)
        # process the minibatches
        classProbs = []
        for x, y, yd in zip(bX, bY, bYD):
            xy_dict = {x_data: x, y_trgt: y, keepProb: 1.0, y_dec: yd}
            classProbs.extend(sess.run(tf.nn.softmax(layerActivs[-1]),
                                       xy_dict))
        classProbs = np.r_[classProbs]
        # now remove the extra repeated observations at the end
        if (rowsToAdd > 0) and (rowsToAdd != batch_size):
            classProbs = classProbs[:(x_vals_deve.shape[0])]
        # finally build the new dictionary for model evaluation
        xy_dict = {x_data:x_vals_deve, y_trgt:y_vals_deve, y_dec:y_dec_deve,\
                   keepProb:1.0}
        xy_dict[yhat_dec] = kl.predFromProb(classProbs)
    else:
        xy_dict = {
            x_data: x_vals_deve,
            y_trgt: y_vals_deve,
            keepProb: 1.0,
            y_dec: y_dec_deve
        }
        classProbs = sess.run(tf.nn.softmax(layerActivs[-1]), xy_dict)
        xy_dict[yhat_dec] = kl.predFromProb(classProbs)
    acc_deve,pre_deve,rec_deve,f1s_deve,summ = sess.run([accuracy,precision,\
                                                         recall,F1,perfSumm],xy_dict)

    # convert integer class labels to OHE matrix
    prd = tf.placeholder(shape=xy_dict[yhat_dec].shape, dtype=tf.int32)
    OHE = tf.one_hot(prd, num_choice_col)
    predOHE = sess.run(OHE, feed_dict={prd: xy_dict[yhat_dec]})

    # save the summary stats
    devWriter.add_summary(summ, epochs['epochMax'] + 1)

    #%%
    # display final training set performance metrics
    logging.info('Final Training Set Performance')
    logging.info('\tAccuracy = %0.4f' % train_vec[i][0])
    logging.info('\tPrecision = %0.4f' % train_vec[i][1])
    logging.info('\tRecall = %0.4f' % train_vec[i][2])
    logging.info('\tF1 = %0.4f' % train_vec[i][3])

    # display final dev set performance metrics
    logging.info('Final Dev Set Performance')
    logging.info('\tAccuracy = %0.4f' % acc_deve)
    logging.info('\tPrecision = %0.4f' % pre_deve)
    logging.info('\tRecall = %0.4f' % rec_deve)
    logging.info('\tF1 = %0.4f' % f1s_deve)

    logging.info('Actl. Counts by Class: %r' %
                 np.sum(y_vals_deve, axis=0, dtype=int).tolist())
    logging.info('Pred. Counts by Class: %r' %
                 np.sum(predOHE, axis=0, dtype=int).tolist())
    # save dev set actuals & preds - might not want to do this permanently
    np.savetxt(os.getcwd()+'/out/'+log_file_name+'_actu_pred.csv',np.c_[y_vals_deve,predOHE],'%d',\
               header='first %d columns are actuals, the rest are predictions'%num_choice_col)

    # ---------------------------------------------
    #%%
    # Display performance plots
    # loss
    plt.figure()
    plt.plot(loss_vec[:(i + 1)], 'k-')
    plt.title('Smoothed Cross Entropy Loss')
    plt.xlabel('Generation')
    plt.ylabel('Cross Entropy Loss, EMA')
    plt.show()
    plt.savefig(os.getcwd() + '/out/' + log_file_name + '_loss' + '.png')

    # classification performances
    plt.figure()
    plt.plot(train_vec[:(i + 1)])
    plt.title('Smoothed Performance Metrics')
    plt.xlabel('Generation')
    plt.ylabel('Metrics, EMA')
    plt.legend(['Accuracy', 'Precision', 'Recall', 'F1'], loc='lower right')
    plt.show()
    plt.savefig(os.getcwd() + '/out/' + log_file_name + '_perf' + '.png')

    # ---------------------------------------------
    #%%
    # close out the session and save the event-files
    writer.close()
    devWriter.close()
    # serialize final model results
    saver.save(sess, os.getcwd() + '/log/' + log_file_name + '_final')
    sess.close()

    sys.stdout.flush()
    logging.info('')
    etime = datetime.datetime.now()
    dtime = etime - stime
    logging.info("RunNN end time: " + etime.strftime("%Y-%m-%d %H:%M:%S"))
    logging.info("RunNN elapsed time: " + str(dtime))
    logging.info('Outputs saved with prefix: %s' % log_file_name)
    sys.stdout.flush()

    return [loss_vec[:(i + 1)], acc_deve, pre_deve, rec_deve, f1s_deve]