Example #1
File: choa.py  Project: 460130107/choa
def do_training(indices, training_blob, heldout_blob, weights, weights_out, debiasing_weights):
  """
  Helper/wrapper function for parallel perceptron training.
  Runs up to FLAGS.maxepochs epochs of perceptron training, reporting
  accuracy on the training data and, when FLAGS.decodeheldout is set,
  an F-score on the heldout data after each epoch.
  """
  # Under de-biasing mode, we only allow features present in a given initial
  # weight vector. These are features that have been "selected" under a previously
  # run regularized training scheme.
  valid_feature_names = None
  if FLAGS.debiasing:
    valid_feature_names = getFeatureNames(debiasing_weights)

  for epoch in range(FLAGS.maxepochs):
    # Randomize order of examples; broadcast this randomized order to all processes.
    # The particular subset any perceptron process gets for this epoch is dependent
    # upon this randomized ordering.
    if myRank == 0 and FLAGS.shuffle:
      random.shuffle(indices)
    indices = mpi.broadcast(value=indices, root=0)

    ##################################################
    # SEARCH: Find 1-best under current model
    ##################################################
    # Run one epoch over training data
    io_helper.write_master("===EPOCH %d TRAINING===\n" %(epoch))
    newWeights_avg = perceptron_parallel(epoch, indices, training_blob, weights,
                                         valid_feature_names)
    ####################################
    # Dump weights for this iteration
    ####################################
    if myRank == 0:
      cPickle.dump(newWeights_avg, weights_out, protocol=cPickle.HIGHEST_PROTOCOL)
      # Flush so this epoch's weights hit disk before the next epoch begins.
      weights_out.flush()

    ##################################################
    # Try a corpus re-decode here with the new weights
    # This returns a HELDOUT F-SCORE
    ##################################################
    # Decode dev data with same new learned weight vector
    if FLAGS.decodeheldout:
      io_helper.write_master("===EPOCH %d DECODE HELDOUT===\n" %(epoch))
      decode_parallel(newWeights_avg, indices_dev, heldout_blob, "dev")
  if myRank == 0:
    weights_out.close()
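
The per-epoch updates themselves happen inside perceptron_parallel, which neither listing shows. Below is a minimal single-process sketch of the averaged-perceptron epoch such a routine typically computes; the decode and features callables are hypothetical stand-ins, not part of choa or nile.

# Hedged sketch (not from either project): one averaged-perceptron epoch.
# `decode` and `features` are hypothetical stand-ins for the projects'
# search and feature-extraction code.
from collections import defaultdict

def averaged_perceptron_epoch(examples, weights, decode, features):
  total = defaultdict(float)  # running sum of the weight vector
  n = 0
  for x, gold in examples:
    pred = decode(x, weights)
    if pred != gold:
      # Standard perceptron update: reward gold features, penalize predicted.
      for f, v in features(x, gold).items():
        weights[f] = weights.get(f, 0.0) + v
      for f, v in features(x, pred).items():
        weights[f] = weights.get(f, 0.0) - v
    for f, v in weights.items():
      total[f] += v
    n += 1
  # Averaging over all intermediate weight vectors reduces variance.
  averaged = dict((f, v / float(n)) for f, v in total.items()) if n else {}
  return weights, averaged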
Example #2
File: nile.py  Project: cidermole/nile-1
def do_training(indices, training_blob, heldout_blob, weights, weights_out, debiasing_weights):
  """
  Helper/wrapper function for parallel perceptron training.
  Runs up to FLAGS.maxepochs epochs of perceptron training, reporting
  accuracy on the training data and, when FLAGS.decodeheldout is set,
  an F-score on the heldout data after each epoch.
  """
  # Under de-biasing mode, we only allow features present in a given initial
  # weight vector. These are features that have been "selected" under a previously
  # run regularized training scheme.
  valid_feature_names = None
  if FLAGS.debiasing:
    valid_feature_names = getFeatureNames(debiasing_weights)

  # load training instances into memory
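  # (readlines() below keeps every instance file fully in memory for the
  # whole run; each epoch's shuffled copy is then served from StringIO
  # without touching disk again.)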
  active_instances = [key for key in ['f_instances', 'e_instances',
                                      'etree_instances', 'ftree_instances',
                                      'gold_instances', 'a1_instances',
                                      'a2_instances', 'inverse_instances']
                      if training_blob[key] is not None]
  for key in active_instances:
    training_blob[key+'_unshuffled'] = training_blob[key].readlines()

  for epoch in range(FLAGS.maxepochs):
    # Randomize order of examples; broadcast this randomized order to all processes.
    # The particular subset any perceptron process gets for this epoch is dependent
    # upon this randomized ordering.
    if myRank == 0 and FLAGS.shuffle:
      random.shuffle(indices)
    indices = mpi.bcast(indices, root=0)

    # Create virtual files in shuffled order
    for key in active_instances:
      shuffled = StringIO.StringIO()
      unshuffled = training_blob[key+'_unshuffled']
      for i in indices:
        shuffled.write(unshuffled[i])
      shuffled.seek(0)
      training_blob[key] = shuffled

    ##################################################
    # SEARCH: Find 1-best under current model
    ##################################################
    # Run one epoch over training data
    io_helper.write_master("===EPOCH %d TRAINING===\n" %(epoch))
    newWeights_avg = perceptron_parallel(epoch, indices, training_blob, weights,
                                         valid_feature_names)
    ####################################
    # Dump weights for this iteration
    ####################################
    if myRank == 0:
      json.dump(newWeights_avg, weights_out)

      weights_out.write('\n')
      weights_out.flush()

    ##################################################
    # Try a corpus re-decode here with the new weights
    # This returns a HELDOUT F-SCORE
    ##################################################
    # Decode dev data with same new learned weight vector
    if FLAGS.decodeheldout:
      io_helper.write_master("===EPOCH %d DECODE HELDOUT===\n" %(epoch))
      decode_parallel(newWeights_avg, indices_dev, heldout_blob, "dev")

    ##################################################
    # Reset heldout files for reading
    ##################################################
    if FLAGS.decodeheldout:
      for key in active_instances:
        heldout_blob[key].seek(0)

  if myRank == 0:
    weights_out.close()
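
The shuffled virtual-file trick in the nile variant is worth isolating: each instance file is read once, and every epoch is then served in a new order through an in-memory file-like object. A minimal standalone demo, using Python 3's io.StringIO in place of the Python 2 StringIO.StringIO above:

# Standalone demo of the shuffled virtual-file trick used above.
import random
from io import StringIO  # stands in for Python 2's StringIO.StringIO

lines = ["instance-%d\n" % i for i in range(5)]  # one instance per line
indices = list(range(len(lines)))
random.shuffle(indices)

shuffled = StringIO()
for i in indices:
  shuffled.write(lines[i])
shuffled.seek(0)  # rewind so consumers read from the beginning

print(shuffled.readline())  # first instance in the shuffled order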