Example #1
def __init__(self, gpuID=None, stream=None):
    # optionally bind to a specific GPU: tear down the current context first,
    # then select the requested device
    if gpuID is not None:
        if 0 <= gpuID < len(cuda.list_devices()):
            cuda.close()
            cuda.select_device(gpuID)
        else:
            raise ValueError('GPU ID not found')
    # reuse a caller-supplied stream, or create a fresh one
    if stream is None:
        self.stream = cuda.stream()
    else:
        assert isinstance(stream, numba.cuda.cudadrv.driver.Stream)
        self.stream = stream
    # cuBLAS handle bound to this stream, plus default kernel launch dimensions
    self.blas = numbapro.cudalib.cublas.Blas(stream=self.stream)
    self.blockdim = 32
    self.blockdim2 = (32, 32)
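The constructor above validates a GPU index, switches devices, and attaches a stream plus a cuBLAS handle. The same select-a-device-and-build-a-stream pattern can be sketched standalone with just numba.cuda; the function name make_stream_on is made up for illustration, and the cuBLAS handle is omitted because it relies on the old NumbaPro cudalib:

from numba import cuda

def make_stream_on(gpu_id=None):
    # validate and select the requested GPU, tearing down the current context first
    if gpu_id is not None:
        if 0 <= gpu_id < len(cuda.list_devices()):
            cuda.close()
            cuda.select_device(gpu_id)
        else:
            raise ValueError('GPU ID not found')
    # return a fresh stream on the selected (or default) device
    return cuda.stream()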
Example #2
def main(params):
  batch_size = params['batch_size']
  dataset = params['dataset']
  word_count_threshold = params['word_count_threshold']
  do_grad_check = params['do_grad_check']
  max_epochs = params['max_epochs']
  host = socket.gethostname() # get computer hostname

  params['mode'] = 'CPU'

  # fetch the data provider
  dp = getDataProvider(dataset)

  misc = {} # stores various misc items that need to be passed around the framework

  # go over all training sentences and find the vocabulary we want to use, i.e. the words that occur
  # at least word_count_threshold number of times
  misc['wordtoix'], misc['ixtoword'], bias_init_vector = preProBuildWordVocab(dp.iterSentences('train'), word_count_threshold)
  # delegate the initialization of the model to the Generator class
  BatchGenerator = decodeGenerator(params)
  init_struct = BatchGenerator.init(params, misc)
  model, misc['update'], misc['regularize'] = (init_struct['model'], init_struct['update'], init_struct['regularize'])
  
  if params['mode'] == 'GPU':
    # force overwrite here. This is a bit of a hack, not happy about it
    model['bd'] = gp.garray(bias_init_vector.reshape(1, bias_init_vector.size))
  else:
    model['bd'] = bias_init_vector.reshape(1, bias_init_vector.size)

  print 'model init done.'
  print 'model has keys: ' + ', '.join(model.keys())
  print 'updating: ' + ', '.join( '%s [%dx%d]' % (k, model[k].shape[0], model[k].shape[1]) for k in misc['update'])
  print 'regularizing: ' + ', '.join( '%s [%dx%d]' % (k, model[k].shape[0], model[k].shape[1]) for k in misc['regularize'])
  print 'number of learnable parameters total: %d' % (sum(model[k].shape[0] * model[k].shape[1] for k in misc['update']), )

  # initialize the Solver and the cost function
  solver = Solver()
  def costfun(batch, model):
    # wrap the cost function to abstract some things away from the Solver
    return RNNGenCost(batch, model, params, misc)

  # calculate how many iterations we need
  num_sentences_total = dp.getSplitSize('train', ofwhat = 'sentences')
  num_iters_one_epoch = num_sentences_total / batch_size
  max_iters = max_epochs * num_iters_one_epoch
  eval_period_in_epochs = params['eval_period']
  eval_period_in_iters = max(1, int(num_iters_one_epoch * eval_period_in_epochs))
  abort = False
  top_val_ppl2 = -1
  smooth_train_ppl2 = len(misc['ixtoword']) # initially size of dictionary of confusion
  val_ppl2 = len(misc['ixtoword'])
  last_status_write_time = 0 # for writing worker job status reports
  json_worker_status = {}
  json_worker_status['params'] = params
  json_worker_status['history'] = []
  max_iters = 1
  for it in xrange(max_iters):
    if abort: break
    t0 = time.time()
    # fetch a batch of data
    batch = [dp.sampleImageSentencePair() for i in xrange(batch_size)]
    # evaluate cost, gradient and perform parameter update
    step_struct = solver.step(batch, model, costfun, **params)
    cost = step_struct['cost']
    dt = time.time() - t0

    # print training statistics
    train_ppl2 = step_struct['stats']['ppl2']
    smooth_train_ppl2 = 0.99 * smooth_train_ppl2 + 0.01 * train_ppl2 # smooth exponentially decaying moving average
    if it == 0: smooth_train_ppl2 = train_ppl2 # start out where we start out
    epoch = it * 1.0 / num_iters_one_epoch
    print '%d/%d batch done in %.3fs. at epoch %.2f. loss cost = %f, reg cost = %f, ppl2 = %.2f (smooth %.2f)' \
          % (it, max_iters, dt, epoch, cost['loss_cost'], cost['reg_cost'], \
             train_ppl2, smooth_train_ppl2)

    # perform gradient check if desired, with a bit of a burnin time (10 iterations)
    #if it == 10 and do_grad_check:
    #  solver.gradCheck(batch, model, costfun)
    #  print 'done gradcheck. continue?'
    #  raw_input()
    #
    ## detect if loss is exploding and kill the job if so
    #total_cost = cost['total_cost']
    #if it == 0:
    #  total_cost0 = total_cost # store this initial cost
    #if total_cost > total_cost0 * 2:
    #  print 'Aborting, cost seems to be exploding. Run gradcheck? Lower the learning rate?'
    #  abort = True # set the abort flag, we'll break out
    #
    ## logging: write JSON files for visual inspection of the training
    #tnow = time.time()
    #if tnow > last_status_write_time + 60*1: # every now and then let's write a report
    #  last_status_write_time = tnow
    #  jstatus = {}
    #  jstatus['time'] = datetime.datetime.now().isoformat()
    #  jstatus['iter'] = (it, max_iters)
    #  jstatus['epoch'] = (epoch, max_epochs)
    #  jstatus['time_per_batch'] = dt
    #  jstatus['smooth_train_ppl2'] = smooth_train_ppl2
    #  jstatus['val_ppl2'] = val_ppl2 # just write the last available one
    #  jstatus['train_ppl2'] = train_ppl2
    #  json_worker_status['history'].append(jstatus)
    #  status_file = os.path.join(params['worker_status_output_directory'], host + '_status.json')
    #  try:
    #    json.dump(json_worker_status, open(status_file, 'w'))
    #  except Exception, e: # todo be more clever here
    #    print 'tried to write worker status into %s but got error:' % (status_file, )
    #    print e
    #
    ## perform perplexity evaluation on the validation set and save a model checkpoint if it's good
    #is_last_iter = (it+1) == max_iters
    #if (((it+1) % eval_period_in_iters) == 0 and it < max_iters - 5) or is_last_iter:
    #  val_ppl2 = eval_split('val', dp, model, params, misc) # perform the evaluation on VAL set
    #  print 'validation perplexity = %f' % (val_ppl2, )
    #  write_checkpoint_ppl_threshold = params['write_checkpoint_ppl_threshold']
    #  if val_ppl2 < top_val_ppl2 or top_val_ppl2 < 0:
    #    if val_ppl2 < write_checkpoint_ppl_threshold or write_checkpoint_ppl_threshold < 0:
    #      # if we beat a previous record or if this is the first time
    #      # AND we also beat the user-defined threshold or it doesn't exist
    #      top_val_ppl2 = val_ppl2
    #      filename = 'model_checkpoint_%s_%s_%s_%.2f.p' % (dataset, host, params['fappend'], val_ppl2)
    #      filepath = os.path.join(params['checkpoint_output_directory'], filename)
    #      checkpoint = {}
    #      checkpoint['it'] = it
    #      checkpoint['epoch'] = epoch
    #      checkpoint['model'] = model
    #      checkpoint['params'] = params
    #      checkpoint['perplexity'] = val_ppl2
    #      checkpoint['wordtoix'] = misc['wordtoix']
    #      checkpoint['ixtoword'] = misc['ixtoword']
    #      try:
    #        pickle.dump(checkpoint, open(filepath, "wb"))
    #        print 'saved checkpoint in %s' % (filepath, )
    #      except Exception, e: # todo be more clever here
    #        print 'tried to write checkpoint into %s but got error: ' % (filepath, )
    #        print e
    cuda.close()
Example #3
def main(params):
    batch_size = params['batch_size']
    dataset = params['dataset']
    word_count_threshold = params['word_count_threshold']
    do_grad_check = params['do_grad_check']
    max_epochs = params['max_epochs']
    host = socket.gethostname()  # get computer hostname

    params['mode'] = 'CPU'

    # fetch the data provider
    dp = getDataProvider(dataset)

    misc = {
    }  # stores various misc items that need to be passed around the framework

    # go over all training sentences and find the vocabulary we want to use, i.e. the words that occur
    # at least word_count_threshold number of times
    misc['wordtoix'], misc[
        'ixtoword'], bias_init_vector = preProBuildWordVocab(
            dp.iterSentences('train'), word_count_threshold)
    # delegate the initialization of the model to the Generator class
    BatchGenerator = decodeGenerator(params)
    init_struct = BatchGenerator.init(params, misc)
    model, misc['update'], misc['regularize'] = (init_struct['model'],
                                                 init_struct['update'],
                                                 init_struct['regularize'])

    if params['mode'] == 'GPU':
        # force overwrite here. This is a bit of a hack, not happy about it
        model['bd'] = gp.garray(
            bias_init_vector.reshape(1, bias_init_vector.size))
    else:
        model['bd'] = bias_init_vector.reshape(1, bias_init_vector.size)

    print 'model init done.'
    print 'model has keys: ' + ', '.join(model.keys())
    print 'updating: ' + ', '.join('%s [%dx%d]' %
                                   (k, model[k].shape[0], model[k].shape[1])
                                   for k in misc['update'])
    print 'regularizing: ' + ', '.join(
        '%s [%dx%d]' % (k, model[k].shape[0], model[k].shape[1])
        for k in misc['regularize'])
    print 'number of learnable parameters total: %d' % (sum(
        model[k].shape[0] * model[k].shape[1] for k in misc['update']), )

    # initialize the Solver and the cost function
    solver = Solver()

    def costfun(batch, model):
        # wrap the cost function to abstract some things away from the Solver
        return RNNGenCost(batch, model, params, misc)

    # calculate how many iterations we need
    num_sentences_total = dp.getSplitSize('train', ofwhat='sentences')
    num_iters_one_epoch = num_sentences_total / batch_size
    max_iters = max_epochs * num_iters_one_epoch
    eval_period_in_epochs = params['eval_period']
    eval_period_in_iters = max(
        1, int(num_iters_one_epoch * eval_period_in_epochs))
    abort = False
    top_val_ppl2 = -1
    smooth_train_ppl2 = len(
        misc['ixtoword'])  # initially size of dictionary of confusion
    val_ppl2 = len(misc['ixtoword'])
    last_status_write_time = 0  # for writing worker job status reports
    json_worker_status = {}
    json_worker_status['params'] = params
    json_worker_status['history'] = []
    max_iters = 1
    for it in xrange(max_iters):
        if abort: break
        t0 = time.time()
        # fetch a batch of data
        batch = [dp.sampleImageSentencePair() for i in xrange(batch_size)]
        # evaluate cost, gradient and perform parameter update
        step_struct = solver.step(batch, model, costfun, **params)
        cost = step_struct['cost']
        dt = time.time() - t0

        # print training statistics
        train_ppl2 = step_struct['stats']['ppl2']
        smooth_train_ppl2 = 0.99 * smooth_train_ppl2 + 0.01 * train_ppl2  # smooth exponentially decaying moving average
        if it == 0:
            smooth_train_ppl2 = train_ppl2  # start out where we start out
        epoch = it * 1.0 / num_iters_one_epoch
        print '%d/%d batch done in %.3fs. at epoch %.2f. loss cost = %f, reg cost = %f, ppl2 = %.2f (smooth %.2f)' \
              % (it, max_iters, dt, epoch, cost['loss_cost'], cost['reg_cost'], \
                 train_ppl2, smooth_train_ppl2)

        # perform gradient check if desired, with a bit of a burnin time (10 iterations)
        #if it == 10 and do_grad_check:
        #  solver.gradCheck(batch, model, costfun)
        #  print 'done gradcheck. continue?'
        #  raw_input()
        #
        ## detect if loss is exploding and kill the job if so
        #total_cost = cost['total_cost']
        #if it == 0:
        #  total_cost0 = total_cost # store this initial cost
        #if total_cost > total_cost0 * 2:
        #  print 'Aborting, cost seems to be exploding. Run gradcheck? Lower the learning rate?'
        #  abort = True # set the abort flag, we'll break out
        #
        ## logging: write JSON files for visual inspection of the training
        #tnow = time.time()
        #if tnow > last_status_write_time + 60*1: # every now and then let's write a report
        #  last_status_write_time = tnow
        #  jstatus = {}
        #  jstatus['time'] = datetime.datetime.now().isoformat()
        #  jstatus['iter'] = (it, max_iters)
        #  jstatus['epoch'] = (epoch, max_epochs)
        #  jstatus['time_per_batch'] = dt
        #  jstatus['smooth_train_ppl2'] = smooth_train_ppl2
        #  jstatus['val_ppl2'] = val_ppl2 # just write the last available one
        #  jstatus['train_ppl2'] = train_ppl2
        #  json_worker_status['history'].append(jstatus)
        #  status_file = os.path.join(params['worker_status_output_directory'], host + '_status.json')
        #  try:
        #    json.dump(json_worker_status, open(status_file, 'w'))
        #  except Exception, e: # todo be more clever here
        #    print 'tried to write worker status into %s but got error:' % (status_file, )
        #    print e
        #
        ## perform perplexity evaluation on the validation set and save a model checkpoint if it's good
        #is_last_iter = (it+1) == max_iters
        #if (((it+1) % eval_period_in_iters) == 0 and it < max_iters - 5) or is_last_iter:
        #  val_ppl2 = eval_split('val', dp, model, params, misc) # perform the evaluation on VAL set
        #  print 'validation perplexity = %f' % (val_ppl2, )
        #  write_checkpoint_ppl_threshold = params['write_checkpoint_ppl_threshold']
        #  if val_ppl2 < top_val_ppl2 or top_val_ppl2 < 0:
        #    if val_ppl2 < write_checkpoint_ppl_threshold or write_checkpoint_ppl_threshold < 0:
        #      # if we beat a previous record or if this is the first time
        #      # AND we also beat the user-defined threshold or it doesn't exist
        #      top_val_ppl2 = val_ppl2
        #      filename = 'model_checkpoint_%s_%s_%s_%.2f.p' % (dataset, host, params['fappend'], val_ppl2)
        #      filepath = os.path.join(params['checkpoint_output_directory'], filename)
        #      checkpoint = {}
        #      checkpoint['it'] = it
        #      checkpoint['epoch'] = epoch
        #      checkpoint['model'] = model
        #      checkpoint['params'] = params
        #      checkpoint['perplexity'] = val_ppl2
        #      checkpoint['wordtoix'] = misc['wordtoix']
        #      checkpoint['ixtoword'] = misc['ixtoword']
        #      try:
        #        pickle.dump(checkpoint, open(filepath, "wb"))
        #        print 'saved checkpoint in %s' % (filepath, )
        #      except Exception, e: # todo be more clever here
        #        print 'tried to write checkpoint into %s but got error: ' % (filepath, )
        #        print e
        cuda.close()
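Examples #2 and #3 call cuda.close() at the end of each training iteration. In Numba, cuda.close() destroys the CUDA context owned by the current thread, so device memory created before the call becomes unusable and a device has to be selected again before any further GPU work. A minimal sketch of that select/work/close lifecycle on its own, independent of the training code above (the helper name run_on_device is made up for illustration):

from numba import cuda
import numpy as np

def run_on_device(dev_id):
    # bind this thread to a device, do some work, then release the context
    cuda.select_device(dev_id)
    d_arr = cuda.to_device(np.arange(10, dtype=np.float32))
    result = d_arr.copy_to_host()
    cuda.close()  # tear down the context; call select_device() again before reusing the GPU
    return result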
Example #4
def block_increment(start, n):
    # note: stream, griddim and blockdim are module-level globals defined in the __main__ block
    m_dev = curand.normal(0, 1, n, dtype=np.float32, device=True)
    n_dev = curand.normal(0, 1, n, dtype=np.float32, device=True)
    a_host = np.zeros(n, dtype=np.float32)
    a_dev = cuda.device_array_like(a_host)
    cuda_div[griddim, blockdim, stream](m_dev, n_dev, a_dev, n)
    # a_dev stays on the device for the next kernel, so there is no host access at this point.
    # I can't tell what happens to m_dev and n_dev; my best guess is that Python GC
    # translates into deallocation on the device.
    b_dev = curand.uniform((n * n), dtype=np.float32, device=True)
    c_host = np.zeros(n, dtype=np.float32)  # output buffer; not allocated in the original snippet, length n is an assumption
    c_dev = cuda.device_array_like(c_host, stream)
    block_kernel[griddim, blockdim, stream](start, n, a_dev, b_dev, c_dev)
    c_dev.copy_to_host(c_host, stream)
    stream.synchronize()

    return c_host


if __name__ == '__main__':

    t0 = time.time()
    n = 8000
    stream = cuda.stream()
    blockdim = 256
    griddim = n // 256 + 1
    c_host = block_increment(0, n)
    stream.synchronize()
    cuda.close()
    print(c_host)

    print(time.time() - t0)
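Example #4 launches two kernels, cuda_div and block_kernel, whose definitions are not part of the snippet. Purely as an assumption about what launch-compatible definitions could look like (the real kernel bodies are not shown in the source and may compute something entirely different), a sketch with numba.cuda:

from numba import cuda

@cuda.jit
def cuda_div(m, n_vec, a, n):
    # assumed body: elementwise division of two length-n vectors into a
    i = cuda.grid(1)
    if i < n:
        a[i] = m[i] / n_vec[i]

@cuda.jit
def block_kernel(start, n, a, b, c):
    # assumed body: each thread reduces one "block" (row) of the flat n*n buffer b
    # and combines it with a[i] and the scalar offset start
    i = cuda.grid(1)
    if i < n:
        acc = 0.0
        for j in range(n):
            acc += b[i * n + j]
        c[i] = a[i] + acc + start

Both sketches assume one thread per output element, matching griddim = n // 256 + 1 and blockdim = 256 in the __main__ block above.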