Example #1
def main():
    # `utils`, `os`, `sys`, `first_sentence` and `second_sentence` are expected
    # to be imported/defined at module level in the original script.
    print(utils.get_max_vowels(first_sentence))
    print()
    print(utils.get_max_len(first_sentence))
    print()
    print(utils.reverse(second_sentence))
    print()
    print(utils.get_info_for_obj(os))
    print()
    print(utils.get_info_for_obj(sys))
    print()
    print(utils.get_pseudo_sum(124))
    print()
    print(utils.get_primes(10000))
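None of the examples in this section show the `utils` module itself. Judging by the call sites (a list of strings here, lists of variable-length stroke arrays in the dataset loaders below), `get_max_len` returns the length of the longest element of a sequence. A minimal sketch of that behaviour, inferred from usage rather than taken from any of these projects:

def get_max_len(items):
    """Return the length of the longest element in `items` (0 if empty).

    Hypothetical re-implementation inferred from the call sites in these
    examples; the real utils.get_max_len in each project may differ (the
    table-printing example, for instance, may measure len(str(item))).
    """
    return max((len(item) for item in items), default=0)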
Example #2
def print_stats(players, tokens):
    header = ("NOME", "FICHAS")
    body = zip(players, tokens)

    max_names_len = utils.get_max_len(players + [header[0]])
    max_tokens_len = utils.get_max_len(tokens + [header[1]])

    print(
        f"┌ {header[0]} { '─' * (max_names_len - 6 + 3) } {header[1]} { '─' * (max_tokens_len - 6) }┐"
    )

    for line in body:
        name, player_tokens = line

        name_length = len(name)
        name_spacer = " " * (max_names_len - name_length + 3)

        tokens_length = len(str(player_tokens))
        tokens_spacer = " " * (max_tokens_len - tokens_length)

        utils.print_colored(
            f"│ §y{name}{name_spacer}§g{player_tokens}{tokens_spacer} §0│")

    print("└" + "─" * (max_names_len + 3 + max_tokens_len + 2) + "┘")
Example #3
def load_dataset(data_dir, model_params, inference_mode=False):
    """Loads the .npz file, and splits the set into train/valid/test."""

    # normalizes the x and y columns using the training set.
    # applies the same scaling factor to the valid and test sets.

    datasets = []
    if isinstance(model_params.data_set, list):
        datasets = model_params.data_set
    else:
        datasets = [model_params.data_set]

    train_strokes = None
    valid_strokes = None
    test_strokes = None

    for dataset in datasets:
        data_filepath = os.path.join(data_dir, dataset)
        if data_dir.startswith('http://') or data_dir.startswith('https://'):
            tf.logging.info('Downloading %s', data_filepath)
            response = requests.get(data_filepath)
            data = np.load(StringIO(response.content))
        else:
            data = np.load(data_filepath)  # load this into dictionary
        tf.logging.info('Loaded {}/{}/{} from {}'.format(
            len(data['train']), len(data['valid']), len(data['test']),
            dataset))
        if train_strokes is None:
            train_strokes = data['train']
            valid_strokes = data['valid']
            test_strokes = data['test']
        else:
            train_strokes = np.concatenate((train_strokes, data['train']))
            valid_strokes = np.concatenate((valid_strokes, data['valid']))
            test_strokes = np.concatenate((test_strokes, data['test']))

    all_strokes = np.concatenate((train_strokes, valid_strokes, test_strokes))
    num_points = 0
    for stroke in all_strokes:
        num_points += len(stroke)
    avg_len = num_points / len(all_strokes)
    tf.logging.info('Dataset combined: {} ({}/{}/{}), avg len {}'.format(
        len(all_strokes), len(train_strokes), len(valid_strokes),
        len(test_strokes), int(avg_len)))

    # calculate the max strokes we need.
    max_seq_len = utils.get_max_len(all_strokes)
    # overwrite the hps with this calculation.
    model_params.max_seq_len = max_seq_len

    tf.logging.info('model_params.max_seq_len %i.', model_params.max_seq_len)

    eval_model_params = sketch_rnn_model.copy_hparams(model_params)

    eval_model_params.use_input_dropout = 0
    eval_model_params.use_recurrent_dropout = 0
    eval_model_params.use_output_dropout = 0
    eval_model_params.is_training = 1

    if inference_mode:
        eval_model_params.batch_size = 1
        eval_model_params.is_training = 0

    sample_model_params = sketch_rnn_model.copy_hparams(eval_model_params)
    sample_model_params.batch_size = 1  # only sample one at a time
    sample_model_params.max_seq_len = 1  # sample one point at a time

    train_set = utils.DataLoader(
        train_strokes,
        model_params.batch_size,
        max_seq_length=model_params.max_seq_len,
        random_scale_factor=model_params.random_scale_factor,
        augment_stroke_prob=model_params.augment_stroke_prob)

    normalizing_scale_factor = train_set.calculate_normalizing_scale_factor()
    train_set.normalize(normalizing_scale_factor)
    print('Length original', len(train_strokes), len(valid_strokes),
          len(test_strokes))
    valid_set = utils.DataLoader(valid_strokes,
                                 eval_model_params.batch_size,
                                 max_seq_length=eval_model_params.max_seq_len,
                                 random_scale_factor=0.0,
                                 augment_stroke_prob=0.0)
    valid_set.normalize(normalizing_scale_factor)

    test_set = utils.DataLoader(test_strokes,
                                eval_model_params.batch_size,
                                max_seq_length=eval_model_params.max_seq_len,
                                random_scale_factor=0.0,
                                augment_stroke_prob=0.0)
    test_set.normalize(normalizing_scale_factor)

    tf.logging.info('normalizing_scale_factor %4.4f.',
                    normalizing_scale_factor)

    result = [
        train_set, valid_set, test_set, model_params, eval_model_params,
        sample_model_params
    ]
    return result
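The six-element result list is purely positional, so a caller has to unpack it in the same order it is built. A hypothetical call site (`data_dir` and `model_params` would come from the surrounding training script, e.g. command-line flags and a default HParams object):

(train_set, valid_set, test_set,
 hps_model, eval_hps_model, sample_hps_model) = load_dataset(
     data_dir, model_params)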
Example #4
def load_dataset(data_dir, model_params, testing_mode=False):
    """Loads the .npz file, and splits the set into train/valid/test."""
    # normalizes the x and y columns using scale_factor.

    dataset = model_params.data_set
    data_filepath = os.path.join(data_dir, dataset)
    data = np.load(data_filepath, allow_pickle=True, encoding='latin1')

    # target data
    train_strokes = data['train']
    valid_strokes = data['valid']
    test_strokes = data['test']
    all_strokes = np.concatenate((train_strokes, valid_strokes, test_strokes))

    # standard data (reference data in paper)
    std_train_strokes = data['std_train']
    std_valid_strokes = data['std_valid']
    std_test_strokes = data['std_test']
    all_std_strokes = np.concatenate(
        (std_train_strokes, std_valid_strokes, std_test_strokes))

    print('Dataset combined: %d (train=%d/validate=%d/test=%d)' %
          (len(all_strokes), len(train_strokes), len(valid_strokes),
           len(test_strokes)))

    # calculate the max strokes we need.
    max_seq_len = utils.get_max_len(all_strokes)
    max_std_seq_len = utils.get_max_len(all_std_strokes)
    # overwrite the hps with this calculation.
    model_params.max_seq_len = max(max_seq_len, max_std_seq_len)
    print('model_params.max_seq_len set to %d.' % model_params.max_seq_len)

    eval_model_params = copy_hparams(model_params)
    eval_model_params.rnn_dropout_keep_prob = 1.0
    eval_model_params.is_training = True

    if testing_mode:  # for testing
        eval_model_params.batch_size = 1
        eval_model_params.is_training = False  # sample mode

    train_set = utils.DataLoader(
        train_strokes,
        model_params.batch_size,
        max_seq_length=model_params.max_seq_len,
        random_scale_factor=model_params.random_scale_factor,
        augment_stroke_prob=model_params.augment_stroke_prob)
    normalizing_scale_factor = model_params.scale_factor
    train_set.normalize(normalizing_scale_factor)

    valid_set = utils.DataLoader(valid_strokes,
                                 eval_model_params.batch_size,
                                 max_seq_length=eval_model_params.max_seq_len,
                                 random_scale_factor=0.0,
                                 augment_stroke_prob=0.0)
    valid_set.normalize(normalizing_scale_factor)

    test_set = utils.DataLoader(test_strokes,
                                eval_model_params.batch_size,
                                max_seq_length=eval_model_params.max_seq_len,
                                random_scale_factor=0.0,
                                augment_stroke_prob=0.0)
    test_set.normalize(normalizing_scale_factor)

    # process the reference dataset
    std_train_set = utils.DataLoader(
        std_train_strokes,
        model_params.batch_size,
        max_seq_length=model_params.max_seq_len,
        random_scale_factor=model_params.random_scale_factor,
        augment_stroke_prob=model_params.augment_stroke_prob)
    std_train_set.normalize(normalizing_scale_factor)

    std_valid_set = utils.DataLoader(
        std_valid_strokes,
        eval_model_params.batch_size,
        max_seq_length=eval_model_params.max_seq_len,
        random_scale_factor=0.0,
        augment_stroke_prob=0.0)
    std_valid_set.normalize(normalizing_scale_factor)

    std_test_set = utils.DataLoader(
        std_test_strokes,
        eval_model_params.batch_size,
        max_seq_length=eval_model_params.max_seq_len,
        random_scale_factor=0.0,
        augment_stroke_prob=0.0)
    std_test_set.normalize(normalizing_scale_factor)

    result = [
        train_set, valid_set, test_set, std_train_set, std_valid_set,
        std_test_set, model_params, eval_model_params
    ]
    return result
Example #5
def load_dataset(data_dir, model_params, inference_mode=False, contain_labels=False):
  """Loads the .npz file, and splits the set into train/valid/test."""

  # normalizes the x and y columns using the training set.
  # applies the same scaling factor to the valid and test sets.
  # contain_labels: set to True to return labels for classification tasks; defaults to False.

  datasets = []
  if isinstance(model_params.data_set, list):
    datasets = model_params.data_set
  else:
    datasets = [model_params.data_set]

  train_strokes = None
  valid_strokes = None
  test_strokes = None
  label_index = 0
  class_num = len(datasets)
  for dataset in datasets:
    # Get input data
    data_filepath = os.path.join(data_dir, "sketch", dataset)
    onv_left_filepath = os.path.join(data_dir, "onv_9936_thick", dataset)
    onv_right_filepath = os.path.join(data_dir, "onv_9936_thick_right", dataset)
    if data_dir.startswith('http://') or data_dir.startswith('https://'):
      tf.logging.info('Downloading %s', data_filepath)
      response = requests.get(data_filepath)
      data = np.load(StringIO(response.content))
    else:
      tf.logging.info('Getting data from %s', data_filepath)
      data = np.load(data_filepath)  # load this into dictionary
      tf.logging.info('Getting left onv from %s', onv_left_filepath)
      onv_left = np.load(onv_left_filepath)
      tf.logging.info('Getting right onv from %s', onv_right_filepath)
      onv_right = np.load(onv_right_filepath)
   
    train_size = len(onv_left['train'])
    valid_size = len(onv_left['valid'])
    test_size = len(onv_left['test'])

    tf.logging.info('Loaded {}/{}/{} from {}'.format(
        train_size, valid_size, test_size, dataset))

    # set labels for classification task
    cur_train_labels = np.zeros((train_size, class_num))
    cur_valid_labels = np.zeros((valid_size, class_num))
    cur_test_labels = np.zeros((test_size, class_num))

    cur_train_labels[:, label_index] = 1
    cur_valid_labels[:, label_index] = 1
    cur_test_labels[:, label_index] = 1
    #print ("label_index", label_index, cur_train_labels[0])

    if train_strokes is None:
      train_strokes = data['train'][0:train_size]
      valid_strokes = data['valid'][0:valid_size]
      test_strokes = data['test'][0:test_size]

      train_onvs_left = onv_left['train'][0:train_size]
      valid_onvs_left = onv_left['valid'][0:valid_size]
      test_onvs_left = onv_left['test'][0:test_size]

      train_onvs_right = onv_right['train'][0:train_size]
      valid_onvs_right = onv_right['valid'][0:valid_size]
      test_onvs_right = onv_right['test'][0:test_size]
 
      train_labels = cur_train_labels[0:train_size]
      valid_labels = cur_valid_labels[0:valid_size]
      test_labels = cur_test_labels[0:test_size]

    else:
      train_strokes = np.concatenate((train_strokes, data['train'][0:train_size]))
      valid_strokes = np.concatenate((valid_strokes, data['valid'][0:valid_size]))
      test_strokes = np.concatenate((test_strokes, data['test'][0:test_size]))

      train_onvs_left = np.concatenate((train_onvs_left, onv_left['train'][0:train_size]))
      valid_onvs_left = np.concatenate((valid_onvs_left, onv_left['valid'][0:valid_size]))
      test_onvs_left = np.concatenate((test_onvs_left, onv_left['test'][0:test_size]))

      train_onvs_right = np.concatenate((train_onvs_right, onv_right['train'][0:train_size]))
      valid_onvs_right = np.concatenate((valid_onvs_right, onv_right['valid'][0:valid_size]))
      test_onvs_right = np.concatenate((test_onvs_right, onv_right['test'][0:test_size]))

      train_labels = np.concatenate((train_labels, cur_train_labels[0:train_size]))
      valid_labels = np.concatenate((valid_labels, cur_valid_labels[0:valid_size]))
      test_labels = np.concatenate((test_labels, cur_test_labels[0:test_size]))

    label_index+=1

  all_strokes = np.concatenate((train_strokes, valid_strokes, test_strokes))
  num_points = 0
  for stroke in all_strokes:
    num_points += len(stroke)
  avg_len = num_points / len(all_strokes)
  tf.logging.info('Dataset combined: {} ({}/{}/{}), avg len {}'.format(
      len(all_strokes), len(train_strokes), len(valid_strokes),
      len(test_strokes), int(avg_len)))

  # calculate the max strokes we need.
  max_seq_len = utils.get_max_len(all_strokes)
  # overwrite the hps with this calculation.
  model_params.max_seq_len = max_seq_len

  tf.logging.info('model_params.max_seq_len %i.', model_params.max_seq_len)

  eval_model_params = sketch_rnn_model.copy_hparams(model_params)

  eval_model_params.use_input_dropout = 0
  eval_model_params.use_recurrent_dropout = 0
  eval_model_params.use_output_dropout = 0
  eval_model_params.is_training = 1

  if inference_mode:
    eval_model_params.batch_size = 1
    eval_model_params.is_training = 0

  sample_model_params = sketch_rnn_model.copy_hparams(eval_model_params)
  sample_model_params.batch_size = 1  # only sample one at a time
  sample_model_params.max_seq_len = 1  # sample one point at a time

  train_set = utils.DataLoader(
      train_strokes,
      model_params.batch_size,
      max_seq_length=model_params.max_seq_len,
      random_scale_factor=model_params.random_scale_factor,
      augment_stroke_prob=model_params.augment_stroke_prob)

  normalizing_scale_factor = train_set.calculate_normalizing_scale_factor()
  train_set.normalize(normalizing_scale_factor)

  valid_set = utils.DataLoader(
      valid_strokes,
      eval_model_params.batch_size,
      max_seq_length=eval_model_params.max_seq_len,
      random_scale_factor=0.0,
      augment_stroke_prob=0.0)
  valid_set.normalize(normalizing_scale_factor)

  test_set = utils.DataLoader(
      test_strokes,
      eval_model_params.batch_size,
      max_seq_length=eval_model_params.max_seq_len,
      random_scale_factor=0.0,
      augment_stroke_prob=0.0)
  test_set.normalize(normalizing_scale_factor)


  tf.logging.info('normalizing_scale_factor %4.4f.', normalizing_scale_factor)

  # onv preprocess
  print ("unique", np.unique(train_onvs_left))
  train_onvs_left = train_onvs_left / 255.0
  valid_onvs_left = valid_onvs_left / 255.0
  test_onvs_left = test_onvs_left / 255.0
  
  train_onvs_right = train_onvs_right / 255.0
  valid_onvs_right = valid_onvs_right / 255.0
  test_onvs_right = test_onvs_right / 255.0

  if not contain_labels:
    result = [
        train_set, valid_set, test_set, model_params, eval_model_params,
        sample_model_params, train_onvs_left, valid_onvs_left, test_onvs_left, 
        train_onvs_right, valid_onvs_right, test_onvs_right
    ]
  else: #return labels for classification tasks
    result = [
        train_set, valid_set, test_set, model_params, eval_model_params,
        sample_model_params, train_onvs_left, valid_onvs_left, test_onvs_left, 
        train_onvs_right, valid_onvs_right, test_onvs_right, train_labels, valid_labels, test_labels
    ]
  return result
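Because the flat result list grows from 12 to 15 elements when `contain_labels=True`, the unpacking at the call site has to match the flag. A hedged sketch of both call patterns (`data_dir` and `model_params` are assumed to come from the surrounding script):

# Without labels: 12 positional elements.
(train_set, valid_set, test_set, hps, eval_hps, sample_hps,
 train_onvs_left, valid_onvs_left, test_onvs_left,
 train_onvs_right, valid_onvs_right, test_onvs_right) = load_dataset(
     data_dir, model_params)

# With labels for the classification task: the same 12 elements plus three
# one-hot label matrices aligned with the train/valid/test sets.
(train_set, valid_set, test_set, hps, eval_hps, sample_hps,
 train_onvs_left, valid_onvs_left, test_onvs_left,
 train_onvs_right, valid_onvs_right, test_onvs_right,
 train_labels, valid_labels, test_labels) = load_dataset(
     data_dir, model_params, contain_labels=True)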
Example #6
def load_dataset(data_dir, model_params, inference_mode=False):
    """Loads the .npz file, and splits the set into train/valid/test."""

    # normalizes the x and y columns using the training set.
    # applies same scaling factor to valid and test set.

    if isinstance(model_params.data_set, list):
        datasets = model_params.data_set
    else:
        datasets = [model_params.data_set]

    train_strokes = None
    valid_strokes = None
    test_strokes = None

    train_data = []
    valid_data = []
    test_data = []

    dataset_lengths = []

    all_strokes = []

    for i, dataset in enumerate(datasets):
        data_filepath = os.path.join(data_dir, dataset)
        if six.PY3:
            tmp_data = np.load(data_filepath,
                               encoding='latin1',
                               allow_pickle=True)
        else:
            tmp_data = np.load(data_filepath, allow_pickle=True)

        all_strokes = np.concatenate((all_strokes, tmp_data['train'],
                                      tmp_data['valid'], tmp_data['test']))

    max_seq_len = utils.get_max_len(all_strokes)
    model_params.max_seq_len = max_seq_len
    print('Max sequence length: ', max_seq_len)

    for i, dataset in enumerate(datasets):
        data_filepath = os.path.join(data_dir, dataset)
        if six.PY3:
            data = np.load(data_filepath, encoding='latin1', allow_pickle=True)
        else:
            data = np.load(data_filepath, allow_pickle=True)
        logger.info('Loaded {}/{}/{} from {}'.format(len(data['train']),
                                                     len(data['valid']),
                                                     len(data['test']),
                                                     dataset))
        train_strokes = data['train']
        valid_strokes = data['valid']
        test_strokes = data['test']

        train_set = utils.DataLoader(
            train_strokes,
            model_params.batch_size,
            max_seq_length=max_seq_len,
            random_scale_factor=model_params.random_scale_factor,
            augment_stroke_prob=model_params.augment_stroke_prob)

        normalizing_scale_factor = train_set.calculate_normalizing_scale_factor()
        train_set.normalize(normalizing_scale_factor)
        train_set.strokes = [
            utils.to_big_strokes(stroke, max_seq_len)
            for stroke in train_set.strokes
        ]
        train_set.strokes = [
            np.insert(stroke, 0, [0, 0, 1, 0, 0], axis=0)
            for stroke in train_set.strokes
        ]

        valid_set = utils.DataLoader(
            valid_strokes,
            model_params.batch_size,
            max_seq_length=max_seq_len,
            random_scale_factor=model_params.random_scale_factor,
            augment_stroke_prob=model_params.augment_stroke_prob)

        valid_set.normalize(normalizing_scale_factor)
        valid_set.strokes = [
            utils.to_big_strokes(stroke, max_seq_len)
            for stroke in valid_set.strokes
        ]
        valid_set.strokes = [
            np.insert(stroke, 0, [0, 0, 1, 0, 0], axis=0)
            for stroke in valid_set.strokes
        ]

        test_set = utils.DataLoader(
            test_strokes,
            model_params.batch_size,
            max_seq_length=max_seq_len,
            random_scale_factor=model_params.random_scale_factor,
            augment_stroke_prob=model_params.augment_stroke_prob)

        test_set.normalize(normalizing_scale_factor)
        test_set.strokes = [
            utils.to_big_strokes(stroke, max_seq_len)
            for stroke in test_set.strokes
        ]
        test_set.strokes = [
            np.insert(stroke, 0, [0, 0, 1, 0, 0], axis=0)
            for stroke in test_set.strokes
        ]

        train_sketches = [{
            'dataset': dataset,
            'draw': sketch
        } for sketch in train_set.strokes]
        valid_sketches = [{
            'dataset': dataset,
            'draw': sketch
        } for sketch in valid_set.strokes]
        test_sketches = [{
            'dataset': dataset,
            'draw': sketch
        } for sketch in test_set.strokes]

        train_data.append(train_sketches)
        valid_data.append(valid_sketches)
        test_data.append(test_sketches)

    return [train_data, valid_data, test_data]
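Here the return value is a nested structure rather than DataLoader objects: one list per dataset, one dict per sketch. A hypothetical inspection of that structure (`data_dir` and `model_params` again come from the caller):

train_data, valid_data, test_data = load_dataset(data_dir, model_params)

first = train_data[0][0]          # first sketch of the first dataset
print(first['dataset'])           # the source .npz filename
# Big-stroke format; assuming utils.to_big_strokes pads to max_seq_len rows,
# the prepended start token makes this (max_seq_len + 1, 5).
print(first['draw'].shape)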
Example #7
def load_datasets(data_dir, model_params, inference_mode=False):
    """Load and preprocess data"""
    data = utils.load_dataset(data_dir)
    train_strokes = data['train']
    valid_strokes = data['valid']
    test_strokes = data['test']

    all_strokes = np.concatenate((train_strokes, valid_strokes, test_strokes))
    num_points = 0
    for stroke in all_strokes:
        num_points += len(stroke)
    avg_len = num_points / len(all_strokes)
    tf.logging.info('{} Shapes / {} Total points'.format(len(all_strokes), num_points))
    tf.logging.info('Dataset combined: {} ({}/{}/{}), avg len {}'.format(
        len(all_strokes), len(train_strokes), len(valid_strokes),
        len(test_strokes), int(avg_len)))

    # calculate the max strokes we need.
    max_seq_len = utils.get_max_len(all_strokes)
    # overwrite the hps with this calculation.
    model_params.max_seq_len = max_seq_len

    tf.logging.info('model_params.max_seq_len %i.', model_params.max_seq_len)

    eval_model_params = derender_model.copy_hparams(model_params)
    eval_model_params.use_input_dropout = 0
    eval_model_params.use_recurrent_dropout = 0
    eval_model_params.use_output_dropout = 0
    eval_model_params.is_training = 1

    if inference_mode:
        eval_model_params.batch_size = 1
        eval_model_params.is_training = 0

    sample_model_params = derender_model.copy_hparams(eval_model_params)
    sample_model_params.batch_size = 1  # only sample one at a time
    sample_model_params.max_seq_len = 1  # sample one point at a time

    train_set = utils.DataLoader(
        train_strokes,
        model_params.batch_size,
        max_seq_length=model_params.max_seq_len,
        random_scale_factor=model_params.random_scale_factor,
        augment_stroke_prob=model_params.augment_stroke_prob)

    normalizing_scale_factor = train_set.calculate_normalizing_scale_factor()
    train_set.normalize(normalizing_scale_factor)

    valid_set = utils.DataLoader(
        valid_strokes,
        eval_model_params.batch_size,
        max_seq_length=eval_model_params.max_seq_len,
        random_scale_factor=0.0,
        augment_stroke_prob=0.0)
    valid_set.normalize(normalizing_scale_factor)

    test_set = utils.DataLoader(
        test_strokes,
        eval_model_params.batch_size,
        max_seq_length=eval_model_params.max_seq_len,
        random_scale_factor=0.0,
        augment_stroke_prob=0.0)
    test_set.normalize(normalizing_scale_factor)

    tf.logging.info('normalizing_scale_factor %4.4f.', normalizing_scale_factor)

    result = [
        train_set, valid_set, test_set, model_params, eval_model_params,
        sample_model_params
    ]

    return result
Example #8
def load_dataset(data_dir, model_params, inference_mode=False):
    """Loads the .npz file, and splits the set into train/valid/test."""

    # normalizes the x and y columns using the training set.
    # applies same scaling factor to valid and test set.

    if isinstance(model_params.data_set, list):
        datasets = model_params.data_set
    else:
        datasets = [model_params.data_set]

    train_strokes = None
    valid_strokes = None
    test_strokes = None

    png_paths_map = {'train': [], 'valid': [], 'test': []}

    for dataset in datasets:
        if data_dir.startswith('http://') or data_dir.startswith('https://'):
            data_filepath = '/'.join([data_dir, dataset])
            print('Downloading %s' % data_filepath)
            response = requests.get(data_filepath)
            data = np.load(six.BytesIO(response.content), encoding='latin1')
        else:
            data_filepath = os.path.join(data_dir, 'npz', dataset)
            if six.PY3:
                data = np.load(data_filepath, encoding='latin1')
            else:
                data = np.load(data_filepath)
        print('Loaded {}/{}/{} from {}'.format(len(data['train']),
                                               len(data['valid']),
                                               len(data['test']), dataset))
        if train_strokes is None:
            train_strokes = data['train']  # [N (#sketches),], each with [S (#points), 3]
            valid_strokes = data['valid']
            test_strokes = data['test']
        else:
            train_strokes = np.concatenate((train_strokes, data['train']))
            valid_strokes = np.concatenate((valid_strokes, data['valid']))
            test_strokes = np.concatenate((test_strokes, data['test']))

        splits = ['train', 'valid', 'test']
        for split in splits:
            for im_idx in range(len(data[split])):
                png_path = os.path.join(
                    data_dir, 'png', dataset[:-4], split,
                    str(model_params.img_H) + 'x' + str(model_params.img_W),
                    str(im_idx) + '.png')
                png_paths_map[split].append(png_path)

    all_strokes = np.concatenate((train_strokes, valid_strokes, test_strokes))
    num_points = 0
    for stroke in all_strokes:
        num_points += len(stroke)
    avg_len = num_points / len(all_strokes)
    print('Dataset combined: {} ({}/{}/{}), avg len {}'.format(
        len(all_strokes), len(train_strokes), len(valid_strokes),
        len(test_strokes), int(avg_len)))
    assert len(train_strokes) == len(png_paths_map['train'])
    assert len(valid_strokes) == len(png_paths_map['valid'])
    assert len(test_strokes) == len(png_paths_map['test'])

    # calculate the max strokes we need.
    max_seq_len = utils.get_max_len(all_strokes)

    # overwrite the hps with this calculation.
    model_params.max_seq_len = max_seq_len
    print('model_params.max_seq_len %i.' % model_params.max_seq_len)

    eval_model_params = sketch_rnn_model.copy_hparams(model_params)

    eval_model_params.use_input_dropout = 0
    eval_model_params.use_recurrent_dropout = 0
    eval_model_params.use_output_dropout = 0
    eval_model_params.is_training = 1

    if inference_mode:
        eval_model_params.batch_size = 1
        eval_model_params.is_training = 0

    sample_model_params = sketch_rnn_model.copy_hparams(eval_model_params)
    sample_model_params.batch_size = 1  # only sample one at a time
    sample_model_params.max_seq_len = 1  # sample one point at a time

    train_set = utils.DataLoader(
        train_strokes,
        png_paths_map['train'],
        model_params.img_H,
        model_params.img_W,
        model_params.batch_size,
        max_seq_length=model_params.max_seq_len,
        random_scale_factor=model_params.random_scale_factor,
        augment_stroke_prob=model_params.augment_stroke_prob)

    normalizing_scale_factor = train_set.calculate_normalizing_scale_factor()
    train_set.normalize(normalizing_scale_factor)

    valid_set = utils.DataLoader(valid_strokes,
                                 png_paths_map['valid'],
                                 eval_model_params.img_H,
                                 eval_model_params.img_W,
                                 eval_model_params.batch_size,
                                 max_seq_length=eval_model_params.max_seq_len,
                                 random_scale_factor=0.0,
                                 augment_stroke_prob=0.0)
    valid_set.normalize(normalizing_scale_factor)

    test_set = utils.DataLoader(test_strokes,
                                png_paths_map['test'],
                                eval_model_params.img_H,
                                eval_model_params.img_W,
                                eval_model_params.batch_size,
                                max_seq_length=eval_model_params.max_seq_len,
                                random_scale_factor=0.0,
                                augment_stroke_prob=0.0)
    test_set.normalize(normalizing_scale_factor)

    print('normalizing_scale_factor %4.4f.' % normalizing_scale_factor)

    result = [
        train_set, valid_set, test_set, model_params, eval_model_params,
        sample_model_params
    ]
    return result
Example #9
def load_dataset(data_dir, datasets, inference_mode=False):
    """Loads the .npz file, and splits the set into train/valid/test."""

    # normalizes the x and y columns using the training set.
    # applies the same scaling factor to the valid and test sets.

    train_strokes = None
    valid_strokes = None
    test_strokes = None

    for dataset in datasets:
        data_filepath = os.path.join(data_dir, dataset)
        if data_dir.startswith('http://') or data_dir.startswith('https://'):
            tf.logging.info('Downloading %s', data_filepath)
            response = requests.get(data_filepath)
            data = np.load(StringIO(response.content))
        else:
            if six.PY3:
                data = np.load(data_filepath, encoding='latin1')
            else:
                data = np.load(data_filepath)
        tf.logging.info('Loaded {}/{}/{} from {}'.format(
            len(data['train']), len(data['valid']), len(data['test']),
            dataset))
        if train_strokes is None:
            train_strokes = data['train']
            valid_strokes = data['valid']
            test_strokes = data['test']
        else:
            train_strokes = np.concatenate((train_strokes, data['train']))
            valid_strokes = np.concatenate((valid_strokes, data['valid']))
            test_strokes = np.concatenate((test_strokes, data['test']))

    all_strokes = np.concatenate((train_strokes, valid_strokes, test_strokes))
    num_points = 0
    for stroke in all_strokes:
        num_points += len(stroke)
    avg_len = num_points / len(all_strokes)
    tf.logging.info('Dataset combined: {} ({}/{}/{}), avg len {}'.format(
        len(all_strokes), len(train_strokes), len(valid_strokes),
        len(test_strokes), int(avg_len)))

    # calculate the max strokes we need.
    max_seq_len = utils.get_max_len(all_strokes)

    tf.logging.info('model_params.max_seq_len %i.', max_seq_len)

    train_set = utils.DataLoader(train_strokes,
                                 random_scale_factor=0.1,
                                 augment_stroke_prob=0.1)

    normalizing_scale_factor = train_set.calculate_normalizing_scale_factor()
    train_set.normalize(normalizing_scale_factor)

    valid_set = utils.DataLoader(valid_strokes,
                                 random_scale_factor=0.0,
                                 augment_stroke_prob=0.0)
    valid_set.normalize(normalizing_scale_factor)

    test_set = utils.DataLoader(test_strokes,
                                random_scale_factor=0.0,
                                augment_stroke_prob=0.0)
    test_set.normalize(normalizing_scale_factor)

    tf.logging.info('normalizing_scale_factor %4.4f.',
                    normalizing_scale_factor)

    result = [train_set, valid_set, test_set]
    return result
Example #10
def load_dataset(sketch_data_dir,
                 photo_data_dir,
                 model_params,
                 inference_mode=False):
    """Loads the .npz file, and splits the set into train/test."""

    # normalizes the x and y columns using the training set.
    # applies same scaling factor to test set.

    if isinstance(model_params.data_set, list):
        datasets = model_params.data_set
    else:
        datasets = [model_params.data_set]

    train_strokes = None
    test_strokes = None
    train_image_paths = []
    test_image_paths = []

    for dataset in datasets:
        if model_params.data_type == 'QMUL':
            train_data_filepath = os.path.join(sketch_data_dir, dataset,
                                               'train_svg_sim_spa_png.h5')
            test_data_filepath = os.path.join(sketch_data_dir, dataset,
                                              'test_svg_sim_spa_png.h5')

            train_data_dict = utils.load_hdf5(train_data_filepath)
            test_data_dict = utils.load_hdf5(test_data_filepath)

            train_sketch_data = utils.reassemble_data(
                train_data_dict['image_data'], train_data_dict['data_offset']
            )  # list of [N_sketches], each [N_points, 4]
            train_photo_names = train_data_dict[
                'image_base_name']  # [N_sketches, 1], byte
            train_photo_paths = [
                os.path.join(photo_data_dir,
                             train_photo_names[i, 0].decode() + '.png')
                for i in range(train_photo_names.shape[0])
            ]  # [N_sketches], str
            test_sketch_data = utils.reassemble_data(
                test_data_dict['image_data'], test_data_dict['data_offset']
            )  # list of [N_sketches], each [N_points, 4]
            test_photo_names = test_data_dict[
                'image_base_name']  # [N_sketches, 1], byte
            test_photo_paths = [
                os.path.join(photo_data_dir,
                             test_photo_names[i, 0].decode() + '.png')
                for i in range(test_photo_names.shape[0])
            ]  # [N_sketches], str

            # transfer stroke-4 to stroke-3
            train_sketch_data = utils.to_normal_strokes_4to3(train_sketch_data)
            test_sketch_data = utils.to_normal_strokes_4to3(
                test_sketch_data)  # [N_sketches,], each with [N_points, 3]

            if train_strokes is None:
                train_strokes = train_sketch_data
                test_strokes = test_sketch_data
            else:
                train_strokes = np.concatenate(
                    (train_strokes, train_sketch_data))
                test_strokes = np.concatenate((test_strokes, test_sketch_data))

        elif model_params.data_type == 'QuickDraw':
            data_filepath = os.path.join(sketch_data_dir, dataset, 'npz',
                                         'sketchrnn_' + dataset + '.npz')
            if six.PY3:
                data = np.load(data_filepath, encoding='latin1')
            else:
                data = np.load(data_filepath)

            if train_strokes is None:
                train_strokes = data[
                    'train']  # [N_sketches,], each with [N_points, 3]
                test_strokes = data['test']
            else:
                train_strokes = np.concatenate((train_strokes, data['train']))
                test_strokes = np.concatenate((test_strokes, data['test']))

            train_photo_paths = [
                os.path.join(
                    sketch_data_dir, dataset, 'png', 'train',
                    str(model_params.image_size) + 'x' +
                    str(model_params.image_size),
                    str(im_idx) + '.png')
                for im_idx in range(len(data['train']))
            ]
            test_photo_paths = [
                os.path.join(
                    sketch_data_dir, dataset, 'png', 'test',
                    str(model_params.image_size) + 'x' +
                    str(model_params.image_size),
                    str(im_idx) + '.png')
                for im_idx in range(len(data['test']))
            ]
        else:
            raise Exception('Unknown data type:', model_params.data_type)

        print('Loaded {}/{} from {} {}'.format(len(train_photo_paths),
                                               len(test_photo_paths),
                                               model_params.data_type,
                                               dataset))
        train_image_paths += train_photo_paths
        test_image_paths += test_photo_paths

    all_strokes = np.concatenate((train_strokes, test_strokes))
    num_points = 0
    for stroke in all_strokes:
        num_points += len(stroke)
    avg_len = num_points / len(all_strokes)
    print('Dataset combined: {} ({}/{}), avg len {}'.format(
        len(all_strokes), len(train_strokes), len(test_strokes), int(avg_len)))
    assert len(train_image_paths) == len(train_strokes)
    assert len(test_image_paths) == len(test_strokes)

    # calculate the max strokes we need.
    max_seq_len = utils.get_max_len(all_strokes)

    # overwrite the hps with this calculation.
    model_params.max_seq_len = max_seq_len
    print('model_params.max_seq_len %i.' % model_params.max_seq_len)

    eval_model_params = sketch_p2s_model.copy_hparams(model_params)
    eval_model_params.use_input_dropout = 0
    eval_model_params.use_recurrent_dropout = 0
    eval_model_params.use_output_dropout = 0
    eval_model_params.is_training = 1

    if inference_mode:
        eval_model_params.batch_size = 1
        eval_model_params.is_training = 0

    sample_model_params = sketch_p2s_model.copy_hparams(eval_model_params)
    sample_model_params.batch_size = 1  # only sample one at a time
    sample_model_params.max_seq_len = 1  # sample one point at a time

    train_set = utils.DataLoader(
        train_strokes,
        train_image_paths,
        model_params.image_size,
        model_params.image_size,
        model_params.batch_size,
        max_seq_length=model_params.max_seq_len,
        random_scale_factor=model_params.random_scale_factor,
        augment_stroke_prob=model_params.augment_stroke_prob)

    normalizing_scale_factor = train_set.calculate_normalizing_scale_factor()
    train_set.normalize(normalizing_scale_factor)

    # valid_set = utils.DataLoader(
    #     valid_strokes,
    #     eval_model_params.batch_size,
    #     max_seq_length=eval_model_params.max_seq_len,
    #     random_scale_factor=0.0,
    #     augment_stroke_prob=0.0)
    # valid_set.normalize(normalizing_scale_factor)

    test_set = utils.DataLoader(test_strokes,
                                test_image_paths,
                                model_params.image_size,
                                model_params.image_size,
                                eval_model_params.batch_size,
                                max_seq_length=eval_model_params.max_seq_len,
                                random_scale_factor=0.0,
                                augment_stroke_prob=0.0)
    test_set.normalize(normalizing_scale_factor)

    print('normalizing_scale_factor %4.4f.' % normalizing_scale_factor)

    result = [
        train_set, None, test_set, model_params, eval_model_params,
        sample_model_params
    ]
    return result
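Note that the second slot of the result is None, since the validation split is commented out above; a caller has to skip it explicitly. A hypothetical unpacking (`sketch_data_dir`, `photo_data_dir` and `model_params` come from the surrounding script):

(train_set, _, test_set,          # the underscore discards the unused valid slot
 hps_model, eval_hps_model, sample_hps_model) = load_dataset(
     sketch_data_dir, photo_data_dir, model_params)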