Example #1
def read_data_sets(train_dir,
                   fake_data=False,
                   one_hot=False,
                   dtype=dtypes.float32,
                   reshape=True,
                   validation_size=5000):
    if fake_data:

        def fake():
            return DataSet([], [],
                           fake_data=True,
                           one_hot=one_hot,
                           dtype=dtype)

        train = fake()
        validation = fake()
        test = fake()
        return base.Datasets(train=train, validation=validation, test=test)

    with open(os.path.join(train_dir, 'small_chairs.npy'), 'rb') as f:
        train_images = numpy.load(f)
        # The images are unlabeled, so use all-zero placeholder labels.
        train_labels = numpy.zeros(len(train_images))

    train = DataSet(train_images, train_labels, dtype=dtype, reshape=reshape)
    # Only a training split is produced; validation and test stay empty.
    validation = None
    test = None

    return base.Datasets(train=train, validation=validation, test=test)
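A minimal usage sketch for this variant (the directory is a placeholder, and next_batch is the TF 1.x DataSet API these examples are built around):

data = read_data_sets('/tmp/chairs_data')
images, labels = data.train.next_batch(32)
# validation and test are None here, so only data.train is usable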
Example #2
def read_data_sets(train_dir,
                   fake_data=False,
                   one_hot=False,
                   dtype=dtypes.float32,
                   reshape=True,
                   validation_size=5000):
  if fake_data:

    def fake():
      return DataSet([], [], fake_data=True, one_hot=one_hot, dtype=dtype)

    train = fake()
    validation = fake()
    test = fake()
    return base.Datasets(train=train, validation=validation, test=test)

  TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
  TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'

  local_file = base.maybe_download(TRAIN_IMAGES, train_dir,
                                   SOURCE_URL + TRAIN_IMAGES)
  with open(local_file, 'rb') as f:
    train_images = extract_images(f)

  local_file = base.maybe_download(TRAIN_LABELS, train_dir,
                                   SOURCE_URL + TRAIN_LABELS)
  with open(local_file, 'rb') as f:
    train_labels = extract_labels(f, one_hot=one_hot)

  local_file = os.path.join(train_dir, TEST_FILE)
  print('Extracting', TEST_FILE)
  with open(local_file, 'rb') as f:
    # Parse the raw IDX3 header (magic, image count, rows, cols -- all
    # big-endian uint32), then read the pixel bytes that follow.
    magic, num, rows, cols = struct.unpack(">IIII", f.read(16))
    test_images = numpy.fromfile(f, dtype=numpy.uint8)
    test_images = test_images.reshape(num, rows, cols, 1)

  # The test file carries no labels, so substitute all-zero placeholders.
  if one_hot:
    test_labels = numpy.zeros((NUM_TEST, NUM_CLASSES), dtype=numpy.uint8)
  else:
    test_labels = numpy.zeros((NUM_TEST,), dtype=numpy.uint8)

  if not 0 <= validation_size <= len(train_images):
    raise ValueError(
        'Validation size should be between 0 and {}. Received: {}.'
        .format(len(train_images), validation_size))

  validation_images = train_images[:validation_size]
  validation_labels = train_labels[:validation_size]
  train_images = train_images[validation_size:]
  train_labels = train_labels[validation_size:]

  train = DataSet(train_images, train_labels, dtype=dtype, reshape=reshape)
  validation = DataSet(validation_images,
                       validation_labels,
                       dtype=dtype,
                       reshape=reshape)
  test = DataSet(test_images, test_labels, dtype=dtype, reshape=reshape)

  return base.Datasets(train=train, validation=validation, test=test)
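The raw header parse in the test branch above can be hardened by validating the IDX3 magic number (2051 for image files in the MNIST file format); a small sketch, assuming struct is imported as in the example:

def read_idx3_header(f):
    # IDX3 image files begin with four big-endian uint32 values:
    # magic (2051), image count, rows, cols.
    magic, num, rows, cols = struct.unpack(">IIII", f.read(16))
    if magic != 2051:
        raise ValueError('Invalid IDX3 magic number: {}'.format(magic))
    return num, rows, cols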
Example #3
def read_data_sets(train_dir,
                   fake_data=False,
                   one_hot=False,
                   dtype=dtypes.float32,
                   reshape=True,
                   validation_size=5000):
    if fake_data:

        def fake():
            return DataSet([], [],
                           fake_data=True,
                           one_hot=one_hot,
                           dtype=dtype)

        train = fake()
        validation = fake()
        test = fake()
        return base.Datasets(train=train, validation=validation, test=test)

    # This is the directory containing the flag images
    TRAIN_IMAGES = '/home/michael/data/crop/'
    IMAGE_SHAPE = 28  # 28x28 pixel images

    RESIZED_IMAGES = resize_images(TRAIN_IMAGES, IMAGE_SHAPE)

    train_images = extract_images(RESIZED_IMAGES)

    train_labels = extract_labels(RESIZED_IMAGES)

    train = DataSet(train_images, train_labels, dtype=dtype, reshape=reshape)
    # No separate splits: the test and validation sets alias the training set.
    test = train
    validation = train

    return base.Datasets(train=train, validation=validation, test=test)
Example #4
def read_data_sets2(train_dir,
                    fake_data=False,
                    one_hot=False,
                    dtype=dtypes.float32,
                    reshape=True,
                    validation_size=5000,
                    seed=None):
    if fake_data:

        def fake():
            return DataSet([], [],
                           fake_data=True,
                           one_hot=one_hot,
                           dtype=dtype,
                           seed=seed)

        train = fake()
        validation = fake()
        test = fake()
        return base.Datasets(train=train, validation=validation, test=test)

    TRAIN_IMAGES = 'train.data.60k.csv.ubyte2'
    TRAIN_LABELS = 'train.label.60k.csv.ubyte'
    TEST_IMAGES = 'test.data.10k.csv.ubyte2'
    TEST_LABELS = 'test.label.10k.csv.ubyte'

    local_file = TRAIN_IMAGES
    with open(local_file, 'rb') as f:
        train_images = extract_images(f)

    local_file = TRAIN_LABELS
    with open(local_file, 'rb') as f:
        train_labels = extract_labels(f, one_hot=one_hot)

    local_file = TEST_IMAGES
    with open(local_file, 'rb') as f:
        test_images = extract_images(f)

    local_file = TEST_LABELS
    with open(local_file, 'rb') as f:
        test_labels = extract_labels(f, one_hot=one_hot)

    if not 0 <= validation_size <= len(train_images):
        raise ValueError(
            'Validation size should be between 0 and {}. Received: {}.'.format(
                len(train_images), validation_size))

    validation_images = train_images[:validation_size]
    validation_labels = train_labels[:validation_size]
    train_images = train_images[validation_size:]
    train_labels = train_labels[validation_size:]

    options = dict(dtype=dtype, reshape=reshape, seed=seed)

    train = DataSet(train_images, train_labels, **options)
    validation = DataSet(validation_images, validation_labels, **options)
    test = DataSet(test_images, test_labels, **options)

    return base.Datasets(train=train, validation=validation, test=test)
Example #5
def read_data_sets(train_dir,
                   fake_data=False,
                   one_hot=False,
                   dtype=dtypes.float32,
                   reshape=True,
                   validation_size=5000):
    if fake_data:

        def fake():
            return DataSet([], [],
                           fake_data=True,
                           one_hot=one_hot,
                           dtype=dtype)

        train = fake()
        validation = fake()
        test = fake()
        return base.Datasets(train=train, validation=validation, test=test)

    TRAIN_IMAGES = 'training-images-and-ubyte_4.gz'
    TRAIN_LABELS = 'training-labels-and-ubyte_4.gz'
    VALIDATION_IMAGES = 'validation-images-and-ubyte_4.gz'
    VALIDATION_LABELS = 'validation-labels-and_ubyte_4.gz'
    TEST_IMAGES = 'testing-images-and-ubyte_4.gz'
    TEST_LABELS = 'testing-labels-and-ubyte_4.gz'

    with open(train_dir + TRAIN_IMAGES, 'rb') as f:
        train_images = extract_images(f)

    with open(train_dir + TRAIN_LABELS, 'rb') as f:
        train_labels = extract_labels(f, one_hot=one_hot)

    with open(train_dir + VALIDATION_IMAGES, 'rb') as f:
        validation_images = extract_images(f)

    with open(train_dir + VALIDATION_LABELS, 'rb') as f:
        validation_labels = extract_labels(f, one_hot=one_hot)

    with open(train_dir + TEST_IMAGES, 'rb') as f:
        test_images = extract_images(f)

    with open(train_dir + TEST_LABELS, 'rb') as f:
        test_labels = extract_labels(f, one_hot=one_hot)
    # The validation split is read from its own files above rather than
    # carved out of the training set, so no slicing is needed here.

    train = DataSet(train_images, train_labels, dtype=dtype, reshape=reshape)
    validation = DataSet(validation_images,
                         validation_labels,
                         dtype=dtype,
                         reshape=reshape)
    test = DataSet(test_images, test_labels, dtype=dtype, reshape=reshape)

    return base.Datasets(train=train, validation=validation, test=test)
Example #6
def read_data_sets(train_dir,
                   fake_data=False,
                   one_hot=False,
                   dtype=dtypes.float32,
                   reshape=True,
                   validation_size=5000):
  if fake_data:

    def fake():
      return DataSet([], [], fake_data=True, one_hot=one_hot, dtype=dtype)

    train = fake()
    validation = fake()
    test = fake()
    return base.Datasets(train=train, validation=validation, test=test)

  TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
  TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
  TEST_IMAGES = 't10k-images-idx3-ubyte.gz'
  TEST_LABELS = 't10k-labels-idx1-ubyte.gz'

  local_file = base.maybe_download(TRAIN_IMAGES, train_dir,
                                   SOURCE_URL + TRAIN_IMAGES)
  with open(local_file, 'rb') as f:
    train_images = extract_images(f)

  local_file = base.maybe_download(TRAIN_LABELS, train_dir,
                                   SOURCE_URL + TRAIN_LABELS)
  with open(local_file, 'rb') as f:
    train_labels = extract_labels(f, one_hot=one_hot)

  local_file = base.maybe_download(TEST_IMAGES, train_dir,
                                   SOURCE_URL + TEST_IMAGES)
  with open(local_file, 'rb') as f:
    test_images = extract_images(f)

  local_file = base.maybe_download(TEST_LABELS, train_dir,
                                   SOURCE_URL + TEST_LABELS)
  with open(local_file, 'rb') as f:
    test_labels = extract_labels(f, one_hot=one_hot)

  if not 0 <= validation_size <= len(train_images):
    raise ValueError(
        'Validation size should be between 0 and {}. Received: {}.'
        .format(len(train_images), validation_size))

  validation_images = train_images[:validation_size]
  validation_labels = train_labels[:validation_size]
  train_images = train_images[validation_size:]
  train_labels = train_labels[validation_size:]

  train = DataSet(train_images, train_labels, dtype=dtype, reshape=reshape)
  validation = DataSet(validation_images,
                       validation_labels,
                       dtype=dtype,
                       reshape=reshape)
  test = DataSet(test_images, test_labels, dtype=dtype, reshape=reshape)

  return base.Datasets(train=train, validation=validation, test=test)
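This one is essentially the stock TF 1.x MNIST loader; a minimal usage sketch (the directory is a placeholder):

mnist = read_data_sets('/tmp/mnist_data', one_hot=True)
batch_xs, batch_ys = mnist.train.next_batch(100)
print(mnist.validation.num_examples)  # 5000 with the default split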
Example #7
def read_data_sets(train_dir,
                   fake_data=False,
                   one_hot=False,
                   dtype=dtypes.float32,
                   reshape=True,
                   validation_size=5000):
    if fake_data:

        def fake():
            return DataSet([], [],
                           fake_data=True,
                           one_hot=one_hot,
                           dtype=dtype)

        train = fake()
        validation = fake()
        test = fake()
        return base.Datasets(train=train, validation=validation, test=test)

    TRAIN_IMAGES = 'training-images-and-ubyte_19.gz'
    TRAIN_LABELS = 'training-labels-and-ubyte_19.gz'
    #TRAIN_IMAGES = 'testing-images-and-ubyte_19.gz'
    #TRAIN_LABELS = 'testing-labels-and-ubyte_19.gz'
    TEST_IMAGES = 'testing-images-and-ubyte_19.gz'
    TEST_LABELS = 'testing-labels-and-ubyte_19.gz'

    with open(train_dir + TRAIN_IMAGES, 'rb') as f:
        train_images = extract_images(f)

    with open(train_dir + TRAIN_LABELS, 'rb') as f:
        train_labels = extract_labels(f, one_hot=one_hot)

    with open(train_dir + TEST_IMAGES, 'rb') as f:
        test_images = extract_images(f)

    with open(train_dir + TEST_LABELS, 'rb') as f:
        test_labels = extract_labels(f, one_hot=one_hot)

    if not 0 <= validation_size <= len(train_images):
        raise ValueError(
            'Validation size should be between 0 and {}. Received: {}.'.format(
                len(train_images), validation_size))

    validation_images = train_images[:validation_size]
    validation_labels = train_labels[:validation_size]
    train_images = train_images[validation_size:]
    train_labels = train_labels[validation_size:]

    train = DataSet(train_images, train_labels, dtype=dtype, reshape=reshape)
    validation = DataSet(validation_images,
                         validation_labels,
                         dtype=dtype,
                         reshape=reshape)
    test = DataSet(test_images, test_labels, dtype=dtype, reshape=reshape)

    return base.Datasets(train=train, validation=validation, test=test)
Example #8
def read_data_sets(train_dir,
                   fake_data=False,
                   one_hot=False,
                   dtype=dtypes.float32,
                   reshape=True,
                   validation_size=5000,
                   seed=None):
    if fake_data:

        def fake():
            return DataSet([], [],
                           fake_data=True,
                           one_hot=one_hot,
                           dtype=dtype,
                           seed=seed)

        train = fake()
        validation = fake()
        test = fake()
        return base.Datasets(train=train, validation=validation, test=test)

    TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
    TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
    TEST_IMAGES = 't10k-images-idx3-ubyte.gz'
    TEST_LABELS = 't10k-labels-idx1-ubyte.gz'

    HOST = "192.168.205.185"
    NAME_NODE_PORT = 50070
    client = hdfs.Client('http://{}:{}'.format(HOST, NAME_NODE_PORT))

    with client.read(train_dir + "/" + TRAIN_IMAGES) as f:
        train_images = extract_images(f)

    with client.read(train_dir + "/" + TRAIN_LABELS) as f:
        train_labels = extract_labels(f, one_hot=one_hot)

    with client.read(train_dir + "/" + TEST_IMAGES) as f:
        test_images = extract_images(f)

    with client.read(train_dir + "/" + TEST_LABELS) as f:
        test_labels = extract_labels(f, one_hot=one_hot)

    if not 0 <= validation_size <= len(train_images):
        raise ValueError(
            'Validation size should be between 0 and {}. Received: {}.'.format(
                len(train_images), validation_size))

    validation_images = train_images[:validation_size]
    validation_labels = train_labels[:validation_size]
    train_images = train_images[validation_size:]
    train_labels = train_labels[validation_size:]

    options = dict(dtype=dtype, reshape=reshape, seed=seed)

    train = DataSet(train_images, train_labels, **options)
    validation = DataSet(validation_images, validation_labels, **options)
    test = DataSet(test_images, test_labels, **options)

    return base.Datasets(train=train, validation=validation, test=test)
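The hdfs package is usually driven through one of Client's subclasses; a hedged sketch with InsecureClient (host, port, user, and paths are placeholders):

from hdfs import InsecureClient

client = InsecureClient('http://namenode.example.com:50070', user='hadoop')
# client.read() is a context manager that yields a file-like reader, which
# is why the loader above can hand it straight to extract_images.
with client.read('/data/mnist/train-images-idx3-ubyte.gz') as reader:
    train_images = extract_images(reader)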
Example #9
def read_data_sets(train_dir,
                   fake_data=False,
                   one_hot=False,
                   dtype=dtypes.float32,
                   reshape=True,
                   validation_size=5000):
  if fake_data:

    def fake():
      return DataSet([], [], fake_data=True, one_hot=one_hot, dtype=dtype)

    train = fake()
    validation = fake()
    test = fake()
    return base.Datasets(train=train, validation=validation, test=test)


  print('Starting download...')
  bucket = 'my-test-bucket'
  TRAIN_IMAGES = obj_tf.s3.download(bucket,'train-images-idx3-ubyte.gz')
  TRAIN_LABELS = obj_tf.s3.download(bucket,'train-labels-idx1-ubyte.gz')
  TEST_IMAGES = obj_tf.s3.download(bucket,'t10k-images-idx3-ubyte.gz')
  TEST_LABELS = obj_tf.s3.download(bucket,'t10k-labels-idx1-ubyte.gz')
  print('Done downloading...')

  local_file = TRAIN_IMAGES
  with open(local_file, 'rb') as f:
    train_images = extract_images(f)

  local_file = TRAIN_LABELS
  with open(local_file, 'rb') as f:
    train_labels = extract_labels(f, one_hot=one_hot)

  local_file = TEST_IMAGES
  with open(local_file, 'rb') as f:
    test_images = extract_images(f)

  local_file = TEST_LABELS
  with open(local_file, 'rb') as f:
    test_labels = extract_labels(f, one_hot=one_hot)

  if not 0 <= validation_size <= len(train_images):
    raise ValueError(
        'Validation size should be between 0 and {}. Received: {}.'
        .format(len(train_images), validation_size))

  validation_images = train_images[:validation_size]
  validation_labels = train_labels[:validation_size]
  train_images = train_images[validation_size:]
  train_labels = train_labels[validation_size:]

  train = DataSet(train_images, train_labels, dtype=dtype, reshape=reshape)
  validation = DataSet(validation_images,
                       validation_labels,
                       dtype=dtype,
                       reshape=reshape)
  test = DataSet(test_images, test_labels, dtype=dtype, reshape=reshape)

  return base.Datasets(train=train, validation=validation, test=test)
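obj_tf is not a library I can verify, so treat its s3.download calls as repository-local helpers; the same download step with boto3 would look roughly like this (bucket, key, and local path are placeholders):

import boto3

s3 = boto3.client('s3')
s3.download_file('my-test-bucket', 'train-images-idx3-ubyte.gz',
                 '/tmp/train-images-idx3-ubyte.gz')
TRAIN_IMAGES = '/tmp/train-images-idx3-ubyte.gz'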
Example #10
    def split_dataset(self, dtype=dtypes.float32, reshape=True, seed=None, validation_size=7000):

        # SPLIT FIRST GROUP (classes 0-4)
        # Find all training images/labels for classes 0-4:
        # np.nonzero(one_hot)[1] recovers each row's class index.
        train_labels_idx = np.nonzero(self.dataset.train.labels)[1]
        train_labels_idx = np.nonzero(train_labels_idx < 5)[0]
        train_labels = self.dataset.train.labels[train_labels_idx]
        train_images = self.dataset.train.images[train_labels_idx]

        # Find all testing images/labels for classes 0-4
        test_labels_idx = np.nonzero(self.dataset.test.labels)[1]
        test_labels_idx = np.nonzero(test_labels_idx < 5)[0]
        test_labels = self.dataset.test.labels[test_labels_idx] 
        test_images = self.dataset.test.images[test_labels_idx] 

        # Create validation/training groups 
        validation_images = train_images[:validation_size]
        validation_labels = train_labels[:validation_size]
        train_images = train_images[validation_size:]
        train_labels = train_labels[validation_size:]

        # Note: reshape is pinned to False here, overriding the reshape argument.
        options = dict(dtype=dtype, reshape=False, seed=seed)

        # Define training, validation, and testing datasets  
        train = DataSet(train_images, train_labels, **options)
        validation = DataSet(validation_images, validation_labels, **options)
        test = DataSet(test_images, test_labels, **options)

        first_dataset = base.Datasets(train=train, validation=validation, test=test)

        # SPLIT SECOND GROUP (5-9)
        # Find all training images/labels 5-9 
        train_labels_idx = np.nonzero(self.dataset.train.labels)[1]
        train_labels_idx = np.nonzero(train_labels_idx >= 5)[0]
        train_labels_2 = self.dataset.train.labels[train_labels_idx]
        train_images_2 = self.dataset.train.images[train_labels_idx]

        # Find all testing images/labels 5-9 
        test_labels_idx = np.nonzero(self.dataset.test.labels)[1]
        test_labels_idx = np.nonzero(test_labels_idx >= 5)[0]
        test_labels_2 = self.dataset.test.labels[test_labels_idx] 
        test_images_2 = self.dataset.test.images[test_labels_idx] 

        # Create validation/training groups 
        validation_images_2 = train_images_2[:validation_size]
        validation_labels_2 = train_labels_2[:validation_size]
        train_images_2 = train_images_2[validation_size:]
        train_labels_2 = train_labels_2[validation_size:]

        # Define training, validation, and testing datasets  
        train_2 = DataSet(train_images_2, train_labels_2, **options)
        validation_2 = DataSet(validation_images_2, validation_labels_2, **options)
        test_2 = DataSet(test_images_2, test_labels_2, **options)

        second_dataset = base.Datasets(train=train_2, validation=validation_2, test=test_2)

        return first_dataset, second_dataset 
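The nonzero-based selection above relies on the labels being one-hot; np.argmax is the clearer equivalent (a standalone sketch, not from the original source):

import numpy as np

labels = np.eye(10)[[3, 7, 1]]                   # three one-hot rows
class_ids = np.argmax(labels, axis=1)            # array([3, 7, 1])
first_group_idx = np.nonzero(class_ids < 5)[0]   # rows with classes 0-4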
Example #11
def read_csv_data_sets(train_dir,
                       num_classes=2,
                       day_len=2,
                       dup=1,
                       fake_data=False,
                       one_hot=True,
                       dtype=dtypes.float64,
                       reshape=False,
                       validation_size=50,
                       seed=None):
    if fake_data:

        def fake():
            return DataSet([], [],
                           fake_data=True,
                           one_hot=one_hot,
                           dtype=dtype,
                           seed=seed)

        train = fake()
        validation = fake()
        test = fake()
        return base.Datasets(train=train, validation=validation, test=test)

    TRAIN_CSV = 'train.csv.gz'
    TEST_CSV = 'test.csv.gz'

    train_file = os.path.join(train_dir, TRAIN_CSV)
    test_file = os.path.join(train_dir, TEST_CSV)
    print('train_file:', train_file)
    print('test_file:', test_file)

    train_images, train_labels = read_csv_images_lables(
        train_file, day_len, dup)
    test_images, test_labels = read_csv_images_lables(test_file, day_len, dup)
    if one_hot:
        train_labels = dense_to_one_hot(train_labels, num_classes)
        test_labels = dense_to_one_hot(test_labels, num_classes)

    if not 0 <= validation_size <= len(train_images):
        raise ValueError(
            'Validation size should be between 0 and {}. Received: {}.'.format(
                len(train_images), validation_size))

    validation_images = train_images[:validation_size]
    validation_labels = train_labels[:validation_size]
    train_images = train_images[validation_size:]
    train_labels = train_labels[validation_size:]

    options = dict(dtype=dtype, reshape=reshape, seed=seed)

    train = DataSet(train_images, train_labels, **options)
    validation = DataSet(validation_images, validation_labels, **options)
    test = DataSet(test_images, test_labels, **options)

    return base.Datasets(train=train, validation=validation, test=test)
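dense_to_one_hot is not shown in this listing; a hedged sketch of what it presumably does, mirroring the helper of the same name in TensorFlow's MNIST input_data module:

import numpy as np

def dense_to_one_hot(labels_dense, num_classes):
    # Scatter a 1 into each row at the position given by the dense label.
    num_labels = labels_dense.shape[0]
    index_offset = np.arange(num_labels) * num_classes
    labels_one_hot = np.zeros((num_labels, num_classes))
    labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
    return labels_one_hot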
Example #12
def read_data_sets(train_dir,
                   fake_data=False,
                   one_hot=False,
                   dtype=dtypes.float32,
                   reshape=True,
                   validation_size=50,
                   seed=None):
  if fake_data:

    def fake():
      return DataSet(
          [], [], fake_data=True, one_hot=one_hot, dtype=dtype, seed=seed)

    train = fake()
    validation = fake()
    test = fake()
    return base.Datasets(train=train, validation=validation, test=test)

  # Modified section: the image files
  TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
  TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
  TEST_IMAGES = 'test-images-idx3-ubyte.gz'
  TEST_LABELS = 'test-labels-idx1-ubyte.gz'

  local_file = os.path.join(train_dir, TRAIN_IMAGES)
  with gfile.Open(local_file, 'rb') as f:
    train_images = extract_images(f)

  local_file = os.path.join(train_dir, TRAIN_LABELS)
  with gfile.Open(local_file, 'rb') as f:
    train_labels = extract_labels(f, one_hot=one_hot)

  local_file = os.path.join(train_dir, TEST_IMAGES)
  with gfile.Open(local_file, 'rb') as f:
    test_images = extract_images(f)

  local_file = os.path.join(train_dir, TEST_LABELS)
  with gfile.Open(local_file, 'rb') as f:
    test_labels = extract_labels(f, one_hot=one_hot)

  if not 0 <= validation_size <= len(train_images):
    raise ValueError('Validation size should be between 0 and {}. Received: {}.'
                     .format(len(train_images), validation_size))

  validation_images = train_images[:validation_size]
  validation_labels = train_labels[:validation_size]
  train_images = train_images[validation_size:]
  train_labels = train_labels[validation_size:]

  options = dict(dtype=dtype, reshape=reshape, seed=seed)

  train = DataSet(train_images, train_labels, **options)
  validation = DataSet(validation_images, validation_labels, **options)
  test = DataSet(test_images, test_labels, **options)

  return base.Datasets(train=train, validation=validation, test=test)
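gfile.Open is what lets this loader read from remote filesystems as well as local disk; a sketch with a GCS path (the bucket name is a placeholder, and GCS support depends on how TensorFlow was built):

from tensorflow.python.platform import gfile

with gfile.Open('gs://my-bucket/mnist/train-images-idx3-ubyte.gz', 'rb') as f:
    train_images = extract_images(f)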
Example #13
def read_data_sets(train_images_filename,
                   train_labels_filename,
                   test_images_filename,
                   test_labels_filename,
                   train_dir,
                   fake_data=False,
                   one_hot=False,
                   dtype=dtypes.float32,
                   reshape=True,
                   seed=None):
    if fake_data:

        def fake():
            return DataSet([], [],
                           fake_data=True,
                           one_hot=one_hot,
                           dtype=dtype,
                           seed=seed)

        train = fake()
        validation = fake()
        test = fake()
        # base.Datasets is a three-field namedtuple, so validation must be supplied.
        return base.Datasets(train=train, validation=validation, test=test)

    local_file = base.maybe_download(train_images_filename, train_dir,
                                     SOURCE_URL + train_images_filename)
    with open(local_file, 'rb') as f:
        train_images = extract_images(f)

    local_file = base.maybe_download(train_labels_filename, train_dir,
                                     SOURCE_URL + train_labels_filename)
    with open(local_file, 'rb') as f:
        train_labels = extract_labels(f, one_hot=one_hot)

    local_file = base.maybe_download(test_images_filename, train_dir,
                                     SOURCE_URL + test_images_filename)
    with open(local_file, 'rb') as f:
        test_images = extract_images(f)

    local_file = base.maybe_download(test_labels_filename, train_dir,
                                     SOURCE_URL + test_labels_filename)
    with open(local_file, 'rb') as f:
        test_labels = extract_labels(f, one_hot=one_hot)

    train = DataSet(train_images,
                    train_labels,
                    dtype=dtype,
                    reshape=reshape,
                    seed=seed)

    test = DataSet(test_images,
                   test_labels,
                   dtype=dtype,
                   reshape=reshape,
                   seed=seed)

    # No held-out validation split; the test set doubles as validation.
    return base.Datasets(train=train, validation=test, test=test)
Example #14
def read_data_sets(train_dir,
                   fake_data=False,
                   one_hot=False,
                   dtype=dtypes.float32,
                   reshape=False,
                   validation_size=5000,
                   worker_id=-1,
                   n_workers=-1):
  if fake_data:

    def fake():
      return DataSet([], [], fake_data=True, one_hot=one_hot, dtype=dtype)

    train = fake()
    validation = fake()
    test = fake()
    return base.Datasets(train=train, validation=validation, test=test)

  TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
  TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
  TEST_IMAGES = 't10k-images-idx3-ubyte.gz'
  TEST_LABELS = 't10k-labels-idx1-ubyte.gz'

  local_file = base.maybe_download(TRAIN_IMAGES, train_dir,
                                   SOURCE_URL + TRAIN_IMAGES)
  train_images = extract_data(local_file, 60000)

  local_file = base.maybe_download(TRAIN_LABELS, train_dir,
                                   SOURCE_URL + TRAIN_LABELS)
  train_labels = extract_labels(local_file, 60000)

  local_file = base.maybe_download(TEST_IMAGES, train_dir,
                                   SOURCE_URL + TEST_IMAGES)
  test_images = extract_data(local_file, 10000)

  local_file = base.maybe_download(TEST_LABELS, train_dir,
                                   SOURCE_URL + TEST_LABELS)
  test_labels = extract_labels(local_file, 10000)

  if not 0 <= validation_size <= len(train_images):
    raise ValueError(
        'Validation size should be between 0 and {}. Received: {}.'
        .format(len(train_images), validation_size))

  # No slice is taken out of the training data in this variant; the test
  # set doubles as the validation set and no separate test split is kept.
  validation_images = test_images
  validation_labels = test_labels

  train = DataSet(train_images, train_labels, dtype=dtype, reshape=reshape)
  validation = DataSet(validation_images, validation_labels,
                       dtype=dtype, reshape=reshape)

  return base.Datasets(train=train, validation=validation, test=None)
Example #15
def read_data_sets(train_dir,
                   fake_data=False,
                   one_hot=False,
                   dtype=dtypes.float32,
                   reshape=True):
    if fake_data:

        def fake():
            return DataSet([], [],
                           fake_data=True,
                           one_hot=one_hot,
                           dtype=dtype)

        train = fake()
        validation = fake()
        test = fake()
        return base.Datasets(train=train, validation=validation, test=test)

    TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
    TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
    TEST_IMAGES = 't10k-images-idx3-ubyte.gz'
    TEST_LABELS = 't10k-labels-idx1-ubyte.gz'
    VALIDATION_SIZE = 5000

    local_file = base.maybe_download(TRAIN_IMAGES, train_dir,
                                     SOURCE_URL + TRAIN_IMAGES)
    train_images = extract_images(local_file)

    local_file = base.maybe_download(TRAIN_LABELS, train_dir,
                                     SOURCE_URL + TRAIN_LABELS)
    train_labels = extract_labels(local_file, one_hot=one_hot)

    local_file = base.maybe_download(TEST_IMAGES, train_dir,
                                     SOURCE_URL + TEST_IMAGES)
    test_images = extract_images(local_file)

    local_file = base.maybe_download(TEST_LABELS, train_dir,
                                     SOURCE_URL + TEST_LABELS)
    test_labels = extract_labels(local_file, one_hot=one_hot)

    validation_images = train_images[:VALIDATION_SIZE]
    validation_labels = train_labels[:VALIDATION_SIZE]
    train_images = train_images[VALIDATION_SIZE:]
    train_labels = train_labels[VALIDATION_SIZE:]

    train = DataSet(train_images, train_labels, dtype=dtype, reshape=reshape)
    validation = DataSet(validation_images,
                         validation_labels,
                         dtype=dtype,
                         reshape=reshape)
    test = DataSet(test_images, test_labels, dtype=dtype, reshape=reshape)

    return base.Datasets(train=train, validation=validation, test=test)
Example #16
def read_ext_data_sets(train_dir,
                       fake_data=False,
                       one_hot=False,
                       dtype=dtypes.float32,
                       reshape=True):
    if fake_data:

        def fake():
            return DataSet([], [],
                           fake_data=True,
                           one_hot=one_hot,
                           dtype=dtype)

        train = fake()
        validation = fake()
        test = fake()
        return base.Datasets(train=train, validation=validation, test=test)

    VALIDATION_SIZE = 5000

    # The maybe_download calls from the stock loader are replaced with
    # fixed local paths throughout this variant.
    local_file = '/home/skynet0/data/train_images_ubyte.gz'
    train_images = extract_images(local_file)

    local_file = '/home/skynet0/data/train_labels_ubyte.gz'
    train_labels = extract_labels(local_file, one_hot=one_hot)

    local_file = '/home/skynet0/data/test_images_ubyte.gz'
    test_images = extract_images(local_file)

    local_file = '/home/skynet0/data/test_labels_ubyte.gz'
    test_labels = extract_labels(local_file, one_hot=one_hot)

    validation_images = train_images[:VALIDATION_SIZE]
    validation_labels = train_labels[:VALIDATION_SIZE]
    train_images = train_images[VALIDATION_SIZE:]
    train_labels = train_labels[VALIDATION_SIZE:]

    train = DataSet(train_images, train_labels, dtype=dtype, reshape=reshape)
    validation = DataSet(validation_images,
                         validation_labels,
                         dtype=dtype,
                         reshape=reshape)
    test = DataSet(test_images, test_labels, dtype=dtype, reshape=reshape)

    return base.Datasets(train=train, validation=validation, test=test)
Example #17
def read_data_sets(train_dir,
                   fake_data=False,
                   one_hot=False,
                   dtype=dtypes.float32,
                   reshape=True,
                   validation_size=0):
  if fake_data:

    def fake():
      return DataSet([], [], fake_data=True, one_hot=one_hot, dtype=dtype)

    train = fake()
    validation = fake()
    test = fake()
    return base.Datasets(train=train, validation=validation, test=test)


  import pickle
  import tarfile

  gz_file_name = 'cifar-10-python.tar.gz'

  local_file = base.maybe_download(gz_file_name, train_dir,
                                   SOURCE_URL + gz_file_name)

  # Unpack the archive once so the pickled batches are available on disk.
  batches_dir = os.path.join(train_dir, 'cifar-10-batches-py')
  if not os.path.isdir(batches_dir):
    with tarfile.open(local_file, 'r:gz') as tar:
      tar.extractall(train_dir)

  train_images = []
  train_labels = []
  for i in range(1, 6):
    # Each CIFAR-10 batch is a pickled dict; open it in binary mode and,
    # under Python 3, decode the Python 2 pickle with bytes keys.
    with open(os.path.join(batches_dir, 'data_batch_%d' % i), 'rb') as f:
      batch = pickle.load(f, encoding='bytes')
      # Rows are flat 3072-byte images in CHW order; convert to NHWC.
      tmp_images = batch[b'data'].reshape([-1, 3, 32, 32])
      train_images.append(tmp_images.transpose([0, 2, 3, 1]))
      train_labels += batch[b'labels']
  train_images = numpy.concatenate(train_images)
  train_labels = numpy.array(train_labels)

  if not 0 <= validation_size <= len(train_images):
    raise ValueError(
        'Validation size should be between 0 and {}. Received: {}.'
        .format(len(train_images), validation_size))

  validation_images = train_images[:validation_size]
  validation_labels = train_labels[:validation_size]
  train_images = train_images[validation_size:]
  train_labels = train_labels[validation_size:]

  train = DataSet(train_images, train_labels, dtype=dtype, reshape=reshape)
  validation = DataSet(validation_images,
                       validation_labels,
                       dtype=dtype,
                       reshape=reshape)
  #test = DataSet(test_images, test_labels, dtype=dtype, reshape=reshape)
  test = None

  return base.Datasets(train=train, validation=validation, test=test)
Example #18
def read_data_sets(
    fake_data=False,
    one_hot=False,
    dtype=dtypes.float32,
    reshape=True,
    validation_size=5000,
    seed=None,
):
    if fake_data:

        def fake():
            return DataSet([], [],
                           fake_data=True,
                           one_hot=one_hot,
                           dtype=dtype,
                           seed=seed)

        train = fake()
        validation = fake()
        test = fake()
        return base.Datasets(train=train, validation=validation, test=test)

    # train_data_dir, train_labels_dir, eval_data_dir, and eval_labels_dir are
    # presumably module-level path constants; they are not defined in this snippet.
    with gfile.Open(train_data_dir, 'rb') as f:
        train_images = extract_images(f)

    with gfile.Open(train_labels_dir, 'rb') as f:
        train_labels = extract_labels(f, one_hot=one_hot)

    with gfile.Open(eval_data_dir, 'rb') as f:
        test_images = extract_images(f)

    with gfile.Open(eval_labels_dir, 'rb') as f:
        test_labels = extract_labels(f, one_hot=one_hot)

    if not 0 <= validation_size <= len(train_images):
        raise ValueError(
            'Validation size should be between 0 and {}. Received: {}.'.format(
                len(train_images), validation_size))

    validation_images = train_images[:validation_size]
    validation_labels = train_labels[:validation_size]
    train_images = train_images[validation_size:]
    train_labels = train_labels[validation_size:]

    options = dict(dtype=dtype, reshape=reshape, seed=seed)

    train = DataSet(train_images, train_labels, **options)
    validation = DataSet(validation_images, validation_labels, **options)
    test = DataSet(test_images, test_labels, **options)

    return base.Datasets(train=train, validation=validation, test=test)
Example #19
def read_data_sets(train_labels_csv,
                   test_labels_csv,
                   fake_data=False,
                   one_hot=False,
                   dtype=dtypes.float32,
                   reshape=True,
                   validation_size=5000,
                   dataset_path='../'):
    """Read HASY data."""
    if fake_data:

        def fake():
            return DataSet([], [],
                           fake_data=True,
                           one_hot=one_hot,
                           dtype=dtype)

        train = fake()
        validation = fake()
        test = fake()
        return base.Datasets(train=train, validation=validation, test=test)

    symbol_id2index = generate_index(os.path.join(dataset_path, 'symbols.csv'))
    test_images, test_labels, _ = load_images(test_labels_csv, symbol_id2index)
    train_images, train_labels, _ = load_images(train_labels_csv,
                                                symbol_id2index)

    if not 0 <= validation_size <= len(train_images):
        raise ValueError(
            'Validation size should be between 0 and {}. Received: {}.'.format(
                len(train_images), validation_size))
    # Shuffle data
    perm = np.arange(len(train_labels))
    np.random.shuffle(perm)
    train_images = train_images[perm]
    train_labels = train_labels[perm]
    # Split training set in training and validation set
    validation_images = train_images[:validation_size]
    validation_labels = train_labels[:validation_size]
    train_images = train_images[validation_size:]
    train_labels = train_labels[validation_size:]

    train = DataSet(train_images, train_labels, dtype=dtype, reshape=reshape)
    validation = DataSet(validation_images,
                         validation_labels,
                         dtype=dtype,
                         reshape=reshape)
    test = DataSet(test_images, test_labels, dtype=dtype, reshape=reshape)

    return base.Datasets(train=train, validation=validation, test=test)
Example #20
def read_data_sets(train_dir,
                   fake_data=False,
                   one_hot=False,
                   dtype=dtypes.float32,
                   reshape=True,
                   validation_rate=0.1,
                   seed=None,
                   mode="fire"):
    if fake_data:

        def fake():
            return DataSet([], [],
                           fake_data=True,
                           one_hot=one_hot,
                           dtype=dtype,
                           seed=seed)

        train = fake()
        validation = fake()
        test = fake()
        return base.Datasets(train=train, validation=validation, test=test)

    base_path = train_dir
    TRAIN_IMAGES = base_path + mode + '-images-idx3-ubyte.gz'
    TRAIN_LABELS = base_path + mode + '-labels-idx1-ubyte.gz'
    local_file = TRAIN_IMAGES
    with gfile.Open(local_file, 'rb') as f:
        train_images = extract_images(f)

    local_file = TRAIN_LABELS
    with gfile.Open(local_file, 'rb') as f:
        train_labels = extract_labels(f, one_hot=one_hot)
    validation_size = int(validation_rate * len(train_images))

    if not 0 <= validation_size <= len(train_images):
        raise ValueError(
            'Validation size should be between 0 and {}. Received: {}.'.format(
                len(train_images), validation_size))

    validation_images = train_images[:validation_size]
    validation_labels = train_labels[:validation_size]
    train_images = train_images[validation_size:]
    train_labels = train_labels[validation_size:]

    options = dict(dtype=dtype, reshape=reshape, seed=seed)

    train = DataSet(train_images, train_labels, **options)
    validation = DataSet(validation_images, validation_labels, **options)
    # No test files in this variant; the validation split doubles as the test set.
    return base.Datasets(train=train, validation=validation, test=validation)
Example #21
def read_data_sets(train_dir,
                   fake_data=False,
                   one_hot=False,
                   dtype=dtypes.float32,
                   reshape=False,
                   validation_size=10):
    if fake_data:

        def fake():
            return DataSet([], [],
                           fake_data=True,
                           one_hot=one_hot,
                           dtype=dtype)

        train = fake()
        validation = fake()
        test = fake()
        return base.Datasets(train=train, validation=validation, test=test)

    train_file = glob(train_dir)
    train_images, train_labels = extract_images(train_file)
    train_labels = extract_labels(train_labels, one_hot=one_hot)
    print(train_images.shape)

    # TEST_IMAGES = (['test_data/rb.HOT.15m(1).csv'])
    test_file = glob(cfg.test_dataset)
    test_images, test_labels = extract_images(test_file)
    test_labels = extract_labels(test_labels, one_hot=one_hot)

    if not 0 <= validation_size <= len(train_images):
        raise ValueError(
            'Validation size should be between 0 and {}. Received: {}.'.format(
                len(train_images), validation_size))

    validation_images = train_images[:validation_size]
    validation_labels = train_labels[:validation_size]
    train_images = train_images[validation_size:]
    train_labels = train_labels[validation_size:]

    train = DataSet(train_images, train_labels, dtype=dtype, reshape=reshape)
    validation = DataSet(validation_images,
                         validation_labels,
                         dtype=dtype,
                         reshape=reshape)
    test = DataSet(test_images, test_labels, dtype=dtype, reshape=reshape)

    return base.Datasets(train=train, validation=validation, test=test)
Example #22
def augment_mnist_data(dataset, augmenter, augmented_ratio=1):
	"""
	Augments a data set and returns it.
	:param dataset: The data set that needs to be augmented.
	:param augmenter: The augmenter to apply.
	:param augmented_ratio: How many times the data set is replicated.
	:return: The augmented data set.
	"""
	train_images = []
	train_labels = []

	training_length = len(dataset.train.images)
	# Loop over all training images (replicated augmented_ratio times)
	for i in tqdm(range(augmented_ratio * training_length), desc="Augmenting images", unit="image"):
		# Augment images
		train_images.append(
			np.reshape(augmenter.augment_image(dataset.train.images[i % training_length].reshape(28, 28, 1)), 784))

		# Append the corresponding label
		train_labels.append(dataset.train.labels[i % training_length])

	train = Dataset(train_images, train_labels)

	test = dataset.test
	return base.Datasets(train=train, test=test, validation=None)
Example #23
def load_fashion_mnist_A(validation_size=5000):

    (train_images, train_labels), (test_images, test_labels) = \
        tf.keras.datasets.fashion_mnist.load_data()
    # Note: Keras was only bundled into TensorFlow from 1.4, and
    # fashion_mnist reached keras.datasets in a later release still,
    # so this loader needs a reasonably recent TensorFlow.

    if not 0 <= validation_size <= len(train_images):
        raise ValueError(
            'Validation size should be between 0 and {}. Received: {}.'.format(
                len(train_images), validation_size))

    validation_images = train_images[:validation_size]
    validation_labels = train_labels[:validation_size]
    train_images = train_images[validation_size:]
    train_labels = train_labels[validation_size:]

    train_images = train_images.astype(np.float32) / 255
    validation_images = validation_images.astype(np.float32) / 255
    test_images = test_images.astype(np.float32) / 255

    train = DataSet(train_images, train_labels)
    validation = DataSet(validation_images, validation_labels)
    test = DataSet(test_images, test_labels)

    return base.Datasets(train=train, validation=validation, test=test)
Example #24
def get_feature_vectors(model):
    train_feature_vectors = np.concatenate(
        (model.sess.run(model.feature_vector,
                        feed_dict=model.all_train_feed_dict),
         model.sess.run(model.feature_vector,
                        feed_dict=model.all_validation_feed_dict)))
    validation_feature_vectors = np.empty([0, 32])
    test_feature_vectors = model.sess.run(model.feature_vector,
                                          feed_dict=model.all_test_feed_dict)
    # validation_feature_vectors = model.sess.run(model.feature_vector, feed_dict=model.all_validation_feed_dict)

    train_labels = np.concatenate(
        (model.data_sets.train.labels, model.data_sets.validation.labels))
    validation_labels = np.empty([0])
    test_labels = model.data_sets.test.labels

    print('train_feature_vectors.shape', train_feature_vectors.shape)
    print('test_feature_vectors.shape', test_feature_vectors.shape)
    print('validation_feature_vectors.shape', validation_feature_vectors.shape)

    print('train_labels.shape', train_labels.shape)
    print('test_labels.shape', test_labels.shape)
    print('validation_labels.shape', validation_labels.shape)

    train = DataSet(train_feature_vectors, train_labels)
    validation = DataSet(validation_feature_vectors, validation_labels)
    test = DataSet(test_feature_vectors, test_labels)

    return base.Datasets(train=train, validation=validation, test=test)
Example #25
def read_data_sets(dataset_tf_dir,
                   dtype=dtypes.float32,
                   reshape=True,
                   seed=None):


  total_images = extract_images(dataset_tf_dir, if_training=True)
  total_labels = extract_labels(dataset_tf_dir, if_training=True)
  total_idx = list(range(len(total_labels)))
  # Hold out every 10th sample (10%) as the test set.
  test_images = total_images[::10]
  test_labels = total_labels[::10]
  test_idx = total_idx[::10]
  # Train on the remainder; sort the indices so ordering is deterministic.
  train_idx = sorted(set(total_idx) - set(test_idx))
  train_images = total_images[numpy.array(train_idx)]
  train_labels = total_labels[numpy.array(train_idx)]
  validation_images = extract_images(dataset_tf_dir,if_training=False)
  validation_labels = extract_labels(dataset_tf_dir,if_training=False)


  train = DataSet(
      train_images, train_labels, dtype=dtype, reshape=reshape, seed=seed)
  validation = DataSet(
      validation_images,
      validation_labels,
      dtype=dtype,
      reshape=reshape,
      seed=seed)
  test = DataSet(
      test_images, test_labels, dtype=dtype, reshape=reshape, seed=seed)

  return base.Datasets(train=train, validation=validation, test=test)
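The stride-10 slicing above carves out every 10th sample (10%) as the test set; a tiny illustration of the index bookkeeping:

import numpy as np

idx = np.arange(20)
test_idx = idx[::10]                     # array([ 0, 10])
train_idx = np.setdiff1d(idx, test_idx)  # the remaining 18 indices, sorted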
Example #26
def read_data_sets(data_path,
                   val_size=0.1,
                   test_size=0.1,
                   n_steps=10,
                   n_test_steps=10,
                   seed=None):
    print("loading time series ...")
    data = np.load(data_path)
    # Expand the dimension if univariate time series
    if (np.ndim(data) == 1):
        data = np.expand_dims(data, axis=1)
    print("input type ", type(data), np.shape(data))

    # """normalize the data"""
    # print("normalize to (0-1)")
    # data = normalize_columns(data)

    ntest = int(round(len(data) * (1 - test_size)))
    nval = int(round(len(data[:ntest]) * (1 - val_size)))

    # Chronological split: [0, nval) is train, [nval, ntest) is validation,
    # and [ntest, end) is test.
    train_data = data[:nval]
    valid_data = data[nval:ntest]
    test_data = data[ntest:]

    train_options = dict(num_steps=n_steps, seed=seed)
    test_options = dict(num_steps=n_test_steps, seed=seed)
    train = DataSet(train_data, **train_options)
    valid = DataSet(valid_data, **train_options)
    test = DataSet(test_data, **test_options)

    return base.Datasets(train=train, validation=valid, test=test)
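A quick sanity check of the chronological split arithmetic above (a standalone sketch with the default 10%/10% fractions):

import numpy as np

data = np.arange(100.0).reshape(-1, 1)     # toy univariate series
ntest = int(round(len(data) * (1 - 0.1)))  # 90: the last 10 points are test
nval = int(round(ntest * (1 - 0.1)))       # 81: the 9 before those are validation
assert (nval, ntest - nval, len(data) - ntest) == (81, 9, 10)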
Example #27
def dataset_reshaped(data_sets):
  train_images=data_sets.train.x
  train_images=train_images.reshape(train_images.shape[0],28,28,1)

  train_labels=data_sets.train.labels
  n_values = np.max(train_labels) + 1
  train_labels=np.eye(n_values)[train_labels]

  validation_images=data_sets.validation.x
  validation_images=validation_images.reshape(validation_images.shape[0],28,28,1)
  validation_labels=data_sets.validation.labels
  n_values = np.max(validation_labels) + 1
  validation_labels=np.eye(n_values)[validation_labels]

  test_images=data_sets.test.x
  test_images=test_images.reshape(test_images.shape[0],28,28,1)
  test_labels=data_sets.test.labels
  n_values = np.max(test_labels) + 1
  test_labels=np.eye(n_values)[test_labels]

  train = DataSet(train_images, train_labels,size_change=True)
  validation = DataSet(validation_images, validation_labels,size_change=True)
  test = DataSet(test_images, test_labels,size_change=True)

  return base.Datasets(train=train, validation=validation, test=test)
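The np.eye indexing used above is a compact one-hot encoding; a standalone sketch:

import numpy as np

labels = np.array([0, 2, 1])
one_hot = np.eye(labels.max() + 1)[labels]
# array([[1., 0., 0.],
#        [0., 0., 1.],
#        [0., 1., 0.]])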
Example #28
def generate_inception_features(model,
                                poisoned_X_train_subset,
                                labels_subset,
                                batch_size=None):
    poisoned_train = DataSet(poisoned_X_train_subset, labels_subset)
    poisoned_data_sets = base.Datasets(train=poisoned_train,
                                       validation=None,
                                       test=None)

    if batch_size is None:
        batch_size = len(labels_subset)

    num_examples = poisoned_data_sets.train.num_examples
    assert num_examples % batch_size == 0, (
        'batch_size must evenly divide the number of examples')
    num_iter = num_examples // batch_size

    poisoned_data_sets.train.reset_batch()

    inception_features_val = []
    print(np.shape(poisoned_data_sets.train.x))
    for i in range(num_iter):
        feed_dict = model.fill_feed_dict_with_batch(poisoned_data_sets.train,
                                                    batch_size=batch_size)
        inception_features_val_temp = model.sess.run(model.inception_features,
                                                     feed_dict=feed_dict)
        inception_features_val.append(inception_features_val_temp)

    return np.concatenate(inception_features_val)
Example #29
def load_dbpedia(size='small', test_with_fake_data=False):
    """Get DBpedia datasets from CSV files."""
    if not test_with_fake_data:
        data_dir = os.path.join(os.getenv('TF_EXP_BASE_DIR', ''),
                                'dbpedia_data')
        maybe_download_dbpedia(data_dir)

        train_path = os.path.join(data_dir, 'dbpedia_csv', 'train.csv')
        test_path = os.path.join(data_dir, 'dbpedia_csv', 'code.csv')

        if size == 'small':
            # Reduce the size of original data by a factor of 1000.
            base.shrink_csv(train_path, 1000)
            base.shrink_csv(test_path, 1000)
            train_path = train_path.replace('train.csv', 'train_small.csv')
            test_path = test_path.replace('code.csv', 'test_small.csv')
    else:
        module_path = os.path.dirname(__file__)
        train_path = os.path.join(module_path, 'data', 'text_train.csv')
        test_path = os.path.join(module_path, 'data', 'text_test.csv')

    train = base.load_csv_without_header(train_path,
                                         target_dtype=np.int32,
                                         features_dtype=str,
                                         target_column=0)
    test = base.load_csv_without_header(test_path,
                                        target_dtype=np.int32,
                                        features_dtype=str,
                                        target_column=0)

    return base.Datasets(train=train, validation=None, test=test)
Example #30
def read_data_sets(pictures,
                   labels,
                   num_classes,
                   one_hot=False,
                   dtype=dtypes.float32,
                   reshape=True,
                   validation_size=5000):
    if len(pictures) != len(labels):
        raise ValueError('Numbers of pictures and labels do not match')

    if one_hot:
        labels = dense_to_one_hot(labels, num_classes)

    # Hold out 1/8 of the data as the test set.
    test_size = len(pictures) // 8
    test_images = pictures[:test_size]
    test_labels = labels[:test_size]

    train_images = pictures[test_size:]
    train_labels = labels[test_size:]

    # Override the caller-supplied validation_size: 1/7 of the remaining
    # training data, i.e. 1/8 of the original total.
    validation_size = len(train_images) // 7

    validation_images = train_images[:validation_size]
    validation_labels = train_labels[:validation_size]
    train_images = train_images[validation_size:]
    train_labels = train_labels[validation_size:]

    train = DataSet(train_images, train_labels, dtype=dtype, reshape=reshape)
    validation = DataSet(validation_images,
                         validation_labels,
                         dtype=dtype,
                         reshape=reshape)
    test = DataSet(test_images, test_labels, dtype=dtype, reshape=reshape)

    return base.Datasets(train=train, validation=validation, test=test)
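The fractions above work out to a 6:1:1 train/validation/test split; a quick check of the arithmetic:

n = 8000
test_size = n // 8                    # 1000
rest = n - test_size                  # 7000
validation_size = rest // 7           # 1000
train_size = rest - validation_size   # 6000
assert (train_size, validation_size, test_size) == (6000, 1000, 1000)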