Exemplo n.º 1
0
def load_dbpedia(size='small', test_with_fake_data=False):
    """Get DBpedia datasets from CSV files.

    Args:
        size: 'small' shrinks the original CSVs by a factor of 1000 and
            loads the reduced copies; any other value loads the full data.
        test_with_fake_data: If True, load the tiny fixture CSVs shipped
            next to this module instead of downloading the real dataset.

    Returns:
        A base.Datasets namedtuple with train and test populated and
        validation set to None.
    """
    if not test_with_fake_data:
        # Data is cached under $TF_EXP_BASE_DIR/dbpedia_data (cwd if unset).
        data_dir = os.path.join(os.getenv('TF_EXP_BASE_DIR', ''),
                                'dbpedia_data')
        maybe_download_dbpedia(data_dir)

        train_path = os.path.join(data_dir, 'dbpedia_csv', 'train.csv')
        # Bug fix: the extracted archive contains 'test.csv'; there is no
        # 'code.csv' in the DBpedia dataset.
        test_path = os.path.join(data_dir, 'dbpedia_csv', 'test.csv')

        if size == 'small':
            # Reduce the size of original data by a factor of 1000.
            base.shrink_csv(train_path, 1000)
            base.shrink_csv(test_path, 1000)
            train_path = train_path.replace('train.csv', 'train_small.csv')
            test_path = test_path.replace('test.csv', 'test_small.csv')
    else:
        module_path = os.path.dirname(__file__)
        train_path = os.path.join(module_path, 'data', 'text_train.csv')
        test_path = os.path.join(module_path, 'data', 'text_test.csv')

    # np.str (a deprecated alias for the builtin str) was removed in
    # NumPy 1.24; use str directly so this runs on modern NumPy.
    train = base.load_csv_without_header(train_path,
                                         target_dtype=np.int32,
                                         features_dtype=str,
                                         target_column=0)
    test = base.load_csv_without_header(test_path,
                                        target_dtype=np.int32,
                                        features_dtype=str,
                                        target_column=0)

    return base.Datasets(train=train, validation=None, test=test)
Exemplo n.º 2
0
def load_dbpedia(size='small', test_with_fake_data=False):
  """Get DBpedia datasets from CSV files.

  Args:
    size: 'small' shrinks the original CSVs by a factor of 1000 and loads
      the reduced copies; any other value loads the full data.
    test_with_fake_data: If True, load the tiny fixture CSVs shipped next
      to this module instead of downloading the real dataset.

  Returns:
    A base.Datasets namedtuple with train and test populated and
    validation set to None.
  """
  if not test_with_fake_data:
    # Data is cached under $TF_EXP_BASE_DIR/dbpedia_data (cwd if unset).
    data_dir = os.path.join(os.getenv('TF_EXP_BASE_DIR', ''), 'dbpedia_data')
    maybe_download_dbpedia(data_dir)

    train_path = os.path.join(data_dir, 'dbpedia_csv', 'train.csv')
    test_path = os.path.join(data_dir, 'dbpedia_csv', 'test.csv')

    if size == 'small':
      # Reduce the size of original data by a factor of 1000.
      base.shrink_csv(train_path, 1000)
      base.shrink_csv(test_path, 1000)
      train_path = train_path.replace('train.csv', 'train_small.csv')
      test_path = test_path.replace('test.csv', 'test_small.csv')
  else:
    module_path = os.path.dirname(__file__)
    train_path = os.path.join(module_path, 'data', 'text_train.csv')
    test_path = os.path.join(module_path, 'data', 'text_test.csv')

  # np.str (a deprecated alias for the builtin str) was removed in
  # NumPy 1.24; use str directly so this runs on modern NumPy.
  train = base.load_csv_without_header(
      train_path, target_dtype=np.int32, features_dtype=str, target_column=0)
  test = base.load_csv_without_header(
      test_path, target_dtype=np.int32, features_dtype=str, target_column=0)

  return base.Datasets(train=train, validation=None, test=test)
def main(unused_argv):
    """Download the DBpedia dataset and create 1000x-smaller CSV copies.

    Args:
        unused_argv: Command-line arguments; ignored.
    """
    text_datasets.maybe_download_dbpedia("dbpedia_data")
    # Reduce the size of original data by a factor of 1000.
    for csv_path in ("dbpedia_data/dbpedia_csv/train.csv",
                     "dbpedia_data/dbpedia_csv/test.csv"):
        base.shrink_csv(csv_path, 1000)
Exemplo n.º 4
0
def main(unused_argv):
    """Download the DBpedia dataset and create 1000x-smaller CSV copies.

    Args:
        unused_argv: Command-line arguments; ignored.
    """
    text_datasets.maybe_download_dbpedia('dbpedia_data')
    # Reduce the size of original data by a factor of 1000.
    base.shrink_csv('dbpedia_data/dbpedia_csv/train.csv', 1000)
    # Bug fix: the archive extracts 'test.csv'; 'code.csv' does not exist,
    # so the original call would fail to find its input file.
    base.shrink_csv('dbpedia_data/dbpedia_csv/test.csv', 1000)