def load_dbpedia(size='small', test_with_fake_data=False): """Get DBpedia datasets from CSV files.""" if not test_with_fake_data: data_dir = os.path.join(os.getenv('TF_EXP_BASE_DIR', ''), 'dbpedia_data') maybe_download_dbpedia(data_dir) train_path = os.path.join(data_dir, 'dbpedia_csv', 'train.csv') test_path = os.path.join(data_dir, 'dbpedia_csv', 'code.csv') if size == 'small': # Reduce the size of original data by a factor of 1000. base.shrink_csv(train_path, 1000) base.shrink_csv(test_path, 1000) train_path = train_path.replace('train.csv', 'train_small.csv') test_path = test_path.replace('code.csv', 'test_small.csv') else: module_path = os.path.dirname(__file__) train_path = os.path.join(module_path, 'data', 'text_train.csv') test_path = os.path.join(module_path, 'data', 'text_test.csv') train = base.load_csv_without_header(train_path, target_dtype=np.int32, features_dtype=np.str, target_column=0) test = base.load_csv_without_header(test_path, target_dtype=np.int32, features_dtype=np.str, target_column=0) return base.Datasets(train=train, validation=None, test=test)
def load_dbpedia(size='small', test_with_fake_data=False): """Get DBpedia datasets from CSV files.""" if not test_with_fake_data: data_dir = os.path.join(os.getenv('TF_EXP_BASE_DIR', ''), 'dbpedia_data') maybe_download_dbpedia(data_dir) train_path = os.path.join(data_dir, 'dbpedia_csv', 'train.csv') test_path = os.path.join(data_dir, 'dbpedia_csv', 'test.csv') if size == 'small': # Reduce the size of original data by a factor of 1000. base.shrink_csv(train_path, 1000) base.shrink_csv(test_path, 1000) train_path = train_path.replace('train.csv', 'train_small.csv') test_path = test_path.replace('test.csv', 'test_small.csv') else: module_path = os.path.dirname(__file__) train_path = os.path.join(module_path, 'data', 'text_train.csv') test_path = os.path.join(module_path, 'data', 'text_test.csv') train = base.load_csv_without_header( train_path, target_dtype=np.int32, features_dtype=np.str, target_column=0) test = base.load_csv_without_header( test_path, target_dtype=np.int32, features_dtype=np.str, target_column=0) return base.Datasets(train=train, validation=None, test=test)
def main(unused_argv): text_datasets.maybe_download_dbpedia("dbpedia_data") # Reduce the size of original data by a factor of 1000. base.shrink_csv("dbpedia_data/dbpedia_csv/train.csv", 1000) base.shrink_csv("dbpedia_data/dbpedia_csv/test.csv", 1000)
def main(unused_argv): text_datasets.maybe_download_dbpedia('dbpedia_data') # Reduce the size of original data by a factor of 1000. base.shrink_csv('dbpedia_data/dbpedia_csv/train.csv', 1000) base.shrink_csv('dbpedia_data/dbpedia_csv/code.csv', 1000)