# --- Stage 1: extraction ----------------------------------------------------
# Load the extracted project from the pickle cache when a cache file exists
# and no re-extraction was requested; otherwise extract from args.PROJECT and
# write the result to the cache.
t0 = time()
a = Extractor()
cache_prefix = '../dataset/cache/' + dataset_name
cache_path = cache_prefix + '.pckl'
# `is False` is kept on purpose: any flag value other than the literal False
# forces a fresh run, exactly as before.
if os.path.isfile(cache_path) and args.reload_extraction is False:
    logging.info('########## LOADING PROJECT FROM CACHE ##########')
    a.load(cache_path)
else:
    logging.info('########## EXTRACTING PROJECT ##########')
    dataset_path = args.PROJECT
    if not os.path.exists(dataset_path):
        # Terminates the process with this message and a non-zero status.
        sys.exit('Specified dataset not found in dataset folder. Aborting')
    a.clean_dataset(dataset_path)
    a.extr_folder_classes(dataset_path)
    a.save(cache_path)
logging.info('Finished extracting %.4fs', time() - t0)

# --- Stage 2: preprocessing -------------------------------------------------
# Preprocess the extracted dataset. Use the cached version if it is available,
# unless a reload of either stage was requested.
# NOTE(review): if the extraction cache was missing (so stage 1 ran fresh) but
# a preprocessed cache file exists and no reload flag is set, the existing
# preprocessed cache is still loaded -- confirm that this is intended.
t0 = time()
cache_path = cache_prefix + '_prep.pckl'
if os.path.isfile(cache_path) and (args.reload_preprocessing is False
                                   and args.reload_extraction is False):
    logging.info('########## LOADING PREPROCESSED DATA FROM CACHE ##########')
    b = Preprocessor()
    b.load(cache_path)
else:
    logging.info('########## PREPROCESSING PROJECT ##########')
    b = Preprocessor(a.classes, type='class', pkg_start=args.pkg_start)
    b.save(cache_path)
logging.info('Finished preprocessing %.4fs', time() - t0)