Пример #1
0
t0 = time()
a = Extractor()
cache_path = '../dataset/cache/' + dataset_name + '.pckl'

# Extract the project from scratch when there is no cache file or when
# args.reload_extraction is anything other than False; otherwise load the
# extractor state from the cache file.
if not os.path.isfile(cache_path) or args.reload_extraction is not False:
    logging.info('##########     EXTRACTING PROJECT     ##########')
    dataset_path = args.PROJECT
    if not os.path.exists(dataset_path):
        # Nothing to extract from: stop the script with an error message.
        sys.exit('Specified dataset not found in dataset folder. Aborting')
    a.clean_dataset(dataset_path)
    a.extr_folder_classes(dataset_path)
    # Store the result under cache_path so the next run can load it.
    a.save(cache_path)
else:
    logging.info('########## LOADING PROJECT FROM CACHE ##########')
    a.load(cache_path)
logging.info('Finished extracting {0:.4f}s'.format(time() - t0))

# Preprocess the extracted dataset. The preprocessed cache is loaded only when
# the cache file exists and both args.reload_preprocessing and
# args.reload_extraction are False; otherwise a new Preprocessor is built from
# the extractor's classes and saved to the cache path.
# NOTE(review): a re-extraction that happened only because the extraction
# cache file was missing does not invalidate this cache -- confirm that
# loading the existing preprocessed data is intended in that case.
t0 = time()
cache_path = '../dataset/cache/' + dataset_name + '_prep.pckl'
if os.path.isfile(cache_path) and (args.reload_preprocessing is False
                                   and args.reload_extraction is False):
    logging.info('########## LOADING PREPROCESSED DATA FROM CACHE ##########')
    b = Preprocessor()
    b.load(cache_path)
else:
    logging.info('########## PREPROCESSING PROJECT ##########')
    # Presumably the constructor performs the preprocessing itself -- TODO
    # confirm against the Preprocessor class.
    b = Preprocessor(a.classes, type='class', pkg_start=args.pkg_start)
    b.save(cache_path)
logging.info('Finished preprocessing {0:.4f}s'.format(time() - t0))