def __enter__(self):
    # We want to start with a clean environment every time.
    if os.path.exists(SINGULARITY_VM_INPUT_DIR):
        rmtree(SINGULARITY_VM_INPUT_DIR)
    if os.path.exists(SINGULARITY_VM_OUTPUT_DIR):
        rmtree(SINGULARITY_VM_OUTPUT_DIR)
    makedirs(SINGULARITY_VM_OUTPUT_DIR)
    copytree(self.path_to_build, SINGULARITY_VM_INPUT_DIR, symlinks=True)
    self.vm = Vagrant(root=SINGULARITY_VM, quiet_stdout=False,
                      quiet_stderr=False)
    return self
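# The matching __exit__ is not part of this section. A minimal sketch,
# assuming the VM should be torn down after every build so the next run
# starts clean (destroy() is part of the python-vagrant API):
def __exit__(self, exc_type, exc_value, traceback):
    self.vm.destroy()
    if os.path.exists(SINGULARITY_VM_INPUT_DIR):
        rmtree(SINGULARITY_VM_INPUT_DIR)
    return False  # do not swallow exceptions raised in the with-block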
def try_build_image(repo_dir, image_path, team_name):
    image_name = os.path.splitext(os.path.basename(image_path))[0]
    logs = LogFiles(team_name, image_name)
    try:
        with SingularityVM(repo_dir) as vm:
            generated_image = vm.run()
            vm.store_logs(logs.log_path, logs.err_path)
            if generated_image:
                makedirs(os.path.dirname(image_path))
                copy(generated_image, image_path)
                return True, logs
        return False, logs
    except Error as errors:  # shutil.Error raised by copy/copytree
        print("Failed to copy input files")
        with open(logs.err_path, "w") as f:
            f.write(
                "Creating the Singularity image failed with the following errors:\n"
            )
            for err in errors.args[0]:
                f.write("Exception when copying '%s' to '%s': %s\n" % err)
        return False, logs
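# Hypothetical usage of try_build_image; the repository path, image path and
# team name below are made up for illustration, not values from the source.
if __name__ == "__main__":
    ok, logs = try_build_image("/tmp/team-repo",
                               "/srv/images/team-a/image.simg",
                               "team-a")
    print("build succeeded" if ok else "build failed, see %s" % logs.err_path)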
def __init__(self, experiment_name, parser, data_path, label_filename,
             models_path, dummy_mode=False):
    """
    The helper manages model interaction with data.

    :param experiment_name: Name of the experiment
    :param parser: Argparse parser used to add further parameters to the CLI
    :param data_path: Absolute path where the data is stored
    :param label_filename: File name of the csv file that contains the
        filename and label data.
    :param models_path: Path to where model data is stored.
    :param dummy_mode: Forwarded to the dataset's init_sets.
    """
    self.models_path = models_path
    self.experiment_name = experiment_name

    train_dataset, test_dataset, eval_dataset, number_of_classes = \
        self.dataset.init_sets(data_path, label_filename,
                               ratio=self.dataset_ratios,
                               dummy_mode=dummy_mode)

    parser = self.lightning_module.add_model_specific_args(parser, None)
    hyperparams = parser.parse_args()
    self.module = self.lightning_module(
        hyperparams,
        number_of_classes,
        train_dataset,
        eval_dataset,
        test_dataset,
    )
    gpus = json.loads(hyperparams.gpus)

    # model_name, like dataset and lightning_module, comes from the subclass.
    self.save_dir = models_path / self.model_name / experiment_name
    makedirs(self.save_dir)

    if 'distributed_backend' not in hyperparams:
        hyperparams.distributed_backend = 'dp'
        # todo: connect ddp, fix gpu specification

    # Logger specification: we use TestTubeLogger for a nicer structure.
    version = 1
    logger = TestTubeLogger(
        save_dir=self.save_dir,
        version=version  # fixed to 1 so checkpoints can be reloaded
    )
    ckpt_folder = self.save_dir / 'default' / 'version_{}'.format(
        version) / 'checkpoints'
    resume_from_checkpoint = self.find_best_epoch(ckpt_folder)

    # Trainer with some optimizations
    self.trainer = ptl.Trainer(
        gpus=gpus if len(gpus) else 0,
        profiler=False,  # profiling once is enough
        auto_scale_batch_size=False,  # prefer setting the batch size manually
        auto_lr_find=False,  # mostly diverges
        # distributed_backend='ddp',  # doesn't work with ddp yet
        precision=32,  # throws an error at 16
        default_root_dir=self.save_dir,
        logger=logger,
        resume_from_checkpoint=resume_from_checkpoint,
        callbacks=[
            ModelCheckpoint(save_last=True,
                            save_top_k=1,
                            verbose=True,
                            monitor='val_acc',
                            mode='max',
                            prefix=''),
        ])
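# find_best_epoch is referenced above but not shown in this section. A
# minimal sketch, assuming Lightning writes *.ckpt files into the checkpoint
# folder, that the newest one is the one to resume from, and that os and
# pathlib.Path are imported at module level; returning None makes the
# Trainer start from scratch on the first run.
def find_best_epoch(self, ckpt_folder):
    if not os.path.isdir(ckpt_folder):
        return None  # first run: nothing to resume from
    checkpoints = sorted(Path(ckpt_folder).glob('*.ckpt'),
                         key=lambda p: p.stat().st_mtime)
    return str(checkpoints[-1]) if checkpoints else None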
# For more generality, consider using: import attnencoder as encoder
import random

import torch

from encoder import EncoderRNN
from decoder import AttnDecoderRNN
from seq2seq.seq2seq_eval import evaluateSet
from utils.util import *
import config

args = config.get_args()
print(args.task)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
config.makedirs(args.save_path)

if args.task == "translation":
    from dataset.translation import *
    if args.dataset == 'eng-fra':
        input_lang, output_lang, pairs = prepareData('eng', 'fra', True,
                                                     index=True)
        test_size = int(len(pairs) / 6)
        random.shuffle(pairs)
        test_set = pairs[0:test_size]
        train_set = pairs[test_size:len(pairs)]
        print("Train pairs %s; Test pairs %s" % (len(pairs) - test_size,
                                                 test_size))
        print(random.choice(pairs))
elif args.task == "qa":
    from dataset.xiaohuangji import *
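# A hedged sketch of how the imported models might be instantiated for the
# translation branch. The signatures below follow the PyTorch seq2seq
# tutorial this code resembles; the local encoder.py/decoder.py may differ,
# and hidden_size is a hypothetical hyperparameter.
hidden_size = 256
encoder = EncoderRNN(input_lang.n_words, hidden_size).to(device)
decoder = AttnDecoderRNN(hidden_size, output_lang.n_words,
                         dropout_p=0.1).to(device)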
parser.add_argument('--src_path',
                    help='Source path where audio data files are stored',
                    default=Path('..') / RAW_DATA_PATH)
parser.add_argument('--dest_path',
                    help='Destination path where skipped audio files are moved',
                    default=Path('..') / RAW_DATA_PATH / 'skipped')
parser.add_argument('--label_file',
                    help='file name of label file',
                    default='labels.csv')
parser.add_argument('--ext',
                    help='name of a valid extension to be cleansed',
                    default=None)
args = parser.parse_args()

src_path = Path(args.src_path)
dest_path = Path(args.dest_path)
_ext = args.ext
makedirs(dest_path)

label_filename = args.label_file
df = pd.read_csv(src_path / label_filename)

new_filenames = []
new_labels = []
df_filenames = set(df['filename'])
os_filenames = set(os.listdir(src_path))
# Keep the filenames that are in the folder but not in the label file.
dif_filenames = os_filenames - df_filenames
for filename in dif_filenames:
    ext = filename.split('.')[-1]
    if _ext and ext == _ext:
        pass
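# The loop above is truncated at `pass`. A hedged sketch of one plausible
# continuation (an assumption about intent, not original code): unlabeled
# files matching the requested extension, or all unlabeled files when --ext
# is unset, are moved out of the source folder. `move` comes from shutil.
from shutil import move

for filename in dif_filenames:
    ext = filename.split('.')[-1]
    if _ext and ext != _ext:
        continue  # leave files with other extensions in place
    move(str(src_path / filename), str(dest_path / filename))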
import pathlib

from config import makedirs

TEST_STATIC_FILES_PATH = pathlib.Path('./tests/static/')
TEST_RAW_DATA_PATH = TEST_STATIC_FILES_PATH / 'data/raw/'
TEST_FEATURES_DATA_PATH = TEST_STATIC_FILES_PATH / 'data/features/'
TEST_DIGEST_DATA_PATH = TEST_STATIC_FILES_PATH / 'data/digest/'
TEST_MODELS_DATA_PATH = TEST_STATIC_FILES_PATH / 'data/models/'

makedirs(TEST_STATIC_FILES_PATH)
makedirs(TEST_RAW_DATA_PATH)
makedirs(TEST_FEATURES_DATA_PATH)
makedirs(TEST_DIGEST_DATA_PATH)
makedirs(TEST_MODELS_DATA_PATH)
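# The makedirs imported from config is not defined in this section. A
# minimal sketch consistent with how it is used here (called repeatedly on
# paths that may already exist) is an idempotent wrapper around os.makedirs:
import os

def makedirs(path):
    os.makedirs(path, exist_ok=True)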