Пример #1
0
 def __enter__(self):
     """Context-manager entry: reset the VM I/O directories and attach Vagrant.

     Wipes any input/output directories left over from a previous run,
     recreates them, copies the build tree into the input directory and
     constructs the Vagrant handle. Returns self so the caller can use
     ``with SingularityVM(...) as vm``.
     """
     # Start from a clean slate: drop whatever a previous run left behind.
     for stale_dir in (SINGULARITY_VM_INPUT_DIR, SINGULARITY_VM_OUTPUT_DIR):
         if os.path.exists(stale_dir):
             rmtree(stale_dir)
     # copytree creates the input directory itself; the output directory
     # has to be created explicitly.
     makedirs(SINGULARITY_VM_OUTPUT_DIR)
     copytree(self.path_to_build, SINGULARITY_VM_INPUT_DIR, symlinks=True)
     self.vm = Vagrant(root=SINGULARITY_VM,
                       quiet_stdout=False,
                       quiet_stderr=False)
     return self
Пример #2
0
def try_build_image(repo_dir, image_path, team_name):
    """Build a Singularity image from *repo_dir* inside a throwaway VM.

    :param repo_dir: path of the checked-out repository to build from
    :param image_path: destination path for the generated image file
    :param team_name: used (with the image name) to name the log files
    :return: tuple ``(success, logs)`` — *success* is True only when an
        image was produced and copied to *image_path*; *logs* always
        points at the written log/err files.
    """
    image_name = os.path.splitext(os.path.basename(image_path))[0]
    logs = LogFiles(team_name, image_name)
    try:
        with SingularityVM(repo_dir) as vm:
            generated_image = vm.run()
            vm.store_logs(logs.log_path, logs.err_path)
            if generated_image:
                # NOTE(review): presumably `makedirs` tolerates an existing
                # directory (project wrapper?) — os.makedirs would raise on
                # a retry; confirm which one is imported here.
                makedirs(os.path.dirname(image_path))
                copy(generated_image, image_path)
                return True, logs
            return False, logs
    # BUG FIX: `except Error, errors:` and the bare `print` statement are
    # Python-2-only syntax; converted to Python-3-compatible forms.
    except Error as errors:
        print("Failed to copy input files")
        with open(logs.err_path, "w") as f:
            f.write(
                "Creating the Singularity image failed with the following errors:\n"
            )
            # shutil.Error carries a list of (src, dst, why) tuples.
            for err in errors.args[0]:
                f.write("Exception when copying '%s' to '%s': %s\n" % err)
        return False, logs
Пример #3
0
 def __init__(self,
              experiment_name,
              parser,
              data_path,
              label_filename,
              models_path,
              dummy_mode=False):
     """
     The helper manages model interaction with data.

     Builds the train/eval/test datasets, parses model-specific CLI
     arguments, instantiates the lightning module, and configures a
     PyTorch-Lightning trainer that resumes from the best checkpoint.

     :param experiment_name: Name of the experiment
     :param parser: Argparse to add further parameters to the cli
     :param data_path: Absolute path where the data is stored
     :param label_filename: File name of the csv file that contains the filename and label data.
     :param models_path: Path to where model data is stored.
     :param dummy_mode: Presumably enables a reduced/dummy dataset for quick
         runs — passed straight through to ``init_sets``; TODO confirm.
     """
     self.models_path = models_path
     self.experiment_name = experiment_name
     # NOTE(review): self.dataset and self.dataset_ratios are not assigned in
     # this method — presumably set by a subclass or earlier; verify.
     train_dataset, test_dataset, eval_dataset, number_of_classes = self.dataset.init_sets(
         data_path,
         label_filename,
         ratio=self.dataset_ratios,
         dummy_mode=dummy_mode)
     # Let the model class register its own CLI arguments before parsing.
     parser = self.lightning_module.add_model_specific_args(parser, None)
     hyperparams = parser.parse_args()
     self.module = self.lightning_module(
         hyperparams,
         number_of_classes,
         train_dataset,
         eval_dataset,
         test_dataset,
     )
     # hyperparams.gpus is presumably a JSON-encoded list of device ids
     # (e.g. "[0,1]") — TODO confirm against the argument definition.
     gpus = json.loads(hyperparams.gpus)
     # NOTE(review): `model_name` is not defined in this method — presumably
     # a module-level or class attribute; verify it resolves at runtime.
     self.save_dir = models_path / model_name / experiment_name
     makedirs(self.save_dir)
     if 'distributed_backend' not in hyperparams:
         hyperparams.distributed_backend = 'dp'
     # todo: connect ddp, fix gpu specification
     # Logger Specification
     # We use TubeLogger for nicer structure
     version = 1
     logger = TestTubeLogger(
         save_dir=self.save_dir,
         version=version  # fixed to one to ensure checkpoint load
     )
     # TestTube writes to <save_dir>/default/version_<n>/checkpoints — look
     # there for a checkpoint to resume from (None if absent, presumably).
     ckpt_folder = self.save_dir / 'default' / 'version_{}'.format(
         version) / 'checkpoints'
     resume_from_checkpoint = self.find_best_epoch(ckpt_folder)
     # trainer with some optimizations
     self.trainer = ptl.Trainer(
         gpus=gpus if len(gpus) else 0,
         profiler=False,  # for once is good
         auto_scale_batch_size=False,  # i prefer manually
         auto_lr_find=False,  # mostly diverges
         # distributed_backend='ddp',  # doesnt fill on ddp
         precision=32,  # throws error on 16
         default_root_dir=self.save_dir,
         logger=logger,
         resume_from_checkpoint=resume_from_checkpoint,
         callbacks=[
             ModelCheckpoint(save_last=True,
                             save_top_k=1,
                             verbose=True,
                             monitor='val_acc',
                             mode='max',
                             prefix=''),
         ])
Пример #4
0
# For better generality, it is recommended to use `import attnencoder as encoder`.
from encoder import EncoderRNN
from decoder import AttnDecoderRNN
from seq2seq.seq2seq_eval import evaluateSet
from utils.util import *



import config
args = config.get_args()
print(args.task)

# Prefer the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# Make sure the checkpoint/output directory exists before training.
config.makedirs(args.save_path)

if(args.task=="translation"):
    from dataset.translation import *
    if(args.dataset=='eng-fra'):
        input_lang, output_lang, pairs = prepareData('eng', 'fra', True, index=True)
        # Hold out one sixth of the pairs as the test split.
        test_size = int(len(pairs)/6)

    # NOTE(review): `pairs` and `test_size` are only bound in the
    # 'eng-fra' branch above — any other --dataset value would raise a
    # NameError here; confirm this is intended.
    random.shuffle(pairs)
    test_set = pairs[0:test_size]
    train_set = pairs[test_size:len(pairs)]
    print("Train pairs %s; Test pairs %s" % (len(pairs)-test_size, test_size))
    print(random.choice(pairs))

elif(args.task=="qa"):
    from dataset.xiaohuangji import *
Пример #5
0
                        default=Path('..') / RAW_DATA_PATH)
    parser.add_argument('--dest_path',
                        # BUG FIX: help text was copy-pasted from --src_path;
                        # this argument is the destination for skipped files.
                        help='Destination path where skipped audio files are moved',
                        default=Path('..') / RAW_DATA_PATH / 'skipped')
    parser.add_argument('--label_file',
                        help='file name of label file',
                        default='labels.csv')
    parser.add_argument('--ext',
                        help='name of a valid extension to be cleansed',
                        default=None)
    args = parser.parse_args()
    src_path = Path(args.src_path)
    dest_path = Path(args.dest_path)
    _ext = args.ext

    makedirs(dest_path)
    label_filename = args.label_file

    # Load the label file that lists the filenames the pipeline knows about.
    df = pd.read_csv(src_path / label_filename)
    new_filenames = []
    new_labels = []

    df_filenames = set(df['filename'])
    os_filenames = set(os.listdir(src_path))
    # keep the filenames that are in the folder and not in the label file
    dif_filenames = os_filenames - df_filenames

    for filename in dif_filenames:
        ext = filename.split('.')[-1]
        if _ext and ext == _ext:
            # NOTE(review): files matching the requested extension fall
            # through here; the handling of the remaining files appears to
            # continue beyond this excerpt — confirm against the full file.
            pass
Пример #6
0
import pathlib

from config import makedirs

# Locations of the static test fixtures, rooted under tests/static/.
TEST_STATIC_FILES_PATH = pathlib.Path('./tests/static/')
TEST_RAW_DATA_PATH = TEST_STATIC_FILES_PATH / 'data/raw/'
TEST_FEATURES_DATA_PATH = TEST_STATIC_FILES_PATH / 'data/features/'
TEST_DIGEST_DATA_PATH = TEST_STATIC_FILES_PATH / 'data/digest/'
TEST_MODELS_DATA_PATH = TEST_STATIC_FILES_PATH / 'data/models/'

# Create every fixture directory up front, parent before children.
for _test_dir in (TEST_STATIC_FILES_PATH,
                  TEST_RAW_DATA_PATH,
                  TEST_FEATURES_DATA_PATH,
                  TEST_DIGEST_DATA_PATH,
                  TEST_MODELS_DATA_PATH):
    makedirs(_test_dir)