Example #1
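Trains a ResNet-50 + LSTM video classifier on UCF-101 with Deep500's native PyTorch executor, bucketing training clips by frame count so that similarly long sequences are batched together. A hypothetical sketch of the ResNet50LSTM model follows the snippet.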
    # Fragment of a __main__ block: the argparse.ArgumentParser setup, the
    # BATCH_SIZE and EPOCHS constants, the ResNet50LSTM definition, and the
    # imports (argparse, av, torch, deep500 as d5 and its d5ds/d5fw/d5ref
    # submodules) precede this excerpt in the original file.
    parser.add_argument("--folder", type=str, nargs="?", default=None)
    args = vars(parser.parse_args())

    # Dataset shape is (num_classes, *sample_shape)
    ds_shape = d5ds.dataset_shape('ucf101')
    ds_classes, sample_shape = ds_shape[0], ds_shape[1:]

    train_set, validation_set = d5ds.load_ucf101('0', 'label', folder=args['folder'],
                                                 normalize=True, max_length=700, skip_frames=10)

    # Frame count per clip (read via PyAV), used to bucket clips of similar length
    seq_lengths = [av.open(path).streams.video[0].frames for path in train_set.data]
    train_sampler = d5.BucketSampler(train_set, BATCH_SIZE, seq_lengths, max_length=500,
                                     transformations=[d5ref.Crop((224, 224))])
    validation_sampler = d5.OrderedSampler(validation_set, BATCH_SIZE,
                                           transformations=[d5ref.Crop((224, 224))])

    model = ResNet50LSTM(num_classes=ds_classes, pretrained=True)

    loss = torch.nn.CrossEntropyLoss()
    executor = d5fw.PyTorchNativeGraphExecutor(model, loss, device=d5.GPUDevice())
    optimizer = d5fw.GradientDescent(executor, 'loss')

    METRICS = [
        d5.TestAccuracy(),
        d5.WallclockTime(reruns=0, avg_over=1)
    ]

    results = d5.test_training(executor, train_sampler, validation_sampler,
                               optimizer, EPOCHS, BATCH_SIZE, 'output',
                               metrics=list(METRICS))
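
The ResNet50LSTM class is referenced above but not defined in this excerpt. A minimal sketch of what such a model could look like in PyTorch (hypothetical: the use of torchvision's resnet50 backbone, the hidden size, and the last-step classification are assumptions, not the original definition):

import torch.nn as nn
import torchvision.models as models

# Hypothetical sketch of ResNet50LSTM (not the original definition)
class ResNet50LSTM(nn.Module):
    """Per-frame ResNet-50 features fed to an LSTM, classified at the last step."""
    def __init__(self, num_classes: int, pretrained: bool = True,
                 hidden_size: int = 512):
        super().__init__()
        backbone = models.resnet50(pretrained=pretrained)
        # Keep everything up to and including global average pooling (2048-d output)
        self.features = nn.Sequential(*list(backbone.children())[:-1])
        self.lstm = nn.LSTM(2048, hidden_size, batch_first=True)
        self.classifier = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        # x: (batch, time, channels, height, width)
        b, t = x.shape[:2]
        feats = self.features(x.flatten(0, 1)).flatten(1)  # (b*t, 2048)
        out, _ = self.lstm(feats.view(b, t, -1))           # (b, t, hidden_size)
        return self.classifier(out[:, -1])                 # logits at last time step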

Example #2
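A generic recipe runner: it assembles dataset, model, samplers, executor, and optimizer from a dictionary of fixed and mutable components, trains, and checks the resulting metrics against acceptable thresholds. A hypothetical invocation is sketched after the function.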
def run_recipe(fixed: Dict[str, Any], mutable: Dict[str, Any],
               metrics: List[Tuple[d5.TestMetric, Any]]) -> bool:
    """ Runs a Deep500 recipe (see file documentation). Returns True on success
        and False on failure, printing the unacceptable metrics. """

    # Argument validation
    if any(k in mutable for k in fixed.keys()):
        raise RuntimeError('Fixed and mutable components cannot overlap')

    # Create unified dictionary
    comps = dict(fixed, **mutable)

    # Add missing positional/keyword-argument entries: every component 'X' in
    # the recipe gains empty 'X_args' and 'X_kwargs' unless given explicitly
    old_keys = list(comps.keys())
    for k in old_keys:
        if (k not in ['batch_size', 'epochs', 'events']
                and not (k.endswith('_args') or k.endswith('_kwargs'))):
            if ('%s_args' % k) not in comps:
                comps['%s_args' % k] = tuple()
            if ('%s_kwargs' % k) not in comps:
                comps['%s_kwargs' % k] = {}

    ########################################################################
    # Obtain dataset metadata
    if 'dataset' not in comps:
        raise SyntaxError('Dataset must be specified in training recipe')

    if isinstance(comps['dataset'], str):
        loss_op = d5ds.dataset_loss(comps['dataset'])
        ds_shape = d5ds.dataset_shape(comps['dataset'])
    else:
        loss_op = comps['dataset'].loss
        ds_shape = comps['dataset'].shape
    ds_classes, sample_shape = ds_shape[0], ds_shape[1:]

    # Construct network
    if 'model' not in comps:
        raise SyntaxError('Model must be specified in recipe')
    if 'batch_size' not in comps:
        raise SyntaxError('Batch size must be specified in training recipe')
    batch = comps['batch_size']

    if isinstance(comps['model'], str):
        # ONNX file
        if os.path.isfile(comps['model']):
            network = d5.parser.load_and_parse_model(comps['model'])
            input_node = network.get_input_nodes()[0].name
            output_node = network.get_output_nodes()[0].name
        else:  # Standard model
            network, input_node, output_node = \
                d5nt.create_model(comps['model'], batch, *comps['model_args'],
                                  classes=ds_classes, shape=sample_shape,
                                  **comps['model_kwargs'])
    else:  # Callable
        network, input_node, output_node = comps['model'](
            batch,
            *comps['model_args'],
            classes=ds_classes,
            shape=sample_shape,
            **comps['model_kwargs'])

    # Add loss function to model
    network.add_operation(loss_op([output_node, 'label'], 'loss'))

    # Construct dataset
    if isinstance(comps['dataset'], str):
        train_set, validation_set = d5ds.load_dataset(
            comps['dataset'], input_node, 'label', *comps['dataset_args'],
            **comps['dataset_kwargs'])
    else:
        train_set, validation_set = comps['dataset'](input_node, 'label',
                                                     *comps['dataset_args'],
                                                     **comps['dataset_kwargs'])

    # Construct samplers
    if 'train_sampler' in comps:
        if isinstance(comps['train_sampler'], d5.Sampler):
            train_sampler = comps['train_sampler']
        else:
            train_sampler = comps['train_sampler'](
                train_set, batch, *comps['train_sampler_args'],
                **comps['train_sampler_kwargs'])
    else:
        train_sampler = train_set

    if 'validation_sampler' in comps:
        if isinstance(comps['validation_sampler'], d5.Sampler):
            validation_sampler = comps['validation_sampler']
        else:
            validation_sampler = comps['validation_sampler'](
                validation_set, batch, *comps['validation_sampler_args'],
                **comps['validation_sampler_kwargs'])
    else:
        validation_sampler = validation_set

    # Construct executor
    if 'executor' not in comps:
        raise SyntaxError('Executor must be specified in recipe')
    if isinstance(comps['executor'], d5.GraphExecutor):
        executor = comps['executor']
    else:
        executor = comps['executor'](network, *comps['executor_args'],
                                     **comps['executor_kwargs'])

    # Construct optimizer
    if 'optimizer' not in comps:
        raise SyntaxError('Optimizer must be specified in training recipe')
    optimizer = comps['optimizer'](executor, 'loss', *comps['optimizer_args'],
                                   **comps['optimizer_kwargs'])

    # Add total time to metrics
    metrics.append((d5.WallclockTime(reruns=0, avg_over=1), None))

    ########################################################################
    # Create trainer and run
    if 'epochs' not in comps:
        raise SyntaxError('Epochs must be specified in training recipe')
    if 'events' not in comps:
        comps['events'] = None
    results = d5.test_training(executor,
                               train_sampler,
                               validation_sampler,
                               optimizer,
                               comps['epochs'],
                               batch,
                               output_node,
                               metrics=[m[0] for m in metrics],
                               events=comps['events'])

    # Verify results
    ok = True
    for (metric, acceptable), result in zip(metrics, results):
        if acceptable is not None:
            if result < acceptable:
                print('FAIL %s: %s (Acceptable: %s)' %
                      (type(metric).__name__, result, acceptable))
                ok = False

    if ok:
        print('PASSED')
    return ok
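
For orientation, a hedged example of a recipe that satisfies the validation above. The component names simple_cnn, mnist, and d5tf.from_model come from Example #4 below; GradientDescent as a reference-framework optimizer and the threshold value are assumptions, not from the source:

# Illustrative recipe (values not taken from the source)
import deep500 as d5
from deep500.frameworks import tensorflow as d5tf
from deep500.frameworks import reference as d5ref

fixed = {
    'model': 'simple_cnn',        # resolved through d5nt.create_model
    'dataset': 'mnist',           # resolved through d5ds.load_dataset
    'batch_size': 64,
    'epochs': 5,
}
mutable = {
    'executor': d5tf.from_model,            # called as executor(network, ...)
    'optimizer': d5ref.GradientDescent,     # name assumed; called as optimizer(executor, 'loss', ...)
}
# Fail the recipe if the test accuracy ends below 90%
metrics = [(d5.TestAccuracy(), 90.0)]
run_recipe(fixed, mutable, metrics)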
Example #3
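Distributed data-parallel training: the training set is partitioned (or replicated) across the ranks of a communicator comm, while validation and progress reporting are restricted to rank 0.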
    # Create distributed samplers
    if PARTITION_DATASET:
        train_sampler = d5.PartitionedDistributedSampler(
            d5.ShuffleSampler(train_set, BATCH_SIZE), comm)
    else:
        train_sampler = d5.DistributedSampler(
            d5.ShuffleSampler(train_set, BATCH_SIZE), comm)

    if comm is None or comm.rank == 0:
        # No need to distribute test_set
        test_sampler = d5.ShuffleSampler(test_set, BATCH_SIZE)
    else:
        # No need to test if not rank 0
        test_sampler = None

    #############################

    # Events: Only print progress on rank 0
    events = d5.DefaultRunnerEvents(MAX_EPOCHS) if comm is None or comm.rank == 0 else []

    # Metrics: Add communication volume
    metrics = d5.DefaultTrainingMetrics() + [d5.CommunicationVolume()]

    # Run distributed training
    d5.test_training(executor, train_sampler, test_sampler, optimizer,
                     MAX_EPOCHS, BATCH_SIZE, OUTPUT_NODE, events=events,
                     metrics=metrics)

    # Wait for everyone to finish and finalize MPI if necessary
    d5.mpi_end_barrier()
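
This snippet presupposes that comm (an MPI-style communicator exposing .rank, or None for single-process runs), executor, optimizer, and the datasets were set up earlier in the script. A script built around it would typically be launched with one process per rank, e.g. mpiexec -n 4 python train_distributed.py (the script name is illustrative); d5.mpi_end_barrier() then synchronizes all ranks before MPI is finalized.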
Example #4
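The tail of a custom AcceleGrad-style optimizer's per-parameter update rule, followed by a __main__ block that trains a simple CNN on MNIST with it through the TensorFlow executor. A hedged reconstruction of the surrounding class is sketched after the code.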
        # Tail of the per-parameter update rule: eta_t (step size), squared_grad
        # (accumulated squared gradients), grad, and old_param are computed in
        # the part of the method not shown here.
        z_t = self.z[param_name]

        # AcceleGrad-style two-sequence update: an accelerated z-step scaled by
        # alpha_t, and a plain gradient y-step from the current parameter
        z_t2 = z_t - self.alpha_t * eta_t * grad
        y_t2 = old_param - eta_t * grad

        self.z[param_name] = z_t2
        self.y[param_name] = y_t2
        self.squares[param_name] = squared_grad
        # AdaGrad-style per-parameter step size from the accumulated squares
        adjusted_lr = self.lr / (self.eps + np.sqrt(squared_grad))

        self.init = False
        return old_param - adjusted_lr * grad

if __name__ == '__main__':
    import deep500 as d5  # used below; not imported in the excerpt above
    from deep500 import networks as d5net, datasets as d5ds
    from deep500.frameworks import tensorflow as d5tf
    from deep500.frameworks import reference as d5ref
    batch_size = 1024

    # Create network and dataset
    net, innode, outnode = d5net.create_model('simple_cnn', batch_size)
    net.add_operation(d5.ops.LabelCrossEntropy([outnode, 'label'], 'loss'))
    train, test = d5ds.load_dataset('mnist', innode, 'label')

    # Create executor and optimizer
    executor = d5tf.from_model(net)
    opt = AcceleGradOptimizer(executor)

    # Run training
    d5.test_training(executor, train, test, opt, 5, batch_size, outnode)
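
The class around the update rule is not shown above: the base class and the computation of eta_t, squared_grad, and alpha_t are missing. As orientation only, a hypothetical skeleton consistent with the state the fragment touches (the base class, the update_rule signature, and the eta_t/alpha_t schedules here are assumptions, not the original code):

import numpy as np

# Hypothetical reconstruction of the surrounding class (not the original code)
class AcceleGradOptimizer:  # in the source this would extend a Deep500 optimizer base
    def __init__(self, executor, lr=0.1, eps=1e-8):
        self.executor = executor
        self.lr = lr
        self.eps = eps
        self.init = True
        self.z = {}        # accelerated (z_t) sequence, per parameter
        self.y = {}        # gradient-step (y_t) sequence, per parameter
        self.squares = {}  # accumulated squared gradients, per parameter

    def update_rule(self, grad, old_param, param_name):
        # Lazily initialize per-parameter state on first use
        if param_name not in self.z:
            self.z[param_name] = old_param.copy()
            self.y[param_name] = old_param.copy()
            self.squares[param_name] = np.zeros_like(old_param)
        squared_grad = self.squares[param_name] + grad * grad
        eta_t = self.lr / (self.eps + np.sqrt(np.sum(squared_grad)))
        self.alpha_t = 1.0  # placeholder; AcceleGrad grows alpha_t over iterations
        # ... continues with the fragment shown at the top of this example ...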