Example #1
def main():
    batchsize = args.batch_size if args.gpus == '' else \
        args.batch_size / len(args.gpus.split(','))
    print 'batchsize is', batchsize

    # define network structure
    net = get_symbol(batchsize)

    # load data
    train_img_list = './data/train_correct.txt'
    val_img_list = './data/test_correct.txt'

    train = PrefetchDataIter(train_img_list,
                             batch_size=args.batch_size,
                             is_color=True,
                             root_dir="/home/donny/112x96/")
    val = PrefetchDataIter(val_img_list,
                           batch_size=args.batch_size,
                           is_color=True,
                           root_dir="/home/donny/112x96/")
    # train, val = mnist_iterator(batch_size=args.batch_size, input_shape=data_shape)

    # train
    # ctx = mx.gpu(1)
    # mod = mx.mod.Module(net, context = ctx, data_names = ('data',), label_names = ('softmax_label', 'center_label',))
    # mod.bind(data_shapes=train.provide_data,
    #         label_shapes=train.provide_label)

    # mod.fit(train, eval_data=val,
    #        optimizer_params={'learning_rate':0.01, 'momentum': 0.9}, num_epoch=30)
    print 'training model ...'
    train_model.fit(args, net, (train, val), data_shape)
Example #2
def main():
    batchsize = args.batch_size if args.gpus == '' else \
        args.batch_size / len(args.gpus.split(','))
    print 'batchsize is', batchsize

    # define network structure
    net = get_symbol(batchsize)

    # load data
    train, val = mnist_iterator(batch_size=args.batch_size,
                                input_shape=data_shape)

    # train
    print 'training model ...'
    train_model.fit(args, net, (train, val), data_shape)
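Both snippets assume an argparse namespace args exposing at least batch_size and gpus, plus helpers such as get_symbol, mnist_iterator, and train_model defined elsewhere in the repo. A minimal hypothetical parser covering just those two arguments (the real scripts define more options, as the later examples show):

import argparse

# Hypothetical minimal parser; not taken from the original scripts.
parser = argparse.ArgumentParser()
parser.add_argument('--batch-size', type=int, default=128,
                    help='the batch size')
parser.add_argument('--gpus', type=str, default='',
                    help='comma-separated GPU ids, e.g. "0,1"; empty string for CPU')
args = parser.parse_args()

Dividing args.batch_size by the number of GPUs yields the per-device batch size, since MXNet's data-parallel training splits each batch evenly across the listed devices.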
Example #3
import logging

import pycuda.driver as cuda


def run():
    net = get_mlp()
    # train
    train_model.fit(args, net, get_iterator(data_shape))


def report_gpu_memory(every_n_batch=50):
    def __callback(param):
        if param.nbatch % every_n_batch == 0:
            (free, total) = cuda.mem_get_info()
            logging.info('        GPU Memory: %.2f%%' % (100.0 * free / total))

    return __callback


################################################################################
print("*" * 80)
print("  WITHOUT mirroring")
print("*" * 80)

# train
train_model.fit(args,
                net,
                get_iterator,
                batch_end_callback=report_gpu_memory())

################################################################################
print("*" * 80)
print("  WITH mirroring via attributes")
print("*" * 80)

# train
train_model.fit(args,
                net_mirrored,
                get_iterator,
                batch_end_callback=report_gpu_memory())

################################################################################
import os
Example #5
    train = mx.io.ImageRecordIter(
        path_imgrec = args.data_dir + "Train-FromourList-multiLabel.bin",  # .bin record file
        mean_img    = args.data_dir + "mean-train-multiLabel.bin",
        data_shape  = data_shape,
        batch_size  = args.batch_size,
        rand_crop   = True,
        rand_mirror = True,
        num_parts   = kv.num_workers,
        part_index  = kv.rank,
        path_imglist = "/Path-to-training-list.txt",
        label_width  = 3090)

    val = mx.io.ImageRecordIter(
        path_imgrec = args.data_dir + "Validation.bin",
        mean_img    = args.data_dir + "mean-val-multiLabel.bin",
        rand_crop   = False,
        rand_mirror = False,
        data_shape  = data_shape,
        batch_size  = args.batch_size,
        num_parts   = kv.num_workers,
        part_index  = kv.rank,
        path_imglist = "/Path-to-val-list.txt",
        label_width  = 3090)

    return (train, val)

# train
train_model.fit(args, net, get_iterator)  # delegate training to train_model.py
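Note that label_width=3090 makes ImageRecordIter read 3090 label values per image from the path_imglist file; that fixed-width label vector is what turns this into a multi-label setup.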


Example #6
                        help='the prefix of the model to save')
    parser.add_argument('--num-epochs', type=int, default=10,
                        help='the number of training epochs')
    parser.add_argument('--load-epoch', type=int,
                        help="load the model on an epoch using the model-prefix")
    parser.add_argument('--kv-store', type=str, default='local',
                        help='the kvstore type')
    parser.add_argument('--lr-factor', type=float, default=1,
                        help='multiply the lr by this factor every lr-factor-epoch epochs')
    parser.add_argument('--lr-factor-epoch', type=float, default=1,
                        help='the number of epochs between lr reductions; can be fractional, e.g. .5')
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    use_caffe_loss = args.caffe_loss

    if args.network == 'mlp':
        data_shape = (784, )
        net = get_mlp()
    else:
        data_shape = (1, 28, 28)
        net = get_lenet()

    # train
    if use_caffe_loss:
        train_model.fit(args, net, get_iterator(data_shape), mx.metric.Caffe())
    else:
        train_model.fit(args, net, get_iterator(data_shape))
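get_mlp is defined elsewhere in the repo; a hypothetical definition consistent with the (784,) data_shape (the actual network may differ):

import mxnet as mx

def get_mlp():
    # simple 784 -> 128 -> 64 -> 10 multilayer perceptron; a sketch only
    data = mx.sym.Variable('data')
    fc1 = mx.sym.FullyConnected(data, name='fc1', num_hidden=128)
    act1 = mx.sym.Activation(fc1, name='relu1', act_type='relu')
    fc2 = mx.sym.FullyConnected(act1, name='fc2', num_hidden=64)
    act2 = mx.sym.Activation(fc2, name='relu2', act_type='relu')
    fc3 = mx.sym.FullyConnected(act2, name='fc3', num_hidden=10)
    return mx.sym.SoftmaxOutput(fc3, name='softmax')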
Example #7
    parser.add_argument(
        '--lr-factor-epoch',
        type=float,
        default=1,
        help='the number of epochs between lr reductions; can be fractional, e.g. .5')
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    use_caffe_loss = args.caffe_loss
    use_caffe_data = args.caffe_data

    data_shape = ()
    if args.network == 'mlp':
        data_shape = (784, )
        net = get_mlp()
    elif args.network == 'lenet':
        if not use_caffe_data:
            data_shape = (1, 28, 28)
        net = get_lenet()
    else:
        net = get_network_from_json_file(args.network)

    # train
    if use_caffe_loss:
        train_model.fit(args, net, get_iterator(data_shape, use_caffe_data),
                        mx.gluon.metric.Caffe())
    else:
        train_model.fit(args, net, get_iterator(data_shape, use_caffe_data))
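get_network_from_json_file is not shown; a plausible sketch, given that MXNet symbols serialize to JSON and can be restored with mx.sym.load (the actual helper may differ):

import mxnet as mx

def get_network_from_json_file(path):
    # load a serialized Symbol graph from a .json file
    return mx.sym.load(path)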
Example #8
def please_train(args, net):
    # parser = get_parser()
    # args = parser.parse_args(args)
    train_model.fit(args, net, get_iterator)
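A hypothetical call site for this wrapper, assuming parse_args and get_mlp exist as in the neighbouring examples:

args = parse_args()
net = get_mlp()
please_train(args, net)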
Example #9
    parser.add_argument('--batch-size', type=int, default=128,
                        help='the batch size')
    parser.add_argument('--lr', type=float, default=.0001,
                        help='the initial learning rate')
    parser.add_argument('--model-prefix', type=str,
                        help='the prefix of the model to load/save')
    parser.add_argument('--save-model-prefix', type=str,
                        help='the prefix of the model to save')
    parser.add_argument('--num-epochs', type=int, default=10,
                        help='the number of training epochs')
    parser.add_argument('--load-epoch', type=int,
                        help="load the model on an epoch using the model-prefix")
    parser.add_argument('--kv-store', type=str, default='local',
                        help='the kvstore type')
    parser.add_argument('--lr-factor', type=float, default=1,
                        help='multiply the lr by this factor every lr-factor-epoch epochs')
    parser.add_argument('--lr-factor-epoch', type=float, default=1,
                        help='the number of epochs between lr reductions; can be fractional, e.g. .5')
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()

    if args.network == 'lenet':
        data_shape = (1, 128, 128)
        net = get_lenet()
    else:
        raise ValueError('unsupported network: ' + args.network)

    # train
    train_model.fit(args, net, get_iterator(data_shape))
Example #10
    parser.add_argument('--kv-store', type=str, default='local',
                        help='the kvstore type')
    parser.add_argument('--lr-factor', type=float, default=1,
                        help='multiply the lr by this factor every lr-factor-epoch epochs')
    parser.add_argument('--lr-factor-epoch', type=float, default=1,
                        help='the number of epochs between lr reductions; can be fractional, e.g. .5')
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    use_caffe_loss = args.caffe_loss
    use_caffe_data = args.caffe_data

    data_shape = ()
    if args.network == 'mlp':
        data_shape = (784, )
        net = get_mlp()
    elif args.network == 'lenet':
        if not use_caffe_data:
            data_shape = (1, 28, 28)
        net = get_lenet()
    else:
        net = get_network_from_json_file(args.network)

    # train
    if use_caffe_loss:
        train_model.fit(args, net, get_iterator(data_shape, use_caffe_data), mx.metric.Caffe())
    else:
        train_model.fit(args, net, get_iterator(data_shape, use_caffe_data))
Example #11
def please_train(args, net):
  # parser = get_parser()
  # args = parser.parse_args(args)
  train_model.fit(args, net, get_iterator)
Example #12
                  d_ff=hp["d_ff"],
                  dropout=0.1).to(device)
pooling = SelfAttentionPooling(d_m, dropout=0.1).to(device)
model = Transformer(encoder, pooling, d_m, label_shape, dropout=0.2).to(device)

opt = torch.optim.Adam(model.parameters(),
                       lr=hp["lr"],
                       weight_decay=hp["weight_decay"])

loss_func = torch.nn.CrossEntropyLoss()

best_eer = 99.
if hp["comet"]:
    with experiment.train():
        for epoch in tqdm(range(epochs)):
            cce_loss = fit(model, loss_func, opt, train_ds_gen, device)
            experiment.log_metric("cce", cce_loss, epoch=epoch)

            val_eer = test(model,
                           val_ds_gen,
                           val_utt,
                           val_pwd,
                           val_trial,
                           device,
                           tta=val_tta)
            experiment.log_metric("val eer", val_eer, epoch=epoch)
            if float(val_eer) < best_eer:
                print("New best EER: %f" % float(val_eer))
                best_eer = float(val_eer)

    with experiment.test():
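The fit helper itself is not shown; a minimal sketch matching this call signature, assuming train_ds_gen yields (inputs, labels) pairs (the real helper may differ):

def fit(model, loss_func, opt, train_ds_gen, device):
    # one epoch of training; returns the mean cross-entropy loss
    model.train()
    total_loss, n_batches = 0.0, 0
    for x, y in train_ds_gen:
        x, y = x.to(device), y.to(device)
        opt.zero_grad()
        loss = loss_func(model(x), y)
        loss.backward()
        opt.step()
        total_loss += loss.item()
        n_batches += 1
    return total_loss / max(n_batches, 1)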
Example #13
    train = mx.io.ImageRecordIter(path_imgrec=os.path.join(
        args.data_dir, args.train_dataset),
                                  mean_r=123.68,
                                  mean_g=116.779,
                                  mean_b=103.939,
                                  data_shape=data_shape,
                                  batch_size=args.batch_size,
                                  rand_crop=True,
                                  rand_mirror=True,
                                  num_parts=kv.num_workers,
                                  part_index=kv.rank)

    val = mx.io.ImageRecordIter(path_imgrec=os.path.join(
        args.data_dir, args.val_dataset),
                                mean_r=123.68,
                                mean_g=116.779,
                                mean_b=103.939,
                                rand_crop=False,
                                rand_mirror=False,
                                data_shape=data_shape,
                                batch_size=args.batch_size,
                                num_parts=kv.num_workers,
                                part_index=kv.rank)

    return (train, val)


if args.benchmark:
    train_model.fit(args, net, get_sythentic_data_iter)
else:
    train_model.fit(args, net, get_iterator)
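get_sythentic_data_iter is not shown; for benchmarking one would expect it to feed random tensors of the right shape so disk I/O drops out of the measurement. A hypothetical stand-in (sizes and class count are assumptions, not taken from the original script):

import numpy as np
import mxnet as mx

def get_synthetic_data_iter(args, kv):
    n = args.batch_size * 10  # a few batches' worth of random data
    data = np.random.uniform(size=(n,) + data_shape).astype('float32')
    label = np.random.randint(0, 1000, size=(n,))
    train = mx.io.NDArrayIter(data, label, batch_size=args.batch_size)
    val = mx.io.NDArrayIter(data, label, batch_size=args.batch_size)
    return (train, val)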
Example #14
    
    test = mx.io.ImageRecordIter(
        path_imgrec = "data/" + args.region + "-Fold" + `args.fold` +  "-test.rec",
        data_shape  = data_shape,
        batch_size  = args.batch_size,
        rand_crop   = False,
        rand_mirror = False,
        shuffle = False,
        num_parts   = kv.num_workers,
        part_index  = kv.rank
    )
    return (train, test)


# train
trainedModel = train_model.fit(args, net, get_iterator)

trainedModel.save("cratersTrained-" + args.region + "-Fold" + str(args.fold))


# #test
# test = mx.io.ImageRecordIter(
#     path_imgrec = "data/" + args.region + "-Fold" + `args.fold` +  "-test.rec",
#     data_shape  = data_shape,
#     batch_size  = 10,
#     rand_crop   = False,
#     rand_mirror = False,
#     shuffle = True
# )
# 
# model = mx.model.FeedForward.load(args.model_prefix + "-0", 5)
Example #15
def train(args):
    load_num = int(args.load_num)
    data_record_dir = args.data_record_dir
    data_record_dir = os.path.join(data_record_dir, cfgs.DATASET_NAME)
    log_dir = args.log_path
    gpu_list = args.gpu_list
    batch_size = args.batch_size
    #*****************************************************************set log
    logger = logging.getLogger()
    fh = logging.FileHandler(
        os.path.join(
            log_dir,
            time.strftime('%F-%T', time.localtime()).replace(':', '-') +
            '.log'))
    fh.setLevel(logging.DEBUG)
    # ch = logging.StreamHandler()
    # ch.setLevel(logging.INFO)
    logger.addHandler(fh)
    # logger.addHandler(ch)
    #*********************************************************get train model
    if gpu_list is None:
        devs = mx.cpu(0)
    else:
        #devs = [mx.gpu(int(i)) for i in range(len(gpu_list.split(',')))]
        devs = mx.gpu(0)
    #logging.info("use gpu list: ",devs)
    '''
    sym,arg_param,aux_param = load_model(load_num)
    net,new_arg,new_aux = get_layer_output(sym,arg_param,aux_param,'flatten')
    net_load = load_parms(net,new_arg,new_aux,devs)
    '''
    #model_prefix = cfgs.MODEL_PREFIX
    #assert model_prefix is not None
    #model_prefix = os.path.join(args.model_dir,cfgs.DATASET_NAME,model_prefix)
    #net_train = gluon.SymbolBlock.imports(model_prefix+'-symbol.json', ['data'], model_prefix+'-'+'%04d' % load_num +'.params')
    #mobilenetv20_features_pool0_fwd
    net_train = get_symbol(devs)
    net_train = get_pretrained_layer(net_train, 'resnetv10_pool1_fwd')
    sigmoid_layer = add_layer(devs)
    net_train = graph(net_train, sigmoid_layer)
    net_train.hybridize()
    #net_train.summary(nd.zeros((1, 3, 224, 224),ctx=mx.cpu(0)))
    #*******************************************************************load data
    train_rec_path = os.path.join(data_record_dir, 'train.rec')
    val_rec_path = os.path.join(data_record_dir, 'test.rec')
    train_dataiter = FaceImageIter(
        batch_size=batch_size,
        data_shape=(3, 112, 112),
        path_imgrec=train_rec_path,
        shuffle=True,
        cutoff=0,
    )
    train_dataiter = mx.io.PrefetchingIter(train_dataiter)
    train_loader = DataIterLoader(train_dataiter)
    val_dataiter = FaceImageIter(
        batch_size=batch_size,
        data_shape=(3, 112, 112),
        path_imgrec=val_rec_path,
        shuffle=True,
        cutoff=0,
    )
    val_dataiter = mx.io.PrefetchingIter(val_dataiter)
    val_loader = DataIterLoader(val_dataiter)
    #******************************************************************train
    fit(net_train,
        train_loader,
        val_loader,
        ctx=devs,
        epoch=args.epochs,
        save_epoch=args.save_weight_period,
        load_epoch=load_num,
        learning_rate=args.lr,
        batch_size=batch_size,
        model_dir=args.model_dir)
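DataIterLoader is not defined in the snippet; it is presumably the usual adapter that turns an mx.io DataIter into a Gluon-style loader yielding (data, label) pairs. A minimal sketch of such an adapter (the actual class may differ):

class DataIterLoader(object):
    def __init__(self, data_iter):
        self.data_iter = data_iter

    def __iter__(self):
        # rewind the underlying DataIter at the start of each epoch
        self.data_iter.reset()
        return self

    def __next__(self):
        batch = self.data_iter.next()
        assert len(batch.data) == len(batch.label) == 1
        return batch.data[0], batch.label[0]

    next = __next__  # Python 2 compatibility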
Example #16
        help="load the model on an epoch using the model-prefix")
    parser.add_argument('--kv-store',
                        type=str,
                        default='local',
                        help='the kvstore type')
    parser.add_argument(
        '--lr-factor',
        type=float,
        default=1,
        help='multiply the lr by this factor every lr-factor-epoch epochs')
    parser.add_argument(
        '--lr-factor-epoch',
        type=float,
        default=1,
        help='the number of epochs between lr reductions; can be fractional, e.g. .5')
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()

    if args.network == 'mlp':
        data_shape = (784, )
        net = get_mlp()
    else:
        data_shape = (1, 28, 28)
        net = get_lenet()

    # train
    train_model.fit(args, net, get_iterator(data_shape))
Example #17
        mean_g=128,
        mean_b=128,
        scale=0.0078125,
        max_aspect_ratio=0.35,
        data_shape=data_shape,
        batch_size=args.batch_size,
        rand_crop=True,
        rand_mirror=True,
    )

    # validate data iterator
    val = mx.io.ImageRecordIter(
        path_imgrec=args.data_dir + "va.rec",
        mean_r=128,
        mean_b=128,
        mean_g=128,
        scale=0.0078125,
        rand_crop=False,
        rand_mirror=False,
        data_shape=data_shape,
        batch_size=args.batch_size,
    )

    return (train, val)


# train
tic = time.time()
train_model.fit(args, net, get_iterator)
print "time elapsed to train model", time.time() - tic
Example #18
import pycuda.driver as cuda
import logging
def report_gpu_memory(every_n_batch=50):
    def __callback(param):
        if param.nbatch % every_n_batch == 0:
            (free, total) = cuda.mem_get_info()
            logging.info('        GPU Memory: %.2f%%' % (100.0 * free / total))
    return __callback

################################################################################
print("*" * 80)
print("  WITHOUT mirroring")
print("*" * 80)

# train
train_model.fit(args, net, get_iterator, batch_end_callback=report_gpu_memory())

################################################################################
print("*" * 80)
print("  WITH mirroring via attributes")
print("*" * 80)

# train
train_model.fit(args, net_mirrored, get_iterator, batch_end_callback=report_gpu_memory())

################################################################################
import os
os.environ['MXNET_BACKWARD_DO_MIRROR'] = '1'
print("*" * 80)
print("  WITH mirroring via environment variable")
print("*" * 80)
Example #19
    data_shape = (3, args.data_shape, args.data_shape)
    train = mx.io.ImageRecordIter(path_imgrec=os.path.join(
        args.data_dir, args.train_dataset),
                                  mean_r=123.68,
                                  mean_g=116.779,
                                  mean_b=103.939,
                                  data_shape=data_shape,
                                  batch_size=args.batch_size,
                                  rand_crop=True,
                                  rand_mirror=True,
                                  num_parts=kv.num_workers,
                                  part_index=kv.rank)

    val = mx.io.ImageRecordIter(path_imgrec=os.path.join(
        args.data_dir, args.val_dataset),
                                mean_r=123.68,
                                mean_g=116.779,
                                mean_b=103.939,
                                rand_crop=False,
                                rand_mirror=False,
                                data_shape=data_shape,
                                batch_size=args.batch_size,
                                num_parts=kv.num_workers,
                                part_index=kv.rank)

    return (train, val)


# train
train_model.fit(args, net, get_iterator)
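The per-channel means 123.68, 116.779, and 103.939 are the standard ImageNet RGB means, so the iterators perform the mean subtraction that ImageNet-pretrained networks expect. The num_parts/part_index pair shards the record file across kvstore workers, so each worker reads a distinct slice during distributed training.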
Example #20
args = parser.parse_args()
#>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

from symbol_factory import symbol_factory as _sf
from optimizer_factory import optimizer_factory as _of
sf = _sf()
of = _of()

#check dirs
if "hdfs:" in args.model_dir:
    print "making dir on hdfs"
    child = subprocess.Popen("hdfs dfs -mkdir " + args.model_dir, shell=True)
    return_code = child.wait()
    if return_code != 0:
        raise Exception("hdfs model dir exist or make dir error, abort")
else:
    try:
        print "making dir on local server"
        os.mkdirs(args.model_dir)
    except:
        raise Exception("local model dir exist or make dir error, abort")

#main code
net_cmds = open("./.net_config").read()
opt_cmds = open("./.opt_config").read()
net = sf(net_cmds)
opt_name, opt_params = of(opt_cmds)

import train_model
train_model.fit(args, net, opt_name, opt_params)
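Neither factory is shown, but from the call pattern of(opt_cmds) returns an optimizer name plus its keyword parameters. A hypothetical illustration of how such a pair can be consumed (train_model.fit may well do this internally):

import mxnet as mx

# opt_name and opt_params come from the optimizer factory above
optimizer = mx.optimizer.create(opt_name, **opt_params)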