import six

import bigdl.nn.criterion as bcriterion
from keras.objectives import *  # Keras loss functions: mse, mae, hinge, ...


def to_bigdl_criterion(kloss):
    # Map a Keras loss, given either as a name string or as the Keras loss
    # function itself, onto the equivalent BigDL criterion.
    if isinstance(kloss, six.string_types):
        kloss = kloss.lower()
    if kloss == "categorical_crossentropy" or kloss == categorical_crossentropy:
        return bcriterion.CategoricalCrossEntropy()
    elif kloss == "mse" or kloss == "mean_squared_error" or kloss == mse:
        return bcriterion.MSECriterion()
    elif kloss == "binary_crossentropy" or kloss == binary_crossentropy:
        return bcriterion.BCECriterion()
    elif kloss == "mae" or kloss == "mean_absolute_error" or kloss == mae:
        return bcriterion.AbsCriterion()
    elif kloss == "hinge" or kloss == hinge:
        return bcriterion.MarginCriterion()
    elif kloss == "mean_absolute_percentage_error" or \
            kloss == "mape" or kloss == mean_absolute_percentage_error:
        return bcriterion.MeanAbsolutePercentageCriterion()
    elif kloss == "mean_squared_logarithmic_error" or \
            kloss == "msle" or kloss == mean_squared_logarithmic_error:
        return bcriterion.MeanSquaredLogarithmicCriterion()
    elif kloss == "squared_hinge" or kloss == squared_hinge:
        return bcriterion.MarginCriterion(squared=True)
    elif kloss == "sparse_categorical_crossentropy" or \
            kloss == sparse_categorical_crossentropy:
        return bcriterion.ClassNLLCriterion(logProbAsInput=False)
    elif kloss == "kullback_leibler_divergence" or \
            kloss == "kld" or kloss == kullback_leibler_divergence:
        return bcriterion.KullbackLeiblerDivergenceCriterion()
    elif kloss == "poisson" or kloss == poisson:
        return bcriterion.PoissonCriterion()
    elif kloss == "cosine_proximity" or kloss == "cosine" or kloss == cosine_proximity:
        return bcriterion.CosineProximityCriterion()
    else:
        raise Exception("Not supported loss: %s" % kloss)
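
# Usage sketch (hedged): BigDL criterion construction goes through the JVM
# gateway, so a SparkContext must exist and init_engine() must have been
# called first. Both the string alias and the Keras function object resolve
# to the same BigDL criterion:
#
#     to_bigdl_criterion('mse')            # -> bcriterion.MSECriterion()
#     to_bigdl_criterion(mse)              # the Keras function itself also works
#     to_bigdl_criterion('squared_hinge')  # -> MarginCriterion(squared=True)
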
def to_bigdl_criterion(kloss):
    # Simpler, string-only variant of the converter above.
    # TODO: support passing a loss object (with its parameters), not just a name.
    if kloss == "categorical_crossentropy":
        return bcriterion.ClassNLLCriterion()
    elif kloss == "mse" or kloss == "mean_squared_error":
        return bcriterion.MSECriterion()
    elif kloss == "binary_crossentropy":
        return bcriterion.BCECriterion()
    elif kloss == "mae" or kloss == "mean_absolute_error":
        return bcriterion.AbsCriterion()
    else:
        raise Exception("Not supported type: %s" % kloss)
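
# A hedged sketch of how the TODO above might be addressed (the helper below
# is hypothetical, not part of BigDL): normalize a Keras loss callable to its
# canonical name before dispatching, since Keras losses are plain functions
# whose __name__ matches the long-form string alias.
def _loss_name(kloss):
    # e.g. _loss_name(mean_squared_error) == 'mean_squared_error'
    return kloss if isinstance(kloss, str) else getattr(kloss, '__name__', str(kloss))

# Usage: to_bigdl_criterion(_loss_name(kloss))
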
import glob
import logging
import os

import imageio
import numpy as np
import pyspark

from bigdl.nn import criterion
from bigdl.optim import optimizer
from bigdl.util import common
from mlboardclient.api import client  # Kuberlab/Kibernetika ML board client

LOG = logging.getLogger(__name__)


def main():
    parser = get_parser()
    args = parser.parse_args()
    # BATCH_SIZE must be a multiple of <executor-cores>:
    # in this case a multiple of 3: 3, 6, 9, 12, etc.
    if args.batch_size % args.executor_cores != 0:
        raise RuntimeError(
            'batch size must be a multiple of the <executor-cores> parameter!'
        )
    cores = args.executor_cores
    batch_size = args.batch_size
    conf = (
        common.create_spark_conf()
        .setAppName('pyspark-mnist')
        .setMaster(args.master)
    )
    conf = conf.set('spark.executor.cores', cores)
    conf = conf.set('spark.cores.max', cores)
    conf = conf.set('spark.jars', os.environ.get('BIGDL_JARS'))
    LOG.info('initialize with spark conf:')
    sc = pyspark.SparkContext(conf=conf)
    common.init_engine()

    LOG.info('initialize training RDD:')
    # Files from the Kuberlab dataset
    files = glob.glob(os.environ.get('DATA_DIR') + '/train/*.png')
    LOG.info('Train size: %d', len(files))

    def mapper(x):
        # File names end in "-<digit>.png"; BigDL labels are 1-based, hence +1.
        label = int(x.split('/')[-1].split('-')[-1][:-4]) + 1
        image = imageio.imread('file://' + x).astype(np.float32).reshape(1, 28, 28) / 255
        return common.Sample.from_ndarray(image, label)

    train_rdd = sc.parallelize(files).map(mapper)
    opt = optimizer.Optimizer(
        model=build_model(10),
        training_rdd=train_rdd,
        criterion=criterion.ClassNLLCriterion(),
        optim_method=optimizer.SGD(
            learningrate=0.01,
            learningrate_decay=0.0002
        ),
        end_trigger=optimizer.MaxEpoch(args.epoch),
        batch_size=batch_size
    )
    trained_model = opt.optimize()
    LOG.info('training finished')

    LOG.info('saving model...')
    path = args.output_dir
    if not os.path.exists(path):
        os.makedirs(path)
    trained_model.saveModel(
        path + '/model.pb',
        path + '/model.bin',
        over_write=True
    )
    client.update_task_info({'checkpoint_path': path, 'model_path': path})
    LOG.info('successfully saved!')

    files = glob.glob(os.environ.get('DATA_DIR') + '/test/*.png')
    LOG.info('Validation size: %d', len(files))
    test_rdd = sc.parallelize(files).map(mapper)
    results = trained_model.evaluate(test_rdd, batch_size, [optimizer.Top1Accuracy()])
    accuracy = results[0].result
    client.update_task_info({'test_accuracy': float(accuracy)})
    sc.stop()
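
# The script above calls two helpers it does not define. Below are hedged
# sketches of plausible implementations: get_parser() mirrors the attributes
# read from `args` (defaults are guesses; the batch size default is kept a
# multiple of the 3 executor cores), and build_model() follows BigDL's stock
# LeNet-5 example, ending in LogSoftMax to match ClassNLLCriterion.
import argparse

from bigdl.nn.layer import (Linear, LogSoftMax, Reshape, Sequential,
                            SpatialConvolution, SpatialMaxPooling, Tanh)


def get_parser():
    parser = argparse.ArgumentParser(description='BigDL MNIST training on PySpark')
    parser.add_argument('--master', default='local[3]')
    parser.add_argument('--executor-cores', type=int, default=3)
    parser.add_argument('--batch-size', type=int, default=120)
    parser.add_argument('--epoch', type=int, default=5)
    parser.add_argument('--output-dir', default='./model')
    return parser


def build_model(class_num):
    # LeNet-5 style network for 1 x 28 x 28 MNIST images.
    model = Sequential()
    model.add(Reshape([1, 28, 28]))
    model.add(SpatialConvolution(1, 6, 5, 5))   # -> 6 x 24 x 24
    model.add(Tanh())
    model.add(SpatialMaxPooling(2, 2, 2, 2))    # -> 6 x 12 x 12
    model.add(SpatialConvolution(6, 12, 5, 5))  # -> 12 x 8 x 8
    model.add(Tanh())
    model.add(SpatialMaxPooling(2, 2, 2, 2))    # -> 12 x 4 x 4
    model.add(Reshape([12 * 4 * 4]))
    model.add(Linear(12 * 4 * 4, 100))
    model.add(Tanh())
    model.add(Linear(100, class_num))
    model.add(LogSoftMax())
    return model


if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    main()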