def fromCmdArgs(cls, args: argparse.Namespace) -> TrainOptions:
    """Creates TrainOptions instance from cmdline arguments"""
    if args.f == "":
        # No JSON options file supplied: build the options directly
        # from the individual command-line flags.
        return cls(
            inputFile=args.i,
            outputDir=args.o,
            ecWeightsFile=args.a,
            type=args.t,
            fpType=args.k,
            fpSize=args.s,
            encFPSize=args.d,
            epochs=args.e,
            kFolds=args.K,
            testingFraction=args.l,
            enableMultiLabel=args.m,
            verbose=args.v,
            trainAC=args.trainAC,
            trainFNN=args.trainFNN,
            compressFeatures=args.c,
            sampleFractionOnes=args.sampleFractionOnes,
            sampleDown=args.sampleDown
        )
    # A JSON file was given: restore a previously serialized options object.
    optionsPath = Path(makePathAbsolute(args.f))
    if not (optionsPath.exists() and optionsPath.is_file()):
        raise ValueError("Could not find JSON input file")
    return jsonpickle.decode(optionsPath.read_text())
def train(opts: options.TrainOptions):
    """
    Run the main training procedure

    :param opts: Options defining the details of the training
    """
    dataset = fp.importDataFile(opts.inputFile,
                                import_function=fp.importSmilesCSV,
                                fp_size=opts.fpSize)

    # Make sure the output directory exists before any artifacts are written.
    createDirectory(opts.outputDir)

    if opts.compressFeatures:
        if opts.trainAC:
            # Fit a fresh autoencoder on the full feature matrix.
            encoder = ac.train_full_ac(dataset, opts)
        else:
            # Reuse a previously trained autoencoder: rebuild the model
            # architecture and load its stored weights.
            _, encoder = ac.define_ac_model(input_size=opts.fpSize,
                                            encoding_dim=opts.encFPSize)
            encoder.load_weights(makePathAbsolute(opts.ecWeightsFile))
        # Replace the raw fingerprints with their compressed representation.
        dataset = ac.compress_fingerprints(dataset, encoder)

    if opts.trainFNN:
        # Single-label feed-forward models.
        fNN.train_nn_models(df=dataset, opts=opts)

    if opts.enableMultiLabel:
        # Multi-label feed-forward models.
        fNN.train_nn_models_multi(df=dataset, opts=opts)
def main():
    """
    Main function that runs training/prediction defined by command line arguments

    Dispatches on ``prog_args.method`` ("convert", "train" or "predict"),
    resolves relative paths to absolute ones, sets up logging in the
    respective output directory and runs the requested procedure.
    """
    parser = options.createCommandlineParser()
    prog_args: Namespace = parser.parse_args()
    try:
        if prog_args.method == "convert":
            directory = makePathAbsolute(prog_args.f)
            if path.isdir(directory):
                createLogger(path.join(directory, "convert.log"))
                logging.info(f"Convert all data files in {directory}")
                fp.convert_all(directory)
            else:
                raise ValueError("Input directory is not a directory")
        elif prog_args.method == "train":
            train_opts = options.TrainOptions.fromCmdArgs(prog_args)
            # Resolve relative input/output paths once, up front.
            fixed_opts = dataclasses.replace(
                train_opts,
                inputFile=makePathAbsolute(train_opts.inputFile),
                outputDir=makePathAbsolute(train_opts.outputDir))
            createDirectory(fixed_opts.outputDir)
            createLogger(path.join(fixed_opts.outputDir, "train.log"))
            logging.info(
                f"The following arguments are received or filled with default values:\n{fixed_opts}"
            )
            train(fixed_opts)
            exit(0)
        elif prog_args.method == "predict":
            predict_opts = options.PredictOptions.fromCmdArgs(prog_args)
            fixed_opts = dataclasses.replace(
                predict_opts,
                inputFile=makePathAbsolute(predict_opts.inputFile),
                outputDir=makePathAbsolute(predict_opts.outputDir))
            createDirectory(fixed_opts.outputDir)
            createLogger(path.join(fixed_opts.outputDir, "predict.log"))
            # BUGFIX: log the resolved options (fixed_opts), not the raw
            # argparse Namespace, consistent with the "train" branch above.
            logging.info(
                f"The following arguments are received or filled with default values:\n{fixed_opts}"
            )
            predict(fixed_opts)
            exit(0)
    except AttributeError as e:
        # A missing attribute typically means no/invalid subcommand was given.
        print(e)
        parser.print_usage()
def fromCmdArgs(cls, args: argparse.Namespace) -> PredictOptions:
    """Creates PredictOptions instance from cmdline arguments.

    If a JSON options file is supplied via ``-f``, the options are
    restored from that file; otherwise they are assembled from the
    individual command-line flags.

    :param args: parsed command-line arguments
    :return: a PredictOptions instance
    :raises ValueError: if the given JSON options file does not exist
    """
    if args.f != "":
        jsonFile = Path(makePathAbsolute(args.f))
        if jsonFile.exists() and jsonFile.is_file():
            with jsonFile.open() as f:
                content = f.read()
            # NOTE(review): jsonpickle.decode reconstructs arbitrary
            # objects — only load trusted options files.
            return jsonpickle.decode(content)
        else:
            raise ValueError("Could not find JSON input file")
    else:
        return cls(
            inputFile=args.i,
            outputDir=args.o,
            ecWeightsFile=args.ECmodel,
            model=args.model,
            target=args.target,
            fpSize=args.s,
            encFPSize=args.d,
            type=args.t,
            fpType=args.k,
        )