Пример #1
0
def main():
    """Write an executable R training script covering every sub-experiment.

    Command-line options are parsed with argparse (see the ``help`` texts).
    ``<e>/design.ini`` supplies the targets whose target/feature files are
    read once at the top of the generated script.  Each design file returned
    by ``utility.list_files(<s>/)`` then contributes one training section per
    target, unless that target's ``.model`` file already exists and
    ``-skipM`` is "yes" (the default when the option is omitted).

    The script is created in ``os.path.dirname(<e>)``.  All R code is emitted
    by the ``rCodeGen`` helpers; this function only decides what to emit.
    """
    parser = argparse.ArgumentParser(description='Generates train.r. A sample command is mGenForE.py -e ob/e1/ ')
    parser.add_argument('-e', required=True, help='Experiement folder to use to find the features and targets')
    parser.add_argument('-a', required=True, help='Algorithm name')
    parser.add_argument('-s', required=True, help='Location of the folder containing all the sub experiments')
    parser.add_argument('-targetClass', required=True, help="binomial(target takes only true and false) / multinomial (target values takes more than 2 values)")
    parser.add_argument('-skipM', required=False, help="yes or no , If you want to regenerate already generated algorithm model file then make this value No")
    parser.add_argument('-td', required=True, help="Day on which it was trained")
    parser.add_argument('-dt', required=True, help="Number of days it was trained")
    parser.add_argument('-wt', required=True, help="default/exp , weight type to be given to different days")
    parser.add_argument('-iT', required=False, help='Instrument name')
    parser.add_argument('-sP', required=False, help='Strike price of instrument')
    parser.add_argument('-oT', required=False, help='Options Type')
    args = parser.parse_args()

    attribute.initializeInstDetails(args.iT, args.sP, args.oT)
    if args.skipM is None:
        args.skipM = "yes"

    # Single-argument print(...) behaves the same as the print statement on
    # Python 2 and is also valid Python 3.
    print("Using the experiment folder " + args.e)

    config = ConfigObj(args.e + "/design.ini")

    print("The config parameters that I am working with are")
    print(config)

    dirName = os.path.dirname(args.e)

    algo = rCodeGen.getAlgoName(args)

    # The trailing slash makes dirname() below return the folder itself.
    args.s = args.s + "/"
    trainDay = os.path.basename(os.path.abspath(args.td))
    rProgName = "train" + algo + "-td." + trainDay + "-dt." + args.dt + "-wt." + args.wt \
                + attribute.generateExtension() \
                + "-For" + os.path.basename(os.path.dirname(args.s)) + "SubE.r"
    rProgLocation = dirName + '/' + rProgName

    # The context manager closes the script even if a generator call raises.
    with open(rProgLocation, 'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')

        # Only these algorithms need an R package to be loaded up front.
        if algo in ('glmnet', 'randomForest', 'mda'):
            rScript.write('require (' + algo + ') \n')

        rCodeGen.ForSetUpChecks(rScript)
        rCodeGen.ToReadTargetFile(rScript, config)
        rCodeGen.ForWtVectorGeneration(rScript, args.wt.lower())
        for target in config['target']:
            rCodeGen.ToReadFeatureFiles(rScript, config, target)
            rCodeGen.ForSanityChecks(rScript, config, target)

        for designFile in utility.list_files(args.s):
            print("Generating r code for " + designFile)
            rScript.write('\n\nprint ("Running r code for ' + designFile + '")\n')
            config = ConfigObj(designFile)
            subExperimentDir = os.path.dirname(designFile)
            for target in config['target']:
                lModelGeneratedAfterTraining = subExperimentDir + '/' + algo + target + '-td.' + trainDay \
                    + '-dt.' + args.dt + '-targetClass.' + args.targetClass + "-wt." + args.wt \
                    + attribute.generateExtension() + '.model'
                if os.path.isfile(lModelGeneratedAfterTraining) and args.skipM.lower() == "yes":
                    print("Model File " + lModelGeneratedAfterTraining + " already exists . So it will not be formed again . If you want to re-generate model then re-run with -skipM=No")
                else:
                    rCodeGen.ToCreateDataFrameForTraining(rScript, config, target)
                    rCodeGen.ForTraining(rScript, args, config, target)
                    rCodeGen.saveTrainingModel(rScript, args, subExperimentDir, target)
        rScript.write('rm(list=ls())')

    print("Finished generating R training program: " + rProgLocation)
    # Mark the script executable without a shell, so paths containing spaces
    # or shell metacharacters are handled correctly: every permission class
    # that can read the file also gets the execute bit.
    lMode = os.stat(rProgLocation).st_mode
    os.chmod(rProgLocation, lMode | ((lMode & 0o444) >> 2))
Пример #2
0
def main():
    """Write an executable R script that trains the decision trees (rpart).

    Command-line options are parsed with argparse (see the ``help`` texts).
    Targets come from ``<e>/design1.ini``; ``<e>/design.ini`` is passed to
    ``rCodeGen.ToReadPredictionFiles`` as the initial configuration.  When
    every target's ``.tree`` file already exists and ``-skipT`` is "yes"
    (the default), the script contains only the header and setup checks.
    Otherwise a section is emitted per target, skipping those whose tree
    file is already present.

    ``-tTD`` falls back to the value of ``-dt`` when omitted.
    """
    parser = argparse.ArgumentParser(description='Generates train.r. A sample command is mGenForE.py -e ob/e1/ ')
    parser.add_argument('-e', required=True, help='Experiement folder to use to find the features and targets')
    parser.add_argument('-a', required=True, help='Algorithm name')
    parser.add_argument('-targetClass', required=True, help="binomial(target takes only true and false) / multinomial (target values takes more than 2 values)")
    parser.add_argument('-skipT', required=False, help="yes or no , If you want to regenerate already generated algorithm model file then make this value No")
    parser.add_argument('-td', required=True, help="Day on which it was trained")
    parser.add_argument('-dt', required=True, help="Number of days it was trained")
    parser.add_argument('-wt', required=True, help="default/exp , weight type to be given to different days")
    parser.add_argument('-iT', required=False, help='Instrument name')
    parser.add_argument('-sP', required=False, help='Strike price of instrument')
    parser.add_argument('-oT', required=False, help='Options Type')
    parser.add_argument('-treeType', required=False, help="Tree read for trade engine")
    parser.add_argument('-tTD', required=False, help="Tree number of days to be used")
    args = parser.parse_args()

    attribute.initializeInstDetails(args.iT, args.sP, args.oT)
    if args.skipT is None:
        args.skipT = "yes"
    if args.tTD is None:
        args.tTD = args.dt
    # Single-argument print(...) behaves the same as the print statement on
    # Python 2 and is also valid Python 3.
    print("Using the experiment folder " + args.e)

    config = ConfigObj(args.e + "/design1.ini")
    # The separator was missing here, so "-e ob/e1" (no trailing slash)
    # resolved to "ob/e1design.ini" instead of the file inside the folder.
    configInit = ConfigObj(args.e + "/design.ini")

    # NOTE: a disabled earlier variant built configInit as a dict holding one
    # ConfigObj per additional .ini file found in the experiment folder.

    print("The config parameters that I am working with are")
    print(config)

    dirName = os.path.dirname(args.e) + "/"

    algo = rCodeGen.getAlgoName(args)
    trainDay = os.path.basename(os.path.abspath(args.td))

    def treeFileFor(target):
        # NOTE(review): -treeType is optional, yet it is concatenated here;
        # omitting it raises TypeError as soon as a target is processed.
        return dirName + "/" + algo + target + '-td.' + trainDay + "-tTD" + args.tTD \
            + '-dt.' + args.dt + attribute.generateExtension() + ".tree" + args.treeType

    def canSkip(treeFile):
        # A tree is reused only if it exists and regeneration was not forced.
        return os.path.isfile(treeFile) and args.skipT.lower() == "yes"

    rProgName = "traintree" + "-td." + trainDay + "-tTD" + args.tTD + "-dt." + args.dt \
                + "-wt." + args.wt + attribute.generateExtension() + ".r"
    rProgLocation = dirName + '/' + rProgName

    # The context manager closes the script even if a generator call raises.
    with open(rProgLocation, 'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')
        rScript.write('require (rpart) \n')

        rCodeGen.ForSetUpChecks(rScript)

        # all() stops at the first target that still needs training.
        lAllFilePresent = all(canSkip(treeFileFor(target)) for target in config['target'])

        if not lAllFilePresent:
            rCodeGen.ToReadTargetFile(rScript, config)
            rCodeGen.ForWtVectorGeneration(rScript, args.wt.lower())

            for target in config['target']:
                rCodeGen.ToReadFeatureFiles(rScript, config, target)
                rCodeGen.ToReadPredictionFiles(rScript, config, target, configInit)
                rCodeGen.ForSanityChecks(rScript, config, target)
                lTreeFileName = treeFileFor(target)
                if canSkip(lTreeFileName):
                    print("Model File " + lTreeFileName + " already exists . So it will not be formed again . If you want to re-generate model then re-run with -skipT=No")
                else:
                    rCodeGen.ToRenameDataBeforeTraining(rScript, config, target)
                    rCodeGen.ForTrainingTree(rScript, args, config, target, args.treeType)
                    print(lTreeFileName)
                    rCodeGen.saveTrainingTree(rScript, args, dirName, target, lTreeFileName)

    print("Finished generating R training program: " + rProgLocation)
    # Mark the script executable without a shell, so paths containing spaces
    # or shell metacharacters are handled correctly: every permission class
    # that can read the file also gets the execute bit.
    lMode = os.stat(rProgLocation).st_mode
    os.chmod(rProgLocation, lMode | ((lMode & 0o444) >> 2))
Пример #3
0
def main():
    """Write an executable R script that trains models and then predicts.

    Command-line options are parsed with argparse (see the ``help`` texts).
    ``<e>/design.ini`` is loaded twice: once as the training configuration
    and once as the prediction configuration, in which every key of each
    ``features-<target>`` section gets a "P" suffix before the feature files
    are read.  Each design file returned by ``utility.list_files(<s>/)`` then
    contributes, per target, a training section (skipped when the ``.model``
    file exists and ``-skipM`` is "yes") and a prediction section (skipped
    when the ``.predictions`` file exists, unless ``-skipP`` is "no").

    Predictions are placed under ``<pd>`` with "/ro/" replaced by "/wf/",
    in ``p/<experiment folder name>``; that last directory is created when
    it does not exist yet.
    """
    parser = argparse.ArgumentParser(description='Generates train.r. A sample command is mGenForE.py -e ob/e1/ ')
    parser.add_argument('-e', required=True, help='Experiement folder to use to find the features and targets')
    parser.add_argument('-a', required=True, help='Algorithm name')
    parser.add_argument('-s', required=True, help='Location of the folder containing all the sub experiments')
    parser.add_argument('-targetClass', required=True, help="binomial(target takes only true and false) / multinomial (target values takes more than 2 values)")
    parser.add_argument('-skipM', required=False, help="yes or no , If you want to regenerate already generated algorithm model file then make this value No")
    parser.add_argument('-skipP', required=False, help="yes or no , If you want to regenerate already generated algorithm prediction file then make this value No")
    parser.add_argument('-pd', required=True, help='Prediction directory')
    parser.add_argument('-td', required=True, help="Day on which it was trained")
    parser.add_argument('-dt', required=True, help="Number of days it was trained")
    parser.add_argument('-wt', required=True, help="default/exp , weight type to be given to different days")
    parser.add_argument('-iT', required=False, help='Instrument name')
    parser.add_argument('-sP', required=False, help='Strike price of instrument')
    parser.add_argument('-oT', required=False, help='Options Type')
    args = parser.parse_args()

    attribute.initializeInstDetails(args.iT, args.sP, args.oT)
    if args.skipM is None:
        args.skipM = "yes"
    if args.skipP is None:
        args.skipP = "yes"

    def addPredictionSuffix(predictionConfig, target):
        # Rename every key of the target's feature section to "<key>P".
        # The key list is copied first because the section is mutated.
        features = predictionConfig['features-' + target]
        for key in list(features.keys()):
            features[key + "P"] = features[key]
            del features[key]

    # Single-argument print(...) behaves the same as the print statement on
    # Python 2 and is also valid Python 3.
    print("Using the experiment folder " + args.e)

    print("Training files steps")
    config = ConfigObj(args.e + "/design.ini")

    print("The config parameters that I am working with are")
    print(config)

    predictionDataDirectoryName = args.pd.replace('/ro/', '/wf/')
    predictionDataDirectoryName = predictionDataDirectoryName + "/p/" + os.path.basename(os.path.dirname(args.e))
    if not os.path.exists(predictionDataDirectoryName):
        os.mkdir(predictionDataDirectoryName)

    dirName = os.path.dirname(args.e)

    algo = rCodeGen.getAlgoName(args)

    # The trailing slash makes dirname() below return the folder itself.
    args.s = args.s + "/"
    trainDay = os.path.basename(os.path.abspath(args.td))
    rProgName = "trainPredict" + algo + "-td." + trainDay + "-dt." + args.dt \
                + "-pd." + os.path.basename(os.path.abspath(args.pd)) + "-wt." + args.wt \
                + attribute.generateExtension() \
                + "-For" + os.path.basename(os.path.dirname(args.s)) + "SubE.r"
    rProgLocation = dirName + '/' + rProgName

    # The context manager closes the script even if a generator call raises.
    with open(rProgLocation, 'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')

        # Only these algorithms need an R package to be loaded up front.
        if algo in ('glmnet', 'randomForest', 'mda'):
            rScript.write('require (' + algo + ') \n')

        rCodeGen.ForSetUpChecksForTrainPredictTogather(rScript)
        rCodeGen.ToReadTargetFile(rScript, config)
        rCodeGen.ForWtVectorGeneration(rScript, args.wt.lower())
        for target in config['target']:
            rCodeGen.ToReadFeatureFiles(rScript, config, target, 2)
            rCodeGen.ForSanityChecks(rScript, config, target)

        print("For prediction data set")
        configForPredictions = ConfigObj(args.e + "/design.ini")
        print("The config parameters that I am working with are")
        for target in configForPredictions['target']:
            addPredictionSuffix(configForPredictions, target)
            print(configForPredictions)

            rCodeGen.ToReadFeatureFiles(rScript, configForPredictions, target, 4)
            rCodeGen.ForSanityChecks(rScript, configForPredictions, target)

        for designFile in utility.list_files(args.s):
            print("Generating r code for " + designFile)
            rScript.write('\n\nprint ("Running r code for ' + designFile + '")\n')
            config = ConfigObj(designFile)
            configForPredictions = ConfigObj(designFile)
            subExperimentDir = os.path.dirname(designFile)
            for target in config['target']:
                addPredictionSuffix(configForPredictions, target)

                # -------------- Model part --------------
                lModelGeneratedAfterTraining = subExperimentDir + '/' + algo + target + '-td.' + trainDay \
                    + '-dt.' + args.dt + '-targetClass.' + args.targetClass + "-wt." + args.wt \
                    + attribute.generateExtension() + '.model'
                if os.path.isfile(lModelGeneratedAfterTraining) and args.skipM.lower() == "yes":
                    print("Model File " + lModelGeneratedAfterTraining + " already exists . So it will not be formed again . If you want to re-generate model then re-run with -skipM=No")
                else:
                    rCodeGen.ToCreateDataFrameForTraining(rScript, config, target)
                    rCodeGen.ForTraining(rScript, args, config, target)
                    rCodeGen.saveTrainingModel(rScript, args, subExperimentDir, target)

                # -------------- Prediction part --------------
                # NOTE(review): this name is built from args.a while the
                # model name above uses algo - confirm that is intended.
                predictionFileName = predictionDataDirectoryName + "/" + args.a + target + '-td.' + trainDay \
                    + '-dt.' + args.dt + '-targetClass.' + args.targetClass \
                    + '-f.' + os.path.basename(subExperimentDir) \
                    + "-wt." + args.wt + attribute.generateExtension() + ".predictions"
                if not os.path.isfile(predictionFileName) or args.skipP.lower() == "no":
                    rCodeGen.ForPredictions(rScript, configForPredictions, args, designFile, target, 4)
                else:
                    print("Prediction File " + predictionFileName + "Already exists , not generating it again . If you want to generate it again then rerun it with -skipP no ")
        rScript.write('rm(list=ls())')

    print("Finished generating R training program: " + rProgLocation)
    # Mark the script executable without a shell, so paths containing spaces
    # or shell metacharacters are handled correctly: every permission class
    # that can read the file also gets the execute bit.
    lMode = os.stat(rProgLocation).st_mode
    os.chmod(rProgLocation, lMode | ((lMode & 0o444) >> 2))
Пример #4
0
def main():
    """Generate the executable R script that trains the rpart trees.

    Options are read from the command line via argparse (the ``help`` texts
    describe each one).  The list of targets is taken from
    ``<e>/design1.ini``, while ``<e>/design.ini`` is handed to
    ``rCodeGen.ToReadPredictionFiles`` as the initial configuration.  If a
    ``.tree`` file already exists for every target and ``-skipT`` is "yes"
    (its default), nothing beyond the header and setup checks is written;
    otherwise each target without a reusable tree gets a training section.

    When ``-tTD`` is not given it takes the value of ``-dt``.
    """
    parser = argparse.ArgumentParser(
        description='Generates train.r. A sample command is mGenForE.py -e ob/e1/ ')
    parser.add_argument(
        '-e', required=True,
        help='Experiement folder to use to find the features and targets')
    parser.add_argument('-a', required=True, help='Algorithm name')
    parser.add_argument(
        '-targetClass', required=True,
        help="binomial(target takes only true and false) / multinomial (target values takes more than 2 values)")
    parser.add_argument(
        '-skipT', required=False,
        help="yes or no , If you want to regenerate already generated algorithm model file then make this value No")
    parser.add_argument('-td', required=True, help="Day on which it was trained")
    parser.add_argument('-dt', required=True, help="Number of days it was trained")
    parser.add_argument(
        '-wt', required=True,
        help="default/exp , weight type to be given to different days")
    parser.add_argument('-iT', required=False, help='Instrument name')
    parser.add_argument('-sP', required=False, help='Strike price of instrument')
    parser.add_argument('-oT', required=False, help='Options Type')
    parser.add_argument('-treeType', required=False, help="Tree read for trade engine")
    parser.add_argument('-tTD', required=False, help="Tree number of days to be used")
    args = parser.parse_args()

    attribute.initializeInstDetails(args.iT, args.sP, args.oT)
    if args.skipT is None:
        args.skipT = "yes"
    if args.tTD is None:
        args.tTD = args.dt
    # Single-argument print(...) behaves the same as the print statement on
    # Python 2 and is also valid Python 3.
    print("Using the experiment folder " + args.e)

    config = ConfigObj(args.e + "/design1.ini")
    # The path separator was missing on this line: without a trailing slash
    # on -e the name became "<e>design.ini" rather than "<e>/design.ini".
    configInit = ConfigObj(args.e + "/design.ini")

    # NOTE: a disabled earlier variant built configInit as a dict holding one
    # ConfigObj per additional .ini file found in the experiment folder.

    print("The config parameters that I am working with are")
    print(config)

    dirName = os.path.dirname(args.e) + "/"

    algo = rCodeGen.getAlgoName(args)
    trainDay = os.path.basename(os.path.abspath(args.td))

    def buildTreeFileName(target):
        # NOTE(review): -treeType is optional, yet it is concatenated here;
        # omitting it raises TypeError as soon as a target is processed.
        return (dirName + "/" + algo + target + '-td.' + trainDay
                + "-tTD" + args.tTD + '-dt.' + args.dt
                + attribute.generateExtension() + ".tree" + args.treeType)

    rProgName = ("traintree" + "-td." + trainDay + "-tTD" + args.tTD
                 + "-dt." + args.dt + "-wt." + args.wt
                 + attribute.generateExtension() + ".r")
    rProgLocation = dirName + '/' + rProgName

    # The context manager closes the script even if a generator call raises.
    with open(rProgLocation, 'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')
        rScript.write('require (rpart) \n')

        rCodeGen.ForSetUpChecks(rScript)

        # Stop scanning at the first target whose tree must be (re)built.
        lAllFilePresent = True
        for target in config['target']:
            lTreeFileName = buildTreeFileName(target)
            if not (os.path.isfile(lTreeFileName) and args.skipT.lower() == "yes"):
                lAllFilePresent = False
                break

        if not lAllFilePresent:
            rCodeGen.ToReadTargetFile(rScript, config)
            rCodeGen.ForWtVectorGeneration(rScript, args.wt.lower())

            for target in config['target']:
                rCodeGen.ToReadFeatureFiles(rScript, config, target)
                rCodeGen.ToReadPredictionFiles(rScript, config, target, configInit)
                rCodeGen.ForSanityChecks(rScript, config, target)
                lTreeFileName = buildTreeFileName(target)
                if os.path.isfile(lTreeFileName) and args.skipT.lower() == "yes":
                    print("Model File " + lTreeFileName + " already exists . So it will not be formed again . If you want to re-generate model then re-run with -skipT=No")
                else:
                    rCodeGen.ToRenameDataBeforeTraining(rScript, config, target)
                    rCodeGen.ForTrainingTree(rScript, args, config, target, args.treeType)
                    print(lTreeFileName)
                    rCodeGen.saveTrainingTree(rScript, args, dirName, target, lTreeFileName)

    print("Finished generating R training program: " + rProgLocation)
    # Mark the script executable without a shell, so paths containing spaces
    # or shell metacharacters are handled correctly: every permission class
    # that can read the file also gets the execute bit.
    lMode = os.stat(rProgLocation).st_mode
    os.chmod(rProgLocation, lMode | ((lMode & 0o444) >> 2))
Пример #5
0
# Give every optional switch that was left out its default value.
for _optionName, _fallback in (("skipP", "yes"),
                               ("skipT", "yes"),
                               ("mpMearge", "yes"),
                               ("dt", "1"),
                               ("treeUsed", "no")):
    if getattr(args, _optionName) is None:
        setattr(args, _optionName, _fallback)
# The dp module is only imported when the "dp" sequence was requested.
if args.sequence == "dp":
    import dp
if args.skipTr is None:
    args.skipTr = "yes"

algo = rCodeGen.getAlgoName(args)

# Without an explicit -a the three listed algorithms are all used.
allAlgos = ['logitr', 'glmnet', 'randomForest'] if args.a is None else [args.a]

config = ConfigObj(args.e + "/design.ini")
targetAttributes = attribute.getTargetVariableKeys(config)
# The feature count is measured on the first target's feature list only.
_firstTarget = targetAttributes.keys()[0]
one_feature_attributes = attribute.getFeatureVariableKeys(config, _firstTarget)
lengthOfFeatures = len(one_feature_attributes)

allDataDirectories = attribute.getListOfTrainingDirectoriesNames(
    int(args.nDays), args.td, args.iT)
dataFolder = args.td
generatorsFolder = args.g
commandList = []
Пример #6
0
def main():
    """Write an executable R training script for one experiment folder.

    Command-line options are parsed with argparse (see the ``help`` texts).
    Targets come from ``<e>/design.ini``.  When every target's model file
    already exists and ``-skipM`` is "yes" (the default), the script holds
    only the header and setup checks.  Otherwise a section is emitted per
    target that has no reusable model.

    With ``-double`` set, script and model names carry a "double" tag.  For
    each target the plain ``.model`` file is loaded if it exists and trained
    otherwise, after which ``rCodeGen`` prepares the weight vector for the
    second pass and saves the model under the "double" name.
    """
    parser = argparse.ArgumentParser(description='Generates train.r. A sample command is mGenForE.py -e ob/e1/ ')
    parser.add_argument('-e', required=True, help='Experiement folder to use to find the features and targets')
    parser.add_argument('-a', required=True, help='Algorithm name')
    parser.add_argument('-targetClass', required=True, help="binomial(target takes only true and false) / multinomial (target values takes more than 2 values)")
    parser.add_argument('-skipM', required=False, help="yes or no , If you want to regenerate already generated algorithm model file then make this value No")
    parser.add_argument('-td', required=True, help="Day on which it was trained")
    parser.add_argument('-dt', required=True, help="Number of days it was trained")
    parser.add_argument('-wt', required=True, help="default/exp , weight type to be given to different days")
    parser.add_argument('-iT', required=False, help='Instrument name')
    parser.add_argument('-sP', required=False, help='Strike price of instrument')
    parser.add_argument('-oT', required=False, help='Options Type')
    parser.add_argument('-double', required=False, help='Double training of in model')
    args = parser.parse_args()

    attribute.initializeInstDetails(args.iT, args.sP, args.oT)
    if args.skipM is None:
        args.skipM = "yes"

    # Single-argument print(...) behaves the same as the print statement on
    # Python 2 and is also valid Python 3.
    print("Using the experiment folder " + args.e)

    config = ConfigObj(args.e + "/design.ini")

    print("The config parameters that I am working with are")
    print(config)

    dirName = os.path.dirname(args.e) + "/"

    algo = rCodeGen.getAlgoName(args)
    trainDay = os.path.basename(os.path.abspath(args.td))
    # Tag inserted into the script and model names in double-training mode.
    doubleTag = "double" if args.double else ""

    def modelFileFor(target, tag):
        # One place for the model naming scheme used by all checks below.
        return dirName + '/' + algo + target + '-td.' + trainDay \
            + '-dt.' + args.dt + '-targetClass.' + args.targetClass \
            + "-wt." + args.wt + attribute.generateExtension() + tag + '.model'

    def canSkip(modelFile):
        # A model is reused only if it exists and regeneration was not forced.
        return os.path.isfile(modelFile) and args.skipM.lower() == "yes"

    rProgName = "train" + algo + "-td." + trainDay + "-dt." + args.dt + "-wt." + args.wt \
                + attribute.generateExtension() + doubleTag + ".r"
    rProgLocation = dirName + '/' + rProgName

    # The context manager closes the script even if a generator call raises.
    with open(rProgLocation, 'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')
        # Only these algorithms need an R package to be loaded up front.
        if algo in ('glmnet', 'randomForest', 'mda'):
            rScript.write('require (' + algo + ') \n')
        rCodeGen.ForSetUpChecks(rScript)

        # all() stops at the first target that still needs training.
        lAllFilePresent = all(canSkip(modelFileFor(target, doubleTag)) for target in config['target'])

        if not lAllFilePresent:
            rCodeGen.ToReadTargetFile(rScript, config)
            rCodeGen.ForWtVectorGeneration(rScript, args.wt.lower())

            for target in config['target']:
                rCodeGen.ToReadFeatureFiles(rScript, config, target)
                rCodeGen.ForSanityChecks(rScript, config, target)
                lModelGeneratedAfterTraining = modelFileFor(target, doubleTag)
                if args.double:
                    # Result of the first (plain) training pass, if any.
                    lTempModelName = modelFileFor(target, "")
                if canSkip(lModelGeneratedAfterTraining):
                    # The sentence used to be repeated twice in this message.
                    print("Model File " + lModelGeneratedAfterTraining + " already exists . So it will not be formed again . If you want to re-generate model then re-run with -skipM=No")
                    continue

                rCodeGen.ToCreateDataFrameForTraining(rScript, config, target)
                if args.double:
                    if os.path.isfile(lTempModelName):
                        rCodeGen.ForLoadingModel(rScript, args, dirName, target, config)
                    else:
                        rCodeGen.ForTraining(rScript, args, config, target)
                    rCodeGen.forPreparingWtVectorForDoubleTraining(rScript, args, target)
                    rCodeGen.saveTrainingModel(rScript, args, dirName, target, "double")
                else:
                    rCodeGen.ForTraining(rScript, args, config, target)
                    rCodeGen.saveTrainingModel(rScript, args, dirName, target)

    print("Finished generating R training program: " + rProgLocation)
    # Mark the script executable without a shell, so paths containing spaces
    # or shell metacharacters are handled correctly: every permission class
    # that can read the file also gets the execute bit.
    lMode = os.stat(rProgLocation).st_mode
    os.chmod(rProgLocation, lMode | ((lMode & 0o444) >> 2))