Пример #1
0
def main():
    """Generate one executable R correlation script per training day.

    Parses the command line, loads ``<e>/design.ini`` with ConfigObj and asks
    ``attribute.getListOfTrainingDirectoriesNames`` for the training-day
    directories.  For each of them an R script named
    ``corr-date-<day>-td.<td>-dt.<dt><extension>.r`` is created under
    ``os.path.dirname(args.e)``, handed to the ``rCodeGen`` helpers to be
    filled in, and finally marked executable.
    """
    import subprocess  # local import: only needed to mark the scripts executable

    parser = argparse.ArgumentParser(description='Generates train.r. A sample command is :- src/corrRGenForE.py -e ob/e/nsefut/CorExpHINDALCO/ -td ob/data/ro/nsefut/20141017/ -dt 10 -iT HINDALCO -sP -1 -oT 0')
    parser.add_argument('-e', required=True,help='Experiement folder to use to find the features and targets')
    parser.add_argument('-td',required=True,help="Day on which it was trained")
    parser.add_argument('-dt',required=True,help="Number of days it was trained")
    parser.add_argument('-iT',required=False,help='Instrument name')
    parser.add_argument('-sP',required=False,help='Strike price of instrument')
    parser.add_argument('-oT',required=False,help='Options Type')
    args = parser.parse_args()

    attribute.initializeInstDetails(args.iT,args.sP,args.oT)
    print("Using the experiment folder " + args.e)
    print(args.e+"/design.ini")
    config = ConfigObj(args.e+"/design.ini")

    print("The config parameters that I am working with are")
    print(config)
    dirName=os.path.dirname(args.e)+"/"
    # Last path component of the -td directory; shared by every generated name.
    lTrainedOnName = os.path.basename(os.path.abspath(args.td))
    trainingDaysDirectory = attribute.getListOfTrainingDirectoriesNames( int(args.dt) , args.td ,args.iT)
    for l_trainingday in trainingDaysDirectory:
        lDayName = os.path.basename(os.path.abspath(l_trainingday))
        rProgName = "corr-date-"+ lDayName +"-td." + lTrainedOnName + "-dt." + args.dt + attribute.generateExtension() +".r"
        rProgLocation = dirName+'/'+rProgName
        # The context manager closes the script even if a code-gen helper raises.
        with open(rProgLocation,'w') as rScript:
            rScript.write('#!/usr/bin/Rscript \n')
            rCodeGen.ForSetUpChecks(rScript)
            lCorrelationFileName = dirName + '/correlation-coef-date-'+ lDayName + '-td.' + lTrainedOnName + '-dt.' + args.dt + attribute.generateExtension() + ".coef"
            rCodeGen.ToReadTargetFile(rScript,config)
            for target in config['target']:
                rCodeGen.ToFindCorrelationDatewiseAndPrintingToFile(rScript,config,target,lCorrelationFileName)
        # Argument-list form: the path is never interpreted by a shell, so
        # spaces or metacharacters in it cannot break (or hijack) the command.
        subprocess.call(["chmod", "+x", rProgLocation])
Пример #2
0
def main():
    """Run the training and prediction commands for an experiment.

    The parsed command line is stored in the module-level ``args``.  When
    ``args.mpMearge`` is "yes" a single combined train+predict command list is
    run; otherwise the train command list is run first and the predict command
    list is built and run afterwards.  Each list is executed through
    ``wrapper``, in a process pool when ``args.sequence`` is 'lp' and serially
    in this process otherwise.
    """
    global args
    args = parseCommandLine()
    experimentFolder = args.e
    trainDataFolder = args.td
    predictDataFolder = args.pd
    attribute.initializeInstDetails(args.iT,args.sP,args.oT)

    def runCommands(commandList):
        # Execute every command through wrapper() and return the results.
        if args.sequence != 'lp':
            # Explicit list so the work is done eagerly on any Python version.
            return [wrapper(command) for command in commandList]
        # Local parallel mode: Pool() defaults to one worker per CPU.
        pool = multiprocessing.Pool()
        try:
            return pool.map(wrapper,commandList)
        finally:
            # Release the worker processes instead of leaking one pool per call.
            pool.close()
            pool.join()

    if args.mpMearge.lower() == "yes":
        runCommands(getTrainPredictCommandList(experimentFolder,args.a,trainDataFolder,predictDataFolder,args.dt,args.wt))
    else:
        runCommands(getTrainCommandList(experimentFolder,args.a,trainDataFolder,args.dt,args.wt))
        # Built only after training has finished, exactly as before.
        runCommands(getPredictCommandList(experimentFolder,args.a,predictDataFolder,trainDataFolder,args.dt,args.wt))
Пример #3
0
def main():
    """Generate the attribute file for an operation on two attributes.

    Reads the module-level ``args``.  If the output file named by
    ``attribute.getFileNameFromOperationCommand`` already exists, nothing is
    regenerated: the matching ``bin`` file is reported when present (or
    created through ``attribute.callRProgramToConvertToBinary`` when missing)
    and the process exits with status 0.  Otherwise the attribute is computed
    with ``attribute.operateOnAttributes``, written out and converted to
    binary.  Any exception is printed and the process exits with status -1.
    """
    try:
        attribute.initializeInstDetails(args.iT, args.sP, args.oT)
        outputFileName = attribute.getFileNameFromOperationCommand(
            args.a1, args.a2, args.operand, args.d)
        if os.path.isfile(outputFileName):
            print("The attribute has already been generated. If you want to re-generate it then first delete the attribute file. " + outputFileName)
            lNameAfterDecimal = outputFileName.split(".")[-1]
            # Swap only the trailing extension.  str.replace() would also
            # rewrite the same text wherever it occurs earlier in the path.
            attributeBinaryFileName = outputFileName[:len(outputFileName) - len(lNameAfterDecimal)] + "bin"
            if os.path.isfile(attributeBinaryFileName):
                print(attributeBinaryFileName)
            else:
                attribute.callRProgramToConvertToBinary(outputFileName)
            # An existing attribute is not an error condition, hence 0 and not -1.
            # os._exit() skips the normal stdio flush, so flush explicitly.
            sys.stdout.flush()
            os._exit(0)
        attribute.aList, lListOfHeaderColNames = attribute.operateOnAttributes(
            args.a1, args.a2, args.operand, args.d)
        attribute.writeToFile(outputFileName, lListOfHeaderColNames)
        attribute.callRProgramToConvertToBinary(outputFileName)
    except:
        # Top-level boundary: report whatever went wrong and exit non-zero.
        traceback.print_exc()
        e = sys.exc_info()[0]
        print(e)
        sys.stdout.flush()
        os._exit(-1)
Пример #4
0
def main():
    """Run the feature-generation command lists for an experiment.

    The three command lists (inside features, intermediate features, then the
    remaining commands) are built and run for ``<e>/design.ini``.  The same
    three stages are then attempted for ``<e>/design1.ini`` on a best-effort
    basis: if that pass fails, a message is printed and ``None`` is returned.

    Returns:
        Whatever ``utility.runCommandList`` returns for the final command
        list of the ``design1.ini`` pass, or ``None`` if that pass failed.
    """
    args = parseCommandLine()
    dataFolder = args.d
    generatorsFolder = args.g

    attribute.initializeInstDetails(args.iT,args.sP,args.oT,args.rev)
    args.sequence = "lp"

    def runAllStages(designFile):
        # Build and run each command list in turn; return the last run's result.
        lResult = None
        for buildCommandList in (getCommandListForInsideFeatures,
                                 getCommandListForIntermediateFeatures,
                                 getCommandList):
            commandList = buildCommandList(designFile,dataFolder,generatorsFolder,args.tickSize)
            lResult = utility.runCommandList(commandList,args)
        return lResult

    runAllStages(args.e + "/design.ini")

    try:
        return runAllStages(args.e + "/design1.ini")
    except (Exception, SystemExit) as e:
        # The second design file is optional, so a failure here must not
        # abort the run.  Unlike a bare except this lets Ctrl-C through, and
        # the reason for skipping is reported instead of silently dropped.
        print("Skipping " + args.e + "/design1.ini : " + str(e))
        return
Пример #5
0
Файл: aGen.py Проект: grmte/ml
def main():
    """Generate one attribute file from the data file named by ``args.d``.

    Reads the module-level ``args``, ``moduleName`` and ``userModule``.
    The steps are:

    1. ``attribute.checkIfAttributeOutputFileExists`` is called for the
       target output (using ``args.i`` when given, else ``args.c``).
    2. The data is loaded: ``getSelectedDataIntoMatrix`` when ``args.rev`` is
       "yes", otherwise ``getDataIntoMatrix`` at level 5.  Generators whose
       ``args.g`` contains "For6Levels" fall back to level 6 if level 5 fails.
    3. ``userModule.extractAttributeFromDataMatrix`` computes the attribute,
       which is written to file and converted to binary.

    Any exception is printed and the process exits with status -1.
    """
    def nameComponent():
        # -i takes precedence over -c when building the output file name.
        return args.i if args.i is not None else args.c

    try:
        attribute.initializeInstDetails(args.iT, args.sP, args.oT, args.rev)
        attribute.checkIfAttributeOutputFileExists(
            os.path.basename(moduleName), args.n, nameComponent(), args.o,
            args.m, args.d)
        if args.rev is not None and args.rev.lower() == "yes":
            dataFile.getSelectedDataIntoMatrix(args.d)
        else:
            # Synthetic instruments pass args.c as an extra positional argument.
            if args.cType == "synthetic":
                lLoadArgs = (args.d, args.c)
            else:
                lLoadArgs = (args.d,)
            if "For6Levels" not in args.g:
                dataFile.getDataIntoMatrix(*lLoadArgs, level=5)
            else:
                try:
                    dataFile.getDataIntoMatrix(*lLoadArgs, level=5)
                except Exception:
                    # Retry at level 6 only for real errors; a Ctrl-C during
                    # the first load must not trigger a second load.
                    dataFile.getDataIntoMatrix(*lLoadArgs, level=6)

        attribute.initList()
        lHeaderColumnNamesList = userModule.extractAttributeFromDataMatrix(
            args)
        fileName = attribute.getOutputFileNameFromGeneratorName(
            os.path.basename(moduleName), args.n, nameComponent(), args.o,
            args.m, args.d)
        print(fileName)
        attribute.writeToFile(fileName, lHeaderColumnNamesList)
        attribute.callRProgramToConvertToBinary(fileName)
    except:
        # Top-level boundary: report whatever went wrong and exit non-zero.
        traceback.print_exc()
        e = sys.exc_info()[0]
        print(e)
        # os._exit() skips the normal stdio flush, so flush explicitly.
        sys.stdout.flush()
        os._exit(-1)
Пример #6
0
def main():
    """Build the command list for an experiment and run it.

    ``args.sequence`` is forced to "lp" before the commands are handed to
    ``utility.runCommandList``.

    Returns:
        Whatever ``utility.runCommandList`` returns.
    """
    lOptions = parseCommandLine()
    lExperiment, lData, lGenerators = lOptions.e, lOptions.d, lOptions.g
    attribute.initializeInstDetails(lOptions.iT, lOptions.sP, lOptions.oT)
    lOptions.sequence = "lp"
    # NOTE: the inside-feature and intermediate-feature passes were commented
    # out in this variant; only the getCommandList() commands are run.
    lCommands = getCommandList(lExperiment, lData, lGenerators, lOptions.tickSize)
    return utility.runCommandList(lCommands, lOptions)
Пример #7
0
def main():
    """Run the commands produced by ``getCommandList`` for an experiment.

    The parsed options have ``sequence`` overridden to "lp" before being
    passed, together with the command list, to ``utility.runCommandList``.

    Returns:
        The result of ``utility.runCommandList``.
    """
    parsed = parseCommandLine()
    folders = (parsed.e, parsed.d, parsed.g)
    attribute.initializeInstDetails(parsed.iT, parsed.sP, parsed.oT)
    parsed.sequence = "lp"
    # NOTE: this variant runs neither the inside-feature nor the
    # intermediate-feature command lists (both were commented out).
    experimentFolder, dataFolder, generatorsFolder = folders
    return utility.runCommandList(
        getCommandList(experimentFolder, dataFolder, generatorsFolder,
                       parsed.tickSize),
        parsed)
Пример #8
0
def main():
    """Run the training and prediction commands for an experiment.

    Stores the parsed command line in the module-level ``args``.  With
    ``args.mpMearge`` equal to "yes" one combined train+predict command list
    is executed; otherwise the train list is executed and only then is the
    predict list built and executed.  Commands go through ``wrapper``: in a
    process pool when ``args.sequence`` is 'lp', serially otherwise.
    """
    global args
    args = parseCommandLine()
    experimentFolder = args.e
    trainDataFolder = args.td
    predictDataFolder = args.pd
    attribute.initializeInstDetails(args.iT, args.sP, args.oT)

    def execute(commandList):
        # Run every command through wrapper(), in parallel for 'lp' mode.
        if args.sequence == 'lp':
            # Pool() defaults to one worker process per CPU.
            pool = multiprocessing.Pool()
            try:
                return pool.map(wrapper, commandList)
            finally:
                # Shut the workers down instead of leaking one pool per call.
                pool.close()
                pool.join()
        # Explicit list so the work is done eagerly on any Python version.
        return [wrapper(command) for command in commandList]

    if args.mpMearge.lower() == "yes":
        execute(getTrainPredictCommandList(experimentFolder, args.a,
                                           trainDataFolder,
                                           predictDataFolder, args.dt,
                                           args.wt))
        return

    execute(getTrainCommandList(experimentFolder, args.a, trainDataFolder,
                                args.dt, args.wt))
    # The predict list is built only after training has completed.
    execute(getPredictCommandList(experimentFolder, args.a,
                                  predictDataFolder, trainDataFolder,
                                  args.dt, args.wt))
Пример #9
0
Файл: aGen.py Проект: grmte/ml
def main():
    """Generate one attribute file from the data file named by ``args.d``.

    Uses the module-level ``args``, ``moduleName`` and ``userModule``.  After
    ``attribute.checkIfAttributeOutputFileExists`` has been consulted, the
    data is loaded (``getSelectedDataIntoMatrix`` when ``args.rev`` is "yes",
    else ``getDataIntoMatrix`` at level 5, with a level-6 retry for generators
    whose ``args.g`` contains "For6Levels").  The attribute is then extracted
    by ``userModule.extractAttributeFromDataMatrix``, written to file and
    converted to binary.

    Any exception is printed and the process exits with status -1.
    """
    def identifier():
        # The output name is built from -i when supplied, from -c otherwise.
        if args.i is not None:
            return args.i
        return args.c

    def loadData(level):
        # Synthetic instruments pass args.c as an extra positional argument.
        if args.cType == "synthetic":
            dataFile.getDataIntoMatrix(args.d, args.c, level=level)
        else:
            dataFile.getDataIntoMatrix(args.d, level=level)

    try:
        attribute.initializeInstDetails(args.iT,args.sP,args.oT,args.rev)
        attribute.checkIfAttributeOutputFileExists(os.path.basename(moduleName),args.n,identifier(),args.o,args.m,args.d)
        if args.rev is not None and args.rev.lower()=="yes":
            dataFile.getSelectedDataIntoMatrix(args.d)
        elif "For6Levels" not in args.g:
            loadData(5)
        else:
            try:
                loadData(5)
            except Exception:
                # Fall back to level 6 only on real errors; a Ctrl-C during
                # the first load must not start a second one.
                loadData(6)

        attribute.initList()
        lHeaderColumnNamesList = userModule.extractAttributeFromDataMatrix(args)
        fileName = attribute.getOutputFileNameFromGeneratorName(os.path.basename(moduleName),args.n,identifier(),args.o,args.m,args.d )
        print(fileName)
        attribute.writeToFile(fileName , lHeaderColumnNamesList)
        attribute.callRProgramToConvertToBinary(fileName)
    except:
        # Top-level boundary: report whatever went wrong and exit non-zero.
        traceback.print_exc()
        e = sys.exc_info()[0]
        print(e)
        # os._exit() skips the normal stdio flush, so flush explicitly.
        sys.stdout.flush()
        os._exit(-1)
Пример #10
0
def main():
    """Write a copy of the data file with consecutive duplicate rows dropped.

    Reads the module-level ``args``.  The rows of ``dataFile.matrix`` are
    written, ';'-joined, to the file named by ``dataFile.getNewDataFileName``
    after a fixed header line.  A row is skipped when its columns before
    ``colNumberOfData.TTQ`` are identical to those of the previously written
    row.  Progress is printed every 1000 rows.
    """
    attribute.initializeInstDetails(args.iT,args.sP,args.oT)
    dataFile.getDataIntoMatrix(args.d)
    attribute.checkIfNewDataFileExists(args.d)
    newFileName = dataFile.getNewDataFileName(args.d)
    header = "Instrument;AskQ0;AskP0;AskQ1;AskP1;AskQ2;AskP2;AskQ3;AskP3;AskQ4;AskP4;BidQ0;BidP0;BidQ1;BidP1;BidQ2;BidP2;BidQ3;BidP3;BidQ4;BidP4;TTQ;LTP;LTQ;LTT;ATP;TBQ;TSQ;CP;OP;HP;LP;TimeStamp;SerialNo;MsgCode;OrderType;Quantity1;Price1;Quantity2;Price2;ExchangeTS;BestBidQ;BestBidP;BestAskQ;BestAskP\n"
    print(newFileName)
    previousLine = ""
    # The context manager guarantees the output is flushed and closed, even
    # if a row fails to serialise part-way through.
    with open(newFileName,"w") as newFileObject:
        newFileObject.write(header)
        for currentRowCount, dataRow in enumerate(dataFile.matrix, 1):
            # Comparison key: only the columns that precede TTQ.
            lineWithoutBestPrices = ";".join(dataRow[:colNumberOfData.TTQ])
            if previousLine != lineWithoutBestPrices:
                newFileObject.write(";".join(dataRow) + "\n")
                previousLine = lineWithoutBestPrices
            if currentRowCount % 1000 == 0:
                print("Processed row number " + str(currentRowCount))
Пример #11
0
def main():
    """Generate the attribute file for an operation on two attributes.

    Uses the module-level ``args``.  When the file named by
    ``attribute.getFileNameFromOperationCommand`` already exists it is not
    regenerated; its ``bin`` counterpart is printed if present, or produced
    via ``attribute.callRProgramToConvertToBinary`` if not, and the process
    exits with status 0.  Otherwise ``attribute.operateOnAttributes`` computes
    the attribute, which is written out and converted to binary.  Any
    exception is printed and the process exits with status -1.
    """
    try:
        attribute.initializeInstDetails(args.iT,args.sP,args.oT)
        outputFileName = attribute.getFileNameFromOperationCommand(args.a1,args.a2,args.operand,args.d)
        if not os.path.isfile(outputFileName):
            attribute.aList,lListOfHeaderColNames = attribute.operateOnAttributes(args.a1,args.a2,args.operand,args.d)
            attribute.writeToFile(outputFileName,lListOfHeaderColNames)
            attribute.callRProgramToConvertToBinary(outputFileName)
            return

        print("The attribute has already been generated. If you want to re-generate it then first delete the attribute file. " + outputFileName)
        lNameAfterDecimal = outputFileName.split(".")[-1]
        # Replace the trailing extension only; str.replace() would also touch
        # any earlier occurrence of the same text inside the path.
        lStem = outputFileName[:len(outputFileName) - len(lNameAfterDecimal)]
        attributeBinaryFileName = lStem + "bin"
        if os.path.isfile(attributeBinaryFileName):
            print(attributeBinaryFileName)
        else:
            attribute.callRProgramToConvertToBinary(outputFileName)
        # We do not take it as a error condition hence return 0 and not -1.
        # os._exit() skips the normal stdio flush, so flush explicitly.
        sys.stdout.flush()
        os._exit(0)
    except:
        # Top-level boundary: report whatever went wrong and exit non-zero.
        traceback.print_exc()
        e = sys.exc_info()[0]
        print(e)
        sys.stdout.flush()
        os._exit(-1)
Пример #12
0
def main():
    """Run the feature-generation command lists for an experiment.

    For ``<e>/design.ini`` the inside-feature, intermediate-feature and
    remaining command lists are built and run in that order.  The same
    sequence is then attempted, best-effort, for ``<e>/design1.ini``; if that
    pass fails a message is printed and ``None`` is returned.

    Returns:
        The result of ``utility.runCommandList`` for the last command list of
        the ``design1.ini`` pass, or ``None`` when that pass failed.
    """
    args = parseCommandLine()
    dataFolder = args.d
    generatorsFolder = args.g

    attribute.initializeInstDetails(args.iT, args.sP, args.oT, args.rev)
    args.sequence = "lp"

    def runPipeline(designFile):
        # One full pass over a design file; returns the final stage's result.
        utility.runCommandList(
            getCommandListForInsideFeatures(designFile, dataFolder,
                                            generatorsFolder, args.tickSize),
            args)
        utility.runCommandList(
            getCommandListForIntermediateFeatures(designFile, dataFolder,
                                                  generatorsFolder,
                                                  args.tickSize),
            args)
        return utility.runCommandList(
            getCommandList(designFile, dataFolder, generatorsFolder,
                           args.tickSize),
            args)

    runPipeline(args.e + "/design.ini")

    lSecondDesign = args.e + "/design1.ini"
    try:
        return runPipeline(lSecondDesign)
    except (Exception, SystemExit) as e:
        # The second design file is optional: keep going, but say why it was
        # skipped.  Unlike a bare except, Ctrl-C is no longer swallowed.
        print("Skipping " + lSecondDesign + " : " + str(e))
        return
Пример #13
0
def main():
    """Generate the executable R prediction script for an experiment.

    Parses the command line, loads ``<s>/design.ini`` (``-s`` defaults to
    ``-e``) and writes ``predict<algo>-td...-dt...-pd...-wt...<ext>.r`` (with
    a ``double`` suffix when ``-double`` is given) under
    ``os.path.dirname(args.s)``.  Prediction code is emitted through the
    ``rCodeGen`` helpers only for targets whose ``.predictions`` file is
    missing, or for all of them when ``-skipP no`` is given.  If every
    prediction file already exists and ``-skipP`` is "yes" (the default), only
    the script preamble is written.  The script is finally marked executable.
    """
    import subprocess  # local import: only needed to mark the script executable

    parser = argparse.ArgumentParser(description='Generates predict.r which will use design.model to make predictions. Sample command is pGenForE.py -e ob/e1/')
    parser.add_argument('-e', required=True,help='Directory to find the experiement designs')
    parser.add_argument('-a', required=True,help='Algorithm name')
    parser.add_argument('-pd', required=True,help='Prediction directory')
    parser.add_argument('-td',required=True,help="Day on which it was trained")
    parser.add_argument('-dt',required=True,help="Number of days it was trained")
    parser.add_argument('-wt',required=True,help="exp/default")
    parser.add_argument('-targetClass',required=True,help="For which model was used ; binomial(target takes only true and false) / multinomial (target values takes more than 2 values)")
    parser.add_argument('-skipP',required=False,help="yes or no , If you want to regenerate already generated algorithm prediction file then make this value No")
    parser.add_argument('-s',required=False,help="Experiment sub folders")
    parser.add_argument('-iT',required=False,help='Instrument name')
    parser.add_argument('-sP',required=False,help='Strike price of instrument')
    parser.add_argument('-oT',required=False,help='Options Type')
    parser.add_argument('-double',required=False,help='Double training of in model')
    args = parser.parse_args()

    attribute.initializeInstDetails(args.iT,args.sP,args.oT)
    if args.skipP is None:
        args.skipP = "yes"
    if args.s is None:
        args.s = args.e

    print("\nRunning pGen.py to generate the predict script")
    print("Using the experiment folder " + args.e)

    config = ConfigObj(args.s+"/design.ini")

    print("The config parameters that I am working with are")
    print(config)

    dirName=os.path.dirname(args.s)
    algo = 'glmnet' if args.a is None else args.a

    # "-double" only adds this tag to the generated file names and passes two
    # extra arguments to rCodeGen.ForPredictions.
    lDoubleTag = "double" if args.double else ""
    lTrainedOnName = os.path.basename(os.path.abspath(args.td))

    rProgName = "predict" + algo + "-td." + lTrainedOnName + "-dt." + args.dt + "-pd." + os.path.basename(os.path.abspath(args.pd)) \
                + "-wt." + args.wt+ attribute.generateExtension()   + lDoubleTag + ".r"
    rProgLocation = dirName+'/'+rProgName

    predictDataDirectoryName = args.pd.replace('/ro/','/wf/')
    predictDataDirectoryName = predictDataDirectoryName + "/p/" + os.path.basename(os.path.dirname(args.e)) + "/"

    def predictionFileNameFor(target):
        # Single place that knows how a target's prediction file is named.
        return predictDataDirectoryName + "/" + args.a + target +'-td.' + lTrainedOnName \
            + '-dt.' + args.dt + '-targetClass.' + args.targetClass + '-f.' + os.path.basename(os.path.dirname(args.s)) + \
            "-wt." + args.wt+ attribute.generateExtension()  + lDoubleTag + ".predictions"

    # The context manager closes the script even if a code-gen helper raises.
    with open(rProgLocation,'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')
        if not os.path.exists(predictDataDirectoryName):
            os.mkdir(predictDataDirectoryName)
        if(args.a == 'glmnet'):
            rScript.write('require (glmnet) \n')
        elif(args.a == 'randomForest'):
            rScript.write('require (randomForest) \n')
        rCodeGen.ForSetUpChecks(rScript)

        # Nothing to generate when every prediction file exists and the user
        # did not ask for regeneration.
        lAllFilePresent = all(
            os.path.isfile(predictionFileNameFor(target)) and ( args.skipP.lower() == "yes" )
            for target in config['target'])

        if not lAllFilePresent:
            for target in config['target']:
                rCodeGen.ToReadFeatureFiles(rScript,config,target)
                rCodeGen.ForSanityChecks(rScript,config,target)
                predictionFileName = predictionFileNameFor(target)

                if not os.path.isfile(predictionFileName) or ( args.skipP.lower() == "no" ):
                    lModelGeneratedAfterTraining = args.s + '/' + args.a + target + '-td.' + lTrainedOnName + '-dt.' + args.dt + '-targetClass.' + args.targetClass + "-wt." + args.wt + lDoubleTag + '.model'
                    if args.double:
                        rCodeGen.ForPredictions(rScript,config,args,args.s,target,2,"double")
                    else:
                        rCodeGen.ForPredictions(rScript,config,args,args.s,target)
                    print(lModelGeneratedAfterTraining)
                else:
                    print(predictionFileName + "Already exists , not generating it again . If you want to generate it again then rerun it with -skipP no ")
    print("Finished generating R prediction program: " + rProgLocation)
    # Argument-list form: the path is never interpreted by a shell, so spaces
    # or metacharacters in it cannot break (or hijack) the command.
    subprocess.call(["chmod", "+x", rProgLocation])
Пример #14
0
def main():
    """Generate the executable R script that trains and predicts together.

    Parses the command line, loads ``<e>/design.ini`` and writes
    ``trainPredict<algo>-td...-dt...-pd...-wt...<ext>-For<s>SubE.r`` under
    ``os.path.dirname(args.e)``.  The script first gets the shared preamble
    and the feature-reading code for the training data and for the prediction
    data (feature keys suffixed with "P").  Then, for every design file
    returned by ``utility.list_files(args.s)`` and every target in it:

    * training code is emitted unless the ``.model`` file already exists and
      ``-skipM`` is "yes" (the default);
    * prediction code is emitted unless the ``.predictions`` file already
      exists and ``-skipP`` is not "no".

    The script is finally marked executable.
    """
    import subprocess  # local import: only needed to mark the script executable

    parser = argparse.ArgumentParser(description='Generates train.r. A sample command is mGenForE.py -e ob/e1/ ')
    parser.add_argument('-e', required=True,help='Experiement folder to use to find the features and targets')
    parser.add_argument('-a', required=True,help='Algorithm name')
    parser.add_argument('-s', required=True,help='Location of the folder containing all the sub experiments')
    parser.add_argument('-targetClass',required=True,help="binomial(target takes only true and false) / multinomial (target values takes more than 2 values)")
    parser.add_argument('-skipM',required=False,help="yes or no , If you want to regenerate already generated algorithm model file then make this value No")
    parser.add_argument('-skipP',required=False,help="yes or no , If you want to regenerate already generated algorithm prediction file then make this value No")
    parser.add_argument('-pd', required=True,help='Prediction directory')
    parser.add_argument('-td',required=True,help="Day on which it was trained")
    parser.add_argument('-dt',required=True,help="Number of days it was trained")
    parser.add_argument('-wt',required=True,help="default/exp , weight type to be given to different days")
    parser.add_argument('-iT',required=False,help='Instrument name')
    parser.add_argument('-sP',required=False,help='Strike price of instrument')
    parser.add_argument('-oT',required=False,help='Options Type')
    args = parser.parse_args()

    attribute.initializeInstDetails(args.iT,args.sP,args.oT)
    if args.skipM is None:
        args.skipM = "yes"
    if args.skipP is None:
        args.skipP = "yes"

    def appendPSuffixToFeatureKeys(lConfig, target):
        # Rename every key of the target's feature section to <key>P.  The
        # keys are snapshotted first because the section is mutated in the loop.
        features = lConfig['features-'+target]
        for key in list(features.keys()):
            new_key = key + "P"
            features[new_key] = features[key]
            del features[key]

    print("Using the experiment folder " + args.e)

    print("Training files steps")
    config = ConfigObj(args.e+"/design.ini")

    print("The config parameters that I am working with are")
    print(config)

    predictionDataDirectoryName = args.pd.replace('/ro/','/wf/')
    predictionDataDirectoryName = predictionDataDirectoryName + "/p/" + os.path.basename(os.path.dirname(args.e))
    if not os.path.exists(predictionDataDirectoryName):
        os.mkdir(predictionDataDirectoryName)

    dirName=os.path.dirname(args.e)

    algo = rCodeGen.getAlgoName(args)

    args.s = args.s + "/"
    rProgName = "trainPredict"+ algo + "-td." + os.path.basename(os.path.abspath(args.td)) + "-dt." + args.dt +\
     "-pd." + os.path.basename(os.path.abspath(args.pd)) +  "-wt." + args.wt+ attribute.generateExtension()  + "-For"+os.path.basename(os.path.dirname(args.s))+"SubE.r"
    rProgLocation = dirName+'/'+rProgName

    # The context manager closes the script even if a code-gen helper raises.
    with open(rProgLocation,'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')

        if(algo == 'glmnet'):
            rScript.write('require (glmnet) \n')
        elif(algo == 'randomForest'):
            rScript.write('require (randomForest) \n')
        elif(algo == 'mda'):
            rScript.write('require (mda) \n')

        rCodeGen.ForSetUpChecksForTrainPredictTogather(rScript)
        rCodeGen.ToReadTargetFile(rScript,config)
        rCodeGen.ForWtVectorGeneration(rScript,args.wt.lower())
        for target in config['target']:
            rCodeGen.ToReadFeatureFiles(rScript,config,target,2)
            rCodeGen.ForSanityChecks(rScript,config,target)

        print("For prediction data set")
        configForPredictions = ConfigObj(args.e+"/design.ini")
        print("The config parameters that I am working with are")
        for target in configForPredictions['target']:
            appendPSuffixToFeatureKeys(configForPredictions, target)
            print(configForPredictions)

            rCodeGen.ToReadFeatureFiles(rScript,configForPredictions,target,4)
            rCodeGen.ForSanityChecks(rScript,configForPredictions,target)

        designFiles = utility.list_files(args.s)

        for designFile in designFiles:
            print("Generating r code for " + designFile)
            rScript.write('\n\nprint ("Running r code for ' + designFile + '")\n')
            config = ConfigObj(designFile)
            configForPredictions = ConfigObj(designFile)
            for target in config['target']:
                #--------------MODEL--------------------
                appendPSuffixToFeatureKeys(configForPredictions, target)
                lModelGeneratedAfterTraining = os.path.dirname(designFile) + '/' + algo + target + '-td.' + os.path.basename(os.path.abspath(args.td)) + \
                '-dt.' + args.dt + '-targetClass.' + args.targetClass + "-wt." + args.wt+ attribute.generateExtension()  + '.model'
                if os.path.isfile(lModelGeneratedAfterTraining)and ( args.skipM.lower() == "yes" ):
                    print("Model File " + lModelGeneratedAfterTraining + " already exists . So it will not be formed again . If you want to re-generate model then re-run with -skipM=No")
                else:
                    rCodeGen.ToCreateDataFrameForTraining(rScript,config,target)
                    rCodeGen.ForTraining(rScript,args,config,target)
                    rCodeGen.saveTrainingModel(rScript,args,os.path.dirname(designFile),target)

                #--------------Prediction Part--------------------
                predictionFileName = predictionDataDirectoryName + "/" +  args.a + target + '-td.' + os.path.basename(os.path.abspath(args.td)) + \
                                     '-dt.' + args.dt + '-targetClass.' + args.targetClass + '-f.' + os.path.basename(os.path.dirname(designFile)) +\
                                     "-wt." + args.wt+ attribute.generateExtension()  +".predictions"
                if not os.path.isfile(predictionFileName) or ( args.skipP.lower() == "no" ):
                    rCodeGen.ForPredictions(rScript,configForPredictions,args,designFile,target,4)
                else:
                    print("Prediction File " + predictionFileName + "Already exists , not generating it again . If you want to generate it again then rerun it with -skipP no ")
        rScript.write('rm(list=ls())')
    print("Finished generating R training program: " + rProgLocation)
    # Argument-list form: the path is never interpreted by a shell, so spaces
    # or metacharacters in it cannot break (or hijack) the command.
    subprocess.call(["chmod", "+x", rProgLocation])
Пример #15
0
def main():
    """Generate one R training script covering every sub-experiment design under ``-s``.

    Parses the command line, loads ``<-e>/design.ini`` with ConfigObj and
    writes ``train<algo>-td.<day>-dt.<n>-wt.<wt><ext>-For<sub>SubE.r`` into
    ``os.path.dirname(-e)``.  Code that reads targets and features is emitted
    once from the top-level design; then, for each design file returned by
    ``utility.list_files(-s)``, training code is emitted per target unless that
    target's ``.model`` file already exists and ``-skipM`` is "yes" (the
    default when the flag is omitted).  The finished script is made executable.

    All R code is produced by ``rCodeGen`` helpers defined elsewhere; this
    function only decides which of them to call and in what order.
    """
    import subprocess  # local import: used only to mark the script executable

    parser = argparse.ArgumentParser(description='Generates train.r. A sample command is mGenForE.py -e ob/e1/ ')
    parser.add_argument('-e', required=True,help='Experiement folder to use to find the features and targets')
    parser.add_argument('-a', required=True,help='Algorithm name')
    parser.add_argument('-s', required=True,help='Location of the folder containing all the sub experiments')
    parser.add_argument('-targetClass',required=True,help="binomial(target takes only true and false) / multinomial (target values takes more than 2 values)")
    parser.add_argument('-skipM',required=False,help="yes or no , If you want to regenerate already generated algorithm model file then make this value No")
    parser.add_argument('-td',required=True,help="Day on which it was trained")
    parser.add_argument('-dt',required=True,help="Number of days it was trained")
    parser.add_argument('-wt',required=True,help="default/exp , weight type to be given to different days")
    parser.add_argument('-iT',required=False,help='Instrument name')
    parser.add_argument('-sP',required=False,help='Strike price of instrument')
    parser.add_argument('-oT',required=False,help='Options Type')
    args = parser.parse_args()

    attribute.initializeInstDetails(args.iT,args.sP,args.oT)
    if args.skipM is None:
        args.skipM = "yes"
    skipExistingModels = args.skipM.lower() == "yes"

    print("Using the experiment folder " + args.e)

    config = ConfigObj(args.e+"/design.ini")

    print("The config parameters that I am working with are")
    print(config)

    dirName = os.path.dirname(args.e)
    algo = rCodeGen.getAlgoName(args)

    # The trailing slash makes os.path.dirname(args.s) return the folder itself.
    args.s = args.s + "/"
    trainingDay = os.path.basename(os.path.abspath(args.td))
    rProgName = "train" + algo + "-td." + trainingDay + "-dt." + args.dt + "-wt." + args.wt + attribute.generateExtension() + \
                "-For" + os.path.basename(os.path.dirname(args.s)) + "SubE.r"
    rProgLocation = dirName + '/' + rProgName

    # The context manager closes the script even if a generator call raises.
    with open(rProgLocation, 'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')
        if algo in ('glmnet', 'randomForest', 'mda'):
            rScript.write('require (' + algo + ') \n')

        rCodeGen.ForSetUpChecks(rScript)
        rCodeGen.ToReadTargetFile(rScript,config)
        rCodeGen.ForWtVectorGeneration(rScript,args.wt.lower())
        for target in config['target']:
            rCodeGen.ToReadFeatureFiles(rScript,config,target)
            rCodeGen.ForSanityChecks(rScript,config,target)

        for designFile in utility.list_files(args.s):
            print("Generating r code for " + designFile)
            rScript.write('\n\nprint ("Running r code for ' + designFile + '")\n')
            # From here on `config` is the sub-experiment's own design.
            config = ConfigObj(designFile)
            designDir = os.path.dirname(designFile)
            for target in config['target']:
                lModelGeneratedAfterTraining = designDir + '/' + algo + target + '-td.' + trainingDay + \
                    '-dt.' + args.dt + '-targetClass.' + args.targetClass + "-wt." + args.wt + attribute.generateExtension() + '.model'
                if os.path.isfile(lModelGeneratedAfterTraining) and skipExistingModels:
                    print("Model File " + lModelGeneratedAfterTraining + " already exists . So it will not be formed again . If you want to re-generate model then re-run with -skipM=No")
                else:
                    rCodeGen.ToCreateDataFrameForTraining(rScript,config,target)
                    rCodeGen.ForTraining(rScript,args,config,target)
                    rCodeGen.saveTrainingModel(rScript,args,designDir,target)
        rScript.write('rm(list=ls())')

    print("Finished generating R training program: " + rProgLocation)
    # Argument-list form: no shell, so paths with spaces or metacharacters work.
    subprocess.call(["chmod", "+x", rProgLocation])
Пример #16
0
def main():
    """Generate the R script that trains one tree per target of an experiment.

    Parses the command line, loads ``design1.ini`` (targets/features) and
    ``design.ini`` (passed to ``rCodeGen.ToReadPredictionFiles``) from the
    ``-e`` folder, and writes
    ``traintree-td.<day>-tTD<tTD>-dt.<n>-wt.<wt><ext>.r`` next to them.
    ``-skipT`` defaults to "yes" and ``-tTD`` defaults to the value of ``-dt``.

    If every target's ``.tree<treeType>`` file already exists and ``-skipT`` is
    "yes", only the script header is written.  Otherwise data-loading code is
    emitted for every target and training code for each target whose tree file
    is missing (or for all of them when ``-skipT`` is not "yes").  The script
    is made executable at the end.
    """
    import subprocess  # local import: used only to mark the script executable

    parser = argparse.ArgumentParser(description='Generates train.r. A sample command is mGenForE.py -e ob/e1/ ')
    parser.add_argument('-e', required=True,help='Experiement folder to use to find the features and targets')
    parser.add_argument('-a', required=True,help='Algorithm name')
    parser.add_argument('-targetClass',required=True,help="binomial(target takes only true and false) / multinomial (target values takes more than 2 values)")
    parser.add_argument('-skipT',required=False,help="yes or no , If you want to regenerate already generated algorithm model file then make this value No")
    parser.add_argument('-td',required=True,help="Day on which it was trained")
    parser.add_argument('-dt',required=True,help="Number of days it was trained")
    parser.add_argument('-wt',required=True,help="default/exp , weight type to be given to different days")
    parser.add_argument('-iT',required=False,help='Instrument name')
    parser.add_argument('-sP',required=False,help='Strike price of instrument')
    parser.add_argument('-oT',required=False,help='Options Type')
    parser.add_argument('-treeType',required=False,help="Tree read for trade engine")
    parser.add_argument('-tTD',required=False,help="Tree number of days to be used")
    args = parser.parse_args()

    attribute.initializeInstDetails(args.iT,args.sP,args.oT)
    if args.skipT is None:
        args.skipT = "yes"
    if args.tTD is None:
        args.tTD = args.dt
    skipExistingTrees = args.skipT.lower() == "yes"
    print("Using the experiment folder " + args.e)

    config = ConfigObj(args.e+"/design1.ini")
    # os.path.join supplies the separator the original concatenation omitted,
    # so this load works whether or not -e ends with a slash.
    configInit = ConfigObj(os.path.join(args.e, "design.ini"))

    print("The config parameters that I am working with are")
    print(config)

    dirName = os.path.dirname(args.e)+"/"
    algo = rCodeGen.getAlgoName(args)
    trainingDay = os.path.basename(os.path.abspath(args.td))

    def treeFileFor(target):
        # Single definition of the tree file name used by both passes below.
        # NOTE(review): -treeType is optional on the command line but is
        # concatenated here, so omitting it raises TypeError - confirm whether
        # it should be required or given a default.
        return dirName+"/"+algo+ target+'-td.' + trainingDay + "-tTD" + args.tTD + \
            '-dt.' + args.dt + attribute.generateExtension() +".tree" + args.treeType

    rProgName = "traintree" +  "-td." + trainingDay + "-tTD" + args.tTD + "-dt." + args.dt + "-wt." + args.wt + attribute.generateExtension() +".r"
    rProgLocation = dirName+'/'+rProgName

    # The context manager closes the script even if a generator call raises.
    with open(rProgLocation,'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')
        rScript.write('require (rpart) \n')

        rCodeGen.ForSetUpChecks(rScript)

        # First pass: is there anything left to train at all?
        lAllFilePresent = all(os.path.isfile(treeFileFor(target)) and skipExistingTrees
                              for target in config['target'])
        if not lAllFilePresent:
            rCodeGen.ToReadTargetFile(rScript,config)
            rCodeGen.ForWtVectorGeneration(rScript,args.wt.lower())

            for target in config['target']:
                rCodeGen.ToReadFeatureFiles(rScript,config,target)
                rCodeGen.ToReadPredictionFiles(rScript,config,target,configInit)
                rCodeGen.ForSanityChecks(rScript,config,target)
                lTreeFileName = treeFileFor(target)
                if os.path.isfile(lTreeFileName) and skipExistingTrees:
                    print("Model File " + lTreeFileName + " already exists . So it will not be formed again . If you want to re-generate model then re-run with -skipT=No")
                else:
                    rCodeGen.ToRenameDataBeforeTraining(rScript,config,target)
                    rCodeGen.ForTrainingTree(rScript,args,config,target, args.treeType)
                    print(lTreeFileName)
                    rCodeGen.saveTrainingTree(rScript,args,dirName,target, lTreeFileName)

    print("Finished generating R training program: " + rProgLocation)
    # Argument-list form: no shell, so paths with spaces or metacharacters work.
    subprocess.call(["chmod", "+x", rProgLocation])
Пример #17
0
def main():
    """Generate the R training script for one experiment, optionally in "double" mode.

    Parses the command line, loads ``<-e>/design.ini`` with ConfigObj and
    writes ``train<algo>-td.<day>-dt.<n>-wt.<wt><ext>[double].r`` into the
    experiment folder.  ``-skipM`` defaults to "yes".

    If every target's model file already exists and ``-skipM`` is "yes", only
    the script header is written.  Otherwise data-loading code is emitted for
    every target, followed by training code for each target whose model file
    is missing (or for all of them when ``-skipM`` is not "yes").

    With ``-double`` set, the output is ``...double.model``: the plain
    ``.model`` file is loaded if it already exists, otherwise trained, and the
    double-training weight vector code plus a "double" save are then emitted.
    The script is made executable at the end.
    """
    import subprocess  # local import: used only to mark the script executable

    parser = argparse.ArgumentParser(description='Generates train.r. A sample command is mGenForE.py -e ob/e1/ ')
    parser.add_argument('-e', required=True,help='Experiement folder to use to find the features and targets')
    parser.add_argument('-a', required=True,help='Algorithm name')
    parser.add_argument('-targetClass',required=True,help="binomial(target takes only true and false) / multinomial (target values takes more than 2 values)")
    parser.add_argument('-skipM',required=False,help="yes or no , If you want to regenerate already generated algorithm model file then make this value No")
    parser.add_argument('-td',required=True,help="Day on which it was trained")
    parser.add_argument('-dt',required=True,help="Number of days it was trained")
    parser.add_argument('-wt',required=True,help="default/exp , weight type to be given to different days")
    parser.add_argument('-iT',required=False,help='Instrument name')
    parser.add_argument('-sP',required=False,help='Strike price of instrument')
    parser.add_argument('-oT',required=False,help='Options Type')
    parser.add_argument('-double',required=False,help='Double training of in model')
    args = parser.parse_args()

    attribute.initializeInstDetails(args.iT,args.sP,args.oT)
    if args.skipM is None:
        args.skipM = "yes"
    skipExistingModels = args.skipM.lower() == "yes"

    print("Using the experiment folder " + args.e)

    config = ConfigObj(args.e+"/design.ini")

    print("The config parameters that I am working with are")
    print(config)

    dirName = os.path.dirname(args.e)+"/"
    algo = rCodeGen.getAlgoName(args)
    trainingDay = os.path.basename(os.path.abspath(args.td))

    def modelFileFor(target, suffix):
        # Single definition of the model file name; `suffix` is '.model' or
        # 'double.model'.
        return dirName + '/' + algo + target + '-td.' + trainingDay \
            + '-dt.' + args.dt + '-targetClass.' + args.targetClass + "-wt." + args.wt + attribute.generateExtension() + suffix

    # The file this run is expected to produce for each target.
    modelSuffix = 'double.model' if args.double else '.model'

    rProgName = "train" + algo + "-td." + trainingDay + "-dt." + args.dt + "-wt." + args.wt + attribute.generateExtension() \
        + ("double.r" if args.double else ".r")
    rProgLocation = dirName+'/'+rProgName

    # The context manager closes the script even if a generator call raises.
    with open(rProgLocation,'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')
        if algo in ('glmnet', 'randomForest', 'mda'):
            rScript.write('require (' + algo + ') \n')
        rCodeGen.ForSetUpChecks(rScript)

        # First pass: is there anything left to train at all?
        lAllFilePresent = all(os.path.isfile(modelFileFor(target, modelSuffix)) and skipExistingModels
                              for target in config['target'])
        if not lAllFilePresent:
            rCodeGen.ToReadTargetFile(rScript,config)
            rCodeGen.ForWtVectorGeneration(rScript,args.wt.lower())

            for target in config['target']:
                rCodeGen.ToReadFeatureFiles(rScript,config,target)
                rCodeGen.ForSanityChecks(rScript,config,target)
                lModelGeneratedAfterTraining = modelFileFor(target, modelSuffix)
                if os.path.isfile(lModelGeneratedAfterTraining) and skipExistingModels:
                    print("Model File " + lModelGeneratedAfterTraining + " already exists . So it will not be formed again . If you want to re-generate model then re-run with -skipM=No")
                    continue

                rCodeGen.ToCreateDataFrameForTraining(rScript,config,target)
                if args.double:
                    # Reuse the first-stage model when it is already on disk.
                    lTempModelName = modelFileFor(target, '.model')
                    if os.path.isfile(lTempModelName):
                        rCodeGen.ForLoadingModel(rScript,args,dirName,target,config)
                    else:
                        rCodeGen.ForTraining(rScript,args,config,target)
                    rCodeGen.forPreparingWtVectorForDoubleTraining(rScript,args,target)
                    rCodeGen.saveTrainingModel(rScript,args,dirName,target,"double")
                else:
                    rCodeGen.ForTraining(rScript,args,config,target)
                    rCodeGen.saveTrainingModel(rScript,args,dirName,target)

    print("Finished generating R training program: " + rProgLocation)
    # Argument-list form: no shell, so paths with spaces or metacharacters work.
    subprocess.call(["chmod", "+x", rProgLocation])
Пример #18
0
import rCodeGen, utility
import attribute
import aGenForE

# Command-line driver: builds one removeDuplicatesFromOrderBook.py command per
# training directory and runs the commands in chunks of -nComputers through
# utility.runCommandList.
parser = argparse.ArgumentParser(description='This program will get results for all the subexperiments. \n' , formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument('-td', required=True,help='Training directory')
parser.add_argument('-dt',required=True,help='Number of days after start training day specified . Defaults to 1 ')
parser.add_argument('-run', required=True,help='dry (only show dont execute) or real (show and execute)')
parser.add_argument('-sequence', required=True,help='lp (Local parallel) / dp (Distributed parallel) / serial')
parser.add_argument('-nComputers',required=True,help="Number of computers at which task has to be run present in the data set")
parser.add_argument('-iT',required=False,help='Instrument name')
parser.add_argument('-sP',required=False,help='Strike price of instrument')
parser.add_argument('-oT',required=False,help='Options Type')
args = parser.parse_args()

attribute.initializeInstDetails(args.iT,args.sP,args.oT)

# `dp` is imported only for distributed runs, so every later use of it must be
# guarded by the same condition.
if(args.sequence == "dp"):
    import dp
allDataDirectories = attribute.getListOfTrainingDirectoriesNames( int(args.dt) , args.td ,args.iT)
commandList = []
for directories in allDataDirectories:
    # Example of the command being built:
    # src/removeDuplicatesFromOrderBook.py -d ob/data/ro/nsefut/20141126/ -iT SBIN -oT 0 -sP -1
    commandList.append(["removeDuplicatesFromOrderBook.py", "-d", directories, "-iT", args.iT, "-oT", args.oT, "-sP", args.sP])

# Parse the chunk size once instead of on every iteration.
lChunkSize = int(args.nComputers)
for chunkNum in range(0,len(commandList),lChunkSize):
    lSubGenList = commandList[chunkNum:chunkNum+lChunkSize]
    utility.runCommandList(lSubGenList,args)
    # Previously unconditional, which raised NameError for lp/serial runs
    # because `dp` had never been imported.
    if(args.sequence == "dp"):
        print(dp.printGroupStatus())
Пример #19
0
def main():
    """Generate the R prediction script for one experiment, optionally in "double" mode.

    Parses the command line and loads ``<-s>/design.ini`` with ConfigObj
    (``-s`` defaults to ``-e``; ``-skipP`` defaults to "yes").  Writes
    ``predict<algo>-td.<day>-dt.<n>-pd.<pday>-wt.<wt><ext>[double].r`` into
    ``os.path.dirname(-s)``.  The prediction output directory is ``-pd`` with
    ``/ro/`` replaced by ``/wf/`` plus ``/p/<experiment>/``; it is created with
    ``os.mkdir`` if missing.

    If every target's ``.predictions`` file already exists and ``-skipP`` is
    "yes", only the script header is written.  Otherwise feature-loading code
    is emitted for every target and prediction code for each target whose
    prediction file is missing (or for all of them when ``-skipP`` is "no").
    The script is made executable at the end.
    """
    import subprocess  # local import: used only to mark the script executable

    parser = argparse.ArgumentParser(
        description=
        'Generates predict.r which will use design.model to make predictions. Sample command is pGenForE.py -e ob/e1/'
    )
    parser.add_argument('-e',
                        required=True,
                        help='Directory to find the experiement designs')
    parser.add_argument('-a', required=True, help='Algorithm name')
    parser.add_argument('-pd', required=True, help='Prediction directory')
    parser.add_argument('-td',
                        required=True,
                        help="Day on which it was trained")
    parser.add_argument('-dt',
                        required=True,
                        help="Number of days it was trained")
    parser.add_argument('-wt', required=True, help="exp/default")
    parser.add_argument(
        '-targetClass',
        required=True,
        help=
        "For which model was used ; binomial(target takes only true and false) / multinomial (target values takes more than 2 values)"
    )
    parser.add_argument(
        '-skipP',
        required=False,
        help=
        "yes or no , If you want to regenerate already generated algorithm prediction file then make this value No"
    )
    parser.add_argument('-s', required=False, help="Experiment sub folders")
    parser.add_argument('-iT', required=False, help='Instrument name')
    parser.add_argument('-sP',
                        required=False,
                        help='Strike price of instrument')
    parser.add_argument('-oT', required=False, help='Options Type')
    parser.add_argument('-double',
                        required=False,
                        help='Double training of in model')
    args = parser.parse_args()

    attribute.initializeInstDetails(args.iT, args.sP, args.oT)
    if args.skipP is None:
        args.skipP = "yes"
    if args.s is None:
        args.s = args.e
    skipP = args.skipP.lower()

    print("\nRunning pGen.py to generate the predict script")
    print("Using the experiment folder " + args.e)

    config = ConfigObj(args.s + "/design.ini")

    print("The config parameters that I am working with are")
    print(config)

    dirName = os.path.dirname(args.s)
    # -a is a required argument, so no default algorithm is needed here.
    algo = args.a
    trainingDay = os.path.basename(os.path.abspath(args.td))

    rProgName = "predict" + algo + "-td." + trainingDay + "-dt." + args.dt + "-pd." + os.path.basename(os.path.abspath(args.pd)) \
                + "-wt." + args.wt + attribute.generateExtension() + ("double.r" if args.double else ".r")
    rProgLocation = dirName + '/' + rProgName

    predictionSuffix = "double.predictions" if args.double else ".predictions"
    modelSuffix = 'double.model' if args.double else '.model'

    # The context manager closes the script even if a generator call raises.
    with open(rProgLocation, 'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')
        predictDataDirectoryName = args.pd.replace('/ro/', '/wf/')
        predictDataDirectoryName = predictDataDirectoryName + "/p/" + os.path.basename(
            os.path.dirname(args.e)) + "/"
        if not os.path.exists(predictDataDirectoryName):
            os.mkdir(predictDataDirectoryName)

        def predictionFileFor(target):
            # Single definition of the prediction file name used by both passes.
            return predictDataDirectoryName + "/" + algo + target + '-td.' + trainingDay \
                + '-dt.' + args.dt + '-targetClass.' + args.targetClass + '-f.' + os.path.basename(os.path.dirname(args.s)) + \
                "-wt." + args.wt + attribute.generateExtension() + predictionSuffix

        if algo in ('glmnet', 'randomForest'):
            rScript.write('require (' + algo + ') \n')
        rCodeGen.ForSetUpChecks(rScript)

        # First pass: is there anything left to predict at all?
        lAllFilePresent = all(os.path.isfile(predictionFileFor(target)) and skipP == "yes"
                              for target in config['target'])
        if not lAllFilePresent:
            for target in config['target']:
                rCodeGen.ToReadFeatureFiles(rScript, config, target)
                rCodeGen.ForSanityChecks(rScript, config, target)
                predictionFileName = predictionFileFor(target)

                if not os.path.isfile(predictionFileName) or skipP == "no":
                    # Printed for information only.
                    # NOTE(review): this name omits attribute.generateExtension();
                    # confirm it matches the model file the training step saves.
                    lModelGeneratedAfterTraining = args.s + '/' + algo + target + '-td.' + trainingDay \
                        + '-dt.' + args.dt + '-targetClass.' + args.targetClass + "-wt." + args.wt + modelSuffix
                    if args.double:
                        rCodeGen.ForPredictions(rScript, config, args, args.s,
                                                target, 2, "double")
                    else:
                        rCodeGen.ForPredictions(rScript, config, args, args.s,
                                                target)
                    print(lModelGeneratedAfterTraining)
                else:
                    print(predictionFileName + " Already exists , not generating it again . If you want to generate it again then rerun it with -skipP no ")

    print("Finished generating R prediction program: " + rProgLocation)
    # Argument-list form: no shell, so paths with spaces or metacharacters work.
    subprocess.call(["chmod", "+x", rProgLocation])
Пример #20
0
def main():
    """Generate the R script that trains one tree per target of an experiment.

    Parses the command line, loads ``design1.ini`` (targets/features) and
    ``design.ini`` (passed to ``rCodeGen.ToReadPredictionFiles``) from the
    ``-e`` folder, and writes
    ``traintree-td.<day>-tTD<tTD>-dt.<n>-wt.<wt><ext>.r`` next to them.
    ``-skipT`` defaults to "yes" and ``-tTD`` defaults to the value of ``-dt``.

    If every target's ``.tree<treeType>`` file already exists and ``-skipT`` is
    "yes", only the script header is written.  Otherwise data-loading code is
    emitted for every target and training code for each target whose tree file
    is missing (or for all of them when ``-skipT`` is not "yes").  The script
    is made executable at the end.
    """
    import subprocess  # local import: used only to mark the script executable

    parser = argparse.ArgumentParser(
        description=
        'Generates train.r. A sample command is mGenForE.py -e ob/e1/ ')
    parser.add_argument(
        '-e',
        required=True,
        help='Experiement folder to use to find the features and targets')
    parser.add_argument('-a', required=True, help='Algorithm name')
    parser.add_argument(
        '-targetClass',
        required=True,
        help=
        "binomial(target takes only true and false) / multinomial (target values takes more than 2 values)"
    )
    parser.add_argument(
        '-skipT',
        required=False,
        help=
        "yes or no , If you want to regenerate already generated algorithm model file then make this value No"
    )
    parser.add_argument('-td',
                        required=True,
                        help="Day on which it was trained")
    parser.add_argument('-dt',
                        required=True,
                        help="Number of days it was trained")
    parser.add_argument(
        '-wt',
        required=True,
        help="default/exp , weight type to be given to different days")
    parser.add_argument('-iT', required=False, help='Instrument name')
    parser.add_argument('-sP',
                        required=False,
                        help='Strike price of instrument')
    parser.add_argument('-oT', required=False, help='Options Type')
    parser.add_argument('-treeType',
                        required=False,
                        help="Tree read for trade engine")
    parser.add_argument('-tTD',
                        required=False,
                        help="Tree number of days to be used")
    args = parser.parse_args()

    attribute.initializeInstDetails(args.iT, args.sP, args.oT)
    if args.skipT is None:
        args.skipT = "yes"
    if args.tTD is None:
        args.tTD = args.dt
    skipExistingTrees = args.skipT.lower() == "yes"
    print("Using the experiment folder " + args.e)

    config = ConfigObj(args.e + "/design1.ini")
    # os.path.join supplies the separator the original concatenation omitted,
    # so this load works whether or not -e ends with a slash.
    configInit = ConfigObj(os.path.join(args.e, "design.ini"))

    print("The config parameters that I am working with are")
    print(config)

    dirName = os.path.dirname(args.e) + "/"
    algo = rCodeGen.getAlgoName(args)
    trainingDay = os.path.basename(os.path.abspath(args.td))

    def treeFileFor(target):
        # Single definition of the tree file name used by both passes below.
        # NOTE(review): -treeType is optional on the command line but is
        # concatenated here, so omitting it raises TypeError - confirm whether
        # it should be required or given a default.
        return (dirName + "/" + algo + target + '-td.' + trainingDay +
                "-tTD" + args.tTD + '-dt.' + args.dt +
                attribute.generateExtension() + ".tree" + args.treeType)

    rProgName = ("traintree" + "-td." + trainingDay + "-tTD" + args.tTD +
                 "-dt." + args.dt + "-wt." + args.wt +
                 attribute.generateExtension() + ".r")
    rProgLocation = dirName + '/' + rProgName

    # The context manager closes the script even if a generator call raises.
    with open(rProgLocation, 'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')
        rScript.write('require (rpart) \n')

        rCodeGen.ForSetUpChecks(rScript)

        # First pass: is there anything left to train at all?
        lAllFilePresent = all(
            os.path.isfile(treeFileFor(target)) and skipExistingTrees
            for target in config['target'])
        if not lAllFilePresent:
            rCodeGen.ToReadTargetFile(rScript, config)
            rCodeGen.ForWtVectorGeneration(rScript, args.wt.lower())

            for target in config['target']:
                rCodeGen.ToReadFeatureFiles(rScript, config, target)
                rCodeGen.ToReadPredictionFiles(rScript, config, target,
                                               configInit)
                rCodeGen.ForSanityChecks(rScript, config, target)
                lTreeFileName = treeFileFor(target)
                if os.path.isfile(lTreeFileName) and skipExistingTrees:
                    print("Model File " + lTreeFileName + " already exists . So it will not be formed again . If you want to re-generate model then re-run with -skipT=No")
                else:
                    rCodeGen.ToRenameDataBeforeTraining(rScript, config,
                                                        target)
                    rCodeGen.ForTrainingTree(rScript, args, config, target,
                                             args.treeType)
                    print(lTreeFileName)
                    rCodeGen.saveTrainingTree(rScript, args, dirName, target,
                                              lTreeFileName)

    print("Finished generating R training program: " + rProgLocation)
    # Argument-list form: no shell, so paths with spaces or metacharacters work.
    subprocess.call(["chmod", "+x", rProgLocation])
Пример #21
0
def main():
    """Generate one R prediction script covering every sub-experiment design under ``-s``.

    Parses the command line, loads ``<-e>/design.ini`` with ConfigObj and
    writes ``predict<algo>-td.<day>-dt.<n>-pd.<pday>-wt.<wt><ext>-For<sub>SubE.r``
    into ``os.path.dirname(-e)``.  The prediction output directory is ``-pd``
    with ``/ro/`` replaced by ``/wf/`` plus ``/p/<experiment>``; it is created
    with ``os.mkdir`` if missing.  ``-skipP`` defaults to "yes".

    Feature-loading code is emitted once from the top-level design; then, for
    each design file returned by ``utility.list_files(-s)``, prediction code is
    emitted per target unless that target's ``.predictions`` file already
    exists and ``-skipP`` is not "no".  The script is made executable at the
    end.
    """
    import subprocess  # local import: used only to mark the script executable

    parser = argparse.ArgumentParser(
        description=
        'Generates predict.r which will use design.model to make predictions. Sample command is pGenForE.py -e ob/e1/'
    )
    parser.add_argument('-e',
                        required=True,
                        help='Directory to find the experiement designs')
    parser.add_argument('-a', required=True, help='Algorithm name')
    parser.add_argument(
        '-s',
        required=True,
        help='Location of the subfolder that contains the sub experiments')
    parser.add_argument('-pd', required=True, help='Prediction directory')
    parser.add_argument('-td',
                        required=True,
                        help="Day on which it was trained")
    parser.add_argument('-dt',
                        required=True,
                        help="Number of days it was trained")
    parser.add_argument(
        '-targetClass',
        required=True,
        help=
        "For which model was used ; binomial(target takes only true and false) / multinomial (target values takes more than 2 values)"
    )
    parser.add_argument(
        '-skipP',
        required=False,
        help=
        "yes or no , If you want to regenerate already generated algorithm prediction file then make this value No"
    )
    # -wt is concatenated into every file name below, so omitting it used to
    # fail with a TypeError after the output directory had been created;
    # requiring it turns that into a clean usage error.
    parser.add_argument(
        '-wt',
        required=True,
        help="default/exp , weight type to be given to different days")
    parser.add_argument('-iT', required=False, help='Instrument name')
    parser.add_argument('-sP',
                        required=False,
                        help='Strike price of instrument')
    parser.add_argument('-oT', required=False, help='Options Type')
    args = parser.parse_args()

    attribute.initializeInstDetails(args.iT, args.sP, args.oT)
    if args.skipP is None:
        args.skipP = "yes"
    skipP = args.skipP.lower()

    print("\nRunning pGen.py to generate the predict script")
    print("Using the experiment folder " + args.e)

    config = ConfigObj(args.e + "/design.ini")

    print("The config parameters that I am working with are")
    print(config)

    dirName = os.path.dirname(args.e)
    # -a is a required argument, so no default algorithm is needed here.
    algo = args.a

    # The trailing slash makes os.path.dirname(args.s) return the folder itself.
    args.s = args.s + "/"

    predictDataDirectoryName = args.pd.replace('/ro/', '/wf/')
    predictDataDirectoryName = predictDataDirectoryName + "/p/" + os.path.basename(
        os.path.dirname(args.e))
    if not os.path.exists(predictDataDirectoryName):
        os.mkdir(predictDataDirectoryName)

    trainingDay = os.path.basename(os.path.abspath(args.td))
    rProgName = "predict" + algo + "-td." + trainingDay + "-dt." + args.dt + "-pd." + os.path.basename(os.path.abspath(args.pd)) \
                + "-wt." + args.wt + attribute.generateExtension() +"-For"+os.path.basename(os.path.dirname(args.s))+"SubE.r"
    rProgLocation = dirName + '/' + rProgName

    # The context manager closes the script even if a generator call raises.
    with open(rProgLocation, 'w') as rScript:
        rScript.write('#!/usr/bin/Rscript \n')
        if algo in ('glmnet', 'randomForest'):
            rScript.write('require (' + algo + ') \n')

        rCodeGen.ForSetUpChecks(rScript)
        for target in config['target']:
            rCodeGen.ToReadFeatureFiles(rScript, config, target)
            rCodeGen.ForSanityChecks(rScript, config, target)

        for designFile in utility.list_files(args.s):
            print("Generating r code for " + designFile)
            rScript.write('\n\nprint ("Running r code for ' + designFile + '")\n')
            # From here on `config` is the sub-experiment's own design.
            config = ConfigObj(designFile)
            for target in config['target']:
                predictionFileName = predictDataDirectoryName + "/" + algo + target + '-td.' + trainingDay + '-dt.' + args.dt +\
                 '-targetClass.' + args.targetClass + '-f.' + os.path.basename(os.path.dirname(designFile)) + "-wt." + args.wt + attribute.generateExtension() + ".predictions"
                if not os.path.isfile(predictionFileName) or skipP == "no":
                    rCodeGen.ForPredictions(rScript, config, args, designFile,
                                            target)
                else:
                    print(predictionFileName + " Already exists , not generating it again . If you want to generate it again then rerun it with -skipP no ")

        rScript.write('rm(list=ls())')

    print("Finished generating R prediction program: " + rProgLocation)
    # Argument-list form: no shell, so paths with spaces or metacharacters work.
    subprocess.call(["chmod", "+x", rProgLocation])