def main(argv):
    """Estimate distance bounds from a raw distance-probability file and save them.

    Args:
        argv: positional command-line arguments. ``argv[0]`` is the path of
            the file read by ``DistanceUtils.LoadRawDistProbFile``. The mere
            presence of a second argument (its value is not inspected)
            additionally requests a pickled copy of the bounds.

    Side effects:
        * If the estimated bounds contain a ``'CbCb'`` entry, it is written
          through ``SaveBoundInListFormat`` to ``<targetName>.bound.txt``.
        * If a second argument was given, the tuple
          ``(bounds, targetName, sequence)`` is pickled to
          ``<targetName>.bound.pkl``.

    Exits with status 1 (after printing the usage text or an error message)
    when no argument is supplied or the input file does not exist.
    """
    import sys

    if len(argv) < 1:
        Usage()
        sys.exit(1)

    inputFile = argv[0]
    # Any second positional argument, whatever its value, turns on the pickle.
    printBoundPKL = len(argv) >= 2

    if not os.path.isfile(inputFile):
        # The doubled space reproduces the separator that a Python 2
        # `print a, b` statement inserts, so the message text is unchanged.
        print('ERROR: The input file does not exist:  %s' % inputFile)
        sys.exit(1)

    content = DistanceUtils.LoadRawDistProbFile(inputFile)
    targetName, sequence, predictedDistProbMatrix, predictedContactProbMatrix = content[:4]

    # The predicted probabilities are used as they are: since version 3 an
    # unbiased deep model is used, so no label-weight correction is applied.
    bounds = EstimateDistanceBounds(predictedDistProbMatrix)

    ## output Cb-Cb bound in text format
    if 'CbCb' in bounds:
        SaveBoundInListFormat(targetName, sequence, bounds['CbCb'], targetName + '.bound.txt')

    if not printBoundPKL:
        return

    with open(targetName + '.bound.pkl', 'wb') as fh:
        cPickle.dump((bounds, targetName, sequence), fh, protocol=cPickle.HIGHEST_PROTOCOL)
def main(argv):
    """Generate a pairwise potential from a prediction file and pickle it.

    Args:
        argv: command-line arguments parsed with ``getopt``; exactly one
            positional argument (the input file) is required. Options:

            -a / --labelNames   label names, parsed by ``config.ParseLabelNames``
                                (default: all atom-pair plus orientation names
                                from ``config``)
            -w / --useWeight    integer bitmask: bit 0 enables weights for the
                                distance potential, bit 1 for orientation
            -r / --refState     ``TYPE[+rc[+param]]``; TYPE must be in
                                ``allRefTypes``; ``rc`` is the largest distance
                                cutoff; ``param`` is alpha for DFIRE or rgScale
                                for DOPE
            -f / --refFile      existing file for the reference state
            -o                  do not use a reference for orientation
            -s / --savefile     output file name (default: derived name)
            -l / --minPotential lower bound passed to the potential calculation
            -u / --maxPotential upper bound passed to the potential calculation

    Side effects:
        Pickles ``(name, sequence, pairPotential, cutoffs, validProb)`` to the
        save file. When no save file is given the name is the dot-joined list
        of the target name, ``pairPotential``, the reference type, ``rc``, the
        reference parameter, optional ``Ref4O`` / ``Wt4*`` tags and ``pkl``.

    Exits with status 1 on any usage or validation error. Only the DFIRE and
    DOPE reference types are implemented here; any other accepted type ends
    with an "unimplemented potential type" error.
    """
    import sys

    minPotential = -30.0
    maxPotential = 30.0

    UseWeight4Orientation = True
    UseWeight4Distance = True
    UseRef4Orientation = True

    ## the largest dist cutoff
    rc = 18

    alpha4DFIRE = 1.61
    alpha4DFIREstr = '1.61'
    rgScale4DOPE = 1.

    ## reference
    reference = 'DFIRE'

    ## refFile for SimuRW
    refFile = None

    savefile = ""

    # None means "use the default label set"; the default is resolved only
    # after the command line and the input file have been validated.
    labelNames = None

    if len(argv) < 1:
        Usage()
        sys.exit(1)

    # NOTE(review): "noRef4Orientation=" is declared with '=', so the long form
    # requires a value although the short form -o takes none. Kept unchanged
    # so existing invocations keep working.
    try:
        opts, args = getopt.getopt(
            argv,
            "a:w:r:l:u:f:s:o",
            ["labelNames=", "useWeight=", "refState=", "minPotential=", "maxPotential=",
             "refFile=", "savefile=", "noRef4Orientation="])
    except getopt.GetoptError:
        Usage()
        sys.exit(1)

    if len(args) != 1:
        Usage()
        sys.exit(1)
    inputFile = args[0]

    # In the messages below a doubled space reproduces the separator that a
    # Python 2 `print a, b` statement inserts, so the printed text is unchanged.
    for opt, arg in opts:
        if opt in ("-a", "--labelNames"):
            labelNames = config.ParseLabelNames(arg)

        elif opt in ("-w", "--useWeight"):
            scheme = np.int32(arg)
            UseWeight4Orientation = (2 & scheme) > 0
            UseWeight4Distance = (1 & scheme) > 0

        elif opt in ("-r", "--refState"):
            fields = arg.split('+')
            reference = fields[0].upper()
            if reference not in allRefTypes:
                print('ERROR: allowed reference types:  %s' % (allRefTypes,))
                sys.exit(1)

            if len(fields) > 1:
                # Keep an integral cutoff as an integer so that str(rc) in the
                # output file name carries no decimal point.
                if fields[1].isdigit():
                    rc = np.int32(fields[1])
                else:
                    rc = np.float32(fields[1])

            if reference == 'DFIRE':
                if len(fields) > 2:
                    alpha4DFIREstr = fields[2]
                    alpha4DFIRE = np.float32(fields[2])
            elif reference == 'DOPE':
                if len(fields) > 2:
                    rgScale4DOPE = np.float32(fields[2])
            elif reference == 'SimuRW'.upper():
                print('Using SimuRW potential')
            else:
                print('ERROR: unsupported reference format:  %s' % arg)
                sys.exit(1)

        elif opt in ("-f", "--refFile"):
            refFile = arg
            if not os.path.isfile(refFile):
                print('the provided file for reference state is not valid:  %s' % refFile)
                sys.exit(1)

        elif opt in ("-o", "--noRef4Orientation"):
            UseRef4Orientation = False

        elif opt in ("-s", "--savefile"):
            savefile = arg

        elif opt in ("-l", "--minPotential"):
            minPotential = np.float32(arg)

        elif opt in ("-u", "--maxPotential"):
            maxPotential = np.float32(arg)

        else:
            Usage()
            sys.exit(1)

    if not os.path.isfile(inputFile):
        print('ERROR: The input file does not exist:  %s' % inputFile)
        sys.exit(1)

    if reference in allRefTypesWithFiles and refFile is None:
        print('ERROR: The file for user-sepcified reference state is empty')
        sys.exit(1)

    if reference == 'DFIRE':
        if alpha4DFIRE > 10:
            ## take a random value between 1.57 and 1.63
            # NOTE: alpha4DFIREstr is not updated, so the derived file name
            # still carries the value given on the command line.
            alpha4DFIRE = random.uniform(1.57, 1.63)
        print('alpha for DFIRE potential is  %s' % alpha4DFIRE)
        if alpha4DFIRE < 1.55 or alpha4DFIRE > 1.75:
            print('ERROR: alpha4DFIRE shall be between 1.55 and 1.75')
            sys.exit(1)

    if reference == 'DOPE':
        print('rgScale for DOPE potential is %s' % rgScale4DOPE)
        if rgScale4DOPE > 1.2 or rgScale4DOPE < 0.8:
            print('ERROR: rgScale4DOPE shall be between 0.8 and 1.2')
            sys.exit(1)

    if UseWeight4Distance:
        print('Use weight for distance potential')
    if UseWeight4Orientation:
        print('Use weight for orientation potential')
    if not UseRef4Orientation:
        print('Do not use reference for orientation')

    content = DistanceUtils.LoadRawDistProbFile(inputFile)
    assert len(content) >= 6
    name, sequence, predictedProb, predictedContactProb, labelWeight, labelDistribution = content[:6]
    assert labelWeight is not None, "labelWeight shall not be empty"
    predData = (predictedProb, labelWeight, labelDistribution)

    targetName = os.path.basename(inputFile).split('.')[0]

    if labelNames is None:
        labelNames = config.allAtomPairNames + config.allOrientationNames

    print('Generating potential for  %s with the following labels:  %s' % (targetName, labelNames))

    # DFIRE and DOPE differ only in the parameter handed to the calculation
    # and in how that parameter is spelled in the output file name.
    if reference == 'DFIRE':
        param4Potential, paramStr = alpha4DFIRE, alpha4DFIREstr
    elif reference == 'DOPE':
        param4Potential, paramStr = rgScale4DOPE, str(rgScale4DOPE)
    else:
        print('ERROR: unimplemented potential type:  %s' % reference)
        sys.exit(1)

    pairPotential, cutoffs, validProb, distPotential, oriPotential = CalcDistOriPotential(
        predData, labelNames,
        distPotType=reference, param4Potential=param4Potential, largestDistance=rc,
        useWeight4Dist=UseWeight4Distance, useRef4Ori=UseRef4Orientation,
        useWeight4Ori=UseWeight4Orientation,
        minPotential=minPotential, maxPotential=maxPotential)

    filenames = [targetName, 'pairPotential', reference, str(rc), paramStr]

    if bool(oriPotential) and UseRef4Orientation:
        filenames.append('Ref4O')

    # Tag the file name with which non-empty potentials were weighted.
    weightedDist = bool(distPotential) and UseWeight4Distance
    weightedOri = bool(oriPotential) and UseWeight4Orientation
    if weightedDist and weightedOri:
        filenames.append('Wt4OD')
    elif weightedOri:
        filenames.append('Wt4O')
    elif weightedDist:
        filenames.append('Wt4D')

    filenames.append('pkl')

    if savefile == "":
        savefile = '.'.join(filenames)

    ## save the result
    with open(savefile, 'wb') as fh:
        cPickle.dump((name, sequence, pairPotential, cutoffs, validProb), fh,
                     protocol=cPickle.HIGHEST_PROTOCOL)
def main(argv):
    """Generate a distance potential from a prediction file and save it.

    Args:
        argv: command-line arguments parsed with ``getopt``. Options:

            -i / --input        input file (required, must exist)
            -a / --atomPairType label names, parsed by ``config.ParseLabelNames``
                                (default ``['CbCb']``)
            -r / --refState     ``TYPE[+rc[+param]]``; TYPE must be in
                                ``allRefTypes``; ``rc`` is the largest distance
                                cutoff; ``param`` is alpha for DFIRE or rgScale
                                for DOPE
            -f / --refFile      existing file for the reference state
            -l / --minPotential lower bound passed to the potential calculation
            -u / --maxPotential upper bound passed to the potential calculation
            -s / --minSeqSep    minimum sequence separation (>= 1) of the
                                residue pairs written in text format
            -t                  write a text file instead of a pickle
            -n                  accepted, but has no effect in this function

    Side effects:
        * Default: pickles ``(name, sequence, potential, distCutoffs)``,
          restricted to the requested labels, to
          ``<target>.distPotential.<REF>.<rc>[.<param>].pkl``.
        * With -t: writes ``<target>.distPotential.s<minSeqSep>.txt`` holding
          one ``AtomPair ...`` line per residue pair (i, j) with
          j >= i + minSeqSep.

    Exits with status 1 on any usage or validation error.
    """
    import sys

    inputFile = None
    labelNames = ['CbCb']
    textFormat = False

    minPotential = -30.0
    maxPotential = 30.0

    minSeqSep = 3
    minSeqSepStr = '3'

    ## the largest dist cutoff
    rc = 18

    alpha4DFIRE = 1.61
    rgScale4DOPE = 1.

    ## reference
    reference = 'DFIRE'

    ## refFile
    refFile = None

    # NOTE(review): "textFormat=" and "nonZero=" are declared with '=', so the
    # long forms require a value although the short forms -t/-n take none.
    # Kept unchanged so existing invocations keep working.
    try:
        opts, args = getopt.getopt(
            argv,
            "i:a:r:l:u:s:f:tn",
            ["input=", "atomPairType=", "refState=", "minPotential=", "maxPotential=",
             "minSeqSep=", "refFile=", "textFormat=", "nonZero="])
    except getopt.GetoptError:
        Usage()
        sys.exit(1)

    # Echo of the parsed command line (same text as `print opts, args`).
    print('%s %s' % (opts, args))

    if len(opts) < 1:
        Usage()
        sys.exit(1)

    # In the messages below a doubled space reproduces the separator that a
    # Python 2 `print a, b` statement inserts, so the printed text is unchanged.
    for opt, arg in opts:
        if opt in ("-i", "--input"):
            inputFile = arg

        elif opt in ("-a", "--atomPairType"):
            labelNames = config.ParseLabelNames(arg)

        elif opt in ("-r", "--refState"):
            fields = arg.split('+')
            reference = fields[0].upper()
            if reference not in allRefTypes:
                print('allowed reference types:  %s' % (allRefTypes,))
                sys.exit(1)

            if len(fields) > 1:
                if reference == 'DFIRE':
                    rc = np.float32(fields[1])
                    if len(fields) > 2:
                        alpha4DFIRE = np.float32(fields[2])
                elif reference == 'DOPE':
                    rc = np.float32(fields[1])
                    if len(fields) > 2:
                        rgScale4DOPE = np.float32(fields[2])
                elif reference == 'SimuRW'.upper():
                    rc = np.float32(fields[1])
                else:
                    print('WARNING: unsupported reference format:  %s' % arg)

        elif opt in ("-f", "--refFile"):
            refFile = arg
            if not os.path.isfile(refFile):
                print('the provided file for reference state is not valid:  %s' % refFile)
                sys.exit(1)

        elif opt in ("-l", "--minPotential"):
            minPotential = np.float32(arg)

        elif opt in ("-u", "--maxPotential"):
            maxPotential = np.float32(arg)

        elif opt in ("-s", "--minSeqSep"):
            minSeqSep = np.int32(arg)
            minSeqSepStr = arg
            if minSeqSep < 1:
                print('ERROR: minSeqSep shall be at least 1')
                sys.exit(1)

        elif opt in ("-t", "--textFormat"):
            textFormat = True

        elif opt in ("-n", "--nonZero"):
            # Still accepted so existing command lines do not fail; nothing in
            # this function depends on it.
            pass

        else:
            Usage()
            sys.exit(1)

    if inputFile is None:
        print('Please provide an input file')
        sys.exit(1)

    if not os.path.isfile(inputFile):
        print('The input file does not exist:  %s' % inputFile)
        sys.exit(1)

    if reference in allRefTypesWithFiles and refFile is None:
        print('The file for user-sepcified reference state is empty')
        sys.exit(1)

    targetName = os.path.basename(inputFile).split('.')[0]

    content = DistanceUtils.LoadRawDistProbFile(inputFile)
    assert len(content) >= 6
    name, sequence, predictedDistProb, predictedContactProb, labelWeight, labelDistribution = content[:6]
    assert labelWeight is not None, "labelWeight shall not be empty"

    ## if needed, add code to here the predicted dist probability

    # Components of the pickle file name; the reference parameter (if any)
    # is appended by the matching branch below.
    nameParts = [targetName, 'distPotential', reference, str(rc)]

    if reference == 'DFIRE':
        potential = CalcPotentialByDFIRE(predictedDistProb, alpha=alpha4DFIRE, largestDistance=rc,
                                         minPotential=minPotential, maxPotential=maxPotential)
        nameParts.append(str(alpha4DFIRE))
    elif reference == 'DOPE':
        potential = CalcPotentialByDOPE(predictedDistProb, largestDistance=rc, rgScale=rgScale4DOPE,
                                        minPotential=minPotential, maxPotential=maxPotential)
        nameParts.append(str(rgScale4DOPE))
    elif reference == 'SimuRW'.upper():
        potential = CalcPotentialBySimuRW(predictedDistProb, refFile, largestDistance=rc,
                                          minPotential=minPotential, maxPotential=maxPotential)
    else:
        print('ERROR: unimplemented reference state:  %s' % reference)
        sys.exit(1)

    # Built once; used to filter the responses in both output formats.
    wantedLabels = set(labelNames)

    ## save to PKL file
    if not textFormat:
        potential_new = dict()
        distCutoffs = dict()
        for response, pot in potential.items():
            if config.Response2LabelName(response) not in wantedLabels:
                continue
            potential_new[response] = pot
            distCutoffs[response] = config.GetCutoffs(response)

        # Open the file only once the payload is ready, and let `with` close
        # it even if pickling fails.
        with open('.'.join(nameParts + ['pkl']), 'wb') as fh:
            cPickle.dump((name, sequence, potential_new, distCutoffs), fh,
                         protocol=cPickle.HIGHEST_PROTOCOL)
        return

    ## save to text file
    potentialFileName = targetName + '.distPotential.s' + minSeqSepStr + '.txt'
    with open(potentialFileName, 'w') as fh:
        fh.write('#TARGET\t' + targetName + '\n')
        fh.write('#SEQ\t' + sequence + '\n')
        fh.write('#DistanceBinBoundaries\t' + "Please check config.py" + '\n')

        for response, pot in potential.items():
            labelName, labelType, subType = config.ParseResponse(response)
            if labelName not in wantedLabels:
                continue

            numRows, numCols = pot.shape[0], pot.shape[1]
            for i in range(numRows):
                rawPotStrs = []
                # Only pairs at least minSeqSep apart, upper triangle only.
                for j in range(i + minSeqSep, numCols):
                    atom1, atom2 = config.SelectAtomPair(sequence, i, j, labelName)
                    values = ["{:.4f}".format(e) for e in pot[i, j]]
                    # Residue indices are written 1-based.
                    rawPotStrs.append(' '.join(
                        ['AtomPair', atom1.upper(), str(i + 1), atom2.upper(), str(j + 1), subType]
                        + values))

                if rawPotStrs:
                    fh.write('\n'.join(rawPotStrs) + '\n')