def idealize_and_relax_pdb_set( PdbCstPairs ):
    ''' Idealize, constrain and relax every (pdb, cst) pair.

    For each pair this runs the external idealize executable on the pdb, renames
    its '<stem>_0001.pdb' output to '<stem>_Ideal.pdb', applies the constraints
    from the cst file, relaxes first with and then without constraint weights,
    and dumps the result as '<stem>_Relax.pdb'.

    PdbCstPairs: iterable of (PdbName, CstName) tuples; PdbName is expected to
      end in '.pdb' (otherwise the rename below has identical source and target).

    Raises subprocess.CalledProcessError if the idealize or mv command fails.
    '''
    for PdbName, CstName in PdbCstPairs:
        print('(PdbName, CstName)  %s' % ((PdbName, CstName),))

        # idealize peptide bonds with command line subprocess
        subprocess.check_output([ 'idealize_jd2.default.linuxgccrelease', '-s', PdbName ])

        # The dot is escaped so only a literal '.pdb' suffix is rewritten
        IdealizedPdbOldName = re.sub( r'(.*)\.pdb$', r'\1_0001.pdb', PdbName )
        IdealizedPdbNewName = re.sub( r'(.*)\.pdb$', r'\1_Ideal.pdb', PdbName )
        subprocess.check_output(['mv', IdealizedPdbOldName, IdealizedPdbNewName])
        # NOTE(review): presumably gives the filesystem time to settle after the
        # rename before the file is read back -- confirm this is still needed
        time.sleep(0.2)

        IdealizedCappedPose = rosetta.pose_from_pdb( IdealizedPdbNewName )

        # make constraint mover and load the constraints from file
        Constrainer = rosetta.ConstraintSetMover()
        Constrainer.constraint_file(CstName)
        Constrainer.apply(IdealizedCappedPose)

        # SET UP WEIGHTS: default score function, and the same plus constraint terms
        Talaris = rosetta.getScoreFunction()
        TalarisPlusCst = rosetta.getScoreFunction()
        TalarisPlusCst.set_weight(rosetta.atom_pair_constraint, 10.0)
        TalarisPlusCst.set_weight(rosetta.angle_constraint, 5.0)
        TalarisPlusCst.set_weight(rosetta.dihedral_constraint, 2.5)

        print('relaxing %s with %s' % (IdealizedPdbNewName, CstName))
        # relax w/ cst
        rosetta.relax_pose(IdealizedCappedPose, TalarisPlusCst, 'tag')
        # relax w/o cst
        rosetta.relax_pose(IdealizedCappedPose, Talaris, 'tag')

        RelaxedPdbName = re.sub(r'(.*)_Ideal\.pdb$', r'\1_Relax.pdb', IdealizedPdbNewName)
        rosetta.dump_pdb(IdealizedCappedPose, RelaxedPdbName)
def optimize_repeat_pdb( PdbCstSetsRepeatLength ):
    ''' parallelizable

    Idealize one repeat pdb, then relax it under each constraint set (with two
    score functions) followed by an NCS-only relax, and finally relax the
    idealized pose with NCS constraints alone. A pdb is dumped after each stage.

    PdbCstSetsRepeatLength: one (Pdb, CstSets, RepeatLength) tuple, taken as a
      single argument (presumably so the function can be handed to a map-style
      worker pool -- confirm against callers):
        Pdb          - input pdb file name
        CstSets      - iterable of constraint file names
        RepeatLength - residues per repeat unit

    Raises AssertionError if the pose length is not a multiple of RepeatLength,
    and subprocess.CalledProcessError if the idealize or mv command fails.
    '''
    (Pdb, CstSets, RepeatLength) = PdbCstSetsRepeatLength

    # idealize peptide bonds with command line subprocess
    subprocess.check_output(['idealize_jd2.default.linuxgccrelease', '-s', Pdb])
    IdealizedPdbOldName = Pdb.replace('.pdb', '_0001.pdb')
    IdealizedPdbNewName = Pdb.replace('.pdb', '_ideal.pdb')
    subprocess.check_output(['mv', IdealizedPdbOldName, IdealizedPdbNewName])
    time.sleep(0.5)

    Pose = rosetta.pose_from_pdb(IdealizedPdbNewName)
    PoseLength = Pose.n_residue()

    assert PoseLength % RepeatLength == 0, 'pdb input into optimize_repeat_pdb must have integer multiple of repeat_length number of residues'
    # floor division keeps this an int (it feeds list indexing below)
    NumberRepeats = PoseLength // RepeatLength

    # 1-indexed, inclusive (start, end) residue range of every repeat
    RepeatRanges = [ (Start, Start + RepeatLength - 1) for Start in range(1, PoseLength + 1, RepeatLength) ]
    assert len(RepeatRanges) == NumberRepeats

    MidRepeat = ( NumberRepeats // 2 ) - 1
    ReferenceRange = RepeatRanges[MidRepeat]

    SetupNCS = symmetry.SetupNCSMover()
    for TargetRange in RepeatRanges:
        if TargetRange == ReferenceRange:
            continue
        if TargetRange[0] == 1:
            # skip first three residues (not enough atoms for torsion)
            SetupNCS.add_group( "%dA-%dA"%(ReferenceRange[0]+3, ReferenceRange[1]), "%dA-%dA"%(TargetRange[0]+3, TargetRange[1]) )
        elif TargetRange[1] == PoseLength:
            # skip last three residues (not enough atoms for torsion)
            SetupNCS.add_group( "%dA-%dA"%(ReferenceRange[0], ReferenceRange[1]-3), "%dA-%dA"%(TargetRange[0], TargetRange[1]-3) )
        else:
            SetupNCS.add_group( "%dA-%dA"%(ReferenceRange[0], ReferenceRange[1]), "%dA-%dA"%(TargetRange[0], TargetRange[1]) )
    SetupNCS.apply(Pose)

    # default score function plus dihedral weight for symmetry (NCS) minimization
    SymmTalaris = rosetta.getScoreFunction()
    SymmTalaris.set_weight(rosetta.dihedral_constraint, 1.0)

    TalarisPlusCst = rosetta.getScoreFunction()
    TalarisPlusCst.set_weight(rosetta.atom_pair_constraint, 10.0)
    TalarisPlusCst.set_weight(rosetta.angle_constraint, 5.0)
    TalarisPlusCst.set_weight(rosetta.dihedral_constraint, 3.0)

    # same constraint weights, with the fa_rep weight lowered to 0.25
    TalarisPlusCstLowerFaRep = rosetta.getScoreFunction()
    TalarisPlusCstLowerFaRep.set_weight(rosetta.atom_pair_constraint, 10.0)
    TalarisPlusCstLowerFaRep.set_weight(rosetta.angle_constraint, 5.0)
    TalarisPlusCstLowerFaRep.set_weight(rosetta.dihedral_constraint, 3.0)
    TalarisPlusCstLowerFaRep.set_weight(rosetta.fa_rep, 0.25)

    print('Pdb: %s' % (Pdb,))

    FxnTags = [ 'TalCst', 'LowFaRep' ]
    ScoreFunctions = [ TalarisPlusCst, TalarisPlusCstLowerFaRep ]

    # Fallback stem so the final JustRelax dump still has a name when CstSets
    # is empty; any constraint file in the loop overrides it, as before.
    CstStemName = re.sub(r'^(.*)\.pdb$', r'\1', Pdb)

    for Cst in CstSets:
        print('Cst: %s' % (Cst,))
        CstPose = Pose.clone()
        CstStemName = re.sub(r'^(.*)\.cst$', r'\1', Cst)

        # make constraint mover and load the constraints from file
        Constrainer = rosetta.ConstraintSetMover()
        Constrainer.constraint_file(Cst)
        Constrainer.apply(CstPose)

        for FxnTag, ScoreFunction in zip(FxnTags, ScoreFunctions):
            RelaxPose = CstPose.clone()
            rosetta.relax_pose(RelaxPose, ScoreFunction, 'tag')
            rosetta.dump_pdb( RelaxPose, CstStemName+'_%s.pdb'%FxnTag )

            # remove all constraints, then reapply only the ncs constraints
            RelaxPose.remove_constraints()
            SetupNCS.apply(RelaxPose)
            rosetta.relax_pose(RelaxPose, SymmTalaris, 'tag')
            rosetta.dump_pdb( RelaxPose, CstStemName+'_%s_Relax.pdb'%FxnTags[FxnTags.index(FxnTag)] )

    # NOTE(review): this stage does not depend on any constraint file, so it is
    # run once after the loop and named after the last cst stem -- confirm that
    # this matches the intended output naming.
    JustRelaxPose = Pose.clone()
    SetupNCS.apply( JustRelaxPose )
    rosetta.relax_pose( JustRelaxPose, SymmTalaris, 'tag' )
    rosetta.dump_pdb( JustRelaxPose, CstStemName+'_JustRelax.pdb' )
def main(argv=None):
    ''' Extrapolate a reference constraint file onto every matching repeat pdb.

    Parses -ref_pdb, -ref_cst, -repeat_pdb_tag (required) and -out, globs
    '*<repeat_pdb_tag>*.pdb' in the working directory and, for every pdb whose
    name carries both a 'src<a>_<b>__<c>_<d>' and a 'rep<n>' tag, calls
    constraint_extrapolator.extrapolate_from_repeat_unit with the first source
    range and the repeat length. Pdbs missing either tag are skipped.

    argv: optional list of arguments that replaces sys.argv[1:] before parsing.

    Raises AssertionError if the glob matches no pdb.
    '''
    if argv is not None:
        sys.argv = [ sys.argv[0] ] + list(argv)

    # Arg block
    ArgParser = argparse.ArgumentParser(description=' expand_cst.py ( -help ) %s'%InfoString)
    # Required args
    ArgParser.add_argument('-ref_pdb', type=str, help=' reference pdb ', required=True)
    ArgParser.add_argument('-ref_cst', type=str, help=' corresponding to reference pdb ', required=True)
    ArgParser.add_argument('-repeat_pdb_tag', type=str, help=' input pdb tag ', required=True)
    # Optional args
    ArgParser.add_argument('-out', type=str, help=' Output directory ', default='./')
    Args = ArgParser.parse_args()

    # Normalise the output directory to end in '/'; an empty value means cwd
    if not Args.out:
        Args.out = './'
    elif not Args.out.endswith('/'):
        Args.out = Args.out + '/'

    RefPdb = Args.ref_pdb
    ReferencePose = rosetta.pose_from_pdb( RefPdb )
    print('ReferencePose %s' % (ReferencePose,))

    # modify rosetta cst w/o rosetta
    Constrainer = constraint_extrapolator(Args.ref_cst)

    Pdbs = glob.glob( '*%s*.pdb'%Args.repeat_pdb_tag )
    # The message has exactly one conversion so a failed assert reports the
    # glob instead of dying inside the string formatting.
    assert Pdbs, "No pdbs found with glob:\n  '*%s*.pdb'" % Args.repeat_pdb_tag

    for Pdb in Pdbs:
        print('Pdb: %s' % (Pdb,))

        # re.sub returns the name unchanged when the tag is absent, so int()
        # raising ValueError is what signals a missing tag.
        try:
            SourceRangeString = re.sub(r'.*src(\d+_\d+__\d+_\d+).*pdb', r'\1', Pdb)
            SourceRanges = [ [ int(Number) for Number in Range.split('_') ] for Range in SourceRangeString.split('__') ]
        except ValueError:
            print('No src range tag, skipping: %s ' % Pdb)
            continue

        try:
            RepeatLength = int( re.sub(r'.*rep(\d+).*pdb', r'\1', Pdb) )
        except ValueError:
            print('No rep tag, skipping: %s ' % Pdb)
            continue

        # Load the pose only for pdbs that will actually be processed
        Pose = rosetta.pose_from_pdb(Pdb)

        print('SourceRanges: %s' % (SourceRanges,))
        print('RepeatLength %s' % (RepeatLength,))
        print('')

        # Strip only the trailing '.pdb'
        PdbTag = Pdb[:-len('.pdb')] if Pdb.endswith('.pdb') else Pdb
        CstName = PdbTag+'.cst'
        Constrainer.extrapolate_from_repeat_unit(SourceRanges[0][0], SourceRanges[0][1], RepeatLength, Pose, CstName, PdbTag)
def extrapolate_from_repeat_unit(self, ReferenceStart, ReferenceEnd, RepeatUnitLength, NewPose, FinalCstName, PdbTag):
    ''' renumbers based on repeat unit pose

    Sorts the reference constraints into edge1 / edge2 / both-edge / middle
    classes (via self.shift_and_sort_constraints), copies each class onto the
    repeats of NewPose by adding multiples of RepeatUnitLength to the residue
    numbers, and writes two constraint files: '<stem>_e1.cst' (middle + edge1)
    and '<stem>_e2.cst' (middle + edge2), where <stem> is FinalCstName without
    its '.cst' suffix. Temporary '<PdbTag>_*Temp.cst' files and a debug
    'AllRepeatCst.cst' are written along the way; the temp files are removed
    at the end.

    ReferenceStart, ReferenceEnd: residue range of the archetype repeat
    RepeatUnitLength: residues per repeat unit
    NewPose: pose the constraints are extrapolated onto
    FinalCstName: name used to derive the output constraint file names
    PdbTag: prefix for the temporary constraint files

    Sets self.Range and self.NewPoseStartShift. Returns None.
    '''

    def shifted_copies(Constraints, Shifts):
        ''' copies of every constraint at every shift that pose_has() accepts for NewPose '''
        Copies = []
        for AtomResidueCoords, ConstraintParameters, CstLineNumber, CstType in Constraints:
            for Shift in Shifts:
                ShiftedAtomResidueCoords = [ (AtomName, ResidueNumber+Shift) for AtomName, ResidueNumber in AtomResidueCoords ]
                if pose_has(NewPose, ShiftedAtomResidueCoords):
                    Copies.append( ( ShiftedAtomResidueCoords, ConstraintParameters, CstLineNumber, CstType ) )
        return Copies

    def constrained_clone(CstFile):
        ''' clone of NewPose with the constraints in CstFile applied and its cst score shown '''
        ConstrainedPose = NewPose.clone()
        ConstraintSetter = rosetta.ConstraintSetMover()
        ConstraintSetter.constraint_file(CstFile)
        ConstraintSetter.apply(ConstrainedPose)
        CstScoreFunction.show(ConstrainedPose)
        return ConstrainedPose

    NewLength = NewPose.n_residue()
    self.Range = (1, NewLength)
    self.NewPoseStartShift = ReferenceStart - 1 # for 1 indexing

    # floor division: range() below needs an int
    UnitShiftMultiples = NewLength // RepeatUnitLength
    UnitShiftList = [ RepeatUnitLength * Multiple for Multiple in range( UnitShiftMultiples ) ]

    Edge1Cst, Edge2Cst, BothEdgeCst, MiddleCst = self.shift_and_sort_constraints(ReferenceStart, ReferenceEnd, RepeatUnitLength)

    print('UnitShiftList: %s' % (UnitShiftList,))
    print('RepeatUnitLength: %s' % (RepeatUnitLength,))

    # Edge1 constraints are not copied onto the first repeat position and
    # Edge2 constraints are not copied onto the last one.
    MiddleRepeatCstList = shifted_copies(MiddleCst, UnitShiftList)
    Edge1RepeatCstList = shifted_copies(Edge1Cst, UnitShiftList[1:])
    Edge2RepeatCstList = shifted_copies(Edge2Cst, UnitShiftList[:-1])
    BothEdgeRepeatCstList = shifted_copies(BothEdgeCst, UnitShiftList)

    self.output_cst(MiddleRepeatCstList, '%s_MidRepTemp.cst'%PdbTag)
    self.output_cst(Edge1RepeatCstList, '%s_Edge1RepTemp.cst'%PdbTag)
    self.output_cst(Edge2RepeatCstList, '%s_Edge2RepTemp.cst'%PdbTag)
    self.output_cst(BothEdgeRepeatCstList, '%s_BothEdgeRepTemp.cst'%PdbTag)

    # Union of all four classes (seeded from the middle list, so Edge1 is
    # included once and the middle constraints are not left out).
    AllRepeatCst = MiddleRepeatCstList[:]
    AllRepeatCst.extend(Edge1RepeatCstList)
    AllRepeatCst.extend(Edge2RepeatCstList)
    AllRepeatCst.extend(BothEdgeRepeatCstList)
    self.output_cst(AllRepeatCst, 'AllRepeatCst.cst')

    # trying out constraints to pick between edge 1 and edge 2 (and filter?)
    NumberMiddleRepeatCst = len(MiddleRepeatCstList)
    NumberEdge1RepeatCst = len(Edge1RepeatCstList)
    NumberEdge2RepeatCst = len(Edge2RepeatCstList)

    # All default non zero weights set to zero, then only constraint terms on
    CstScoreFunction = set_all_weights_zero( rosetta.getScoreFunction() )
    CstScoreFunction.set_weight( rosetta.atom_pair_constraint, 1.0 )
    CstScoreFunction.set_weight( rosetta.angle_constraint, 1.0 )
    CstScoreFunction.set_weight( rosetta.dihedral_constraint, 1.0 )

    print('MiddlePose should have %d constraints !!! '%NumberMiddleRepeatCst)
    # guarded on the middle count (each temp file is only loaded when non-empty)
    if NumberMiddleRepeatCst:
        constrained_clone('%s_MidRepTemp.cst'%PdbTag)
    print('')

    print('Edge1Pose should have %d constraints !!! '%NumberEdge1RepeatCst)
    if NumberEdge1RepeatCst:
        Edge1Pose = constrained_clone('%s_Edge1RepTemp.cst'%PdbTag)
        # per-constraint score; only defined when there are edge1 constraints
        Edge1ScoreNorm = CstScoreFunction(Edge1Pose) / NumberEdge1RepeatCst
    print('')

    print('Edge2Pose should have %d constraints !!! '%NumberEdge2RepeatCst)
    if NumberEdge2RepeatCst:
        Edge2Pose = constrained_clone('%s_Edge2RepTemp.cst'%PdbTag)
        Edge2ScoreNorm = CstScoreFunction(Edge2Pose) / NumberEdge2RepeatCst
    print('')

    # Middle + both-edge + whichever edge class scores better per constraint.
    # NOTE(review): this curated set is built but not written anywhere yet.
    CuratedRepeatCst = MiddleRepeatCstList[:]
    ## whether these should be included or not is up in the air!!
    CuratedRepeatCst.extend(BothEdgeRepeatCstList)
    if NumberEdge1RepeatCst and NumberEdge2RepeatCst:
        if Edge1ScoreNorm <= Edge2ScoreNorm:
            CuratedRepeatCst.extend(Edge1RepeatCstList)
        else:
            CuratedRepeatCst.extend(Edge2RepeatCstList)
    elif NumberEdge1RepeatCst:
        CuratedRepeatCst.extend(Edge1RepeatCstList)
    elif NumberEdge2RepeatCst:
        CuratedRepeatCst.extend(Edge2RepeatCstList)

    AllWithEdge1RepeatCst = MiddleRepeatCstList[:]
    AllWithEdge1RepeatCst.extend(Edge1RepeatCstList)

    AllWithEdge2RepeatCst = MiddleRepeatCstList[:]
    AllWithEdge2RepeatCst.extend(Edge2RepeatCstList)

    # Strip only a trailing '.cst'
    if FinalCstName.endswith('.cst'):
        ModFinalCstName = FinalCstName[:-len('.cst')]
    else:
        ModFinalCstName = FinalCstName
    self.output_cst(AllWithEdge1RepeatCst, ModFinalCstName+'_e1.cst')
    self.output_cst(AllWithEdge2RepeatCst, ModFinalCstName+'_e2.cst')

    RemainingTempFiles = glob.glob( '%s_*Temp.cst'%PdbTag )
    for File in RemainingTempFiles:
        subprocess.check_output(['rm', File])
def main(ExtraResidues=0, ipython=0):
    ''' Score globbed pdbs, plot the scores and interactively copy out subsets.

    Parses the command line, scores every pdb matched by -pdb_glob, groups the
    poses by repeat length (and by process tag when -multi is set), hands each
    group to a plotly_plotter, renders the plot, then repeatedly prompts for a
    maximum score per score function and a folder, and copies every pdb (and
    its cst, when the plotter recorded one) that passes the cutoffs there.

    ExtraResidues: pre-built nonstandard residue set; when left at 0 and
      -param is given, one is generated from the param files.
    ipython: when truthy, return ExtraResidues right after it is resolved.

    Returns ExtraResidues in ipython mode, otherwise None.
    Uses raw_input, i.e. the prompts are Python 2.
    '''
    ### Required args
    ArgParser = argparse.ArgumentParser(
        description=" for plotting pdb scores and selecting subsets based on absolute or per residue scores ")
    ArgParser.add_argument('-pdb_glob', type=str, help=" pdb stem, start of globs for pdbs and csts ", required=True)
    ArgParser.add_argument('-native', type=str, help=" pdb to compare designs against ", required=True)
    ### Default args
    ArgParser.add_argument('-cst', type=str, help=" to provide cst manually, will apply to all globed pdbs!!! ", default=False)
    ArgParser.add_argument('-param', type=str, nargs='+', help=" params ", default=[])
    ArgParser.add_argument('-norm', type=int, help=" 0|(1) normalize scores by residue ", default=1)

    ### following args are for plotly:
    ### change if you use this script!!!
    # NOTE(review): real-looking plotly credentials are hard-coded as defaults;
    # they are left unchanged to keep the command line compatible, but should
    # be moved out of the source (and rotated).
    ArgParser.add_argument('-plotly_id', type=str, help=" ", default="pylesharley")
    ArgParser.add_argument('-plotly_key', type=str, help=" ", default="cc5z4a8kst")
    ArgParser.add_argument('-plot', type=int, help=" 0|(1) plot scores with plotly ", default=1)
    ArgParser.add_argument('-name', type=str, help=" plot tag ", default='')
    ArgParser.add_argument('-and_or', type=str, help=" And/Or logic for score cutoffs. Default = 'and' ", default='and')
    ArgParser.add_argument('-multi', type=int, help=" 0|(1) plot different methods together on same plot ", default=1)

    Args = ArgParser.parse_args()
    Pdbs = glob.glob(Args.pdb_glob)
    print('globed %d pdbs' % len(Pdbs))

    if ExtraResidues == 0 and len(Args.param) > 0:
        try:
            ExtraParams = rosetta.Vector1(Args.param)
            ExtraResidues = rosetta.generate_nonstandard_residue_set(ExtraParams)
        except Exception:
            # NOTE(review): the identical call is retried once on failure;
            # the reason is not visible here -- confirm it is still needed.
            ExtraParams = rosetta.Vector1(Args.param)
            ExtraResidues = rosetta.generate_nonstandard_residue_set(ExtraParams)

    ### for ipython mode
    if ipython:
        return ExtraResidues

    Args.and_or = Args.and_or.lower()
    assert Args.and_or in ('and', 'or'), " -and_or must equal 'and' or 'or' "

    # better to find out if native pdb is wrong before waiting for pdb scoring;
    # the handle is closed again straight away
    with open(Args.native, 'r'):
        pass

    if not Pdbs:
        # nothing to score, plot or select from
        print('No pdbs matched %s' % Args.pdb_glob)
        return

    # Common leading part of all pdb names (the shortest pairwise overlap is
    # the common prefix of the whole set). With a single pdb there is nothing
    # to compare against, so no prefix is stripped.
    ShortestOverlap = os.path.commonprefix(Pdbs) if len(Pdbs) > 1 else ''

    RepeatLengths = []
    ProcessTags = {}
    TagList = []
    TagByPdbName = {}

    for Pdb in Pdbs:
        # re.sub leaves the name unchanged without a rep tag -> ValueError -> 0
        try:
            RepeatLength = int(re.sub(r'^.*rep(\d+).*pdb$', r'\1', Pdb))
        except ValueError:
            RepeatLength = 0
        RepeatLengths.append(RepeatLength)

        #### re.sub out tag from design process
        Tag = re.sub(r'^.*rep\d+(.*)\.pdb$', r'\1', Pdb)
        # the prefix is escaped so it is matched literally
        Tag = re.sub(r'^%s(.*)\.pdb$' % re.escape(ShortestOverlap), r'\1', Tag)
        TagByPdbName[Pdb] = Tag

        # number the distinct tags 1, 2, 3, ... in order of first appearance
        TagNumber = ProcessTags.setdefault(Tag, len(ProcessTags) + 1)
        TagList.append(TagNumber)

    # Scoring is redundant, once for sorting outside plotter, then again in plotter
    # making not redundant not a priority.
    # Scoring in the plotter object is so multiple score functions can be plotted easily
    Talaris = rosetta.getScoreFunction()
    SortedTuples = []
    for Pdb, RepeatLength, ProcessNumber in zip(Pdbs, RepeatLengths, TagList):
        Pose = rosetta.pose_from_pdb(Pdb)
        if Args.norm:
            Score = Talaris(Pose) / Pose.n_residue()
        else:
            Score = Talaris(Pose)

        if Args.multi:
            SortedTuples.append((RepeatLength, ProcessNumber, Score, Pose))
        else:
            SortedTuples.append((RepeatLength, Score, Pose))

    # Sort by repeat length, then (multi only) method tag, then score; the
    # trailing pose is left out of the key so poses are never compared.
    SortedTuples.sort(key=lambda PoseTuple: PoseTuple[:-1])

    # Split the sorted tuples into runs sharing repeat length (and tag if multi)
    KeyWidth = 2 if Args.multi else 1
    AllGroups = []
    CurrentGroup = []
    LastKey = None
    for PoseTuple in SortedTuples:
        Key = PoseTuple[:KeyWidth]
        if CurrentGroup and Key != LastKey:
            AllGroups.append(CurrentGroup)
            CurrentGroup = []
        CurrentGroup.append(PoseTuple)
        LastKey = Key
    # for last group
    if CurrentGroup:
        AllGroups.append(CurrentGroup)

    # Build score functions here:
    Talaris = rosetta.getScoreFunction()

    # set_all_weights_zero returns the function with all default weights set to 0
    CstScore = set_all_weights_zero(rosetta.getScoreFunction())
    CstScore.set_weight(rosetta.atom_pair_constraint, 10.0)
    CstScore.set_weight(rosetta.angle_constraint, 5.0)
    CstScore.set_weight(rosetta.dihedral_constraint, 3.0)

    HbondScore = set_all_weights_zero(rosetta.getScoreFunction())
    HbondScore.set_weight(rosetta.hbond_sr_bb, 1.170)
    HbondScore.set_weight(rosetta.hbond_lr_bb, 1.170)
    HbondScore.set_weight(rosetta.hbond_bb_sc, 1.170)
    HbondScore.set_weight(rosetta.hbond_sc, 1.100)

    Disulfide = set_all_weights_zero(rosetta.getScoreFunction())
    Disulfide.set_weight(rosetta.dslf_fa13, 1.0)

    if Args.plot:
        PerRes = bool(Args.norm)
        # Add and remove score functions here
        Plotter = plotly_plotter(
            Args.plotly_id, Args.plotly_key, Args.native,
            ScoreFxns=[CstScore, Talaris, HbondScore, Disulfide],
            FxnNames=['ConstraintScore', 'Talaris2013', 'H-bond', 'Disulfide'],
            PerResidue=PerRes)

    for PoseGroup in AllGroups:
        Poses = [PoseTuple[-1] for PoseTuple in PoseGroup]
        RepeatLength = PoseGroup[0][0]

        if Args.plot:
            GroupPdbName = PoseGroup[0][-1].pdb_info().name()
            if Args.multi:
                Tag = TagByPdbName[GroupPdbName]
            # NOTE(review): without -multi, Tag is whatever the last pdb in
            # the tagging loop above produced -- confirm that is intended.
            if Args.cst:
                Plotter.score_poses(Poses, Args.cst, Tag)
            else:
                Plotter.score_poses(Poses, 1, Tag)

    if not Args.plot:
        # Plotting and the selection prompts below both need the Plotter,
        # which only exists when -plot is on.
        return

    Plotter.plot_2d_score_combinations()
    print('Plotter.Score2dComboTraces %s %s' % (3, Plotter.Score2dComboTraces))

    Plotter.draw_comparisons()

    print('plotting...')
    if len(Args.name):
        Name = Args.name
    else:
        Name = '%s based %d res ' % (Args.native, RepeatLength)
    Plotter.render_scatter_plot(PlotName=Name)

    while 1:
        # one maximum-score cutoff per score function; re-prompt until numeric
        ScoreFunctionScoreCutoffs = []
        for Name in Plotter.FxnNames:
            while 1:
                try:
                    Cutoff = float(raw_input('\tEnter cutoff value (maximum) for %s function: ' % Name))
                    break
                except ValueError:
                    pass
            ScoreFunctionScoreCutoffs.append(Cutoff)

        print('Cutoff values set at:')
        for i, Name in enumerate(Plotter.FxnNames):
            Plotter.ScoreFunctionScoredPdbs[i].sort()

        # pdbs at or below the cutoff, per score function
        PassingPdbs = []
        for i, Name in enumerate(Plotter.FxnNames):
            Cutoff = ScoreFunctionScoreCutoffs[i]
            PassThisFxn = [Pdb for Score, Pdb in Plotter.ScoreFunctionScoredPdbs[i] if Score <= Cutoff]
            PassingPdbs.append(PassThisFxn)

        # combine across score functions with intersection ('and') or union ('or')
        PdbsPassingAll = PassingPdbs[0]
        if Args.and_or == 'and':
            for OtherSet in PassingPdbs[1:]:
                PdbsPassingAll = list(set(PdbsPassingAll) & set(OtherSet))
        else:
            for OtherSet in PassingPdbs[1:]:
                PdbsPassingAll = list(set(PdbsPassingAll + OtherSet))

        Outdir = raw_input('\tEnter folder to copy pdbs that pass these thresholds (%s logic) to: ' % Args.and_or)
        if not os.path.isdir(Outdir):
            subprocess.check_output(['mkdir', Outdir])
        if not Outdir.endswith('/'):
            Outdir = Outdir + '/'

        for Pdb in PdbsPassingAll:
            subprocess.check_output(['cp', Pdb, Outdir])
            if Plotter.CstDict[Pdb] is not None:
                subprocess.check_output(['cp', Plotter.CstDict[Pdb], Outdir])

        Continue = str(raw_input('\tEnter Y to add another set of selection threshold, or anything else to quit: ')).upper()
        if Continue != 'Y':
            break
def main(argv=None):
    ''' Extrapolate a reference constraint file onto every matching repeat pdb.

    Parses -ref_pdb, -ref_cst, -repeat_pdb_tag (required) and -out, globs
    '*<repeat_pdb_tag>*.pdb' in the working directory and, for every pdb whose
    name carries both a 'src<a>_<b>__<c>_<d>' and a 'rep<n>' tag, calls
    constraint_extrapolator.extrapolate_from_repeat_unit with the first source
    range and the repeat length. Pdbs missing either tag are skipped.

    argv: optional list of arguments that replaces sys.argv[1:] before parsing.

    Raises AssertionError if the glob matches no pdb.
    '''
    if argv is not None:
        sys.argv = [sys.argv[0]] + list(argv)

    # Arg block
    ArgParser = argparse.ArgumentParser(
        description=' expand_cst.py ( -help ) %s' % InfoString)
    # Required args
    ArgParser.add_argument('-ref_pdb', type=str, help=' reference pdb ', required=True)
    ArgParser.add_argument('-ref_cst', type=str, help=' corresponding to reference pdb ', required=True)
    ArgParser.add_argument('-repeat_pdb_tag', type=str, help=' input pdb tag ', required=True)
    # Optional args
    ArgParser.add_argument('-out', type=str, help=' Output directory ', default='./')
    Args = ArgParser.parse_args()

    # Normalise the output directory to end in '/'; an empty value means cwd
    if not Args.out:
        Args.out = './'
    elif not Args.out.endswith('/'):
        Args.out = Args.out + '/'

    RefPdb = Args.ref_pdb
    ReferencePose = rosetta.pose_from_pdb(RefPdb)
    print('ReferencePose %s' % (ReferencePose,))

    # modify rosetta cst w/o rosetta
    Constrainer = constraint_extrapolator(Args.ref_cst)

    Pdbs = glob.glob('*%s*.pdb' % Args.repeat_pdb_tag)
    # The message has exactly one conversion so a failed assert reports the
    # glob instead of dying inside the string formatting.
    assert Pdbs, "No pdbs found with glob:\n  '*%s*.pdb'" % Args.repeat_pdb_tag

    for Pdb in Pdbs:
        print('Pdb: %s' % (Pdb,))

        # re.sub returns the name unchanged when the tag is absent, so int()
        # raising ValueError is what signals a missing tag.
        try:
            SourceRangeString = re.sub(r'.*src(\d+_\d+__\d+_\d+).*pdb', r'\1', Pdb)
            SourceRanges = [[int(Number) for Number in Range.split('_')]
                            for Range in SourceRangeString.split('__')]
        except ValueError:
            print('No src range tag, skipping: %s ' % Pdb)
            continue

        try:
            RepeatLength = int(re.sub(r'.*rep(\d+).*pdb', r'\1', Pdb))
        except ValueError:
            print('No rep tag, skipping: %s ' % Pdb)
            continue

        # Load the pose only for pdbs that will actually be processed
        Pose = rosetta.pose_from_pdb(Pdb)

        print('SourceRanges: %s' % (SourceRanges,))
        print('RepeatLength %s' % (RepeatLength,))
        print('')

        # Strip only the trailing '.pdb'
        PdbTag = Pdb[:-len('.pdb')] if Pdb.endswith('.pdb') else Pdb
        CstName = PdbTag + '.cst'
        Constrainer.extrapolate_from_repeat_unit(
            SourceRanges[0][0], SourceRanges[0][1], RepeatLength, Pose, CstName, PdbTag)
def extrapolate_from_repeat_unit(self, ReferenceStart, ReferenceEnd, RepeatUnitLength, NewPose, FinalCstName, PdbTag):
    ''' renumbers based on repeat unit pose

    Sorts the reference constraints into edge1 / edge2 / both-edge / middle
    classes (via self.shift_and_sort_constraints), copies each class onto the
    repeats of NewPose by adding multiples of RepeatUnitLength to the residue
    numbers, and writes two constraint files: '<stem>_e1.cst' (middle + edge1)
    and '<stem>_e2.cst' (middle + edge2), where <stem> is FinalCstName without
    its '.cst' suffix. Temporary '<PdbTag>_*Temp.cst' files and a debug
    'AllRepeatCst.cst' are written along the way; the temp files are removed
    at the end.

    ReferenceStart, ReferenceEnd: residue range of the archetype repeat
    RepeatUnitLength: residues per repeat unit
    NewPose: pose the constraints are extrapolated onto
    FinalCstName: name used to derive the output constraint file names
    PdbTag: prefix for the temporary constraint files

    Sets self.Range and self.NewPoseStartShift. Returns None.
    '''

    def shifted_copies(Constraints, Shifts):
        ''' copies of every constraint at every shift that pose_has() accepts for NewPose '''
        Copies = []
        for AtomResidueCoords, ConstraintParameters, CstLineNumber, CstType in Constraints:
            for Shift in Shifts:
                ShiftedAtomResidueCoords = [
                    (AtomName, ResidueNumber + Shift)
                    for AtomName, ResidueNumber in AtomResidueCoords
                ]
                if pose_has(NewPose, ShiftedAtomResidueCoords):
                    Copies.append(
                        (ShiftedAtomResidueCoords, ConstraintParameters, CstLineNumber, CstType))
        return Copies

    def constrained_clone(CstFile):
        ''' clone of NewPose with the constraints in CstFile applied and its cst score shown '''
        ConstrainedPose = NewPose.clone()
        ConstraintSetter = rosetta.ConstraintSetMover()
        ConstraintSetter.constraint_file(CstFile)
        ConstraintSetter.apply(ConstrainedPose)
        CstScoreFunction.show(ConstrainedPose)
        return ConstrainedPose

    NewLength = NewPose.n_residue()
    self.Range = (1, NewLength)
    self.NewPoseStartShift = ReferenceStart - 1  # for 1 indexing

    # floor division: range() below needs an int
    UnitShiftMultiples = NewLength // RepeatUnitLength
    UnitShiftList = [
        RepeatUnitLength * Multiple for Multiple in range(UnitShiftMultiples)
    ]

    Edge1Cst, Edge2Cst, BothEdgeCst, MiddleCst = self.shift_and_sort_constraints(
        ReferenceStart, ReferenceEnd, RepeatUnitLength)

    print('UnitShiftList: %s' % (UnitShiftList,))
    print('RepeatUnitLength: %s' % (RepeatUnitLength,))

    # Edge1 constraints are not copied onto the first repeat position and
    # Edge2 constraints are not copied onto the last one.
    MiddleRepeatCstList = shifted_copies(MiddleCst, UnitShiftList)
    Edge1RepeatCstList = shifted_copies(Edge1Cst, UnitShiftList[1:])
    Edge2RepeatCstList = shifted_copies(Edge2Cst, UnitShiftList[:-1])
    BothEdgeRepeatCstList = shifted_copies(BothEdgeCst, UnitShiftList)

    self.output_cst(MiddleRepeatCstList, '%s_MidRepTemp.cst' % PdbTag)
    self.output_cst(Edge1RepeatCstList, '%s_Edge1RepTemp.cst' % PdbTag)
    self.output_cst(Edge2RepeatCstList, '%s_Edge2RepTemp.cst' % PdbTag)
    self.output_cst(BothEdgeRepeatCstList, '%s_BothEdgeRepTemp.cst' % PdbTag)

    # Union of all four classes (seeded from the middle list, so Edge1 is
    # included once and the middle constraints are not left out).
    AllRepeatCst = MiddleRepeatCstList[:]
    AllRepeatCst.extend(Edge1RepeatCstList)
    AllRepeatCst.extend(Edge2RepeatCstList)
    AllRepeatCst.extend(BothEdgeRepeatCstList)
    self.output_cst(AllRepeatCst, 'AllRepeatCst.cst')

    # trying out constraints to pick between edge 1 and edge 2 (and filter?)
    NumberMiddleRepeatCst = len(MiddleRepeatCstList)
    NumberEdge1RepeatCst = len(Edge1RepeatCstList)
    NumberEdge2RepeatCst = len(Edge2RepeatCstList)

    # All default non zero weights set to zero, then only constraint terms on
    CstScoreFunction = set_all_weights_zero(rosetta.getScoreFunction())
    CstScoreFunction.set_weight(rosetta.atom_pair_constraint, 1.0)
    CstScoreFunction.set_weight(rosetta.angle_constraint, 1.0)
    CstScoreFunction.set_weight(rosetta.dihedral_constraint, 1.0)

    print('MiddlePose should have %d constraints !!! ' % NumberMiddleRepeatCst)
    # guarded on the middle count (each temp file is only loaded when non-empty)
    if NumberMiddleRepeatCst:
        constrained_clone('%s_MidRepTemp.cst' % PdbTag)
    print('')

    print('Edge1Pose should have %d constraints !!! ' % NumberEdge1RepeatCst)
    if NumberEdge1RepeatCst:
        Edge1Pose = constrained_clone('%s_Edge1RepTemp.cst' % PdbTag)
        # per-constraint score; only defined when there are edge1 constraints
        Edge1ScoreNorm = CstScoreFunction(Edge1Pose) / NumberEdge1RepeatCst
    print('')

    print('Edge2Pose should have %d constraints !!! ' % NumberEdge2RepeatCst)
    if NumberEdge2RepeatCst:
        Edge2Pose = constrained_clone('%s_Edge2RepTemp.cst' % PdbTag)
        Edge2ScoreNorm = CstScoreFunction(Edge2Pose) / NumberEdge2RepeatCst
    print('')

    # Middle + both-edge + whichever edge class scores better per constraint.
    # NOTE(review): this curated set is built but not written anywhere yet.
    CuratedRepeatCst = MiddleRepeatCstList[:]
    ## whether these should be included or not is up in the air!!
    CuratedRepeatCst.extend(BothEdgeRepeatCstList)
    if NumberEdge1RepeatCst and NumberEdge2RepeatCst:
        if Edge1ScoreNorm <= Edge2ScoreNorm:
            CuratedRepeatCst.extend(Edge1RepeatCstList)
        else:
            CuratedRepeatCst.extend(Edge2RepeatCstList)
    elif NumberEdge1RepeatCst:
        CuratedRepeatCst.extend(Edge1RepeatCstList)
    elif NumberEdge2RepeatCst:
        CuratedRepeatCst.extend(Edge2RepeatCstList)

    AllWithEdge1RepeatCst = MiddleRepeatCstList[:]
    AllWithEdge1RepeatCst.extend(Edge1RepeatCstList)

    AllWithEdge2RepeatCst = MiddleRepeatCstList[:]
    AllWithEdge2RepeatCst.extend(Edge2RepeatCstList)

    # Strip only a trailing '.cst'
    if FinalCstName.endswith('.cst'):
        ModFinalCstName = FinalCstName[:-len('.cst')]
    else:
        ModFinalCstName = FinalCstName
    self.output_cst(AllWithEdge1RepeatCst, ModFinalCstName + '_e1.cst')
    self.output_cst(AllWithEdge2RepeatCst, ModFinalCstName + '_e2.cst')

    RemainingTempFiles = glob.glob('%s_*Temp.cst' % PdbTag)
    for File in RemainingTempFiles:
        subprocess.check_output(['rm', File])
def cap_and_relax_pdb( (RepeatPdb, ReferencePdb, ReferenceCst) ): RepeatPose = rosetta.pose_from_pdb(RepeatPdb) TrimmedRepeatPose = grafting.return_region( RepeatPose, 3, RepeatPose.n_residue()-3 ) TrimmedRepeatPose.pdb_info( rosetta.core.pose.PDBInfo( TrimmedRepeatPose ) ) ReferencePose = rosetta.pose_from_pdb( ReferencePdb ) ReferencePose.pdb_info( rosetta.core.pose.PDBInfo( ReferencePose ) ) # rosetta.dump_pdb(TrimmedRepeatPose, 'Trimmed.pdb') RepeatLength = int(re.sub(r'.*rep(\d+).*pdb', r'\1', RepeatPdb)) SourceRanges = re.sub(r'.*src(\d+_\d+__\d+_\d+).*pdb', r'\1', RepeatPdb) SourceRanges = SourceRanges.split('__') SourceRanges = [ [ int(Value) for Value in Range.split('_') ] for Range in SourceRanges ] SourceStart = SourceRanges[0][0] SourceEnd = SourceRanges[0][1] ''' Add N terminal cap ''' NcapPose = grafting.return_region( ReferencePose, 1, SourceStart+5 ) # rosetta.dump_pdb(NcapPose, 'Ncap.pdb') NcapLength = NcapPose.n_residue() NcapOverhangPositions = [ Position for Position in range(NcapLength-3, NcapLength+1) ] # print NcapOverhangPositions NcapOverhangArray = get_residue_array( NcapPose, NcapOverhangPositions ) RepStartOverhangPositions = [1,2,3,4] RepStartOverhangArray = get_residue_array( TrimmedRepeatPose, RepStartOverhangPositions ) # print RepStartOverhangArray RMSD, rMtx, tVec = solenoid_tools.rmsd_2_np_arrays_rosetta( NcapOverhangArray, RepStartOverhangArray ) rosetta.Pose.apply_transform_Rx_plus_v(TrimmedRepeatPose, rMtx, tVec) # rosetta.dump_pdb( TrimmedRepeatPose, 'TrimmedShifted.pdb' ) NcapPlusRepeatPose, RMSD, NcapCorrespondingResidues = fuse(NcapPose, TrimmedRepeatPose) print 'Ncap attachment RMSD %f'%RMSD # rosetta.dump_pdb( NcapPlusRepeatPose, 'NcapPlusRepeat.pdb' ) NcapPlusRepeatPose.pdb_info( rosetta.core.pose.PDBInfo( NcapPlusRepeatPose ) ) ''' Add C terminal cap ''' Cshift = SourceEnd-6 CcapPose = grafting.return_region( ReferencePose, Cshift, ReferencePose.n_residue() ) # rosetta.dump_pdb(CcapPose, 'Ccap.pdb') 
CcapOverhangPositions = [1,2,3,4] CcapOverhangArray = get_residue_array( CcapPose, CcapOverhangPositions ) RepEndOverhangPositions = [ Position for Position in range( NcapPlusRepeatPose.n_residue()-3, NcapPlusRepeatPose.n_residue()+1 ) ] # print 'RepEndOverhangPositions', RepEndOverhangPositions RepEndOverhangArray = get_residue_array( NcapPlusRepeatPose, RepEndOverhangPositions ) RMSD, rMtx, tVec = solenoid_tools.rmsd_2_np_arrays_rosetta( RepEndOverhangArray, CcapOverhangArray ) rosetta.Pose.apply_transform_Rx_plus_v(CcapPose, rMtx, tVec) # rosetta.dump_pdb( CcapPose, 'CcapPose.pdb' ) CappedRepeatPose, RMSD, CcapCorrespondingResidues = fuse(NcapPlusRepeatPose, CcapPose) print 'Ccap attachment RMSD %f'%RMSD CappedNamePdb = re.sub(r'(.*).pdb$', r'\1_Cap.pdb', RepeatPdb) assert CappedNamePdb != RepeatPdb, 'regular expression substitution failed!' rosetta.dump_pdb( CappedRepeatPose, CappedNamePdb ) ''' Generate csts for cap/repeat edges ''' CstExtrapolator = constraint_extrapolator(ReferenceCst) ConstraintSet = [] ' N cap constraints are easy; no shifts are needed ' # For catching when individual constraints have been considered already Redundict = {} for Position in range(1, SourceStart+6): # print 'Position', Position # Skip positions w/out constraints try: PositionCstDict = CstExtrapolator.Cst[Position] except KeyError: continue for AtomName in PositionCstDict: for Constraint in PositionCstDict[AtomName]: # unpack tuple values AtomResidueCoords, ConstraintParameters, CstLineNumber, CstType = Constraint # Redundancy check with redundict try: Check = Redundict[CstLineNumber] # if cst considered already, skip it! 
continue except KeyError: Redundict[CstLineNumber] = 1 if pose_has(CappedRepeatPose, AtomResidueCoords): ConstraintSet.append(Constraint) ' C cap constraints are harder; need to shift due to pose expansion ' # CstExtrapolator.output_cst(ConstraintSet, 'NcapConstraints.cst')\ Redundict = {} # print 'CcapCorrespondingResidues', CcapCorrespondingResidues RepeatCcapPositionStart = CcapCorrespondingResidues[0][0] # print 'RepeatCcapPositionStart', RepeatCcapPositionStart ShiftToRepeatPose = RepeatCcapPositionStart - Cshift # print 'ShiftToRepeatPose', ShiftToRepeatPose for Position in range( Cshift, ReferencePose.n_residue()+1 ): # Skip positions w/out constraints try: PositionCstDict = CstExtrapolator.Cst[Position] except KeyError: continue for AtomName in PositionCstDict: for Constraint in PositionCstDict[AtomName]: # unpack tuple values AtomResidueCoords, ConstraintParameters, CstLineNumber, CstType = Constraint # Redundancy check with redundict try: Check = Redundict[CstLineNumber] # if cst considered already, skip it! 
continue except KeyError: Redundict[CstLineNumber] = 1 ExpandedPoseAtomResidueCoords = [] # iterate through atom residue pairs for AtomResiduePair in AtomResidueCoords: # print 'AtomResiduePair', AtomResiduePair ExpandedPosePosition = (AtomResiduePair[1]) + ShiftToRepeatPose # print 'ExpandedPosePosition', ExpandedPosePosition ExpandedPoseAtomResidueCoords.append( ( AtomResiduePair[0], ExpandedPosePosition ) ) ShiftedConstraint = ExpandedPoseAtomResidueCoords, ConstraintParameters, CstLineNumber, CstType if pose_has(CappedRepeatPose, ExpandedPoseAtomResidueCoords): ConstraintSet.append(ShiftedConstraint) CapCstName = re.sub(r'(.*).pdb$', r'\1.cst', CappedNamePdb) CstExtrapolator.output_cst(ConstraintSet, CapCstName) ''' idealize peptide bonds with command line subprocess ''' subprocess.check_output(['idealize_jd2.default.linuxgccrelease', '-s', CappedNamePdb]) IdealizedPdbOldName = re.sub(r'(.*).pdb$', r'\1_0001.pdb', CappedNamePdb) IdealizedPdbNewName = re.sub(r'(.*).pdb$', r'\1_Ideal.pdb', CappedNamePdb) subprocess.check_output(['mv', IdealizedPdbOldName, IdealizedPdbNewName]) time.sleep(0.2) IdealizedCappedPose = rosetta.pose_from_pdb( IdealizedPdbNewName ) # make constraint mover Constrainer = rosetta.ConstraintSetMover() # get constraints from file Constrainer.constraint_file(CapCstName) Constrainer.apply(IdealizedCappedPose) ''' SET UP WEIGHTS AS decided ''' # RelativeWeight = 0.1 Talaris = rosetta.getScoreFunction() TalarisPlusCst = rosetta.getScoreFunction() AtomPairCst = set_all_weights_zero( rosetta.getScoreFunction() ) AtomPairCst.set_weight(rosetta.atom_pair_constraint, 1.0) # RosettaScore = Talaris(IdealizedCappedPose) # AtomPairCstScore = AtomPairCst(IdealizedCappedPose) # Weight = ( RosettaScore * RelativeWeight ) / AtomPairCstScore Weight = 1.0 TalarisPlusCst.set_weight(rosetta.atom_pair_constraint, Weight) TalarisPlusCst.set_weight(rosetta.angle_constraint, Weight) TalarisPlusCst.set_weight(rosetta.dihedral_constraint, Weight) print 'relaxing %s 
with %s'%(IdealizedPdbNewName, CapCstName) print ' Weight %d '%Weight rosetta.relax_pose(IdealizedCappedPose, TalarisPlusCst, 'tag') RelaxedPdbName = re.sub(r'(.*)_Ideal.pdb$', r'\1__Relax.pdb', IdealizedPdbNewName) rosetta.dump_pdb(IdealizedCappedPose, RelaxedPdbName) rosetta.relax_pose(IdealizedCappedPose, Talaris, 'tag') RelaxedPdbName = re.sub(r'(.*)_Ideal.pdb$', r'\1__Relax2.pdb', IdealizedPdbNewName) rosetta.dump_pdb(IdealizedCappedPose, RelaxedPdbName)
def main(argv=None): if argv is None: argv = sys.argv ArgParser = argparse.ArgumentParser(description=" for plotting pdb scores and selecting subsets based on absolute or per residue scores ") ArgParser.add_argument('-pdb_glob', type=str, help=" pdb stem, start of globs for pdbs and csts ", required=True ) ArgParser.add_argument('-native_pdb', type=str, help=" pdb to compare designs against ", required=True ) ArgParser.add_argument('-out', type=str, help=" folder to move files to ", required=True ) ArgParser.add_argument('-score', type=float, help=" select all structures with less than this REU / residue ", default=None ) ArgParser.add_argument('-plot', type=int, help=" 0|(1) plot scores with plotly ", default=1 ) ArgParser.add_argument('-norm', type=int, help=" 0|(1) normalize scores by residue ", default=1 ) ArgParser.add_argument('-name', type=str, help=" plot tag ", default='' ) Args = ArgParser.parse_args() print Args Pdbs = glob.glob( Args.pdb_glob ) print 'globed %d pdbs'%len(Pdbs) if not os.path.isdir(Args.out): subprocess.check_output(['mkdir', Args.out]) if Args.out [-1] != '/': Args.out = Args.out + '/' if Args.name != '': Args.out = Args.out + ' ' NativePose = rosetta.pose_from_pdb( Args.native_pdb ) RepeatLengths = [] for Pdb in Pdbs: RepeatLength = int(re.sub(r'^.*rep(\d+).*pdb$', r'\1', Pdb)) # SourceStart = int(re.sub(r'^.*src(\d+).*pdb$', r'\1', Pdb)) assert RepeatLength != Pdb, " regular expression extraction of 'rep' (repeat length) value failed on %s "%Pdb # assert SourceStart != Pdb and RepeatLength != Pdb, ' regular expression extraction of rep or src value failed on %s '%Pdb RepeatLengths.append(RepeatLength) # RepeatLengths.append(SourceStart) PoseSortingTuples = [] # Scoring is redundant, once for sorting outside plotter, then again in plotter # making not redundant not a priority. 
# Scoring in the plotter object is so multiple score functions can be plotted easily Talaris = rosetta.getScoreFunction() for i, Pdb in enumerate(Pdbs): RepeatLength = RepeatLengths[i] Pose = rosetta.pose_from_pdb(Pdb) if Args.norm: Score = Talaris(Pose) / Pose.n_residue() else: Score = Talaris(Pose) PoseSortingTuples.append( (RepeatLength, Score, Pose) ) # sorts by repeat length (shortest to longest) then score (best to worst) PoseSortingTuples.sort() # print 'PoseSortingTuples', PoseSortingTuples AllRepeatLengthGroups = [] RepeatRepeatLengthGroup = [] LastLength = 0 for PoseTuple in PoseSortingTuples: Length = PoseTuple[0] if LastLength and Length != LastLength: AllRepeatLengthGroups.append(RepeatRepeatLengthGroup) RepeatRepeatLengthGroup = [] RepeatRepeatLengthGroup.append(PoseTuple) LastLength = Length # for last repeat length AllRepeatLengthGroups.append(RepeatRepeatLengthGroup) # print 'AllRepeatLengthGroups', AllRepeatLengthGroups # Add more score functions as wanted if Args.plot: Plotter = plotly_plotter(ScoreFxns=[ Talaris ], FxnNames=[ 'Talaris' ], EnergyPerResidue=True ) for RepeatLengthGroup in AllRepeatLengthGroups: print 'RepeatLengthGroup', RepeatLengthGroup Poses = [ PoseTuple[2] for PoseTuple in RepeatLengthGroup ] RepeatLength = RepeatLengthGroup[0][0] if Args.plot: Plotter.clear_traces() Xaxis = Plotter.score_poses( Poses ) Plotter.add_comparsion_threshold( NativePose, Xaxis ) Plotter.plot_traces( PlotName='%s%s based %d res repeats globed with %s'%(Args.name, Args.native_pdb, RepeatLength, Args.pdb_glob) ) if Args.score != None: with open('%sScores.log'%Args.out, 'a') as Log: for RepLen, Score, Pose in RepeatLengthGroup: if Score > Args.score: break PdbName = Pose.pdb_info().name() subprocess.check_output([ 'cp', PdbName, Args.out ]) print>>Log, '%s\t%.3f'%(PdbName, Score)
def main(ExtraResidues=0, ipython=0): ### Required args ArgParser = argparse.ArgumentParser(description=" for plotting pdb scores and selecting subsets based on absolute or per residue scores ") ArgParser.add_argument('-pdb_glob', type=str, help=" pdb stem, start of globs for pdbs and csts ", required=True ) ArgParser.add_argument('-native', type=str, help=" pdb to compare designs against ", required=True ) ### Default args ArgParser.add_argument('-cst', type=str, help=" to provide cst manually, will apply to all globed pdbs!!! ", default=False ) ArgParser.add_argument('-param', type=str, nargs='+', help=" params ", default=[] ) ArgParser.add_argument('-norm', type=int, help=" 0|(1) normalize scores by residue ", default=1 ) ### following args are for plotly: ### change if you use this script!!! ArgParser.add_argument('-plotly_id', type=str, help=" ", default="pylesharley") # required=True ) ArgParser.add_argument('-plotly_key', type=str, help=" ", default="cc5z4a8kst") # required=True ) ArgParser.add_argument('-plot', type=int, help=" 0|(1) plot scores with plotly ", default=1 ) ArgParser.add_argument('-name', type=str, help=" plot tag ", default='' ) ArgParser.add_argument('-and_or', type=str, help=" And/Or logic for score cutoffs. 
Default = 'and' ", default='and' ) ArgParser.add_argument('-multi', type=int, help=" 0|(1) plot different methods together on same plot ", default=1 ) Args = ArgParser.parse_args() Pdbs = glob.glob( Args.pdb_glob ) print 'globed %d pdbs'%len(Pdbs) if ExtraResidues == 0 and len(Args.param) > 0: try: ExtraParams = rosetta.Vector1( Args.param ) ExtraResidues = rosetta.generate_nonstandard_residue_set( ExtraParams ) except: ExtraParams = rosetta.Vector1( Args.param ) ExtraResidues = rosetta.generate_nonstandard_residue_set( ExtraParams ) ### for ipython mode if ipython: return ExtraResidues Args.and_or = Args.and_or.lower() assert Args.and_or == 'and' or Args.and_or == 'or', " -and_or must equal 'and' or 'or' " RepeatLengths = [] ProcessTags = {} TagList = [] TagByPdbName = {} # better to find out of native pdb is wrong before waiting for pdb scoring Check = open(Args.native, 'r') # print ' first loop ' OverlapStarts = [] for Pdb in Pdbs: Tag = re.sub(r'^.*rep\d+(.*)\.pdb$', r'\1', Pdb) for OtherPdb in Pdbs: OtherTag = re.sub(r'^.*rep\d+(.*)\.pdb$', r'\1', Pdb) i = 0 if Pdb != OtherPdb: while Pdb[:i] == OtherPdb[:i]: i+=1 Overlap = OtherPdb[:i-1] OverlapStarts.append( ( len(Overlap), Overlap ) ) OverlapStarts.sort() ShortestOverlap = OverlapStarts[0][1] # print 'OverlapStarts', OverlapStarts # print 'ShortestOverlap', ShortestOverlap for Pdb in Pdbs: try: RepeatLength = int(re.sub(r'^.*rep(\d+).*pdb$', r'\1', Pdb)) except ValueError: RepeatLength = 0 # SourceStart = int(re.sub(r'^.*src(\d+).*pdb$', r'\1', Pdb)) assert RepeatLength != Pdb, " regular expression extraction of 'rep' (repeat length) value failed on %s "%Pdb # assert SourceStart != Pdb and RepeatLength != Pdb, ' regular expression extraction of rep or src value failed on %s '%Pdb RepeatLengths.append(RepeatLength) #### re.sub out tag from design process Tag = re.sub(r'^.*rep\d+(.*)\.pdb$', r'\1', Pdb) Tag = re.sub(r'^%s(.*)\.pdb$'%(ShortestOverlap), r'\1', Tag) TagByPdbName[Pdb] = Tag try: TagNumber = 
ProcessTags[Tag] except: TagNumber = len(ProcessTags) + 1 ProcessTags[Tag] = TagNumber TagList.append(TagNumber) # Scoring is redundant, once for sorting outside plotter, then again in plotter # making not redundant not a priority. # Scoring in the plotter object is so multiple score functions can be plotted easily # Sort by repeat length, then score if Args.multi: # Sort by repeat length, then method tag, then score MultiPoseSortingTuples = [] else: PoseSortingTuples = [] Talaris = rosetta.getScoreFunction() for i, Pdb in enumerate(Pdbs): RepeatLength = RepeatLengths[i] ProcessNumber = TagList[i] Pose = rosetta.pose_from_pdb(Pdb) if Args.norm: Score = Talaris(Pose) / Pose.n_residue() else: Score = Talaris(Pose) # print 'Pdb', Pdb if Args.multi: MultiPoseSortingTuples.append( (RepeatLength, ProcessNumber, Score, Pose) ) else: PoseSortingTuples.append( (RepeatLength, Score, Pose) ) if Args.multi: # Sort by repeat length, then method tag, then score MultiPoseSortingTuples.sort() else: # sorts by repeat length (shortest to longest) then score (best to worst) PoseSortingTuples.sort() if Args.multi: # print 'MultiPoseSortingTuples', MultiPoseSortingTuples SortedTuples = MultiPoseSortingTuples else: # print 'PoseSortingTuples', PoseSortingTuples SortedTuples = PoseSortingTuples LastLength = 0 LastTag = 0 AllGroups = [] CurrentGroup = [] for PoseTuple in SortedTuples: Length = PoseTuple[0] if Args.multi: Tag = PoseTuple[1] if LastLength and Length != LastLength: AllGroups.append(CurrentGroup) CurrentGroup = [] if Args.multi: if LastTag and Tag != LastTag: AllGroups.append(CurrentGroup) CurrentGroup = [] CurrentGroup.append(PoseTuple) LastLength = Length if Args.multi: LastTag = Tag # for last repeat length AllGroups.append(CurrentGroup) ''' Build score functions here: ''' Talaris = rosetta.getScoreFunction() # This line returns a talaris function with all default weights set to 0 CstScore = set_all_weights_zero( rosetta.getScoreFunction() ) 
CstScore.set_weight(rosetta.atom_pair_constraint, 10.0) CstScore.set_weight(rosetta.angle_constraint, 5.0) CstScore.set_weight(rosetta.dihedral_constraint, 3.0) HbondScore = set_all_weights_zero( rosetta.getScoreFunction() ) HbondScore.set_weight(rosetta.hbond_sr_bb, 1.170) HbondScore.set_weight(rosetta.hbond_lr_bb, 1.170) HbondScore.set_weight(rosetta.hbond_bb_sc, 1.170) HbondScore.set_weight(rosetta.hbond_sc, 1.100) Disulfide = set_all_weights_zero( rosetta.getScoreFunction() ) Disulfide.set_weight(rosetta.dslf_fa13, 1.0) if Args.plot: if Args.norm: PerRes = True else: PerRes = False ''' Add and remove score functions here ''' Plotter = plotly_plotter( Args.plotly_id, Args.plotly_key, Args.native, ScoreFxns=[ CstScore, Talaris, HbondScore, Disulfide ], FxnNames=[ 'ConstraintScore', 'Talaris2013', 'H-bond', 'Disulfide' ], PerResidue=PerRes ) XaxisSortingTuples = [] for PoseGroup in AllGroups: # for PoseGroup in [SortedTuples]: if len(PoseGroup): # print # print 'Group:', PoseGroup Poses = [ PoseTuple[-1] for PoseTuple in PoseGroup ] # print PoseGroup RepeatLength = PoseGroup[0][0] # print '\n'.join( [ Pose.pdb_info().name() for Pose in Poses ] ) # print 'Zero index pose tuple:' # print PoseGroup[0] if Args.plot: GroupPdbName = PoseGroup[0][-1].pdb_info().name() if Args.multi: Tag = TagByPdbName[GroupPdbName] if Args.cst: Plotter.score_poses( Poses, Args.cst, Tag ) else: Plotter.score_poses( Poses, 1, Tag ) # return Plotter Plotter.plot_2d_score_combinations() print 'Plotter.Score2dComboTraces', 3, Plotter.Score2dComboTraces Plotter.draw_comparisons() print 'plotting...' 
if len(Args.name): Name = Args.name else: Name = '%s based %d res '%( Args.native, RepeatLength ) Plotter.render_scatter_plot( PlotName=Name ) while 1: ScoreFunctionScoreCutoffs = [] for i, Name in enumerate( Plotter.FxnNames ): while 1: try: Cutoff = float( raw_input('\tEnter cutoff value (maximum) for %s function: '%Name) ) break except ValueError: pass ScoreFunctionScoreCutoffs.append(Cutoff) print 'Cutoff values set at:' for i, Name in enumerate( Plotter.FxnNames ): # print Name, ScoreFunctionScoreCutoffs[i] Plotter.ScoreFunctionScoredPdbs[i].sort() PassingPdbs = [] for i, Name in enumerate( Plotter.FxnNames ): PassThisFxn = [] Cutoff = ScoreFunctionScoreCutoffs[i] # print Plotter.ScoreFunctionScoredPdbs[i] for Score, Pdb in Plotter.ScoreFunctionScoredPdbs[i]: if Score <= Cutoff: PassThisFxn.append(Pdb) else: break PassingPdbs.append( PassThisFxn ) PdbsPassingAll = PassingPdbs[0] if Args.and_or == 'and': for OtherSet in PassingPdbs[1:]: PdbsPassingAll = list( set(PdbsPassingAll) & set(OtherSet) ) else: for OtherSet in PassingPdbs[1:]: PdbsPassingAll = list( set(PdbsPassingAll + OtherSet) ) Outdir = raw_input( '\tEnter folder to copy pdbs that pass these thresholds (%s logic) to: '%Args.and_or ) if not os.path.isdir(Outdir): subprocess.check_output(['mkdir', Outdir]) if Outdir [-1] != '/': Outdir = Outdir + '/' for Pdb in PdbsPassingAll: subprocess.check_output([ 'cp', Pdb, Outdir ]) if Plotter.CstDict[Pdb] != None: subprocess.check_output([ 'cp', Plotter.CstDict[Pdb], Outdir ]) Continue = str( raw_input( '\tEnter Y to add another set of selection threshold, or anything else to quit: ') ).upper() if Continue == 'Y': pass else: break
def optimize_repeat_pdb((Pdb, CstSets, RepeatLength)):
    '''
    Idealize a repeat pdb, tie its repeats together with NCS groups and relax it
    under each constraint set. parallelizable.

    Takes ONE tuple argument ( Pdb, CstSets, RepeatLength ):
        Pdb           path of the pdb to optimize
        CstSets       iterable of constraint file paths
        RepeatLength  residues per repeat; the pose length must be a multiple of it

    For every constraint file <stem>.cst this dumps <stem>_TalCst.pdb,
    <stem>_TalCst_Relax.pdb, <stem>_LowFaRep.pdb, <stem>_LowFaRep_Relax.pdb and
    <stem>_JustRelax.pdb. Nothing is returned.
    '''
    # idealize peptide bonds with command line subprocess
    subprocess.check_output(
        ['idealize_jd2.default.linuxgccrelease', '-s', Pdb])
    # the idealized structure is expected under the '_0001' name; rename it
    IdealizedPdbOldName = Pdb.replace('.pdb', '_0001.pdb')
    IdealizedPdbNewName = Pdb.replace('.pdb', '_ideal.pdb')
    subprocess.check_output(['mv', IdealizedPdbOldName, IdealizedPdbNewName])
    # NOTE(review): presumably lets the renamed file settle before it is read - confirm
    time.sleep(0.5)

    Pose = rosetta.pose_from_pdb(IdealizedPdbNewName)
    PoseLength = Pose.n_residue()

    assert PoseLength % RepeatLength == 0, 'pdb input into optimize_repeat_pdb must have integer multiple of repeat_length number of residues'
    # used with range() and as a list index below, so this relies on Python 2
    # integer division (assumes both operands are ints - TODO confirm)
    NumberRepeats = PoseLength / RepeatLength

    # print 'NumberRepeats', NumberRepeats
    # print 'RepeatLength', RepeatLength
    Sequence = Pose.sequence()
    # print Sequence

    # 1-based, inclusive ( Start, End ) residue range of every repeat
    RepeatRanges = []
    Start = 1
    for Repeat in range(NumberRepeats):
        End = Start + RepeatLength - 1
        RepeatRanges.append((Start, End))
        Start += RepeatLength

    assert len(RepeatRanges) == NumberRepeats
    # print 'RepeatRanges', RepeatRanges

    # reference repeat sits just before the midpoint; with a single repeat the
    # index is -1, which still selects that one repeat
    MidRepeat = (NumberRepeats / 2) - 1
    ReferenceRange = RepeatRanges[MidRepeat]
    # print 'MidRepeat', MidRepeat
    # print 'ReferenceRange', ReferenceRange

    # one NCS group per non-reference repeat, pairing it with the reference repeat
    SetupNCS = symmetry.SetupNCSMover()

    for TargetRange in RepeatRanges:
        if TargetRange != ReferenceRange:
            # print 'OtherRange', TargetRange
            # skip first three residues (not enough atoms for torsion), and amino acid types allowed to vary
            if TargetRange[0] == 1:
                SetupNCS.add_group(
                    "%dA-%dA" % (ReferenceRange[0] + 3, ReferenceRange[1]),
                    "%dA-%dA" % (TargetRange[0] + 3, TargetRange[1]))
            # skip last three residues (not enough atoms for torsion)
            elif TargetRange[1] == PoseLength:
                SetupNCS.add_group(
                    "%dA-%dA" % (ReferenceRange[0], ReferenceRange[1] - 3),
                    "%dA-%dA" % (TargetRange[0], TargetRange[1] - 3))
            else:
                SetupNCS.add_group(
                    "%dA-%dA" % (ReferenceRange[0], ReferenceRange[1]),
                    "%dA-%dA" % (TargetRange[0], TargetRange[1]))

    SetupNCS.apply(Pose)

    # default score function plus dihedral weight for symmetry ncs minimization
    SymmTalaris = rosetta.getScoreFunction()
    SymmTalaris.set_weight(rosetta.dihedral_constraint, 1.0)

    # default score function plus constraint weights
    TalarisPlusCst = rosetta.getScoreFunction()
    TalarisPlusCst.set_weight(rosetta.atom_pair_constraint, 10.0)
    TalarisPlusCst.set_weight(rosetta.angle_constraint, 5.0)
    TalarisPlusCst.set_weight(rosetta.dihedral_constraint, 3.0)

    # same constraint weights, with the fa_rep weight set to 0.25
    TalarisPlusCstLowerFaRep = rosetta.getScoreFunction()
    TalarisPlusCstLowerFaRep.set_weight(rosetta.atom_pair_constraint, 10.0)
    TalarisPlusCstLowerFaRep.set_weight(rosetta.angle_constraint, 5.0)
    TalarisPlusCstLowerFaRep.set_weight(rosetta.dihedral_constraint, 3.0)
    TalarisPlusCstLowerFaRep.set_weight(rosetta.fa_rep, 0.25)

    print 'Pdb:', Pdb
    # the two lists below are never filled or read in this function
    OptimizedPoses = []
    PoseIDs = []

    for Cst in CstSets:
        print 'Cst:', Cst
        CstPose = Pose.clone()
        # output names are built from the constraint file name minus '.cst'
        CstStemName = re.sub(r'^(.*)\.cst$', r'\1', Cst)

        # make constraint mover
        Constrainer = rosetta.ConstraintSetMover()
        # get constraints from file
        Constrainer.constraint_file(Cst)
        Constrainer.apply(CstPose)

        FxnTags = ['TalCst', 'LowFaRep']

        for i, ScoreFunction in enumerate(
                [TalarisPlusCst, TalarisPlusCstLowerFaRep]):
            # for AbsoluteWeight in [1, 5, 10, 100]:
            # first pass: relax with the file constraints in place
            RelaxPose = CstPose.clone()
            rosetta.relax_pose(RelaxPose, ScoreFunction, 'tag')
            rosetta.dump_pdb(RelaxPose, CstStemName + '_%s.pdb' % FxnTags[i])

            # second pass: remove all constraints
            RelaxPose.remove_constraints()
            # reapply ncs constraints
            SetupNCS.apply(RelaxPose)
            rosetta.relax_pose(RelaxPose, SymmTalaris, 'tag')
            # Trekker.score(RelaxPose)
            rosetta.dump_pdb(RelaxPose, CstStemName + '_%s_Relax.pdb' % FxnTags[i])

        # control: relax the idealized pose with the NCS setup only.
        # NOTE(review): the flattened source does not show whether this section ran
        # once per constraint file or once after the loop; it is placed inside the
        # loop here so every <stem>_JustRelax.pdb is produced - confirm against the
        # original layout.
        JustRelaxPose = Pose.clone()
        SetupNCS.apply(JustRelaxPose)
        rosetta.relax_pose(JustRelaxPose, SymmTalaris, 'tag')
        rosetta.dump_pdb(JustRelaxPose, CstStemName + '_JustRelax.pdb')