Exemplo n.º 1
0
def best_of_experiments(acids_sequence, n, m, folding=None):
    """Run n folding experiments and return the best scoring result.

    Every experiment builds a fresh ``Protein`` from ``acids_sequence``,
    optionally gives it a start folding, tries to improve that folding with
    ``Algorithms.fold_n_times(m, protein)`` and prints the start, end and
    best score so far. A lower score counts as better.

    Args:
        acids_sequence: sequence of acids passed to ``Protein``.
        n: number of experiments to run.
        m: number of fold attempts per experiment.
        folding: 'cube_folding' or 'random_folding' to request that start
            folding; any other value applies no start folding.

    Returns:
        The protein object of the last experiment, with its ``acids``
        replaced by copies of the acids of the best experiment. ``None``
        when no experiment reached a score below the initial best of 1
        (this includes ``n == 0``).
    """
    print('Start folding:', folding)
    best_score = 1
    # Both names stay None until an experiment actually runs / improves the
    # score, so the code after the loop never touches an unbound local.
    best_result = None
    protein = None
    for _ in range(n):
        protein = Protein(acids_sequence)
        if folding == 'cube_folding':
            if not Algorithms.cube_folding(protein, shift='', d3=True):
                print("failed to get a cube folding as start")
                continue
        elif folding == 'random_folding':
            if not Algorithms.random_folding(protein):
                print("failed to get a random folding as start")
                continue
        start_score = Algorithms.score(protein)
        # A failed fold run is only reported; the protein is still scored.
        if not Algorithms.fold_n_times(m, protein):
            print("failed to fold n times")
        end_score = Algorithms.score(protein)
        if end_score < best_score:
            best_score = end_score
            # Copy the acids: later experiments must not alter the winner.
            best_result = [acid.copy() for acid in protein.acids]
        print(
            'Start with score:\t{}\t\tEnd with score:\t{}\t\tBest score:\t{}'.
            format(start_score, end_score, best_score))
    if best_result is None:
        return None
    protein.acids = best_result
    return protein
Exemplo n.º 2
0
def main():
    """Try every fold option for the protein given on the command line.

    The option whose fold scores the most points (according to
    ``fold_points``) is remembered; its score, the option itself and the
    field filled with that option are printed at the end.
    """
    # abort early when the program is not invoked correctly
    check()
    top_points = 0
    # the protein is built from the first command line argument
    protein = Protein(argv[1])
    options = Option(protein.length)
    field = Field(protein.length, protein.sequence)
    # until something scores, the first option counts as the best one
    top_option = options.options[0]
    for candidate in options.options:
        fits = field.fill_field(protein.sequence, candidate)
        if fits and int(fold_points(field, protein.errorpoint)) > int(top_points):
            # strictly better than everything seen so far: remember it
            top_points = fold_points(field, protein.errorpoint)
            top_option = candidate
        # reset the field and its cursor before trying the next option
        field.clear_field(protein.length)
        field.x_cdn = protein.length - 1
        field.y_cdn = protein.length
    # report the best score, the best option and the matching field
    print(top_points)
    print(top_option)
    field.fill_field(protein.sequence, top_option)
    for row in field.field:
        print(row)
def branch_n_bound(p_string, prob_above_avg, prob_below_avg, dimension, matrix_size):
    '''
    This algorithm will fold a protein using a probability based version of the
    Branch and Bound algorithm.  If the probabilities for pruning are set to 1 and 1,
    this algorithm behaves as a depth-first algorithm and searches the whole statespace
    for the best solution.

    The search state is shared with next_acid through module-level globals that
    are (re)initialised here on every call.

    Args:
        p_string: the protein string; its length sets the size of the trackers.
        prob_above_avg: stored in the global prob_above_average.
        prob_below_avg: stored in the global prob_below_average.
        dimension: passed to Protein together with matrix_size.
        matrix_size: passed to Protein together with dimension.

    Returns:
        Tuple (protein_min, energy_counter, matrix_sizes). The program exits
        with an error message when the search left protein_min empty.
    '''
    # Set global variables
    global protein_string, prob_below_average, prob_above_average, length_total, energy_min_all, energy_min_partial
    # protein_min is the search result; it is presumably assigned by next_acid
    # (not visible here). Declaring and resetting it makes the failure branch
    # below reachable and prevents returning the result of an earlier call.
    global protein_min

    prob_below_average = prob_below_avg
    prob_above_average = prob_above_avg
    protein_string = p_string
    length_total = len(protein_string)
    protein_min = None

    # Initialize global dictionaries
    global energy_counter, matrix_sizes, energy_tracker
    energy_tracker = [{} for i in range(length_total)]
    energy_counter = {}
    matrix_sizes = {}

    # Create a protein object with a specific matrix size
    protein = Protein(matrix_size, dimension)

    # Initialize energy variable that keeps the lowest energy for a complete protein
    energy_min_all = 1

    # Initialize energy variable that keeps the lowest energy for a protein of each length
    energy_min_partial = [0] * length_total

    # Place first two amino acids
    protein.place_first_two(protein_string)
    previous_location = protein.last_acid

    # Call next_acid function to place a new amino acid
    next_acid(protein, previous_location)

    print(energy_tracker)
    print(energy_counter)
    print(sum(energy_counter.values()))

    if protein_min:
        return protein_min, energy_counter, matrix_sizes
    else:
        exit("Error: No protein 'protein_min' to return")
def beamsearch(p_string, width, dimension, matrix_size):
    '''
    Runs a Beam Search over the protein string. ``width`` is stored as the
    global beam width and that many slots are seeded with the initial protein;
    the search itself is delegated to find_possibilities.

    Returns the protein found in slot 0 afterwards, together with the
    dictionary of energy counts and the dictionary of matrix sizes. The
    program exits with an error message when slot 0 holds a falsy value.
    '''
    # State shared with the helper functions through module globals
    global best_nodes, protein_length, protein_string, energy_counter, proteins, B_width, matrix_sizes, initial_protein

    B_width = width
    protein_string = p_string
    protein_length = len(protein_string)

    # Protein with the requested matrix size, holding only its first two acids
    initial_protein = Protein(matrix_size, dimension)
    initial_protein.place_first_two(protein_string)
    start_locations = [initial_protein.last_acid]

    # Fresh result dictionaries for this run
    energy_counter, matrix_sizes = {}, {}

    # Every beam slot starts out referring to the same initial protein object
    proteins = {slot: initial_protein for slot in range(B_width)}

    # Run the search
    find_possibilities(start_locations)

    # Slot 0 is taken as the best protein
    protein_min = proteins[0]
    energy_min = protein_min.energy

    if not protein_min:
        exit("Error: No protein 'protein_min' to return")
    else:
        return protein_min, energy_counter, matrix_sizes
Exemplo n.º 5
0
def csv_loader(path, filename):
    '''
    Load a .csv file and turn its rows into Protein objects.

    Args:
        path: Prefix that is string-concatenated with filename (no separator
            is inserted between the two).
        filename: Name of the file being loaded.
    Returns:
        list: Protein objects built from the first two columns of every row,
            leaving out those whose get_splitted_ec_number() is empty.
    '''
    frame = pd.read_csv(path + filename)
    candidates = (Protein(row[0], row[1]) for row in frame.values)
    # The emptiness filter takes care of e.g. 'n2'-style numbers, which
    # (per the original author) have their 'n' removed.
    return [
        protein for protein in candidates
        if len(protein.get_splitted_ec_number()) > 0
    ]
Exemplo n.º 6
0
def construct_protein_list(file_name):
    """Build one Protein per data row of a ';'-separated file with a header.

    The 'pdb' and 'azole' columns are passed through b2str, the
    'azole_group' column through str.

    Args:
        file_name: file handed to genfromtxt.

    Returns:
        list of Protein objects in row order.
    """
    table = genfromtxt(file_name, dtype=None, delimiter=';', names=True)
    return [
        Protein(id=b2str(row['pdb']),
                azole=b2str(row['azole']),
                azole_group=str(row['azole_group']))
        for row in table
    ]
def add_acids(protein: Protein, start: int, end: int):
    '''
    Re-add the acids with indices strictly between start and end.

    The placement itself is delegated to _add_acids; this function only
    decides in which order the indices are handled and from which location
    the placement starts.
    '''
    pending_indices = list(range(start + 1, end))
    target_location = None

    if start < 0:
        # The first acid was cut off: work backwards from the end acid
        pending_indices = pending_indices[::-1]
        anchor_location = protein.get_acid_index(end).location
    else:
        # An acid exists before the cut; when one also exists after the cut
        # its location is passed along as the end location
        if end <= protein.length - 1:
            target_location = protein.get_acid_index(end).location
        anchor_location = protein.get_acid_index(start).location

    # Add acids recursively
    _add_acids(protein, pending_indices, target_location, anchor_location, 0)
Exemplo n.º 8
0
def main():
    """Validate the invocation, build the protein and apply a test fold.

    When the protein has more than six entries in ``current_option``,
    entries 1 to 6 are overwritten with a fixed up/up/left/left/down/down
    fold. An ``Option`` object is then created from the protein length.
    """
    # checks whether program is used correctly
    check()
    # makes user input into the protein class
    protein = Protein(argv[1])
    # folds protein for testing
    if len(protein.current_option) > 6:
        test_fold = ("up", "up", "left", "left", "down", "down")
        for index, direction in enumerate(test_fold, start=1):
            protein.current_option[index] = direction
    # creates the options for this protein length
    options = Option(protein.length)
    # NOTE(review): a loop over the options was disabled here by commenting
    # out only its header; the body that was left behind at deeper
    # indentation made the module fail with an IndentationError, so it is
    # commented out as well.
    # for option in options:
    #     option
Exemplo n.º 9
0
    def __init__(self, sequence, maxScore=0, **kwargs):
        """Set up the widget with a background, a protein and a score counter.

        Args:
            sequence: passed to ``Protein``.
            maxScore: second argument of ``ScoreCounter`` (the first is 0).
            **kwargs: forwarded to the parent class initialiser.
        """
        super(Fold2D, self).__init__(**kwargs)

        # Interaction flags
        self.do_scale = self.do_rotation = False
        self.translation_touches = 2
        self.translationLock = self.reverseLock = False

        # Children are added in this order: background, protein, score counter
        background = Background(20)
        self.bg = background
        self.add_widget(background)

        protein = Protein(sequence)
        self.protein = protein
        self.add_widget(protein)

        counter = ScoreCounter(0, maxScore)
        self.scoreCounter = counter
        self.add_widget(counter)

        self.win = False
Exemplo n.º 10
0
    def read_pdb(self, pdb):
        """Clear this container and rebuild its chains from a PDB file.

        ATOM records are collected into ``Protein`` chains and HETATM
        records into ``Polymer`` chains (a new one whenever the residue
        number or chain id changes); every new chain is registered through
        ``self.append_chain``. A TER record resets the current chain id and
        reading stops at the first ENDMDL record.

        Args:
            pdb: path of the PDB file to read.
        """
        self.clear()
        chain_id = '-'
        res_num = None
        res_insert = ' '
        is_last_chain_protein = False
        # Iterate the file inside a context manager so the handle is closed
        # deterministically, also on the early ENDMDL exit or on an error.
        with open(pdb, 'r') as pdb_file:
            for line in pdb_file:

                if line.startswith("ATOM"):
                    atom = AtomFromPdbLine(line)
                    # Start a new protein chain on the first ATOM record and
                    # whenever the chain id changes.
                    if not is_last_chain_protein or chain_id != atom.chain_id:
                        protein = Protein()
                        protein.id = atom.chain_id
                        chain_id = protein.id
                        self.append_chain(protein)
                        is_last_chain_protein = True
                        res_num = None
                    # A change of residue number or insertion code starts a
                    # new residue within the current chain.
                    if (res_num != atom.res_num) or (res_insert !=
                                                     atom.res_insert):
                        residue = Residue(atom.res_type, atom.chain_id,
                                          atom.res_num, atom.res_insert)
                        residue.chain_id = chain_id
                        protein.append_residue_no_renum(residue)
                        res_num = atom.res_num
                        res_insert = atom.res_insert
                    protein.insert_atom(-1, atom)

                if line.startswith("HETATM"):
                    atom = AtomFromPdbLine(line)
                    if res_num != atom.res_num or chain_id != atom.chain_id:
                        mol = Polymer()
                        residue = Residue(atom.res_type, atom.chain_id,
                                          atom.res_num)
                        residue.chain_id = atom.chain_id
                        mol.append_residue_no_renum(residue)
                        mol.id = atom.chain_id
                        self.append_chain(mol)
                        res_num = atom.res_num
                        chain_id = atom.chain_id
                        # NOTE(review): the original assigned False to an
                        # unused local `last_chain_is_polymer` here. It may
                        # have been meant to reset `is_last_chain_protein`;
                        # confirm before changing the chain-splitting rules.
                    mol.insert_atom(0, atom)

                if line.startswith("TER"):
                    chain_id = '-'

                if line.startswith("ENDMDL"):
                    break
Exemplo n.º 11
0
def dock():
    """Return a docking affinity as a JSON response.

    NOTE(review): the function currently sleeps for one second and returns
    a fixed affinity of -7.5. Every statement after that first ``return`` is
    unreachable; it looks like the real implementation was stubbed out,
    which should be confirmed before the stub is removed.

    The unreachable part reads 'protein', 'structure' and 'ligand' from the
    JSON request body, builds a Protein from a '.txt'/'.conf' entry of the
    structure directory and passes it to ``Setup.dock``.
    """
    # Stub: fixed delay and fixed result, see the docstring.
    time.sleep(1)
    return make_response(jsonify({"affinity": -7.5}))
    # ---- unreachable from here on ----
    data = json.loads(request.data)
    directory = "proteins/" + str(data['protein']) + "/Structures/" + str(
        data['structure'])
    p1 = None
    for el in os.listdir(directory):
        extension = None
        # substring match: any entry whose name contains '.txt' or '.conf'
        if el.__contains__('.txt') or el.__contains__('.conf'):
            extension = el
        # directory for the conf.txt
        prtdir = directory + "/" + str(extension)

        # if to get rid of .ds-store and other weird files
        if extension is not None:
            # every matching entry overwrites p1, so the last match is used
            p1 = Protein(prtdir)
    # p1 is still None here when no entry matched
    affinity = Setup.dock(p1, str(app.instance_path) + str(data['ligand']))
    obj = {"affinity": affinity}
    return make_response(jsonify(obj))
Exemplo n.º 12
0
def read_uniprot_sequence():
    '''
    Parse 'uniprot_sprot.dat' and collect every EC number the first time it
    appears in a record description.

    The sequence and description of every record whose description contains
    'EC=' are printed, as is every newly seen EC number.

    Returns:
        connection_array: list of [ec_number, accession] pairs, where the
            accession is the first accession of the record that introduced
            the EC number.
        protein_array: list of Protein(ec_number, accession) objects, in the
            same order as connection_array.
    '''
    filename = 'uniprot_sprot.dat'
    protein_array = []
    connection_array = []
    # A set gives constant-time "already seen" checks for EC numbers
    seen_ec = set()
    # The context manager closes the data file once parsing is finished
    with open(filename) as handle:
        for record in SwissProt.parse(handle):
            if 'EC=' not in record.description:
                continue
            # sequence is the string of the primary sequence
            # given by the markers of the residues
            print(record.sequence)

            # description consists of ';' separated parts
            print(record.description)

            for token in record.description.split(';'):
                if 'EC=' not in token:
                    continue
                # text after the first '=' up to the first space
                ec_number = token.split('=')[1].split(' ')[0]
                if ec_number in seen_ec:
                    continue
                print('EC: ', ec_number)  #print EC number as a string
                seen_ec.add(ec_number)
                accession = record.accessions[0]
                connection_array.append([ec_number, accession])
                protein_array.append(Protein(ec_number, accession))
    return connection_array, protein_array
Exemplo n.º 13
0
  def read_pdb(self, pdb):
    """Clear this container and rebuild its chains from a PDB file.

    ATOM records are collected into ``Protein`` chains and HETATM records
    into ``Polymer`` chains (a new one whenever the residue number or chain
    id changes); every new chain is registered through
    ``self.append_chain``. A TER record resets the current chain id and
    reading stops at the first ENDMDL record.

    Args:
      pdb: path of the PDB file to read.
    """
    self.clear()
    chain_id = '-'
    res_num = None
    res_insert = ' '
    is_last_chain_protein = False
    # Iterate the file inside a context manager so the handle is closed
    # deterministically, also on the early ENDMDL exit or on an error.
    with open(pdb, 'r') as pdb_file:
      for line in pdb_file:

        if line.startswith("ATOM"):
          atom = AtomFromPdbLine(line)
          # Start a new protein chain on the first ATOM record and
          # whenever the chain id changes.
          if not is_last_chain_protein or chain_id != atom.chain_id:
            protein = Protein()
            protein.id = atom.chain_id
            chain_id = protein.id
            self.append_chain(protein)
            is_last_chain_protein = True
            res_num = None
          # A change of residue number or insertion code starts a new
          # residue within the current chain.
          if (res_num != atom.res_num) or (res_insert != atom.res_insert):
            residue = Residue(atom.res_type, atom.chain_id,
                              atom.res_num, atom.res_insert)
            residue.chain_id = chain_id
            protein.append_residue_no_renum(residue)
            res_num = atom.res_num
            res_insert = atom.res_insert
          protein.insert_atom(-1, atom)

        if line.startswith("HETATM"):
          atom = AtomFromPdbLine(line)
          if res_num != atom.res_num or chain_id != atom.chain_id:
            mol = Polymer()
            residue = Residue(atom.res_type, atom.chain_id, atom.res_num)
            residue.chain_id = atom.chain_id
            mol.append_residue_no_renum(residue)
            mol.id = atom.chain_id
            self.append_chain(mol)
            res_num = atom.res_num
            chain_id = atom.chain_id
            # NOTE(review): the original assigned False to an unused local
            # `last_chain_is_polymer` here. It may have been meant to reset
            # `is_last_chain_protein`; confirm before changing the
            # chain-splitting rules.
          mol.insert_atom(0, atom)

        if line.startswith("TER"):
          chain_id = '-'

        if line.startswith("ENDMDL"):
          break
Exemplo n.º 14
0
    def setupProtein(proteinToUse, structure):
        """Locate the structures of a protein and build a Protein for each.

        The "proteins" directory is searched case-insensitively for an entry
        named ``proteinToUse``. For every sub-directory of its "Structures"
        folder that contains an entry with '.txt' or '.conf' in its name, a
        ``Protein`` is constructed from that entry.

        Args:
            proteinToUse: name of the protein directory to look for.
            structure: not used by the code visible here.

        The program exits (status 0) when no matching directory exists.
        """
        # Stays None when nothing matches, so the check below can report the
        # problem instead of failing on an unbound local.
        directory = None
        # searching through the proteins directory to find the protein the user wants to dock to
        for i in os.listdir("proteins"):
            if i.lower() == proteinToUse.lower():
                directory = "proteins/" + i + "/Structures"
                print("We have found the directory and it is for the protein --> " + i)
        if directory is None:
            print("Couldn't find directory")
            # should return to the API with null or something if the protein files don't exist
            exit(0)

        # now searching through and creating protein array
        for i in os.listdir(directory):
            extension = None
            loc = os.listdir(directory + "/" + i)
            for el in loc:
                # substring match; the last matching entry wins
                if '.txt' in el or '.conf' in el:
                    extension = el
            # directory for the conf.txt
            prtdir = directory + "/" + i + "/" + str(extension)

            # if to get rid of .ds-store and other weird files
            if extension is not None:
                p1 = Protein(prtdir, directory + "/" + i + "/", i)
Exemplo n.º 15
0
    def __init__(self, trajfile, indexfile, distance_criteria, outputfile,
                 thickness, simplethickness, insertion, printnatoms):
        """Instantiate a Trajectory object and check the consistency of the
        input arguments.

        Requires:
        trajfile: stored as self._trajfile
        indexfile: stored as self._indexfile and read by self.loadIndex()
        distance_criteria: stored as self._distance_criteria
        outputfile: stored as self._outputfile (any falsy value becomes None)
        thickness: falsy, or a sequence of 2 to 5 fields
        simplethickness: truthy to request the simple thickness output;
            cannot be combined with thickness
        insertion: falsy, or a sequence whose first field is 'closest',
            'average', 'zero' or the start of a 2 to 5 field window
            definition
        printnatoms: stored as self._printnatoms

        Ensures:
        The input arguments are assigned to the attributes, the output
        buffers of the requested analyses are initialised to '' and the
        index file has been loaded.

        Raises:
        IOError when the arguments are inconsistent or when the index file
        lacks atoms for a requested analysis or for either monolayer.
        """

        self._trajfile = trajfile
        self._indexfile = indexfile
        self._distance_criteria = distance_criteria
        self._outputfile = outputfile if outputfile else None

        self._printnatoms = printnatoms
        self._thickness = thickness
        self._simplethickness = simplethickness

        if thickness and simplethickness:
            raise IOError(
                'Incompatible arguments: simplethickness and thickness.')

        if thickness:
            self._thicknessOutput1 = ''
            self._thicknessOutput2 = ''
            nargs_thickness = len(thickness)
            if nargs_thickness < 2:
                raise IOError('The thickness argument should have at least 2'
                              ' fields (the window size and step)')
            elif nargs_thickness > 5:
                raise IOError('The thickness argument should have at most 5 '
                              'fields (the window size, step, minimum and '
                              'and maximum values)')
        elif simplethickness:
            self._thicknessOutput = ''

        self._insertion = insertion
        if insertion:
            self._insertionOutput = ''
            nargs_insertion = len(insertion)

            # The warnings use the parenthesised single-argument form of
            # print, which is valid on both Python 2 and Python 3.
            if insertion[0] == 'closest' or \
               insertion[0] == 'average':
                if nargs_insertion == 1:
                    self._insertion_window = insertion[0]
                else:
                    print('Warning: Extra arguments have been '
                          'submitted and will be ignored')

            elif insertion[0] == 'zero':
                if nargs_insertion == 2:
                    self._insertion_window = insertion[0]
                elif nargs_insertion == 1:
                    raise IOError(
                        'Cutoff missing. The center of the '
                        'membrane requires the definition of a cutoff '
                        'beyond which bulk properties are assumed.')
                else:
                    print('Warning: Extra arguments have been '
                          'submitted and will be ignored')

            else:
                if nargs_insertion < 2:
                    raise IOError('The insertion argument requires '
                                  'at least 2 fields (window_size and step)')

                elif nargs_insertion > 5:
                    raise IOError('The insertion argument should '
                                  'have at most 5 fields')

        self._curtime = None
        self._box = None
        self._protein = Protein()
        self._CoI = Protein()
        self._membrane = Membrane()

        # Fills the protein, center-of-interest and membrane groups
        self.loadIndex()

        # Count by iterating: only iterability of the getters is relied on
        proteinCounter = sum(1 for _ in self._protein.getAtomsNumbers())
        coiCounter = sum(1 for _ in self._CoI.getAtoms())

        if self._insertion and coiCounter < 1:
            raise IOError('The provided index file should have at least one '
                          'atom belonging to the Center_of_Interest group')
        elif self._thickness and proteinCounter < 1:
            raise IOError('The provided index file should have at least one '
                          'atom belonging to the Protein group')

        top_memb_size = len(self._membrane.getLeafletAtoms('one'))
        bottom_memb_size = len(self._membrane.getLeafletAtoms('two'))

        if top_memb_size < 1 or bottom_memb_size < 1:
            raise IOError('The provided index file should have at least one '
                          'atom in both Monolayer1 and '
                          'Monolayer2 groups')

        if not insertion and not (thickness or simplethickness):
            raise IOError('This script can calculate thickness and insertion '
                          'provided you use the -thickness or -insertion '
                          'arguments respectively')
Exemplo n.º 16
0
class Trajectory:
    """Run membrane insertion and thickness analyses over a trajectory file.

    The index file assigns atom numbers to the protein, center-of-interest
    and the two monolayer groups; the trajectory file is then read frame by
    frame and the requested values are accumulated in string buffers that
    are written to .xvg files at the end.
    """

    def __init__(self, trajfile, indexfile, distance_criteria, outputfile,
                 thickness, simplethickness, insertion, printnatoms):
        """Instantiate a Trajectory object and check the consistency of the
        input arguments.

        Requires:
        trajfile: trajectory file read by loadTrajectory
        indexfile: index file read by loadIndex
        distance_criteria: passed to Membrane.chooseClosestLeaflet
        outputfile: prefix of the output names (any falsy value becomes None)
        thickness: falsy, or a sequence of 2 to 5 fields
        simplethickness: truthy to request the simple thickness output;
            cannot be combined with thickness
        insertion: falsy, or a sequence whose first field is 'closest',
            'average', 'zero' or the start of a 2 to 5 field window
            definition
        printnatoms: passed to Membrane.getThickness

        Ensures:
        The input arguments are assigned to the attributes, the output
        buffers of the requested analyses are initialised to '' and the
        index file has been loaded.

        Raises:
        IOError when the arguments are inconsistent or when the index file
        lacks atoms for a requested analysis or for either monolayer.
        """

        self._trajfile = trajfile
        self._indexfile = indexfile
        self._distance_criteria = distance_criteria
        self._outputfile = outputfile if outputfile else None

        self._printnatoms = printnatoms
        self._thickness = thickness
        self._simplethickness = simplethickness

        if thickness and simplethickness:
            raise IOError(
                'Incompatible arguments: simplethickness and thickness.')

        if thickness:
            self._thicknessOutput1 = ''
            self._thicknessOutput2 = ''
            nargs_thickness = len(thickness)
            if nargs_thickness < 2:
                raise IOError('The thickness argument should have at least 2'
                              ' fields (the window size and step)')
            elif nargs_thickness > 5:
                raise IOError('The thickness argument should have at most 5 '
                              'fields (the window size, step, minimum and '
                              'and maximum values)')
        elif simplethickness:
            self._thicknessOutput = ''

        self._insertion = insertion
        if insertion:
            self._insertionOutput = ''
            nargs_insertion = len(insertion)

            # The warnings use the parenthesised single-argument form of
            # print, which is valid on both Python 2 and Python 3.
            if insertion[0] == 'closest' or \
               insertion[0] == 'average':
                if nargs_insertion == 1:
                    self._insertion_window = insertion[0]
                else:
                    print('Warning: Extra arguments have been '
                          'submitted and will be ignored')

            elif insertion[0] == 'zero':
                if nargs_insertion == 2:
                    self._insertion_window = insertion[0]
                elif nargs_insertion == 1:
                    raise IOError(
                        'Cutoff missing. The center of the '
                        'membrane requires the definition of a cutoff '
                        'beyond which bulk properties are assumed.')
                else:
                    print('Warning: Extra arguments have been '
                          'submitted and will be ignored')

            else:
                if nargs_insertion < 2:
                    raise IOError('The insertion argument requires '
                                  'at least 2 fields (window_size and step)')

                elif nargs_insertion > 5:
                    raise IOError('The insertion argument should '
                                  'have at most 5 fields')

        self._curtime = None
        self._box = None
        self._protein = Protein()
        self._CoI = Protein()
        self._membrane = Membrane()

        # Fills the protein, center-of-interest and membrane groups
        self.loadIndex()

        # Count by iterating: only iterability of the getters is relied on
        proteinCounter = sum(1 for _ in self._protein.getAtomsNumbers())
        coiCounter = sum(1 for _ in self._CoI.getAtoms())

        if self._insertion and coiCounter < 1:
            raise IOError('The provided index file should have at least one '
                          'atom belonging to the Center_of_Interest group')
        elif self._thickness and proteinCounter < 1:
            raise IOError('The provided index file should have at least one '
                          'atom belonging to the Protein group')

        top_memb_size = len(self._membrane.getLeafletAtoms('one'))
        bottom_memb_size = len(self._membrane.getLeafletAtoms('two'))

        if top_memb_size < 1 or bottom_memb_size < 1:
            raise IOError('The provided index file should have at least one '
                          'atom in both Monolayer1 and '
                          'Monolayer2 groups')

        if not insertion and not (thickness or simplethickness):
            raise IOError('This script can calculate thickness and insertion '
                          'provided you use the -thickness or -insertion '
                          'arguments respectively')

    def getInsertionOutput(self):
        """Return the insertion output accumulated so far."""
        return self._insertionOutput

    def analyseTrajectory(self):
        """Process every frame of the trajectory and write the output files.

        For each frame produced by loadTrajectory the requested insertion
        and/or thickness values are computed and buffered through
        saveOutput; after the last frame the buffers are written out.
        Reads the module-level ``args.printclosestleaflet`` flag when the
        insertion is calculated.
        """
        def createOutputFile(filename):
            # Resolve the output name and delete a file left by an earlier
            # run. os.remove replaces a shell 'rm -f' built from the
            # user-supplied output name; like 'rm -f', a missing or
            # undeletable file does not abort the run.
            outputname = self.getOutputName(filename)
            try:
                os.remove(outputname)
            except OSError:
                pass
            return outputname

        traj = self.loadTrajectory()

        if self._insertion:
            outputnameInsertion = createOutputFile("insertion")

        if self._thickness:
            outputnameThicknessTop = createOutputFile("thicknessTop")
            outputnameThicknessAvg1 = createOutputFile("thicknessTop_avg")
            outputnameThicknessBottom = createOutputFile("thicknessBottom")
            outputnameThicknessAvg2 = createOutputFile("thicknessBottom_avg")

        if self._simplethickness:
            outputnameThickness = createOutputFile("thickness")

        # Each iteration corresponds to one frame; the frame data itself is
        # stored on self by loadTrajectory, the yielded value is not used.
        for frame in traj:
            if self._insertion:
                # Calculate geometric center of Center_of_Interest
                self._CoI.calcCenter()

                if 'zero' == self._insertion[0]:
                    # Calculate the Membrane Half Z
                    self._membrane.calcHalfMembraneZ(
                        self._protein, (0, 0, 0, 0, self._insertion[1]),
                        self._box)
                else:
                    # Choose leaflet
                    self._membrane.chooseClosestLeaflet(
                        self._CoI, self._box, self._distance_criteria)

                # Calculate insertion
                insertion = self._CoI.getInsertion(self._membrane,
                                                   self._insertion, self._box,
                                                   outputnameInsertion, self)

                if args.printclosestleaflet:
                    insertion = '{0} {1}'.format(
                        insertion, self._membrane._closestLeaflet)

                # Save to Output
                self.saveOutput(outputnameInsertion, insertion)

            if self._thickness:
                # Calculate the Membrane Half Z
                self._membrane.calcHalfMembraneZ(self._protein,
                                                 self._thickness, self._box)

                # Attribution of the Protein atoms to membrane
                # leaflets ('bottom' and 'top')
                self._CoI.calcAtomsClosestML(self._membrane)

                # Calculate the Thickness for ML1
                thicknessTop = self._membrane.getThickness(
                    self._CoI, 'top', self._box, self._thickness,
                    outputnameThicknessTop, self._printnatoms)

                # Calculate the Thickness for ML2
                thicknessBottom = self._membrane.getThickness(
                    self._CoI, 'bottom', self._box, self._thickness,
                    outputnameThicknessBottom, self._printnatoms)
                self._CoI.clearLeafletAtoms()
                # Save the Outputs
                self.saveOutput(outputnameThicknessTop, thicknessTop)
                self.saveOutput(outputnameThicknessBottom, thicknessBottom)

            if self._simplethickness:
                # Calculate the Membrane Thickness
                thickness = self._membrane.getSimpleThickness(
                    outputnameThickness)

                # Save the Outputs
                self.saveOutput(outputnameThickness, thickness)

        # Write to Output
        if self._insertion:
            self.writeOutput(outputnameInsertion)

        if self._thickness:
            self.writeOutput(outputnameThicknessTop)
            self.writeOutput(outputnameThicknessBottom)

            avgs_top, windows_top,\
                avgs_bottom, windows_bottom = self._membrane.calcThicknessAvg()

            self.writeAvgOutput(outputnameThicknessAvg1, avgs_top, windows_top)
            self.writeAvgOutput(outputnameThicknessAvg2, avgs_bottom,
                                windows_bottom)

        if self._simplethickness:
            self.writeOutput(outputnameThickness)

    def loadIndex(self):
        """Read the index file and register the atoms of the known groups.

        A line containing both '[ ' and ' ]' is a group header; the atom
        numbers on the following lines are added to the protein, the
        center of interest or one of the two monolayers. Atoms of any other
        group are ignored.
        """
        with open(self._indexfile) as f:
            addTo = None
            for line in f:
                line = line.strip()
                if '[ ' in line and ' ]' in line:
                    indexName = line.replace('[', '').replace(']', '')
                    indexName = indexName.replace(' ', '').lower()
                    if indexName in ('protein', 'center_of_interest',
                                     'monolayer1', 'monolayer2'):
                        addTo = indexName
                    else:
                        # Unknown group: skip its atoms
                        addTo = None

                elif addTo:
                    for atomNumber in line.split():
                        if addTo == 'protein':
                            self._protein.addAtom(atomNumber)

                        elif addTo == 'center_of_interest':
                            self._CoI.addAtom(atomNumber)

                        elif addTo == 'monolayer1':
                            self._membrane.addAtom(atomNumber, 'one')

                        elif addTo == 'monolayer2':
                            self._membrane.addAtom(atomNumber, 'two')

    def loadTrajectory(self):
        """Generator that reads the trajectory file one frame at a time.

        ATOM lines update the properties of the atoms that belong to the
        protein, the center of interest or the membrane; CRYST1 lines set
        self._box and TITLE lines set self._curtime from the text after
        't='. At every TER line the generator yields (without a value)
        after checking the index against the trajectory.

        Raises:
            IOError when the CoI or protein atoms of the index do not match
            the trajectory file.
        """
        def readLine(line):
            # Fixed-column fields of an ATOM line
            atype = line[12:16].strip()
            residue = line[23:26]
            x = float(line[30:38])
            y = float(line[38:46])
            z = float(line[46:54])
            return atype, residue, x, y, z

        proteinAtoms = self._protein.getAtomsNumbers()
        CoIAtoms = self._CoI.getAtomsNumbers()
        membraneAtoms = self._membrane.getAtomsNumbers()
        with open(self._trajfile) as f:
            for line in f:
                if line[0:4] == 'ATOM':
                    number = line[4:11].strip()

                    # The protein test is independent of the other two, so
                    # an atom can be stored in the protein and in the CoI.
                    if number in proteinAtoms:
                        atype, residue, x, y, z = readLine(line)
                        self._protein.addProperties(number, atype, residue, x,
                                                    y, z)

                    if number in CoIAtoms:
                        atype, residue, x, y, z = readLine(line)
                        self._CoI.addProperties(number, atype, residue, x, y,
                                                z)

                    elif number in membraneAtoms:
                        atype, residue, x, y, z = readLine(line)
                        self._membrane.addProperties(number, atype, residue, x,
                                                     y, z)
                elif line[0:6] == 'CRYST1':
                    fields = line.split()
                    box_x = float(fields[1])
                    box_y = float(fields[2])
                    box_z = float(fields[3])
                    self._box = box_x, box_y, box_z

                elif line[0:5] == 'TITLE':
                    line = line.strip()
                    timestamp = line.split('t=')[1].split()[0]
                    self._curtime = int(float(timestamp))

                elif line[0:3] == 'TER':
                    if not self._CoI.IndexandTrajAtomsMatch():
                        raise IOError(
                            'Index file not correct. CoI group atoms in the index do '
                            'not match the trajectory file')
                    if not self._protein.IndexandTrajAtomsMatch():
                        raise IOError(
                            'Index file not correct. Protein group atoms in the index do '
                            'not match the trajectory file')

                    yield

    def getOutputName(self, prefix):
        """Return '<outputfile>_<prefix>.xvg', or '<prefix>.xvg' when no
        output file prefix was given."""
        if self._outputfile:
            outputname = '{0}_{1}.xvg'.format(self._outputfile, prefix)
        else:
            outputname = '{0}.xvg'.format(prefix)

        return outputname

    def saveOutput(self, outputname, data):
        """Format one frame of data and append it to the matching buffer.

        The buffer is selected by the last '_'-separated part of outputname
        (without '.xvg'). Unless data starts with 'time', the line is
        prefixed with the current time. Thickness lines whose NaN count
        equals (number of fields - 2) // 3 are not stored.
        """
        if data[:4] == 'time':
            line = ''
        else:
            line = '{0:9f} '.format(self._curtime)

        fields = data.split(' ')
        nNaNs = 0
        for value in fields:
            if value == '\n':
                line = '{0}\n{1:9f}\t'.format(line, self._curtime)
            else:
                line = '{0}{1:5s} '.format(line, value)
                if value == 'NaN':
                    nNaNs += 1

        # Floor division keeps the integer result that '/' gives for ints on
        # Python 2, so the comparison behaves the same on Python 3.
        all_nan_count = (len(fields) - 2) // 3

        data_type = outputname.split('_')[-1].replace('.xvg', '')
        if data_type == 'insertion':
            self._insertionOutput += line + '\n'
        elif data_type == 'thicknessTop':
            # If all NaNs don't save the data
            if nNaNs != all_nan_count:
                self._thicknessOutput1 += line + '\n'
        elif data_type == 'thicknessBottom':
            # If all NaNs don't save the data
            if nNaNs != all_nan_count:
                self._thicknessOutput2 += line + '\n'
        elif data_type == 'thickness':
            self._thicknessOutput += line + '\n'

    def writeOutput(self, outputname):
        """Write the buffer that belongs to outputname to that file.

        An empty buffer is replaced by a 'No occurrences' message.
        """
        data_type = outputname.split('_')[-1].replace('.xvg', '')
        if data_type == 'insertion':
            data = self._insertionOutput
        elif data_type == 'thicknessTop':
            data = self._thicknessOutput1
        elif data_type == 'thicknessBottom':
            data = self._thicknessOutput2
        elif data_type == 'thickness':
            data = self._thicknessOutput

        with open(outputname, 'w') as f:
            if len(data) == 0:
                f.write('No occurrences in this monolayer\n')
            else:
                f.write(data)

    def writeAvgOutput(self, outputname, avgs, windows):
        """Write one 'window average' line per window to outputname.

        An empty list of windows produces a 'No occurrences' message.
        """
        text = ''
        with open(outputname, 'w') as f:
            for i, window in enumerate(windows):
                text += '{0:9} {1:9}\n'.format(window, avgs[i])
            if len(text) == 0:
                text = 'No occurrences in this monolayer\n'
            f.write(text)
Exemplo n.º 17
0

# Command-line driver: for every protein named in the dataset file, load its
# .pdb file, score it with the network and extract the binding sites.
args = parse_args()

# All three input paths must exist; the output directory is created on demand.
for required_path in (args.dataset_file, args.protein_path, args.model_path):
    if not os.path.exists(required_path):
        raise IOError('%s does not exist.' % required_path)
if not os.path.exists(args.output):
    os.makedirs(args.output)

with open(args.dataset_file, 'r') as f:
    lines = f.readlines()

# One protein name per line. Stripping (instead of cutting off the last
# character) keeps the final name intact when the file does not end with a
# newline; blank lines are skipped.
protein_names = [name for name in (line.strip() for line in lines) if name]

for prot in protein_names:
    protein = Protein(os.path.join(args.protein_path,
                                   prot + '.pdb'), args.protonate, args.expand,
                      args.f, args.output, args.discard_points)

    # NOTE(review): the network and the extractor are rebuilt for every
    # protein. They could probably be created once before the loop if they
    # hold no per-protein state - confirm before hoisting.
    nn = Network(args.model_path, args.model, args.voxel_size)

    lig_scores = nn.get_lig_scores(protein, args.batch)

    extractor = Bsite_extractor(args.T)

    extractor.extract_bsites(protein, lig_scores)
Exemplo n.º 18
0
def greedy(protein_string, look_aheads, N_tries, dimension, matrix_size):
    '''
    Runs a Greedy look-ahead algorithm in which N_tries proteins are
    created from a string of amino acid types. The best conformation,
    the one that has the lowest energy, is saved and returned along with
    a dictionary of all energy counts and the minimal matrix sizes for the
    folded protein.

    Arguments:
        protein_string: string of amino acid types to fold.
        look_aheads: passed through unchanged to greedy_fold.
        N_tries: number of complete proteins to fold.
        dimension, matrix_size: passed to the Protein constructor.

    Returns a tuple (protein_min, energy_counter, matrix_sizes):
        protein_min: deep copy of the lowest-energy protein found.
        energy_counter: {energy: number of folds with that energy}.
        matrix_sizes: {energy: {smallest matrix size: count}}.

    Exits the program with an error message when no protein was saved
    (for example N_tries == 0, or no fold with energy below 1).
    '''
    # Create a protein object with a specific matrix size
    protein = Protein(matrix_size, dimension)

    # Place the first two amino acids
    protein.place_first_two(protein_string)

    energy_min = 1
    # Initialised up front so the final check cannot hit an unbound name
    # when no fold ever improves on energy_min.
    protein_min = None

    energy_counter = {}
    matrix_sizes = {}

    # Try to fold N_tries protein greedy like
    for i in range(N_tries):

        # Progress is reported for every protein.
        print(f"{i + 1}th protein folded")

        # Fold at least once and keep retrying until a complete protein
        # is produced.
        solution_found = False
        while not solution_found:
            # Remove acids until only the first two are left
            while protein.length > 2:
                protein.remove_acid(0)

            solution_found, protein = greedy_fold(protein, protein_string,
                                                  look_aheads)

        energy = protein.energy

        # When its energy is lower than lowest energy found, save the protein
        if energy < energy_min:
            energy_min = energy
            protein_min = copy.deepcopy(protein)
            print(f"New minimum energy found: {energy_min}")

        # Update the dictionary for histogram of solutions
        energy_counter[energy] = energy_counter.get(energy, 0) + 1

        # Determine the smallest matrix size needed for this protein
        min_matrix_size = protein.smallest_matrix()
        sizes_for_energy = matrix_sizes.setdefault(energy, {})
        sizes_for_energy[min_matrix_size] = sizes_for_energy.get(
            min_matrix_size, 0) + 1

    if protein_min is None:
        exit("Error: No protein 'protein_min' to return")
    return protein_min, energy_counter, matrix_sizes
Exemplo n.º 19
0
def Loadpdb(pdb=None, hetatm= True, verbose=False):
    try:
        assert(pdb != None) #Check if filehandle to PDB file is passed
    except AssertionError:
        sys.exit("**No filehandle passed**. Pass a filehandle (to a pdb file) as an argument to Loadpdb.")
    
    AtomNumber=0 #Keeps track of atom indices (assigned in the order atoms listed in input file)
    mol_data={} #Key: molid; Value: Molecule_Type Object; Keep track of different molecules (different chains or molecule type) in input structure
    first_res =True  #To identify molecule type of every molecule in input structure and accordingly define Molecule object.
    Prev_res=0 # to keep track of residue change 
    Prev_chain='aa' # to keep track of chain change in HETATM section
    atmTohet = True #To determine transition from ATOM to HETATM record
    frame_tag = '' # To keep track of multi-frame entry (multiple entry for same molecule type with same chain id)
    '''Load the PDB structure file'''
    for line in pdb:
        if line[0:4]=="ATOM" and line[12:16].upper().strip() not in ["OXT"]:
            AtomNumber+=1
            AtomName, ResName, Chain, ResNo, CordX, CordY, CordZ, Occ, Bfac = Pdbcordsec(line)
            atm = Atom(AtomName, AtomNumber, ResName, Chain, ResNo, CordX, CordY, CordZ)
            
            #Check for unrecognized residue and new molecule
            if not first_res:
                if ResName.lower() not in Mol_types['protein'] + Mol_types['lipid'] + Mol_types['ligand']:
                    print "*** Unrecognized residue name: "+ ResName+ " ***.\nAdded %s as Ligand." % ResName
                    sys.exit("In file configstruc.py: Add missing residue name("+ ResName+ ") to appropriate molecule in Mol_types")
                if ResNo != Prev_res or (Prev_chain != Chain and mol.molecule_type().lower()=='ligand'):
                    if mol.molecule_type().lower()=='ligand':
                        mol_data[Molecule.molid] = deepcopy(mol) #copy mol object into dictionary
                        first_res = True
                    elif Prev_chain != Chain or ResName.lower() not in Mol_types[mol.molecule_type().lower()]: #Either Chain is different or New residue doesn't belong to current molecule type
                        mol_data[Molecule.molid] = deepcopy(mol) #copy mol object into dictionary
                        first_res = True
                    elif frame_tag.lower() in ['endmdl', 'ter', 'end'] and Prev_chain == Chain: #Different molecule (of same molecule type) with same chain id; as in trajectory frames
                        mol_data[Molecule.molid] = deepcopy(mol) #copy mol object into dictionary
                        first_res = True
                        
                        
            if first_res: #Initialize mol for new chain or molecule
                if ResName.lower() in Mol_types['protein']:
                    mol = Protein()
                elif ResName.lower() in Mol_types['lipid']:
                    mol = Lipid()
                elif ResName.lower() in Mol_types['ligand']:
                    mol = Ligand()
                else:
                    print "*** Unrecognized residue name: "+ ResName+ " ***.\n Cannot initialize Molecule object."
                    sys.exit("In file configstruc.py: Add missing residue name("+ ResName+ ") to appropriate molecule in Mol_types")
                first_res = False
                frame_tag = ''
            
            mol.AddToResidue(atom=atm, occ=Occ, bfac=Bfac)
            mol.atmidx.append(AtomNumber)
            Prev_res= ResNo
            Prev_chain=Chain
        elif line[0:6]=="HETATM" and hetatm == True:
            if atmTohet:
                mol_data[Molecule.molid] = deepcopy(mol) #copy mol object into dictionary
                first_res = True
                atmTohet = False
            AtomNumber+=1
            AtomName, ResName, Chain, ResNo, CordX, CordY, CordZ, Occ, Bfac = Pdbcordsec(line)
            atm = Atom(AtomName, AtomNumber, ResName, Chain, ResNo, CordX, CordY, CordZ)
            
            #Check for new ligand molecule 
            if not first_res and (ResNo != Prev_res or Prev_chain != Chain):
                mol_data[Molecule.molid] = deepcopy(mol) #copy mol object into dictionary
                first_res = True
            
            #Initialize mol for new chain or molecule            
            if first_res: 
                if ResName.lower() in Mol_types['ligand']:
                    mol = Ligand()
                else:
                    print "*** Unrecognized residue name: "+ ResName+ " ***.\n Cannot initialize Ligand object."
                    sys.exit("In file configstruc.py: Add missing residue name ("+ ResName+ ") to ligand molecule in Mol_types")
                first_res = False
            
            mol.AddToResidue(atom=atm, occ=Occ, bfac=Bfac)
            mol.atmidx.append(AtomNumber)
            Prev_res= ResNo
            Prev_chain=Chain
        elif line[0:3].lower() in ["ter", "end"] or line[0:6].lower() == "endmdl":
            frame_tag = line[0:3]
    #append the last mol object to mol_data
    mol_data[Molecule.molid] = deepcopy(mol) #copy mol object into dictionary
    
    if verbose:
        print "Number of molecules in input file: ", len(mol_data), "\n"
    #Update mol_data[molid].nor, mol_data[molid].resids, and check for chain breaks in non-ligand molecules
    for key in sorted(mol_data):
        if verbose:
            print "Molid:", key,"Molecule_Type:",mol_data[key].molecule_type()
        mol_data[key].resids = sorted(mol_data[key].residue)
        mol_data[key].nor = len(mol_data[key].resids)
        if mol_data[key].molecule_type().lower() != 'ligand':
            #Check for chain breaks in non-ligand molecule
            resids_diff=numpy.array(mol_data[key].resids[1:]) - numpy.array(mol_data[key].resids[:-1])
            if mol_data[key].nor != (numpy.sum(resids_diff)+1):
                break_indices = numpy.where(resids_diff > 1)
                print "Chain break encountered in molecule",key, "at residue positions: "
                for res in break_indices[0]:
                    print mol_data[key].resids[res],
                print "\n"
                mol_data[key].chain_break = True
    return mol_data 
Exemplo n.º 20
0
def Loadpdb(pdb=None, hetatm=True, verbose=False):
    try:
        assert (pdb != None)  #Check if filehandle to PDB file is passed
    except AssertionError:
        sys.exit(
            "**No filehandle passed**. Pass a filehandle (to a pdb file) as an argument to Loadpdb. "
        )

    AtomNumber = 0  #Keeps track of atom indices (assigned in the order atoms listed i input file)
    mol_data = {
    }  #Key: molid; Keep track of different molecules (different chains or molecule type) in input structure
    check_het = False  #To keep track of new Hetero residue
    first_res = True  #To identify molecule type of every molecule in input structure and accordingly define Molecule object.
    Prev_res = 0  # to keep track of residue change in HETATM section; a new Molecule object is assigned for every residue.
    Prev_chain = 'a'  # to keep track of chain change in HETATM section
    '''Load the PDB structure file'''
    for line in pdb:
        if line[0:4] == "ATOM" and line[12:16].upper().strip() not in ["OXT"]:
            AtomNumber += 1
            AtomName, ResName, Chain, ResNo, CordX, CordY, CordZ, Occ, Bfac = Pdbcordsec(
                line)
            atm = Atom(AtomName, AtomNumber, ResName, Chain, ResNo, CordX,
                       CordY, CordZ)

            if first_res:  #Initialize mol for new chain or molecule
                if ResName.lower() in Mol_types['protein']:
                    mol = Protein()
                elif ResName.lower() in Mol_types['ligand']:
                    mol = Ligand()
                else:
                    print "*** Unrecognized residue name: " + ResName + " ***.\n Cannot initialize Molecule object."
                    sys.exit(
                        "In file configstruc.py: Add missing residue name(" +
                        ResName + ") to appropriate molecule in Mol_types")
                first_res = False

            mol.AddToResidue(atom=atm, occ=Occ, bfac=Bfac)
            mol.atmidx.append(AtomNumber)
        elif line[0:3] == "TER":
            mol_data[Molecule.molid] = deepcopy(
                mol)  #copy mol object into dictionary
            first_res = True  # mol object will be initialized to molecule type of next molecule
        elif line[0:6] == "HETATM" and hetatm == True:
            AtomNumber += 1
            AtomName, ResName, Chain, ResNo, CordX, CordY, CordZ, Occ, Bfac = Pdbcordsec(
                line)
            atm = Atom(AtomName, AtomNumber, ResName, Chain, ResNo, CordX,
                       CordY, CordZ)

            #Check for new molecule
            if (
                    ResNo != Prev_res or Prev_chain != Chain
            ) and check_het == True:  #For first HETATM check_het is always False
                mol_data[Molecule.molid] = deepcopy(
                    mol)  #copy mol object into dictionary
                first_res = True

            #Initialize mol for new chain or molecule
            if first_res:
                if ResName.lower() in Mol_types['ligand']:
                    mol = Ligand()
                else:
                    print "*** Unrecognized residue name: " + ResName + " ***.\n Cannot initialize Ligand object."
                    sys.exit(
                        "In file configstruc.py: Add missing residue name (" +
                        ResName + ") to ligand molecule in Mol_types")
                first_res = False

            mol.AddToResidue(atom=atm, occ=Occ, bfac=Bfac)
            mol.atmidx.append(AtomNumber)
            if Prev_res == 0:
                check_het = True
            Prev_res = ResNo
            Prev_chain = Chain
    if hetatm:  #If HETATM record was added; append the last hetero residue object to mol_data
        mol_data[Molecule.molid] = deepcopy(
            mol)  #copy mol object into dictionary

    if verbose:
        print "Number of molecules in input file: ", len(mol_data), "\n"
    #Update mol_data[molid].nor, mol_data[molid].resids, and check for chain breaks in non-ligand molecules
    for key in sorted(mol_data):
        if verbose:
            print "Molid:", key, "Molecule_Type:", mol_data[key].molecule_type(
            )
        if mol_data[key].molecule_type().lower() != 'ligand':
            mol_data[key].resids = sorted(mol_data[key].residue)
            mol_data[key].nor = len(mol_data[key].resids)
            #Check for chain breaks in protein
            resids_diff = numpy.array(mol_data[key].resids[1:]) - numpy.array(
                mol_data[key].resids[:-1])
            if mol_data[key].nor != (numpy.sum(resids_diff) + 1):
                break_indices = numpy.where(resids_diff > 1)
                print "Chain break encountered in molecule", key, "at residue positions: "
                for res in break_indices[0]:
                    print mol_data[key].resids[res],
                print "\n"
    return mol_data
Exemplo n.º 21
0
class Fold2D(ScatterPlane):
    lockGrowingPeptide = False
    lockSelectingResidue = False
    def __init__(self, sequence, maxScore=0, **kwargs):
        super(Fold2D, self).__init__(**kwargs)
        self.do_scale=False
        self.do_rotation=False
        self.translation_touches=2
        self.translationLock=False
        self.reverseLock=False

        self.bg = Background(20)
        self.add_widget(self.bg)
        
        self.protein = Protein(sequence)
        self.add_widget(self.protein)
        self.scoreCounter = ScoreCounter(0,maxScore)
        self.add_widget(self.scoreCounter)
        self.win=False
    
    def remove(self):
        self.protein.remove()
        self.remove_widget(self.scoreCounter)

    def on_touch_down(self, touch):
        super(Fold2D, self).on_touch_down(touch)
        if self.protein.toolBar.collide_point(touch.x,touch.y) and not self.reverseLock:
            self.protein.reverseSequence()
            self.reverseLock=True
        
        

    def on_touch_move(self, touch):
        super(Fold2D, self).on_touch_move(touch)
        if multitouch and self.translationLock: return
        self.lockGrowingPeptide = True
        self.protein.placeAA((touch.x,touch.y))
        self.scoreCounter.setScore(self.protein.score)

    def on_touch_up(self, touch):
        super(Fold2D, self).on_touch_up(touch)
        
        if multitouch and self.translationLock: return

        if not self.lockGrowingPeptide and not self.reverseLock:
            self.protein.select((touch.x,touch.y))
        
        self.lockGrowingPeptide = False
        self.reverseLock=False
        
        self.scoreCounter.setScore(self.protein.score)
        
        print self.scoreCounter.score, self.scoreCounter.anticipatedScore
        if self.scoreCounter.score >= self.scoreCounter.anticipatedScore:
            self.win=True
            #self.parent.back()

    def transform_with_touch(self, touch):
        if not multitouch:
            return
        if len(self._touches) == self.translation_touches:
            self.translationLock=True
            dx = (touch.x - self._last_touch_pos[touch][0]) \
                * self.do_translation_x
            dy = (touch.y - self._last_touch_pos[touch][1]) \
                * self.do_translation_y
            dx = dx / self.translation_touches
            dy = dy / self.translation_touches
            changed = True      
            self.protein.translate(dx,dy)
        else:
            self.translationLock=False


    def apply_transform(self, trans, post_multiply=False, anchor=(0, 0) ):
        return

    def update(self, dt):
        pass
Exemplo n.º 22
0
def main(argv=None):  # IGNORE:C0111
    '''Command line options.

    Parses the command line, builds a Protein from the interaction,
    desolvation and background files found in the input folder, computes
    the titration curves, appends the elapsed seconds to timing.txt in the
    output folder and writes the curves with create_output.

    argv: optional extra arguments; when given they are appended to
    sys.argv before parsing.

    Returns 0 on success and on KeyboardInterrupt (the interrupt is
    re-raised instead when DEBUG is set).
    '''

    if argv is None:
        argv = sys.argv
    else:
        sys.argv.extend(argv)

    program_name = os.path.basename(sys.argv[0])
    program_version = "v%s" % __version__
    program_build_date = str(__updated__)
    program_version_message = '%%(prog)s %s (%s)' % (program_version,
                                                     program_build_date)
    # Second line of the __main__ module docstring is used as short description.
    program_shortdesc = __import__('__main__').__doc__.split("\n")[1]
    program_license = '''%s

  Created by Kyle Monson on %s.
  Copyright 2015 Pacific Northwest National Laboratory. All rights reserved.

  Licensed under the Apache License 2.0
  http://www.apache.org/licenses/LICENSE-2.0

  Distributed on an "AS IS" basis without warranties
  or conditions of any kind, either express or implied.

USAGE
''' % (program_shortdesc, str(__date__))

    try:
        # Setup argument parser
        parser = ArgumentParser(description=program_license,
                                formatter_class=RawDescriptionHelpFormatter)
        parser.add_argument("-v",
                            "--verbose",
                            dest="verbose",
                            action="count",
                            help="set verbosity level [default: %(default)s]")
        parser.add_argument('-V',
                            '--version',
                            action='version',
                            version=program_version_message)
        parser.add_argument(dest="input",
                            help="path to input folder",
                            metavar="input_path")
        parser.add_argument(dest="output",
                            help="paths to output folder",
                            metavar="output_path")
        parser.add_argument(
            "--test",
            action='store_true',
            default=False,
            help="Run basic sanity tests using selected input.")
        parser.add_argument("--dump-state",
                            action='store_true',
                            default=False,
                            help="Dump state to state.txt in output.")

        # Process arguments
        args = parser.parse_args()

        input_path = args.input
        output_path = args.output
        # action="count" yields None when -v is absent; normalise to 0 so
        # the comparisons below are valid on Python 3 as well.
        verbose = args.verbose or 0
        dump_state = args.dump_state

        if verbose > 0:
            print("Verbose mode on")

        try:
            if verbose > 0:
                print("Creating output directory")
            os.makedirs(output_path)
        except OSError:
            # NOTE(review): any OSError (not only "already exists") ends
            # up here; a real failure surfaces later when files are opened.
            if verbose > 0:
                print("Output directory already exists.")

        interaction_filepath = os.path.join(input_path,
                                            INTERACTION_BASE_FILENAME)
        background_filepath = os.path.join(input_path,
                                           BACKGROUND_BASE_FILENAME)
        desolvation_filepath = os.path.join(input_path,
                                            DESOLVATION_BASE_FILENAME)

        with open(interaction_filepath) as interaction_file, \
             open(background_filepath) as background_file, \
             open(desolvation_filepath) as desolvation_file:
            protein = Protein(interaction_file, desolvation_file,
                              background_file)

        state_file = None
        if dump_state:
            state_file = open(os.path.join(output_path, "state.txt"), 'w')

        # The state file is closed even when the computation or the timing
        # write raises, so the handle is never leaked.
        try:
            start = datetime.now()
            curves = get_titration_curves(protein.protein_complex, state_file)
            end = datetime.now()

            delta = end - start
            delta_seconds = delta.total_seconds()

            with open(os.path.join(output_path, "timing.txt"),
                      'a') as timing_file:
                timing_file.write(str(delta_seconds) + '\n')
        finally:
            if state_file is not None:
                state_file.close()

        create_output(output_path, curves)

        #pprint(dict(curves))

        if args.test:
            import tests
            #tests.test_normalize(protein)
            #tests.test_stuff(protein)
            #tests.test_adding_ph(protein)

        return 0
    except KeyboardInterrupt:
        ### handle keyboard interrupt ###
        # `except X as e` / bare `raise` work on Python 2.6+ and 3, unlike
        # the old `except X, e` spelling.
        if DEBUG:
            raise
        return 0
Exemplo n.º 23
0
from protein import Protein, trypsin
from measurement import read_mgf
import pandas as pd
from multiprocessing import Pool
OVA = "GSIGAASMEFCFDVFKELKVHHANENIFYCPIAIMSALAMVYLGAKDSTRTQINKVVRFDKLPGFGDSIEAQCGTSVNVHSSLRDILNQITKPNDVYSFSLASRLYAEERYPILPEYLQCVKELYRGGLEPINFQTAADQARELINSWVESQTNGIIRNVLQPSSVDSQTAMVLVNAIVFKGLWEKAFKDEDTQAMPFRVTEQESKPVQMMYQIGLFRVASMASEKMKILELPFASGTMSMLVLLPDEVSGLEQLESIINFEKLTEWTSSNVMEERKIKVYLPRMKMEEKYNLTSVLMAMGITDVFSSSANLSGISSAESLKISQAVHAAHAEINEAGREVVGSAEAGVDAASVSEEFRADHPFLFCIKHIATNAVLFFGRCVSP"
LYS = "KVFGRCELAAAMKRHGLDNYRGYSLGNWVCAAKFESNFNTQATNRNTDGSTDYGILQINSRWWCNDGRTPGSRNLCNIPCSALLSSDITASVNCAKKIVSDGNGMNAWVAWRNRCKGTDVQAWIRGCRL"
BSA = "DTHKSEIAHRFKDLGEEHFKGLVLIAFSQYLQQCPFDEHVKLVNELTEFAKTCVADESHAGCEKSLHTLFGDELCKVASLRETYGDMADCCEKQEPERNECFLSHKDDSPDLPKLKPDPNTLCDEFKADEKKFWGKYLYEIARRHPYFYAPELLYYANKYNGVFQECCQAEDKGACLLPKIETMREKVLTSSARQRLRCASIQKFGERALKAWSVARLSQKFPKAEFVEVTKLVTDLTKVHKECCHGDLLECADDRADLAKYICDNQDTISSKLKECCDKPLLEKSHCIAEVEKDAIPENLPPLTADFAEDKDVCKNYQEAKDAFLGSFLYEYSRRHPEYAVSVLLRLAKEYEATLEECCAKDDPHACYSTVFDKLKHLVDEPQNLIKQNCDQFEKLGEYGFQNALIVRYTRKVPQVSTPTLVEVSRSLGKVGTRCCTKPESERMPCTEDYLSLILNRLCVLHEKTPVSEKVTKCCTESLVNRRPCFSALTPDETYVPKAFDEKLFTFHADICTLPDTEKQIKKQTALVELLKHKPKATEEQLKTVMENFVAFVDKCCAADDKEACFAVEGPKLVVSTQTALA"

# LYS bridges
# (each line below pairs two LYS sequence fragments)
# VFGRCELAAA + WIRGCRL
# GNWVCAAKFE + WRNRCKGTDV
# SRWWCNDGRT + CNIPCSALLS
# SRNLCNIPCS + ASVNCAKKIV

# Digest the LYS sequence with trypsin and load the measured spectra.
protein = Protein(LYS)
peptides = list(protein.digest(trypsin))
measurements = list(read_mgf("../data/mgf/190318_LYS_RAT_50x_05.mgf"))

# Protein Pilot
# Paragon
# AA tags from the spectrum

# Mass tolerances in parts per million.
soft_err_ppm = 50
hard_err_ppm = 10
result = []
# Pair every peptide with its absolute tolerance: hard_err_ppm scaled by
# the peptide's total_mz.
peps_with_threshold = [(pep, (hard_err_ppm / 1e6) * pep.total_mz)
                       for pep in peptides]
# NOTE(review): the loop body appears to be cut off in this view; only the
# progress report every 500 measurements is visible.
for i, m in enumerate(measurements):
    if i % 500 == 0:
        print(f"Done: {i}")
Exemplo n.º 24
0
    def dock(self):
        """Run the docking for self.restraints and create the solutions.

        Builds the two Protein objects from the restraint anchors, creates
        self.numberOfPopulations populations, evolves each one in its own
        process via self.worker1 and finally moves/translates a PyMOL
        object for the best chromosome of every population.

        Returns:
            (environment1, self.settings), where environment1.populations
            holds the populations received back from the worker processes.
        """
        #setup tables with distances, errors and weights
        expDistances = []
        expErrors = []
        weights = []
        constraintNames = []

        for restraint in self.restraints:
            constraintNames.append(
                "%s-%s" % (restraint["anchorAname"], restraint["anchorBname"]))
            expDistances.append(restraint["distance"])
            expErrors.append(restraint["width"])
            weights.append(restraint["weight"])

        expDistances = numpy.array(expDistances)
        expErrors = numpy.array(expErrors)
        weights = numpy.array(weights)

        # setup Proteins
        # Name and C-alpha data are taken from the first restraint only.
        anchorAcoords = []
        for restraint in self.restraints:
            anchorAcoords.append(restraint["anchorAcoord"])

        proteinAname = self.restraints[0]["proteinAname"]
        anchorAcalphas = self.restraints[0]["proteinAcalpha"]
        proteinA = Protein(anchorAcoords, anchorAcalphas, proteinAname)

        anchorBcoords = []
        for restraint in self.restraints:
            anchorBcoords.append(restraint["anchorBcoord"])
        proteinBname = self.restraints[0]["proteinBname"]
        anchorBcalphas = self.restraints[0]["proteinBcalpha"]
        proteinB = Protein(anchorBcoords, anchorBcalphas, proteinBname)

        # move both to origin
        proteinA.moveToOrigin(proteinA.labelAtomsCog)
        proteinB.moveToOrigin(proteinB.labelAtomsCog)

        #######
        # evolve
        #######

        # NOTE(review): zeroChromosome is not used again in this method.
        zeroChromosome = Chromosome("None")
        zeroChromosome.genes = numpy.array([0, 0, 0, 0, 0, 0])

        # setup populations
        print "setting up populations..."

        populations = []
        for i in range(0, self.numberOfPopulations):
            # NOTE(review): any symmetry other than "C2" or "None" leaves
            # `population` unbound (or stale from the previous iteration).
            if self.symmetry == "C2":
                population = Population(self.numberOfChromosomes, "C2")
            elif self.symmetry == "None":
                population = Population(self.numberOfChromosomes, "None")
            population.name = "%i" % (i + 1)
            populations.append(population)

        # put them into an environment and evolve
        environment1 = Environment(populations, proteinA, proteinB,
                                   expDistances, expErrors, weights, False,
                                   False, False)
        environment1.constraintNames = constraintNames
        #environment1.applySelectionPressure()
        #for population in environment1.populations:
        #	population.log += population.chromosomes[0].printChromosomeWithoutClashes()

        # One worker process per population; each gets its own deep copy of
        # the environment and the index of the population it handles.
        self.processes = []
        resultQueue = multiprocessing.Queue()
        progressQueue = multiprocessing.Queue()
        numberOfProcesses = len(environment1.populations)
        for idx, population in enumerate(environment1.populations):
            environment = copy.deepcopy(environment1)
            p = multiprocessing.Process(target=self.worker1,
                                        args=(environment, idx, resultQueue,
                                              progressQueue))
            p.start()
            self.processes.append(p)

        # Only used by the disabled progress loop below.
        cycles = 0
        maxCycles = numberOfProcesses * (self.numberOfGenerations +
                                         self.numberOfRigidBodyCycles)
        #while True:
        #	cycles += progressQueue.get()
        #	progress = cycles/(numberOfProcesses*(self.numberOfGenerations+self.numberOfRigidBodyCycles))
        #	#send message to main thread
        #	wx.CallAfter(pub.sendMessage, "docking.update", progress=progress)
        #	if cycles >= maxCycles:
        #		break
        # Results are read from the queue before the processes are joined.
        # NOTE(review): they arrive in completion order, which need not
        # match the order of the original populations.
        resultsList = [resultQueue.get() for p in self.processes]
        for p in self.processes:
            p.join()
        environment1.populations = resultsList

        # create solutions
        print ""
        print "Solutions:"
        nonClashingSolution = 1
        clashingSolution = 1
        for population in environment1.populations:
            #createPseudoatom(self.labelPositionsProteinB, "tmpSolution-labels", 1)
            # NOTE(review): tmpProtein is only referenced by commented-out
            # code below.
            tmpProtein = Protein(proteinB.originalLabelAtoms,
                                 proteinB.originalLabelAtoms,
                                 "tmpSolution-labels")
            # The first chromosome of the population is used as its solution.
            solution = population.chromosomes[0]
            # print solution.printChromosomeWithClashes()
            # At most 5 clashes is named "<prefix>-<run>_sol-<n>", more is
            # named "<prefix>-<run>_clash-<n>".
            if solution.clashes <= 5:
                nameOfSolution = "%s-%i_sol-%i" % (self.objectPrefix,
                                                   self.dockingRunNumber,
                                                   nonClashingSolution)
                solution.name = nameOfSolution

                proteinB.moveInPymol(nameOfSolution, solution, 1)
                #tmpProtein.moveInPymol("%s-labels" % nameOfSolution, solution, 1)
                cmd.translate(list(proteinA.labelAtomsCog.reshape(-1, )),
                              nameOfSolution, 1, 0, None)
                #cmd.translate(list(proteinA.labelAtomsCog.reshape(-1,)), "%s-labels" % nameOfSolution, 1, 0, None)
                nonClashingSolution += 1

            elif solution.clashes > 5:
                nameOfSolution = "%s-%i_clash-%i" % (
                    self.objectPrefix, self.dockingRunNumber, clashingSolution)
                solution.name = nameOfSolution
                proteinB.moveInPymol(nameOfSolution, solution, 1)
                #tmpProtein.moveInPymol("%s-labels" % nameOfSolution, solution, 1)
                cmd.translate(list(proteinA.labelAtomsCog.reshape(-1, )),
                              nameOfSolution, 1, 0, None)
                #cmd.translate(list(proteinA.labelAtomsCog.reshape(-1,)), "%s-labels" % nameOfSolution, 1, 0, None)
                clashingSolution += 1
        # Group all objects of this docking run under "<prefix>-<run>".
        cmd.group("%s-%i" % (self.objectPrefix, self.dockingRunNumber),
                  "%s-%i*" % (self.objectPrefix, self.dockingRunNumber))
        #cmd.set_view(myView)
        return environment1, self.settings
Exemplo n.º 25
0
def random_walk(protein_string, N_tries, dimension, matrix_size):
    '''
    Runs a random walk algorithm in which N_tries proteins are randomly
    created from a string of amino acid types. The best conformation,
    the one that has the lowest energy, is saved and returned along with
    a dictionary of all N_tries energy counts and the minimal matrix sizes for
    the folded protein.

    Arguments:
        protein_string: string of amino acid types to fold.
        N_tries: number of complete proteins to fold.
        dimension, matrix_size: passed to the Protein constructor.

    Returns a tuple (protein_min, energy_counter, matrix_sizes):
        protein_min: deep copy of the lowest-energy protein found.
        energy_counter: {energy: number of folds with that energy}.
        matrix_sizes: {energy: {smallest matrix size: count}}.

    Exits the program with an error message when no protein was saved
    (for example N_tries == 0, or no fold with energy below 1).
    '''
    # Create a protein object with a specific matrix size
    protein = Protein(matrix_size, dimension)

    # Place the first two amino acids
    protein.place_first_two(protein_string)
    location = protein.last_acid

    energy_min = 1
    # Initialised up front so the final check cannot hit an unbound name
    # when no fold ever improves on energy_min.
    protein_min = None

    energy_counter = {}
    matrix_sizes = {}

    # Try to fold N_tries proteins
    for i in range(N_tries):

        # Print an update for every 1000th protein
        if (i + 1) % 1000 == 0:
            print(f"{i + 1}th protein folded")

        # Walk at least once and keep retrying until a complete protein
        # is produced.
        solution_found = False
        while not solution_found:
            # Remove acids until only the first two are left
            while protein.length > 2:
                protein.remove_acid(0)

            solution_found, protein = walk(protein, protein_string, location)

        energy = protein.energy

        # When its energy is the lowest energy yet, save the protein object
        if energy < energy_min:
            energy_min = energy
            protein_min = copy.deepcopy(protein)
            print(f"New minimum energy found: {energy_min}")

        # Add the energy to a dictionary counter
        energy_counter[energy] = energy_counter.get(energy, 0) + 1

        # Determine the smallest matrix size needed for this protein
        min_matrix_size = protein.smallest_matrix()
        sizes_for_energy = matrix_sizes.setdefault(energy, {})
        sizes_for_energy[min_matrix_size] = sizes_for_energy.get(
            min_matrix_size, 0) + 1

    if protein_min is None:
        exit("Error: No protein 'protein_min' to return")
    return protein_min, energy_counter, matrix_sizes
Exemplo n.º 26
0
	def dock(self):
		#setup tables with distances, errors and weights
		expDistances = []
		expErrors = []
		weights = []
		constraintNames = []
		
		for restraint in self.restraints:
			constraintNames.append("%s-%s"%(restraint["anchorAname"], restraint["anchorBname"]))
			expDistances.append(restraint["distance"])
			expErrors.append(restraint["width"])
			weights.append(restraint["weight"])
		
		expDistances = numpy.array(expDistances)
		expErrors = numpy.array(expErrors)
		weights = numpy.array(weights)
		
		# setup Proteins
		anchorAcoords = []
		for restraint in self.restraints:
			anchorAcoords.append(restraint["anchorAcoord"])
		
		proteinAname = self.restraints[0]["proteinAname"] 
		anchorAcalphas = self.restraints[0]["proteinAcalpha"]
		proteinA = Protein(anchorAcoords, anchorAcalphas, proteinAname)

		anchorBcoords = []
		for restraint in self.restraints:
			anchorBcoords.append(restraint["anchorBcoord"])
		proteinBname = self.restraints[0]["proteinBname"] 
		anchorBcalphas = self.restraints[0]["proteinBcalpha"]
		proteinB = Protein(anchorBcoords, anchorBcalphas, proteinBname)

		# move both to origin
		proteinA.moveToOrigin(proteinA.labelAtomsCog)
		proteinB.moveToOrigin(proteinB.labelAtomsCog)

		# setup populations
		print "Starting..."
		
		populations = []
		for i in range(0, self.numberOfPopulations):
			if self.symmetry != "None":
				population = Population(self.numberOfChromosomes, self.symmetry)
			elif self.symmetry == "None":
				population = Population(self.numberOfChromosomes, "None")
			population.name = "%i" % (i + 1)
			populations.append(population)

		# put them into an environment and evolve
		environment1 = Environment(populations, proteinA, proteinB, expDistances, expErrors, weights, self.scoreClashes)
		environment1.constraintNames = constraintNames

		self.processes = []
		resultQueue = multiprocessing.Queue()
		progressQueue = multiprocessing.Queue()
		numberOfProcesses = len(environment1.populations)
		if os.name != "nt":
			for idx, population in enumerate(environment1.populations):
					environment = copy.deepcopy(environment1)
					p = multiprocessing.Process(target = self.worker, args = (environment, idx, resultQueue, progressQueue))
					p.start()
					self.processes.append(p)
					cycles = 0
					maxCycles = numberOfProcesses * (self.numberOfGenerations + self.numberOfRigidBodyCycles)
			while True:
					cycles += progressQueue.get()
					progress = cycles/(numberOfProcesses*(self.numberOfGenerations+self.numberOfRigidBodyCycles))
					#send message to main thread
					wx.CallAfter(pub.sendMessage, "docking.update", progress=progress)
					if cycles >= maxCycles:
							break
			resultsList = [resultQueue.get() for p in self.processes]
			for p in self.processes:
					p.join()
			environment1.populations = resultsList
		else:
			print "Windows... Using 1 core."
			self.worker(environment1, -1, resultQueue, progressQueue)
			self.abort = False
		
		# name solutions
		nonClashingSolution = 1
		clashingSolution = 1
		for population in environment1.populations:
			solution = population.chromosomes[0]
			if solution.clashes <= 5:
				nameOfSolution = "%s-%i_sol-%i" % (self.objectPrefix, self.dockingRunNumber, nonClashingSolution)
				solution.name = nameOfSolution
				nonClashingSolution += 1
			
			elif solution.clashes > 5:
				nameOfSolution = "%s-%i_clash-%i" % (self.objectPrefix, self.dockingRunNumber, clashingSolution)
				solution.name = nameOfSolution
				clashingSolution += 1
		return environment1, self.settings
Exemplo n.º 27
0
        for atom2 in atom2atom[atom1]:
            res2 = atom2.residue
            if res2 != res1:
                try:
                    res2res[res1].add(res2)
                except KeyError:
                    res2res[res1] = set([res2])
    ret = sum([len(res2res[x]) for x in res2res.keys()])/2, \
            avg_coord_num(res2res)
#    print 'DEBUG: csuAvgCoordNum end. Returning '+str(ret)
    return ret

#def picContactNumber(protein):
#    raise NotImplementedError

if __name__ == "__main__":
    # Command-line driver: print one line of contact / coordination numbers
    # per PDB file given on the command line.
    import sys

    if len(sys.argv) == 1 or '-h' in sys.argv:
        print("Usage: python coordNum.py pdbFileName1 [pdbFileName2 ...]")
    else:
        # Column header, left disabled as in the original:
        #print "Protein name\tContact Number (Marek)\tAvg Coord Number \
        #    (Marek)\tContact Number (CSU)\tAvg Coord Number (CSU)"
        for pdb_path in sys.argv[1:]:
            protein = Protein(pdb_path)
            # The label is the second-to-last '/'-separated path component,
            # cut at its first '.'.
            # NOTE(review): a path without any '/' raises IndexError here --
            # confirm that inputs are always given as <dir>/<file>.
            parent_component = pdb_path.split('/')[-2]
            label = parent_component.split('.')[0]
            marek_result = marek_avg_coord_num(protein)
            csu_result = csu_avg_coord_num(protein, total_layers=14)
            row = {
                'n': label,
                'ncm': marek_result[0],
                'cnm': marek_result[1],
                'ncc': csu_result[0],
                'cnc': csu_result[1],
            }
            print('%(n)5s %(ncm)3d %(cnm)6.3f %(ncc)3d %(cnc)6.3f' % row)
Exemplo n.º 28
0
# -*- coding: utf-8 -*-
"""
Created on Wed Feb 28 17:00:23 2018

@author: HOS
"""
from numpy import *
from protein import Protein
from grid import Grid
import matplotlib.pyplot as plt

# Constructor arguments shared by the Protein and its Grid.
N = 18
d = 15
# Number of rotate / update / draw iterations performed below.
FOLDING_STEPS = 30

Prt = Protein(d, N)
# Bind the instance to its own name: the original rebound ``Grid`` itself,
# which hid the imported class for the rest of the module.
grid = Grid(d, N, Prt)

# folding: attempt a rotation, refresh the grid, then display it
for _ in range(FOLDING_STEPS):
    Prt.tryRotate()
    grid.update()
    # grid.easyPlot()  # alternative display call, left disabled
    grid.showOff()
Exemplo n.º 29
0
#!/usr/bin/env python

import sys
import requests as sender
from flask import Flask, abort, request
import json
import threading

from protein import Protein

# Globals
# Flask application object for this module.
app = Flask(__name__)
# Single Protein instance created at import time; match_provider() reads its
# code list through this name.
# NOTE(review): the argument looks like a comma-separated list of NCBI RefSeq
# accession numbers -- confirm against Protein's constructor.
p = Protein(
    "NC_000852,NC_007346,NC_008724,NC_009899,NC_014637,NC_020104,NC_023423,NC_023640,NC_023719,NC_027867"
)

# Design
# ------
# bio-engine (port 7000)        web-app (ports 8000-)
#     |                                  |
#     |                               (open port)
#     |  <---- POST (sequence) --------  |
#     |  ----- PUT (found protein) --->  |
#     |  ----- PUT (found protein) --->  |
#     |  ----- DELETE (finished) ----->  |
#     |                               (close port)


def match_provider(port, sequence):
    """Fetch the protein code list from the module-level ``p`` instance.

    NOTE(review): only the first statement of this function is present in
    this excerpt. ``port`` and ``sequence`` are not used by the visible code,
    and nothing is returned yet -- presumably the rest of the body matches
    ``sequence`` and reports results to the web-app on ``port`` as sketched in
    the design diagram; confirm against the full source.
    """
    # Retrieve the codes known to the shared Protein object.
    protein_codes = p.get_code_list()
Exemplo n.º 30
0
def getStructures(name):
    """Return the structures of ``name`` wrapped as a response.

    The value of ``Protein.getStructures(name)`` is passed through
    ``jsonify`` and the result through ``make_response``.
    """
    structures = Protein.getStructures(name)
    payload = jsonify(structures)
    return make_response(payload)
def remove_acids(protein: Protein, cut_start: int, cut_end: int):
    """Remove the acids strictly between ``cut_start`` and ``cut_end``.

    Calls ``protein.remove_acid_index(i)`` once for every index ``i`` with
    ``cut_start < i < cut_end``, in ascending order. Both end points are
    left in place. Returns ``None``.

    NOTE(review): if ``remove_acid_index`` shifts the indices of later acids,
    ascending removal would skip some of them -- confirm its semantics.
    """
    first_removed = cut_start + 1
    for position in range(first_removed, cut_end):
        protein.remove_acid_index(position)
Exemplo n.º 32
0
import pickle
from lxml import etree
from protein import Protein

# Goes through the Swissprot database xml and parses all the relevant data

# Namespace prefix that is prepended to tag names before they are compared
# with the ``tag`` attribute of parsed elements.
prefix = "{http://uniprot.org/uniprot}"

# Starts empty. NOTE(review): presumably collects the parsed Protein objects;
# the code that fills it is not part of this excerpt.
proteins = []

# Stream the XML file, restricted via ``tag`` to the namespaced "entry"
# elements, and build one Protein per entry.
for event, element in etree.iterparse("in/uniprot_sprot.xml",
                                      tag=prefix + "entry"):

    prot = Protein()

    # Reset for every entry; once it is True the scan of this entry's
    # children stops. Where it gets set is not visible in this excerpt.
    nonEukaryotProtein = False

    for child in element.getchildren():
        if nonEukaryotProtein:
            break

        # Gets the Uniprot ID.
        # The XML lists old IDs as well; only the first accession is kept,
        # because the add happens only while ``prot.uniprot_id`` is still empty.
        if child.tag == prefix + "accession":
            if not prot.uniprot_id:
                prot.uniprot_id.add(child.text)

        # Gets names: handles any child whose tag ends in "protein" and looks
        # at its recommendedName / alternativeName entries.
        # NOTE(review): the body of the innermost ``if`` is cut off in this
        # excerpt.
        elif child.tag.endswith("protein"):
            for entry in child.getchildren():
                if entry.tag == prefix + "recommendedName" or entry.tag == prefix + "alternativeName":
Exemplo n.º 33
0
    parser.add_argument('--f', type=int, default=10, help='parameter for the simplification of points mesh')
    parser.add_argument('--T', type=float, default=0.9, help='ligandability threshold')
    parser.add_argument('--batch', type=int, default=32, help='batch size')
    parser.add_argument('--voxel_size', type=float, default=1.0, help='size of voxel in angstrom')
    parser.add_argument('--protonate', action='store_true', help='whether to protonate or not the input protein')
    parser.add_argument('--expand', action='store_true', help='whether to expand on residue level the extracted binding sites')
    parser.add_argument('--discard_points', action='store_true', help='whether to output or not the computed surface points')

    return parser.parse_args()


# Parse the command line once; everything below reads its settings from args.
args = parse_args()

# Both input paths must already exist; they are checked in this order and the
# first missing one aborts the run.
for required_path in (args.prot_file, args.model_path):
    if not os.path.exists(required_path):
        raise IOError('%s does not exist.' % required_path)

# Create the output directory when it is not there yet.
if not os.path.exists(args.output):
    os.makedirs(args.output)

# Build the protein and the network from the parsed options.
prot = Protein(
    args.prot_file,
    args.protonate,
    args.expand,
    args.f,
    args.output,
    args.discard_points,
)
nn = Network(args.model_path, args.model, args.voxel_size)

# Score the protein with the network, then hand the scores to the extractor
# configured with the threshold args.T.
lig_scores = nn.get_lig_scores(prot, args.batch)
extractor = Bsite_extractor(args.T)
extractor.extract_bsites(prot, lig_scores)