def construct_protein_records(proteins, main_data):
    """Group the peptide matches found in ``main_data`` into protein records.

    Every row of ``main_data`` yields one ``Peptide`` and one ``PeptideMatch``.
    The row's accession number is resolved to a protein through
    ``find_protein_with_id`` and the match is filed under that protein's
    ``ProteinRecord``; a record is created the first time a protein is seen.
    Afterwards the peptide records of each protein record are ordered by
    peptide sequence length, longest first.

    Args:
        proteins: collection handed to ``find_protein_with_id`` for lookup.
        main_data: sized iterable of rows indexable by the column names
            read below ('sequence', 'filename', 'score', ...).

    Returns:
        The list of protein records, in order of first appearance.
    """

    def _sequence_length(record):
        # Sort key: length of the peptide sequence held by a peptide record.
        return len(record.peptide.sequence)

    label = 'Constructing protein records: '
    show_progress(label, 40, 0.0)
    records = []

    # 1. walk over every row of the main data
    for position, row in enumerate(main_data, start=1):
        # 1.1. peptide and peptide match described by this row
        peptide = Peptide(sequence=b2str(row['sequence']))
        match = PeptideMatch(
            analysis_name=b2str(row['filename']),
            score=row['score'],
            reverse_score=row['reverseScore'],
            percent_of_scored_peak_intensity=row['percent_scored_peak_intensity'],
            total_intensity=row['totalIntensity'],
            precursor_averagine_chi_squared=row['precursorAveragineChiSquared'],
            retention_time_min=row['retentionTimeMin'],
            chromatographic_peak_width_in_seconds=row['chromatographicPeakWidthSec'],
        )

        # 1.2. resolve the protein this row refers to
        protein = find_protein_with_id(proteins, b2str(row['accession_number']))
        # TODO: if such protein not exists, extract Protein object and add to proteins

        # 1.3. locate the record that already collects this protein, if any
        owner = find_protein_record_with_protein(records, protein)
        if owner is None:
            # 1.4. first sighting of the protein: open a record seeded with
            #      a single peptide record holding this one match
            records.append(
                ProteinRecord(
                    protein,
                    received_peptide_records=[PeptideRecord(peptide, [match])],
                )
            )
        else:
            # 1.5. protein already has a record: attach the match to it
            known = find_peptide_record_with_peptide(
                owner.received_peptide_records, peptide
            )
            if known is None:
                # 1.5.1. peptide not received yet: new peptide record
                owner.received_peptide_records.append(
                    PeptideRecord(peptide, [match])
                )
            else:
                # 1.5.2. peptide already received: just add the match
                known.matches.append(match)

        show_progress(label, 40, position / len(main_data))
    print()

    # 2. sort peptide records by sequence length (starting from longest)
    sort_label = 'Filling received peptide records: '
    show_progress(sort_label, 35, 0.0)
    for position, record in enumerate(records, start=1):
        record.received_peptide_records = sorted(
            record.received_peptide_records,
            key=_sequence_length,
            reverse=True,
        )
        show_progress(sort_label, 35, position / len(records))
    print()

    return records
def load_proteins_from_csv(file_name):
    """Load proteins from a ';'-delimited file with a header row.

    The file is parsed with ``genfromtxt(..., names=True)``, so columns are
    addressed by header name; the 'id', 'name' and 'sequence' columns are
    read, passed through ``b2str`` and used to build one ``Protein`` per row.
    Progress is reported through ``show_progress`` after every row.

    Args:
        file_name: path (or other source accepted by ``genfromtxt``) of the
            file to read.

    Returns:
        A list of ``Protein`` objects in file order.
    """
    label = 'Loading proteins from \'{0}\': '.format(file_name)
    show_progress(label, 40, 0.0)

    data = genfromtxt(file_name, dtype=None, delimiter=';', names=True)
    # genfromtxt squeezes its result: a file with exactly one data row comes
    # back as a 0-d structured array, which supports neither iteration nor
    # len(). Promote it to a one-element array so that case loads normally.
    if data.ndim == 0:
        data = data.reshape(1)

    total = len(data)
    proteins = []
    for index, line in enumerate(data, start=1):
        proteins.append(Protein(id=b2str(line['id']),
                                name=b2str(line['name']),
                                sequence=b2str(line['sequence'])))
        show_progress(label, 40, index / total)
    print()

    return proteins
def construct_proteins(main_data):
    """Collect the distinct proteins mentioned in ``main_data``.

    One ``Protein`` is built per row from its 'accession_number' and
    'entry_name' columns (both passed through ``b2str``). A protein is kept
    only if the list does not already contain an equal one, so the result
    preserves order of first appearance. Progress is reported through
    ``show_progress`` after every row.

    Args:
        main_data: sized iterable of rows indexable by column name.

    Returns:
        The list of unique ``Protein`` objects.
    """
    label = 'Constructing proteins from main data: '
    unique = []
    show_progress(label, 35, 0.0)

    # 1. gather proteins, skipping ones that are already present
    for position, row in enumerate(main_data, start=1):
        # 1.1. protein described by this row
        candidate = Protein(
            id=b2str(row['accession_number']),
            name=b2str(row['entry_name']),
        )

        # 1.2. membership relies on Protein equality, as list `in` does
        already_known = candidate in unique
        if not already_known:
            unique.append(candidate)

        show_progress(label, 35, position / len(main_data))
    print()

    return unique