Exemplo n.º 1
0
def construct_protein_records(proteins, main_data):
    """Group the peptide matches found in ``main_data`` into protein records.

    Every row of ``main_data`` yields one ``Peptide`` and one ``PeptideMatch``.
    The row's accession number is looked up in ``proteins`` and the match is
    attached to the ``ProteinRecord`` of that protein, creating the protein
    record and/or the ``PeptideRecord`` on first sight. Afterwards the peptide
    records of each protein record are ordered by peptide sequence length,
    longest first. Progress is reported through ``show_progress``.

    :param proteins: collection searched by ``find_protein_with_id``
    :param main_data: sized iterable of rows indexable by column name
    :return: list of ``ProteinRecord`` objects in order of first appearance
    """
    def sequence_length(record):
        # sort key: number of residues in the peptide of a peptide record
        return len(record.peptide.sequence)

    label = 'Constructing protein records: '
    show_progress(label, 40, 0.0)
    protein_records = []

    # 1. walk over all rows of the main data
    for position, row in enumerate(main_data, start=1):
        # 1.1. build the peptide and its match described by this row
        peptide = Peptide(sequence=b2str(row['sequence']))
        match = PeptideMatch(
            analysis_name=b2str(row['filename']),
            score=row['score'],
            reverse_score=row['reverseScore'],
            percent_of_scored_peak_intensity=row['percent_scored_peak_intensity'],
            total_intensity=row['totalIntensity'],
            precursor_averagine_chi_squared=row['precursorAveragineChiSquared'],
            retention_time_min=row['retentionTimeMin'],
            chromatographic_peak_width_in_seconds=row['chromatographicPeakWidthSec'],
        )

        # 1.2. resolve the protein referenced by this row
        # TODO: if no such protein exists, extract a Protein object and add it to proteins
        accession = b2str(row['accession_number'])
        protein = find_protein_with_id(proteins, accession)

        # 1.3. look for a record that already tracks this protein
        owner = find_protein_record_with_protein(protein_records, protein)

        if owner is None:
            # 1.4. first match for this protein: open a new protein record
            first_peptide_record = PeptideRecord(peptide, [match])
            owner = ProteinRecord(protein, received_peptide_records=[first_peptide_record])
            protein_records.append(owner)
        else:
            known = find_peptide_record_with_peptide(owner.received_peptide_records, peptide)
            if known is None:
                # 1.5. peptide not seen yet for this protein: start its record
                owner.received_peptide_records.append(PeptideRecord(peptide, [match]))
            else:
                # 1.6. peptide already recorded: just add the new match
                known.matches.append(match)

        show_progress(label, 40, position / len(main_data))
    print()

    # 2. order peptide records of every protein record, longest sequence first
    label = 'Filling received peptide records: '
    show_progress(label, 35, 0.0)
    for position, record in enumerate(protein_records, start=1):
        record.received_peptide_records = sorted(
            record.received_peptide_records, key=sequence_length, reverse=True
        )
        show_progress(label, 35, position / len(protein_records))
    print()

    return protein_records
Exemplo n.º 2
0
def load_proteins_from_csv(file_name):
    """Load proteins from a semicolon-delimited file with a header row.

    The file is parsed with ``genfromtxt`` (``names=True``, ``dtype=None``)
    and the columns ``id``, ``name`` and ``sequence`` of every row are passed
    through ``b2str`` and handed to ``Protein``. Progress is reported through
    ``show_progress``.

    :param file_name: source handed to ``genfromtxt``; also shown in the
        progress label
    :return: list of ``Protein`` objects, one per data row, in row order
    """
    # Local import keeps this block self-contained; numpy is already a
    # dependency of this function through genfromtxt.
    from numpy import atleast_1d

    label = 'Loading proteins from \'{0}\': '.format(file_name)
    show_progress(label, 40, 0.0)

    # genfromtxt squeezes its result by default, so a file holding exactly one
    # data row comes back as a 0-d array that supports neither iteration nor
    # len(). Promote it to 1-D so single-row files load like any other.
    data = atleast_1d(genfromtxt(file_name, dtype=None, delimiter=';', names=True))
    total = len(data)

    proteins = []
    for index, line in enumerate(data, start=1):
        proteins.append(Protein(id=b2str(line['id']), name=b2str(line['name']), sequence=b2str(line['sequence'])))
        show_progress(label, 40, index / total)
    print()

    return proteins
Exemplo n.º 3
0
def construct_proteins(main_data):
    """Collect the distinct proteins referenced by the rows of ``main_data``.

    A ``Protein`` is built from the ``accession_number`` and ``entry_name``
    columns of each row and kept only if an equal protein is not already in
    the result. Progress is reported through ``show_progress``.

    NOTE(review): uniqueness is decided by the list ``in`` test, i.e. by how
    ``Protein`` defines equality, which is not visible here.

    :param main_data: sized iterable of rows indexable by column name
    :return: list of ``Protein`` objects in order of first appearance
    """
    label = 'Constructing proteins from main data: '
    unique_proteins = []
    show_progress(label, 35, 0.0)

    # 1. scan every row, keeping the first occurrence of each protein
    for position, row in enumerate(main_data, start=1):
        candidate = Protein(id=b2str(row['accession_number']), name=b2str(row['entry_name']))

        already_known = candidate in unique_proteins
        if not already_known:
            unique_proteins.append(candidate)

        show_progress(label, 35, position / len(main_data))
    print()

    return unique_proteins