def main():
    # Get file name and read the sequence
    test_file, sa_file = parse_args()
    test_sequence = utils.read_sequence(test_file)
    sa_sequence = utils.read_sequence(sa_file)

    # Read in the model
    json_model = utils.read_json()

    # Classify the test sequence
    predicted_rsa = DecisionTree.classify_sequence(test_sequence, json_model)

    utils.print_alignment(test_sequence, predicted_rsa, sa_sequence)
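# Note: utils.read_sequence is not shown in these examples, and its signature
# appears to vary (here it takes a single path; in the alignment examples it
# takes a file name plus a directory and also returns the header). A minimal
# sketch of a reader along these lines, assuming FASTA-style files with '>'
# header lines, might look like this:
import os


def read_sequence(file_name, directory=None):
    # Hypothetical helper: join directory and file name, skip header lines,
    # and return the concatenated sequence.
    path = os.path.join(directory, file_name) if directory else file_name
    sequence = []
    with open(path) as handle:
        for line in handle:
            line = line.strip()
            if line and not line.startswith('>'):
                sequence.append(line)
    return ''.join(sequence)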
Example #2
def needleman_wunsch(x, iterative_method, output_dir, proteins_dir):
    _, protein1, protein2 = x
    protein1_header, protein1_seq = read_sequence(protein1, proteins_dir)
    protein2_header, protein2_seq = read_sequence(protein2, proteins_dir)
    output_name = [protein1_header[1:5], protein2_header[1:5], 'global']

    if iterative_method:
        output_name.append('iterative')
        protein1_align, protein2_align, similarity = _needleman_wunsch_iterative(protein1_seq, protein2_seq)
    else:
        protein1_align, protein2_align, similarity = _needleman_wunsch(protein1_seq, protein2_seq)

    output = os.path.join(output_dir, '_'.join(output_name))
    save_alignment(protein1_align, protein2_align, similarity, output)
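# Note: the _needleman_wunsch core is not included in this example. The sketch
# below is a minimal global-alignment routine with the same return shape
# (aligned sequence 1, aligned sequence 2, score); the match/mismatch/gap
# values are placeholders, and the original likely scores with a substitution
# matrix and may report a different similarity measure.
def _needleman_wunsch(seq1, seq2, match=1, mismatch=-1, gap=-2):
    n, m = len(seq1), len(seq2)
    # Score matrix; first row and column hold cumulative gap penalties.
    score = [[0] * (m + 1) for _ in range(n + 1)]
    for i in range(1, n + 1):
        score[i][0] = i * gap
    for j in range(1, m + 1):
        score[0][j] = j * gap
    # Fill: best of diagonal (match/mismatch), up (gap in seq2), left (gap in seq1).
    for i in range(1, n + 1):
        for j in range(1, m + 1):
            diag = score[i - 1][j - 1] + (match if seq1[i - 1] == seq2[j - 1] else mismatch)
            score[i][j] = max(diag, score[i - 1][j] + gap, score[i][j - 1] + gap)
    # Trace back from the bottom-right corner to recover the alignment.
    align1, align2 = [], []
    i, j = n, m
    while i > 0 or j > 0:
        diag_ok = (i > 0 and j > 0 and score[i][j] == score[i - 1][j - 1]
                   + (match if seq1[i - 1] == seq2[j - 1] else mismatch))
        if diag_ok:
            align1.append(seq1[i - 1])
            align2.append(seq2[j - 1])
            i, j = i - 1, j - 1
        elif i > 0 and score[i][j] == score[i - 1][j] + gap:
            align1.append(seq1[i - 1])
            align2.append('-')
            i -= 1
        else:
            align1.append('-')
            align2.append(seq2[j - 1])
            j -= 1
    return ''.join(reversed(align1)), ''.join(reversed(align2)), score[n][m]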
Example #3
def smith_waterman(x, iterative_method, output_dir, proteins_dir):
    _, protein1, protein2 = x
    protein1_header, protein1_seq = read_sequence(protein1, proteins_dir)
    protein2_header, protein2_seq = read_sequence(protein2, proteins_dir)
    output_name = [protein1_header[1:5], protein2_header[1:5], 'local']

    if iterative_method:
        output_name.append('iterative')
        protein1_align, protein2_align, similarity = _smith_waterman_iterative(protein1_seq, protein2_seq)
    else:
        protein1_align, protein2_align, similarity = _smith_waterman(protein1_seq, protein2_seq)

    output = os.path.join(output_dir, '_'.join(output_name))
    save_alignment(protein1_align, protein2_align, similarity, output)
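# Note: _smith_waterman is likewise not shown. Under the same placeholder
# scoring scheme as the global sketch above, the local-alignment version
# differs in two ways: cell scores are clamped at zero, and the traceback
# starts from the highest-scoring cell and stops at the first zero cell.
def _smith_waterman(seq1, seq2, match=1, mismatch=-1, gap=-2):
    n, m = len(seq1), len(seq2)
    score = [[0] * (m + 1) for _ in range(n + 1)]
    best_score, best_pos = 0, (0, 0)
    for i in range(1, n + 1):
        for j in range(1, m + 1):
            diag = score[i - 1][j - 1] + (match if seq1[i - 1] == seq2[j - 1] else mismatch)
            # Local alignment: never let a cell drop below zero.
            score[i][j] = max(0, diag, score[i - 1][j] + gap, score[i][j - 1] + gap)
            if score[i][j] > best_score:
                best_score, best_pos = score[i][j], (i, j)
    # Trace back from the best cell until a zero cell is reached.
    align1, align2 = [], []
    i, j = best_pos
    while i > 0 and j > 0 and score[i][j] > 0:
        diag = score[i - 1][j - 1] + (match if seq1[i - 1] == seq2[j - 1] else mismatch)
        if score[i][j] == diag:
            align1.append(seq1[i - 1])
            align2.append(seq2[j - 1])
            i, j = i - 1, j - 1
        elif score[i][j] == score[i - 1][j] + gap:
            align1.append(seq1[i - 1])
            align2.append('-')
            i -= 1
        else:
            align1.append('-')
            align2.append(seq2[j - 1])
            j -= 1
    return ''.join(reversed(align1)), ''.join(reversed(align2)), best_score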
Example #4
def parse_args():
    if len(sys.argv) < 3:
        print(err_msg)
        sys.exit(1)

    try:
        sequence_1 = utils.read_sequence(sys.argv[1])
        sequence_2 = utils.read_sequence(sys.argv[2])
    except Exception:
        # Reading or parsing one of the sequence files failed.
        print(err_msg)
        sys.exit(1)

    return sequence_1, sequence_2
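# Example invocation (the script and file names below are placeholders):
#   python align.py sequence_a.fasta sequence_b.fasta
# err_msg is assumed to be a module-level usage string defined elsewhere.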
Example #5
    def build_feature_matrix(self):
        # For each fasta file in the training data, read the sequences and add them to the feature matrix
        for fasta_name in self.fasta_train:
            fasta = utils.read_sequence(fasta_name, self.fasta_dir)
            sa = utils.read_sequence(fasta_name.replace('.fasta', '.sa'),
                                     self.sa_dir)
            for index in range(len(fasta)):
                # Create the AA object
                acid = get_amino_acid(fasta[index].upper())

                # Add the RSA label
                rsa_binary = rsa_labels[sa[index]]
                acid['rsa-label'] = rsa_binary

                # Add the acid to the matrix
                self.feature_matrix.append(acid)
Example #6
    def evaluate_model(self):
        # Keeping track of metrics (true condition, predicted condition)
        metrics = {
            (0, 0): 0,  # True negative
            (0, 1): 0,  # False positive
            (1, 0): 0,  # False negative
            (1, 1): 0  # True positive
        }
        # For each fasta file in the testing data, walk the tree for each amino acid
        for fasta_name in self.fasta_test:
            fasta = utils.read_sequence(fasta_name, self.fasta_dir)
            sa = utils.read_sequence(fasta_name.replace('.fasta', '.sa'),
                                     self.sa_dir)
            for index in range(len(fasta)):
                # Test this amino acid against our decision tree
                amino_acid = fasta[index]
                expected_result = rsa_labels[sa[index]]
                calculated_result = self.walk_tree(get_amino_acid(amino_acid))
                # print('Acid {}, expected {}, calculated {}'.format(amino_acid, expected_result, calculated_result))
                metrics[expected_result, calculated_result] += 1

        self.calculate_eval_metrics(metrics)
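    # Note: calculate_eval_metrics is not shown. The sketch below derives the
    # standard classification metrics from the (true, predicted) counts
    # collected above; how the original reports them is an assumption.
    def calculate_eval_metrics(self, metrics):
        tn, fp = metrics[(0, 0)], metrics[(0, 1)]
        fn, tp = metrics[(1, 0)], metrics[(1, 1)]
        total = tn + fp + fn + tp
        accuracy = (tp + tn) / total if total else 0.0
        precision = tp / (tp + fp) if (tp + fp) else 0.0
        recall = tp / (tp + fn) if (tp + fn) else 0.0
        f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
        print('Accuracy:  {:.3f}'.format(accuracy))
        print('Precision: {:.3f}'.format(precision))
        print('Recall:    {:.3f}'.format(recall))
        print('F1 score:  {:.3f}'.format(f1))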