def test_map_sequence_to_gene_annotation_iso(self):
     # Take the entry at index 1 of the loaded ISO annotation data, look its
     # first field up through map_sequence_to_gene_annotation, and expect the
     # entry's second field (treated as the annotation description) to come
     # back at position 1 of the mapped row.
     manager = GeneAnnotationManager()
     manager.load_gene_annotation_data()
     raw_row = manager.iso_gene_annotation_data[1]
     # Fields are separated by runs of tabs; trailing tabs are removed first
     # so the split does not produce an empty final field.
     fields = re.split(r'\t+', raw_row.rstrip('\t'))
     sequence_no, expected_desc = fields[0], fields[1]
     mapped_row = manager.map_sequence_to_gene_annotation(sequence_no)
     self.assertEqual(expected_desc, mapped_row[1])
 def test_load_gene_annotation_data_ehux(self):
     # After loading, ehux_gene_annotation_data must hold exactly 30448
     # entries.
     manager = GeneAnnotationManager()
     manager.load_gene_annotation_data()
     loaded_rows = manager.ehux_gene_annotation_data
     self.assertEqual(len(loaded_rows), 30448)
 def test_load_gene_annotation_data_geph(self):
     # After loading, geph_gene_annotation_data must hold exactly 52679
     # entries.
     manager = GeneAnnotationManager()
     manager.load_gene_annotation_data()
     loaded_rows = manager.geph_gene_annotation_data
     self.assertEqual(len(loaded_rows), 52679)
 def test_load_gene_annotation_data_iso(self):
     # After loading, iso_gene_annotation_data must hold exactly 18712
     # entries.
     manager = GeneAnnotationManager()
     manager.load_gene_annotation_data()
     loaded_rows = manager.iso_gene_annotation_data
     self.assertEqual(len(loaded_rows), 18712)
                    f_handle.write(
                        str(v) + ',' + self.get_species_name(str(v)) + str(
                            [value for value in self.blast_graph.node[v].itervalues()]).strip(
                            '[]') + ',')
                    f_handle.write(
                        str(edata['evalue']) + ',' + str(edata['identpct']) + ',' + str(edata['mismatchno']) + ',' +
                        str(edata['aln']) + ',' + str(edata['alnspn']) + ',' +
                        str(edata['gapopenno']) + ',' + str(edata['bitscore']) + '\n')
                    # if self.generate_gml_files:
                    # file_name = "{}/blast_graph.gml".format(self.blast_output_path)
                    # with open(file_name, "a") as f_handle:
                    # nx.write_gml(self.blast_graph, f_handle)


if __name__ == "__main__":
    # Script entry point: load the expression and annotation data, read the
    # target sequences from elong.txt, then call generate_blast_data() and
    # write_blast_graph_file() on a SimilarityNetworks restricted to them.
    # Instantiate the Gene Expression Manager and load its data
    gene_expression_manager = GeneExpressionManager()
    gene_expression_manager.load_gene_expression_data()
    # Instantiate the Gene Annotation Manager and load its data
    gene_annotation_manager = GeneAnnotationManager()
    gene_annotation_manager.load_gene_annotation_data()
    # Read the target sequences used to filter the run. The path is relative,
    # so it resolves against the current working directory.
    # NOTE(review): presumably one sequence identifier per line -- confirm
    # against the contents of elong.txt.
    target_sequences = np.genfromtxt("../data/protein_sequence_files/elong.txt", delimiter='\n', dtype=str)
    # Echo what was read and its array shape (Python 2 print statements).
    print target_sequences
    print 'Total Target Sequences:', target_sequences.shape
    # Build the similarity networks from both managers and the targets.
    # NOTE(review): evalue=1e-05 looks like a BLAST E-value cutoff -- confirm
    # in SimilarityNetworks.
    similarity_networks = SimilarityNetworks(gene_expression_manager, gene_annotation_manager, evalue=1e-05,
                                             by_sequence=target_sequences)
    # NOTE(review): presumably this suffix ends up in the names of the output
    # files -- verify where output_file_identifier is consumed.
    similarity_networks.output_file_identifier = "_elong_for_all_blast_data"
    # Generate the BLAST data first, then write the BLAST graph file.
    similarity_networks.generate_blast_data()
    similarity_networks.write_blast_graph_file()