def setUp(self):
    """Load the FASTA fixtures and create the schema finder under test.

    Every record of ``NeuralNetwork/enolase.fasta`` is appended to
    ``self.test_records`` and every record of ``NeuralNetwork/repeat.fasta``
    to ``self.diff_records``.  ``self.finder`` is a ``Schema.SchemaFinder``
    asked for ``self.num_schemas`` schemas, driven by a
    ``Schema.GeneticAlgorithmFinder`` whose ``min_generations`` is set to 1.
    """
    test_file = os.path.join('NeuralNetwork', 'enolase.fasta')
    diff_file = os.path.join('NeuralNetwork', 'repeat.fasta')

    self.test_records = []
    self.diff_records = []

    # load the records
    for file_name, records in ((test_file, self.test_records),
                               (diff_file, self.diff_records)):
        # The with-block closes the handle even when parsing raises.
        with open(file_name, 'r') as handle:
            seq_parser = Fasta.SequenceParser(alphabet=IUPAC.unambiguous_dna)
            iterator = Fasta.Iterator(handle, seq_parser)
            while True:
                seq_record = iterator.next()
                # A None record is treated as the end of the input.
                if seq_record is None:
                    break
                records.append(seq_record)

    self.num_schemas = 2
    schema_ga = Schema.GeneticAlgorithmFinder()
    schema_ga.min_generations = 1
    self.finder = Schema.SchemaFinder(num_schemas=self.num_schemas,
                                      schema_finder=schema_ga)
def setUp(self):
    """Create the schema factory, fixture path and Schema used by the tests.

    Sets ``self.factory`` to a fresh ``Schema.SchemaFactory``, points
    ``self.test_file`` at ``NeuralNetwork/enolase.fasta`` under the current
    working directory, and builds ``self.schema`` from a mapping of schema
    characters to the letters each one stands for.
    """
    self.factory = Schema.SchemaFactory()

    working_dir = os.getcwd()
    self.test_file = os.path.join(working_dir, "NeuralNetwork",
                                  "enolase.fasta")

    # "R" and "*" map to several letters; the rest map to themselves.
    letters_for = {
        "G": "G",
        "A": "A",
        "T": "T",
        "C": "C",
        "R": "AG",
        "*": "AGTC",
    }
    self.schema = Schema.Schema(letters_for)
def runTest(self):
    """Check ``Schema.matches_schema`` on matching and non-matching pairs.

    Uses ``self.assertEqual`` rather than bare ``assert`` statements so the
    checks still run when Python is started with ``-O`` (which strips
    ``assert``).
    """
    match = Schema.matches_schema("GATC", "AAAAA")
    self.assertEqual(match, 0,
                     "Expected no match because of length differences")

    match = Schema.matches_schema("GATC", "GAT*")
    self.assertEqual(match, 1, "Expected match")

    match = Schema.matches_schema("GATC", "GATC")
    self.assertEqual(match, 1, "Expected match")

    match = Schema.matches_schema("GATC", "C*TC")
    self.assertEqual(match, 0,
                     "Expected no match because of char mismatch.")

    match = Schema.matches_schema("G*TC", "*TTC")
    self.assertEqual(match, 1, "Expected match because of ambiguity.")
def test_schema_representation(self):
    """Convert sequences into schema representations.

    Builds a ``Schema.SchemaCoder`` from the top 25 schemas of the loaded
    repository, then encodes each record read from ``self.test_file``.
    The encoded values are only printed (when ``VERBOSE`` is true); nothing
    is asserted about them.
    """
    # get a set of schemas we want to code the sequence in
    schema_bank = self._load_schema_repository()
    top_schemas = schema_bank.get_top(25)
    schema_coder = Schema.SchemaCoder(top_schemas, self.schema)

    # get the sequences one at a time, and encode them
    # The with-block closes the handle even when parsing or encoding raises.
    with open(self.test_file, 'r') as fasta_handle:
        seq_parser = Fasta.SequenceParser(alphabet=IUPAC.unambiguous_dna)
        iterator = Fasta.Iterator(fasta_handle, seq_parser)
        while True:
            seq_record = iterator.next()
            # A None record is treated as the end of the input.
            if seq_record is None:
                break

            schema_values = schema_coder.representation(seq_record.seq)
            if VERBOSE:
                # Same text as the old print statement, but valid on both
                # Python 2 and 3; the 1-tuple keeps "%" safe for any value.
                print("Schema values: %s" % (schema_values,))
def runTest(self):
    """Run ``Schema.matches_schema`` over a table of argument pairs.

    Each row holds the two arguments, the expected result and the message
    reported when the result differs.
    """
    cases = (
        ("GATC", "AAAAA", 0,
         "Expected no match because of length differences"),
        ("GATC", "GAT*", 1, "Expected match"),
        ("GATC", "GATC", 1, "Expected match"),
        ("GATC", "C*TC", 0, "Expected no match because of char mismatch."),
        ("G*TC", "*TTC", 1, "Expected match because of ambiguity."),
    )
    for first, second, expected, message in cases:
        outcome = Schema.matches_schema(first, second)
        self.assertEqual(outcome, expected, message)
def setUp(self):
    """Build the motif coder and the expected representations.

    ``self.motif_coder`` is a ``Schema.SchemaCoder`` over five motifs, and
    ``self.match_strings`` pairs each input string with a list of five
    expected values, one per motif.
    """
    letters_for = {
        "G": "G",
        "A": "A",
        "T": "T",
        "C": "C",
        "R": "AG",
        "*": "AGTC",
    }
    representation = Schema.Schema(letters_for)

    motif_set = ("GA", "GATAG", "GA*AG", "GATRG", "*A")
    self.motif_coder = Schema.SchemaCoder(motif_set, representation)

    expected_first = [.5, .5, .5, .5, 1.0]
    expected_second = [3.0 / 4.0, 0, 1.0 / 4.0, 0, 1]
    self.match_strings = [
        ("GATAG", expected_first),
        ("GAGAGATA", expected_second),
    ]
def setUp(self):
    """Load the FASTA fixtures and create the schema finder under test.

    Every record of ``NeuralNetwork/enolase.fasta`` is added to
    ``self.test_records`` and every record of ``NeuralNetwork/repeat.fasta``
    to ``self.diff_records``.  ``self.finder`` is a ``Schema.SchemaFinder``
    asked for ``self.num_schemas`` schemas, driven by a
    ``Schema.GeneticAlgorithmFinder`` whose ``min_generations`` is set to 1.
    """
    test_file = os.path.join('NeuralNetwork', 'enolase.fasta')
    diff_file = os.path.join('NeuralNetwork', 'repeat.fasta')

    self.test_records = []
    self.diff_records = []

    # load the records
    for file_name, records in ((test_file, self.test_records),
                               (diff_file, self.diff_records)):
        # The with-block closes the handle even when parsing raises.
        with open(file_name, 'r') as handle:
            records.extend(
                SeqIO.parse(handle, "fasta",
                            alphabet=IUPAC.unambiguous_dna))

    self.num_schemas = 2
    schema_ga = Schema.GeneticAlgorithmFinder()
    schema_ga.min_generations = 1
    self.finder = Schema.SchemaFinder(num_schemas=self.num_schemas,
                                      schema_finder=schema_ga)
def test_schema_representation(self):
    """Convert sequences into schema representations.

    Builds a ``Schema.SchemaCoder`` from the top 25 schemas of the loaded
    repository, then encodes each record read from ``self.test_file``.
    The encoded values are only printed (when ``VERBOSE`` is true); nothing
    is asserted about them.
    """
    # get a set of schemas we want to code the sequence in
    schema_bank = self._load_schema_repository()
    top_schemas = schema_bank.get_top(25)
    schema_coder = Schema.SchemaCoder(top_schemas, self.schema)

    # get the sequences one at a time, and encode them
    # The with-block closes the handle even when parsing or encoding raises.
    with open(self.test_file, 'r') as fasta_handle:
        for seq_record in SeqIO.parse(fasta_handle, "fasta",
                                      alphabet=IUPAC.unambiguous_dna):
            schema_values = schema_coder.representation(seq_record.seq)
            if VERBOSE:
                print("Schema values: %s" % schema_values)
def setUp(self):
    """Prepare the Schema object, the input string and the expected matches.

    ``self.match_info`` pairs each motif with the list of strings expected
    for it when applied to ``self.match_string``.
    """
    letters_for = {"G": "G", "A": "A", "T": "T", "C": "C",
                   "R": "AG", "*": "AGTC"}
    self.motif_coder = Schema.Schema(letters_for)

    self.match_string = "GATAG"

    whole_string = ["GATAG"]
    self.match_info = [
        ("GA", ["GA"]),
        ("GATAG", list(whole_string)),
        ("GA*AG", list(whole_string)),
        ("GATRG", list(whole_string)),
        ("*A", ["GA", "TA"]),
    ]