def as_orange_table_v3(self):
    """Run the query and return the result as an Orange 3 ``Table``.

    The query is executed with ``self.run(count=False, header=True)`` and
    the response is decoded as UTF-8.  Every column ends up as a
    ``StringVariable`` meta attribute; the table has no regular attributes
    or class variables (``X`` has zero columns).

    * ``tsv``: the first line is the tab-separated header and supplies the
      column names; the remaining non-blank lines are the rows.
    * ``fasta``: records are parsed with Biopython's ``SeqIO`` (imported
      only in this branch) into the two columns ``id`` and ``sequence``.

    Returns:
        The ``Orange.data.Table`` built by ``Table.from_numpy``.  A
        response without any data rows yields a table with zero rows.

    Raises:
        BioMartError: if ``self.format`` is neither ``tsv`` nor ``fasta``
            (compared case-insensitively).
    """
    import numpy
    import Orange.data

    text = self.run(count=False, header=True).decode("utf-8")
    fmt = self.format.lower()

    if fmt == "tsv":
        # partition() always returns three parts, so a response that holds
        # only the header line (no newline at all) gives an empty body
        # instead of failing to unpack the way ``split("\n", 1)`` does.
        header, _, body = text.partition("\n")
        column_names = header.split("\t")
        rows = [
            line.split("\t") for line in body.split("\n") if line.strip()
        ]
    elif fmt == "fasta":
        from Bio import SeqIO

        column_names = ["id", "sequence"]
        rows = [
            [record.id, str(record.seq)]
            for record in SeqIO.parse(io.StringIO(text), "fasta")
        ]
    else:
        raise BioMartError("Unsupported format: %s" % self.format)

    domain = Orange.data.Domain(
        [], [],
        [Orange.data.StringVariable(name) for name in column_names]
    )

    metas = numpy.array(rows, dtype=object)
    if not rows:
        # numpy.array([]) is 1-D with shape (0,); give the empty result an
        # explicit (0, n_columns) shape so metas is always two-dimensional.
        metas = metas.reshape(0, len(column_names))

    X = numpy.empty((len(rows), 0))
    return Orange.data.Table.from_numpy(domain, X, metas=metas)
def as_orange_table_v3(self):
    """Execute the query and wrap the response in an Orange 3 ``Table``.

    The response of ``self.run(count=False, header=True)`` is decoded as
    UTF-8 and interpreted according to ``self.format`` (case-insensitive):

    * ``tsv``: the first line holds the tab-separated column names, every
      following non-blank line is one row.
    * ``fasta``: each record parsed by Biopython's ``SeqIO`` becomes one
      row with the columns ``id`` and ``sequence``.

    All columns are stored as ``StringVariable`` metas; the domain has no
    attributes and no class variables, so ``X`` has zero columns.

    Returns:
        The ``Orange.data.Table`` created by ``Table.from_numpy``; it has
        zero rows when the response contains no data.

    Raises:
        BioMartError: for any other value of ``self.format``.
    """
    import numpy
    import Orange.data

    data = self.run(count=False, header=True)
    data = data.decode("utf-8")
    output_format = self.format.lower()

    if output_format == "tsv":
        # Unlike unpacking ``split("\n", 1)``, partition() cannot raise
        # when the response is just a header line without a newline.
        header, _, payload = data.partition("\n")
        names = header.split("\t")
        rows = [
            line.split("\t")
            for line in payload.split("\n")
            if line.strip()
        ]
    elif output_format == "fasta":
        # Biopython is needed for FASTA only; importing it here keeps TSV
        # queries working when Biopython is not installed.
        from Bio import SeqIO

        names = ["id", "sequence"]
        rows = [
            [seq.id, str(seq.seq)]
            for seq in SeqIO.parse(io.StringIO(data), "fasta")
        ]
    else:
        raise BioMartError("Unsupported format: %s" % self.format)

    domain = Orange.data.Domain(
        [], [], [Orange.data.StringVariable(name) for name in names]
    )

    metas = numpy.array(rows, dtype=object)
    if metas.ndim != 2:
        # An empty row list produces a 1-D array of shape (0,); metas must
        # be a 2-D (n_rows, n_columns) array, so shape it explicitly.
        # (Only done for the empty case; ragged rows are left untouched.)
        if not rows:
            metas = metas.reshape(0, len(names))

    X = numpy.empty((len(rows), 0))
    return Orange.data.Table.from_numpy(domain, X, metas=metas)
def get_example_table(self):
    """Run the query and return the result as an ``Orange.data.Table``.

    Uses the ``Orange.feature`` API.  The response of
    ``self.run(count=False, header=True)`` is used as returned (it is not
    decoded here) and interpreted according to ``self.format``
    (case-insensitive):

    * ``tsv``: the first line holds the tab-separated column names, each
      of which becomes an ``Orange.feature.String``; the remaining
      non-blank lines are the rows.
    * ``fasta``: records parsed by Biopython's ``SeqIO`` become rows with
      the two string features ``id`` and ``sequence``.

    Returns:
        For ``tsv``, a table of the rows, or ``None`` when there are no
        data rows.  For ``fasta``, always a table (possibly without rows).

    Raises:
        BioMartError: if ``self.format`` is neither ``tsv`` nor ``fasta``.
    """
    import Orange.data
    import Orange.feature

    data = self.run(count=False, header=True)
    fmt = self.format.lower()

    if fmt == "tsv":
        # partition() never fails to unpack: a header-only response with
        # no newline yields an empty body and falls through to the
        # "no rows -> None" return below instead of raising ValueError.
        header, _, body = data.partition("\n")
        domain = Orange.data.Domain(
            [Orange.feature.String(name) for name in header.split("\t")],
            None)
        rows = [line.split("\t") for line in body.split("\n") if line.strip()]
        return Orange.data.Table(domain, rows) if rows else None
    elif fmt == "fasta":
        from Bio import SeqIO
        domain = Orange.data.Domain(
            [Orange.feature.String("id"), Orange.feature.String("sequence")],
            False)  # TODO: meaningful id
        examples = [
            [seq.id, str(seq.seq)]
            for seq in SeqIO.parse(io.BytesIO(data), "fasta")
        ]
        return Orange.data.Table(domain, examples)
    else:
        raise BioMartError("Unsupported format: %s" % self.format)
def get_example_table(self):
    """Execute the query and convert the response to an Orange table.

    Built on the ``Orange.feature`` API.  The response of
    ``self.run(count=False, header=True)`` is parsed without decoding,
    according to ``self.format`` (compared case-insensitively):

    * ``tsv``: the header line supplies the names of the
      ``Orange.feature.String`` columns; every other non-blank line is
      split on tabs into one row.
    * ``fasta``: Biopython's ``SeqIO`` parses the records; each becomes a
      row of the string features ``id`` and ``sequence``.

    Returns:
        ``tsv``: an ``Orange.data.Table``, or ``None`` if the response has
        no data rows.  ``fasta``: an ``Orange.data.Table`` in every case.

    Raises:
        BioMartError: for any other value of ``self.format``.
    """
    import Orange.data
    import Orange.feature

    response = self.run(count=False, header=True)
    output_format = self.format.lower()

    if output_format == "tsv":
        # ``header, data = split("\n", 1)`` raises ValueError when there is
        # no newline; partition() returns an empty remainder instead, which
        # is then reported as "no rows" (None) like any other empty result.
        header, _, remainder = response.partition("\n")
        domain = Orange.data.Domain(
            [Orange.feature.String(name) for name in header.split("\t")],
            None)
        rows = [
            line.split("\t")
            for line in remainder.split("\n")
            if line.strip()
        ]
        if not rows:
            return None
        return Orange.data.Table(domain, rows)
    elif output_format == "fasta":
        # Imported lazily: only the FASTA branch needs Biopython, so TSV
        # queries must not fail when it is not installed.
        from Bio import SeqIO

        domain = Orange.data.Domain(
            [Orange.feature.String("id"), Orange.feature.String("sequence")],
            False)  # TODO: meaningful id
        examples = [
            [seq.id, str(seq.seq)]
            for seq in SeqIO.parse(io.BytesIO(response), "fasta")
        ]
        return Orange.data.Table(domain, examples)
    else:
        raise BioMartError("Unsupported format: %s" % self.format)