Пример #1
0
    def as_orange_table_v3(self):
        """Run the query and return the response as an Orange 3 table.

        Every column is stored as a string meta attribute; the table has no
        regular attributes or class variables (``X`` has zero columns).

        * ``"tsv"``: the first response line supplies the column names and
          each remaining non-blank line becomes one row.
        * ``"fasta"``: each record becomes an ``(id, sequence)`` row.

        A response without any rows yields an empty table that still carries
        the full domain.

        Raises:
            BioMartError: if ``self.format`` (compared case-insensitively)
                is neither "tsv" nor "fasta".
        """
        import numpy
        import Orange.data

        def build_table(names, rows):
            # All values are kept as strings in the meta part of the table.
            domain = Orange.data.Domain(
                [], [], [Orange.data.StringVariable(name) for name in names])
            metas = numpy.array(rows, dtype=object)
            if not rows:
                # numpy.array([]) is 1-D with shape (0,); give it one column
                # per meta variable so it matches the domain.
                metas = metas.reshape(0, len(names))
            X = numpy.empty((len(rows), 0))
            return Orange.data.Table.from_numpy(domain, X, metas=metas)

        # The raw response is bytes; decode once before any text handling.
        data = self.run(count=False, header=True)
        data = data.decode("utf-8")
        fmt = self.format.lower()

        if fmt == "tsv":
            header, data = data.split("\n", 1)
            rows = [
                line.split("\t") for line in data.split("\n") if line.strip()
            ]
            return build_table(header.split("\t"), rows)
        elif fmt == "fasta":
            # Biopython is only needed for FASTA, so import it lazily.
            from Bio import SeqIO
            rows = [[seq.id, str(seq.seq)]
                    for seq in SeqIO.parse(io.StringIO(data), "fasta")]
            return build_table(["id", "sequence"], rows)
        else:
            raise BioMartError("Unsupported format: %s" % self.format)
Пример #2
0
    def as_orange_table_v3(self):
        """Convert the query response into an Orange 3 ``Table`` of metas.

        The query is run with ``count=False, header=True`` and the returned
        bytes are decoded as UTF-8.  The resulting table has no regular
        attributes or class variables; every column is a ``StringVariable``
        stored in the meta part.

        * ``"tsv"``: column names come from the first line, rows from the
          remaining non-blank lines.
        * ``"fasta"``: columns are ``id`` and ``sequence``, one row per
          record.

        An empty result set produces a zero-row table with the full domain.

        Raises:
            BioMartError: if ``self.format`` (compared case-insensitively)
                is neither "tsv" nor "fasta".
        """
        import numpy
        import Orange.data

        text = self.run(count=False, header=True).decode("utf-8")
        fmt = self.format.lower()

        if fmt == "tsv":
            header, body = text.split("\n", 1)
            names = header.split("\t")
            rows = [line.split("\t")
                    for line in body.split("\n") if line.strip()]
        elif fmt == "fasta":
            # Imported only here so that TSV conversion (and the error path
            # below) does not require Biopython to be installed.
            from Bio import SeqIO
            names = ["id", "sequence"]
            rows = [[seq.id, str(seq.seq)]
                    for seq in SeqIO.parse(io.StringIO(text), "fasta")]
        else:
            raise BioMartError("Unsupported format: %s" % self.format)

        domain = Orange.data.Domain(
            [], [], [Orange.data.StringVariable(name) for name in names])
        metas = numpy.array(rows, dtype=object)
        if not rows:
            # An empty list becomes a 1-D array of shape (0,); reshape it so
            # the column count matches the number of meta variables.
            metas = metas.reshape(0, len(names))
        X = numpy.empty((len(rows), 0))
        return Orange.data.Table.from_numpy(domain, X, metas=metas)
Пример #3
0
    def get_example_table(self):
        """Run the query and wrap the response in an ``Orange.data.Table``.

        All columns are created as ``Orange.feature.String`` features.

        * ``"tsv"``: the first line holds the column names and every other
          non-blank line is one row.  Returns ``None`` when there are no
          rows.
        * ``"fasta"``: one ``[id, sequence]`` row per record.

        Raises:
            BioMartError: if ``self.format`` (compared case-insensitively)
                is neither "tsv" nor "fasta".

        NOTE(review): the response is split with ``str`` separators in the
        TSV branch but wrapped in ``io.BytesIO`` in the FASTA branch, which
        looks like Python 2 byte-string semantics -- confirm.
        """
        import Orange.data
        import Orange.feature

        response = self.run(count=False, header=True)
        fmt = self.format.lower()

        if fmt == "tsv":
            header, body = response.split("\n", 1)
            columns = [Orange.feature.String(name)
                       for name in header.split("\t")]
            domain = Orange.data.Domain(columns, None)

            records = [line.split("\t")
                       for line in body.split("\n") if line.strip()]
            if not records:
                return None
            return Orange.data.Table(domain, records)

        if fmt == "fasta":
            from Bio import SeqIO
            columns = [Orange.feature.String("id"),
                       Orange.feature.String("sequence")]
            domain = Orange.data.Domain(columns, False)  # TODO: meaningful id

            records = [[record.id, str(record.seq)]
                       for record in SeqIO.parse(io.BytesIO(response), "fasta")]
            return Orange.data.Table(domain, records)

        raise BioMartError("Unsupported format: %s" % self.format)
Пример #4
0
    def get_example_table(self):
        """Run the query and return the response as an ``Orange.data.Table``.

        Every column is an ``Orange.feature.String`` feature.

        * ``"tsv"``: column names are taken from the first line, rows from
          the remaining non-blank lines.  ``None`` is returned when the
          response contains no rows.
        * ``"fasta"``: the table has ``id`` and ``sequence`` columns with
          one row per record.

        Raises:
            BioMartError: if ``self.format`` (compared case-insensitively)
                is neither "tsv" nor "fasta".

        NOTE(review): ``io.BytesIO(data)`` in the FASTA branch implies the
        response is a byte string, while the TSV branch splits it with
        ``str`` separators -- this matches Python 2 semantics; confirm.
        """
        import Orange.data
        import Orange.feature

        data = self.run(count=False, header=True)
        fmt = self.format.lower()

        if fmt == "tsv":
            header, data = data.split("\n", 1)
            domain = Orange.data.Domain(
                [Orange.feature.String(name) for name in header.split("\t")],
                None)

            rows = [line.split("\t")
                    for line in data.split("\n") if line.strip()]
            return Orange.data.Table(domain, rows) if rows else None
        elif fmt == "fasta":
            # Biopython is needed only for FASTA; importing it here keeps the
            # TSV path (and the error path) usable without it.
            from Bio import SeqIO
            domain = Orange.data.Domain(
                [Orange.feature.String("id"),
                 Orange.feature.String("sequence")],
                False)  # TODO: meaningful id

            examples = [[seq.id, str(seq.seq)]
                        for seq in SeqIO.parse(io.BytesIO(data), "fasta")]
            return Orange.data.Table(domain, examples)
        else:
            raise BioMartError("Unsupported format: %s" % self.format)