Python feature_from_line示例，gffutils.feature.feature_from_line Python示例

示例#1

0

显示文件

def test_issue_85():
    """Regression test for issue #85: blank or "." fields must still parse."""
    n_columns = 9

    # Every column empty; an empty start or stop used to make parsing fail.
    blank_line = '\t'.join('' for _ in range(n_columns))
    parsed = feature.feature_from_line(blank_line)

    # Every column filled with the "." placeholder.
    placeholder_line = '\t'.join('.' for _ in range(n_columns))
    parsed = feature.feature_from_line(placeholder_line)

示例#2

0

显示文件

def test_issue_85():
    """Regression test for issue #85: blank or "." fields must still parse."""
    n_columns = 9

    # Every column empty; an empty start or stop used to make parsing fail.
    blank_line = '\t'.join('' for _ in range(n_columns))
    parsed = feature.feature_from_line(blank_line)

    # Every column filled with the "." placeholder.
    placeholder_line = '\t'.join('.' for _ in range(n_columns))
    parsed = feature.feature_from_line(placeholder_line)

示例#3

0

显示文件

文件： feature_test.py 项目： DHatziioanou/gffutils

def test_string_representation():
    """str() of a parsed feature must reproduce the original input line."""
    core_fields = [
        'chr2L', 'FlyBase', 'exon', '7529', '8116', '.', '+', '.',
        'Name=CG11023:1;Parent=FBtr0300689,FBtr0300690',
    ]

    # Plain nine-column line.
    line = '\t'.join(core_fields)
    parsed = feature.feature_from_line(line, keep_order=True)
    rendered = str(parsed)
    assert line == rendered, rendered

    # Same line followed by extra tab-separated columns.
    line = '\t'.join(core_fields + ['some', 'more', 'stuff'])
    parsed = feature.feature_from_line(line, keep_order=True)
    assert line == str(parsed)

示例#4

0

显示文件

文件： feature_test.py 项目： computational-genomics-lab/IICB_Testing

def test_string_representation():
    """str() of a parsed feature must reproduce the original input line."""
    core_fields = [
        'chr2L', 'FlyBase', 'exon', '7529', '8116', '.', '+', '.',
        'Name=CG11023:1;Parent=FBtr0300689,FBtr0300690',
    ]

    # Plain nine-column line.
    line = '\t'.join(core_fields)
    parsed = feature.feature_from_line(line, keep_order=True)
    rendered = str(parsed)
    assert line == rendered, rendered

    # Same line followed by extra tab-separated columns.
    line = '\t'.join(core_fields + ['some', 'more', 'stuff'])
    parsed = feature.feature_from_line(line, keep_order=True)
    assert line == str(parsed)

示例#5

0

显示文件

文件： helpers_test.py 项目： DHatziioanou/gffutils

    def test_merge_Attributes(self):
        """merge_attributes is expected to pool the values of both inputs per key."""
        template = 'chr2L . testing 1 10 . + . %s'
        first = feature.feature_from_line(
            template % 'foo=1; baz=1; buz=1; biz=1; boo=1;', strict=False)
        second = feature.feature_from_line(
            template % 'bar=2; baz=2; buz=2; biz=1; boo=1;', strict=False)
        merged = helpers.merge_attributes(first.attributes, second.attributes)

        # Sort every value list in place so the comparison below does not
        # depend on the order in which values were merged.
        for key, values in list(merged.items()):
            merged[key] = sorted(values)

        expected = dict(
            foo=['1'],
            bar=['2'],
            baz=['1', '2'],
            boo=['1'],
            buz=['1', '2'],
            biz=['1'],
        )
        self.assertDictEqual(merged, expected)

示例#6

0

显示文件

文件： feature_test.py 项目： DHatziioanou/gffutils

def test_attribute_order():
    """Attribute order in str(feature) follows the feature's dialect."""
    # The first eight columns, shared by the input and every expected string.
    columns = 'chr1\t.\tmRNA\t1\t100\t.\t+\t.\t'

    # The default order is gene_id, transcript_id.  But feature_from_line --
    # if no dialect is provided -- infers its own dialect.  In this case,
    # transcript_id comes first.
    attributes = 'transcript_id "mRNA1"; gene_id "gene1";'
    # The input line is deliberately wrapped in newlines and indentation.
    raw_line = '\n        ' + columns + '%s\n        '
    a = feature.feature_from_line(
        raw_line % attributes, strict=False, keep_order=True)
    a.strict = True
    a.keep_order = True
    rendered = str(a)
    assert rendered == columns + attributes, rendered

    # Ensure that using the default dialect uses the default order (and
    # incidentally converts to GFF3 format).
    previous_dialect = a.dialect
    a.dialect = constants.dialect
    a.keep_order = True
    rendered = str(a)
    assert rendered == columns + 'gene_id=gene1;transcript_id=mRNA1', rendered

    # Adding an attribute should always result in that attribute coming last
    # (as long as that attribute is not in the dialect order).
    a['dummy'] = ['asdf']
    a.strict = True
    rendered = str(a)
    assert rendered == (
        columns + 'gene_id=gene1;transcript_id=mRNA1;dummy=asdf'), rendered

示例#7

0

显示文件

文件： feature_test.py 项目： computational-genomics-lab/IICB_Testing

def test_repr():
    """repr() must show the feature type, coordinates, strand and object id."""
    line = '\t'.join([
        'chr2L', 'FlyBase', 'exon', '7529', '8116', '.', '+', '.',
        'Name=CG11023:1;Parent=FBtr0300689,FBtr0300690',
        'some', 'more', 'stuff',
    ])
    f = feature.feature_from_line(line, keep_order=True)
    address = hex(id(f))
    print(repr(f))
    print(address)
    expected = "<Feature exon (chr2L:7529-8116[+]) at %s>" % address
    assert repr(f) == expected

示例#8

0

显示文件

文件： feature_test.py 项目： computational-genomics-lab/IICB_Testing

def test_attribute_order():
    """Attribute order in str(feature) follows the feature's dialect."""
    # The first eight columns, shared by the input and every expected string.
    columns = 'chr1\t.\tmRNA\t1\t100\t.\t+\t.\t'

    # The default order is gene_id, transcript_id.  But feature_from_line --
    # if no dialect is provided -- infers its own dialect.  In this case,
    # transcript_id comes first.
    attributes = 'transcript_id "mRNA1"; gene_id "gene1";'
    # The input line is deliberately wrapped in newlines and indentation.
    raw_line = '\n        ' + columns + '%s\n        '
    a = feature.feature_from_line(
        raw_line % attributes, strict=False, keep_order=True)
    a.strict = True
    a.keep_order = True
    rendered = str(a)
    assert rendered == columns + attributes, rendered

    # Ensure that using the default dialect uses the default order (and
    # incidentally converts to GFF3 format).
    previous_dialect = a.dialect
    a.dialect = constants.dialect
    a.keep_order = True
    rendered = str(a)
    assert rendered == columns + 'gene_id=gene1;transcript_id=mRNA1', rendered

    # Adding an attribute should always result in that attribute coming last
    # (as long as that attribute is not in the dialect order).
    a['dummy'] = ['asdf']
    a.strict = True
    rendered = str(a)
    assert rendered == (
        columns + 'gene_id=gene1;transcript_id=mRNA1;dummy=asdf'), rendered

示例#9

0

显示文件

def test_sequence():
    """Check the sequence extracted for a feature on the + and - strands."""
    fasta = gffutils.example_filename('dm6-chr2L.fa')
    gene_line = '\t'.join(
        ['chr2L', 'FlyBase', 'gene', '154', '170', '.', '+', '.', 'ID=one;'])
    f = feature.feature_from_line(gene_line)

    # Plus strand.
    plus_seq = f.sequence(fasta)
    assert plus_seq == 'aCGAGATGATAATATAT'
    assert len(plus_seq) == len(f)

    # Flip the strand on the same feature and extract again.
    f.strand = '-'
    minus_seq = f.sequence(fasta)
    assert minus_seq == 'ATATATTATCATCTCGt'
    assert len(minus_seq) == len(f)

示例#10

0

显示文件

文件： feature_test.py 项目： computational-genomics-lab/IICB_Testing

def test_aliases():
    """chrom/seqid and stop/end must read and write the same values."""
    line = '\t'.join([
        'chr2L', 'FlyBase', 'exon', '7529', '8116', '.', '+', '.',
        'Name=CG11023:1;Parent=FBtr0300689,FBtr0300690',
    ])
    f = feature.feature_from_line(line, keep_order=True)

    # Values as parsed from the line.
    assert f.chrom == 'chr2L'
    assert 'chr2L' == f.seqid
    assert f.end == 8116
    assert 8116 == f.stop

    # Writing through one name must be visible through the other.
    f.chrom = 'fake'
    f.stop = 1
    assert f.chrom == 'fake'
    assert 'fake' == f.seqid
    assert f.stop == 1
    assert 1 == f.end

示例#11

0

显示文件

文件： feature_test.py 项目： DHatziioanou/gffutils

def test_aliases():
    """chrom/seqid and stop/end must read and write the same values."""
    line = '\t'.join([
        'chr2L', 'FlyBase', 'exon', '7529', '8116', '.', '+', '.',
        'Name=CG11023:1;Parent=FBtr0300689,FBtr0300690',
    ])
    f = feature.feature_from_line(line, keep_order=True)

    # Values as parsed from the line.
    assert f.chrom == 'chr2L'
    assert 'chr2L' == f.seqid
    assert f.end == 8116
    assert 8116 == f.stop

    # Writing through one name must be visible through the other.
    f.chrom = 'fake'
    f.stop = 1
    assert f.chrom == 'fake'
    assert 'fake' == f.seqid
    assert f.stop == 1
    assert 1 == f.end

示例#12

0

显示文件

def test_issue_82():
    """Issue #82: "=" inside an unquoted attribute value must be kept intact."""
    # The Note value itself contains the key-value separator ("=").
    note = 'marker name(s): T0028 SGN-M1347 |identity=99.58|escore=2e-126'
    columns = [
        'Spenn-ch12', 'sgn_markers', 'match', '2621812', '2622049',
        '.', '+', '.',
    ]
    attributes = 'Alias=SGN-M1347;ID=T0028;Note=' + note
    x = '\t'.join(columns + [attributes])

    y = feature.feature_from_line(x)
    assert y.attributes['Note'] == [note]

    # Building a database from the corresponding example file must also work.
    gffutils.create_db(
        gffutils.example_filename('keyval_sep_in_attrs.gff'), ':memory:')

示例#13

0

显示文件

文件： test.py 项目： arnikz/gffutils

def test_issue_82():
    """Issue #82: "=" inside an unquoted attribute value must be kept intact."""
    # The Note value itself contains the key-value separator ("=").
    note = 'marker name(s): T0028 SGN-M1347 |identity=99.58|escore=2e-126'
    columns = [
        'Spenn-ch12', 'sgn_markers', 'match', '2621812', '2622049',
        '.', '+', '.',
    ]
    attributes = 'Alias=SGN-M1347;ID=T0028;Note=' + note
    x = '\t'.join(columns + [attributes])

    y = feature.feature_from_line(x)
    assert y.attributes['Note'] == [note]

    # Building a database from the corresponding example file must also work.
    gffutils.create_db(
        gffutils.example_filename('keyval_sep_in_attrs.gff'), ':memory:')

示例#14

0

显示文件

def test_sequence():
    """Check the sequence extracted for a feature on the + and - strands."""
    fasta = gffutils.example_filename('dm6-chr2L.fa')
    gene_line = '\t'.join(
        ['chr2L', 'FlyBase', 'gene', '154', '170', '.', '+', '.', 'ID=one;'])
    f = feature.feature_from_line(gene_line)

    # Plus strand.
    plus_seq = f.sequence(fasta)
    assert plus_seq == 'aCGAGATGATAATATAT'
    assert len(plus_seq) == len(f)

    # Flip the strand on the same feature and extract again.
    f.strand = '-'
    minus_seq = f.sequence(fasta)
    assert minus_seq == 'ATATATTATCATCTCGt'
    assert len(minus_seq) == len(f)

示例#15

0

显示文件

    def test_merge_Attributes(self):
        """merge_attributes is expected to pool the values of both inputs per key."""
        template = 'chr2L . testing 1 10 . + . %s'
        first = feature.feature_from_line(
            template % 'foo=1; baz=1; buz=1; biz=1; boo=1;', strict=False)
        second = feature.feature_from_line(
            template % 'bar=2; baz=2; buz=2; biz=1; boo=1;', strict=False)
        merged = helpers.merge_attributes(first.attributes, second.attributes)

        # Sort every value list in place so the comparison below does not
        # depend on the order in which values were merged.
        for key, values in list(merged.items()):
            merged[key] = sorted(values)

        expected = dict(
            foo=['1'],
            bar=['2'],
            baz=['1', '2'],
            boo=['1'],
            buz=['1', '2'],
            biz=['1'],
        )
        self.assertDictEqual(merged, expected)

示例#16

0

显示文件

文件： feature_test.py 项目： computational-genomics-lab/IICB_Testing

def test_pbt_interval_conversion():
    """helpers.asinterval must carry coordinates and name over to the interval."""
    # The test silently does nothing when pybedtools is not installed.
    try:
        import pybedtools
    except ImportError:
        return

    line = ("chr2L FlyBase exon 7529 8116 . + . "
            "Name=CG11023:1;Parent=FBtr0300689,FBtr0300690")
    f = feature.feature_from_line(line, strict=False, keep_order=True)
    pbt = helpers.asinterval(f)

    assert pbt.chrom == f.chrom
    assert f.chrom == f.seqid
    # The interval start is expected to be one less than the feature start.
    assert pbt.start == f.start - 1
    assert pbt.stop == f.stop
    assert f.stop == f.end

    interval_name = pbt.name
    feature_name = f.attributes['Name'][0]
    assert interval_name == feature_name, '%s, %s' % (interval_name,
                                                      feature_name)

示例#17

0

显示文件

文件： feature_test.py 项目： DHatziioanou/gffutils

def test_pbt_interval_conversion():
    """helpers.asinterval must carry coordinates and name over to the interval."""
    # The test silently does nothing when pybedtools is not installed.
    try:
        import pybedtools
    except ImportError:
        return

    line = ("chr2L FlyBase exon 7529 8116 . + . "
            "Name=CG11023:1;Parent=FBtr0300689,FBtr0300690")
    f = feature.feature_from_line(line, strict=False, keep_order=True)
    pbt = helpers.asinterval(f)

    assert pbt.chrom == f.chrom
    assert f.chrom == f.seqid
    # The interval start is expected to be one less than the feature start.
    assert pbt.start == f.start - 1
    assert pbt.stop == f.stop
    assert f.stop == f.end

    interval_name = pbt.name
    feature_name = f.attributes['Name'][0]
    assert interval_name == feature_name, '%s, %s' % (interval_name,
                                                      feature_name)

示例#18

0

显示文件

def test_unquoting():
    """Percent-encoded values round-trip via str() but decode on access."""
    columns = [
        'chr1', 'AUGUSTUS', 'gene', '6950084', '6951407', '0.26', '-', '.',
    ]
    # The incoming Note value is encoded: "%2C" stands for a comma.
    attributes = ';'.join([
        'ID=INIL01g00009',
        'GeneSymbol=Ndufaf6',
        'Note=NADH dehydrogenase (ubiquinone) complex I%2C assembly factor 6',
        'GO_Terms=GO:0005743|GO:0016740|GO:0009058|GO:0032981',
        'PFam=PF00494',
    ])
    s = '\t'.join(columns + [attributes])
    f = feature.feature_from_line(s, keep_order=True)

    # The string representation should be identical to the input.
    assert str(f) == s

    # Accessing the attribute should give the decoded value.
    decoded_note = f['Note']
    expected_note = 'NADH dehydrogenase (ubiquinone) complex I, assembly factor 6'
    assert decoded_note == [expected_note]

示例#19

0

显示文件

def test_unreasonable_unquoting():
    """Unusual percent-escapes must decode on access and round-trip via str()."""
    columns = 'chr1\t.\t.\t1\t2\t0.26\t-\t.\t'
    attribute_parts = [
        'newline=%0A;',
        'percent=%25;',
        'null=%00;',
        'comma=%2C;',
        # The first parent is "A," (A with a comma), the second is "B%".
        'Parent=A%2C,B%25,C;',
    ]
    s = columns + ''.join(attribute_parts)
    f = feature.feature_from_line(s, keep_order=True)

    decoded_singles = [
        ('newline', '\n'),
        ('percent', '%'),
        ('null', '\x00'),
        ('comma', ','),
    ]
    for key, decoded in decoded_singles:
        assert f.attributes[key][0] == decoded

    # Unescaped commas separate multiple values; escaped ones stay in a value.
    assert f.attributes['Parent'] == ['A,', 'B%', 'C']
    assert str(f) == s

示例#20

0

显示文件

文件： iterators.py 项目： drubin23/gffutils

    def _custom_iter(self):
        """Yield one parsed feature per data line of the opened source.

        Lines come from ``self.open_function(self.data)``.  Each line is
        decoded from UTF-8 when it arrives as bytes, stripped of trailing
        newline characters, and recorded (with its index) on
        ``self.current_item`` / ``self.current_item_number`` before it is
        classified:

        * ``##FASTA`` or a line starting with ``>`` ends the iteration;
        * any other line starting with ``##`` is handed to
          ``self._directive_handler`` and not yielded;
        * remaining ``#`` comment lines and empty lines are skipped;
        * everything else is parsed with ``feature_from_line`` using
          ``self.dialect`` and yielded.
        """
        for i, line in enumerate(self.open_function(self.data)):
            if isinstance(line, six.binary_type):
                line = line.decode('utf-8')
            line = line.rstrip('\n\r')
            self.current_item = line
            self.current_item_number = i

            if line == '##FASTA' or line.startswith('>'):
                # Finish the generator with a plain return.  Raising
                # StopIteration inside a generator is turned into a
                # RuntimeError by PEP 479 (default since Python 3.7).
                return

            if line.startswith('##'):
                self._directive_handler(line)
                continue

            if not line or line.startswith('#'):
                continue

            # (If we got here it should be a valid line)
            yield feature_from_line(line, dialect=self.dialect)

示例#21

0

显示文件

文件： iterators.py 项目： rbeagrie/gffutils

    def _custom_iter(self):
        """Yield one parsed feature per data line of the opened source.

        Lines come from ``self.open_function(self.data)``.  Each line is
        decoded from UTF-8 when it arrives as bytes, stripped of trailing
        newline characters, and recorded (with its index) on
        ``self.current_item`` / ``self.current_item_number`` before it is
        classified:

        * ``##FASTA`` or a line starting with ``>`` ends the iteration;
        * any other line starting with ``##`` is handed to
          ``self._directive_handler`` and not yielded;
        * remaining ``#`` comment lines and empty lines are skipped;
        * everything else is parsed with ``feature_from_line`` using
          ``self.dialect`` and yielded.
        """
        for i, line in enumerate(self.open_function(self.data)):
            if isinstance(line, six.binary_type):
                line = line.decode('utf-8')
            line = line.rstrip('\n\r')
            self.current_item = line
            self.current_item_number = i

            if line == '##FASTA' or line.startswith('>'):
                # Finish the generator with a plain return.  Raising
                # StopIteration inside a generator is turned into a
                # RuntimeError by PEP 479 (default since Python 3.7).
                return

            if line.startswith('##'):
                self._directive_handler(line)
                continue

            if not line or line.startswith('#'):
                continue

            # (If we got here it should be a valid line)
            yield feature_from_line(line, dialect=self.dialect)

示例#22

0

显示文件

文件： feature_test.py 项目： computational-genomics-lab/IICB_Testing

def test_hash():
    """A feature must hash to the same value as the line it was parsed from."""
    line = '\t'.join([
        'chr2L', 'FlyBase', 'exon', '7529', '8116', '.', '+', '.',
        'Name=CG11023:1;Parent=FBtr0300689,FBtr0300690',
        'some', 'more', 'stuff',
    ])
    parsed = feature.feature_from_line(line, keep_order=True)
    feature_hash = hash(parsed)
    assert feature_hash == hash(line)

示例#23

0

显示文件

文件： feature_test.py 项目： DHatziioanou/gffutils

def test_repr():
    """repr() must show the feature type, coordinates, strand and object id."""
    line = '\t'.join([
        'chr2L', 'FlyBase', 'exon', '7529', '8116', '.', '+', '.',
        'Name=CG11023:1;Parent=FBtr0300689,FBtr0300690',
        'some', 'more', 'stuff',
    ])
    f = feature.feature_from_line(line, keep_order=True)
    address = hex(id(f))
    print(repr(f))
    print(address)
    expected = "<Feature exon (chr2L:7529-8116[+]) at %s>" % address
    assert repr(f) == expected

示例#24

0

显示文件

文件： feature_test.py 项目： DHatziioanou/gffutils

def test_hash():
    """A feature must hash to the same value as the line it was parsed from."""
    line = '\t'.join([
        'chr2L', 'FlyBase', 'exon', '7529', '8116', '.', '+', '.',
        'Name=CG11023:1;Parent=FBtr0300689,FBtr0300690',
        'some', 'more', 'stuff',
    ])
    parsed = feature.feature_from_line(line, keep_order=True)
    feature_hash = hash(parsed)
    assert feature_hash == hash(line)

示例#25

0

显示文件

文件： parser_test.py 项目： drubin23/gffutils

def test_attributes():
    """With keep_order set, str() must reproduce the attribute column as-is."""
    s = '\t'.join([
        'chr2L', 'FlyBase', 'mRNA', '7529', '9484', '.', '+', '.',
        'ID=FBtr0300690;Name=CG11023-RC;Parent=FBgn0031208;',
    ])
    parsed = feature.feature_from_line(s)
    # keep_order is switched on after parsing rather than passed to the parser.
    parsed.keep_order = True
    rendered = str(parsed)
    assert rendered == s, rendered

示例#26

0

显示文件

文件： parser_test.py 项目： drubin23/gffutils

def test_attributes():
    """With keep_order set, str() must reproduce the attribute column as-is."""
    s = '\t'.join([
        'chr2L', 'FlyBase', 'mRNA', '7529', '9484', '.', '+', '.',
        'ID=FBtr0300690;Name=CG11023-RC;Parent=FBgn0031208;',
    ])
    parsed = feature.feature_from_line(s)
    # keep_order is switched on after parsing rather than passed to the parser.
    parsed.keep_order = True
    rendered = str(parsed)
    assert rendered == s, rendered

示例#27

0

显示文件

文件： feature_test.py 项目： DHatziioanou/gffutils

def test_feature_from_line():
    """Tab- and space-separated versions of a line must parse to equal features."""
    fields = [
        'chr2L', 'FlyBase', 'exon', '7529', '8116', '.', '+', '.',
        'Name=CG11023:1;Parent=FBtr0300689,FBtr0300690',
    ]
    tabbed_line = '\t'.join(fields)
    spaced_line = ' '.join(fields)

    from_tabs = feature.feature_from_line(
        tabbed_line, strict=False, keep_order=True)
    from_spaces = feature.feature_from_line(
        spaced_line, strict=False, keep_order=True)
    assert from_tabs == from_spaces

示例#28

0

显示文件

文件： feature_test.py 项目： DHatziioanou/gffutils

 def test_feature_single_item(self):
     """Indexing a feature by attribute name returns that attribute's values."""
     fields = [
         'chr2L', 'FlyBase', 'exon', '7529', '8116', '.', '+', '.',
         'Name=CG11023:1;Parent=FBtr0300689,FBtr0300690',
         'some', 'more', 'stuff',
     ]
     parsed = feature.feature_from_line('\t'.join(fields), keep_order=True)
     name_values = parsed['Name']
     assert name_values == ['CG11023:1']

示例#29

0

显示文件

文件： test.py 项目： DHatziioanou/gffutils

def test_update():
    """Exercise ``db.update`` with the default, "merge" and "replace" strategies.

    Every database used here is created in memory (``':memory:'``), first
    from the GFF example file and finally from the GTF one.
    """
    db = create.create_db(
        example_filename('FBgn0031208.gff'), ':memory:', verbose=False,
        keep_order=True,
        force=True)

    orig_num_features = len(list(db.all_features()))

    f = feature.feature_from_line(
        'chr2L . testing 1 10 . + . ID=testing_feature;n=1',
        dialect=db.dialect, strict=False)

    # no merge strategy required because we're adding a new feature
    db.update([f])
    x = list(db.features_of_type('testing'))
    assert len(x) == 1
    x = x[0]
    x.keep_order = True
    assert str(x) == "chr2L\t.\ttesting\t1\t10\t.\t+\t.\tID=testing_feature;n=1", str(x)

    # ought to be one more now . . .
    num_features = len(list(db.all_features()))
    assert num_features == orig_num_features + 1, num_features

    # Now try updating with the same feature, but using merge_strategy="merge",
    # which appends items to attributes ( n=1 --> n=1,2 )
    f = feature.feature_from_line(
        'chr2L . testing 1 10 . + . ID=testing_feature;n=1',
        dialect=db.dialect, strict=False)
    f.keep_order = True
    f.attributes['n'] = ['2']
    db.update([f], merge_strategy='merge')
    x = list(db.features_of_type('testing'))
    assert len(x) == 1

    # Merging does a list(set()) operation, so the order is not guaranteed.
    # Fix it here for testing...
    x = x[0]
    x.attributes['n'].sort()

    assert str(x) == "chr2L\t.\ttesting\t1\t10\t.\t+\t.\tID=testing_feature;n=1,2", str(x)

    # the feature count must be unchanged by the merge (still original + 1)
    num_features = len(list(db.all_features()))
    assert num_features == orig_num_features + 1, num_features

    # Merging while iterating.  e.g., if you're updating children with gene
    # IDs.
    db = create.create_db(example_filename('FBgn0031208.gff'), ':memory:',
                          verbose=False, force=True, keep_order=True)
    for gene in db.features_of_type('gene'):
        for child in list(db.children(gene)):
            # important: the FBgn0031208.gff file was designed to have some
            # funky features: there are two exons without ID attributes.  These
            # are assigned to ids "exon_1" and "exon_2".  Upon update, with
            # still no ID, we then have two new features "exon_3" and "exon_4".
            # To prevent this issue, we ensure that the ID attribute exists...
            child.attributes['gene_id'] = [gene.id]
            if 'ID' not in child.attributes:
                child.attributes['ID'] = [child.id]
            db.update([child], merge_strategy='replace')

    print("\n\nafter\n\n")
    # Look the gene up by its id instead of relying on the loop variable
    # leaking out of the ``for`` above, which would be undefined if the
    # database contained no gene features.
    for child in db.children('FBgn0031208'):
        print(child.id)
        assert child.attributes['gene_id'] == ['FBgn0031208'], (child, child.attributes)

    num_entries = 0
    for gene_recs in list(db.iter_by_parent_childs()):
        # Add attribute to each gene record
        rec = gene_recs[0]
        rec.attributes["new"] = ["new_value"]
        db.update([rec])
        num_entries += 1
    print(list(db.all_features()))

    assert (num_entries > 1), "Only %d left after update" % (num_entries)

    # Replace
    f = feature.feature_from_line(
        'chr2L . testing 1 10 . + . ID=testing_feature;n=1',
        dialect=db.dialect, strict=False)

    f.keep_order = True

    f.attributes['n'] = ['3']
    db.update([f], merge_strategy='replace')
    x = list(db.features_of_type('testing'))
    assert len(x) == 1
    assert str(x[0]) == "chr2L\t.\ttesting\t1\t10\t.\t+\t.\tID=testing_feature;n=3", str(x[0])
    # compare the feature count against the original count + 1 once more
    num_features = len(list(db.all_features()))
    assert num_features == orig_num_features + 1, num_features

    # Same thing, but GTF instead of GFF.
    db = create.create_db(
        example_filename('FBgn0031208.gtf'), ':memory:', verbose=False,
        force=True, keep_order=True)
    f = feature.feature_from_line('chr2L . testing 1 10 . + . gene_id "fake"; n "1"', strict=False)
    f.keep_order = True
    db.update([f], merge_strategy='merge')
    x = list(db.features_of_type('testing'))
    assert len(x) == 1
    x = x[0]
    x.keep_order = True

    # note the trailing semicolon.  That's because the db's dialect has
    # ['trailing semicolon'] = True.
    assert str(x) == 'chr2L\t.\ttesting\t1\t10\t.\t+\t.\tgene_id "fake"; n "1";', str(x)

示例#30

0

显示文件

文件： feature_test.py 项目： computational-genomics-lab/IICB_Testing

 def test_feature_single_item(self):
     """Indexing a feature by attribute name returns that attribute's values."""
     fields = [
         'chr2L', 'FlyBase', 'exon', '7529', '8116', '.', '+', '.',
         'Name=CG11023:1;Parent=FBtr0300689,FBtr0300690',
         'some', 'more', 'stuff',
     ]
     parsed = feature.feature_from_line('\t'.join(fields), keep_order=True)
     name_values = parsed['Name']
     assert name_values == ['CG11023:1']

示例#31

0

显示文件

def test_update():
    """Exercise ``db.update`` with the default, "merge" and "replace" strategies.

    Every database used here is created in memory (``':memory:'``), first
    from the GFF example file and finally from the GTF one.
    """
    # First eight columns expected in str() output of the "testing" feature.
    testing_columns = 'chr2L\t.\ttesting\t1\t10\t.\t+\t.\t'

    def new_testing_feature():
        # Fresh copy of the feature that gets inserted/merged/replaced below,
        # parsed with the dialect of whichever database is current.
        return feature.feature_from_line(
            'chr2L . testing 1 10 . + . ID=testing_feature;n=1',
            dialect=db.dialect,
            strict=False)

    def only_testing_feature():
        # There must be exactly one "testing" feature in the current database.
        matches = list(db.features_of_type('testing'))
        assert len(matches) == 1
        return matches[0]

    def check_feature_count():
        # The database must hold exactly one feature more than at the start.
        num_features = len(list(db.all_features()))
        assert num_features == orig_num_features + 1, num_features

    db = create.create_db(example_filename('FBgn0031208.gff'),
                          ':memory:',
                          verbose=False,
                          keep_order=True,
                          force=True)

    orig_num_features = len(list(db.all_features()))

    # No merge strategy is required because a new feature is being added.
    db.update([new_testing_feature()])
    stored = only_testing_feature()
    stored.keep_order = True
    assert str(stored) == testing_columns + "ID=testing_feature;n=1", str(
        stored)

    # There ought to be one more feature now.
    check_feature_count()

    # Update with the same feature, this time using merge_strategy="merge",
    # which appends items to attributes ( n=1 --> n=1,2 ).
    incoming = new_testing_feature()
    incoming.keep_order = True
    incoming.attributes['n'] = ['2']
    db.update([incoming], merge_strategy='merge')
    stored = only_testing_feature()

    # Merging does a list(set()) operation, so the order is not guaranteed.
    # Sort it here for testing.
    stored.attributes['n'].sort()
    assert str(stored) == testing_columns + "ID=testing_feature;n=1,2", str(
        stored)

    # The number of features must not have changed.
    check_feature_count()

    # Merging while iterating, e.g. when updating children with gene IDs.
    db = create.create_db(example_filename('FBgn0031208.gff'),
                          ':memory:',
                          verbose=False,
                          force=True,
                          keep_order=True)

    def children_with_gene_id():
        for gene in db.features_of_type('gene'):
            for child in list(db.children(gene)):
                # Important: the FBgn0031208.gff file was designed to have
                # some funky features: there are two exons without ID
                # attributes.  These are assigned the ids "exon_1" and
                # "exon_2".  Upon update, with still no ID, we would then have
                # two new features "exon_3" and "exon_4".  To prevent this, we
                # ensure that the ID attribute exists.
                child.attributes['gene_id'] = [gene.id]
                if 'ID' not in child.attributes:
                    child.attributes['ID'] = [child.id]
                yield child

    db.update(children_with_gene_id(), merge_strategy='replace')

    print("\n\nafter\n\n")
    for child in db.children('FBgn0031208'):
        print(child.id)
        assert child.attributes['gene_id'] == ['FBgn0031208'], (
            child, child.attributes)

    num_entries = 0
    for gene_recs in list(db.iter_by_parent_childs()):
        # Add an attribute to the first record of each group.
        first_record = gene_recs[0]
        first_record.attributes["new"] = ["new_value"]
        db.update([first_record])
        num_entries += 1
    print(list(db.all_features()))

    assert (num_entries > 1), "Only %d left after update" % (num_entries)

    # Replace.
    incoming = new_testing_feature()
    incoming.keep_order = True
    incoming.attributes['n'] = ['3']
    db.update([incoming], merge_strategy='replace')
    stored = only_testing_feature()
    assert str(stored) == testing_columns + "ID=testing_feature;n=3", str(
        stored)
    # The feature count is compared against the original count + 1 again.
    check_feature_count()

    # Same thing, but GTF instead of GFF.
    db = create.create_db(example_filename('FBgn0031208.gtf'),
                          ':memory:',
                          verbose=False,
                          force=True,
                          keep_order=True)
    incoming = feature.feature_from_line(
        'chr2L . testing 1 10 . + . gene_id "fake"; n "1"', strict=False)
    incoming.keep_order = True
    db.update([incoming], merge_strategy='merge')
    stored = only_testing_feature()
    stored.keep_order = True

    # Note the trailing semicolon.  That's because the db's dialect has
    # ['trailing semicolon'] = True.
    assert str(stored) == testing_columns + 'gene_id "fake"; n "1";', str(
        stored)

示例#32

0

显示文件

文件： feature_test.py 项目： computational-genomics-lab/IICB_Testing

def test_feature_from_line():
    """Tab- and space-separated versions of a line must parse to equal features."""
    fields = [
        'chr2L', 'FlyBase', 'exon', '7529', '8116', '.', '+', '.',
        'Name=CG11023:1;Parent=FBtr0300689,FBtr0300690',
    ]
    tabbed_line = '\t'.join(fields)
    spaced_line = ' '.join(fields)

    from_tabs = feature.feature_from_line(
        tabbed_line, strict=False, keep_order=True)
    from_spaces = feature.feature_from_line(
        spaced_line, strict=False, keep_order=True)
    assert from_tabs == from_spaces