Пример #1
0
def test_resum_equal_in_memory():
    # Exercise resum_equal("foo", "qux") on freshly loaded corpora, once with
    # keep_all=False and once with keep_all=True.
    def load_corpus():
        # Build a new in-memory line file so each scenario starts untouched.
        return LineFileInMemory("tests/smallcorpus.txt.bz2",
                                header="foo bar baz qux".split(),
                                path="tests/tmp/testcorpus")

    collapsed = load_corpus()
    original_length = len(collapsed)
    qux_total = collapsed.sum_column("qux")
    collapsed.resum_equal("foo", "qux", assert_sorted=True, keep_all=False)
    # Without keep_all a single line is expected, carrying the full total.
    assert_equal(len(collapsed), 1)
    for row in collapsed.lines():
        summed = collapsed.extract_columns(row, "qux")[0]
        assert_equal(int(summed), qux_total)

    preserved = load_corpus()
    preserved.resum_equal("foo", "qux", assert_sorted=True, keep_all=True)
    # With keep_all the line count is unchanged and each line has the total.
    assert_equal(len(preserved), original_length)
    for row in preserved.lines():
        summed = preserved.extract_columns(row, "qux")[0]
        assert_equal(int(summed), qux_total)
Пример #2
0
def test_resum_equal_in_memory():
    # After resum_equal on "foo"/"qux", every remaining line must report the
    # original "qux" column total, both when lines are collapsed
    # (keep_all=False) and when they are all kept (keep_all=True).
    source = "tests/smallcorpus.txt.bz2"
    scratch = "tests/tmp/testcorpus"

    def assert_every_qux_is(linefile, expected):
        # Compare the integer value of "qux" on each line with `expected`.
        for entry in linefile.lines():
            value = linefile.extract_columns(entry, "qux")[0]
            assert_equal(int(value), expected)

    merged = LineFileInMemory(source,
                              header="foo bar baz qux".split(),
                              path=scratch)
    line_count = len(merged)
    column_sum = merged.sum_column("qux")
    merged.resum_equal("foo", "qux", assert_sorted=True, keep_all=False)
    assert_equal(len(merged), 1)
    assert_every_qux_is(merged, column_sum)

    kept = LineFileInMemory(source,
                            header="foo bar baz qux".split(),
                            path=scratch)
    kept.resum_equal("foo", "qux", assert_sorted=True, keep_all=True)
    assert_equal(len(kept), line_count)
    assert_every_qux_is(kept, column_sum)
Пример #3
0
def test_basics_in_memory():
    # Walk through header parsing plus make_column / delete_columns /
    # copy_column on an in-memory line file.
    base_header = "foo bar baz qux".split()
    extended_header = "foo bar baz qux quux".split()

    corpus = LineFileInMemory(
        "tests/smallcorpus.txt.bz2",
        header="foo bar baz qux",
        path="tests/tmp/testcorpus",
    )
    # The header was given as one string; it is expected back as a list.
    assert_equal(corpus.header, base_header)
    assert_equal(corpus.files, ["tests/smallcorpus.txt.bz2"])

    # Add a constant-valued column and check it on every line.
    corpus.make_column("quux", lambda x, y, z, w: "cat", "foo bar baz qux")
    assert_equal(corpus.header, extended_header)
    for row in corpus.lines(parts=False):
        made = corpus.extract_columns(row, "quux")
        assert_equal(made, ["cat"])

    # Dropping the column restores the initial header.
    corpus.delete_columns("quux")
    assert_equal(corpus.header, base_header)

    # Copy "qux" into "quux"; both must then agree on every line.
    corpus.copy_column("quux", "qux")
    assert_equal(corpus.header, extended_header)
    for row in corpus.lines(parts=False):
        source_value = corpus.extract_columns(row, "qux")
        copied_value = corpus.extract_columns(row, "quux")
        assert_equal(source_value, copied_value)
Пример #4
0
def test_basics_in_memory():
    # Basic column operations on an in-memory line file: the header is
    # checked after construction and after each of make_column,
    # delete_columns and copy_column.
    corpus_file = "tests/smallcorpus.txt.bz2"

    def always_cat(x, y, z, w):
        # Ignores the four column values and yields a constant.
        return "cat"

    linefile = LineFileInMemory(corpus_file,
                                header="foo bar baz qux",
                                path="tests/tmp/testcorpus")
    assert_equal(linefile.header, "foo bar baz qux".split())
    assert_equal(linefile.files, [corpus_file])

    linefile.make_column("quux", always_cat, "foo bar baz qux")
    assert_equal(linefile.header, "foo bar baz qux quux".split())
    for entry in linefile.lines(parts=False):
        assert_equal(linefile.extract_columns(entry, "quux"), ["cat"])

    linefile.delete_columns("quux")
    assert_equal(linefile.header, "foo bar baz qux".split())

    linefile.copy_column("quux", "qux")
    assert_equal(linefile.header, "foo bar baz qux quux".split())
    for entry in linefile.lines(parts=False):
        original = linefile.extract_columns(entry, "qux")
        duplicate = linefile.extract_columns(entry, "quux")
        assert_equal(original, duplicate)