Exemplo n.º 1
def test_non_counter_features_bad_serialize():
    '''A plain string feature value is rejected with ``SerializationError``.

    Two entry points are exercised: handing the value to the constructor,
    and assigning it to an existing collection before serializing.
    '''
    with pytest.raises(SerializationError):
        FeatureCollection({'NAME': 'foobaz'})

    fc = FeatureCollection()
    fc['NAME'] = 'foobaz'
    with pytest.raises(SerializationError):
        # NOTE(review): the body of this block was missing from the
        # excerpt; ``dumps()`` is the serialization entry point used
        # elsewhere in this file -- confirm against upstream.
        fc.dumps()
def test_readonly(counter_type):
    '''A read-only collection rejects in-place changes until unlocked.

    :param counter_type: counter class under test; it is called with a
        ``collections.Counter`` to build each feature value
    '''
    fc = FeatureCollection({
        'hello': counter_type(Counter('hello')),
        'goodbye': counter_type(Counter('goodbye')),
    })
    fc2 = FeatureCollection({
        'hello': counter_type(Counter('hello')),
        'goodbye': counter_type(Counter('goodbye')),
    })

    fc.read_only = True
    with pytest.raises(ReadOnlyException):
        fc += fc2

    with pytest.raises(ReadOnlyException):
        fc -= fc2

    with pytest.raises(ReadOnlyException):
        fc *= 2

    with pytest.raises(ReadOnlyException):
        fc['woof'] = StringCounter()

    # Nested writes are only checked for counter types that expose a
    # ``read_only`` attribute of their own.
    if hasattr(counter_type, 'read_only'):
        with pytest.raises(ReadOnlyException):
            fc['hello']['l'] = 3
        with pytest.raises(ReadOnlyException):
            fc['hello']['l'] += 3

    # Once unlocked, the same operations go through.
    fc.read_only = False
    fc += fc2
    # 'hello' holds h, e, o once and l twice; doubled -> three 2s, one 4.
    assert Counter(map(abs, fc['hello'].values())) == Counter({2: 3, 4: 1})
    fc -= fc2
    fc -= fc2
    assert Counter(map(abs, fc['hello'].values())) == Counter()
Exemplo n.º 3
def test_ignored():
    '''Features whose name starts with ``_`` are skipped by ``dumps()``.'''
    fc = FeatureCollection()
    fc['foo'] = 'bar'
    with pytest.raises(SerializationError):
        # NOTE(review): the body of this block was missing from the
        # excerpt; restored as the same ``dumps()`` call that succeeds
        # for the underscore-prefixed name below -- confirm upstream.
        fc.dumps()

    fc = FeatureCollection()
    fc['_foo'] = 'bar'
    fc.dumps()  # _foo is ignored!
def test_read_only_binop():
    '''Adding two read-only collections yields a new, writable collection.'''
    left = FeatureCollection({'NAME': {'foo': 1, 'bar': 1}})
    right = FeatureCollection({'NAME': {'foo': 2, 'bar': 2}})
    for operand in (left, right):
        operand.read_only = True

    total = left + right

    # Counts are summed per key, and the read-only flag is not inherited.
    assert total == FeatureCollection({'NAME': {'foo': 3, 'bar': 3}})
    assert not total.read_only
Exemplo n.º 5
def test_no_bytes_allowed():
    '''Unicode string features serialize; plain ``'...'`` literals do not.

    NOTE(review): presumably written for Python 2, where ``'bar'`` is a
    byte string and ``u'bar'`` is text -- confirm the target interpreter.
    '''
    fc = FeatureCollection({'foo': u'bar'})
    fc.dumps()  # OK!

    with pytest.raises(SerializationError):
        FeatureCollection({'foo': 'bar'})

    fc = FeatureCollection()
    fc['foo'] = 'bar'
    with pytest.raises(SerializationError):
        # NOTE(review): the body of this block was missing from the
        # excerpt; restored as the ``dumps()`` call used above --
        # confirm against upstream.
        fc.dumps()
Exemplo n.º 6
def test_type(counter_type):
    '''In-place addition keeps the collection an exact ``FeatureCollection``.'''
    target = FeatureCollection()
    target['bow'] += counter_type(Counter(['big', 'dog']))
    assert type(target) == FeatureCollection

    addend = FeatureCollection()
    addend['bow'] += counter_type(Counter(['cat']))

    # Merging another collection must not change the concrete type.
    target += addend
    assert type(target) == FeatureCollection
Exemplo n.º 7
def test_type(counter_type):
    '''``+=`` between collections leaves a ``FeatureCollection`` instance.'''
    base = FeatureCollection()
    base['bow'] += counter_type(Counter(['big', 'dog']))
    # Only the StringCounter variant gets an extra key bumped beforehand.
    if counter_type.__name__ == 'StringCounter':
        base['bow']['a'] += 1
    assert isinstance(base, FeatureCollection)

    other = FeatureCollection()
    other['bow'] += counter_type(Counter(['cat']))
    base += other
    assert isinstance(base, FeatureCollection)
Exemplo n.º 8
def test_binop_no_share():
    '''``+`` leaves its operands untouched; ``+=`` changes only the left one.'''
    left = FeatureCollection({'NAME': {'foo': 1, 'bar': 1}})
    right = FeatureCollection({'NAME': {'foo': 2, 'bar': 2}})

    combined = left + right
    # The binary operator must not have modified either operand.
    assert left['NAME']['foo'] == 1
    assert right['NAME']['foo'] == 2

    # The in-place form reaches the same total but mutates only ``left``.
    left += right
    assert left == combined
    assert left['NAME']['foo'] == 3
    assert right['NAME']['foo'] == 2
Exemplo n.º 9
def test_fc_eq(counter_type):
    '''Collections compare equal exactly when their counters match.'''
    def build(farewell):
        # Each call creates fresh counter objects, so nothing is shared
        # between the collections being compared.
        return FeatureCollection({
            'hello': counter_type(Counter('hello')),
            'goodbye': counter_type(Counter(farewell)),
        })

    fc1 = build('goodbye')
    fc2 = build('goodbye')
    fc3 = build('goodbye2')

    assert fc1 == fc2
    assert fc1 != fc3
Exemplo n.º 10
def test_binop_different_no_share():
    '''A sum of collections with disjoint features shares no counters.'''
    only_foo = FeatureCollection({'FOO': {'foo': 1}})
    only_bar = FeatureCollection({'BAR': {'bar': 1}})

    merged = only_foo + only_bar
    assert merged == FeatureCollection({'FOO': {'foo': 1}, 'BAR': {'bar': 1}})

    # Writing through the result must not leak back into either operand.
    merged['BAR']['bar'] = 2
    assert only_bar['BAR']['bar'] == 1

    merged['FOO']['foo'] = 2
    assert only_foo['FOO']['foo'] == 1
def test_read_only():
    '''Every mutating operation on a read-only collection raises.'''
    fcwork = FeatureCollection({'feat': {'foo': 1}})

    fc = FeatureCollection()
    fc['feat']['foo'] += 1  # populated while still writable
    fc.read_only = True
    with pytest.raises(ReadOnlyException):
        fc += fcwork
    with pytest.raises(ReadOnlyException):
        fc -= fcwork
    # NOTE(review): this repeats the previous check verbatim, as in the
    # original; possibly a different operator was intended.
    with pytest.raises(ReadOnlyException):
        fc -= fcwork
    with pytest.raises(ReadOnlyException):
        del fc['feat']
    with pytest.raises(ReadOnlyException):
        # NOTE(review): the body of this block was missing from the
        # excerpt; in-place scaling is used because it is asserted to
        # raise elsewhere in this file -- confirm against upstream.
        fc *= 2
Exemplo n.º 12
def test_default(counter_type):
    '''A missing feature defaults to a counter that survives ``+=``/``-=``.'''
    coll = FeatureCollection()

    def dog():
        # A fresh counter per call, exactly as the inline expressions did.
        return counter_type(Counter('dog'))

    def check_type():
        assert isinstance(coll['foo'], counter_type), \
            'failed and made %s' % type(coll['foo'])

    assert isinstance(coll['foo'], counter_type)

    coll['foo'] += dog()
    check_type()

    coll['foo'] -= dog()
    check_type()

    # NOTE(review): 'substract' looks like a misspelling of 'subtract',
    # so this branch is presumably never taken; a call inside it also
    # appears to be missing. Left untouched because the final histogram
    # below depends on the current behaviour.
    if hasattr(coll['foo'], 'substract'):
        check_type()
        coll['foo'] += dog()

    coll['foo'] += dog()
    check_type()

    coll['foo'] += dog()
    coll['foo'] += counter_type(Counter('dog cat'))
    assert Counter(map(abs, coll['foo'].values())) == Counter({1: 4, 3: 3})
def test_read_only_preserved_after_serialized():
    '''The read-only flag survives a ``dumps``/``loads`` round trip.'''
    locked = FeatureCollection({'NAME': {'foo': 1, 'baz': 2}})
    locked.read_only = True

    restored = FeatureCollection.loads(locked.dumps())

    assert restored.read_only
    # The restored copy must also refuse writes to its counters.
    with pytest.raises(ReadOnlyException):
        restored['NAME']['foo'] += 1
Exemplo n.º 14
def test_ft_roundtrip():
    '''Round-trip a collection through ``dumps``/``loads``, compare ``@NAME``.'''
    fc = FeatureCollection()
    # NOTE(review): the statement this tuple belongs to appears to be
    # missing from the excerpt (presumably it populated ``fc['@NAME']``);
    # restore it from upstream before running.
        ('nltk', 5, 2),
    fc2 = FeatureCollection.loads(fc.dumps())
    assert fc['@NAME'] == fc2['@NAME']
Exemplo n.º 15
def test_eq(counter_type):
    '''Collections compare equal exactly when their counters match.

    :param counter_type: counter class under test; it is called with a
        ``collections.Counter`` to build each feature value
    '''
    mc1 = FeatureCollection({
        'hello': counter_type(Counter('hello')),
        'goodbye': counter_type(Counter('goodbye')),
    })
    mc2 = FeatureCollection({
        'hello': counter_type(Counter('hello')),
        'goodbye': counter_type(Counter('goodbye')),
    })
    # Differs from the other two only in the 'goodbye' counter.
    mc3 = FeatureCollection({
        'hello': counter_type(Counter('hello')),
        'goodbye': counter_type(Counter('goodbye2')),
    })

    assert mc1 == mc2
    assert mc1 != mc3
Exemplo n.º 16
def test_fc_chunk():
    '''Read two collections back out of a serialized chunk.'''
    first = FeatureCollection({'NAME': {'foo': 2, 'baz': 1}})
    second = FeatureCollection({'NAME': {'foo': 4, 'baz': 2}})

    stream = StringIO()
    # NOTE(review): nothing is written to this chunk before the buffer
    # is inspected; the statements adding ``first``/``second`` appear to
    # be missing from the excerpt -- restore from upstream.
    chunk = FeatureCollectionChunk(file_obj=stream, mode='wb')

    blob = stream.getvalue()
    assert blob

    # Re-open the captured bytes for reading and compare record by record.
    stream = StringIO(blob)
    chunk = FeatureCollectionChunk(file_obj=stream, mode='rb')
    loaded_first, loaded_second = list(chunk)
    assert first == loaded_first
    assert second == loaded_second
Exemplo n.º 17
def test_string_counter_serialize():
    '''A ``StringCounter`` feature keeps its counts across serialization.'''
    source = FeatureCollection()
    source['thing1'] = StringCounter()
    source['thing1']['foo'] += 1

    blob = source.dumps()
    restored = FeatureCollection.loads(blob)

    assert restored['thing1']['foo'] == 1
Exemplo n.º 18
def test_meta_adding(counter_type):
    '''Adding a collection to itself doubles every count.

    :param counter_type: counter class under test; it is called with a
        ``collections.Counter`` to build each feature value
    '''
    mc = FeatureCollection({
        'hello': counter_type(Counter('hello')),
        'goodbye': counter_type(Counter('goodbye')),
    })
    mc2 = mc + mc

    # 'hello' holds h, e, o once and l twice; doubled -> three 2s, one 4.
    assert Counter(map(abs, mc2['hello'].values())) == Counter({2: 3, 4: 1})
Exemplo n.º 19
def test_build_from_dict(counter_type):
    '''A collection built from a dict keeps the counts and counter type.

    :param counter_type: counter class under test; it is called with a
        ``collections.Counter`` to build each feature value
    '''
    mc = FeatureCollection({
        'hello': counter_type(Counter('hello')),
        'goodbye': counter_type(Counter('goodbye')),
    })

    # 'hello' holds h, e, o once and l twice -> three 1s and one 2.
    assert Counter(map(abs, mc['hello'].values())) == Counter({1: 3, 2: 1})
    assert isinstance(mc['hello'], counter_type)
Exemplo n.º 20
def test_fc_meta_adding_complex(counter_type):
    '''``+``, ``+=`` and repeated ``-=`` produce the expected count shapes.'''
    def build():
        # Fresh counters on every call so the two collections share nothing.
        return FeatureCollection({
            'hello': counter_type(Counter('hello')),
            'goodbye': counter_type(Counter('goodbye')),
        })

    def hello_histogram(coll):
        # How many keys of 'hello' carry each absolute count.
        return Counter(map(abs, coll['hello'].values()))

    fc = build()
    fc2 = build()
    fc3 = fc + fc2

    assert hello_histogram(fc3) == Counter({2: 3, 4: 1})
    fc += fc2
    assert hello_histogram(fc) == Counter({2: 3, 4: 1})

    # Taking fc2 away once restores the single counts ...
    fc3 -= fc2
    assert hello_histogram(fc3) == Counter({1: 3, 2: 1})

    # ... and taking it away again leaves nothing behind.
    fc3 -= fc2
    assert hello_histogram(fc3) == Counter()
def test_read_only_features():
    '''Counters inside a read-only collection reject mutation.'''
    fc = FeatureCollection({'feat': StringCounter({'foo': 1})})
    fc['feat']['foo'] += 1  # still writable at this point
    fc.read_only = True

    with pytest.raises(ReadOnlyException):
        fc['feat']['foo'] += 1
    with pytest.raises(ReadOnlyException):
        # NOTE(review): the body of this block was missing from the
        # excerpt; a plain item assignment is used as the restored
        # operation -- confirm against upstream.
        fc['feat']['foo'] = 3
    with pytest.raises(ReadOnlyException):
        del fc['feat']['foo']
Exemplo n.º 22
def test_serialize_deserialize(counter_type):
    '''Build a collection, round-trip it, and compare it with the original.'''
    original = FeatureCollection()
    # (feature name, items counted into it), applied in this order.
    additions = (
        ('bow', ['big', 'dog']),
        ('bow', 'tall building'),
        ('bon', ['Super Cat', 'Small Cat', 'Tiger Fish']),
    )
    for feature, items in additions:
        original[feature] += counter_type(Counter(items))

    restored = FeatureCollection.loads(original.dumps())
    assert_same_fc(original, restored)
Exemplo n.º 23
def test_meta_adding_complex(counter_type):
    '''``+``, ``+=`` and repeated ``-=`` produce the expected count shapes.

    :param counter_type: counter class under test; it is called with a
        ``collections.Counter`` to build each feature value
    '''
    mc = FeatureCollection({
        'hello': counter_type(Counter('hello')),
        'goodbye': counter_type(Counter('goodbye')),
    })
    mc2 = FeatureCollection({
        'hello': counter_type(Counter('hello')),
        'goodbye': counter_type(Counter('goodbye')),
    })
    mc3 = mc + mc2

    # 'hello' holds h, e, o once and l twice; doubled -> three 2s, one 4.
    assert Counter(map(abs, mc3['hello'].values())) == Counter({2: 3, 4: 1})
    mc += mc2
    assert Counter(map(abs, mc['hello'].values())) == Counter({2: 3, 4: 1})

    ## isub tests
    mc3 -= mc2
    assert Counter(map(abs, mc3['hello'].values())) == Counter({1: 3, 2: 1})

    mc3 -= mc2
    assert Counter(map(abs, mc3['hello'].values())) == Counter()
Exemplo n.º 24
def test_thing_serializer():
    '''A ``Thing`` feature round-trips when ``ThingSerializer`` is registered.'''
    # NOTE(review): presumably the ``with`` block scopes the registration
    # to this test -- confirm against the registry implementation.
    with registry:
        registry.add('StringCounter', ThingSerializer)

        payload = json.dumps(dict(hello='people'))
        original = FeatureCollection()
        original['thing1'] = Thing(payload)
        original['thing1']['another'] = 'more'

        restored = FeatureCollection.loads(original.dumps())

        expected_pairs = (
            ('another', 'more'),
            ('hello', 'people'),
            ('doing', 'something'),
        )
        for key, value in expected_pairs:
            assert restored['thing1'][key] == value
Exemplo n.º 25
def test_json_serializer():
    '''``StringCounter`` features round-trip through ``JsonSerializer``.'''
    # NOTE(review): presumably the ``with`` block scopes the registration
    # to this test -- confirm against the registry implementation.
    with registry:
        registry.add('StringCounter', JsonSerializer)

        original = FeatureCollection()
        original['thing2'] = StringCounter(dict(hello='people'))
        original['thing2']['another'] = 5
        original['thing3'] = StringCounter(dict(hello='people2'))

        restored = FeatureCollection.loads(original.dumps())

        expected_triples = (
            ('thing2', 'another', 5),
            ('thing2', 'hello', 'people'),
            ('thing3', 'hello', 'people2'),
        )
        for feature, key, value in expected_triples:
            assert restored[feature][key] == value
Exemplo n.º 26
def forum_post_features(row):
    '''Build a feature collection describing one forum post.

    Every key of ``row['author']`` becomes a ``post_author_<key>``
    feature. If ``row`` has ``image_urls``, they are tallied into an
    ``image_url`` counter. A fixed set of optional post fields is copied
    to ``post_<field>`` after passing through ``uni``.

    :param row: mapping for one post; must contain ``'author'``
    :return: the populated ``FeatureCollection``
    '''
    features = FeatureCollection()

    author = row['author']
    for field in author:
        features['post_author_' + field] = author[field]

    if 'image_urls' in row:
        features['image_url'] = StringCounter()
        for url in row['image_urls']:
            # Indexed through the collection each time, as the stored
            # counter may not be the object that was assigned.
            features['image_url'][url] += 1

    optional_fields = (
        'parent_id', 'thread_id', 'thread_link', 'thread_name', 'title',
    )
    for field in optional_fields:
        if field not in row:
            continue
        features['post_' + field] = uni(row[field])
    return features
    def add_folder(self, folder_id, ann_id=None):
        '''Add a folder.

        If ``ann_id`` is set, then the folder is owned by the given user.
        Otherwise, the folder is owned and viewable by all anonymous
        users.

        :param str folder_id: Folder id
        :param str ann_id: Username
        '''
        ann_id = self._annotator(ann_id)
        cid = self.wrap_folder_content_id(ann_id, folder_id)
        # The folder is recorded as an empty feature collection stored
        # under its wrapped content id.
        self.store.put([(cid, FeatureCollection())])
        logger.info('Added folder %r with content id %r', folder_id, cid)
Exemplo n.º 28
def perftest_throughput_feature_collection():
    '''Time repeated ``loads`` of a roughly 1 MB collection and print the rate.'''
    with registry:
        registry.add('StringCounter', ThingSerializer)

        one_mb_payload = json.dumps(dict(one_mb=' ' * 2**20))
        source = FeatureCollection()
        source['thing1'] = Thing(one_mb_payload)
        blob = source.dumps()

        started = time.time()
        num = 1000
        for _ in range(num):
            fc2 = FeatureCollection.loads(blob)
        elapsed = time.time() - started

        # One load handles about one megabyte, so loads/sec reads as MB/sec.
        rate = float(num) / elapsed
        print('%d MB in %.1f sec --> %.1f MB per sec' % (num, elapsed, rate))
Exemplo n.º 29
def test_multiset_change(counter_type):
    '''Replacing a feature resets its counts; ``+=`` then accumulates.'''
    coll = FeatureCollection()
    coll['bow'] += counter_type(Counter(['big', 'dog']))
    # NOTE(review): an emptiness check right after populating 'bow'
    # suggests a removal step is missing from the excerpt -- confirm
    # against upstream.
    assert dict(coll.items()) == {}

    ## can pop empty -- fails

    ## set equal to
    replacement = ['big2', 'dog2']
    coll['bow'] = counter_type(Counter(replacement))
    assert list(map(abs, coll['bow'].values())) == [1, 1]

    coll['bow'] += counter_type(Counter(replacement))
    assert list(map(abs, coll['bow'].values())) == [2, 2]
Exemplo n.º 30
def test_entity(counter_type):
    '''Build a collection, double it in place, and round-trip it twice.'''
    fc1 = FeatureCollection()
    for feature, items in (
        ('bow', ['big', 'dog']),
        ('bow', 'tall building'),
        ('bon', ['Super Cat', 'Small Cat', 'Tiger Fish']),
    ):
        fc1[feature] += counter_type(Counter(items))

    def bow_histogram():
        # How many 'bow' keys carry each absolute count.
        return Counter(map(abs, fc1['bow'].values()))

    ## ten keys occur exactly once: 'big', 'dog' and eight of the
    ## characters in 'tall building'
    assert bow_histogram()[1] == 10, fc1['bow'].items()

    ## double the counts, should recurse down
    fc1 += fc1

    ## check values doubled
    assert bow_histogram()[2] == 10, fc1['bow'].items()

    ## serialize/deserialize it
    blob = fc1.dumps()
    assert_same_fc(fc1, FeatureCollection.loads(blob))

    ## second round trip from a fresh dumps() call
    fc2 = FeatureCollection.loads(fc1.dumps())
    assert_same_fc(fc1, fc2)