示例#1
0
文件: app.py 项目: tobgu/qcache
    def post(self, dataset_key, optional_q):
        """Handle a POST: either query an existing dataset or store a new one.

        When ``optional_q`` is truthy the decoded request body is passed
        through ``q_json_to_dict`` and, unless that yields ``None``, used to
        query ``dataset_key``. Nothing is stored in that case.

        Otherwise the decoded body is parsed into a QFrame (from CSV when the
        request content type is CSV, from JSON otherwise) and put in the
        dataset cache under ``dataset_key``, replacing any existing entry.
        Store statistics are recorded and an empty body is written with status
        CREATED.
        """
        if optional_q:
            query = self.q_json_to_dict(decoded_body(self.request))
            if query is not None:
                self.query(dataset_key, query)
            return

        started_at = time.time()
        self.operation = "store"

        # Drop any previous dataset stored under the same key before the new
        # one is parsed.
        if dataset_key in self.dataset_cache:
            self.stats.inc("replace_count")
            del self.dataset_cache[dataset_key]

        content_type = self.content_type()
        body = decoded_body(self.request)
        if content_type == CONTENT_TYPE_CSV:
            # Room is made in the cache before the body is parsed.
            evicted_durations = self.dataset_cache.ensure_free(len(body))
            frame = QFrame.from_csv(
                body,
                column_types=self.dtypes(),
                stand_in_columns=self.stand_in_columns())
        else:
            # This is a waste of CPU cycles: the JSON decoder first decodes all
            # strings from UTF-8, then they are immediately encoded back into
            # UTF-8. No easy way around it has been found though.
            evicted_durations = self.dataset_cache.ensure_free(len(body) / 2)
            rows = json.loads(body, cls=UTF8JSONDecoder)
            frame = QFrame.from_dicts(
                rows,
                stand_in_columns=self.stand_in_columns())

        self.dataset_cache[dataset_key] = frame
        self.set_status(ResponseCode.CREATED)

        stats = self.stats
        stats.inc("size_evict_count", count=len(evicted_durations))
        stats.inc("store_count")
        stats.append("store_row_counts", len(frame))
        stats.append("store_durations", time.time() - started_at)
        stats.extend("durations_until_eviction", evicted_durations)
        self.write("")
示例#2
0
def test_large_frame_msgpack(large_frame):
    """Time a msgpack round trip (to_msgpack / from_msgpack) of the large frame.

    NOTE: This implementation does not exist but once did as an experiment.
          The test is left as a reference and reminder.
    """
    with timeit('to_msgpack'):
        packed = large_frame.to_msgpack()

    with timeit('from_msgpack'):
        QFrame.from_msgpack(packed)
示例#3
0
def test_sub_select(data, engine):
    """Filter with an 'in' clause whose operand is a nested query.

    The frame built from ``data`` is queried using the given filter engine
    and the result is checked against rows 1 and 2.
    """
    nested_query = {'where': ['==', 'foo', 2]}
    query = {'where': ['in', 'bar', nested_query]}

    source_frame = QFrame.from_csv(data)
    matching = source_frame.query(query, filter_engine=engine)

    assert_rows(matching, [1, 2])
示例#4
0
def test_unicode_content_from_dicts():
    """Query a frame built from dicts holding non-ASCII 'bar' values.

    One value is given as a unicode literal and the other as UTF-8 encoded
    text; filtering on the latter is expected to yield the 'bbb' row.
    """
    rows = [
        {'foo': 'aaa', 'bar': u'Iñtërnâtiônàližætiøn'},
        {'foo': 'bbb', 'bar': u'räksmörgås'.encode(encoding='utf-8')},
    ]
    query = {'where': ["==", "bar", u"'räksmörgås'"]}

    matched = QFrame.from_dicts(rows).query(query)

    assert_rows(matched, ['bbb'])
示例#5
0
def test_count_without_aggregation(basic_frame):
    """Selecting a bare count, with no group_by, gives a single 'count' of 3."""
    expected_frame = QFrame.from_csv("""
count
3""")

    query = {'select': [['count']]}
    counted = basic_frame.query(query)

    assert counted.to_csv() == expected_frame.to_csv()
示例#6
0
def test_max_without_aggregation(basic_frame):
    """Selecting max of 'baz', with no group_by, gives a single 'baz' of 9."""
    expected_frame = QFrame.from_csv("""
baz
9""")

    query = {'select': [['max', 'baz']]}
    maximum = basic_frame.query(query)

    assert maximum.to_csv() == expected_frame.to_csv()
示例#7
0
def frame_with_zero():
    """Build a two-row QFrame (columns foo, bar) where one 'bar' value is 0."""
    return QFrame.from_csv("""
foo,bar
1,0
1,11""")
示例#8
0
def test_sub_select_in_column_missing_in_sub_select(engine):
    """An 'in' filter on 'bar' must fail when the sub select only selects 'foo'.

    The query is expected to raise MalformedQueryException.
    """
    source_frame = QFrame.from_csv("""foo,bar
    1,aa""")

    nested_query = {'select': ['foo'],
                    'where': ['==', 'foo', 2]}
    query = {'where': ['in', 'bar', nested_query]}

    with pytest.raises(MalformedQueryException):
        source_frame.query(query, filter_engine=engine)
示例#9
0
def string_frame():
    """Build a four-row QFrame with a numeric 'foo' and a string 'bar' column.

    The data rows in the CSV literal carry leading whitespace, which is
    passed to the parser unchanged.
    """
    return QFrame.from_csv("""foo,bar
    1,abcd
    2,defg
    3,ghij
    4,gxyj""")
示例#10
0
def subselect_frame():
    """Build a three-row QFrame with the columns 'foo' and 'bar'."""
    return QFrame.from_csv("""
foo,bar
1,10
1,15
5,50""")
示例#11
0
def basic_frame():
    """Build a three-row QFrame with the columns foo, bar, baz and qux.

    The last row has an empty 'bar' field.
    """
    return QFrame.from_csv("""
foo,bar,baz,qux
bbb,1.25,5,qqq
aaa,3.25,7,qqq
ccc,,9,www""")
示例#12
0
def bitwise_frame():
    """Build a five-row QFrame with the columns foo, bar and baz.

    The data rows in the CSV literal carry leading whitespace, which is
    passed to the parser unchanged.
    """
    return QFrame.from_csv("""foo,bar,baz
    1,1.5,abc
    2,1.5,def
    3,1.5,ghi
    4,1.5,ijk
    5,1.5,lmn""")
示例#13
0
def calculation_frame():
    """Build a five-row QFrame with the numeric columns 'foo' and 'bar'."""
    return QFrame.from_csv("""
foo,bar
1,10
1,11
2,20
3,30
3,33""")
示例#14
0
def test_unicode_content_from_csv():
    """Query a frame built from unicode CSV holding non-ASCII 'bar' values.

    Filtering on one of the non-ASCII values is expected to yield the 'bbb'
    row.
    """
    csv_text = u"""foo,bar
aaa,Iñtërnâtiônàližætiøn
bbb,räksmörgås
ccc,"""
    query = {'where': ["==", "bar", u"'räksmörgås'"]}

    matched = QFrame.from_csv(csv_text).query(query)

    assert_rows(matched, ['bbb'])
示例#15
0
def test_basic_count_aggregation(basic_frame):
    """Counting 'baz' grouped by 'qux' gives 2 for 'qqq' and 1 for 'www'."""
    expected_frame = QFrame.from_csv("""
qux,baz
qqq,2
www,1""")

    query = {
        'select': ['qux', ['count', 'baz']],
        'group_by': ['qux'],
    }
    counted = basic_frame.query(query)

    assert counted.to_csv() == expected_frame.to_csv()
示例#16
0
def test_basic_sum_aggregation(basic_frame):
    """Summing 'baz' grouped by 'qux', ordered by 'baz', gives www=9 then qqq=12."""
    expected_frame = QFrame.from_csv("""
qux,baz
www,9
qqq,12""")

    query = {
        'select': ['qux', ['sum', 'baz']],
        'group_by': ['qux'],
        'order_by': ['baz'],
    }
    summed = basic_frame.query(query)

    assert summed.to_csv() == expected_frame.to_csv()
示例#17
0
def test_large_frame_csv(large_frame):
    """Time a CSV round trip (to_csv / from_csv) of the large frame."""
    with timeit('to_csv'):
        serialized = large_frame.to_csv()

    with timeit('from_csv'):
        QFrame.from_csv(serialized)
示例#18
0
def large_frame():
    """Build a QFrame of one million identical rows (an int, a string and a float)."""
    row = {'aaa': 123456789, 'bbb': 'abcdefghijklmnopqrvwxyz', 'ccc': 1.23456789}
    # The same dict object is repeated rather than copied per row.
    rows = 1000000 * [row]
    return QFrame.from_dicts(rows)
示例#19
0
def test_enum_from_dicts(enum_frame):
    """A frame loaded from dicts with 'foo' typed as category is smaller.

    The same dict data is loaded with and without the category column type
    and the resulting byte sizes are compared.
    """
    categorical = QFrame.from_dicts(enum_frame.to_dicts(),
                                    column_types={'foo': 'category'})
    plain = QFrame.from_dicts(enum_frame.to_dicts())

    categorical_size = categorical.byte_size()
    plain_size = plain.byte_size()
    assert categorical_size < plain_size
示例#20
0
def test_enum_size(enum_frame, enum_data):
    """The enum frame must take fewer bytes than a frame with default types.

    Space savings should be possible using categoricals when multiple rows
    containing the same value exist.
    """
    plain = QFrame.from_csv(enum_data)

    enum_size = enum_frame.byte_size()
    plain_size = plain.byte_size()
    assert enum_size < plain_size
示例#21
0
def enum_frame(enum_data):
    """Build a QFrame from ``enum_data`` with the 'foo' column typed as category."""
    column_types = {'foo': 'category'}
    return QFrame.from_csv(enum_data, column_types=column_types)