Пример #1
0
def test_max_without_aggregation(basic_frame):
    """A ``max`` select without ``group_by`` should serialize as ``baz`` / ``9``."""
    expected_csv = QFrame.from_csv("\nbaz\n9").to_csv()

    result = basic_frame.query({'select': [['max', 'baz']]})

    assert result.to_csv() == expected_csv
Пример #2
0
    def post(self, dataset_key, optional_q):
        """Handle a POST by either querying or storing the dataset at ``dataset_key``.

        When ``optional_q`` is truthy the request body is converted to a query
        dict and, if that conversion yields something other than ``None``, the
        query is run; nothing is stored in that case.

        Otherwise the request body is parsed into a ``QFrame`` (from CSV when
        the content type equals ``CONTENT_TYPE_CSV``, from JSON otherwise),
        stored in ``self.dataset_cache`` under ``dataset_key``, the response
        status is set to ``ResponseCode.CREATED`` and store statistics are
        recorded.
        """
        if optional_q:
            query = self.q_json_to_dict(decoded_body(self.request))
            if query is None:
                # NOTE(review): presumably q_json_to_dict has already reported
                # the problem to the client when it returns None -- confirm.
                return
            self.query(dataset_key, query)
            return

        started_at = time.time()
        self.operation = "store"

        # An existing dataset under the same key is removed (and counted as a
        # replacement) before room for the new one is requested.
        if dataset_key in self.dataset_cache:
            self.stats.inc("replace_count")
            del self.dataset_cache[dataset_key]

        content_type = self.content_type()
        body = decoded_body(self.request)
        is_csv = content_type == CONTENT_TYPE_CSV

        # CSV input reserves the full body length, JSON input half of it.
        # NOTE(review): under Python 3 ``/`` is true division, so the JSON case
        # passes a float to ensure_free -- confirm that this is acceptable.
        required_size = len(body) if is_csv else len(body) / 2
        evicted_durations = self.dataset_cache.ensure_free(required_size)

        if is_csv:
            frame = QFrame.from_csv(
                body,
                column_types=self.dtypes(),
                stand_in_columns=self.stand_in_columns())
        else:
            # Known inefficiency: the JSON decoder first decodes every string
            # from UTF-8 and they are then immediately encoded back to UTF-8.
            # No easy way around that has been found.
            records = json.loads(body, cls=UTF8JSONDecoder)
            frame = QFrame.from_dicts(records, stand_in_columns=self.stand_in_columns())

        self.dataset_cache[dataset_key] = frame
        self.set_status(ResponseCode.CREATED)

        stats = self.stats
        stats.inc("size_evict_count", count=len(evicted_durations))
        stats.inc("store_count")
        stats.append("store_row_counts", len(frame))
        stats.append("store_durations", time.time() - started_at)
        stats.extend("durations_until_eviction", evicted_durations)
        self.write("")
Пример #3
0
def frame_with_zero():
    """Return a two-row QFrame (columns ``foo``, ``bar``) whose ``bar`` includes 0."""
    csv_lines = ["", "foo,bar", "1,0", "1,11"]
    # The leading empty entry reproduces the blank first line of the CSV text.
    return QFrame.from_csv("\n".join(csv_lines))
Пример #4
0
def test_count_without_aggregation(basic_frame):
    """A bare ``count`` select without ``group_by`` should serialize as ``count`` / ``3``."""
    expected_csv = QFrame.from_csv("\ncount\n3").to_csv()

    result = basic_frame.query({'select': [['count']]})

    assert result.to_csv() == expected_csv
Пример #5
0
def test_sub_select(data, engine):
    """An ``in`` filter on ``bar`` fed by a sub query should match ``[1, 2]``.

    The sub query filters on ``foo == 2``; the outer query is evaluated with
    the filter engine supplied through ``engine``.
    """
    sub_query = {'where': ['==', 'foo', 2]}
    outer_query = {'where': ['in', 'bar', sub_query]}

    result = QFrame.from_csv(data).query(outer_query, filter_engine=engine)

    assert_rows(result, [1, 2])
Пример #6
0
def subselect_frame():
    """Return a three-row QFrame with integer-looking columns ``foo`` and ``bar``."""
    csv_text = (
        "\n"
        "foo,bar\n"
        "1,10\n"
        "1,15\n"
        "5,50"
    )
    return QFrame.from_csv(csv_text)
Пример #7
0
def basic_frame():
    """Return a three-row QFrame with columns ``foo``, ``bar``, ``baz`` and ``qux``.

    The last row has an empty ``bar`` field.
    """
    csv_lines = [
        "",  # the CSV text starts with a blank line
        "foo,bar,baz,qux",
        "bbb,1.25,5,qqq",
        "aaa,3.25,7,qqq",
        "ccc,,9,www",
    ]
    return QFrame.from_csv("\n".join(csv_lines))
Пример #8
0
def test_sub_select_in_column_missing_in_sub_select(engine):
    """An ``in`` on ``bar`` whose sub query only selects ``foo`` must be rejected."""
    # The data row keeps its four leading spaces, exactly as in the raw CSV text.
    frame = QFrame.from_csv("foo,bar\n    1,aa")

    sub_query = {'select': ['foo'],
                 'where': ['==', 'foo', 2]}
    outer_query = {'where': ['in', 'bar', sub_query]}

    with pytest.raises(MalformedQueryException):
        frame.query(outer_query, filter_engine=engine)
Пример #9
0
def string_frame():
    """Return a four-row QFrame with a numeric-looking ``foo`` and a text ``bar``."""
    # Every data row is indented by four spaces inside the CSV text itself.
    csv_text = (
        "foo,bar\n"
        "    1,abcd\n"
        "    2,defg\n"
        "    3,ghij\n"
        "    4,gxyj"
    )
    return QFrame.from_csv(csv_text)
Пример #10
0
def bitwise_frame():
    """Return a five-row QFrame with columns ``foo``, ``bar`` and ``baz``.

    ``foo`` runs from 1 to 5 and ``bar`` is 1.5 on every row.
    """
    # Every data row is indented by four spaces inside the CSV text itself.
    csv_text = (
        "foo,bar,baz\n"
        "    1,1.5,abc\n"
        "    2,1.5,def\n"
        "    3,1.5,ghi\n"
        "    4,1.5,ijk\n"
        "    5,1.5,lmn"
    )
    return QFrame.from_csv(csv_text)
Пример #11
0
def calculation_frame():
    """Return a five-row QFrame with columns ``foo`` and ``bar``."""
    csv_lines = ["", "foo,bar", "1,10", "1,11", "2,20", "3,30", "3,33"]
    # The leading empty entry reproduces the blank first line of the CSV text.
    return QFrame.from_csv("\n".join(csv_lines))
Пример #12
0
def test_unicode_content_from_csv():
    """Non-ASCII CSV content can be loaded and matched with an ``==`` filter."""
    csv_text = u"""foo,bar
aaa,Iñtërnâtiônàližætiøn
bbb,räksmörgås
ccc,"""
    # The compared value carries its own single quotes inside the string.
    wanted = u"'räksmörgås'"

    matches = QFrame.from_csv(csv_text).query({'where': ["==", "bar", wanted]})

    assert_rows(matches, ['bbb'])
Пример #13
0
def test_basic_count_aggregation(basic_frame):
    """Counting ``baz`` grouped by ``qux`` should give 2 for ``qqq`` and 1 for ``www``."""
    expected_csv = QFrame.from_csv("\nqux,baz\nqqq,2\nwww,1").to_csv()
    query = {'select': ['qux', ['count', 'baz']],
             'group_by': ['qux']}

    result = basic_frame.query(query)

    assert result.to_csv() == expected_csv
Пример #14
0
def test_basic_sum_aggregation(basic_frame):
    """Summing ``baz`` per ``qux``, ordered by ``baz``, should list ``www`` before ``qqq``."""
    expected_csv = QFrame.from_csv("\nqux,baz\nwww,9\nqqq,12").to_csv()
    query = {'select': ['qux', ['sum', 'baz']],
             'group_by': ['qux'],
             'order_by': ['baz']}

    result = basic_frame.query(query)

    assert result.to_csv() == expected_csv
Пример #15
0
def test_large_frame_csv(large_frame):
    """Serialize a large frame to CSV and parse it back, each step under ``timeit``."""
    with timeit('to_csv'):
        serialized = large_frame.to_csv()

    # Only the parse itself is of interest; the resulting frame is discarded.
    with timeit('from_csv'):
        QFrame.from_csv(serialized)
Пример #16
0
def test_enum_size(enum_frame, enum_data):
    """``enum_frame`` must occupy fewer bytes than a default load of ``enum_data``."""
    # Categoricals should save space when the same value occurs in
    # multiple rows.
    default_size = QFrame.from_csv(enum_data).byte_size()

    assert enum_frame.byte_size() < default_size
Пример #17
0
def enum_frame(enum_data):
    """Load ``enum_data`` as a QFrame with column ``foo`` typed as ``category``."""
    column_types = {'foo': 'category'}
    return QFrame.from_csv(enum_data, column_types=column_types)