def post(self, dataset_key, optional_q):
    # A POST to the query endpoint carries the query in the request body
    # instead of the URL; dispatch it and return without storing anything.
    if optional_q:
        q_dict = self.q_json_to_dict(decoded_body(self.request))
        if q_dict is not None:
            self.query(dataset_key, q_dict)
        return

    t0 = time.time()
    self.operation = "store"

    # Storing under an existing key replaces the cached dataset.
    if dataset_key in self.dataset_cache:
        self.stats.inc("replace_count")
        del self.dataset_cache[dataset_key]

    content_type = self.content_type()
    input_data = decoded_body(self.request)
    if content_type == CONTENT_TYPE_CSV:
        durations_until_eviction = self.dataset_cache.ensure_free(len(input_data))
        qf = QFrame.from_csv(input_data, column_types=self.dtypes(),
                             stand_in_columns=self.stand_in_columns())
    else:
        # This is a waste of CPU cycles: first the JSON decoder decodes all
        # strings from UTF-8, then we immediately encode them back into UTF-8.
        # Couldn't find an easy solution to this though.
        durations_until_eviction = self.dataset_cache.ensure_free(len(input_data) / 2)
        data = json.loads(input_data, cls=UTF8JSONDecoder)
        qf = QFrame.from_dicts(data, stand_in_columns=self.stand_in_columns())

    self.dataset_cache[dataset_key] = qf
    self.set_status(ResponseCode.CREATED)
    self.stats.inc("size_evict_count", count=len(durations_until_eviction))
    self.stats.inc("store_count")
    self.stats.append("store_row_counts", len(qf))
    self.stats.append("store_durations", time.time() - t0)
    self.stats.extend("durations_until_eviction", durations_until_eviction)
    self.write("")
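# A minimal client-side sketch of how this handler might be exercised over
# HTTP. The base URL, the "/q" suffix for POSTed queries, and the use of the
# requests library are assumptions for illustration, not taken from the source.
import json

import requests

BASE = "http://localhost:8888/qcache/dataset"  # assumed endpoint layout

# Store a dataset as CSV (the CONTENT_TYPE_CSV branch above).
requests.post(BASE + "/mykey",
              data="foo,bar\naaa,1\nbbb,2\n",
              headers={"Content-Type": "text/csv"})

# POST a query against the stored dataset (the optional_q branch above).
response = requests.post(BASE + "/mykey/q",
                         data=json.dumps({"where": ["==", "foo", "'aaa'"]}),
                         headers={"Content-Type": "application/json",
                                  "Accept": "application/json"})
print(response.json())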
def test_unicode_content_from_dicts():
    data = [{'foo': 'aaa', 'bar': u'Iñtërnâtiônàližætiøn'},
            {'foo': 'bbb', 'bar': u'räksmörgås'.encode(encoding='utf-8')}]
    input_frame = QFrame.from_dicts(data)
    frame = input_frame.query({'where': ["==", "bar", u"'räksmörgås'"]})
    assert_rows(frame, ['bbb'])
def large_frame():
    d = 1000000 * [{'aaa': 123456789,
                    'bbb': 'abcdefghijklmnopqrvwxyz',
                    'ccc': 1.23456789}]
    return QFrame.from_dicts(d)
def test_enum_from_dicts(enum_frame):
    cat_frame = QFrame.from_dicts(enum_frame.to_dicts(), column_types={'foo': 'category'})
    frame = QFrame.from_dicts(enum_frame.to_dicts())
    assert cat_frame.byte_size() < frame.byte_size()
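# The assertion above relies on the pandas 'category' dtype storing repeated
# strings as small integer codes plus one shared set of categories. A
# standalone sketch of the same effect using plain pandas (assuming QFrame
# wraps pandas underneath):
import pandas as pd

values = ['red', 'green', 'blue'] * 100000  # few distinct values, many rows

as_object = pd.Series(values)                      # one Python string per row
as_category = pd.Series(values, dtype='category')  # int codes + 3 categories

# deep=True counts the actual string payloads, not just the pointers to them.
assert as_category.memory_usage(deep=True) < as_object.memory_usage(deep=True)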