def test_frequency_analysis_fast0(self):
    # Character frequency analysis through the "fast0" map UDF and the
    # "fast" reduce UDF in schema fn1, checked against the table
    # daten.wiki_freq. Both result sets use the same ORDER BY so they can
    # be compared row by row via self.compare().
    #
    # The two query durations are only printed, never asserted.
    # (Documented as a comment rather than a docstring so that test
    # runners keep reporting the method name.)
    udf_sql = (
        ' SELECT fn1.performance_reduce_characters_fast(w, c)'
        ' FROM ('
        ' SELECT fn1.performance_map_characters_fast0(text)'
        ' FROM daten.wiki_names'
        ' WHERE text IS NOT NULL'
        ' )'
        ' GROUP BY w'
        ' ORDER BY c DESC, w ASC'
    )
    reference_sql = (
        ' SELECT w, c'
        ' FROM daten.wiki_freq'
        ' ORDER BY c DESC, w ASC'
    )

    with timer() as udf_timer:
        udf_rows = self.query(udf_sql)
    with timer() as reference_timer:
        reference_rows = self.query(reference_sql)

    # Normalise rows to plain tuples before comparing them.
    data = [tuple(row) for row in udf_rows]
    reference = [tuple(row) for row in reference_rows]

    # A single pre-formatted argument prints the same text as the old
    # `print a, b, c` statement on Python 2 and is valid syntax on Python 3.
    print("test_frequency_analysis_fast0 query: %s %s"
          % (udf_timer.duration, reference_timer.duration))
    self.compare(reference, data)
def test_frequency_analysis(self):
    # Character frequency analysis through the plain (non-"fast") map and
    # reduce UDFs in schema fn1, checked against the table daten.wiki_freq.
    # Both result sets use the same ORDER BY so they can be compared row by
    # row via self.compare().
    #
    # Skipped when udf.opts.lang is 'r'. The two query durations are only
    # printed, never asserted.
    # (Documented as a comment rather than a docstring so that test
    # runners keep reporting the method name.)
    if udf.opts.lang == 'r':
        raise SkipTest('this R implementation is too slow')

    udf_sql = (
        ' SELECT fn1.performance_reduce_characters(w, c)'
        ' FROM ('
        ' SELECT fn1.performance_map_characters(text)'
        ' FROM daten.wiki_names'
        ' )'
        ' GROUP BY w'
        ' ORDER BY c DESC, w ASC'
    )
    reference_sql = (
        ' SELECT w, c'
        ' FROM daten.wiki_freq'
        ' ORDER BY c DESC, w ASC'
    )

    with timer() as udf_timer:
        udf_rows = self.query(udf_sql)
    with timer() as reference_timer:
        reference_rows = self.query(reference_sql)

    # Normalise rows to plain tuples before comparing them.
    data = [tuple(row) for row in udf_rows]
    reference = [tuple(row) for row in reference_rows]

    # A single pre-formatted argument prints the same text as the old
    # `print a, b, c` statement on Python 2 and is valid syntax on Python 3.
    print("test_frequency_analysis query: %s %s"
          % (udf_timer.duration, reference_timer.duration))
    self.compare(reference, data)
def test_word_unicode_count(self):
    '''DWA-13860 (lua)'''
    # Timing check for the unicode word-count map/reduce query over column
    # c3_varchar100 of test.enginetablebigunicode. The result rows are
    # discarded; the test only asserts that the measured duration does not
    # exceed the budget below.
    # NOTE(review): the budget is in whatever unit timer().duration
    # reports, presumably seconds; confirm against the timer helper.
    statement = (
        ' SELECT performance_reduce_counts(w, c)'
        ' FROM ('
        ' SELECT performance_map_unicode_words(c3_varchar100)'
        ' FROM test.enginetablebigunicode'
        ' )'
        ' GROUP BY w'
        ' ORDER BY 1 DESC'
    )

    with timer() as stopwatch:
        self.query(statement)

    elapsed = stopwatch.duration
    self.assertLessEqual(elapsed, 11)
def test_word_count_fast77(self):
    # Timing check for the word-count query that combines the
    # performance_map_words UDF with the performance_reduce_counts_fast77
    # UDF over column varchar02 of test.enginetablebig1. The outer
    # COUNT(*) collapses the grouped result to a single count.
    #
    # The duration and the query result are printed, and the test fails
    # when the duration exceeds the budget below.
    # NOTE(review): the budget is in whatever unit timer().duration
    # reports, presumably seconds; confirm against the timer helper.
    # (Documented as a comment rather than a docstring so that test
    # runners keep reporting the method name.)
    sql = (
        ' SELECT COUNT(*)'
        ' FROM ('
        ' SELECT performance_reduce_counts_fast77(w, c)'
        ' FROM ('
        ' SELECT performance_map_words(varchar02)'
        ' FROM test.enginetablebig1'
        ' )'
        ' GROUP BY w'
        ' ORDER BY 1 DESC)'
    )

    with timer() as stopwatch:
        result = self.query(sql)

    # A single pre-formatted argument prints the same text as the old
    # `print a, b, c` statement on Python 2 and is valid syntax on
    # Python 3; %r renders the result exactly like repr() did.
    print("test_word_count_fast77 query: %s %r"
          % (stopwatch.duration, result))
    self.assertLessEqual(stopwatch.duration, 160)