def test_freq_analysis(self):
    """Check the dictionary that freq_analysis builds from a string."""
    sample = random_string(20, string.ascii_lowercase)
    distinct_letters = set(sample)
    result = bar_chart.freq_analysis(sample)
    for letter in distinct_letters:
        # Every distinct letter of the input must show up as a key.
        self.assertIn(letter, result)
        bucket = result[letter]
        # The bucket under a key may hold nothing but copies of that key...
        for item in bucket:
            self.assertEqual(letter, item)
        # ...and exactly one copy per occurrence in the input.
        self.assertEqual(sample.count(letter), len(bucket))

    # Punctuation and whitespace must be ignored entirely.
    sample = random_string(20, string.punctuation + string.whitespace)
    result = bar_chart.freq_analysis(sample)
    self.assertDictEqual(result, {})

    # Uppercase input must be filed under lowercase keys.
    sample = random_string(20, string.ascii_uppercase)
    result = bar_chart.freq_analysis(sample)
    for key in result:
        self.assertTrue(key.islower())
def test_cleanup_list(self):
    """Check that cleanup_list drops one-letter words from a word list."""
    # Thirteen one-letter words followed by ten five-letter words.
    one_letter_words = [random_string(1) for _ in range(13)]
    five_letter_words = [random_string(5) for _ in range(10)]
    cleaned = cleanup_dictionary.cleanup_list(
        one_letter_words + five_letter_words)
    # Only the ten five-letter words are expected to survive.
    self.assertEqual(len(cleaned), 10)
    for word in cleaned:
        self.assertEqual(len(word), 5)
def test_get_id(self):
    """Check get_id against IDs computed from LETTER_PRIME_DICT."""
    # A single letter must map to its own LETTER_PRIME_DICT entry.
    letter = random_string(1, ascii_lowercase)
    letter_id = anagram_generator.get_id(letter)
    self.assertEqual(LETTER_PRIME_DICT[letter], letter_id)

    # A longer string must map to the product of its letters' entries.
    word = random_string(30, ascii_lowercase)
    word_id = anagram_generator.get_id(word)
    expected_id = 1
    for char in word:
        expected_id *= LETTER_PRIME_DICT[char]
    self.assertEqual(expected_id, word_id)
def test_join_random(seed, lt):
    """Join a random frame onto a random keyed frame and verify the result.

    A keyed frame ``dkey`` with columns KEY/VAL is built from unique random
    keys of logical type ``lt``; a second frame ``dmain`` holds ``ndata``
    keys drawn (with repetition) from it. The joined frame must contain the
    drawn keys and, for each, the VAL stored next to that key in ``dkey``.

    Args:
        seed: Seed passed to ``random.seed`` so that a run is reproducible.
        lt: Logical type of the key column; one concrete stype is picked at
            random from ``lt.stypes``.
    """
    random.seed(seed)
    ndata = int(random.expovariate(0.0005))
    nkeys = int(random.expovariate(0.01)) + 1
    st = random.choice(lt.stypes)
    if lt == ltype.bool:
        keys = [True, False]
    elif lt == ltype.int:
        # Narrower stypes get fewer random bits.
        nbits = 6 if st == stype.int8 else 12 if st == stype.int16 else 24
        keys = list({random.getrandbits(nbits) for _ in range(nkeys)})
    elif lt == ltype.real:
        keys = [random.random() for _ in range(nkeys)]
        if st == stype.float32:
            # De-duplicate after a round trip through a float32 frame --
            # presumably because distinct Python floats can collide once
            # stored as float32 (TODO confirm).
            keys = list(set(dt.Frame(keys, stype=st).topython()[0]))
        else:
            keys = list(set(keys))
    else:
        key_length = int(random.expovariate(0.05)) + 1
        keys = list({random_string(key_length) for _ in range(nkeys)})
    nkeys = len(keys)

    dkey = dt.Frame(KEY=keys, VAL=range(nkeys), stypes={"KEY": st})
    dkey.key = "KEY"
    # Re-read keys and values from the frame so the expected result is
    # built from what the keyed frame actually stores.
    keys, vals = dkey.topython()
    main = [random.choice(keys) for _ in range(ndata)]
    dmain = dt.Frame(KEY=main, stype=st)

    # Keys are unique, so a dict gives the same answer as keys.index()
    # without rescanning the key list for every row.
    val_by_key = dict(zip(keys, vals))
    res = [val_by_key[key] for key in main]

    djoined = dmain[:, :, join(dkey)]
    djoined.internal.check()
    assert djoined.shape == (ndata, 2)
    assert djoined.names == ("KEY", "VAL")
    assert djoined.topython() == [main, res]
def generate_str_column(allparams):
    """
    Generate and return a column with random string data.

    This is the most versatile generator, and includes multiple different
    "modes" of generation. Only the simple alphanumeric mode is implemented
    in this block.

    Args:
        allparams: Mapping that provides ``"nrows"`` (number of values to
            generate) and ``"quote"`` (string placed around each value when
            quoting is switched on, which happens with probability 0.2).

    Returns:
        A list of ``nrows`` strings in which at least one non-empty value
        cannot be parsed by ``float()``; an empty list when ``nrows`` is
        not positive.
    """
    nrows = allparams["nrows"]
    quote = allparams["quote"]
    always_quote = random.random() < 0.2
    rr = (lambda x: x)
    if always_quote:
        rr = (lambda x: quote + x + quote)
    rmode = random.random()
    if rmode < 0:
        # Placeholder for other generation modes; random.random() is never
        # negative, so this branch is not taken.
        pass
    else:
        # Generate simple alphanumeric strings and make sure
        # the resulting column is not fully populated with numeric values.
        is_numeric = nrows > 0
        col = []
        while is_numeric:
            col = [
                rr(random_string(int(random.expovariate(0.01))))
                for _ in range(nrows)
            ]
            for row in col:
                if not row:
                    # Empty strings say nothing about numeric-ness.
                    continue
                try:
                    float(row)
                except ValueError:
                    # One non-numeric value is enough to accept the column.
                    is_numeric = False
                    break
        return col
def test_re_match_random(seed):
    """Compare ``f.A.re_match`` with ``re.fullmatch`` on random strings."""
    random.seed(seed)
    n = int(random.expovariate(0.001) + 100)
    k = random.randint(2, 12)

    # Assemble a random pattern piece by piece until its nominal length
    # reaches k; a ".*" piece counts for more than a single-character one.
    pieces = []
    nominal_len = 0
    while nominal_len < k:
        roll = random.random()
        if roll < 0.4:
            pieces.append(".")
        elif roll < 0.6:
            pieces.append(random.choice("abcdefgh"))
        elif roll < 0.8:
            pieces.append(".*")
            nominal_len += 3
        else:
            pieces.append("\\w")
        nominal_len += 1
    random_rx = re.compile("".join(pieces))

    src = [random_string(k) for _ in range(n)]
    frame_res = dt.Frame(A=src)[:, f.A.re_match(random_rx)]
    assert frame_res.shape == (n, 1)
    # Reference answer computed with the standard library.
    expected = [bool(re.fullmatch(random_rx, s)) for s in src]
    actual = frame_res.to_list()[0]
    assert expected == actual
def test_match_random(seed):
    """Compare ``match(f.A, rx)`` with ``re.fullmatch`` on random strings."""
    random.seed(seed)
    n = int(random.expovariate(0.001) + 100)
    k = random.randint(2, 12)

    # Assemble a random pattern piece by piece until its nominal length
    # reaches k; a ".*" piece counts for more than a single-character one.
    pieces = []
    nominal_len = 0
    while nominal_len < k:
        roll = random.random()
        if roll < 0.4:
            pieces.append(".")
        elif roll < 0.6:
            pieces.append(random.choice("abcdefgh"))
        elif roll < 0.8:
            pieces.append(".*")
            nominal_len += 3
        else:
            pieces.append("\\w")
        nominal_len += 1
    random_rx = re.compile("".join(pieces))

    src = [random_string(k) for _ in range(n)]
    DT = dt.Frame(A=src)
    res = DT[:, match(f.A, random_rx)]
    # Reference answer computed with the standard library.
    expected = dt.Frame(A=[bool(re.fullmatch(random_rx, s)) for s in src])
    assert_equals(res, expected)
def test_recursive_ispalindrome(self):
    """Check recursive_ispalindrome on a built palindrome and a non-palindrome."""
    # A random half followed by its mirror image is always a palindrome.
    half = random_string(10, string.ascii_lowercase)
    mirrored = half + half[::-1]
    is_palindrome = recursive_palindrome.recursive_ispalindrome(mirrored)
    self.assertTrue(is_palindrome)

    # A fixed word that does not read the same backwards.
    is_palindrome = recursive_palindrome.recursive_ispalindrome('cat')
    self.assertFalse(is_palindrome)
def test_add_keys_to_dict(self):
    """Check add_keys_to_dict on an empty and on a pre-seeded dictionary."""
    # Starting from nothing, every ASCII lowercase letter must become a key.
    filled = foreign_chart.add_keys_to_dict({})
    for letter in string.ascii_lowercase:
        self.assertIn(letter, filled)

    # A letter that is already present must not be duplicated: the result
    # has to equal the reference empty-letter dictionary.
    seeded = {random_string(1, string.ascii_lowercase): []}
    filled = foreign_chart.add_keys_to_dict(seeded)
    self.assertDictEqual(filled, EMPTY_LETTER_DICT)
def generate_str_column(allparams):
    """
    Build a column of random strings.

    The generator is meant to support several "modes"; the only one present
    here produces simple alphanumeric strings. With probability 0.2 every
    value is wrapped in the configured quote string.

    Reads ``allparams["nrows"]`` (number of values) and
    ``allparams["quote"]`` (quote string), and returns a list of ``nrows``
    strings.
    """
    nrows = allparams["nrows"]
    quote = allparams["quote"]

    # Decide once per column whether every value gets quoted.
    if random.random() < 0.2:
        def decorate(text):
            return quote + text + quote
    else:
        def decorate(text):
            return text

    mode = random.random()
    if mode >= 0:
        # Generate simple alphanumeric strings
        column = []
        for _ in range(nrows):
            length = int(random.expovariate(0.01))
            column.append(decorate(random_string(length)))
        return column
    # No other mode exists; random.random() is never negative, so this
    # fall-through is not reached.
    return None
def test_empty_strings(seed, repl):
    """Round-trip a string frame containing ``repl`` cells through CSV."""
    # TODO: also test repl=None, which currently gets deserialized into empty
    # strings.
    alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
    random.seed(seed)
    ncols = random.randint(3, 10)
    nrows = int(random.expovariate(1 / 200) + 1)
    p = random.uniform(0.1, 0.5)

    src = []
    for _col in range(ncols):
        column = []
        for _row in range(nrows):
            # Draw the coin before the string so the random stream is
            # consumed in a fixed order.
            use_random = random.random() < p
            column.append(random_string(8) if use_random else repl)
        if column == [repl] * nrows:
            # Never leave a column made of nothing but `repl`.
            column[0] = "!!!"
        src.append(column)

    colnames = list(alphabet[:ncols].upper())
    d0 = dt.Frame(src, names=colnames)
    assert d0.names == tuple(colnames)
    assert d0.ltypes == (ltype.str, ) * ncols

    # Write to CSV text and read it straight back.
    d1 = dt.fread(d0.to_csv())
    frame_integrity_check(d1)
    assert d1.names == d0.names
    assert d1.stypes == d0.stypes
    assert d1.to_list() == src
def random_str():
    """Return None about one time in ten, else a random 1-20 char string."""
    # The coin is drawn first; the length is only drawn when a string
    # is actually produced.
    is_missing = random.random() < 0.1
    return None if is_missing else random_string(random.randint(1, 20))