def load_test_data(f, optsplit=None):
    """Fetch a break-test data file and parse its test cases.

    The file is obtained through ``unicode.fetch(f)`` and then read back
    from ``os.path.basename(f)`` relative to the current directory.

    Args:
        f: Name of the test data file; passed to ``unicode.fetch`` and its
            basename is the file that gets opened.
        optsplit: Optional list forwarded unchanged to
            ``process_split_info``. Defaults to an empty list.

    Returns:
        A list of ``(chars, info)`` tuples, one per parsed test line, where
        ``chars`` and ``info`` are whatever ``process_split_info`` returns.

    Raises:
        AssertionError: if a test case does not have exactly one ``info``
            entry per gap between consecutive ``chars``.
    """
    # A fresh list per call: a mutable default would be shared between calls.
    if optsplit is None:
        optsplit = []

    # Group 1: the test's character sequence (between the leading "÷" and
    #          the "÷ # ÷ [0.2]" marker).
    # Group 2: the break annotations, from the first "÷"/"×" after "[0.2]"
    #          up to the trailing "÷ [0.3]".
    # The dots in "[0.2]" / "[0.3]" are escaped so they only match a
    # literal "." rather than any character.
    test_re = re.compile(
        r"^÷\s+([^\s].*[^\s])\s+÷\s+#\s+÷\s+\[0\.2\].*?([÷×].*)\s+÷\s+\[0\.3\]\s*$"
    )

    unicode.fetch(f)
    data = []

    lines = fileinput.input(os.path.basename(f))
    try:
        for line in lines:
            # lines that include a test start with the ÷ character
            if len(line) < 2 or not line.startswith('÷'):
                continue

            m = test_re.match(line)
            if not m:
                print("error: no match on line where test was expected: %s" % line)
                continue

            # process the characters in this test case
            chars = process_split_string(m.group(1))
            # skip test case if it contains invalid characters (viz., surrogates)
            if not chars:
                continue

            # now process test cases
            (chars, info) = process_split_info(m.group(2), chars, optsplit)

            # make sure that we have break info for each break!
            assert len(chars) - 1 == len(info)

            data.append((chars, info))
    finally:
        # Always release the stream: an exception raised mid-iteration would
        # otherwise leave fileinput's module-level state active and make the
        # next fileinput.input() call fail.
        lines.close()

    return data
def load_test_data(f, optsplit=None):
    """Load and parse the break test cases contained in data file ``f``.

    ``unicode.fetch(f)`` is called first; the file named
    ``os.path.basename(f)`` is then read line by line. Only lines that
    start with ``÷`` are treated as test cases. A ``÷`` line that does not
    match the expected layout is reported with ``print`` and skipped.

    Args:
        f: Name of the test data file (given to ``unicode.fetch``; its
            basename is opened for reading).
        optsplit: Optional list handed through to ``process_split_info``.
            ``None`` (the default) is treated as an empty list.

    Returns:
        List of ``(chars, info)`` tuples as produced by
        ``process_split_info``, in file order.

    Raises:
        AssertionError: if ``len(chars) - 1 != len(info)`` for a test case.
    """
    # Avoid the shared-mutable-default pitfall of ``optsplit=[]``.
    split_opts = [] if optsplit is None else optsplit

    # Raw string so the regex escapes (\s, \[) are not interpreted as string
    # escapes; the dots of "[0.2]" and "[0.3]" are escaped to match literally.
    # Capture 1 is the character sequence, capture 2 the break annotations.
    pattern = re.compile(
        r"^÷\s+([^\s].*[^\s])\s+÷\s+#\s+÷\s+\[0\.2\].*?([÷×].*)\s+÷\s+\[0\.3\]\s*$"
    )

    unicode.fetch(f)

    results = []
    stream = fileinput.input(os.path.basename(f))
    try:
        for line in stream:
            # Lines that include a test start with the ÷ character.
            # startswith() is used instead of slicing a fixed width because
            # "÷" is one character in a text string but two bytes in a
            # UTF-8 byte string.
            if len(line) < 2 or not line.startswith('÷'):
                continue

            match = pattern.match(line)
            if match is None:
                # Parenthesised single-argument form is valid as both a
                # print statement and a print() call.
                print("error: no match on line where test was expected: %s" % line)
                continue

            # Process the characters in this test case; a falsy result means
            # the case contains invalid characters (viz., surrogates).
            chars = process_split_string(match.group(1))
            if not chars:
                continue

            # Now process the break information for those characters.
            chars, info = process_split_info(match.group(2), chars, split_opts)

            # Make sure that we have break info for each break!
            assert len(chars) - 1 == len(info)

            results.append((chars, info))
    finally:
        # Close even on error so fileinput's global state is not left active.
        stream.close()

    return results
def _parse_codepoint_fields(fields):
    """Convert whitespace-separated hex fields into lists of ints.

    Returns a list with one list of integers per field, or ``None`` as soon
    as ``unicode.is_surrogate`` reports a surrogate code point.
    """
    groups = []
    for field in fields:
        codepoints = []
        for token in field.split():
            codepoint = int(token, 16)
            if unicode.is_surrogate(codepoint):
                return None
            codepoints.append(codepoint)
        groups.append(codepoints)
    return groups


def load_test_data(f):
    """Fetch a semicolon-delimited test data file and parse its entries.

    ``unicode.fetch(f)`` is called first, then ``os.path.basename(f)`` is
    read line by line. Each data line must consist of five ``;``-terminated
    fields followed by whitespace and a ``#`` comment; every field is a
    whitespace-separated sequence of hexadecimal code points.

    Args:
        f: Name of the test data file (given to ``unicode.fetch``; its
            basename is opened for reading).

    Returns:
        A list with one entry per accepted line. Each entry is a list of
        five lists of integer code points. Lines containing a surrogate
        code point are omitted.

    Raises:
        ValueError: if a field contains a token that is not valid
            hexadecimal (propagated from ``int``).
    """
    # Raw string: keeps "\s" a regex escape rather than a string escape.
    test_re = re.compile(r"^(.*?);(.*?);(.*?);(.*?);(.*?);\s+#.*$")

    unicode.fetch(f)

    outls = []
    lines = fileinput.input(os.path.basename(f))
    try:
        for line in lines:
            # comment and header lines start with # and @ respectively
            if not line or line.startswith(('#', '@')):
                continue

            m = test_re.match(line)
            if not m:
                # Parenthesised form works as a statement and as a call.
                print("error: no match on line where test was expected: %s" % line)
                continue

            # The pattern has exactly five groups, so m.groups() is
            # groups 1..5 in order.
            groups = _parse_codepoint_fields(m.groups())
            if groups is None:
                # test case contains a surrogate; skip it entirely
                continue

            outls.append(groups)
    finally:
        # Release the stream even if parsing raised, so fileinput's
        # module-level state is not left active for the next caller.
        lines.close()

    return outls
def load_test_data(f):
    """Read the test entries of data file ``f`` into lists of code points.

    After calling ``unicode.fetch(f)``, the file ``os.path.basename(f)`` is
    scanned. Lines beginning with ``#`` or ``@`` are ignored. Every other
    line is expected to hold five ``;``-terminated fields, then whitespace
    and a ``#`` comment; a line that does not fit is reported with ``print``
    and skipped.

    Args:
        f: Name of the test data file (given to ``unicode.fetch``; its
            basename is opened for reading).

    Returns:
        A list of entries in file order. Each entry is a list of five
        lists, each holding the integer values of one field's hexadecimal
        tokens. An entry is dropped when ``unicode.is_surrogate`` flags any
        of its code points.

    Raises:
        ValueError: if a token cannot be parsed as hexadecimal.
    """
    # Raw string so that "\s" reaches the regex engine untouched.
    line_pattern = re.compile(r"^(.*?);(.*?);(.*?);(.*?);(.*?);\s+#.*$")

    unicode.fetch(f)

    entries = []
    stream = fileinput.input(os.path.basename(f))
    try:
        for line in stream:
            # Comment and header lines start with # and @ respectively.
            if len(line) < 1 or line[0:1] in ("#", "@"):
                continue

            match = line_pattern.match(line)
            if match is None:
                # Single parenthesised argument: valid on Python 2 and 3.
                print("error: no match on line where test was expected: %s" % line)
                continue

            fields = []
            has_surrogates = False
            for index in range(1, 6):
                codepoints = []
                for token in match.group(index).split():
                    value = int(token, 16)
                    if unicode.is_surrogate(value):
                        has_surrogates = True
                        break
                    codepoints.append(value)
                if has_surrogates:
                    # Stop at the first surrogate; the whole line is dropped.
                    break
                fields.append(codepoints)

            if not has_surrogates:
                entries.append(fields)
    finally:
        # Close on every exit path; otherwise an exception mid-file leaves
        # fileinput's global state active and the next input() call fails.
        stream.close()

    return entries