def test():
    """Compare ``TPN`` parse results against the reference title data.

    Every ``(key, value)`` entry of ``tests.title_test_data.data`` is parsed
    with ``TPN(key)``. Entries whose ``value`` has four items are treated as
    the expected ``(volume, chapter, fragment, postfix)``; any difference is
    printed and counted. Entries of any other length are parsed but not
    compared. A summary is printed at the end.

    NOTE(review): another ``test`` function is defined later in this file; if
    both live in the same module the later definition replaces this one.
    """
    from tests.title_test_data import data as test_data

    count = 0
    mismatch = 0

    for key, value in test_data:
        count += 1

        p = TPN(key)
        vol = p.getVolume()
        chp = p.getChapter()
        frag = p.getFragment()
        post = p.getPostfix()

        # Only four-item expectations are checked.
        if len(value) != 4:
            continue

        e_vol, e_chp, e_frag, e_post = value

        # An expected chapter of 0.0 is accepted when no chapter was parsed.
        if e_chp == 0.0 and chp is None:
            e_chp = None

        numbers_differ = vol != e_vol or chp != e_chp or frag != e_frag
        postfix_differs = e_post != post

        if numbers_differ:
            print(p)
            print("Parsed: v{}, c{}, f{}".format(vol, chp, frag))
            print("Expect: v{}, c{}, f{}".format(e_vol, e_chp, e_frag))
            print()

        if postfix_differs:
            print(p)
            print("Post mismatch - Parsed: {}".format(post))
            print("Post mismatch - Expect: {}".format(e_post))

        if numbers_differ or postfix_differs:
            mismatch += 1

    print("{} Items with parsed output".format(count))
    print("{} Items mismatch in new parser".format(mismatch))
    print("Total items: {}".format(len(test_data)))
def merge_in_fixed_mismatch():
    """Find known-mismatch entries that the parser now gets right.

    Loads the entries returned by ``load_test_data(only_mismatch=True)`` and
    parses each key with ``TPN``. Entries whose parsed volume, chapter,
    fragment and postfix all equal the stored values are formatted with
    ``format_row`` and collected; the rest are formatted with
    ``format_double_row``. After printing a summary, the collected good rows
    (if any) are passed to ``comment_mismatches`` and
    ``create_set_files_for_values``.
    """
    test_data = load_test_data(only_mismatch=True)

    count = 0
    remlines = []
    good_lines = []
    good_sets = []

    for key, value in test_data:
        p = TPN(key)
        vol = p.getVolume()
        chp = p.getChapter()
        frag = p.getFragment()
        post = p.getPostfix()

        e_vol, e_chp, e_frag, e_post = value

        still_differs = (
            vol != e_vol or chp != e_chp or frag != e_frag or e_post != post
        )

        if still_differs:
            # NOTE(review): stored values go in as output_* and freshly parsed
            # values as expect_* -- confirm this matches format_double_row.
            _, remline = format_double_row(
                key,
                output_volume=e_vol,
                output_chapter=e_chp,
                output_fragment=e_frag,
                output_postfix=e_post,
                expect_volume=vol,
                expect_chapter=chp,
                expect_fragment=frag,
                expect_postfix=post,
            )
            remlines.append(remline)
        else:
            good_lines.append(format_row(key, e_vol, e_chp, e_frag, e_post))
            good_sets.append((key, value))

        count += 1

    print("{} Items with parsed output".format(count))
    print("{} Items mismatch in new parser".format(len(remlines)))
    print("{} OK lines".format(len(good_lines)))
    print("Total items: {}".format(len(test_data)))

    if good_lines:
        comment_mismatches(good_lines, mismatch=True)
        create_set_files_for_values(good_sets)
def test_mismatch():
    """Compare ``TPN`` parse results against the recorded mismatch data.

    Each ``(key, value)`` entry of ``tests.title_test_data_mismatch.data`` is
    parsed with ``TPN(key)``. Four-item values are taken as the expected
    ``(volume, chapter, fragment, postfix)``; differences are printed and
    counted. A summary is printed at the end.
    """
    from tests.title_test_data_mismatch import data as test_data

    count = 0
    mismatch = 0

    for key, value in test_data:
        count += 1

        p = TPN(key)
        vol = p.getVolume()
        chp = p.getChapter()
        frag = p.getFragment()
        post = p.getPostfix()

        # Entries that are not four-item tuples are parsed but not compared.
        if len(value) != 4:
            continue

        e_vol, e_chp, e_frag, e_post = value

        # An expected chapter of 0.0 is accepted when no chapter was parsed.
        if e_chp == 0.0 and chp is None:
            e_chp = None

        numbers_differ = vol != e_vol or chp != e_chp or frag != e_frag
        postfix_differs = e_post != post

        if numbers_differ:
            print(p)
            print("Parsed: v{}, c{}, f{}".format(vol, chp, frag))
            print("Expect: v{}, c{}, f{}".format(e_vol, e_chp, e_frag))
            print()

        if postfix_differs:
            print(p)
            print("Post mismatch - Parsed: {}".format(post))
            print("Post mismatch - Expect: {}".format(e_post))

        if numbers_differ or postfix_differs:
            mismatch += 1

    print("{} Items with parsed output".format(count))
    print("{} Items mismatch in new parser".format(mismatch))
    print("Total items: {}".format(len(test_data)))
def extractTitle(inStr):
    """Parse a title string into ``(vol, chp, frag, post)``.

    The title is handed to ``TitleParser`` and its volume, chapter, fragment
    and postfix are read back. When a chapter was found but no (non-zero)
    fragment, the chapter is split: its whole part is returned as an ``int``
    chapter and its first two decimal places become the fragment
    (for example ``12.5`` gives chapter ``12`` and fragment ``50``).

    Args:
        inStr: The title text to parse.

    Returns:
        A ``(vol, chp, frag, post)`` tuple.
    """
    parser = TitleParser(inStr)
    vol = parser.getVolume()
    chp = parser.getChapter()
    frag = parser.getFragment()
    post = parser.getPostfix()

    # The previous condition had a second clause (non-integer chapter with a
    # zero/None fragment) that is already covered by "chp and not frag".
    if chp and not frag:
        # The fragment must be taken from the chapter *before* truncating it;
        # computing it afterwards always produced 0. round() guards against
        # float artefacts such as 1.15 * 100 == 114.99999999999999.
        hundredths = int(round(float(chp) * 100))
        chp = int(chp)
        frag = hundredths % 100

    return vol, chp, frag, post
def extract_mismatch():
    """Regenerate ``tests/title_test_data_mismatch.py`` from the reference data.

    Expected values from ``tests.title_test_data_two`` and
    ``tests.title_test_data`` are grouped per title; for titles present in
    both, only the values from ``tests.title_test_data`` are kept. Each title
    is parsed with ``TPN`` and compared with every expected
    ``(volume, chapter, fragment, postfix)`` tuple. Differences are printed.

    The output module contains a ``data = [...]`` list with three sections
    separated by rules of ``#`` characters: titles with a matching
    expectation, titles without one, and titles whose processing raised
    ``AssertionError``. A summary is printed at the end.

    The file is written as UTF-8 (the default encoding of Python source
    files) and only after all titles were processed, so a failure while
    parsing does not leave a truncated module behind.

    NOTE(review): another ``extract_mismatch`` is defined later in this file;
    if both live in the same module the later definition replaces this one.
    """
    from tests.title_test_data import data as test_data
    from tests.title_test_data_two import data as test_data_more

    # One rule of "#" characters; four copies back to back form a divider.
    rule = "#################################################################################################################################################################################################################################"

    count = 0
    mismatch = 0

    test_data_dict = {}
    for key, value in test_data_more:
        test_data_dict.setdefault(key, []).append(value)
    # Titles in the primary data set drop whatever the secondary set supplied.
    for key, _ in test_data:
        test_data_dict[key] = []
    for key, value in test_data:
        test_data_dict[key].append(value)

    goodstr = []
    badstr = []
    errored = []

    for key, value in test_data_dict.items():
        try:
            p = TPN(key)
            vol = p.getVolume()
            chp = p.getChapter()
            frag = p.getFragment()
            post = p.getPostfix()

            badtmp = ''
            goodtmp = ''
            for valueset in value:
                # Raised explicitly (not via ``assert``) so the check and the
                # handler below keep working under ``python -O``.
                if len(valueset) != 4:
                    raise AssertionError("Wat: %s" % (valueset, ))

                e_vol, e_chp, e_frag, e_post = valueset
                # An expected chapter of 0.0 is accepted when none was parsed.
                if e_chp == 0.0 and chp is None:
                    e_chp = None

                numbers_differ = vol != e_vol or chp != e_chp or frag != e_frag
                postfix_differs = e_post != post

                if numbers_differ:
                    print(p)
                    print("Parsed: v{}, c{}, f{}".format(vol, chp, frag))
                    print("Expect: v{}, c{}, f{}".format(e_vol, e_chp, e_frag))
                    print()
                if postfix_differs:
                    print(p)
                    print("Post mismatch - Parsed: {}".format(post))
                    print("Post mismatch - Expect: {}".format(e_post))

                if numbers_differ or postfix_differs:
                    mismatch += 1
                    badtmp = format_row(key, e_vol, e_chp, e_frag, e_post)
                else:
                    goodtmp = format_row(key, e_vol, e_chp, e_frag, e_post)

            # A single matching expectation is enough to file the title as good.
            if goodtmp:
                goodstr.append(goodtmp)
            else:
                badstr.append(badtmp)
        except AssertionError:
            errored.append(format_row(key, None, None, None, ''))

        count += 1

    goodstr.sort()
    badstr.sort()

    with open("tests/title_test_data_mismatch.py", 'w', encoding="utf-8") as fp:
        fp.write("data = [\n")
        fp.write("".join(goodstr))
        fp.write("\n\n")
        fp.write(rule * 4)
        fp.write("\n\n")
        fp.write("".join(badstr))
        fp.write("\n\n")
        fp.write(rule * 4)
        fp.write("\n\n")
        fp.write("".join(errored))
        fp.write("]\n")

    print("{} Items with parsed output".format(count))
    print("{} Items mismatch in new parser".format(mismatch))
    print("{} error encountered in parsing".format(len(errored)))
    print("Total items: {}".format(len(test_data)))
def test():
    """Compare ``TPN`` parse results against the loaded test data.

    Every ``(key, value)`` entry from ``load_test_data()`` is parsed with
    ``TPN(key)``. Four-item values are taken as the expected
    ``(volume, chapter, fragment, postfix)``; for each difference a pair of
    rows ``((key, expected), (key, parsed))`` is recorded. After printing a
    summary, the recorded pairs are dumped to ``title_disconnects.json``.
    """
    test_data = load_test_data()

    count = 0
    mismatch = 0
    mismatches = []

    for key, value in tqdm.tqdm(test_data):
        count += 1

        p = TPN(key)
        vol = p.getVolume()
        chp = p.getChapter()
        frag = p.getFragment()
        post = p.getPostfix()

        # Entries that are not four-item tuples are parsed but not compared.
        if len(value) != 4:
            continue

        e_vol, e_chp, e_frag, e_post = value

        # An expected chapter of 0.0 is accepted when no chapter was parsed.
        if e_chp == 0.0 and chp is None:
            e_chp = None

        numbers_differ = vol != e_vol or chp != e_chp or frag != e_frag
        if numbers_differ or e_post != post:
            # Row structure is ('Name', (vol, chp, frag, postfix)),
            expected_row = (key, (e_vol, e_chp, e_frag, e_post))
            parsed_row = (key, (vol, chp, frag, post))
            mismatches.append((expected_row, parsed_row))
            mismatch += 1

    print("{} Items with parsed output".format(count))
    print("{} Items mismatch in new parser".format(mismatch))
    print("Total items: {}".format(len(test_data)))

    with open("title_disconnects.json", "w") as fp:
        json.dump(mismatches, fp, indent=4)
def extract_mismatch():
    """Write the titles whose parse differs from the expectation to a file.

    Entries from ``load_test_data(mismatch=False)`` are grouped per title and
    each title is parsed with ``TPN``. Every expected
    ``(volume, chapter, fragment, postfix)`` tuple that differs from the
    parse result is formatted with ``format_double_row``. Titles without any
    matching expectation are written to
    ``tests/title_data/title_test_data_mismatch.pyson``, followed by titles
    whose processing raised ``AssertionError``. A summary is printed and the
    collected second return values of ``format_double_row`` are passed to
    ``comment_mismatches``.

    Raises:
        RuntimeError: If the output file already exists.

    NOTE(review): the file is opened with the platform default encoding;
    confirm the reader of the ``.pyson`` file uses the same one.
    NOTE(review): ``load_test_data`` is called with ``mismatch=False`` here
    but with ``only_mismatch=True`` elsewhere in this file -- confirm the
    keyword name.
    """
    test_data = load_test_data(mismatch=False)

    count = 0
    mismatch = 0

    test_data_dict = {}
    for item in test_data:
        try:
            key, value = item
        except Exception:
            # Show the malformed entry before propagating the error.
            pprint.pprint(item)
            raise
        test_data_dict.setdefault(key, []).append(value)

    remlines = []

    if os.path.exists("tests/title_data/title_test_data_mismatch.pyson"):
        raise RuntimeError("Mismatch file already exists. Not overwriting!")

    existing_entries = []

    with open("tests/title_data/title_test_data_mismatch.pyson", 'w') as fp:
        fp.write("[\n")

        goodstr = []
        badstr = []
        errored = []

        for key, value in tqdm.tqdm(test_data_dict.items()):
            try:
                p = TPN(key)
                vol = p.getVolume()
                chp = p.getChapter()
                frag = p.getFragment()
                post = p.getPostfix()

                badtmp = ''
                goodtmp = ''
                for valueset in value:
                    assert (len(valueset) == 4), "Wat: %s" % (valueset, )
                    e_vol, e_chp, e_frag, e_post = valueset

                    # An expected chapter of 0.0 is accepted when none was parsed.
                    if e_chp == 0.0 and chp is None:
                        e_chp = None

                    differs = (
                        vol != e_vol
                        or chp != e_chp
                        or frag != e_frag
                        or e_post != post
                    )
                    if differs:
                        mismatch += 1
                        # NOTE(review): stored values go in as output_* and
                        # parsed values as expect_* -- confirm this matches
                        # format_double_row.
                        badtmp, remline = format_double_row(
                            key,
                            output_volume=e_vol,
                            output_chapter=e_chp,
                            output_fragment=e_frag,
                            output_postfix=e_post,
                            expect_volume=vol,
                            expect_chapter=chp,
                            expect_fragment=frag,
                            expect_postfix=post,
                        )
                        remlines.append(remline)
                    else:
                        goodtmp = format_row(key, e_vol, e_chp, e_frag, e_post)

                # A single matching expectation keeps the title out of the
                # mismatch section.
                if goodtmp:
                    goodstr.append(goodtmp)
                else:
                    badstr.append(badtmp)
            except AssertionError:
                errored.append(format_row(key, None, None, None, ''))

            count += 1

        # Good rows are collected and sorted but not written to this file.
        goodstr.sort()
        badstr.sort()

        fp.write(" # Lines with parse mismatches: %s" % (len(badstr), ))
        fp.write("\n\n")
        fp.write("".join(badstr))
        fp.write("\n\n")
        fp.write(" # Errored lines: %s" % (len(errored), ))
        fp.write("\n\n")
        fp.write("".join(errored))
        fp.write(" # Old lines: %s" % (len(existing_entries), ))
        fp.write("\n\n")
        fp.write("".join(existing_entries))
        fp.write("]\n")

    print("{} Items with parsed output".format(count))
    print("{} Items mismatch in new parser".format(mismatch))
    print("{} error encountered in parsing".format(len(errored)))
    print("Total items: {}".format(len(test_data)))

    comment_mismatches(remlines)