def read_label(idx):
    """
    Return the entry stored under ``idx`` in the inverted GATT label table.

    The GATT label YAML is loaded, passed through ``cleanse_dict`` and then
    through ``invert_dict``. Before returning, the number of keys of the
    inverted table and its sorted keys are printed.

    :param idx: key to look up in the inverted table
    :return: ``inverted[idx]``
    """
    label_path = "../../data/label/applicability/labels/GATT.yaml"
    inverted = invert_dict(cleanse_dict(read_yaml(label_path)))

    # Diagnostic output: size of the table, then every key in sorted order.
    print(len(inverted.keys()))
    print(sorted(inverted.keys()))

    return inverted[idx]
def rehash_arts_in_text(indices, yaml_path="labels/GATT.yaml"):
    """
    Translate integer labels into the names found in the label YAML.

    The YAML at ``yaml_path`` is loaded, cleaned with ``cleanse_dict`` and
    reduced to its key list with ``get_keys``; each integer in ``indices``
    is then used as a position in that list.

    :param indices: iterable of integer positions
    :param yaml_path: path of the label YAML file
    :return: list with one name per entry of ``indices``, in the same order
    """
    names_by_position = get_keys(cleanse_dict(read_yaml(yaml_path)))
    return [names_by_position[position] for position in indices]
def filter_panel_eng(list_of_urls, idx):
    """
    Pick the panel report URL for dispute ``idx`` out of ``list_of_urls``.

    Resolution order:

    1. Search the URLs kept by ``filter_eng`` for a file whose name matches
       one of the known panel-report patterns for ``idx``.
    2. If nothing matches, read ``../../data/info.yaml`` and retry the same
       search with every key of the first ``LinkedPanel`` entry that
       contains ``idx``.
    3. If there is still no match, guess the URL by placing
       ``'<idx>R.pdf'`` in the directory of the fallback URL remembered
       during the first search.

    :param list_of_urls: iterable of document URLs (strings)
    :param idx: dispute number used to build the expected file names
    :return: URL of the panel report (found, or guessed in step 3)
    :raises ValueError: if no report was found and there is no URL from
        which a guess could be derived (e.g. the filtered list is empty)
    """
    # URLs always use '/' as separator, whatever the host OS is; os.path.join
    # would insert a backslash on Windows.
    import posixpath

    name_templates = ("{}R.pdf", "{}R.PDF", "{}RW.pdf",
                      "{}R-00.pdf", "{}R-01.pdf")

    def _find_panel(urls, ds_idx):
        """
        Scan ``urls`` for the panel report of ``ds_idx``.

        :return: ``(panel_url, snippet)``; ``panel_url`` is None when no
            file name matched, ``snippet`` is a URL usable as a directory
            hint (None when there was no URL to scan at all)
        """
        wanted_names = {template.format(ds_idx)
                        for template in name_templates}
        panel_url = None
        snippet = None
        last_url = None
        print("Panel reports: ")
        for url in filter_eng(urls):
            last_url = url
            file_name = url.split("/")[-1]
            print("url", file_name)
            # A URL containing both a 'Q'/'q' and 'WT' is preferred as the
            # directory hint.
            if 'WT' in url and ('Q' in url or 'q' in url):
                snippet = url
            if file_name in wanted_names:
                panel_url = url
                break
        if not snippet:
            # Fall back to the last URL looked at; stays None when the
            # filtered list was empty instead of hitting an unbound name.
            snippet = last_url
        print("urlSnippet", snippet)
        print("panel_url", panel_url)
        return panel_url, snippet

    panelURL, urlSNIPPET = _find_panel(list_of_urls, idx)

    if not panelURL:
        group = None
        info = read_yaml("../../data/info.yaml")
        for link in info['LinkedPanel']:
            if idx in link:
                group = list(link.keys())
                print("group", group)
                break

        if group is not None:
            for one in group:
                print("linked", one)
                panelURL, _ = _find_panel(list_of_urls, one)
                print("newly found panelURL", panelURL)
                if panelURL:
                    break

    if not panelURL:
        if not urlSNIPPET:
            raise ValueError(
                "no panel report found for {} and no URL available to "
                "derive one from".format(idx))
        # Everything before the last '/' of the hint URL is its directory.
        base_dir = urlSNIPPET.rpartition('/')[0]
        panelURL = posixpath.join(base_dir, '{}R.pdf'.format(idx))

    print("final_decision: ", panelURL)
    return panelURL
from prep.factual.after_panel.extract import extract_factual_manual, locate_chapter_II, locate_chapter_III def read_label(idx): gatt = read_yaml("../../data/label/applicability/labels/GATT.yaml") gatt = cleanse_dict(gatt) inv_gatt = invert_dict(gatt) print(len(inv_gatt.keys())) print(sorted(inv_gatt.keys())) return inv_gatt[idx] if __name__ == "__main__": pkl_path = "../../data/factual/after_panel/factual.pkl" info = read_yaml("../../data/info.yaml") panel_exist = info['Panel']['ds_numb'] mutual_agree = info['Panel']['mutual_agree'] WPF = info['Panel']['WPF'] print(WPF) LinkedPanel = info['LinkedPanel'] LinkedOmit = info['LinkedOmit'] for idx in panel_exist: if check_already_exist(pkl_path, idx): continue elif idx in mutual_agree: continue elif idx in WPF: print(idx, "in WPF") continue elif idx in LinkedOmit:
"""Convert Multi Label Dataset to One-Label"""
import os
import json

from utils.misc.yaml import read_yaml
from utils.misc.dict import get_keys
from utils.misc.json import write_json_line_by_line
from prep.label.cite.parse import cleanse_dict

# Show where the script is being run from.
print(os.getcwd())

multi_label_json = "entire_data.json"

# Provision texts: print the names, how many there are, and the first entry.
provision_path = "/home/zachary/projects/DeepWTO/data/provision/gatt.yaml"
provision = read_yaml(provision_path)
provision_names = list(provision)
print(provision_names, len(provision_names), sep="\n")
print(provision[provision_names[0]])

# Citability labels: load, clean, and reduce to the key list.
gatt = cleanse_dict(
    read_yaml("/home/zachary/projects/DeepWTO/data/label/citability/labels"
              "/GATT.yaml")
)
gatt_keys = get_keys(gatt)
print(gatt_keys)

# Position of "Article I" within the label keys.
art_id = gatt_keys.index("Article I")
print(art_id)

# Check whether the provision names and the label keys are the same list.
print(provision_names == gatt_keys)
from utils.misc.yaml import read_yaml

# Each listed title in notable.yaml is read as one comma-separated string of
# dispute identifiers.
read = read_yaml("notable.yaml")

titles = ['III', 'III:1', 'III:2', 'III:4', 'III:5', 'III:7']

# Collect the raw tokens of every title, without duplicates.
ds_numbs = set()
for title in titles:
    ds_numbs.update(read[title].split(','))
ds_numbs = sorted(ds_numbs)

print(len(ds_numbs))
print(ds_numbs)

# Reduce every token to its integer part.
ds_numbs_wo_DS_str = []
for ds_numb in ds_numbs:
    # Splitting on ',' leaves padding around the tokens. Strip it properly
    # instead of dropping the first character whenever a space occurs
    # anywhere in the token, which corrupted tokens with a trailing space.
    ds_numb = ds_numb.strip()
    print(ds_numb)
    # Skip the two-character prefix in front of the number
    # (presumably "DS" - verify against notable.yaml).
    ds_numbs_wo_DS_str.append(int(ds_numb[2:]))

comparee = read_yaml("../../info.yaml")
comparee = comparee['Panel']['ds_numb']

# Numbers that also appear in info.yaml under Panel/ds_numb.
joint = [elem for elem in ds_numbs_wo_DS_str if elem in comparee]

if __name__ == "__main__":
    unique_joint = sorted(set(joint))
    print(unique_joint)
    print(len(unique_joint))
# NOTE(review): the statement below is the tail of a definition that starts
# above this excerpt; it is kept verbatim - confirm its indentation against
# the enclosing body.
write_yaml(cited, "./stat.yaml")


def sort_yaml(yaml_path):
    """
    Read a YAML file and print its entries sorted by value, largest first.

    Every document in the file is merged into one mapping (later keys
    overwrite earlier ones); empty documents are skipped.

    :param yaml_path: path of the YAML file to read
    :return: None. only prints.
    """
    result = dict()
    # The documents are parsed lazily, so they must be consumed while the
    # file is still open; the ``with`` block also guarantees it gets closed.
    with open(yaml_path, "r") as stream:
        # safe_load_all: plain data only. The bare ``yaml.load_all(stream)``
        # call is unsafe on untrusted input and is rejected by PyYAML >= 6,
        # which requires an explicit Loader.
        for doc in yaml.safe_load_all(stream):
            if not doc:
                # An empty document parses to None.
                continue
            for k, v in doc.items():
                result[k] = v

    sorted_result = sorted(result.items(), key=lambda kv: kv[1], reverse=True)
    print(sorted_result)


if __name__ == "__main__":
    # main()
    # sort_yaml("stat.yaml")
    info = read_yaml("../info.yaml")
    panel = info['Panel']['ds_numb']
    appellate = info['AppellateBody']['ds_numb']
    linked = info['LinkedPanel']
    print(linked)
def query(article, yaml_path):
    """
    Print the entry stored under ``article`` in the YAML file at ``yaml_path``.

    :param article: key to look up in the loaded data
    :param yaml_path: path of the YAML file to read
    :return: None. only prints.
    """
    entry = read_yaml(yaml_path)[article]
    print(entry)
"labels/GATT.yaml"): """ From the integer list of labels, change the int into article name e/g : [0,2] -> ['Article I', 'Article II'] """ gatt_info = read_yaml(yaml_path) gatt_dict = cleanse_dict(gatt_info) gatt_keys_in_int = get_keys(gatt_dict) in_text = [] for index in indices: in_text.append(gatt_keys_in_int[index]) return in_text if __name__ == "__main__": gatt = read_yaml("labels/GATT.yaml") gatt = cleanse_dict(gatt) print(gatt) gatt_keys = get_keys(gatt) print(gatt_keys) for key in gatt_keys: print(key) inv_gatt = invert_dict(gatt) inv_gatt_keys = sorted(list(inv_gatt.keys())) print(inv_gatt) print(inv_gatt_keys) print(len(inv_gatt_keys)) print(rehash_arts_in_text([0, 1])) # Dump # path_to_dump = "../../dataset/citability/GATT_523/label.pkl"
from utils.misc.yaml import read_yaml

read = read_yaml("labels/AD.yaml")

if __name__ == "__main__":
    # For every key, split its ', '-separated string and drop the first two
    # characters of each piece.
    new = {
        key: [ds[2:] for ds in read[key].split(', ')]
        for key in read
    }

    print(new.keys())
    # Last piece listed under 'Article XXIV:12'.
    print(new['Article XXIV:12'][-1])