def download(self):
    """Fetch the LAMBADA test split and cache it as a JSON list of texts.

    Downloads ``lambada_test.jsonl``, parses every non-empty line as JSON,
    passes each record's ``text`` field through
    ``ftfy.fix_text(..., normalization='NFKC')`` and writes the resulting
    list to ``data/lambada/lambada_test.json``.

    Raises:
        requests.HTTPError: from ``raise_for_status()`` when the server
            answers with an error status.
    """
    sh("mkdir -p data/lambada")

    # Fetch and parse everything *before* opening the output file, so a
    # failed request or a malformed line cannot leave an empty/truncated
    # cache file behind.
    req = requests.get("https://storage.googleapis.com/gpt-2/data/lambada_test.jsonl")
    req.raise_for_status()

    # iter_lines() may yield empty lines; json.loads would choke on them.
    records = [json.loads(line) for line in req.iter_lines() if line]
    texts = [ftfy.fix_text(record['text'], normalization='NFKC') for record in records]

    with open("data/lambada/lambada_test.json", 'w') as f:
        json.dump(texts, f)
def download(self):
    """Download and unpack the ETHICS archive under ``data/``.

    The work is skipped once ``data/ethics/done`` exists. The marker is
    only written after download *and* extraction succeeded, so an
    interrupted run is retried on the next call instead of being mistaken
    for a complete dataset (a bare ``data/ethics`` directory check cannot
    tell the two apart).

    NOTE(review): assumes the archive unpacks into ``data/ethics`` (the
    previous existence check relied on the same layout) and that ``sh``
    hands the string to a POSIX shell -- confirm against its definition.
    """
    if os.path.exists('data/ethics/done'):
        return

    try:
        # Chain with && so a failed step stops the pipeline; -O pins the
        # output name so a stale partial file is overwritten rather than
        # wget saving the new download under a numbered name.
        sh(
            "mkdir -p data"
            " && wget https://people.eecs.berkeley.edu/~hendrycks/ethics.tar -O data/ethics.tar"
            " && tar -xf data/ethics.tar -C data/"
            " && touch data/ethics/done"
        )
    finally:
        # Always drop the archive, whether or not the steps above succeeded.
        sh("rm -f data/ethics.tar")
def download(self):
    """Download and unpack the MATH archive next to ``self.DATASET_PATH``.

    The work is skipped once ``<DATASET_PATH>/done`` exists. The marker is
    written only after download and extraction both succeeded. Checking
    for the dataset directory itself is not enough, because that directory
    is created *before* the download starts, so a single failed attempt
    would otherwise disable every retry.

    NOTE(review): assumes ``self.DATASET_PATH`` is a ``pathlib.Path``
    located directly under ``data/`` and that the archive unpacks into it
    (the hard-coded ``-C data/`` is kept from the previous version) --
    confirm against the class definition.
    """
    done_marker = self.DATASET_PATH / 'done'
    if done_marker.exists():
        return

    # One spelling for the archive path, used for download, extraction
    # and cleanup alike.
    archive = f"{self.DATASET_PATH}.tar.gz"
    try:
        # && stops the chain at the first failing step, so the marker is
        # never written for a broken download or extraction.
        sh(
            f"mkdir -p {self.DATASET_PATH}"
            f" && wget https://people.eecs.berkeley.edu/~hendrycks/MATH.tar.gz -O {archive}"
            f" && tar -xvf {archive} -C data/"
            f" && touch {done_marker}"
        )
    finally:
        # Always drop the archive, whether or not the steps above succeeded.
        sh(f"rm -f {archive}")
def download(self):
    """Download and unpack the ETHICS archive under ``data/``.

    Skipped when the ``data/ethics/done`` marker exists. The marker is
    written only if ``tar`` succeeds; writing it unconditionally would
    mark a failed or partial extraction as complete and block any retry.

    NOTE(review): the third ``download_file`` argument looks like a
    SHA-256 hex digest that the helper presumably verifies -- confirm
    against its definition.
    """
    if os.path.exists('data/ethics/done'):
        return

    sh("mkdir -p data")
    download_file(
        "https://people.eecs.berkeley.edu/~hendrycks/ethics.tar",
        "data/ethics.tar",
        "40acbf1ac0da79a2aabef394d58889136b8d38b05be09482006de2453fb06333"
    )
    # `&&` ties the marker to a successful extraction; the archive is
    # removed afterwards either way.
    sh("""
    tar -xf data/ethics.tar -C data/ && touch data/ethics/done
    rm data/ethics.tar
    """)
def download(self):
    """Download and unpack the MATH archive unless it is already in place.

    The dataset counts as present only when both ``<DATASET_PATH>/test``
    and the ``<DATASET_PATH>/done`` marker exist. Otherwise the dataset
    directory is created, the archive is fetched to ``<DATASET_PATH>.tar``,
    extracted into ``data/`` (the marker is touched only if ``tar``
    succeeds) and the archive is removed.

    NOTE(review): the third ``download_file`` argument looks like a
    SHA-256 hex digest that the helper presumably verifies -- confirm
    against its definition.
    """
    already_extracted = (
        (self.DATASET_PATH / 'test').exists()
        and (self.DATASET_PATH / 'done').exists()
    )
    if already_extracted:
        return

    archive = f"{self.DATASET_PATH}.tar"

    sh(f"mkdir -p {self.DATASET_PATH}")
    download_file(
        "https://people.eecs.berkeley.edu/~hendrycks/MATH.tar",
        archive,
        "01256fd7cd5430596fdf07e6e6a5827111b5235b7ffed679c662a12f898932da"
    )
    sh(f"""
    tar -xf {archive} -C data/ && touch {self.DATASET_PATH / 'done'}
    rm {archive}
    """)
def download(self):
    """Download the LAMBADA test split to ``data/lambada/lambada_test.jsonl``.

    ``download_file`` is tried first. If it raises, the file is fetched
    with ``wget`` instead and its SHA-256 digest is checked with
    ``sha256sum --check``.
    """
    url = "http://eaidata.bmk.sh/data/lambada_test.jsonl"
    target = "data/lambada/lambada_test.jsonl"
    checksum = "4aa8d02cd17c719165fc8a7887fddd641f43fcafa4b1c806ca8abc31fabdb226"

    sh("mkdir -p data/lambada")
    try:
        download_file(url, target, checksum)
    except Exception:
        # Deliberately `Exception`, not a bare `except:`: Ctrl-C and
        # SystemExit must abort the download rather than silently
        # kicking off the wget fallback.
        # fallback - for some reason best_download doesn't work all the time here
        sh(f"wget {url} -O {target}")
        # sha256sum's checksum-line format is "<digest>  <file>"
        # (two separator characters between digest and file name).
        sh(f'echo "{checksum}  {target}" | sha256sum --check')
def download(self):
    """Download the LAMBADA test split to ``data/lambada/lambada_test.jsonl``.

    Creates ``data/lambada`` and delegates the transfer to
    ``download_file``.

    NOTE(review): the third argument looks like a SHA-256 hex digest that
    ``download_file`` presumably verifies -- confirm against its definition.
    """
    source_url = "http://eaidata.bmk.sh/data/lambada_test.jsonl"
    destination = "data/lambada/lambada_test.jsonl"
    expected_digest = "4aa8d02cd17c719165fc8a7887fddd641f43fcafa4b1c806ca8abc31fabdb226"

    sh("mkdir -p data/lambada")
    download_file(source_url, destination, expected_digest)
def download(self):
    """Download the LAMBADA test split to ``data/lambada/lambada_test.jsonl``.

    Creates ``data/lambada`` and delegates the transfer to
    ``download_file``.

    NOTE(review): the third argument looks like a SHA-256 hex digest that
    ``download_file`` presumably verifies -- confirm against its definition.
    """
    source_url = "https://storage.googleapis.com/gpt-2/data/lambada_test.jsonl"
    destination = "data/lambada/lambada_test.jsonl"
    expected_digest = "4aa8d02cd17c719165fc8a7887fddd641f43fcafa4b1c806ca8abc31fabdb226"

    sh("mkdir -p data/lambada")
    download_file(source_url, destination, expected_digest)
def download(self):
    """Download and unzip the raw WikiText-2 archive into ``data/wikitext/``.

    Nothing happens when
    ``data/wikitext/wikitext-2-raw/wiki.valid.raw`` already exists.

    NOTE(review): the third ``download_file`` argument looks like a
    SHA-256 hex digest that the helper presumably verifies -- confirm
    against its definition.
    """
    if os.path.exists('data/wikitext/wikitext-2-raw/wiki.valid.raw'):
        return

    archive_url = "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip"
    archive_path = "data/wikitext/wikitext-2-raw-v1.zip"
    archive_digest = "ef7edb566e3e2b2d31b29c1fdb0c89a4cc683597484c3dc2517919c615435a11"

    os.makedirs("data/wikitext/", exist_ok=True)
    download_file(archive_url, archive_path, archive_digest)
    sh("cd data/wikitext/ && unzip wikitext-2-raw-v1.zip")