示例#1
0
 def download(self):
     """Fetch the LAMBADA test set and store it as a JSON list of texts.

     Downloads the JSON-lines file, passes each record's ``text`` field
     through ``ftfy.fix_text`` with NFKC normalization, and writes the
     resulting list of strings to ``data/lambada/lambada_test.json``.

     Raises:
         requests.HTTPError: if the server answers with an error status.
     """
     sh("mkdir -p data/lambada")
     # Fetch and parse BEFORE opening the output file: opening with 'w'
     # truncates it, so a failed request would otherwise leave an empty
     # file behind.
     req = requests.get("https://storage.googleapis.com/gpt-2/data/lambada_test.jsonl")
     req.raise_for_status()
     # Skip blank lines, which json.loads would reject.
     jsons = [json.loads(line) for line in req.iter_lines() if line]
     texts = [ftfy.fix_text(j['text'], normalization='NFKC') for j in jsons]
     with open("data/lambada/lambada_test.json", 'w') as f:
         json.dump(texts, f)
 def download(self):
     """Download and unpack the ETHICS archive unless ``data/ethics`` exists."""
     if os.path.exists('data/ethics'):
         # Something is already at data/ethics; nothing to fetch.
         return
     # One shell script: fetch the tarball into data/, extract it there,
     # then delete the tarball.
     fetch_and_unpack = """
             mkdir -p data
             wget https://people.eecs.berkeley.edu/~hendrycks/ethics.tar -P data/
             tar -xf data/ethics.tar -C data/
             rm data/ethics.tar
             """
     sh(fetch_and_unpack)
示例#3
0
 def download(self):
     """Download and unpack the MATH archive unless the dataset is present.

     Does nothing when ``self.DATASET_PATH`` already exists. Otherwise the
     tarball is fetched to ``{self.DATASET_PATH}.tar.gz``, extracted into
     ``data/`` and then removed.
     """
     if self.DATASET_PATH.exists():
         return
     # NOTE(review): like the original commands, this assumes
     # self.DATASET_PATH is data/MATH, i.e. the archive unpacks to it -- confirm.
     archive = f"{self.DATASET_PATH}.tar.gz"
     # Only the parent directory is created up front: creating DATASET_PATH
     # itself before the download would satisfy the guard above even when
     # the download fails, so a later call would never retry.
     # The steps are chained with && so a failed step stops the script and
     # becomes its exit status; -O overwrites any leftover from a failed
     # earlier attempt instead of saving a numbered copy.
     sh(f"""
     mkdir -p data &&
     wget https://people.eecs.berkeley.edu/~hendrycks/MATH.tar.gz -O {archive} &&
     tar -xvf {archive} -C data/ &&
     rm {archive}
     """)
 def download(self):
     """Download and unpack the ETHICS archive once.

     Skipped entirely when the ``data/ethics/done`` marker file exists.
     Otherwise the tarball is fetched to ``data/ethics.tar``, extracted
     into ``data/``, and the marker is written after a successful
     extraction.
     """
     if os.path.exists('data/ethics/done'):
         return
     sh("mkdir -p data")
     # NOTE(review): the third argument looks like a SHA-256 digest that
     # download_file presumably verifies -- confirm against its definition.
     download_file(
         "https://people.eecs.berkeley.edu/~hendrycks/ethics.tar",
         "data/ethics.tar",
         "40acbf1ac0da79a2aabef394d58889136b8d38b05be09482006de2453fb06333"
     )
     # The marker is gated on tar succeeding (&&): written unconditionally,
     # a partially failed extraction would be recorded as complete and
     # never retried. The tarball is removed either way.
     sh("""
     tar -xf data/ethics.tar -C data/ && touch data/ethics/done
     rm data/ethics.tar
     """)
示例#5
0
 def download(self):
     """Download and unpack the MATH archive unless it is already in place.

     The dataset counts as present only when both ``test`` and the ``done``
     marker exist under ``self.DATASET_PATH``.
     """
     done_marker = self.DATASET_PATH / 'done'
     if (self.DATASET_PATH / 'test').exists() and done_marker.exists():
         return
     tarball = f"{self.DATASET_PATH}.tar"
     sh(f"mkdir -p {self.DATASET_PATH}")
     download_file(
         "https://people.eecs.berkeley.edu/~hendrycks/MATH.tar",
         tarball,
         "01256fd7cd5430596fdf07e6e6a5827111b5235b7ffed679c662a12f898932da"
     )
     # Extract into data/ and write the marker only if tar succeeded; the
     # tarball is deleted regardless.
     sh(f"""
         tar -xf {tarball} -C data/ && touch {done_marker}
         rm {tarball}
         """)
示例#6
0
 def download(self):
     """Download the LAMBADA test set to ``data/lambada/lambada_test.jsonl``.

     Tries ``download_file`` first; if that raises, falls back to ``wget``
     followed by a ``sha256sum --check`` of the downloaded file.
     """
     sh("mkdir -p data/lambada")
     try:
         download_file(
             "http://eaidata.bmk.sh/data/lambada_test.jsonl", 
             "data/lambada/lambada_test.jsonl", 
             "4aa8d02cd17c719165fc8a7887fddd641f43fcafa4b1c806ca8abc31fabdb226"
         )
     except Exception:
         # Fallback: for some reason best_download doesn't work all the
         # time here. Catch Exception rather than using a bare except so
         # that KeyboardInterrupt / SystemExit still abort the download
         # instead of triggering the wget fallback.
         sh("wget http://eaidata.bmk.sh/data/lambada_test.jsonl -O data/lambada/lambada_test.jsonl")
         sh('echo "4aa8d02cd17c719165fc8a7887fddd641f43fcafa4b1c806ca8abc31fabdb226  data/lambada/lambada_test.jsonl" | sha256sum --check')
示例#7
0
 def download(self):
     """Download the LAMBADA test set to ``data/lambada/lambada_test.jsonl``."""
     source_url = "http://eaidata.bmk.sh/data/lambada_test.jsonl"
     target_path = "data/lambada/lambada_test.jsonl"
     # NOTE(review): presumably the expected SHA-256 of the file -- confirm
     # against download_file's definition.
     expected_digest = "4aa8d02cd17c719165fc8a7887fddd641f43fcafa4b1c806ca8abc31fabdb226"

     sh("mkdir -p data/lambada")
     download_file(source_url, target_path, expected_digest)
示例#8
0
 def download(self):
     """Download the LAMBADA test set to ``data/lambada/lambada_test.jsonl``."""
     source_url = "https://storage.googleapis.com/gpt-2/data/lambada_test.jsonl"
     target_path = "data/lambada/lambada_test.jsonl"
     # NOTE(review): presumably the expected SHA-256 of the file -- confirm
     # against download_file's definition.
     expected_digest = "4aa8d02cd17c719165fc8a7887fddd641f43fcafa4b1c806ca8abc31fabdb226"

     sh("mkdir -p data/lambada")
     download_file(source_url, target_path, expected_digest)
示例#9
0
 def download(self):
     """Download and unzip WikiText-2 (raw) unless the validation file exists."""
     if os.path.exists('data/wikitext/wikitext-2-raw/wiki.valid.raw'):
         return

     archive_url = "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip"
     archive_path = "data/wikitext/wikitext-2-raw-v1.zip"
     # NOTE(review): presumably the expected SHA-256 of the zip -- confirm
     # against download_file's definition.
     archive_digest = "ef7edb566e3e2b2d31b29c1fdb0c89a4cc683597484c3dc2517919c615435a11"

     os.makedirs("data/wikitext/", exist_ok=True)
     download_file(archive_url, archive_path, archive_digest)
     # Unzip inside data/wikitext/ so the archive's contents land there.
     sh("cd data/wikitext/ && unzip wikitext-2-raw-v1.zip")