def run(self):
    """Download the Wikipedia redirects CSV to ``ALL_WIKI_REDIRECTS``.

    Copies the object with ``aws s3 cp`` when ``is_aws_authenticated()`` is
    true; otherwise fetches the same object's public HTTPS URL with ``wget``.
    """
    # Pass the destination through safe_path before downloading, as the other
    # download tasks in this file do. Presumably safe_path creates the missing
    # parent directory -- confirm against its definition.
    safe_path(ALL_WIKI_REDIRECTS)
    if is_aws_authenticated():
        s3_location = 's3://pinafore-us-west-2/public/wiki_redirects.csv'
        shell('aws s3 cp {} {}'.format(s3_location, ALL_WIKI_REDIRECTS))
    else:
        https_location = 'https://s3-us-west-2.amazonaws.com/pinafore-us-west-2/public/wiki_redirects.csv'
        shell('wget -O {} {}'.format(ALL_WIKI_REDIRECTS, https_location))
def run(self):
    """Fetch the disambiguation pages JSON into ``WIKI_DISAMBIGUATION_PAGES``.

    The destination is first handed to ``safe_path``. The download command
    is ``aws s3 cp`` when ``is_aws_authenticated()`` is true and ``wget``
    against the public HTTPS URL otherwise.
    """
    destination = WIKI_DISAMBIGUATION_PAGES
    safe_path(destination)
    if not is_aws_authenticated():
        # Unauthenticated: fall back to the public HTTPS endpoint.
        source = 'https://s3-us-west-2.amazonaws.com/pinafore-us-west-2/public/disambiguation_pages.json'
        command = 'wget -O {} {}'.format(destination, source)
    else:
        source = 's3://pinafore-us-west-2/public/disambiguation_pages.json'
        command = 'aws s3 cp {} {}'.format(source, destination)
    shell(command)
def run(self):
    """Fetch the Wikipedia redirects CSV into ``ALL_WIKI_REDIRECTS``.

    Calls ``safe_path`` on the destination, then downloads with
    ``aws s3 cp`` if ``is_aws_authenticated()`` is true, or with ``wget``
    from the public HTTPS URL if it is not.
    """
    safe_path(ALL_WIKI_REDIRECTS)

    if is_aws_authenticated():
        shell('aws s3 cp {} {}'.format(
            's3://pinafore-us-west-2/public/wiki_redirects.csv',
            ALL_WIKI_REDIRECTS,
        ))
        return

    # No AWS credentials: use the public HTTPS endpoint instead.
    shell('wget -O {} {}'.format(
        ALL_WIKI_REDIRECTS,
        'https://s3-us-west-2.amazonaws.com/pinafore-us-west-2/public/wiki_redirects.csv',
    ))
def run(self):
    """Download the disambiguation pages JSON to ``WIKI_DISAMBIGUATION_PAGES``.

    ``safe_path`` is applied to the destination first. The file is then
    copied via ``aws s3 cp`` when ``is_aws_authenticated()`` is true, and
    fetched via ``wget`` from the public HTTPS URL otherwise.
    """
    target = WIKI_DISAMBIGUATION_PAGES
    safe_path(target)
    if is_aws_authenticated():
        remote = "s3://pinafore-us-west-2/public/disambiguation_pages.json"
        shell(f"aws s3 cp {remote} {target}")
    else:
        remote = "https://s3-us-west-2.amazonaws.com/pinafore-us-west-2/public/disambiguation_pages.json"
        shell(f"wget -O {target} {remote}")
def run(self):
    """Download the Wikipedia redirects CSV to ``ALL_WIKI_REDIRECTS``.

    After calling ``safe_path`` on the destination, builds a single shell
    command -- ``aws s3 cp`` when ``is_aws_authenticated()`` is true,
    ``wget`` from the public HTTPS URL otherwise -- and runs it.
    """
    safe_path(ALL_WIKI_REDIRECTS)
    use_s3 = is_aws_authenticated()
    if use_s3:
        fetch = "aws s3 cp {src} {dst}".format(
            src="s3://pinafore-us-west-2/public/wiki_redirects.csv",
            dst=ALL_WIKI_REDIRECTS,
        )
    else:
        fetch = "wget -O {dst} {src}".format(
            dst=ALL_WIKI_REDIRECTS,
            src="https://s3-us-west-2.amazonaws.com/pinafore-us-west-2/public/wiki_redirects.csv",
        )
    shell(fetch)
def run(self):
    """Download and unpack the parsed Wikipedia archive.

    Steps, in order:
      1. Download ``parsed-wiki.tar.lz4`` into ``data/external/wikipedia/``
         with ``aws s3 cp`` when ``is_aws_authenticated()`` is true, or
         with ``wget`` from the public HTTPS URL otherwise.
      2. Decompress with ``lz4 -d`` piped into ``tar -x`` targeting
         ``data/external/wikipedia/``.
      3. Remove the downloaded archive.
      4. Touch ``data/external/wikipedia/parsed-wiki_SUCCESS``.
    """
    archive = 'data/external/wikipedia/parsed-wiki.tar.lz4'
    # Pass the destination through safe_path before downloading, as the other
    # download tasks in this file do. Presumably safe_path creates the missing
    # parent directory -- confirm against its definition. The return value
    # is deliberately unused so the command strings stay exactly as before.
    safe_path(archive)
    if is_aws_authenticated():
        s3_location = 's3://pinafore-us-west-2/public/wikipedia-dumps/parsed-wiki.tar.lz4'
        shell('aws s3 cp {} {}'.format(s3_location, archive))
    else:
        https_location = 'https://s3-us-west-2.amazonaws.com/pinafore-us-west-2/public/wikipedia-dumps/parsed-wiki.tar.lz4'
        shell('wget -O {} {}'.format(archive, https_location))

    shell('lz4 -d {} | tar -x -C data/external/wikipedia/'.format(archive))
    shell('rm {}'.format(archive))
    shell('touch data/external/wikipedia/parsed-wiki_SUCCESS')
def run(self):
    """Download the parsed Wikipedia archive, unpack it, and clean up.

    The archive path is the value returned by ``safe_path``. The download
    uses ``aws s3 cp`` when ``is_aws_authenticated()`` is true and ``wget``
    from the public HTTPS URL otherwise. The archive is then decompressed
    with ``lz4 -d`` piped into ``tar -x``, deleted, and
    ``data/external/wikipedia/parsed-wiki_SUCCESS`` is touched.
    """
    tarball = safe_path('data/external/wikipedia/parsed-wiki.tar.lz4')

    if not is_aws_authenticated():
        url = 'https://s3-us-west-2.amazonaws.com/pinafore-us-west-2/public/parsed-wiki.tar.lz4'
        download = f'wget -O {tarball} {url}'
    else:
        uri = 's3://pinafore-us-west-2/public/parsed-wiki.tar.lz4'
        download = f'aws s3 cp {uri} {tarball}'

    # Commands run strictly in this order: fetch, extract, delete, touch.
    steps = (
        download,
        f'lz4 -d {tarball} | tar -x -C data/external/wikipedia/',
        f'rm {tarball}',
        'touch data/external/wikipedia/parsed-wiki_SUCCESS',
    )
    for step in steps:
        shell(step)
def run(self):
    """Fetch, extract, and remove the parsed Wikipedia archive.

    Uses the path returned by ``safe_path`` as the download destination.
    Downloads with ``aws s3 cp`` when ``is_aws_authenticated()`` is true,
    otherwise with ``wget`` from the public HTTPS URL. Afterwards it pipes
    ``lz4 -d`` into ``tar -x -C data/external/wikipedia/``, removes the
    archive, and touches ``data/external/wikipedia/parsed-wiki_SUCCESS``.
    """
    lz4_path = safe_path("data/external/wikipedia/parsed-wiki.tar.lz4")

    if is_aws_authenticated():
        shell(
            "aws s3 cp {} {}".format(
                "s3://pinafore-us-west-2/public/parsed-wiki.tar.lz4", lz4_path
            )
        )
    else:
        shell(
            "wget -O {} {}".format(
                lz4_path,
                "https://s3-us-west-2.amazonaws.com/pinafore-us-west-2/public/parsed-wiki.tar.lz4",
            )
        )

    shell("lz4 -d {} | tar -x -C data/external/wikipedia/".format(lz4_path))
    shell("rm {}".format(lz4_path))
    shell("touch data/external/wikipedia/parsed-wiki_SUCCESS")