示例#1
0
 def run(self):
     """Fetch the wiki redirects CSV into the path held by ``ALL_WIKI_REDIRECTS``.

     When ``is_aws_authenticated()`` is truthy the file is copied with
     ``aws s3 cp``; otherwise the same bucket key is downloaded over HTTPS
     with ``wget``.
     """
     if not is_aws_authenticated():
         # No usable AWS credentials: go through the HTTPS endpoint instead.
         url = 'https://s3-us-west-2.amazonaws.com/pinafore-us-west-2/public/wiki_redirects.csv'
         command = 'wget -O {} {}'.format(ALL_WIKI_REDIRECTS, url)
     else:
         bucket_key = 's3://pinafore-us-west-2/public/wiki_redirects.csv'
         command = 'aws s3 cp {} {}'.format(bucket_key, ALL_WIKI_REDIRECTS)
     shell(command)
示例#2
0
文件: preprocess.py 项目: Pinafore/qb
 def run(self):
     """Fetch the disambiguation pages JSON into ``WIKI_DISAMBIGUATION_PAGES``.

     ``safe_path`` is called on the destination first (presumably to prepare
     the target location -- confirm against the helper). The file is then
     copied with ``aws s3 cp`` when ``is_aws_authenticated()`` is truthy, or
     downloaded over HTTPS with ``wget`` otherwise.
     """
     safe_path(WIKI_DISAMBIGUATION_PAGES)

     if is_aws_authenticated():
         source = 's3://pinafore-us-west-2/public/disambiguation_pages.json'
         shell('aws s3 cp {} {}'.format(source, WIKI_DISAMBIGUATION_PAGES))
         return

     # Unauthenticated fallback: same bucket key via the HTTPS endpoint.
     url = 'https://s3-us-west-2.amazonaws.com/pinafore-us-west-2/public/disambiguation_pages.json'
     shell('wget -O {} {}'.format(WIKI_DISAMBIGUATION_PAGES, url))
示例#3
0
文件: preprocess.py 项目: Pinafore/qb
 def run(self):
     """Fetch the wiki redirects CSV into the path held by ``ALL_WIKI_REDIRECTS``.

     ``safe_path`` is invoked on the destination before downloading
     (presumably to prepare the target location -- confirm against the
     helper). The transfer uses ``aws s3 cp`` when ``is_aws_authenticated()``
     is truthy and ``wget`` against the HTTPS endpoint otherwise.
     """
     safe_path(ALL_WIKI_REDIRECTS)

     # Pick the transfer command first, then run it in exactly one place.
     if is_aws_authenticated():
         source = 's3://pinafore-us-west-2/public/wiki_redirects.csv'
         download_cmd = 'aws s3 cp {} {}'.format(source, ALL_WIKI_REDIRECTS)
     else:
         source = 'https://s3-us-west-2.amazonaws.com/pinafore-us-west-2/public/wiki_redirects.csv'
         download_cmd = 'wget -O {} {}'.format(ALL_WIKI_REDIRECTS, source)

     shell(download_cmd)
示例#4
0
文件: preprocess.py 项目: NPSDC/qb
 def run(self):
     """Fetch the disambiguation pages JSON into ``WIKI_DISAMBIGUATION_PAGES``.

     Calls ``safe_path`` on the destination first (presumably to prepare the
     target location -- confirm against the helper), then downloads with
     ``wget`` over HTTPS when ``is_aws_authenticated()`` is falsy, or with
     ``aws s3 cp`` when it is truthy.
     """
     safe_path(WIKI_DISAMBIGUATION_PAGES)

     if not is_aws_authenticated():
         # Without AWS credentials, use the HTTPS endpoint for the same key.
         url = "https://s3-us-west-2.amazonaws.com/pinafore-us-west-2/public/disambiguation_pages.json"
         shell("wget -O {} {}".format(WIKI_DISAMBIGUATION_PAGES, url))
         return

     bucket_key = "s3://pinafore-us-west-2/public/disambiguation_pages.json"
     shell("aws s3 cp {} {}".format(bucket_key, WIKI_DISAMBIGUATION_PAGES))
示例#5
0
文件: preprocess.py 项目: NPSDC/qb
 def run(self):
     """Fetch the wiki redirects CSV into the path held by ``ALL_WIKI_REDIRECTS``.

     ``safe_path`` is called on the destination first (presumably to prepare
     the target location -- confirm against the helper). The download goes
     through ``aws s3 cp`` when ``is_aws_authenticated()`` is truthy and
     through ``wget`` on the HTTPS endpoint otherwise.
     """
     safe_path(ALL_WIKI_REDIRECTS)

     if not is_aws_authenticated():
         url = "https://s3-us-west-2.amazonaws.com/pinafore-us-west-2/public/wiki_redirects.csv"
         command = "wget -O {} {}".format(ALL_WIKI_REDIRECTS, url)
     else:
         bucket_key = "s3://pinafore-us-west-2/public/wiki_redirects.csv"
         command = "aws s3 cp {} {}".format(bucket_key, ALL_WIKI_REDIRECTS)

     shell(command)
示例#6
0
文件: preprocess.py 项目: nadesai/qb
    def run(self):
        """Download and unpack the parsed-wiki archive under data/external/wikipedia/.

        Steps, each run through ``shell`` in order:
          1. download ``parsed-wiki.tar.lz4`` (``aws s3 cp`` when
             ``is_aws_authenticated()`` is truthy, ``wget`` over HTTPS otherwise);
          2. decompress with ``lz4`` and extract with ``tar``;
          3. remove the downloaded archive;
          4. ``touch`` the ``parsed-wiki_SUCCESS`` file.
        """
        if not is_aws_authenticated():
            # No AWS credentials: pull the same key from the HTTPS endpoint.
            url = 'https://s3-us-west-2.amazonaws.com/pinafore-us-west-2/public/wikipedia-dumps/parsed-wiki.tar.lz4'
            download_cmd = 'wget -O {} {}'.format('data/external/wikipedia/parsed-wiki.tar.lz4', url)
        else:
            bucket_key = 's3://pinafore-us-west-2/public/wikipedia-dumps/parsed-wiki.tar.lz4'
            download_cmd = 'aws s3 cp {} data/external/wikipedia/parsed-wiki.tar.lz4'.format(bucket_key)

        pipeline = (
            download_cmd,
            'lz4 -d data/external/wikipedia/parsed-wiki.tar.lz4 | tar -x -C data/external/wikipedia/',
            'rm data/external/wikipedia/parsed-wiki.tar.lz4',
            'touch data/external/wikipedia/parsed-wiki_SUCCESS',
        )
        for command in pipeline:
            shell(command)
示例#7
0
    def run(self):
        """Download and unpack the parsed-wiki archive under data/external/wikipedia/.

        The archive path is whatever ``safe_path`` returns for
        ``data/external/wikipedia/parsed-wiki.tar.lz4``. The archive is
        downloaded (``aws s3 cp`` when ``is_aws_authenticated()`` is truthy,
        ``wget`` over HTTPS otherwise), decompressed with ``lz4`` and extracted
        with ``tar``, removed, and finally the ``parsed-wiki_SUCCESS`` file is
        touched.
        """
        archive = safe_path('data/external/wikipedia/parsed-wiki.tar.lz4')

        if not is_aws_authenticated():
            # No AWS credentials: pull the same key from the HTTPS endpoint.
            https_location = 'https://s3-us-west-2.amazonaws.com/pinafore-us-west-2/public/parsed-wiki.tar.lz4'
            download_cmd = f'wget -O {archive} {https_location}'
        else:
            s3_location = 's3://pinafore-us-west-2/public/parsed-wiki.tar.lz4'
            download_cmd = f'aws s3 cp {s3_location} {archive}'

        steps = [
            download_cmd,
            f'lz4 -d {archive} | tar -x -C data/external/wikipedia/',
            f'rm {archive}',
            'touch data/external/wikipedia/parsed-wiki_SUCCESS',
        ]
        for command in steps:
            shell(command)
示例#8
0
文件: preprocess.py 项目: Pinafore/qb
    def run(self):
        """Download and unpack the parsed-wiki archive under data/external/wikipedia/.

        ``safe_path`` supplies the archive path. The download uses
        ``aws s3 cp`` when ``is_aws_authenticated()`` is truthy and ``wget``
        over HTTPS otherwise; the archive is then decompressed (``lz4``),
        extracted (``tar``), deleted, and the ``parsed-wiki_SUCCESS`` file is
        touched.
        """
        archive = safe_path('data/external/wikipedia/parsed-wiki.tar.lz4')
        s3_location = 's3://pinafore-us-west-2/public/parsed-wiki.tar.lz4'
        https_location = 'https://s3-us-west-2.amazonaws.com/pinafore-us-west-2/public/parsed-wiki.tar.lz4'

        # Choose the transfer tool based on whether AWS credentials are available.
        download_cmd = (
            f'aws s3 cp {s3_location} {archive}'
            if is_aws_authenticated()
            else f'wget -O {archive} {https_location}'
        )
        shell(download_cmd)

        # Unpack, drop the archive, then touch the SUCCESS file.
        shell(f'lz4 -d {archive} | tar -x -C data/external/wikipedia/')
        shell(f'rm {archive}')
        shell('touch data/external/wikipedia/parsed-wiki_SUCCESS')
示例#9
0
文件: preprocess.py 项目: NPSDC/qb
    def run(self):
        """Download and unpack the parsed-wiki archive under data/external/wikipedia/.

        The archive path comes from ``safe_path``. When
        ``is_aws_authenticated()`` is falsy the archive is fetched with
        ``wget`` over HTTPS, otherwise with ``aws s3 cp``. Afterwards it is
        decompressed with ``lz4``, extracted with ``tar``, removed, and the
        ``parsed-wiki_SUCCESS`` file is touched.
        """
        archive = safe_path("data/external/wikipedia/parsed-wiki.tar.lz4")

        if not is_aws_authenticated():
            # No AWS credentials: pull the same key from the HTTPS endpoint.
            url = "https://s3-us-west-2.amazonaws.com/pinafore-us-west-2/public/parsed-wiki.tar.lz4"
            shell(f"wget -O {archive} {url}")
        else:
            bucket_key = "s3://pinafore-us-west-2/public/parsed-wiki.tar.lz4"
            shell(f"aws s3 cp {bucket_key} {archive}")

        extract_cmd = f"lz4 -d {archive} | tar -x -C data/external/wikipedia/"
        shell(extract_cmd)

        cleanup_cmd = f"rm {archive}"
        shell(cleanup_cmd)

        shell("touch data/external/wikipedia/parsed-wiki_SUCCESS")