def run(self):
    """Copy the image, ground truth and ground-truth names from S3 to disk.

    Each file is read in full from under ``S3_ROOT`` and written under the
    current working directory concatenated with ``LOCAL_DIR``. Every target
    is created with ``format=luigi.format.Nop``.
    """
    # Anchoring on the working directory keeps the task usable under pytest.
    local_root = os.getcwd() + self.LOCAL_DIR
    print("====================================", local_root)
    # Same order as before: hyperspectral image, ground truth, then names.
    for attr in ('image', 'gt', 'gt_names'):
        source = S3Target(path=f'{self.S3_ROOT}{getattr(self, attr)}',
                          format=luigi.format.Nop)
        with source.open('r') as remote:
            payload = remote.read()
        destination = LocalTarget(path=f'{local_root}{getattr(self, attr)}',
                                  format=luigi.format.Nop)
        with destination.open('w') as local:
            local.write(payload)
Пример #2
0
 def output(self):
     """Return two PDF targets: the fixed name and the timestamped name."""
     fixed = S3Target('s3://data-observatory/observatory.pdf')
     stamped_path = 's3://data-observatory/observatory-{timestamp}.pdf'.format(
         timestamp=self.timestamp)
     return [fixed, S3Target(stamped_path)]
Пример #3
0
 def output(self):
     """Return a dict of CSV targets under ``<S3_BUCKET>/<expt_id>/``.

     Always contains 'est_counts' and 'tpm'; 'annotations' is added only
     when ``self.annot`` is truthy.
     """
     prefix = '{}/{}/'.format(cfg['S3_BUCKET'], self.expt_id)
     targets = {}
     for key, filename in (('est_counts', 'est_counts.csv'),
                           ('tpm', 'tpm.csv')):
         targets[key] = S3Target(prefix + filename)
     if self.annot:
         targets['annotations'] = S3Target(prefix + 'annotations.csv')
     return targets
Пример #4
0
 def output(self):
     """Return the S3 target of the random-forest pickle for ``self.date``.

     The key is ``S3.MODELS`` followed by ``self.date`` formatted with the
     date/time spec below; the location is resolved through ``s3.path``.
     """
     key = S3.MODELS + "{date:%Y/%m/%d/random_forest_T%H%M%S.pkl}".format(
         date=self.date)
     location = s3.path(key)
     return S3Target(location, client=s3.create_client())
Пример #5
0
def get_target(path):
    """
    Build the Luigi Target that matches a path string.

    Recognised forms, checked in this order:

    * ``s3:...``      -> S3Target built from the unchanged string
    * ``/...``        -> LocalTarget built from the unchanged string
    * ``file://...``  -> LocalTarget built from the text after ``file://``

    :type path: str
    :param path: s3 or file URL, or absolute local path

    :rtype: Target
    :returns: Target for the path string
    :raises RuntimeError: if the path matches none of the forms above
    """
    if path.startswith('s3:'):
        return S3Target(path)
    if path.startswith('/'):
        return LocalTarget(path)

    file_scheme = 'file://'
    if not path.startswith(file_scheme):
        raise RuntimeError("Unknown scheme for path: %s" % path)
    # Drop the scheme so only the filesystem path is handed to LocalTarget.
    return LocalTarget(path[len(file_scheme):])
Пример #6
0
    def test_read_iterator_long(self):
        """Iterating an S3 file 5X the boto buffer size yields whole lines."""
        # Shrink the boto buffer so the first line spans several buffers.
        saved_buffer_size = key.Key.BufferSize
        key.Key.BufferSize = 2
        try:
            line_one = ''.zfill(key.Key.BufferSize * 5) + os.linesep
            line_two = 'line two' + os.linesep
            line_three = 'line three' + os.linesep
            payload = (line_one + line_two + line_three).encode('utf-8')
            # delete=False: the file must outlive the handle for the upload.
            with tempfile.NamedTemporaryFile(mode='wb',
                                             delete=False) as handle:
                local_path = handle.name
                handle.write(payload)

            s3_client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
            create_bucket()
            s3_path = 's3://psetbucket/largetempfile'
            s3_client.put(local_path, s3_path)
            target = S3Target(s3_path, client=s3_client)
            with target.open() as read_file:
                lines = list(read_file)
        finally:
            # Always restore the class-level buffer size for other tests.
            key.Key.BufferSize = saved_buffer_size

        self.assertEqual(3, len(lines))
        self.assertEqual(line_one, lines[0])
        self.assertEqual(line_two, lines[1])
        self.assertEqual(line_three, lines[2])
Пример #7
0
 def output(self):
     """Return the S3 target of the gradient-boosting pickle for ``self.date``.

     The key is ``S3.MODELS`` followed by ``self.date`` formatted with the
     date/time spec below; the location is resolved through ``s3.path``.
     """
     dated_name = "{date:%Y/%m/%d/gradient_boosting_T%H%M%S.pkl}".format(
         date=self.date)
     model_path = s3.path(S3.MODELS + dated_name)
     return S3Target(model_path, client=s3.create_client())
Пример #8
0
 def output(self):
     """Return the S3 target of the logistic-regression pickle for ``self.date``.

     The key is ``S3.MODELS`` followed by ``self.date`` formatted with the
     date/time spec below; the location is resolved through ``s3.path``.
     """
     dated_name = "{date:%Y/%m/%d/logistic_regression_T%H%M%S.pkl}".format(
         date=self.date)
     model_path = s3.path(S3.MODELS + dated_name)
     return S3Target(model_path, client=s3.create_client())
 def requires(self):
     """Return an S3Target for the path 'test' created with the Nop format."""
     return S3Target(path='test', format=Nop)
Пример #10
0
    def test_read_iterator_long(self):
        """Lines survive iteration when the file is 5X the boto buffer size."""
        # Shrink the boto buffer to exercise line buffering across chunks.
        saved_buffer_size = key.Key.BufferSize
        key.Key.BufferSize = 2
        try:
            line_one = ''.zfill(key.Key.BufferSize * 5) + os.linesep
            # The last line deliberately has no trailing line separator.
            payload = line_one + 'line two' + os.linesep + 'line three'
            # delete=False: the file must outlive the handle for the upload.
            with tempfile.NamedTemporaryFile(mode='wb',
                                             delete=False) as handle:
                local_path = handle.name
                handle.write(payload.encode('utf-8'))

            s3_client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
            s3_client.s3.create_bucket('mybucket')
            s3_client.put(local_path, 's3://mybucket/largetempfile')
            target = S3Target('s3://mybucket/largetempfile',
                              client=s3_client)
            with target.open() as read_file:
                lines = list(read_file)
        finally:
            # Always restore the class-level buffer size for other tests.
            key.Key.BufferSize = saved_buffer_size

        self.assertEqual(3, len(lines))
        self.assertEqual(line_one, lines[0])
        self.assertEqual("line two" + os.linesep, lines[1])
        self.assertEqual("line three", lines[2])
Пример #11
0
 def get_target(cls, scheme, path, fragment, username, password, hostname,
                port, query, **kwargs):
     """Build an S3Target from ``scheme``, ``hostname`` and ``path``.

     Extra keyword arguments are merged into ``query`` (which is updated in
     place), and the merged mapping is passed to S3Target as keyword
     arguments. ``fragment``, ``username``, ``password`` and ``port`` are
     accepted but not used here.
     """
     # NOTE: this mutates the caller's ``query`` mapping.
     query.update(kwargs)
     url = '{scheme}://{hostname}{path}'.format(scheme=scheme,
                                                hostname=hostname,
                                                path=path)
     return S3Target(url, **query)
Пример #12
0
 def create_target(self, format=None, **kwargs):
     """Return an S3Target for 's3://mybucket/test_file'.

     A client is built from the test credentials and ``create_bucket()`` is
     called before the target is constructed. ``format`` and any extra
     keyword arguments are forwarded to S3Target.
     """
     s3_client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
     create_bucket()
     target_path = 's3://mybucket/test_file'
     return S3Target(target_path, client=s3_client, format=format, **kwargs)
Пример #13
0
 def test_read_with_session(self):
     """A file uploaded by a client built with a session token reads back intact."""
     client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY, AWS_SESSION_TOKEN)
     create_bucket()
     client.put(self.tempFilePath, 's3://mybucket/tempfile-with-session')
     t = S3Target('s3://mybucket/tempfile-with-session', client=client)
     # Context manager so the read handle is closed even if read() raises;
     # previously the handle was never closed.
     with t.open() as read_file:
         file_str = read_file.read()
     self.assertEqual(self.tempFileContents, file_str.encode('utf-8'))
Пример #14
0
 def output(self):
     """Print the keys found under ``IMAGE_ROOT`` and return its S3 target.

     The listing is informational only; the returned target always points
     at ``s3://<S3_BUCKET>/<IMAGE_ROOT>`` and uses ``luigi.format.Nop``.
     """
     s3_client = boto3.client('s3')
     objects = s3_client.list_objects(Bucket=self.S3_BUCKET,
                                      Prefix=self.IMAGE_ROOT)
     # The response has no 'Contents' key when nothing matches the prefix,
     # so fall back to an empty list instead of raising KeyError.
     for obj in objects.get('Contents', []):
         print('Checking for files in S3: %s' % obj['Key'])
     return S3Target("s3://{}/{}".format(self.S3_BUCKET, self.IMAGE_ROOT),
                     format=luigi.format.Nop)
Пример #15
0
 def test_read(self):
     """A file uploaded with ``client.put`` reads back with the same contents."""
     client = S3Client(AWS_ACCESS_KEY, AWS_SECRET_KEY)
     client.s3.create_bucket('mybucket')
     client.put(self.tempFilePath, 's3://mybucket/tempfile')
     t = S3Target('s3://mybucket/tempfile', client=client)
     # Context manager so the read handle is closed even if read() raises;
     # previously the handle was never closed.
     with t.open() as read_file:
         file_str = read_file.read()
     self.assertEqual(self.tempFileContents, file_str.encode('utf-8'))
Пример #16
0
 def output(self):
     """Return the UTF8 target for today's ``wikipedia_info_output.csv``.

     The client is configured from the 's3' config section and the path
     contains the result of ``strftime("%Y-%m-%d")``.
     """
     s3_client = S3Client(**config(section='s3'))
     dated_path = (
         's3://s3-bucket-wikidata/{}/wikipedia_info_output.csv'.format(
             strftime("%Y-%m-%d")))
     return S3Target(dated_path, format=UTF8, client=s3_client)
Пример #17
0
 def __init__(self, path, *args, **kwargs):
     """Proxy an S3Target, or a LocalTarget when a local S3 root is set.

     The root comes from the ``local_s3_path`` keyword (removed from
     ``kwargs``) and falls back to the ``LOCAL_S3_PATH`` environment
     variable. Remaining arguments are forwarded to the chosen target.
     """
     env_root = os.getenv('LOCAL_S3_PATH', None)
     self.local_s3_path = kwargs.pop('local_s3_path', env_root)
     if self.local_s3_path:
         # Map the S3 URL under the local root by removing every 's3://'.
         relative = path.replace('s3://', '')
         local_path = os.path.join(self.local_s3_path, relative)
         self._proxy = LocalTarget(local_path, *args, **kwargs)
     else:
         self._proxy = S3Target(path, *args, **kwargs)
Пример #18
0
 def output(self):
     """Return 'fq1'/'fq2' targets under the sample's ``disambiguate`` prefix."""
     base = '{}/{}/disambiguate/{}'.format(cfg['S3_BUCKET'],
                                           self.sample_folder,
                                           self.sample_id)
     targets = {}
     for key, suffix in (('fq1', '_1.fq.gz'), ('fq2', '_2.fq.gz')):
         targets[key] = S3Target(base + suffix)
     return targets
Пример #19
0
 def output(self):
     """Return the '_1' and '_2' targets under the sample's ``filtered`` prefix."""
     base = '{}/{}/filtered/{}'.format(cfg['S3_BUCKET'],
                                       self.sample_folder,
                                       self.sample_id)
     targets = []
     for suffix in ('_1.fq.gz', '_2.fq.gz'):
         targets.append(S3Target(base + suffix))
     return targets
Пример #20
0
 def output(self):
     """Return the S3 target ``<s3_root>/FinanceData/<ticker>.parquet``."""
     # Local import keeps this method self-contained.
     import posixpath

     # test from local file (development use)
     # return LocalTarget(
     #     os.path.join(local_root, "FinanceData", "{}.parquet".format(self.ticker)),
     #     format=Nop,
     # )

     # S3 keys always use '/', so join with posixpath: os.path.join would
     # insert backslashes on Windows. On POSIX the result is unchanged.
     return S3Target(
         posixpath.join(s3_root, "FinanceData",
                        "{}.parquet".format(self.ticker)))
Пример #21
0
 def output(self):
     """Return the release-dump target derived from the input path.

     The input path has 'tmp/carto/Dump_' replaced by 'do-release-' and
     '.dump' replaced by '/obs.dump', then is placed in the
     ``cartodb-observatory-data`` bucket. When ``self.force`` is truthy the
     object is removed with ``aws s3 rm`` and the flag is reset to False.
     """
     release_key = self.input().path.replace('tmp/carto/Dump_',
                                             'do-release-')
     release_key = release_key.replace('.dump', '/obs.dump')
     s3_url = 's3://cartodb-observatory-data/{path}'.format(path=release_key)
     LOGGER.info(s3_url)
     target = S3Target(s3_url)
     if not self.force:
         return target
     # Clearing the flag afterwards means later calls skip the removal.
     shell('aws s3 rm {output}'.format(output=s3_url))
     self.force = False
     return target
Пример #22
0
 def output(self):
     """Return the FASTQ target(s) for this run under ``self.outpath``.

     For the 'paired' layout a dict with 'fq1' and 'fq2' targets is
     returned; for any other layout a single target is returned.
     """
     if self.layout != 'paired':
         single = '{folder}/{srr_id}.fastq.gz'
         return S3Target(
             single.format(folder=self.outpath, srr_id=self.srr_id))

     paired = '{folder}/{srr_id}_{pe}.fastq.gz'
     targets = {}
     for key, mate in (('fq1', 1), ('fq2', 2)):
         targets[key] = S3Target(
             paired.format(folder=self.outpath,
                           pe=mate,
                           srr_id=self.srr_id))
     return targets
Пример #23
0
 def output(self):
     """Return four parquet targets: current train/test and dated train/test.

     The first two keys live under ``S3.MODELLING``; the last two live under
     ``S3.MODELS`` and embed ``self.date``. Each target gets its own client
     from ``s3.create_client()``.
     """
     keys = [
         S3.MODELLING + "train.parquet",
         S3.MODELLING + "test.parquet",
         S3.MODELS + "{date:%Y/%m/%d/train_T%H%M%S.parquet}".format(
             date=self.date),
         S3.MODELS + "{date:%Y/%m/%d/test_T%H%M%S.parquet}".format(
             date=self.date),
     ]
     return [
         S3Target(s3.path(key), client=s3.create_client()) for key in keys
     ]
Пример #24
0
 def output(self):
     """Return 'abundance', 'h5' and 'run_info' targets under ``kallisto/``."""
     targets = {}
     for key, fname in (('abundance', 'abundance.tsv'),
                        ('h5', 'abundance.h5'),
                        ('run_info', 'run_info.json')):
         location = '{}/{}/kallisto/{}'.format(cfg['S3_BUCKET'],
                                               self.sample_folder, fname)
         targets[key] = S3Target(location)
     return targets
Пример #25
0
    def _interpret_scheme(full_path):
        """Return the Luigi target matching the URL scheme of ``full_path``.

        An empty or ``file`` scheme gives a ``luigi.LocalTarget`` and ``s3``
        gives an ``S3Target``; both receive ``full_path`` unchanged.

        :raises AssertionError: if the scheme is anything else
        """
        scheme = urllib.parse.urlparse(full_path).scheme

        if scheme in ('', 'file'):
            # Local file
            return luigi.LocalTarget(full_path)
        if scheme == 's3':
            # S3 file
            return S3Target(full_path)

        # Raised explicitly: a bare ``assert False`` is stripped under
        # ``python -O`` and the function would silently return None.
        # AssertionError is kept so the exception type does not change.
        raise AssertionError(
            'Unsupported scheme %r in path: %s' % (scheme, full_path))
Пример #26
0
    def output(self):
        """
        The output that this Task produces.

        See :ref:`Task.output`
        :rtype: S3Target when ``self.target`` starts with ``s3://``,
            otherwise luigi.LocalTarget (with ``dry_run_suffix`` appended
            to the path)
        """
        if not self.target.startswith('s3://'):
            return luigi.LocalTarget(self.target + self.dry_run_suffix)
        return S3Target(self.target)
Пример #27
0
 def output(self):
     """Return the html/zip FastQC targets for both mates under ``fastqc/``."""
     targets = {}
     for key, suffix in (('html_1', '_1_fastqc.html'),
                         ('zip_1', '_1_fastqc.zip'),
                         ('html_2', '_2_fastqc.html'),
                         ('zip_2', '_2_fastqc.zip')):
         fname = self.sample_id + suffix
         targets[key] = S3Target('{}/{}/fastqc/{}'.format(
             cfg['S3_BUCKET'], self.sample_folder, fname))
     return targets
Пример #28
0
 def output(self):
     """Return the disambiguation targets keyed by output kind.

     Each path is ``parameters['outdir']`` immediately followed by
     ``parameters['sample']`` and a fixed suffix.
     """
     templates = (
         ('human', '{}{}.disambiguatedSpeciesA.bam'),
         ('mouse', '{}{}.disambiguatedSpeciesB.bam'),
         ('human_ambiguous', '{}{}.ambiguousSpeciesA.bam'),
         ('mouse_ambiguous', '{}{}.ambiguousSpeciesB.bam'),
         ('summary', '{}{}_summary.txt'),
     )
     # Resolve every path first, then wrap each one in a target.
     resolved = []
     for key, template in templates:
         resolved.append((key,
                          template.format(self.parameters['outdir'],
                                          self.parameters['sample'])))
     targets = {}
     for key, location in resolved:
         targets[key] = S3Target(location)
     return targets
Пример #29
0
 def run(self):
     """Write a JSON manifest of every file listed under ``self.folder_paths``.

     The manifest is ``{'entries': [...]}`` where each entry holds the
     file's URL (``<folder_path>/<file_name>``) and ``'mandatory': True``.
     The JSON is written as text when ``self.text_target`` is truthy and as
     UTF-8 bytes otherwise.
     """
     entries = []
     for folder_path in self.folder_paths:
         folder = S3Target(folder_path)
         for file_name in folder.fs.list(folder.path):
             entries.append({
                 'url': '%s/%s' % (folder_path, file_name),
                 'mandatory': True
             })
     dump = json.dumps({'entries': entries})
     if not self.text_target:
         dump = dump.encode('utf8')
     # Context manager so the handle is released even if write() raises;
     # previously close() was skipped on error.
     with self.output().open('w') as target:
         target.write(dump)
Пример #30
0
def save_result(data, path):
    """Print a status line, wait three seconds, then touch the S3 target.

    The target at ``path`` is opened for writing and closed immediately
    without writing anything. ``data`` is accepted but not used.
    """
    print('Saving result')
    sleep(3)
    handle = S3Target(path).open('w')
    handle.close()