Пример #1
0
    def __init__(self, path, flag='_SUCCESS', client=None, gcs_client=None):
        """Record the path and choose the GCS object that backs this target.

        :param path: location of the data. A ``*`` is only accepted in the
            final path component, not in the directory part.
        :param flag: file name joined onto the directory of ``path`` when
            ``path`` contains a wildcard.
        :param client: stored on ``self.client``; a new ``BigqueryClient()``
            is created when this is falsy.
        :param gcs_client: forwarded to ``gcs.GCSTarget`` as ``client``.
        """
        self.client = client or BigqueryClient()
        self.path = path
        self.wildcard = False

        if '*' not in path:
            # Plain path: the path itself is the backing GCS target.
            self._target = gcs.GCSTarget(path, client=gcs_client)
            return

        self.wildcard = True
        parent = os.path.dirname(path)
        assert '*' not in parent, \
            'Wildcard (*) only supported in filename %s' % (path,)

        # Wildcard path: the backing target is the flag file in its directory.
        flag_path = os.path.join(parent, flag)
        self._target = gcs.GCSTarget(flag_path, client=gcs_client)
Пример #2
0
    def test_get_target_path(self):
        """Check get_target_path for a BigQuery target, a GCS target and bad input."""
        resolve = SimpleTestTask.get_target_path

        # BigQuery targets are expected to render as "project:dataset.table".
        table_target = bigquery.BigQueryTarget("p", "d", "t",
                                               client="fake_client")
        self.assertEqual(resolve(table_target), "p:d.t")

        # GCS targets are expected to render as their own URL.
        file_target = gcs.GCSTarget("gs://foo/bar.txt", client="fake_client")
        self.assertEqual(resolve(file_target), "gs://foo/bar.txt")

        # Anything that is not a target must be rejected with ValueError.
        self.assertRaises(ValueError,
                          SimpleTestTask.get_target_path,
                          "not_a_target")
Пример #3
0
 def output(_):
     """Return a ``gcs.GCSTarget`` built from ``self.gcs_dir_url``.

     The single positional argument is ignored.

     NOTE(review): ``self`` is not a parameter of this function, so it is
     presumably captured from an enclosing scope -- confirm at the call site.
     """
     dir_url = self.gcs_dir_url
     return gcs.GCSTarget(dir_url)
Пример #4
0
 def create_target(self, format=None):
     """Build a ``gcs.GCSTarget`` for the URL derived from ``self.id()``.

     :param format: forwarded unchanged to ``gcs.GCSTarget`` as ``format``.
     :return: the target, created with ``self.client`` as its client.
     """
     target_url = bucket_url(self.id())
     return gcs.GCSTarget(target_url, format=format, client=self.client)
Пример #5
0
 def run(self):
     """Copy the text of the local file ``self.source`` to ``self.destination``.

     The source is opened in text read mode and its whole content is read at
     once, then written through a ``gcs.GCSTarget`` opened in ``'w'`` mode.
     """
     with open(self.source, 'r') as local_file:
         remote_target = gcs.GCSTarget(self.destination, client=self.client)
         with remote_target.open(mode='w') as remote_file:
             contents = local_file.read()
             remote_file.write(contents)
Пример #6
0
 def output(self):
     """Return the ``gcs.GCSTarget`` for ``self.destination`` using ``self.client``."""
     destination_target = gcs.GCSTarget(self.destination, client=self.client)
     return destination_target
Пример #7
0
 def output(self):
     """Return the GCS target for the CSV file dated ``self.date``.

     The path is ``<BUCKET_PATH>/<BUCKET_SUBDIR>/data/<YYYY-MM-DD>.csv``.
     """
     location = (BUCKET_PATH, BUCKET_SUBDIR)
     csv_path = '{}/{}/data/{date:%Y-%m-%d}.csv'.format(*location,
                                                        date=self.date)
     return luigi_gcs.GCSTarget(csv_path, client=GCS_CLIENT)
Пример #8
0
	def output(self):
		"""Return the GCS target for this dataset's gzipped JSON file.

		The object name is ``<file_root>_<YYYYMMDD>.json.gz`` inside the
		``gs://snowplow_tracker`` bucket, with the date taken from
		``self.dataset_date``.
		"""
		# The format previously read "%Y$m%d": "$m" is not a strftime
		# directive, so the month was never substituted and a literal "$m"
		# ended up in the object name. Objects written under the old name
		# will not match the corrected path.
		date_stamp = self.dataset_date.strftime("%Y%m%d")
		return gcs.GCSTarget("gs://snowplow_tracker/%s_%s.json.gz" % (self.file_root, date_stamp))