Exemplo n.º 1
0
 def testHttpInputOk(self):
     """Lint a minimal HttpInput stage and expect every check to pass."""
     pipeline_cls = linter.PipelineLinter
     stage_cls = linter.StageLinter

     def passing(checks):
         # Every listed check is expected to report success.
         return {check: {'pass': True} for check in checks}

     lint = pipeline_cls(
         '{"inputs": [{"type": "HttpInput", "url": "http://foo/data.csv",'
         ' "sinks": ["gs:/b2/o2"]}]}')

     expected = passing([
         pipeline_cls.CHECK_SYNTAX_VALID,
         pipeline_cls.CHECK_UNKNOWN_CONFIG_KEYS,
         pipeline_cls.CHECK_REQ_IO_STAGES,
     ])
     expected['stages'] = {
         'inputs': [passing([
             stage_cls.CHECK_TYPE_FMT % u'HttpInput',
             stage_cls.CHECK_FIELD_EXISTS_FMT % 'url',
             stage_cls.CHECK_FIELD_EXISTS_FMT % 'sinks',
         ])],
     }

     self.assertTrue(lint.results.valid)
     self.assertSameStructure(expected, lint.results.results)
Exemplo n.º 2
0
 def testJunk(self):
     """Feed unparseable configs to the linter and check the reported reasons.

     Each case pairs a bad config value with the syntax-check failure reason
     the linter is expected to report for it.
     """
     not_json = 'PreTemplate: No JSON object could be decoded'
     unterminated = self.getUnterminatedStringMessage('"fish')
     cases = [
         (None, 'PreTemplate: expected string or buffer'),
         ('', not_json),
         ('"', 'PreTemplate: end is out of bounds'),
         ('"fish', 'PreTemplate: %s' % unterminated),
         ('fish', not_json),
     ]
     pipeline_cls = linter.PipelineLinter

     for config, why in cases:
         logging.info('testing: %r', config)
         lint = pipeline_cls(config)

         # Syntax fails with the case-specific reason, and no I/O stages can
         # be found; the unknown-keys check still reports success.
         expected = {}
         expected[pipeline_cls.CHECK_UNKNOWN_CONFIG_KEYS] = {'pass': True}
         expected[pipeline_cls.CHECK_SYNTAX_VALID] = {
             'pass': False,
             'reason': why,
         }
         expected[pipeline_cls.CHECK_REQ_IO_STAGES] = {
             'pass': False,
             'reason': pipeline_cls.MSG_MISSING_IO_STAGES,
         }

         self.assertFalse(lint.results.valid, 'Config:%r ' % config)
         self.assertSameStructure(expected, lint.results.results,
                                  'Config:%r' % config)
Exemplo n.º 3
0
 def testGcsOutputOk(self):
     """Lint a lone GcsOutput stage and expect every check to pass."""
     pipeline_cls = linter.PipelineLinter
     stage_cls = linter.StageLinter

     def passing(checks):
         # Every listed check is expected to report success.
         return {check: {'pass': True} for check in checks}

     lint = pipeline_cls(
         '{"outputs": [{"type": "GcsOutput", "object": "gs://b1/o1",'
         ' "sources": ["gs:/b2/o2"]}]}')

     output_checks = [stage_cls.CHECK_TYPE_FMT % u'GcsOutput']
     output_checks.extend(
         stage_cls.CHECK_FIELD_EXISTS_FMT % field
         for field in ('sources', 'object'))

     expected = passing([
         pipeline_cls.CHECK_SYNTAX_VALID,
         pipeline_cls.CHECK_UNKNOWN_CONFIG_KEYS,
         pipeline_cls.CHECK_REQ_IO_STAGES,
     ])
     expected['stages'] = {'outputs': [passing(output_checks)]}

     self.assertTrue(lint.results.valid)
     self.assertSameStructure(expected, lint.results.results)
Exemplo n.º 4
0
 def testSimpleFailure(self):
     """Lint a GcsInput stage lacking both 'object' and 'objects'.

     The pipeline-level checks pass, but the stage is expected to fail the
     at-least-one-of field check, making the overall result invalid.
     """
     pipeline_cls = linter.PipelineLinter
     stage_cls = linter.StageLinter
     either_field = ['object', 'objects']

     lint = pipeline_cls('{"inputs": [{"type": "GcsInput"}]}')

     input_stage = {
         stage_cls.CHECK_TYPE_FMT % u'GcsInput': {'pass': True},
     }
     input_stage[stage_cls.CHECK_FIELD_EXISTS_FMT % either_field] = {
         'pass': False,
         'reason': stage_cls.MSG_REQUIRE_AT_LEAST_ONE_FMT % either_field,
     }

     expected = {
         check: {'pass': True}
         for check in (pipeline_cls.CHECK_SYNTAX_VALID,
                       pipeline_cls.CHECK_REQ_IO_STAGES,
                       pipeline_cls.CHECK_UNKNOWN_CONFIG_KEYS)
     }
     expected['stages'] = {'inputs': [input_stage]}

     self.assertFalse(lint.results.valid)
     self.assertSameStructure(expected, lint.results.results)
Exemplo n.º 5
0
  def RunPipeline(self, p):
    """Lint, build and start a pipeline, then redirect to its status page.

    The pipeline config is linted with the application options (expanded with
    the request arguments). If linting fails, or the linted config carries no
    storage bucket, a BadRequest response is issued and nothing is started.

    Args:
      p: the pipeline record to run. This method reads its `name` and
        `config`, appends the started pipeline id to its
        `running_pipeline_ids`, and calls `put()` on it.
    """
    logging.info('Linting pipeline: %s', p.name)
    options_dict = appconfig.AppConfig.GetAppConfig().AsOptionsDict()
    self.expandOptionsDict(options_dict, self.GetAllArguments())
    logging.info('options_dict is:\n%r', options_dict)
    logging.info('input config is:\n%s', p.config)
    pl = linter.PipelineLinter(p.config, options_dict)
    if not pl.results.valid:
      self.BadRequest('Linting for pipeline [%s] FAILED.\n%r',
                      p.name, pl.results.results)
      return

    logging.info('Running pipeline: %s with config\n%s', p.name, pl.config)
    config = pl.config

    storage = config.get('options', {}).get(appconfig.OPTIONS_STORAGE_KEY, {})
    # The lookups above tolerate missing sections by defaulting to {}, so the
    # bucket key may legitimately be absent; report that as a bad request
    # instead of letting a KeyError escape the handler.
    try:
      bucket = storage[appconfig.OPTIONS_STORAGE_BUCKET_KEY]
    except KeyError:
      self.BadRequest('Pipeline [%s] has no storage bucket configured.',
                      p.name)
      return
    prefix = storage.get(appconfig.OPTIONS_STORAGE_PREFIX_KEY, '')

    pr = runner.PipelineRunner()
    pipe = pr.Build(config, gcs.Gcs.UrlCreator(bucket, prefix))
    pipe.max_attempts = 1
    pipe.start()
    # Record the started pipeline on the record and persist it.
    p.running_pipeline_ids.append(pipe.pipeline_id)
    p.put()
    # show the status page using the default frontend module
    url = urlparse.urljoin(self.GetModuleUrl('default'),
                           '/_ah/pipeline/status?root=%s' % pipe.pipeline_id)
    self.redirect(str(url))
Exemplo n.º 6
0
    def post(self):
        """Lint the pipeline config carried in the JSON request body.

        Responds via NotFound when the decoded body is empty or has no
        'config' entry. Otherwise the lint results are stored under the
        'lint' key of the decoded body, which is sent back as JSON.
        """
        payload = json.loads(self.request.body)

        has_config = bool(payload) and 'config' in payload
        if not has_config:
            self.NotFound('Unable to find pipeline config in json request.')
            return

        config_text = payload['config']
        options = appconfig.AppConfig.GetAppConfig().AsOptionsDict()
        outcome = linter.PipelineLinter(config_text, options)
        payload['lint'] = outcome.results.results
        self.SendJson(payload)
Exemplo n.º 7
0
 def testS3InputOk(self):
     """Lint an S3Input stage with credentials; every check should pass."""
     pipeline_cls = linter.PipelineLinter
     stage_cls = linter.StageLinter

     def passing(checks):
         # Every listed check is expected to report success.
         return {check: {'pass': True} for check in checks}

     lint = pipeline_cls(
         '{"inputs": [{"type": "S3Input", "object": "s3://b/o",'
         ' "s3Credentials": {"accessKey": "123", "accessSecret": "abc"},'
         ' "sinks": ["gs:/b2/o2"]}]}')

     type_checks = [
         stage_cls.CHECK_TYPE_FMT % u'S3Input',
         stage_cls.CHECK_TYPE_FMT % 's3Credentials',
     ]
     # Field-existence checks, including the nested credential fields and the
     # "one of object/objects" requirement.
     field_checks = [
         stage_cls.CHECK_FIELD_EXISTS_FMT % field
         for field in ('object',
                       'sinks',
                       's3Credentials',
                       ['object', 'objects'],
                       's3Credentials.accessKey',
                       's3Credentials.accessSecret')
     ]

     expected = passing([
         pipeline_cls.CHECK_SYNTAX_VALID,
         pipeline_cls.CHECK_UNKNOWN_CONFIG_KEYS,
         pipeline_cls.CHECK_REQ_IO_STAGES,
     ])
     expected['stages'] = {'inputs': [passing(type_checks + field_checks)]}

     self.assertTrue(lint.results.valid)
     self.assertSameStructure(expected, lint.results.results)
Exemplo n.º 8
0
 def testMissingInputStageType(self):
     """Lint an empty JSON object; only the required-I/O-stages check fails."""
     pipeline_cls = linter.PipelineLinter
     lint = pipeline_cls('{}')

     expected = {
         check: {'pass': True}
         for check in (pipeline_cls.CHECK_SYNTAX_VALID,
                       pipeline_cls.CHECK_UNKNOWN_CONFIG_KEYS)
     }
     expected[pipeline_cls.CHECK_REQ_IO_STAGES] = {
         'pass': False,
         'reason': pipeline_cls.MSG_MISSING_IO_STAGES,
     }

     self.assertFalse(lint.results.valid)
     self.assertSameStructure(expected, lint.results.results)
Exemplo n.º 9
0
    def testLintFullFiles(self):
        """Lint every .json file in the test-data and example directories.

        The directories are resolved relative to this file. Each config found
        there must lint as valid; on failure the assertion message names the
        offending file and includes the lint results.
        """
        directories = ['src/pipelines/testdata', 'static/examples']

        for directory in directories:
            directory = os.path.join(os.path.dirname(__file__), '../..',
                                     directory)
            filenames = [
                x for x in os.listdir(directory) if x.endswith('.json')
            ]
            logging.info('directory %s files %r', directory, filenames)

            for filename in filenames:
                logging.info('Linting %r from %r', filename,
                             os.path.basename(directory))
                path = os.path.join(directory, filename)
                # Close the file deterministically instead of relying on
                # garbage collection to release the handle.
                with open(path) as config_file:
                    j = config_file.read()
                pl = linter.PipelineLinter(j)
                self.assertTrue(
                    pl.results.valid,
                    'Lint failed for %r: %r' % (path, pl.results.results))
Exemplo n.º 10
0
 def testBadSection(self):
     """Lint a config whose only section is the unknown key 'input'.

     Syntax is valid, but the unknown-keys check and the required-I/O-stages
     check are both expected to fail.
     """
     pipeline_cls = linter.PipelineLinter
     lint = pipeline_cls('{"input": [{"type": "UnknownInput"}]}')

     unknown_key_failure = {
         'pass': False,
         'reason': pipeline_cls.MSG_UNKNOWN_CONFIG_KEYS_FMT % u'input',
     }
     missing_io_failure = {
         'pass': False,
         'reason': pipeline_cls.MSG_MISSING_IO_STAGES,
     }
     expected = {
         pipeline_cls.CHECK_SYNTAX_VALID: {'pass': True},
         pipeline_cls.CHECK_UNKNOWN_CONFIG_KEYS: unknown_key_failure,
         pipeline_cls.CHECK_REQ_IO_STAGES: missing_io_failure,
     }

     self.assertFalse(lint.results.valid)
     self.assertSameStructure(expected, lint.results.results)
Exemplo n.º 11
0
 def testDatastoreInputOk(self):
     """Lint a DatastoreInput stage with params; every check should pass."""
     pipeline_cls = linter.PipelineLinter
     stage_cls = linter.StageLinter

     def passing(checks):
         # Every listed check is expected to report success.
         return {check: {'pass': True} for check in checks}

     lint = pipeline_cls(
         '{"inputs": [{"type": "DatastoreInput", "gql": "SELECT *",'
         ' "params": {"projection": ["a", "b"]},'
         ' "sinks": ["gs:/b2/o2"]}]}')

     type_checks = [
         stage_cls.CHECK_TYPE_FMT % subject
         for subject in (u'DatastoreInput', 'params', 'params.projection')
     ]
     field_checks = [
         stage_cls.CHECK_FIELD_EXISTS_FMT % 'sinks',
         stage_cls.CHECK_FIELD_EXISTS_FMT % ['gql', 'object'],
     ]

     expected = passing([
         pipeline_cls.CHECK_SYNTAX_VALID,
         pipeline_cls.CHECK_UNKNOWN_CONFIG_KEYS,
         pipeline_cls.CHECK_REQ_IO_STAGES,
     ])
     expected['stages'] = {'inputs': [passing(type_checks + field_checks)]}

     self.assertTrue(lint.results.valid)
     self.assertSameStructure(expected, lint.results.results)
Exemplo n.º 12
0
 def testBadStageType(self):
     """Lint an input stage of unknown type; its type check should fail."""
     pipeline_cls = linter.PipelineLinter
     stage_cls = linter.StageLinter

     lint = pipeline_cls('{"inputs": [{"type": "UnknownInput"}]}')

     # The stage type check fails with this reason.
     failed_stage = {
         stage_cls.CHECK_TYPE_FMT % u'UnknownInput': {
             'pass': False,
             'reason': 'No module named unknowninput',
         },
     }
     expected = {
         check: {'pass': True}
         for check in (pipeline_cls.CHECK_SYNTAX_VALID,
                       pipeline_cls.CHECK_UNKNOWN_CONFIG_KEYS,
                       pipeline_cls.CHECK_REQ_IO_STAGES)
     }
     expected['stages'] = {'inputs': [failed_stage]}

     self.assertFalse(lint.results.valid)
     self.assertSameStructure(expected, lint.results.results)
Exemplo n.º 13
0
 def testMissingStageType(self):
     """Lint an input stage with no 'type'; the type check should fail."""
     pipeline_cls = linter.PipelineLinter
     stage_cls = linter.StageLinter

     lint = pipeline_cls('{"inputs": [{}]}')

     # With no type present, the check key is formatted with None.
     untyped_stage = {
         stage_cls.CHECK_TYPE_FMT % None: {
             'pass': False,
             'reason': stage_cls.MSG_TYPE_NOT_FOUND,
         },
     }
     expected = {
         check: {'pass': True}
         for check in (pipeline_cls.CHECK_SYNTAX_VALID,
                       pipeline_cls.CHECK_REQ_IO_STAGES,
                       pipeline_cls.CHECK_UNKNOWN_CONFIG_KEYS)
     }
     expected['stages'] = {'inputs': [untyped_stage]}

     self.assertFalse(lint.results.valid)
     self.assertSameStructure(expected, lint.results.results)
Exemplo n.º 14
0
 def testGcsInputNullSink(self):
     """Lint a GcsInput stage whose 'sinks' list holds a null entry.

     Everything passes except the 'sinks' field check, which is expected to
     fail with the invalid-field message for 'null'.
     """
     pipeline_cls = linter.PipelineLinter
     stage_cls = linter.StageLinter

     lint = pipeline_cls(
         '{"inputs": [{"type": "GcsInput", "object": "gs://b1/o1",'
         ' "sinks": [null]}]}')

     input_stage = {
         check: {'pass': True}
         for check in (stage_cls.CHECK_TYPE_FMT % u'GcsInput',
                       stage_cls.CHECK_FIELD_EXISTS_FMT % 'object',
                       stage_cls.CHECK_FIELD_EXISTS_FMT % ['object',
                                                           'objects'])
     }
     input_stage[stage_cls.CHECK_FIELD_EXISTS_FMT % 'sinks'] = {
         'pass': False,
         'reason': stage_cls.MSG_FIELD_INVALID_FMT % 'null',
     }

     expected = {
         check: {'pass': True}
         for check in (pipeline_cls.CHECK_SYNTAX_VALID,
                       pipeline_cls.CHECK_UNKNOWN_CONFIG_KEYS,
                       pipeline_cls.CHECK_REQ_IO_STAGES)
     }
     expected['stages'] = {'inputs': [input_stage]}

     self.assertFalse(lint.results.valid)
     self.assertSameStructure(expected, lint.results.results)
Exemplo n.º 15
0
 def testHttpInputBadShardSize(self):
     """Lint an HttpInput stage with shardSize 33554433.

     Everything passes except the 'shardSize' field check, which is expected
     to fail with a size-limit reason.
     """
     pipeline_cls = linter.PipelineLinter
     stage_cls = linter.StageLinter

     lint = pipeline_cls(
         '{"inputs": [{"type": "HttpInput", "url": "http://foo/data.csv",'
         ' "shardSize": 33554433, "sinks": ["gs:/b2/o2"]}]}')

     input_stage = {
         check: {'pass': True}
         for check in (stage_cls.CHECK_TYPE_FMT % u'HttpInput',
                       stage_cls.CHECK_FIELD_EXISTS_FMT % 'url',
                       stage_cls.CHECK_FIELD_EXISTS_FMT % 'sinks')
     }
     input_stage[stage_cls.CHECK_FIELD_EXISTS_FMT % 'shardSize'] = {
         'pass': False,
         'reason': "Invalid value: 'Size exceeds App Engine response limit.'",
     }

     expected = {
         check: {'pass': True}
         for check in (pipeline_cls.CHECK_SYNTAX_VALID,
                       pipeline_cls.CHECK_UNKNOWN_CONFIG_KEYS,
                       pipeline_cls.CHECK_REQ_IO_STAGES)
     }
     expected['stages'] = {'inputs': [input_stage]}

     self.assertFalse(lint.results.valid)
     self.assertSameStructure(expected, lint.results.results)
Exemplo n.º 16
0
 def testGcsCompositorOk(self):
     """Lint a GcsInput -> GcsCompositor -> GcsOutput pipeline; all pass."""
     pipeline_cls = linter.PipelineLinter
     stage_cls = linter.StageLinter

     def stage_passing(stage_type, fields):
         # Expected result for one stage: its type check plus one
         # field-existence check per entry in `fields`, all successful.
         checks = [stage_cls.CHECK_TYPE_FMT % stage_type]
         checks.extend(
             stage_cls.CHECK_FIELD_EXISTS_FMT % field for field in fields)
         return {check: {'pass': True} for check in checks}

     lint = pipeline_cls(
         '{"inputs": [{"type": "GcsInput", "object": "gs://b1/o1",'
         ' "sinks": ["gs:/b2/o2"]}],'
         '"transforms": [{"type": "GcsCompositor", "sources": ["gs:/b2/o2"],'
         ' "sinks": ["gs:/b3/o3"], "contentType": "text/plain"}],'
         '"outputs": [{"type": "GcsOutput", "object": "gs://b3/o3",'
         ' "sources": ["gs:/b4/o4"]}]}')

     expected = {
         check: {'pass': True}
         for check in (pipeline_cls.CHECK_SYNTAX_VALID,
                       pipeline_cls.CHECK_UNKNOWN_CONFIG_KEYS,
                       pipeline_cls.CHECK_REQ_IO_STAGES)
     }
     expected['stages'] = {
         'inputs': [
             stage_passing(u'GcsInput',
                           ['sinks', 'object', ['object', 'objects']]),
         ],
         'transforms': [
             stage_passing(u'GcsCompositor',
                           ['sources', 'sinks', 'contentType']),
         ],
         'outputs': [
             stage_passing(u'GcsOutput', ['sources', 'object']),
         ],
     }

     self.assertTrue(lint.results.valid)
     self.assertSameStructure(expected, lint.results.results)
Exemplo n.º 17
0
 def testCsvMatchReplaceOk(self):
     """Lint a GcsInput feeding a CsvMatchReplace transform; all pass."""
     pipeline_cls = linter.PipelineLinter
     stage_cls = linter.StageLinter

     def passing(checks):
         # Every listed check is expected to report success.
         return {check: {'pass': True} for check in checks}

     def exists(field):
         return stage_cls.CHECK_FIELD_EXISTS_FMT % field

     lint = pipeline_cls(
         '{"inputs": [{"type": "GcsInput", "object": "gs://b1/o1",'
         ' "sinks": ["gs:/b2/o2"]}],'
         ' "transforms": [{"type": "CsvMatchReplace",'
         ' "fieldDelimiter": ",", "columns": [{"wanted": true, '
         ' "type": "STRING", "name": "col1"}],'
         ' "sources": ["gs://bucket/foo.csv"],'
         ' "sinks": ["gs://bucket/results", "gs://bucket/badrows"]}]}')

     transform_stage = passing([
         stage_cls.CHECK_TYPE_FMT % u'CsvMatchReplace',
         stage_cls.CHECK_TYPE_FMT % 'columns',
         exists('sources'),
         exists('sinks'),
         exists('fieldDelimiter'),
         exists('columns'),
     ])
     input_stage = passing([
         stage_cls.CHECK_TYPE_FMT % u'GcsInput',
         exists('sinks'),
         exists('object'),
         exists(['object', 'objects']),
     ])

     expected = passing([
         pipeline_cls.CHECK_SYNTAX_VALID,
         pipeline_cls.CHECK_UNKNOWN_CONFIG_KEYS,
         pipeline_cls.CHECK_REQ_IO_STAGES,
     ])
     expected['stages'] = {
         'transforms': [transform_stage],
         'inputs': [input_stage],
     }

     self.assertTrue(lint.results.valid)
     self.assertSameStructure(expected, lint.results.results)
Exemplo n.º 18
0
 def testBigQueryOutputOk(self):
     """Lint a BigQueryOutput stage with table and schema; all checks pass."""
     pipeline_cls = linter.PipelineLinter
     stage_cls = linter.StageLinter

     def passing(checks):
         # Every listed check is expected to report success.
         return {check: {'pass': True} for check in checks}

     lint = pipeline_cls(
         '{"outputs": [{"type": "BigQueryOutput",'
         ' "destinationTable": {"projectId": "123", "tableId": "abc",'
         ' "datasetId": "xyz"}, "schema": {"fields": [{"type": "STRING"}]},'
         ' "sources": ["gs:/b2/o2"]}]}')

     type_checks = [
         stage_cls.CHECK_TYPE_FMT % subject
         for subject in (u'BigQueryOutput',
                         'destinationTable',
                         'schema',
                         'schema.fields')
     ]
     # Field-existence checks for the top-level and nested fields.
     field_checks = [
         stage_cls.CHECK_FIELD_EXISTS_FMT % field
         for field in ('sources',
                       'destinationTable',
                       'schema',
                       'destinationTable.projectId',
                       'destinationTable.tableId',
                       'destinationTable.datasetId',
                       'schema.fields')
     ]

     expected = passing([
         pipeline_cls.CHECK_SYNTAX_VALID,
         pipeline_cls.CHECK_UNKNOWN_CONFIG_KEYS,
         pipeline_cls.CHECK_REQ_IO_STAGES,
     ])
     expected['stages'] = {'outputs': [passing(type_checks + field_checks)]}

     self.assertTrue(lint.results.valid)
     self.assertSameStructure(expected, lint.results.results)