def run_morphline(self, collection_name, morphline, input_path):
    """Submit an Oozie workflow running MapReduceIndexerTool with the given morphline.

    Uploads the morphline config to a workspace, wraps the indexer invocation in a
    Java-snippet notebook, builds a managed workflow around it and submits it.

    :param collection_name: Solr collection the indexed documents go live into
    :param morphline: morphline configuration content to upload to the workspace
    :param input_path: HDFS path of the input data (prefixed with ${nameNode})
    :return: the submitted workflow's job id
    """
    workspace_path = self._upload_workspace(morphline)

    # Snippet configuration for the Java action; file paths point into the
    # uploaded workspace, argument file names are workspace-relative.
    properties = {
        u'files': [
            {u'path': u'%s/log4j.properties' % workspace_path, u'type': u'file'},
            {u'path': u'%s/morphline.conf' % workspace_path, u'type': u'file'},
        ],
        u'class': u'org.apache.solr.hadoop.MapReduceIndexerTool',
        u'app_jar': CONFIG_INDEXER_LIBS_PATH.get(),
        u'arguments': [
            u'--morphline-file',
            u'morphline.conf',
            u'--output-dir',
            u'${nameNode}/user/%s/indexer' % self.username,
            u'--log4j',
            u'log4j.properties',
            u'--go-live',
            u'--zk-host',
            zkensemble(),
            u'--collection',
            collection_name,
            u'${nameNode}%s' % input_path,
        ],
        u'archives': [],
    }

    # Persist the notebook so the workflow builder can reference its document.
    notebook_data = make_notebook(
        name='Indexer',
        editor_type='java',
        snippet_properties=properties,
    ).get_data()
    notebook_doc, _created = _save_notebook(notebook_data, self.user)

    # Wrap the saved notebook in a managed workflow and submit it.
    workflow_doc = WorkflowBuilder().create_workflow(
        document=notebook_doc,
        user=self.user,
        managed=True,
        name=_("Batch job for %s") % notebook_doc.name,
    )
    workflow = Workflow(document=workflow_doc, user=self.user)

    return _submit_workflow(
        user=self.user, fs=self.fs, jt=self.jt, workflow=workflow, mapping=None
    )
def create_query_document(self, owner, query_type='hive', database='default', name='Test Query', description='Test Query', statement='', files=None, functions=None, settings=None):
    """
    Creates and returns a query Document2 object
    :param owner: owner of doc
    :param query_type: hive, impala or spark
    :param database: database name
    :param name: name of document
    :param description: description of document
    :param statement: SQL statement (can be multi-query statement)
    :param files: list of dicts representing files
    :param functions: list of dicts representing functions
    :param settings: list of dicts representing settings
    :return: Document2 object representing query
    """
    # Reject anything outside the supported editor types up front.
    supported_types = ('hive', 'impala', 'spark')
    if query_type not in supported_types:
        raise ValueError("Invalid query_type: %s" % query_type)

    notebook = make_notebook(
        name=name,
        description=description,
        editor_type=query_type,
        statement=statement,
        status='ready',
        database=database,
        files=files,
        functions=functions,
        settings=settings,
    )
    doc, _save_as = _save_notebook(notebook.get_data(), owner)
    return doc
def run_morphline(self, collection_name, morphline, input_path):
    """Run MapReduceIndexerTool over *input_path* via a managed Oozie workflow.

    The morphline configuration is first uploaded to a workspace; a Java
    notebook snippet pointing at the indexer tool is then saved and wrapped in
    a workflow which is submitted for execution.

    :param collection_name: target Solr collection (documents go live into it)
    :param morphline: morphline configuration content to upload
    :param input_path: HDFS input path (resolved against ${nameNode})
    :return: job id of the submitted workflow
    """
    workspace = self._upload_workspace(morphline)

    # Command-line arguments for MapReduceIndexerTool; the file arguments are
    # names relative to the shipped workspace files below.
    tool_arguments = [
        u'--morphline-file',
        u'morphline.conf',
        u'--output-dir',
        u'${nameNode}/user/%s/indexer' % self.username,
        u'--log4j',
        u'log4j.properties',
        u'--go-live',
        u'--zk-host',
        zkensemble(),
        u'--collection',
        collection_name,
        u'${nameNode}%s' % input_path,
    ]
    snippet_properties = {
        u'files': [
            {u'path': u'%s/log4j.properties' % workspace, u'type': u'file'},
            {u'path': u'%s/morphline.conf' % workspace, u'type': u'file'},
        ],
        u'class': u'org.apache.solr.hadoop.MapReduceIndexerTool',
        u'app_jar': CONFIG_INDEXER_LIBS_PATH.get(),
        u'arguments': tool_arguments,
        u'archives': [],
    }

    # Save the Java notebook, then build and submit a managed workflow for it.
    saved_doc, _created = _save_notebook(
        make_notebook(
            name='Indexer',
            editor_type='java',
            snippet_properties=snippet_properties,
        ).get_data(),
        self.user,
    )
    wf_document = WorkflowBuilder().create_workflow(
        document=saved_doc,
        user=self.user,
        managed=True,
        name=_("Batch job for %s") % saved_doc.name,
    )
    return _submit_workflow(
        user=self.user,
        fs=self.fs,
        jt=self.jt,
        workflow=Workflow(document=wf_document, user=self.user),
        mapping=None,
    )
def run_morphline(self, request, collection_name, morphline, input_path):
    """Execute MapReduceIndexerTool directly through the notebook API.

    Unlike the workflow-based variant, this saves a 'running' Java notebook
    snippet and executes it immediately via _execute_notebook instead of
    submitting an Oozie workflow.

    :param request: request object forwarded to _execute_notebook
    :param collection_name: target Solr collection (documents go live into it)
    :param morphline: morphline configuration content to upload
    :param input_path: input path passed as-is to the tool
    :return: job handle returned by _execute_notebook
    """
    workspace_path = self._upload_workspace(morphline)

    # NOTE: a dead, commented-out make_notebook2/managed-notebook prototype
    # that duplicated this configuration was removed from this body.
    snippet_properties = {
        u'files': [
            {u'path': u'%s/log4j.properties' % workspace_path, u'type': u'file'},
            {u'path': u'%s/morphline.conf' % workspace_path, u'type': u'file'},
        ],
        u'class': u'org.apache.solr.hadoop.MapReduceIndexerTool',
        u'app_jar': CONFIG_INDEXER_LIBS_PATH.get(),
        u'arguments': [
            u'--morphline-file',
            u'morphline.conf',
            u'--output-dir',
            u'${nameNode}/user/%s/indexer' % self.username,
            u'--log4j',
            u'log4j.properties',
            u'--go-live',
            u'--zk-host',
            zkensemble(),
            u'--collection',
            collection_name,
            input_path,
        ],
        u'archives': [],
    }

    notebook = make_notebook(
        name='Indexer',
        editor_type='java',
        snippet_properties=snippet_properties,
        status='running',
    ).get_data()
    notebook_doc, created = _save_notebook(notebook, self.user)

    # Execute the first (only) snippet of the saved notebook immediately.
    snippet = {
        'wasBatchExecuted': True,
        'id': notebook['snippets'][0]['id'],
        'statement': '',
    }
    job_handle = _execute_notebook(request, notebook, snippet)

    return job_handle