def run_morphline(self, request, collection_name, morphline, input_path, query=None):
    """Submit a batch MapReduceIndexerTool job that indexes data into Solr.

    When ``query`` is given, the referenced saved query is first materialized
    into a temporary Hive table via a Hive snippet, and that table's HDFS
    location replaces ``input_path`` as the indexer input.

    Returns the result of executing the assembled notebook as a batch job.
    """
    workspace_path = self._upload_workspace(morphline)

    notebook = Notebook(name='Indexer job for %s' % collection_name, isManaged=True)

    if query:
        source_notebook = Notebook(document=Document2.objects.get_by_uuid(user=self.user, uuid=query))
        source_data = source_notebook.get_data()
        source_snippet = source_data['snippets'][0]
        api = get_api(request, source_snippet)

        # Short uuid prefix keeps the temporary table/location names compact.
        short_uuid = source_data['uuid'][:4]
        destination = '__hue_%s' % short_uuid
        location = '/user/%s/__hue-%s' % (request.user, short_uuid)
        sql, _success_url = api.export_data_as_table(
            source_data, source_snippet, destination, is_temporary=True, location=location
        )
        # Index the exported table's files instead of the caller-provided path.
        input_path = '${nameNode}%s' % location

        notebook.add_hive_snippet(source_snippet['database'], sql)

    notebook.add_java_snippet(
        clazz='org.apache.solr.hadoop.MapReduceIndexerTool',
        app_jar=CONFIG_INDEXER_LIBS_PATH.get(),
        arguments=[
            u'--morphline-file',
            u'morphline.conf',
            u'--output-dir',
            u'${nameNode}/user/%s/indexer' % self.username,
            u'--log4j',
            u'log4j.properties',
            u'--go-live',
            u'--zk-host',
            zkensemble(),
            u'--collection',
            collection_name,
            input_path,
        ],
        files=[
            {u'path': u'%s/log4j.properties' % workspace_path, u'type': u'file'},
            {u'path': u'%s/morphline.conf' % workspace_path, u'type': u'file'},
        ],
    )

    return notebook.execute(request, batch=True)
def run_morphline(self, request, collection_name, morphline, input_path, query=None):
    """Run a Solr MapReduceIndexerTool job for ``collection_name``.

    If ``query`` names a saved document, export its result set to a temporary
    Hive table first and index that table's HDFS location; otherwise index
    ``input_path`` directly. Executes the notebook in batch mode.
    """
    workspace_path = self._upload_workspace(morphline)
    job_notebook = Notebook(name='Indexer job for %s' % collection_name, isManaged=True)

    if query:
        # Materialize the saved query into a temporary table so the
        # indexer can consume its files from HDFS.
        doc = Document2.objects.get_by_uuid(user=self.user, uuid=query)
        query_data = Notebook(document=doc).get_data()
        query_snippet = query_data['snippets'][0]

        uuid_prefix = query_data['uuid'][:4]
        destination = '__hue_%s' % uuid_prefix
        location = '/user/%s/__hue-%s' % (request.user, uuid_prefix)

        sql, _ = get_api(request, query_snippet).export_data_as_table(
            query_data,
            query_snippet,
            destination,
            is_temporary=True,
            location=location,
        )
        input_path = '${nameNode}%s' % location
        job_notebook.add_hive_snippet(query_snippet['database'], sql)

    indexer_args = [
        u'--morphline-file', u'morphline.conf',
        u'--output-dir', u'${nameNode}/user/%s/indexer' % self.username,
        u'--log4j', u'log4j.properties',
        u'--go-live',
        u'--zk-host', zkensemble(),
        u'--collection', collection_name,
        input_path,
    ]
    workspace_files = [
        {u'path': u'%s/log4j.properties' % workspace_path, u'type': u'file'},
        {u'path': u'%s/morphline.conf' % workspace_path, u'type': u'file'},
    ]

    job_notebook.add_java_snippet(
        clazz='org.apache.solr.hadoop.MapReduceIndexerTool',
        app_jar=CONFIG_INDEXER_LIBS_PATH.get(),
        arguments=indexer_args,
        files=workspace_files,
    )

    return job_notebook.execute(request, batch=True)
def run_sync_query(doc_id, user):
    """Independently run a saved query as ``user`` and block until it finishes.

    Looks up the query document by ``doc_id``, submits it as a batch task, and
    polls its status every 3 seconds until it leaves the waiting/running states.

    :param doc_id: uuid of the saved query document.
    :param user: a User instance, or a username string to resolve via the ORM.
    :return: the task dict returned by ``notebook.execute`` (with ``uuid`` set).
    """
    # TODO: Add INSERT INTO table if persist result
    # TODO: Add variable substitution
    # TODO: Send notifications: done/on failure

    # isinstance (not `type(...) is str`) also accepts str subclasses.
    if isinstance(user, str):
        lookup = {orm_user_lookup(): user}
        user = User.objects.get(**lookup)
    user = rewrite_user(user)

    query_document = Document2.objects.get_by_uuid(user=user, uuid=doc_id)
    notebook = Notebook(document=query_document).get_data()
    snippet = notebook['snippets'][0]
    editor_type = snippet['type']
    sql = _get_statement(notebook)

    request = MockedDjangoRequest(user=user)
    # Epoch milliseconds of the submission time.
    last_executed = time.mktime(datetime.datetime.now().timetuple()) * 1000

    notebook = make_notebook(
        name='Scheduled query %s at %s' % (query_document.name, last_executed),
        editor_type=editor_type,
        statement=sql,
        status='ready',
        last_executed=last_executed,
        is_task=True,
    )

    task = notebook.execute(request, batch=True)
    task['uuid'] = task['history_uuid']

    # Poll until the task leaves the waiting/running states, pausing between
    # polls (the original re-checked immediately before the first sleep).
    status = check_status(task)
    while status['status'] in ('waiting', 'running'):
        time.sleep(3)
        status = check_status(task)

    return task