Exemplo n.º 1
0
    def run_morphline(self,
                      request,
                      collection_name,
                      morphline,
                      input_path,
                      query=None):
        workspace_path = self._upload_workspace(morphline)

        notebook = Notebook(name='Indexer job for %s' % collection_name,
                            isManaged=True)

        if query:
            q = Notebook(document=Document2.objects.get_by_uuid(user=self.user,
                                                                uuid=query))
            notebook_data = q.get_data()
            snippet = notebook_data['snippets'][0]

            api = get_api(request, snippet)

            destination = '__hue_%s' % notebook_data['uuid'][:4]
            location = '/user/%s/__hue-%s' % (request.user,
                                              notebook_data['uuid'][:4])
            sql, success_url = api.export_data_as_table(notebook_data,
                                                        snippet,
                                                        destination,
                                                        is_temporary=True,
                                                        location=location)
            input_path = '${nameNode}%s' % location

            notebook.add_hive_snippet(snippet['database'], sql)

        notebook.add_java_snippet(
            clazz='org.apache.solr.hadoop.MapReduceIndexerTool',
            app_jar=CONFIG_INDEXER_LIBS_PATH.get(),
            arguments=[
                u'--morphline-file',
                u'morphline.conf',
                u'--output-dir',
                u'${nameNode}/user/%s/indexer' % self.username,
                u'--log4j',
                u'log4j.properties',
                u'--go-live',
                u'--zk-host',
                zkensemble(),
                u'--collection',
                collection_name,
                input_path,
            ],
            files=[{
                u'path': u'%s/log4j.properties' % workspace_path,
                u'type': u'file'
            }, {
                u'path': u'%s/morphline.conf' % workspace_path,
                u'type': u'file'
            }])

        return notebook.execute(request, batch=True)
Exemplo n.º 2
0
  def run_morphline(self, request, collection_name, morphline, input_path, query=None, start_time=None, lib_path=None):
    workspace_path = self._upload_workspace(morphline)

    task = make_notebook(
      name=_('Indexing into %s') % collection_name,
      editor_type='notebook',
      on_success_url=reverse('search:browse', kwargs={'name': collection_name}),
      pub_sub_url='assist.collections.refresh',
      is_task=True,
      is_notebook=True,
      last_executed=start_time
    )

    if query:
      q = Notebook(document=Document2.objects.get_by_uuid(user=self.user, uuid=query))
      notebook_data = q.get_data()
      snippet = notebook_data['snippets'][0]

      api = get_api(request, snippet)

      destination = '__hue_%s' % notebook_data['uuid'][:4]
      location = '/user/%s/__hue-%s' % (request.user,  notebook_data['uuid'][:4])
      sql, _success_url = api.export_data_as_table(notebook_data, snippet, destination, is_temporary=True, location=location)
      input_path = '${nameNode}%s' % location

      task.add_hive_snippet(snippet['database'], sql)

    client = SolrClient(self.user)

    extra_args = ['-Dmapreduce.job.user.classpath.first=true'] if client.is_solr_six_or_more() else []

    task.add_java_snippet(
      clazz='org.apache.solr.hadoop.MapReduceIndexerTool',
      app_jar=lib_path if lib_path is not None else CONFIG_INDEXER_LIBS_PATH.get(),
      arguments=extra_args + [
          u'--morphline-file',
          u'morphline.conf',
          u'--output-dir',
          u'${nameNode}/user/%s/indexer' % self.username,
          u'--log4j',
          u'log4j.properties',
          u'--go-live',
          u'--zk-host',
          client.get_zookeeper_host(),
          u'--collection',
          collection_name,
          input_path,
      ],
      files=[
          {u'path': u'%s/log4j.properties' % workspace_path, u'type': u'file'},
          {u'path': u'%s/morphline.conf' % workspace_path, u'type': u'file'}
      ]
    )

    return task.execute(request, batch=True)
Exemplo n.º 3
0
  def run_morphline(self, request, collection_name, morphline, input_path, query=None):
    workspace_path = self._upload_workspace(morphline)

    notebook = Notebook(
        name='Indexer job for %s' % collection_name,
        isManaged=True
    )

    if query:
      q = Notebook(document=Document2.objects.get_by_uuid(user=self.user, uuid=query))
      notebook_data = q.get_data()
      snippet = notebook_data['snippets'][0]

      api = get_api(request, snippet)

      destination = '__hue_%s' % notebook_data['uuid'][:4]
      location = '/user/%s/__hue-%s' % (request.user,  notebook_data['uuid'][:4])
      sql, success_url = api.export_data_as_table(notebook_data, snippet, destination, is_temporary=True, location=location)
      input_path = '${nameNode}%s' % location

      notebook.add_hive_snippet(snippet['database'], sql)

    notebook.add_java_snippet(
      clazz='org.apache.solr.hadoop.MapReduceIndexerTool',
      app_jar=CONFIG_INDEXER_LIBS_PATH.get(),
      arguments=[
          u'--morphline-file',
          u'morphline.conf',
          u'--output-dir',
          u'${nameNode}/user/%s/indexer' % self.username,
          u'--log4j',
          u'log4j.properties',
          u'--go-live',
          u'--zk-host',
          zkensemble(),
          u'--collection',
          collection_name,
          input_path,
      ],
      files=[
          {u'path': u'%s/log4j.properties' % workspace_path, u'type': u'file'},
          {u'path': u'%s/morphline.conf' % workspace_path, u'type': u'file'}
      ]
    )

    return notebook.execute(request, batch=True)