示例#1
0
    def _generate_plugin_objects(self, context, inputs_dict):
        """
        Runs the user code through the parent class and produces one Qubole Hive job per returned query.

        A falsy result from the user code is treated as "no queries"; a single non-list result is wrapped
        in a one-element list. The resulting list is handed to ``self._validate_queries`` before any job
        object is built.

        :param flytekit.engines.common.EngineContext context:
        :param dict[Text, T] inputs_dict: forwarded unchanged to ``_execute_user_code``
        :rtype: list[_qubole.QuboleHiveJob]
        """
        user_output = super(SdkHiveTask, self)._execute_user_code(context, inputs_dict)
        if not user_output:
            query_strings = []
        elif isinstance(user_output, list):
            query_strings = user_output
        else:
            query_strings = [user_output]

        self._validate_queries(query_strings)

        def _as_job(query_text):
            # NOTE(review): if metadata.timeout is a datetime.timedelta, `.seconds` ignores whole days;
            #               confirm that total_seconds() is not what is intended here.
            single_query = _qubole.HiveQuery(
                query=query_text,
                timeout_sec=self.metadata.timeout.seconds,
                retry_count=self.metadata.retries.retries,
            )

            # TODO: Remove this after all users of older SDK versions that did the single node, multi-query
            #       pattern are deprecated. Backwards compatibility only: besides populating the query field,
            #       a one-element HiveQueryCollection is attached so that the older plugin keeps working.
            compat_collection = _qubole.HiveQueryCollection([single_query])

            return _qubole.QuboleHiveJob(
                single_query,
                self._cluster_label,
                self._tags,
                query_collection=compat_collection,
            )

        return [_as_job(query_text) for query_text in query_strings]
示例#2
0
    def _generate_hive_queries(self, context, inputs_dict):
        """
        Runs the user code through the parent class and bundles every returned query into a single
        Qubole Hive job.

        A falsy result from the user code is treated as "no queries"; a single non-list result is wrapped
        in a one-element list. The resulting list is handed to ``self._validate_queries`` before the job
        object is built.

        :param flytekit.engines.common.EngineContext context:
        :param dict[Text, T] inputs_dict: forwarded unchanged to ``_execute_user_code``
        :rtype: _qubole.QuboleHiveJob
        """
        user_output = super(SdkHiveTask, self)._execute_user_code(context, inputs_dict)
        if not user_output:
            query_strings = []
        elif isinstance(user_output, list):
            query_strings = user_output
        else:
            query_strings = [user_output]

        self._validate_queries(query_strings)

        wrapped_queries = []
        for query_text in query_strings:
            # NOTE(review): if metadata.timeout is a datetime.timedelta, `.seconds` ignores whole days;
            #               confirm that total_seconds() is not what is intended here.
            wrapped_queries.append(
                _qubole.HiveQuery(
                    query=query_text,
                    timeout_sec=self.metadata.timeout.seconds,
                    retry_count=self.metadata.retries.retries,
                )
            )

        collection = _qubole.HiveQueryCollection(wrapped_queries)
        return _qubole.QuboleHiveJob(collection, self._cluster_label, self._tags)
示例#3
0
def test_hive_query():
    """A HiveQuery converted to its Flyte IDL form and back must compare equal and keep its query text."""
    original = qubole.HiveQuery(query="some query", timeout_sec=10, retry_count=0)

    # Serialize, then rebuild from the serialized form.
    idl_message = original.to_flyte_idl()
    restored = qubole.HiveQuery.from_flyte_idl(idl_message)

    assert original == restored
    assert restored.query == "some query"
示例#4
0
def test_hive_job():
    """A QuboleHiveJob converted to its Flyte IDL form and back must compare equal to the original."""
    hive_query = qubole.HiveQuery(query="some query", timeout_sec=10, retry_count=0)
    original_job = qubole.QuboleHiveJob(query=hive_query, cluster_label="default", tags=[])

    # Serialize, then rebuild from the serialized form.
    idl_message = original_job.to_flyte_idl()
    restored_job = qubole.QuboleHiveJob.from_flyte_idl(idl_message)

    assert original_job == restored_job