def execute(self, context):
    """Prepare the Hopsworks environment and run the Sqoop job.

    Resolves the project name through the Hopsworks hook when none was
    supplied, exports ``HADOOP_USER_NAME`` and ``MATERIAL_DIRECTORY`` into
    the process environment, adds the per-project staging directory and a
    client retry limit to ``self.properties``, and then delegates to the
    parent operator's ``execute``.

    :param context: task context, passed through unchanged to the parent
        ``execute``.
    """
    self.log.debug("Preparing Sqoop job")
    hook = HopsworksHook(
        self.hopsworks_conn_id,
        self.project_id,
        self.project_name,
        self.owner,
        self.hw_api_key,
    )

    if self.project_name is None:
        self.project_name = hook.project_name

    # The template must contain replacement fields: without them
    # ``str.format`` silently ignores both arguments and every
    # project/owner pair ends up with the same constant user name.
    # Hopsworks names project-specific users ``<project>__<owner>``.
    project_specific_user = "{0}__{1}".format(self.project_name, self.owner)

    # Set impersonation. NOTE: os.environ is process-wide, so these values
    # stay set after this method returns.
    os.environ['HADOOP_USER_NAME'] = project_specific_user

    # Generate secret dir and export MATERIAL_DIRECTORY
    secret_dir = hook._generate_secret_dir()
    os.environ['MATERIAL_DIRECTORY'] = secret_dir

    # Set per project staging directory
    staging_dir = HopsworksSqoopOperator.PROJECT_STAGING.format(
        project_name=self.project_name)

    # Keep any properties already configured; only start from an empty
    # dict when none were provided.
    self.properties = {} if self.properties is None else self.properties
    self.properties['yarn.app.mapreduce.am.staging-dir'] = staging_dir
    self.properties['yarn.app.mapreduce.client.max-retries'] = 10

    self.log.debug("Calling SqoopOperator")
    super(HopsworksSqoopOperator, self).execute(context)
def _get_hook(self):
    """Build a ``HopsworksHook`` from this operator's settings.

    The hook receives, in order, the connection id, project id, project
    name, owner and API key stored on this operator.

    :return: a newly constructed ``HopsworksHook``.
    """
    hook_args = (
        self.hopsworks_conn_id,
        self.project_id,
        self.project_name,
        self.owner,
        self.hw_api_key,
    )
    return HopsworksHook(*hook_args)