Пример #1
0
 def setUp(self):
     """Create the fixtures shared by the tests.

     Mounts a ``requests_mock`` adapter on a fresh session under the ``mock``
     prefix and builds one ``HttpHook`` for each of ``GET``, ``get`` and ``POST``.
     """
     http_session = requests.Session()
     mock_adapter = requests_mock.Adapter()
     http_session.mount('mock', mock_adapter)
     # One hook per verb spelling; the lowercase one is kept separately.
     self.get_hook, self.get_lowercase_hook, self.post_hook = (
         HttpHook(method=verb) for verb in ('GET', 'get', 'POST')
     )
    def execute(self, context):
        """Fetch the resource page by page and upload each page to GCS as CSV.

        The query is re-issued with an ``&$offset=<n>`` suffix for every offset
        from 0 up to ``self.max_rows`` in steps of 5000. Each JSON response is
        loaded into a DataFrame, given a leading ``surrogate_keys`` column filled
        with the string ``'null'`` and a ``partitioned_key`` column (a copy of
        the column named by ``self.partitioned_key``) in second position, then
        uploaded as ``<object_name>/<offset>``.

        :param context: task context supplied by the caller (not used here).
        """
        http_hook = HttpHook(http_conn_id=self.http_conn_id,
                             method=self.method)

        # Retry each request up to 10 times, waiting 10 seconds between attempts.
        retry_args = dict(wait=tenacity.wait_fixed(10),
                          stop=tenacity.stop_after_attempt(10))

        gcp_conn = GCSHook(gcp_conn_id=self.google_cloud_storage_conn_id)

        total_rows = int(self.max_rows)

        for offset in range(0, total_rows, 5000):

            paged_query = self.query + f'&$offset={offset}'
            response = http_hook.run_with_advanced_retry(
                endpoint=self.resource, data=paged_query, _retry_args=retry_args)

            df = pd.DataFrame(response.json())
            df.insert(0, "surrogate_keys", 'null', True)
            # Select the source column by name. The previous code indexed with a
            # set literal ({name}), which pandas does not accept as an indexer.
            df['partitioned_key'] = df[self.partitioned_key]
            # Move the freshly added column from the end to position 1.
            partition_col = df.pop('partitioned_key')
            df.insert(1, 'partitioned_key', partition_col)
            csv_payload = df.to_csv(index=False)

            name = self.object_name + '/' + str(offset)

            gcp_conn.upload(self.bucket_name, name, data=csv_payload)
Пример #3
0
 def spill_session_logs(self):
     """Write the full log of this session to the logger, one page at a time.

     Pages are requested through ``self.fetch_log_page`` until the reported
     ``from`` offset plus the number of lines received reaches ``total``.

     Raises:
         AirflowBadRequest: a page lacks one of the ``log``, ``from`` or
             ``total`` entries.
     """
     rule = "-" * 50
     logging.info(f"{rule}Full log for session {self.session_id}{rule}")
     endpoint = f"{ENDPOINT}/{self.session_id}/log"
     hook = HttpHook(method="GET", http_conn_id=self.http_conn_id)
     page_limit = LOG_PAGE_LINES
     next_line = 0
     finished = False
     while not finished:
         page = self.fetch_log_page(hook, endpoint, next_line, page_limit)
         try:
             lines = page["log"]
             for line in lines:
                 # Turn escaped newlines into real ones before logging.
                 logging.info(line.replace("\\n", "\n"))
             page_start = page["from"]
             total = page["total"]
         except LookupError as ex:
             log_response_error("$.log, $.from, $.total", page, self.session_id)
             raise AirflowBadRequest(ex)
         # Continue right after the last line that was actually returned.
         next_line = page_start + len(lines)
         finished = next_line >= total
     logging.info(f"{rule}End of full log for session {self.session_id}{rule}")
Пример #4
0
    def poke(self, context: 'Context') -> bool:
        """Send one request to ``self.endpoint`` and report the sensor outcome.

        Returns the value of ``self.response_check`` when one is set, ``True``
        when the request completes and no check is set, and ``False`` when the
        hook raises an ``AirflowException`` whose text starts with ``"404"``.
        Any other ``AirflowException`` is re-raised.
        """
        from airflow.utils.operator_helpers import determine_kwargs

        keep_alive_options = {
            'tcp_keep_alive': self.tcp_keep_alive,
            'tcp_keep_alive_idle': self.tcp_keep_alive_idle,
            'tcp_keep_alive_count': self.tcp_keep_alive_count,
            'tcp_keep_alive_interval': self.tcp_keep_alive_interval,
        }
        hook = HttpHook(
            method=self.method,
            http_conn_id=self.http_conn_id,
            **keep_alive_options,
        )

        self.log.info('Poking: %s', self.endpoint)
        try:
            response = hook.run(
                self.endpoint,
                data=self.request_params,
                headers=self.headers,
                extra_options=self.extra_options,
            )
            if self.response_check:
                check_kwargs = determine_kwargs(self.response_check, [response],
                                                context)
                return self.response_check(response, **check_kwargs)
        except AirflowException as exc:
            # Only a "404" failure means "not ready yet"; everything else is fatal.
            if not str(exc).startswith("404"):
                raise exc
            return False
        else:
            return True
Пример #5
0
 def poke(self, context):
     """Poll the state of one statement in a session.

     Returns ``False`` while the state is ``waiting`` or ``running`` and
     ``True`` once it is ``available`` (after ``__check_status`` has run).

     Raises:
         AirflowBadRequest: the response is not JSON, has no ``state`` entry,
             or reports a state other than the three known ones.
     """
     logging.info(
         f"Getting status for statement {self.statement_id} "
         f"in session {self.session_id}"
     )
     endpoint = f"{ENDPOINT}/{self.session_id}/statements/{self.statement_id}"
     status_hook = HttpHook(method="GET", http_conn_id=self.http_conn_id)
     response = status_hook.run(endpoint)
     try:
         statement = json.loads(response.content)
         state = statement["state"]
     except (JSONDecodeError, LookupError) as ex:
         log_response_error("$.state", response, self.session_id, self.statement_id)
         raise AirflowBadRequest(ex)
     if state == "available":
         self.__check_status(statement, response)
         return True
     if state in ("waiting", "running"):
         logging.info(
             f"Statement {self.statement_id} in session {self.session_id} "
             f"has not finished yet (state is '{state}')"
         )
         return False
     raise AirflowBadRequest(
         f"Statement {self.statement_id} in session {self.session_id} failed due to "
         f"an unknown state: '{state}'.\nKnown states: 'waiting', 'running', "
         "'available'"
     )
Пример #6
0
def get_aware_devices(token, customer_id):
    """Return the devices of a customer as a JSON string keyed by device id.

    Arguments:
        token {string} - JSON document with a 'token' entry holding the bearer token
        customer_id {string} - customer id placed in the request path

    Returns:
        string - JSON object mapping each device id to its name, type, label
        and entityType. The device named "USACEQueue" is left out.
    """
    bearer_token = json.loads(token)['token']
    # The bearer token is deliberately not printed: it is a credential and
    # would otherwise end up in task logs.

    conn = get_connection()
    h = HttpHook(http_conn_id=conn.conn_id, method='GET')
    endpoint = f'/api/customer/{customer_id}/devices?limit=1000'
    headers = {
        "Content-Type": "application/json",
        "X-Authorization": "Bearer " + bearer_token
    }
    r = h.run(endpoint=endpoint, headers=headers)

    devices = {}
    for device in r.json()['data']:
        d_name = device['name']  # e.g. USACE00504
        if d_name == "USACEQueue":
            # Excluded entry; skip it before reading its other fields.
            continue
        device_ref = device['id']
        # device_ref['id'] is the device UUID; entityType is normally 'DEVICE'.
        devices[device_ref['id']] = {
            "name": d_name,
            "type": device['type'],
            "label": device['label'],
            "entityType": device_ref['entityType']
        }

    return json.dumps(devices)
Пример #7
0
    def test_connection_without_host(self, mock_get_connection):
        """A connection with no host must yield the bare ``http://`` base URL."""
        mock_get_connection.return_value = Connection(
            conn_id='http_default', conn_type='http'
        )

        hook_under_test = HttpHook()
        hook_under_test.get_conn({})
        self.assertEqual(hook_under_test.base_url, 'http://')
Пример #8
0
 def close_batch(self):
     """Send a DELETE request for this batch, logging before and after."""
     logging.info(f"Closing batch with id = {self.batch_id}")
     target = f"{LIVY_ENDPOINT}/{self.batch_id}"
     delete_hook = HttpHook(method="DELETE", http_conn_id=self.http_conn_id_livy)
     delete_hook.run(target)
     logging.info(f"Batch {self.batch_id} has been closed")
Пример #9
0
def extract(
    batch_id, method="GET", http_conn_id="default_api", mongo_conn_id="default_mongo"
):
    """Fetch every queued id of a batch over HTTP and stage the results in Mongo.

    Jobs are taken from the ``ids_to_update`` collection of the ``courts``
    database, requested as ``opinions/<api_id>`` and, on a 200 response, stored
    in ``results_to_transform``. A job that fails for any reason is put back
    into ``ids_to_update`` so it can be retried.

    Args:
        batch_id: identifier of the batch; compared and stored as ``str``.
        method: HTTP method passed to ``HttpHook``.
        http_conn_id: connection id passed to ``HttpHook``.
        mongo_conn_id: connection id passed to ``MongoHook``.
    """
    http = HttpHook(method, http_conn_id=http_conn_id)

    mongo_conn = MongoHook(mongo_conn_id)
    ids_to_update_coll = mongo_conn.get_collection("ids_to_update", "courts")
    results_to_transform_coll = mongo_conn.get_collection(
        "results_to_transform", "courts"
    )
    batch_key = str(batch_id)

    def _requeue(api_id):
        # Return the job to the queue so a later iteration can retry it.
        mongo_conn.insert_one(
            "ids_to_update",
            {"api_id": str(api_id), "batch_id": batch_key},
            mongo_db="courts",
        )

    # Note/TODO: because we add endpoints back that we couldn't handle, we may
    # get stuck in an infinite loop. Another solution is exiting whenever an
    # exception occurs, but this isn't ideal either
    while True:

        # Claim a job in a single call. A separate find_one() check followed by
        # find_one_and_delete() could see the job vanish in between and then
        # fail on result["api_id"] with result being None.
        result = ids_to_update_coll.find_one_and_delete({"batch_id": batch_key})
        if result is None:
            break
        api_id = result["api_id"]

        # transform to get a valid link
        # TODO: this needs to be generalized to any website
        endpoint = f"opinions/{api_id}"
        try:
            # pull data in
            response = http.run(endpoint)

            result_data = response.json()

            if response.status_code != 200:
                # TODO: throw a more specific exception
                raise AirflowException(
                    f"Received {response.status_code} code from {endpoint}."
                )

            # store our result into mongo
            results_to_transform_coll.insert_one(
                {"batch_id": batch_key, "data": result_data}
            )

        except json.JSONDecodeError as j_error:
            # The message needs the f prefix to interpolate, and the response
            # body is exposed as ``.text`` (there is no ``.body`` attribute).
            print(f"Failed to decode response with {j_error}:\n{response.text}")
            _requeue(api_id)
        except Exception as error:
            # something went wrong. Log it and return this endpoint to mongoDB so we can try again
            print(f"An exception occured while processing batch {batch_id}:\n{error}")
            _requeue(api_id)
Пример #10
0
 def test_host_encoded_https_connection(self, mock_get_connection):
     """A scheme embedded in the host must be kept as the base URL."""
     mock_get_connection.return_value = Connection(
         conn_id='http_default',
         conn_type='http',
         host='https://localhost',
     )
     hook_under_test = HttpHook()
     hook_under_test.get_conn({})
     self.assertEqual(hook_under_test.base_url, 'https://localhost')
Пример #11
0
 def test_https_connection(self, mock_get_connection):
     """An ``https`` schema with a plain host must give an https base URL."""
     mock_get_connection.return_value = Connection(
         conn_id='http_default',
         conn_type='http',
         host='localhost',
         schema='https',
     )
     hook_under_test = HttpHook()
     hook_under_test.get_conn({})
     assert hook_under_test.base_url == 'https://localhost'
Пример #12
0
def flashfloodinfo_authenticate():
    """Log in with the connection's credentials and return the reply as JSON text.

    Posts the connection's login and password to ``/api/auth/login`` and
    returns the parsed response re-serialized with ``json.dumps``.
    """
    conn = get_connection()
    login_hook = HttpHook(http_conn_id=conn.conn_id, method='POST')
    credentials = {
        'username': conn.login,
        'password': conn.password
    }
    auth_response = login_hook.run(
        endpoint='/api/auth/login',
        data=json.dumps(credentials),
        headers={"Content-Type": "application/json"},
    )

    return json.dumps(auth_response.json())
Пример #13
0
    def test_json_request(self, method, mock_requests):
        """The ``json`` argument of ``run`` must arrive as the request body."""
        expected_body = {'a': 1, 'b': 'abc', 'c': [1, 2, {"d": 10}]}

        mock_requests.request(
            method=method,
            url='//test:8080/v1/test',
            additional_matcher=lambda request: request.json() == expected_body,
        )

        connection_patch = mock.patch(
            'airflow.hooks.base.BaseHook.get_connection',
            side_effect=get_airflow_connection,
        )
        with connection_patch:
            # will raise NoMockAddress exception if expected_body != request.json()
            HttpHook(method=method).run('v1/test', json=expected_body)
Пример #14
0
 def get_spark_app_id(self, batch_id):
     """Return the ``appId`` entry of the JSON document fetched for a batch.

     Raises:
         AirflowBadRequest: the response cannot be parsed or has no ``appId``.
     """
     logging.info(f"Getting Spark app id from Livy API for batch {batch_id}...")
     endpoint = f"{LIVY_ENDPOINT}/{batch_id}"
     batch_hook = HttpHook(method="GET", http_conn_id=self.http_conn_id_livy)
     response = batch_hook.run(endpoint)
     try:
         batch_info = json.loads(response.content)
         return batch_info["appId"]
     except (JSONDecodeError, LookupError, AirflowException) as ex:
         log_response_error("$.appId", response, batch_id)
         raise AirflowBadRequest(ex)
Пример #15
0
    def execute(self, context):
        """Run the configured HTTP request and return the response text.

        Logs the response text when ``self.log_response`` is set.

        Raises:
            AirflowException: ``self.response_check`` is set and returns a
                falsy value for the response.
        """
        hook = HttpHook(self.method, http_conn_id=self.http_conn_id)

        self.log.info("Calling HTTP method")

        response = hook.run(
            self.endpoint, self.data, self.headers, self.extra_options
        )
        if self.log_response:
            self.log.info(response.text)
        if self.response_check and not self.response_check(response):
            raise AirflowException("Response check returned False.")
        return response.text
Пример #16
0
def flashflood_get_customer(token):
    """Return the customer id of the user the token belongs to.

    ``token`` is a JSON string with a 'token' entry; that value is sent as a
    bearer token to ``/api/auth/user`` and ``customerId.id`` of the reply is
    returned.
    """
    bearer_token = json.loads(token)['token']

    conn = get_connection()
    user_hook = HttpHook(http_conn_id=conn.conn_id, method='GET')
    request_headers = {
        "Content-Type": "application/json",
        "X-Authorization": "Bearer " + bearer_token
    }
    user_response = user_hook.run(endpoint='/api/auth/user', headers=request_headers)

    user_info = user_response.json()
    return user_info['customerId']['id']
Пример #17
0
 def submit_statement(self, statement: Statement):
     """Submit a statement to this session and return the ``id`` from the reply.

     The payload always carries ``code``; ``kind`` is added only when
     ``statement.kind`` is truthy.

     Raises:
         AirflowBadRequest: the response is not JSON or has no ``id`` entry.
     """
     body = {"code": statement.code}
     if statement.kind:
         body.update(kind=statement.kind)
     request_headers = {
         "X-Requested-By": "airflow",
         "Content-Type": "application/json",
     }
     endpoint = f"{ENDPOINT}/{self.session_id}/statements"
     submit_hook = HttpHook(http_conn_id=self.http_conn_id)
     response = submit_hook.run(endpoint, json.dumps(body), request_headers)
     try:
         created = json.loads(response.content)
         return created["id"]
     except (JSONDecodeError, LookupError) as ex:
         log_response_error("$.id", response, self.session_id)
         raise AirflowBadRequest(ex)
Пример #18
0
    def execute(self, context: Dict[str, Any]) -> Any:
        """Set up the HTTP and Mongo hooks, run ``_execute`` and shut down.

        Stores the hooks on ``self.http`` / ``self.mongo_conn`` and the result
        of ``query_builder()`` on ``self.query`` before delegating.

        :param context: task context, forwarded unchanged to ``_execute``.
        :return: whatever ``_execute`` returns.
        """
        self.http = HttpHook(self.method, http_conn_id=self.http_conn_id)
        self.mongo_conn = MongoHook(self.mongo_conn_id)

        # generate query parameters
        self.query = self.query_builder()

        self.log.info(f"Connecting to: {self.http_conn_id}")

        try:
            return self._execute(context)
        finally:
            # Run the shutdown step even when _execute raises, so whatever it
            # releases is not left behind by a failed run.
            # NOTE(review): _shutdown is not visible here -- confirm it is safe
            # to call after a failed _execute.
            self._shutdown()
Пример #19
0
    def write_to_midas(instrument, aware_data):
        """Post the AWARE timeseries values of one instrument to the MIDAS API.

        Arguments:
            instrument {dictionary} - needs 'aware_parameters' (AWARE parameter
                name -> MIDAS timeseries id, or None to skip) and 'project_id'
            aware_data {string} - JSON text mapping each AWARE parameter name
                to a list of items with 'ts' and 'value'

        Returns None. Nothing is posted when the decoded aware_data is empty.
        """
        # Decode the JSON text
        aware_response = json.loads(aware_data)

        # Nothing to write when no AWARE data came back
        if len(aware_response) == 0:
            return

        logging.info(f'instrument: {instrument}')
        logging.debug(f'aware_data: {aware_data}')

        payload = []
        for aware_param, midas_ts_id in instrument['aware_parameters'].items():
            if midas_ts_id is None:
                # No MIDAS timeseries is mapped to this parameter
                continue

            print(f"AWARE values for {aware_param}:")
            # One measurement per AWARE item of this parameter
            measurements = [
                {"time": aware.epoch_ms_to_human(item['ts']), "value": float(item['value'])}
                for item in aware_response[aware_param]
            ]
            payload.append({'timeseries_id': midas_ts_id, 'items': measurements})

        print(f'payload: {json.dumps(payload)}')

        conn = midas.get_connection()
        post_hook = HttpHook(http_conn_id=conn.conn_id, method='POST')
        endpoint = f"/projects/{instrument['project_id']}/timeseries_measurements?key_id={conn.login}&key={conn.password}"
        post_hook.run(
            endpoint=endpoint,
            json=payload,
            headers={"Content-Type": "application/json"},
        )
Пример #20
0
def get_device_ts_data(token, device_id, startTs, endTs, keys, limit):
    """Request timeseries values of one device and return the raw response.

    Arguments:
        token {string} - bearer token sent in the X-Authorization header
        device_id {string} - device id placed in the request path
        startTs, endTs - passed through as the startTs / endTs query values
        keys - passed through as the keys query value
        limit - passed through as the limit query value

    Returns:
        the object returned by HttpHook.run (not decoded here)
    """
    conn = get_connection()
    telemetry_hook = HttpHook(http_conn_id=conn.conn_id, method='GET')
    request_headers = {
        "Content-Type": "application/json",
        "X-Authorization": "Bearer " + token
    }
    path = f"/api/plugins/telemetry/DEVICE/{device_id}/values/timeseries"
    query = f"limit={limit}&agg=NONE&startTs={startTs}&endTs={endTs}&keys={keys}"

    return telemetry_hook.run(endpoint=f"{path}?{query}", headers=request_headers)
Пример #21
0
 def check_yarn_app_status(self, app_id):
     """Verify that ``app.finalStatus`` of the fetched document is SUCCEEDED.

     Raises:
         AirflowBadRequest: the response cannot be parsed or lacks
             ``app.finalStatus``.
         AirflowException: the final status is anything but ``SUCCEEDED``.
     """
     logging.info(f"Getting app status (id={app_id}) from YARN RM REST API...")
     endpoint = f"{YARN_ENDPOINT}/{app_id}"
     yarn_hook = HttpHook(method="GET", http_conn_id=self.http_conn_id_yarn)
     response = yarn_hook.run(endpoint)
     try:
         app_info = json.loads(response.content)
         status = app_info["app"]["finalStatus"]
     except (JSONDecodeError, LookupError, TypeError) as ex:
         log_response_error("$.app.finalStatus", response)
         raise AirflowBadRequest(ex)
     expected_status = "SUCCEEDED"
     if status != expected_status:
         message = (
             f"YARN app {app_id} is '{status}', expected status: '{expected_status}'"
         )
         raise AirflowException(message)
Пример #22
0
 def poke(self, context):
     """Poll the state of this batch.

     Returns ``False`` while the state is one of ``VALID_BATCH_STATES`` and
     ``True`` once it is ``success``.

     Raises:
         AirflowBadRequest: the response is not JSON or has no ``state`` entry.
         AirflowException: the batch reports any other state.
     """
     logging.info(f"Getting batch {self.batch_id} status...")
     endpoint = f"{LIVY_ENDPOINT}/{self.batch_id}"
     status_hook = HttpHook(method="GET", http_conn_id=self.http_conn_id)
     response = status_hook.run(endpoint)
     try:
         batch_info = json.loads(response.content)
         state = batch_info["state"]
     except (JSONDecodeError, LookupError) as ex:
         log_response_error("$.state", response, self.batch_id)
         raise AirflowBadRequest(ex)
     if state in VALID_BATCH_STATES:
         logging.info(
             f"Batch {self.batch_id} has not finished yet (state is '{state}')"
         )
         return False
     if state != "success":
         raise AirflowException(f"Batch {self.batch_id} failed with state '{state}'")
     logging.info(f"Batch {self.batch_id} has finished successfully!")
     return True
Пример #23
0
    def execute(self, context: Dict[str, Any]) -> Any:
        """Run the configured HTTP request and return its (optionally filtered) result.

        ``response_check`` and ``response_filter`` are both wrapped with
        ``make_kwargs_callable`` and called with the response plus the context
        entries as keyword arguments.

        :param context: task context, expanded into keyword arguments.
        :return: the value of ``response_filter`` when set, else the response text.
        :raises AirflowException: ``response_check`` is set and returns a falsy value.
        """
        from airflow.utils.operator_helpers import make_kwargs_callable

        hook = HttpHook(self.method, http_conn_id=self.http_conn_id)

        self.log.info("Calling HTTP method")

        response = hook.run(self.endpoint, self.data, self.headers, self.extra_options)
        if self.log_response:
            self.log.info(response.text)
        if self.response_check:
            check = make_kwargs_callable(self.response_check)
            if not check(response, **context):
                raise AirflowException("Response check returned False.")
        if not self.response_filter:
            return response.text
        response_filter = make_kwargs_callable(self.response_filter)
        return response_filter(response, **context)
Пример #24
0
    def execute(self, context: 'Context') -> Any:
        """Run the configured HTTP request and return its (optionally filtered) result.

        The keyword arguments for ``response_check`` and ``response_filter``
        are worked out with ``determine_kwargs`` from the response and context.

        :param context: task context handed to ``determine_kwargs``.
        :return: the value of ``response_filter`` when set, else the response text.
        :raises AirflowException: ``response_check`` is set and returns a falsy value.
        """
        from airflow.utils.operator_helpers import determine_kwargs

        hook = HttpHook(self.method, http_conn_id=self.http_conn_id, auth_type=self.auth_type)

        self.log.info("Calling HTTP method")

        response = hook.run(self.endpoint, self.data, self.headers, self.extra_options)
        if self.log_response:
            self.log.info(response.text)
        if self.response_check:
            check_kwargs = determine_kwargs(self.response_check, [response], context)
            if not self.response_check(response, **check_kwargs):
                raise AirflowException("Response check returned False.")
        if not self.response_filter:
            return response.text
        filter_kwargs = determine_kwargs(self.response_filter, [response], context)
        return self.response_filter(response, **filter_kwargs)
Пример #25
0
    def __init__(self,
                 endpoint: str,
                 http_conn_id: str = 'http_default',
                 method: str = 'GET',
                 request_params: Optional[Dict] = None,
                 headers: Optional[Dict] = None,
                 response_check: Optional[Callable] = None,
                 extra_options: Optional[Dict] = None,
                 *args,
                 **kwargs):
        """Store the request settings and build the ``HttpHook`` used later.

        :param endpoint: stored on ``self.endpoint``.
        :param http_conn_id: connection id, stored and passed to the hook.
        :param method: HTTP method; only passed to the hook, not stored.
        :param request_params: stored on ``self.request_params``; ``{}`` when falsy.
        :param headers: stored on ``self.headers``; ``{}`` when falsy.
        :param response_check: optional callable, stored as given.
        :param extra_options: stored on ``self.extra_options``; ``{}`` when falsy.
        """
        super().__init__(*args, **kwargs)
        self.endpoint = endpoint
        self.http_conn_id = http_conn_id
        self.response_check = response_check

        # Falsy dict arguments (None included) are replaced by a fresh dict.
        self.request_params = request_params if request_params else {}
        self.headers = headers if headers else {}
        self.extra_options = extra_options if extra_options else {}

        self.hook = HttpHook(method=method, http_conn_id=http_conn_id)
Пример #26
0
 def poke(self, context):
     """Poll the state of this session.

     Returns ``False`` while the state is ``starting`` and ``True`` once it
     is ``idle``.

     Raises:
         AirflowBadRequest: the response is not JSON or has no ``state`` entry.
         AirflowException: the session reports any other state.
     """
     logging.info(f"Getting session {self.session_id} status...")
     endpoint = f"{ENDPOINT}/{self.session_id}/state"
     state_hook = HttpHook(method="GET", http_conn_id=self.http_conn_id)
     response = state_hook.run(endpoint)
     try:
         session_info = json.loads(response.content)
         state = session_info["state"]
     except (JSONDecodeError, LookupError) as ex:
         log_response_error("$.state", response, self.session_id)
         raise AirflowBadRequest(ex)
     if state == "idle":
         logging.info(f"Session {self.session_id} is ready to receive statements.")
         return True
     if state == "starting":
         logging.info(f"Session {self.session_id} is starting...")
         return False
     raise AirflowException(
         f"Session {self.session_id} failed to start. "
         f"State='{state}'. Expected states: 'starting' or 'idle' (ready)."
     )
Пример #27
0
 def submit_batch(self):
     """Submit this batch and remember the numeric ``id`` from the reply.

     Only settings with a truthy value are included in the JSON payload. On
     success the id is stored on ``self.batch_id``.

     Raises:
         AirflowBadRequest: the response is not JSON or has no ``id`` entry.
         AirflowException: the returned id is not a number.
     """
     request_headers = {
         "X-Requested-By": "airflow",
         "Content-Type": "application/json",
     }
     # (payload key, configured value) pairs, in the order they are serialized.
     candidate_fields = (
         ("file", self.file),
         ("proxyUser", self.proxy_user),
         ("className", self.class_name),
         ("args", self.arguments),
         ("jars", self.jars),
         ("pyFiles", self.py_files),
         ("files", self.files),
         ("driverMemory", self.driver_memory),
         ("driverCores", self.driver_cores),
         ("executorMemory", self.executor_memory),
         ("executorCores", self.executor_cores),
         ("numExecutors", self.num_executors),
         ("archives", self.archives),
         ("queue", self.queue),
         ("name", self.name),
         ("conf", self.conf),
     )
     payload = {key: value for key, value in candidate_fields if value}
     logging.info(
         f"Submitting the batch to Livy... "
         f"Payload:\n{json.dumps(payload, indent=2)}"
     )
     submit_hook = HttpHook(http_conn_id=self.http_conn_id_livy)
     response = submit_hook.run(LIVY_ENDPOINT, json.dumps(payload), request_headers)
     try:
         created = json.loads(response.content)
         batch_id = created["id"]
     except (JSONDecodeError, LookupError) as ex:
         log_response_error("$.id", response)
         raise AirflowBadRequest(ex)
     if isinstance(batch_id, Number):
         self.batch_id = batch_id
         return
     raise AirflowException(
         f"ID of the created batch is not a number ({batch_id}). "
         "Are you sure we're calling Livy API?"
     )
Пример #28
0
    def create_session(self):
        """Create a session and remember the numeric ``id`` from the reply.

        Only settings with a truthy value are included in the JSON payload. On
        success the id is stored on ``self.session_id``.

        Raises:
            AirflowBadRequest: the response is not JSON or has no ``id`` entry.
            AirflowException: the returned id is not a number.
        """
        request_headers = {
            "X-Requested-By": "airflow",
            "Content-Type": "application/json",
        }
        # (payload key, configured value) pairs, in the order they are serialized.
        candidate_fields = (
            ("kind", self.kind),
            ("proxyUser", self.proxy_user),
            ("jars", self.jars),
            ("pyFiles", self.py_files),
            ("files", self.files),
            ("driverMemory", self.driver_memory),
            ("driverCores", self.driver_cores),
            ("executorMemory", self.executor_memory),
            ("executorCores", self.executor_cores),
            ("numExecutors", self.num_executors),
            ("archives", self.archives),
            ("queue", self.yarn_queue),
            ("name", self.name),
            ("conf", self.conf),
            ("heartbeatTimeoutInSecond", self.heartbeat_timeout),
        )
        payload = {key: value for key, value in candidate_fields if value}
        logging.info(
            f"Creating a session in Livy... "
            f"Payload:\n{json.dumps(payload, indent=2)}"
        )
        create_hook = HttpHook(http_conn_id=self.http_conn_id)
        response = create_hook.run(ENDPOINT, json.dumps(payload), request_headers)
        try:
            created = json.loads(response.content)
            session_id = created["id"]
        except (JSONDecodeError, LookupError) as ex:
            log_response_error("$.id", response)
            raise AirflowBadRequest(ex)

        if isinstance(session_id, Number):
            self.session_id = session_id
            return
        raise AirflowException(
            f"ID of the created session is not a number ({session_id}). "
            "Are you sure we're calling Livy API?"
        )
Пример #29
0
 def check_spark_app_status(self, app_id):
     """Verify that every job listed for the application has status SUCCEEDED.

     Each job's id and status are logged as they are checked.

     Raises:
         AirflowException: a job has a status other than ``SUCCEEDED``.
         AirflowBadRequest: the response cannot be parsed, or a job entry
             lacks ``jobId`` or ``status``.
     """
     logging.info(f"Getting app status (id={app_id}) from Spark REST API...")
     endpoint = f"{SPARK_ENDPOINT}/{app_id}/jobs"
     jobs_hook = HttpHook(method="GET", http_conn_id=self.http_conn_id_spark)
     response = jobs_hook.run(endpoint)
     expected_status = "SUCCEEDED"
     try:
         for job in json.loads(response.content):
             job_id, job_status = job["jobId"], job["status"]
             logging.info(
                 f"Job id {job_id} associated with application '{app_id}' "
                 f"is '{job_status}'"
             )
             if job_status != expected_status:
                 # Not one of the exception types handled below, so it propagates as is.
                 raise AirflowException(
                     f"Job id '{job_id}' associated with application '{app_id}' "
                     f"is '{job_status}', expected status is '{expected_status}'"
                 )
     except (JSONDecodeError, LookupError, TypeError) as ex:
         log_response_error("$.jobId, $.status", response)
         raise AirflowBadRequest(ex)
Пример #30
0
 def close_session(self):
     """Send a DELETE request for this session, logging before and after."""
     logging.info(f"Closing session with id = {self.session_id}")
     target = f"{ENDPOINT}/{self.session_id}"
     delete_hook = HttpHook(method="DELETE", http_conn_id=self.http_conn_id)
     delete_hook.run(target)
     logging.info(f"Session {self.session_id} has been closed")