Exemplo n.º 1
0
    def __init__(
            self,
            fields,  # mapping of field groups, e.g. {'campaign': ['id', 'name']}
            metrics,  # metrics to retrieve
            resource,  # resource to query, e.g. ad_group
            client_id,
            conditions=None,  # e.g. ["ad_group.status = 'ENABLED'"]
            *args,
            **kwargs):
        """Validate the query definition and initialise the operator.

        The ``update_on_columns`` kwarg is always overwritten with the
        selected columns starting with "segment" (dots replaced by
        underscores) plus ``<resource>_resource_name``. Remaining positional
        and keyword arguments are forwarded to the parent initialiser.

        Recommendation: leave data_until=None and use a timedelta for
        data_from. (Neither is declared here; presumably both are consumed
        by the parent class via kwargs - confirm.)

        Raises:
            Exception: if ``columns_definition`` is supplied, if
                ``conditions`` is truthy but fails the
                ``is_iterable_not_string`` check, or if ``fields`` lacks
                ``segments`` containing ``date``.
        """
        if kwargs.get("columns_definition"):
            raise Exception(
                "columns_definition is not accepted for this operator!")

        # Derived from the caller's untouched ``fields``, i.e. before the
        # metrics are merged in further down.
        upsert_key = []
        for column in self.get_select_statement(fields):
            if column.startswith("segment"):
                upsert_key.append(column.replace(".", "_"))
        upsert_key.append("{0}_resource_name".format(resource))
        kwargs["update_on_columns"] = upsert_key

        if conditions:
            if not is_iterable_not_string(conditions):
                raise Exception('Argument "conditions" must be a list!')

        if not fields.get("segments") or "date" not in fields["segments"]:
            raise Exception("fields list MUST contain segments.date!")

        # Work on deep copies: airflow hands ``fields`` over by reference,
        # and adding the metrics to the caller's dict would leak into the
        # update_on_columns definition built above.
        merged_fields = deepcopy(fields)
        merged_fields["metrics"] = deepcopy(metrics)

        self.fields_dict = merged_fields
        self.fields_list = self.get_select_statement(merged_fields)
        self.resource = resource
        self.client_id = str(client_id)
        self.conditions = conditions

        super().__init__(*args, **kwargs)
Exemplo n.º 2
0
    def __init__(
            self,
            fields,  # mapping of field groups, e.g. {'campaign': ['id', 'name']}
            metrics,  # metrics to retrieve
            resource,  # resource to query, e.g. ad_group
            client_id,
            conditions=None,  # e.g. ["ad_group.status = 'ENABLED'"]
            data_from=None,  # datetime, or timedelta relative to data_until
            data_until=None,
            *args,
            **kwargs):
        """Validate the query definition and initialise the operator.

        The ``update_on_columns`` kwarg is always overwritten with every
        selected column of ``fields`` (dots replaced by underscores).
        Remaining positional and keyword arguments are forwarded to the
        parent initialiser.

        Recommendation: leave data_until=None and use a timedelta for
        data_from.

        Raises:
            Exception: if ``conditions`` is truthy but fails the
                ``is_iterable_not_string`` check, or if ``fields`` lacks
                ``segments`` containing ``date``.
        """
        # Derived from the caller's untouched ``fields``, i.e. before the
        # metrics are merged in further down.
        upsert_key = []
        for column in self.get_select_statement(fields):
            upsert_key.append(column.replace('.', '_'))
        kwargs['update_on_columns'] = upsert_key

        if conditions:
            if not is_iterable_not_string(conditions):
                raise Exception('Argument "conditions" must be a list!')

        if not fields.get('segments') or 'date' not in fields['segments']:
            raise Exception('fields list MUST contain segments.date!')

        # Work on deep copies: airflow hands ``fields`` over by reference,
        # and adding the metrics to the caller's dict would leak into the
        # update_on_columns definition built above.
        merged_fields = deepcopy(fields)
        merged_fields['metrics'] = deepcopy(metrics)

        self.fields_dict = merged_fields
        self.fields_list = self.get_select_statement(merged_fields)
        self.resource = resource
        self.client_id = str(client_id)
        self.conditions = conditions
        self.data_from = data_from
        self.data_until = data_until

        super().__init__(*args, **kwargs)
Exemplo n.º 3
0
    def __init__(
            self,
            shop_id,
            shopify_object,
            auth_type,
            filter_fields={},
            api_version=None,
            get_transactions_with_orders=False,
            get_events_with_orders=False,
            page_limit=250,  # API Call pagination limit
            *args,
            **kwargs):
        """Validate the Shopify extraction settings and set up the operator.

        Args:
            shop_id: a single shop identifier; non-string iterables are
                rejected.
            shopify_object: must be a key of ``self._accepted_objects``.
            auth_type: either 'access_token' or 'basic_auth'.
            filter_fields: dict whose keys must all be keys of the
                ``self._accepted_objects`` entry for ``shopify_object``.
                NOTE: the default dict is shared between calls; this method
                only reads it.
            api_version: falls back to ``self._current_api_version`` when
                falsy; must be in ``self._acceptable_api_versions``.
            get_transactions_with_orders: only allowed for 'orders'.
            get_events_with_orders: only allowed for 'orders'.
            page_limit: int between 1 and 250 (inclusive).
            *args, **kwargs: forwarded to the parent initialiser.
                ``primary_key_column_name`` defaults to 'id',
                ``source_conn_id`` defaults to '__none__', and
                ``drop_and_replace`` is forced to True when the object's
                entry has a truthy ``_is_drop_and_replace``.

        Raises:
            Exception: if any of the validations above fails.
        """
        if is_iterable_not_string(shop_id):
            raise Exception('Multiple shops in one DAG is deprecated!')

        if get_transactions_with_orders and not shopify_object == 'orders':
            raise Exception('transactions can only be pulled for orders!')

        if get_events_with_orders and not shopify_object == 'orders':
            raise Exception('events can only be pulled for orders!')

        if shopify_object not in self._accepted_objects:
            # Parenthesise the concatenated template: previously .format()
            # bound only to the second literal, leaving "{0}" unformatted
            # in the raised message.
            raise Exception((
                '{0} is not in the list of accepted objects!'
                ' accepted objects: {1}'
            ).format(
                shopify_object,
                ', '.join(self._accepted_objects.keys()),
            ))

        object_definition = self._accepted_objects[shopify_object]

        if object_definition.get('_is_drop_and_replace'):
            kwargs['drop_and_replace'] = True

        if auth_type not in ('access_token', 'basic_auth'):
            raise Exception('auth_type must be access_token or basic_auth!')

        if not isinstance(filter_fields, dict):
            raise Exception('filter_fields must be a dictionary!')
        # Validate against the entry of the requested object. The previous
        # code looked up the literal string 'shopify_object' instead of the
        # variable.
        for key in filter_fields:
            if key not in object_definition:
                raise Exception(
                    'invalid key {0} in filter fields!'.format(key))

        api_version = api_version or self._current_api_version
        if api_version not in self._acceptable_api_versions:
            raise Exception(
                '{0} is not a valid api version! valid versions: {1}'.format(
                    api_version,
                    ', '.join(self._acceptable_api_versions),
                ))

        # Exact type check on purpose: isinstance() would let bools through.
        if type(page_limit) is not int or not 1 <= page_limit <= 250:
            raise Exception(
                'Page limit must be a positive integer not exceeding 250!')

        kwargs.setdefault('primary_key_column_name', 'id')

        # source conn id is not required on operator call level! avoid error
        kwargs.setdefault('source_conn_id', '__none__')

        super().__init__(*args, **kwargs)

        self.shop_id = shop_id
        self.shopify_object = shopify_object
        self.auth_type = auth_type
        self.filter_fields = filter_fields
        self.api_version = api_version
        self.page_limit = page_limit
        self.get_transactions_with_orders = get_transactions_with_orders
        self.get_events_with_orders = get_events_with_orders
Exemplo n.º 4
0
    def ewah_execute(self, context):
        """Build the request parameters and run the extraction for the shop.

        The parameters are assembled from the accepted-object entry of
        ``self.shopify_object`` (keys not starting with '_' whose value is
        not None), then ``self.filter_fields``, then the page limit. Unless
        ``self.drop_and_replace`` is truthy, ``context['next_execution_date']``
        is added under the second timestamp field name, and
        ``context['execution_date']`` under the first one if the target
        table already exists.

        Args:
            context: mapping providing 'execution_date' and
                'next_execution_date' (read only when not dropping and
                replacing).

        Raises:
            Exception: if ``self.shop_id`` is a non-string iterable.
        """

        def datetime_to_string(dt, format_string):
            """Render ``dt`` in UTC according to ``format_string``."""
            utc = timezone('UTC')
            # aware datetimes are converted to UTC, naive ones labelled UTC
            dt = dt.astimezone(utc) if dt.tzinfo else dt.replace(tzinfo=utc)
            dt_string = dt.strftime(format_string)
            if '%z' in format_string:
                # add colon between the last two pairs of characters; this
                # assumes the UTC offset is the final token of the format
                return dt_string[:-2] + ':' + dt_string[-2:]
            return dt_string

        object_metadata = self._accepted_objects[self.shopify_object]
        self.object_metadata = object_metadata
        params = {
            key: val
            for key, val in object_metadata.items()
            if val is not None and not key.startswith('_')
        }
        params.update(self.filter_fields)
        params['limit'] = self.page_limit

        if not self.drop_and_replace:
            timestamp_fields = object_metadata.get(
                '_timestamp_fields',
                self._default_timestamp_fields,
            )
            timestamp_format_string = object_metadata.get(
                '_datetime_format',
                self._default_datetime_format,
            )
            params[timestamp_fields[1]] = datetime_to_string(
                context['next_execution_date'],
                timestamp_format_string,
            )
            if self.test_if_target_table_exists():
                params[timestamp_fields[0]] = datetime_to_string(
                    context['execution_date'],
                    timestamp_format_string,
                )

        if is_iterable_not_string(self.shop_id):
            # Running several shops in one DAG is deactivated. The former
            # per-shop loop sat after this raise and could never execute,
            # so it has been removed.
            raise Exception('Multiple Shops in one DAG is deactivated!')

        self._metadata.update({'shop_id': self.shop_id})
        self.execute_for_shop(
            context,
            self.shop_id,
            params,
            self.source_conn_id,
            self.auth_type,
        )
Exemplo n.º 5
0
    def __init__(
            self,
            shop_id,
            shopify_object,
            auth_type,
            filter_fields={},
            api_version=None,
            get_transactions_with_orders=False,
            get_events_with_orders=False,
            get_inventory_data_with_product_variants=False,
            page_limit=250,  # API Call pagination limit
            *args,
            **kwargs):
        """Validate the Shopify extraction settings and set up the operator.

        Args:
            shop_id: a single shop identifier; non-string iterables are
                rejected.
            shopify_object: must be a key of ``self._accepted_objects``.
            auth_type: either "access_token" or "basic_auth".
            filter_fields: dict whose keys must all be keys of the
                ``self._accepted_objects`` entry for ``shopify_object``.
                NOTE: the default dict is shared between calls; this method
                only reads it.
            api_version: falls back to ``self._current_api_version`` when
                falsy; must be in ``self._acceptable_api_versions``.
            get_transactions_with_orders: only allowed for "orders".
            get_events_with_orders: only allowed for "orders".
            get_inventory_data_with_product_variants: only allowed for
                "products".
            page_limit: int between 1 and 250 (inclusive).
            *args, **kwargs: forwarded to the parent initialiser.
                ``primary_key_column_name`` defaults to "id",
                ``source_conn_id`` defaults to "__none__", and
                ``extract_strategy`` is forced to ``EC.ES_FULL_REFRESH``
                when the object's entry has a truthy
                ``_is_drop_and_replace``.

        Raises:
            Exception: if any of the validations above fails.
        """
        if is_iterable_not_string(shop_id):
            raise Exception("Multiple shops in one DAG is deprecated!")

        if get_transactions_with_orders and not shopify_object == "orders":
            raise Exception("transactions can only be pulled for orders!")

        if get_events_with_orders and not shopify_object == "orders":
            raise Exception("events can only be pulled for orders!")

        if (get_inventory_data_with_product_variants
                and not shopify_object == "products"):
            raise Exception("inventory data may only be pulled with products!")

        if shopify_object not in self._accepted_objects:
            # Parenthesise the concatenated template: previously .format()
            # bound only to the second literal, leaving "{0}" unformatted
            # in the raised message.
            raise Exception((
                "{0} is not in the list of accepted objects!"
                " accepted objects: {1}"
            ).format(
                shopify_object,
                ", ".join(self._accepted_objects.keys()),
            ))

        object_definition = self._accepted_objects[shopify_object]

        if object_definition.get("_is_drop_and_replace"):
            kwargs["extract_strategy"] = EC.ES_FULL_REFRESH

        if auth_type not in ("access_token", "basic_auth"):
            raise Exception("auth_type must be access_token or basic_auth!")

        if not isinstance(filter_fields, dict):
            raise Exception("filter_fields must be a dictionary!")
        # Validate against the entry of the requested object. The previous
        # code looked up the literal string "shopify_object" instead of the
        # variable.
        for key in filter_fields:
            if key not in object_definition:
                raise Exception(
                    "invalid key {0} in filter fields!".format(key))

        api_version = api_version or self._current_api_version
        if api_version not in self._acceptable_api_versions:
            raise Exception(
                "{0} is not a valid api version! valid versions: {1}".format(
                    api_version,
                    ", ".join(self._acceptable_api_versions),
                ))

        # Exact type check on purpose: isinstance() would let bools through.
        if type(page_limit) is not int or not 1 <= page_limit <= 250:
            raise Exception(
                "Page limit must be a positive integer not exceeding 250!")

        kwargs.setdefault("primary_key_column_name", "id")

        # source conn id is not required on operator call level! avoid error
        kwargs.setdefault("source_conn_id", "__none__")

        super().__init__(*args, **kwargs)

        self.shop_id = shop_id
        self.shopify_object = shopify_object
        self.auth_type = auth_type
        self.filter_fields = filter_fields
        self.api_version = api_version
        self.page_limit = page_limit
        self.get_transactions_with_orders = get_transactions_with_orders
        self.get_events_with_orders = get_events_with_orders
        self.get_inventory_data_with_product_variants = (
            get_inventory_data_with_product_variants)
Exemplo n.º 6
0
    def ewah_execute(self, context):
        """Build the request parameters and run the extraction for the shop.

        The parameters are assembled from the accepted-object entry of
        ``self.shopify_object`` (keys not starting with "_" whose value is
        not None), then ``self.filter_fields``, then the page limit. When
        truthy, ``self.data_until`` is added under the second timestamp
        field name and ``self.data_from`` under the first one, both rendered
        as UTC strings.

        Args:
            context: passed through unchanged to ``execute_for_shop``.

        Raises:
            Exception: if ``self.shop_id`` is a non-string iterable.
        """

        def datetime_to_string(dt, format_string):
            """Render ``dt`` in UTC according to ``format_string``."""
            utc = timezone("UTC")
            # aware datetimes are converted to UTC, naive ones labelled UTC
            dt = dt.astimezone(utc) if dt.tzinfo else dt.replace(tzinfo=utc)
            dt_string = dt.strftime(format_string)
            if "%z" in format_string:
                # add colon between the last two pairs of characters; this
                # assumes the UTC offset is the final token of the format
                return dt_string[:-2] + ":" + dt_string[-2:]
            return dt_string

        object_metadata = self._accepted_objects[self.shopify_object]
        self.object_metadata = object_metadata
        params = {
            key: val
            for key, val in object_metadata.items()
            if val is not None and not key.startswith("_")
        }
        params.update(self.filter_fields)
        params["limit"] = self.page_limit

        timestamp_fields = object_metadata.get(
            "_timestamp_fields",
            self._default_timestamp_fields,
        )
        timestamp_format_string = object_metadata.get(
            "_datetime_format",
            self._default_datetime_format,
        )
        if self.data_until:
            params[timestamp_fields[1]] = datetime_to_string(
                self.data_until,
                timestamp_format_string,
            )
        if self.data_from:
            # Store a plain string like the data_until bound above; a stray
            # trailing comma used to wrap this value in a 1-tuple.
            params[timestamp_fields[0]] = datetime_to_string(
                self.data_from,
                timestamp_format_string,
            )

        if is_iterable_not_string(self.shop_id):
            # Running several shops in one DAG is deactivated. The former
            # per-shop loop sat after this raise and could never execute,
            # so it has been removed.
            raise Exception("Multiple Shops in one DAG is deactivated!")

        self._metadata.update({"shop_id": self.shop_id})
        self.execute_for_shop(
            context,
            self.shop_id,
            params,
            self.source_conn.conn_id,
            self.auth_type,
        )