Exemplo n.º 1
0
 def format_date(self, key, rules, mp=None, ct=None):
     """
     Resolve the start or end timestamp of a sale period.

     When ``rules`` is the literal ``'TODAY'`` the current timestamp is
     rendered with a fixed time of day and a hard-coded ``+07:00``
     offset: ``00:00:00`` for ``key == 'start_time'`` and ``23:59:59``
     for every other key. Otherwise ``rules`` is forwarded to
     ``ct.parse`` together with ``self.raw_data`` and the first value of
     the converted result is used.

     :param key: field being formatted; only ``'start_time'`` is
         special-cased, any other value is treated as the end time
     :param rules: ``'TODAY'`` or a parsing rule passed to ``ct.parse``
     :param mp: marketplace config; ``mp['rule_type']`` is read on the
         non-``'TODAY'`` path
     :param ct: controller object providing ``parse``
     :return: the formatted date, or ``None`` when nothing was parsed or
         an error was logged
     """
     result_date = None
     try:
         if rules == 'TODAY':
             if key == 'start_time':
                 date_format = "%Y-%m-%dT00:00:00.000+07:00"
             else:
                 date_format = "%Y-%m-%dT23:59:59.000+07:00"
             result_date = date_formatter(
                 current_timestamp(milliseconds=False),
                 date_format=date_format)
         else:
             raw_date = ct.parse(rule_type=mp['rule_type'],
                                 data=self.raw_data,
                                 rules=rules,
                                 flattening=False)
             if raw_date:
                 # list() keeps the "first value" lookup working when
                 # values() is a non-subscriptable view (Python 3).
                 converted = self.convert(raw_date)
                 result_date = list(converted.values())[0]
     except Exception as e:
         # Best effort: log and fall through with whatever was resolved.
         logger('{}: {}'.format(self.__class__.__name__, str(e)))
     # Returned after the try statement, not from ``finally``: a return in
     # ``finally`` would also discard KeyboardInterrupt / SystemExit.
     return result_date
Exemplo n.º 2
0
 def publish(self, data):
     """
     Serialise ``data`` to JSON and hand it to ``self.producer``.

     The payload is additionally passed to ``logger`` when ``self.debug``
     is truthy. Exceptions raised while serialising, sending or logging
     reach the caller unchanged.

     :param data: JSON-serialisable payload
     :return: None
     """
     send = self.producer.put_message
     send(json.dumps(data))
     if not self.debug:
         return
     logger(message=data)
Exemplo n.º 3
0
 def build_url(self, url, mp):
     """
     Prefix a root-relative URL with the marketplace link.

     ``url`` is rewritten to ``mp['mp_link'] + url`` only when
     ``validate(url, data_type='url')`` is falsy and ``url`` starts with
     ``'/'``. In every other case, including when an error is logged,
     the value is returned as received.

     :param url: candidate URL
     :param mp: marketplace config; ``mp['mp_link']`` is read when the
         URL needs a prefix
     :return: the possibly prefixed URL
     """
     try:
         if not validate(url, data_type='url') and url.startswith('/'):
             url = '{}{}'.format(mp['mp_link'], url)
     except Exception as e:
         # Best effort: log and return the URL untouched.
         logger('{}: {}'.format(self.__class__.__name__, str(e)))
     # Returned after the try statement, not from ``finally``: a return in
     # ``finally`` would also discard KeyboardInterrupt / SystemExit.
     return url
Exemplo n.º 4
0
 def format_item_url(self, mp=None, ct=None):
     """
     Build the item URL from the marketplace URL template.

     Every mapping in ``self.raw_data`` is merged into one dict (later
     entries win on duplicate keys) which is passed to
     ``ct.fill_arguments`` as the ``arguments`` for
     ``mp['mp_item_url']``.

     :param mp: Marketplace config
     :param ct: Object Controller
     :return: the filled URL, or ``self.item`` when an error was logged
     """
     result = self.item
     try:
         arguments = dict(
             pair for entry in self.raw_data for pair in entry.items())
         result = ct.fill_arguments(mp['mp_item_url'], arguments=arguments)
     except Exception as e:
         # Best effort: log and fall back to the raw item.
         logger('{}: {}'.format(self.__class__.__name__, str(e)))
     # Returned after the try statement, not from ``finally``: a return in
     # ``finally`` would also discard KeyboardInterrupt / SystemExit.
     return result
Exemplo n.º 5
0
 def format_number(self, key, mp):
     """
     Convert ``self.item`` to a number, scaling prices when required.

     A falsy ``self.item`` yields ``0``. Otherwise the item is passed to
     ``validate(..., data_type=long)``; for the ``'price_before'`` and
     ``'price_after'`` keys the validated value is then floor-divided by
     ``mp['price_divider']``. If an error is logged part-way through,
     the last value assigned so far is returned.

     :param key: field name being formatted
     :param mp: marketplace config; ``mp['price_divider']`` is read for
         price keys only
     :return: whatever ``validate`` produced (presumably an integer --
         confirm against ``validate``), optionally divided; ``0`` for an
         empty item
     """
     result = 0
     try:
         if self.item:
             result = validate(self.item, data_type=long)
             if key in ['price_before', 'price_after']:
                 result = result // mp['price_divider']
     except Exception as e:
         # Best effort: log and return the value computed so far.
         logger('{}: {}'.format(self.__class__.__name__, str(e)))
     # Returned after the try statement, not from ``finally``: a return in
     # ``finally`` would also discard KeyboardInterrupt / SystemExit.
     return result
Exemplo n.º 6
0
 def html_parser(self, rule, attr=None):
     """
     Run a selector against ``self.bs``.

     Without ``attr`` the full ``select`` result for ``rule`` is
     returned. With a truthy ``attr`` the first match from
     ``select_one`` is looked up by that key instead.

     :param rule: selector handed to ``self.bs``
     :param attr: optional key to read from the first match
     :return: the selection, the looked-up value, or ``None`` when an
         error was logged
     """
     try:
         if not attr:
             return self.bs.select(rule)
         first_match = self.bs.select_one(rule)
         return first_match[attr]
     except Exception as e:
         message = '{}: {} ({}:{})'.format(self.__class__.__name__, str(e),
                                           rule, attr)
         logger(message)
         return None
Exemplo n.º 7
0
 def format_image_url(self, key, mp=None, ct=None):
     """
     Turn ``self.item`` into an image URL.

     An item for which ``validate(self.item, data_type='url')`` is
     truthy is returned as is. Otherwise the item is run through
     ``self.convert``; when ``mp['mp_item_image_url']`` is truthy the
     converted value is substituted into that template under ``key``
     via ``ct.fill_arguments``, else the converted value itself is
     returned.

     :param key: Rule key, used as the template argument name
     :param mp: Marketplace config
     :param ct: Object Controller
     :return: the image URL, or ``self.item`` when an error was logged
     """
     image_url = self.item
     try:
         if not validate(self.item, data_type='url'):
             if mp['mp_item_image_url']:
                 image_url = ct.fill_arguments(
                     mp['mp_item_image_url'],
                     {key: self.convert(self.item)})
             else:
                 image_url = self.convert(self.item)
     except Exception as e:
         # Best effort: log and fall back to the raw item.
         logger('{}: {}'.format(self.__class__.__name__, str(e)))
     # Returned after the try statement, not from ``finally``: a return in
     # ``finally`` would also discard KeyboardInterrupt / SystemExit.
     return image_url
Exemplo n.º 8
0
 def extractor(data):
     """
     Extracting bs4.element.* to text.

     Only list input is transformed: ``None`` entries are dropped, each
     ``bs4.element.Tag`` / ``bs4.element.NavigableString`` entry is
     replaced by the last node returned by ``find_all(text=True)``, and
     the remaining entries are joined into one string. Any other input
     is returned as received, as is the original list when an error is
     logged.

     :param data: bs4.* or other type.
     :return: string
     """
     try:
         if isinstance(data, list):
             parts = []
             for entry in data:
                 if entry is None:
                     continue
                 if isinstance(entry, (bs4.element.Tag,
                                       bs4.element.NavigableString)):
                     entry = entry.find_all(text=True)[-1]
                 parts.append(entry)
             data = "".join(parts)
     except Exception as e:
         # Best effort: log under this function's name and fall through.
         logger('{}: {}'.format(inspect.currentframe().f_code.co_name,
                                str(e)))
     # Returned after the try statement, not from ``finally``: a return in
     # ``finally`` would also discard KeyboardInterrupt / SystemExit.
     return data
Exemplo n.º 9
0
    def json_parser(self, rule, data):
        """
        Look up ``rule`` in ``data``, optionally picking one list element.

        When ``IS_ARRAY`` matches ``rule``, the first group replaces the
        rule used as the lookup key and a non-empty second group is kept
        as an element index. The index is applied only if the looked-up
        value is a list.

        :param rule: lookup key, possibly carrying an index part
        :param data: container indexed with the (stripped) rule
        :return: the looked-up value, or an empty dict when the key is
            missing
        """
        try:
            position = None
            found = IS_ARRAY.search(rule)
            if found:
                rule = found.group(1)
                position = found.group(2) or None

            value = data[rule]
            if position and isinstance(value, list):
                value = value[int(position)]
            return value
        except KeyError as e:
            logger('{}: KeyError: {}'.format(self.__class__.__name__, str(e)))
            return {}
Exemplo n.º 10
0
def run(mp_name=None,
        output=None,
        file_path=None,
        file_name=None,
        publish=None,
        debug=None):
    """
    Collect the items of one marketplace and optionally export or
    publish them.

    The sessions are fetched through ``Controller``; for every session
    the item index URL is filled in and its items are fetched. Each
    item is mapped through ``ct.item_template()``: every template key
    is parsed with the marketplace rule named by the template and then
    cleaned up by the matching ``Formatter`` helper. Items that did not
    yield their own ``start_time`` / ``end_time`` fall back to the
    period read from the index page (only read when
    ``marketplace['period_source'] == 'root'``).

    :param mp_name: market place name
    :param output: type of file for output
    :param file_path: file path, required when ``output`` is set
    :param file_name: file name, defaults to ``mp_name``
    :param publish: publish data to NSQ
    :param debug: log intermediate values when truthy
    :return: list of dict; the ``save`` attribute of the ``Export``
        object when ``output`` and ``file_path`` are given; ``None``
        when an error was logged
    """
    try:
        shop_items = []
        start_time = end_time = None

        ct = Controller(mp_name=mp_name)
        marketplace = ct.mp

        session_arguments = list_to_dict(
            get_arguments(marketplace['mp_sessions_url']))

        if debug:
            logger("{}: {}".format("session_arguments", session_arguments))

        ses, html = ct.get_sessions(arguments=session_arguments)

        if debug:
            logger("{}: {}".format("response", remove_whitespace(str(html))))
            logger("{}: {}".format("session", ses))

        items_url = marketplace['mp_item_index_url']
        items_arguments = list_to_dict(get_arguments(items_url))

        if debug:
            logger("{}: {}".format("items_arguments", items_arguments))

        # Get start & end flash sale date from index page
        if marketplace['period_source'] == 'root':
            ft = Formatter(data=html)
            start_time = ft.format_date(
                key='start_time',
                rules=marketplace['rule_item_start_time'],
                mp=marketplace,
                ct=ct)
            end_time = ft.format_date(key='end_time',
                                      rules=marketplace['rule_item_end_time'],
                                      mp=marketplace,
                                      ct=ct)

        if debug:
            logger("{}: {}".format("start_time", start_time))
            logger("{}: {}".format("end_time", end_time))

        # Running item counter across all sessions; debug output only.
        index = 0
        # Only the values stored under the first key of ``ses`` are used.
        for s in ses[next(iter(ses))]:
            items_arguments['id'] = s
            target_url = ct.fill_arguments(items_url, items_arguments)

            if debug:
                logger("{}: {}".format("target_url", target_url))

            items = ct.get_items(target_url)

            if debug:
                logger("{}: {}".format("total items",
                                       len(items[next(iter(items))])))

            for item in items[next(iter(items))]:
                shop_item = dict()
                template = ct.item_template()
                shop_item['marketplace'] = mp_name

                if debug:
                    logger("{0:<11}: {1:<11}".format("item index", index))
                    logger("{0:<11}: {1:<11}".format(
                        "item", remove_whitespace(str(item))))

                # items() instead of iteritems() so the loop also runs
                # on Python 3 dicts.
                for t_key, t_value in template.items():
                    value = ct.parse(rule_type=marketplace['rule_type'],
                                     data=item,
                                     rules=marketplace[t_value['rule']],
                                     flattening=False)

                    # Keys whose rule matched nothing are left out of
                    # shop_item entirely.
                    if len(value):
                        ft = Formatter(value)

                        if debug:
                            print()
                            logger("{0:<11}: {1:<11}".format("key", t_key))
                            logger("{0:<11}: {1:<11}".format(
                                "raw_value", value))

                        if len(value) > 1 and t_key == 'url':
                            # Several fragments: assemble the URL from
                            # the marketplace template.
                            value = ft.format_item_url(mp=marketplace, ct=ct)
                        else:
                            raw_value = value[0]
                            value = raw_value[next(iter(raw_value))]

                            if t_key == 'image':
                                value = ft.format_image_url(key=t_key,
                                                            mp=marketplace,
                                                            ct=ct)
                            else:
                                if t_key in ['start_time', 'end_time']:
                                    value = date_formatter(value)
                                elif t_key in [
                                        'price_before', 'price_after',
                                        'discount'
                                ]:
                                    value = ft.format_number(key=t_key,
                                                             mp=marketplace)
                                else:
                                    value = ft.item

                        if t_key in ['image', 'url']:
                            value = ft.build_url(value, mp=marketplace)

                        if debug:
                            logger("{0:<11}: {1:<11}".format(
                                "clean_value", value))

                        shop_item[t_key] = remove_whitespace(value)

                # .get(): the key is absent when its rule matched
                # nothing, and indexing would raise KeyError and abort
                # the whole run instead of applying the fallback.
                if not shop_item.get('start_time'):
                    shop_item['start_time'] = date_formatter(start_time)

                if not shop_item.get('end_time'):
                    shop_item['end_time'] = date_formatter(end_time)

                if debug:
                    print()
                    logger("{0:<11}: {1:<11}".format(
                        "result", pformat(shop_item, indent=4)))
                    print()
                    print()

                shop_items.append(shop_item)
                index += 1

        if output:
            if file_path:
                file_name = file_name if file_name else mp_name
                # NOTE(review): ``save`` is returned without being
                # called -- presumably a property; confirm on Export.
                return Export(data=shop_items,
                              file_path=file_path,
                              output_format=output,
                              file_name=file_name).save
            else:
                # Caught by the handler below, so this is only logged.
                raise Exception('File path required')

        if publish:
            nsq = Nsq(debug=debug)
            for item in shop_items:
                nsq.publish(item)

        return shop_items

    except Exception as e:
        # Top-level boundary: log the failure and report it as None.
        logger(str(e), level='error')
        return None