示例#1
0
 def __init__(self, target):
     """Set up the crawler application for one configured target.

     Args:
         target: Key looked up under the ``"targets"`` section of the
             configuration returned by ``getConfig()``.

     Raises:
         Exception: If the configuration holds no truthy entry for
             ``target``.
     """
     super().__init__()
     self.crawler = WebCrawler()
     # A missing "targets" section is handled like a missing target.
     known_targets = getConfig().get("targets", {})
     settings = known_targets.get(target)
     self.logger = getLogger(self.__class__.__name__)
     if not settings:
         message = "target is not found in config."
         self.logger.error(message)
         raise Exception(message)
     self.logger.info(f"Application is processing target {target}")
     self.target_config = settings
     self.max_threads = int(getConfig()["configs"]["max_threads"])
     self.sleep_time = int(settings["sleep"])
     self.detail_urls, self.items = [], []
示例#2
0
 def get_keywords(self):
     """Return the keywords extracted from ``self.get_reviews()`` as a list.

     The number of keywords requested is the ``'topK'`` entry of the
     ``'task1'`` configuration section, defaulting to 10. Weights are not
     requested from the extractor.
     """
     review_text = self.get_reviews()
     keyword_limit = getConfig()['task1'].get('topK', 10)
     tags = analyse.extract_tags(review_text,
                                 topK=keyword_limit,
                                 withWeight=False)
     return list(tags)
示例#3
0
 def __init__(self):
     """Create the SQS resource from the configured AWS credentials.

     The configuration returned by ``configure.getConfig()`` is stored on
     ``self.configuration``; its ``'keyid'``, ``'key'`` and ``'region'``
     entries are passed to ``boto3.Session``.
     """
     settings = configure.getConfig()
     self.configuration = settings
     session_kwargs = {
         'aws_access_key_id': settings['keyid'],
         'aws_secret_access_key': settings['key'],
         'region_name': settings['region'],
     }
     self.sqs = boto3.Session(**session_kwargs).resource('sqs')
示例#4
0
class RrysSpider(scrapy.Spider):
    """Spider that reads a list page and then each entry's detail page.

    ``allowed_domains`` and ``start_urls`` are taken from the ``"rrys"``
    section of the configuration returned by ``getConfig()``.
    """

    name = "rrys"
    allowed_domains = getConfig()[name]["allowed_domains"]
    start_urls = getConfig()[name]["start_urls"]

    def parse(self, response):
        """Yield one detail-page request per list entry.

        Each request carries a ``RrysItem`` with ``seq``, ``title`` and
        ``link`` already filled in under ``meta['item']``; the response is
        handled by ``parse_detail``.
        """
        rows = response.xpath(
            '//body/div[@class="middle-box"]/div[@class="w"]/div[1]/div/ul/li')
        for row in rows:
            entry = RrysItem()
            seq_text = row.xpath("span/text()").extract_first().strip()
            entry['seq'] = int(seq_text)
            entry['title'] = row.xpath("a/text()").extract_first().strip()
            href = row.xpath("a/@href").extract_first().strip()
            entry['link'] = "http://www.rrys2019.com" + href
            yield scrapy.Request(entry['link'],
                                 meta={'item': entry},
                                 callback=self.parse_detail)

    def parse_detail(self, response):
        """Fill in the remaining fields of the item from ``meta`` and yield it.

        Sets ``ranking``, ``classification``, ``favorites`` and ``cover``,
        in that order, from the detail page.
        """

        def first_match(query):
            # First result of the XPath query, surrounding whitespace removed.
            return response.xpath(query).extract_first().strip()

        def digits_as_int(text):
            # Drop every non-digit character, then parse what is left.
            return int(re.sub(r"[^0-9]", "", text))

        item = response.meta['item']

        item['ranking'] = digits_as_int(first_match(
            '//body/div[@class="middle-box"]/div[@class="w"]'
            '/div[1]/div[2]/div[1]/ul/li[1]/p/text()'))

        # The classification is the part of the level icon URL that remains
        # after the fixed prefix and suffix are removed.
        icon_url = first_match(
            '//body/div[@class="middle-box"]/div[@class="w"]'
            '/div[1]/div[1]/div[2]/div[2]/div[@class="level-item"]/img/@src')
        without_prefix = icon_url.replace(
            "http://js.jstucdn.com/images/level-icon/", "")
        item['classification'] = without_prefix.replace("-big-1.png",
                                                        "").upper()

        item['favorites'] = digits_as_int(
            first_match('//li[@id="score_list"]//div[1]/div[2]'))

        item['cover'] = first_match(
            '//body/div[@class="middle-box"]/div[@class="w"]'
            '/div[1]/div[1]/div[2]/div[1]/div[1]/a/img/@src')
        yield item
示例#5
0
 def __init__(self):
     """Create the logger and the pymysql-backed connection pool.

     Connection settings come from the ``['task2']['db']`` section of the
     configuration returned by ``getConfig()``: ``host``, ``name``,
     ``user``, ``password``, ``charset`` and ``use_unicode``.
     """
     self.logger = getLogger(self.__class__.__name__)
     # Look the section up once so every parameter comes from the same
     # configuration snapshot, instead of calling getConfig() per key.
     db_config = getConfig()['task2']['db']
     dbparams = dict(host=db_config['host'],
                     db=db_config['name'],
                     user=db_config['user'],
                     passwd=db_config['password'],
                     charset=db_config['charset'],
                     cursorclass=pymysql.cursors.DictCursor,
                     use_unicode=db_config['use_unicode'])
     self.__dbpool = adbapi.ConnectionPool('pymysql', **dbparams)
 def __init__(self, customer_id):
     """Load the customer and the discount settings.

     Args:
         customer_id: Identifier passed to ``Customer.get_customer``.

     Raises:
         Exception: If ``Customer.get_customer`` returns a falsy value.
     """
     super().__init__()
     self.__customer_id = customer_id
     self.__customer = Customer.get_customer(customer_id)
     if not self.__customer:
         raise Exception("Customer is not found.")
     # Read the configuration once so all thresholds and discounts come
     # from the same snapshot, instead of calling getConfig() per key.
     config = getConfig()
     # Defaults are strings, converted the same way as configured values.
     self.vip_discount = float(config.get('vip_discount', '0.8'))
     self.vip_threshold = float(config.get('vip_threshold', '200.0'))
     self.vip_item_threshold = int(config.get('vip_item_threshold', '10'))
     self.vip_item_discount = float(config.get('vip_item_discount', '0.85'))
     self.generic_discount = float(config.get('generic_discount', '0.9'))
     self.generic_threshold = float(config.get('generic_threshold', '200.0'))
示例#7
0
 def __init__(self):
     """Prepare the logger, the review URL and the web crawler.

     The review URL is read from ``getConfig()['task1']['review_url']``.
     """
     super().__init__()
     logger_name = self.__class__.__name__
     self.logger = getLogger(logger_name)
     task_settings = getConfig()['task1']
     self.reviewUrl = task_settings['review_url']
     self.web_crawler = WebCrawler()
示例#8
0
 def __init__(self):
     """Prepare the logger, comment URL, crawler, DB helper and comment list.

     The comment URL is read from ``getConfig()['task2']['comment_url']``.
     """
     logger_name = self.__class__.__name__
     self.logger = getLogger(logger_name)
     task_settings = getConfig()['task2']
     self.commentUrl = task_settings['comment_url']
     self.web_crawler = WebCrawler()
     self.db_helper = DbHelper()
     # Starts out empty.
     self.__comments = list()
示例#9
0
 def start(self):
     """Register the connection callbacks and connect to the broker.

     The broker address and port are read from the ``'broker_addr'`` and
     ``'broker_port'`` entries of ``configure.getConfig()``; the port is
     converted to ``int``.
     """
     client = self.client
     client.on_connect = self.__onConnect
     client.on_disconnect = self.__onDisconnect
     broker_host = configure.getConfig()['broker_addr']
     broker_port = int(configure.getConfig()['broker_port'])
     client.connect(broker_host, broker_port)
示例#10
0
 def printed_currency(val):
     """Format *val* as a grouped currency string in the configured locale.

     Every call sets the ``LC_ALL`` locale to the ``'locale'``
     configuration entry, falling back to ``'en_US'`` when it is absent.
     """
     locale_name = getConfig().get('locale', 'en_US')
     locale.setlocale(locale.LC_ALL, locale_name)
     formatted = locale.currency(val, grouping=True)
     return formatted
示例#11
0
def getLogger(name):
    """Return the logger called *name* after applying the configured setup.

    The level name and the format string come from the ``"configs"``
    section of ``getConfig()``. The level name is upper-cased and mapped
    through ``LOG_LEVELS`` before being passed to ``logging.basicConfig``.
    """
    level_name = getConfig()["configs"]["log_level"]
    message_format = getConfig()["configs"]["log_format"]
    basic_settings = {
        "level": LOG_LEVELS[level_name.upper()],
        "format": message_format,
    }
    logging.basicConfig(**basic_settings)
    return logging.getLogger(name)