Example #1
    def _prepare_and_call(self, grouped_data):
        # Build, per group, the Google search terms that have not been used
        # yet: the default set minus the terms already stored for the group.
        to_parse = {}
        default_searchterms = set(GOOGLE_SEARCHTERMS)
        for k, v in grouped_data.items():
            used_terms = v.get('searchterms', set())
            if used_terms:
                to_parse[k] = {
                    'update': v.get('update'),
                    'searchterms': list(default_searchterms - used_terms)
                }

        logger.info('-' * 50)
        logger.info(to_parse)
        logger.info('-' * 50)
        self.write_json(to_parse)

        # Schedule the Google spider; companies is deliberately empty here
        # because the per-group work items travel through the JSON file.
        google_data = self.scrapyd_data.copy()
        google_data.update(
            spider=GOOGLE_NAME,
            companies=SPLITTER.join([]),
            json_data=self.json_path
        )
        run_scrapy_process(self.project_name, GOOGLE_NAME, google_data)

        return {}
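
Every snippet here hands the actual crawl off to run_scrapy_process, which is not part of the examples. A minimal sketch of what such a helper could look like, assuming the jobs are scheduled through Scrapyd's schedule.json endpoint (the URL, the requests dependency and the whole function body are assumptions, not taken from the original code):

import requests

def run_scrapy_process(project_name, spider_name, scrapyd_data,
                       scrapyd_url='http://localhost:6800'):
    # Post the job to Scrapyd; every extra key in scrapyd_data is forwarded
    # to the spider as a string argument.
    payload = dict(scrapyd_data)
    payload.update(project=project_name, spider=spider_name)
    response = requests.post('%s/schedule.json' % scrapyd_url, data=payload)
    response.raise_for_status()
    return response.json()
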
Example #2
    def execute_websites(self, force_update=False):
        # Schedule the Google spider to find websites for companies that are
        # still missing one.
        companies_w_websites = self.get_wibsites_missing(force_update)
        self.logger.info(u"Google get websites for next companies: %s" % companies_w_websites)
        google_data = self.scrapyd_data.copy()
        google_data.update(spider=GOOGLE_NAME, companies=SPLITTER.join(companies_w_websites), only_website=True)
        run_scrapy_process(self.project_name, GOOGLE_NAME, google_data)
        del companies_w_websites
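
On the receiving end, Scrapyd delivers extras such as companies and only_website to the spider as plain string arguments, so they have to be decoded again there. A hypothetical skeleton of the spider behind GOOGLE_NAME, only to illustrate that coercion ('|' stands in for the unknown SPLITTER value; none of these names come from the examples):

import scrapy

class GoogleSpider(scrapy.Spider):
    # Hypothetical skeleton; the real spider registered under GOOGLE_NAME
    # is not shown in these examples.
    name = 'google'

    def __init__(self, companies='', only_website=False, *args, **kwargs):
        super(GoogleSpider, self).__init__(*args, **kwargs)
        # Spider arguments arrive as strings, so split the joined company
        # list and turn the flag back into a boolean.
        self.companies = [c for c in companies.split('|') if c]
        self.only_website = str(only_website).lower() in ('1', 'true')
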
Example #3
    def execute_update(self, force_update=False):
        # Re-crawl Xing for companies whose data is due for an update; the
        # company list travels through the JSON file, not the companies arg.
        companies_xing_update = self.get_companies_on_update(force_update)
        self.logger.info(u"Update xing for next companies: %s" %
                         companies_xing_update)
        xing_data = self.scrapyd_data.copy()
        xing_data.update(spider=XING_NAME, json_data=self.json_path)
        self.write_json({'manual_data': companies_xing_update})
        run_scrapy_process(self.project_name, XING_NAME, xing_data)
        del companies_xing_update
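
Several of these snippets call self.write_json before scheduling, another helper that is not included. A possible minimal version, assuming it simply serialises the payload to self.json_path so the scheduled spider can load it back through its json_data argument:

import json

def write_json(self, data):
    # Hypothetical method of the executor class used above: persist the
    # prepared payload to the path that is later passed on as json_data.
    with open(self.json_path, 'w') as fh:
        json.dump(data, fh)
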
Example #4
    def execute_update(self, force_update=False):
        # Same update flow as the Xing example, but for the Wikipedia spider.
        companies_wiki_update = self.get_companies_on_update(force_update)
        self.logger.info(u"Update wikipedia for next companies: %s" % companies_wiki_update)
        wiki_data = self.scrapyd_data.copy()
        wiki_data.update(
            spider=WIKIPEDIA_NAME,
            json_data=self.json_path
        )
        self.write_json({'manual_data': companies_wiki_update})
        run_scrapy_process(self.project_name, WIKIPEDIA_NAME, wiki_data)
        del companies_wiki_update
Example #5
    def execute_search(self, force_update=False):
        # Crawl Xing for companies that do not have a Xing URL yet;
        # dont_filter is presumably forwarded to the spider to disable
        # Scrapy's duplicate-request filter.
        companies_w_xing_url = self.get_missing_url(force_update)
        self.logger.info(u"Parse xing for next companies: %s" %
                         companies_w_xing_url)
        xing_data = self.scrapyd_data.copy()
        xing_data.update(spider=XING_NAME,
                         json_data=self.json_path,
                         dont_filter=True)
        self.write_json({'companies': companies_w_xing_url})
        run_scrapy_process(self.project_name, XING_NAME, xing_data)
        del companies_w_xing_url
Example #6
    def execute_search(self, force_update=False):
        # Crawl Wikipedia for companies that are still missing a URL or a
        # revenue figure.
        companies_w_wiki_url_revenue = self.get_missing_url_revenue(force_update)
        self.logger.info(u"Parse wikipedia for next companies: %s" % companies_w_wiki_url_revenue)
        wiki_data = self.scrapyd_data.copy()
        wiki_data.update(
            spider=WIKIPEDIA_NAME,
            json_data=self.json_path,
            dont_filter=True
        )
        # keys() and values() come from the same dict with no mutation in
        # between, so the two lists stay aligned pairwise.
        self.write_json({
            'companies': companies_w_wiki_url_revenue.keys(),
            'urls': companies_w_wiki_url_revenue.values()
        })
        run_scrapy_process(self.project_name, WIKIPEDIA_NAME, wiki_data)
        del companies_w_wiki_url_revenue
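
For completeness, a hypothetical sketch of how the spider behind WIKIPEDIA_NAME could consume the file written in Example #6; the class, its name and the placeholder parse logic are all assumptions, not part of the original code:

import json

import scrapy

class WikipediaSpider(scrapy.Spider):
    name = 'wikipedia'

    def __init__(self, json_data=None, dont_filter=False, *args, **kwargs):
        super(WikipediaSpider, self).__init__(*args, **kwargs)
        self.json_data = json_data
        self.dont_filter = str(dont_filter).lower() in ('1', 'true')

    def start_requests(self):
        with open(self.json_data) as fh:
            payload = json.load(fh)
        # 'companies' and 'urls' were written from the same dict, so the two
        # lists line up pairwise.
        for company, url in zip(payload['companies'], payload['urls']):
            yield scrapy.Request(url, dont_filter=self.dont_filter,
                                 meta={'company': company})

    def parse(self, response):
        # Placeholder extraction; the real parsing of revenue etc. is not
        # shown in these examples.
        yield {'company': response.meta['company'], 'url': response.url}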