Пример #1
0
class CrawlerTest(unittest.TestCase):
    """Network-backed checks that drive ``BaseCrawler._get_response``.

    Methods whose names start with an underscore do not match the default
    ``test`` prefix of the unittest loader, so they are not collected.
    """

    def setUp(self):
        """Attach a new ``BaseCrawler`` instance to ``self.crawler``."""
        self.crawler = BaseCrawler()

    def _test_requests(self):
        """Request a GitHub page and check that the response is truthy."""
        page = self.crawler._get_response("https://github.com/jmg")
        self.assertTrue(page)

    def test_cookies(self):
        """Send login data, then request a page that requires a session.

        Both requests go through the same crawler instance. The stated
        intent is to confirm that the login succeeded and that the second
        request retrieves a Facebook page only visible when logged in.

        NOTE: no assertion is made here; the ``raw_html`` of the second
        response is only written to ``url.html`` in the working directory.
        """
        login_url = "https://www.facebook.com/login.php?login_attempt=1"
        profile_url = "http://www.facebook.com/profile.php?id=1271577281"
        credentials = {'email' : 'user', 'pass': '******'}

        # The login response itself is not inspected.
        self.crawler._get_response(login_url, credentials)
        profile_page = self.crawler._get_response(profile_url)

        with open("url.html", 'w') as dump:
            dump.write(profile_page.raw_html)

    def _test_post(self):
        """Build a ``PostCrawler`` and start it."""
        PostCrawler().start()
Пример #2
0
class CrawlerTest(unittest.TestCase):
    """Test case exercising ``BaseCrawler._get_response`` against live sites.

    Only methods matching the unittest loader's default ``test`` prefix are
    collected; the ``_test_*`` methods are therefore skipped.
    """

    def setUp(self):
        """Store a freshly constructed ``BaseCrawler`` on ``self.crawler``."""
        self.crawler = BaseCrawler()

    def _test_requests(self):
        """Fetch a GitHub URL and assert the returned response is truthy."""
        result = self.crawler._get_response("https://github.com/jmg")
        self.assertTrue(result)

    def test_cookies(self):
        """Issue a login request followed by a request for a profile page.

        The intent is to verify that the login worked and that the second
        request returns a Facebook page that requires being logged in.

        NOTE: nothing is asserted; the second response's ``raw_html`` is
        written to ``url.html`` so it can be inspected by hand.
        """
        form_data = {'email' : 'user', 'pass': '******'}

        # First call: its return value is discarded.
        self.crawler._get_response(
            "https://www.facebook.com/login.php?login_attempt=1", form_data)

        # Second call: made on the same crawler instance.
        profile = self.crawler._get_response(
            "http://www.facebook.com/profile.php?id=1271577281")

        with open("url.html", 'w') as html_file:
            html_file.write(profile.raw_html)

    def _test_post(self):
        """Create a ``PostCrawler`` and call its ``start`` method."""
        post_crawler = PostCrawler()
        post_crawler.start()
Пример #3
0
    def execute(self):
        """Fetch the URL in ``self.args[0]`` and open an embedded IPython shell.

        The fetched data is exposed to the shell under the name ``response``.
        If IPython cannot be imported, ``exit_with_error`` is called with an
        installation hint.

        NOTE(review): ``IPython.Shell.IPShellEmbed`` looks like the embedding
        API of old IPython releases -- confirm the supported IPython version.
        """
        try:
            import IPython
        except ImportError:
            exit_with_error("Please install the ipython console")

        target_url = self.args[0]
        response = BaseCrawler()._get_data(target_url)

        # The extractor result is not used afterwards; the call is kept so
        # that any error it raises still surfaces before the shell starts.
        XPathExtractor().get_object(response)

        namespace = { 'response' : response }
        embedded_shell = IPython.Shell.IPShellEmbed(argv=[], user_ns=namespace)
        embedded_shell()
Пример #4
0
    def execute(self):
        """Request ``self.args[0]`` and drop into an embedded IPython shell.

        The crawler response is made available inside the shell as
        ``response``. When IPython is not importable, ``exit_with_error`` is
        invoked with a message asking the user to install it.

        NOTE(review): ``IPython.Shell.IPShellEmbed`` appears to belong to
        old IPython releases -- confirm which IPython version is targeted.
        """
        try:
            import IPython
        except ImportError:
            exit_with_error("Please install the ipython console")

        requested_url = self.args[0]
        fetcher = BaseCrawler()
        response = fetcher._get_response(requested_url)

        # Result is unused below; the call is retained so it still runs
        # (and can still fail) before the interactive session opens.
        XPathExtractor().get_object(response)

        shell_namespace = {'response': response}
        IPython.Shell.IPShellEmbed(argv=[], user_ns=shell_namespace)()
Пример #5
0
    def test_generated_scrapers(self):
        """Generate scraper classes from a DSL snippet and run each one.

        A ``Generator`` is built from the DSL text and ``settings``; its
        entities are generated first, then its scraper classes. Every
        generated class is instantiated and asked to scrape one response
        fetched from python.org. The test passes if nothing raises; no
        explicit assertion is made.
        """
        test_dsl = """PAGE => http://www.python.org/
                      table3.model1 -> /html/body/div[5]/div/div/h1
                      table3.model2 -> /html/body/div
                      table4.model1 -> /html/body/div/span"""

        dsl_generator = Generator(test_dsl, settings)
        dsl_generator.gen_entities()
        generated_classes = dsl_generator.gen_scrapers()

        # A single response is fetched once and handed to every scraper.
        page = BaseCrawler()._get_response("http://www.python.org/")

        for generated_cls in generated_classes:
            scraper = generated_cls()
            scraper.scrape(page)
Пример #6
0
 def setUp(self):
     """Attach a new ``BaseCrawler`` instance to ``self.crawler``."""
     self.crawler = BaseCrawler()
Пример #7
0
 def setUp(self):
     """Store a freshly constructed ``BaseCrawler`` on ``self.crawler``."""
     self.crawler = BaseCrawler()