Exemplo n.º 1
0
def virtual_display_if_enabled(enabled):
    """Return a display object chosen by the ``enabled`` flag.

    When ``enabled`` is truthy, a ``Display`` (``visible=0``, 800x600) is
    created, started and returned.  Otherwise a ``NoopDisplay`` instance is
    returned without starting anything.
    """
    if not enabled:
        return NoopDisplay()

    virtual = Display(visible=0, size=(800, 600))
    virtual.start()
    return virtual
Exemplo n.º 2
0
    def run(self):
        """Run the SelScraper.

        Starts a virtual display, obtains a webdriver, sizes and positions the
        browser window, runs the search when the scraper is startable, and
        always closes the browser window and stops the display afterwards,
        even when one of the steps raises.
        """
        display = Display(visible=0, size=(800, 600))
        display.start()

        try:
            if not self._get_webdriver():
                raise_or_log('{}: Aborting due to no available selenium webdriver.'.format(self.name),
                             exception_obj=SeleniumMisconfigurationError)

            try:
                self.webdriver.set_window_size(400, 400)
                # Tile the 400x400 windows in a grid that is four columns wide.
                self.webdriver.set_window_position(400 * (self.browser_num % 4), 400 * (math.floor(self.browser_num // 4)))
            except WebDriverException as e:
                # A window that cannot be resized is not fatal for scraping.
                out('Cannot set window size: {}'.format(e), lvl=4)

            super().before_search()

            if self.startable:
                self.build_search()
                self.search()
        finally:
            try:
                # The attribute may be missing when no webdriver was obtained.
                if getattr(self, 'webdriver', None):
                    self.webdriver.close()
            finally:
                # Stop the display last: the browser is rendering into it.
                display.stop()
Exemplo n.º 3
0
def webthumb(url, filename, is_flash=False):
    """Screenshot ``url`` in a headless Firefox and save a thumbnail.

    The page is rendered inside a 1200x900 virtual display, captured to
    ``<filename>.tmp``, resized and written to ``filename``.

    Args:
        url: Address of the page to capture.
        filename: Destination path of the thumbnail.
        is_flash: When true the thumbnail is resized to exactly
            ``LIBRARYFILE_THUMB_WIDTH`` x ``LIBRARYFILE_THUMB_HEIGHT`` and the
            helper script is not injected.  Otherwise the helper script is
            injected and the aspect ratio of the screenshot is preserved.

    Returns:
        ``True`` once the thumbnail has been written.

    The browser, the display and the temporary screenshot are released even
    when a step fails.
    """
    script = """
        var s = document.createElement('script');
        s.src = 'http://cruels.net/sb/flashfix.js';
        document.body.appendChild(s);
    """
    print("webthumb(%s, %s)" % (url, filename))
    tmpfile = "%s.tmp" % filename

    display = Display(visible=0, size=(1200, 900))
    display.start()
    try:
        browser = webdriver.Firefox()
        try:
            browser.get(url)
            if is_flash:
                time.sleep(1)
            else:
                browser.execute_script(script)
                # Give the injected script time to act before capturing.
                time.sleep(6)
            browser.get_screenshot_as_file(tmpfile)
        finally:
            browser.quit()
    finally:
        display.stop()

    try:
        img = pil.open(tmpfile)
        width, height = img.size
        if is_flash:
            target_size = (LIBRARYFILE_THUMB_WIDTH, LIBRARYFILE_THUMB_HEIGHT)
        else:
            ratio = float(width) / float(height)
            target_size = (LIBRARYFILE_THUMB_WIDTH, int(LIBRARYFILE_THUMB_WIDTH / ratio))
        img.resize(target_size, pil.ANTIALIAS).save(filename)
    finally:
        # The screenshot may be missing if the capture itself failed.
        if os.path.exists(tmpfile):
            os.remove(tmpfile)

    print("Saved %s." % filename)
    return True
class BrowserManager:
	"""Owns one browser running inside a virtual display and hands it out.

	``obtain``/``release`` implement a simple flag-based reservation: a caller
	waits on its ``background`` object until the flag is clear.
	"""

	def __init__(self):
		self._lock = False

	@staticmethod
	def _parse_proxy(proxy_url):
		"""Split a proxy URL such as ``http://host:3128`` into ``(host, port)``.

		The scheme, any ``user:password@`` prefix and any path are ignored.
		``port`` is an ``int``, or ``None`` when the URL carries no numeric port.
		"""
		hostport = proxy_url.split('://', 1)[-1]
		hostport = hostport.split('/', 1)[0]
		hostport = hostport.rsplit('@', 1)[-1]
		host, sep, port = hostport.rpartition(':')
		if not sep:
			return hostport, None
		return host, (int(port) if port.isdigit() else None)

	def bootup(self):
		"""Start the virtual display and the browser.

		When the ``HTTP_PROXY`` environment variable is set, the browser
		profile is configured to use that proxy for HTTP and HTTPS traffic.
		"""
		self._display = Display(visible=0, size=(1024, 768))
		self._display.start()
		profile = {}
		proxy_url = os.environ.get('HTTP_PROXY')
		if proxy_url:
			proxy_server, proxy_port = self._parse_proxy(proxy_url)
			profile['network.proxy.type'] = 1
			# Firefox names its HTTPS proxy prefs ``ssl``; the ``https`` keys
			# are kept only because earlier versions of this code set them.
			for scheme in ('http', 'ssl', 'https'):
				profile['network.proxy.%s' % scheme] = proxy_server
				if proxy_port is not None:
					# The port prefs are integer prefs, so pass an int.
					profile['network.proxy.%s_port' % scheme] = proxy_port
		try:
			self.browser = Browser(profile_preferences=profile)
		except Exception:
			# Do not leave the display running when the browser cannot start.
			self._display.stop()
			raise

	def obtain(self,background):
		"""Wait until the browser is free, mark it busy and return it."""
		while self._lock:
			background.wait('Browser lock', 15)
		self._lock = True
		return self.browser

	def release(self,background):
		"""Mark the browser as free again."""
		self._lock = False

	def shutdown(self):
		"""Quit the browser, then stop the display it was rendering into."""
		try:
			self.browser.quit()
		finally:
			self._display.stop()
Exemplo n.º 5
0
class UITestCase(LiveServerTestCase):
    """Live-server test case that drives a browser through ``WebDriver``.

    If the browser cannot be started directly, it is started again inside a
    pyvirtualdisplay ``Display``.
    """

    def use_xvfb(self):
        """Start an 'xvfb' display and create the driver inside it."""
        from pyvirtualdisplay import Display
        xvfb_display = Display('xvfb', visible=1, size=(1280, 1024))
        self.display = xvfb_display
        xvfb_display.start()
        self.driver = WebDriver()

    def setUp(self):
        """Create the driver, falling back to ``use_xvfb`` on failure."""
        needs_virtual_display = False
        try:
            self.driver = WebDriver()
        except WebDriverException:
            needs_virtual_display = True

        if needs_virtual_display:
            self.use_xvfb()

        self.driver.implicitly_wait(10)
        super(UITestCase, self).setUp()

    def tearDown(self):
        """Quit the driver and stop the display if one was started."""
        self.driver.quit()
        # ``display`` only exists when ``use_xvfb`` ran for this test.
        if hasattr(self, 'display'):
            self.display.stop()

        super(UITestCase, self).tearDown()
Exemplo n.º 6
0
def main():
    '''business logic for when running this module as the primary one!

    Polls ``find_cl_post()`` forever, sleeping ``SLEEP_SECONDS`` between
    polls, and calls ``send_cl_email`` whenever the title of the post differs
    from the previously seen one.  The virtual display is stopped when the
    loop is left through an exception such as ``KeyboardInterrupt``.
    '''
    display = Display(visible=0, size=(1024, 768))
    display.start()

    try:
        # NOTE(review): the result of this first call is not used; it is kept
        # in case find_cl_post() has side effects that later calls rely on.
        find_cl_post()
        prev_cl_post = {"title": "", "link": ""}

        while True:
            fresh_cl_post = find_cl_post()

            try:
                if fresh_cl_post['title'] != prev_cl_post['title']:
                    # Remember the post before mailing, so that a failing
                    # mail is not retried for the same post.
                    prev_cl_post = fresh_cl_post
                    send_cl_email(fresh_cl_post)
            except Exception:
                # Best effort: report the failure and keep polling.
                print("Failed to test & send mail at: "+str(datetime.datetime.now()))

            gc.collect()
            time.sleep(SLEEP_SECONDS)
    finally:
        display.stop()
Exemplo n.º 7
0
class Xvfb(object):
    """Context manager that keeps a virtual ``Display`` running.

    Args:
        width: Display width in pixels.
        height: Display height in pixels.
        visible: Passed through as the ``visible`` argument of ``Display``.
    """

    def __init__(self, width=1366, height=768, visible=0):
        self.__virtual_display = None
        self.width = width
        self.height = height
        self.visible = visible

    def __init_display(self):
        """Create and start the display unless one is already running."""
        if self.__virtual_display is None:
            self.__virtual_display = Display(visible=self.visible, size=(self.width, self.height))
            self.__virtual_display.start()

    def __enter__(self):
        self.__init_display()
        # Returning self makes ``with Xvfb() as x:`` usable.
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self._close_display()

    def _close_display(self):
        """Stop the display if one is running; failures are ignored."""
        display, self.__virtual_display = self.__virtual_display, None
        if display:
            try:
                # Display is shut down with stop(), as elsewhere in this file.
                display.stop()
            except Exception:
                # Best effort: a display that is already gone is fine.
                pass

    @staticmethod
    def run(func, *args, **kwargs):
        """Call ``func(*args, **kwargs)`` under a default-sized display."""
        runner = Xvfb()
        with runner:
            return func(*args, **kwargs)
    def __init__(self, domain, dte):
        """Prepare working directories, a virtual display and a Firefox driver.

        Args:
            domain: Stored on the instance as ``self.domain``.
            dte: Stored on the instance as ``self.dte``.

        Creates the ``mydump_pro_similatr`` directory and a per-day
        ``dirpro<ddmmYYYY>`` directory, starts a ``Display`` and launches
        Firefox with a profile that saves ``text/csv`` downloads to the
        current working directory without prompting.
        """
        self.domain = domain
        self.dte = dte

        self.mydump = "mydump_pro_similatr"
        self.directory = "dirpro%s" % (time.strftime("%d%m%Y"))

        for path in (self.mydump, self.directory):
            try:
                os.makedirs(path)
            except OSError:
                # Best effort, typically the directory already exists.
                pass

        display = Display()
        self.display = display.start()

        fp = webdriver.FirefoxProfile()
        # folderList=2 makes Firefox use the custom "browser.download.dir".
        fp.set_preference("browser.download.folderList", 2)
        fp.set_preference("browser.download.manager.showWhenStarting", False)
        fp.set_preference("browser.download.dir", os.getcwd())
        fp.set_preference("browser.helperApps.neverAsk.saveToDisk", "text/csv")

        self.driver = webdriver.Firefox(firefox_profile=fp)
        self.driver.maximize_window()
        self.driver.implicitly_wait(60)
        self.driver.set_page_load_timeout(120)
Exemplo n.º 9
0
def getupc(data, sleeptime):
    """Look up a UPC for every item in ``data`` through a Google search.

    For each item, ``"<name> upc"`` is searched and the first result whose
    link starts with ``http://www.upcitemdb.com/upc`` supplies the UPC (the
    last path segment of that link), stored under ``item['upc']``.

    Args:
        data: Iterable of dicts, each with a ``'name'`` key; updated in place.
        sleeptime: Seconds to sleep after loading a page.

    Returns:
        The same ``data`` object.

    The browser and the virtual display are released even when a lookup
    fails.
    """
    display = Display(visible=0, size=(800, 600))
    display.start()
    try:
        browser = webdriver.Firefox()
        try:
            browser.get('https://www.google.com/ncr')
            time.sleep(sleeptime)
            search = WebDriverWait(browser, 5).until(EC.element_to_be_clickable((By.XPATH, "//input[@type='text']")))
            for item in data:
                ActionChains(browser).move_to_element(search).click(search).send_keys(item['name'] + ' upc', Keys.ENTER).perform()
                time.sleep(sleeptime)
                contents = WebDriverWait(browser, 5).until(EC.presence_of_all_elements_located((By.XPATH, "//div[@class='g']")))
                for result in contents:
                    # Read the link once per result instead of twice.
                    href = result.find_element_by_tag_name('a').get_attribute('href')
                    if href and href.startswith('http://www.upcitemdb.com/upc'):
                        item['upc'] = href.split('/')[-1]
                        break

                # The results page replaced the DOM, so look the box up again.
                search = WebDriverWait(browser, 5).until(EC.element_to_be_clickable((By.XPATH, "//input[@type='text']")))
                search.clear()
        finally:
            browser.quit()
    finally:
        display.stop()
    return data
Exemplo n.º 10
0
 def load(self):
     """Log in to c9.io, open the target workspace and log the result.

     Each call first schedules the next call of itself on a
     ``threading.Timer`` with a random delay between 3600 and 7179 seconds.
     It then opens the c9.io dashboard in a headless Firefox with caching
     disabled, submits ``self.user``/``self.password``, loads
     ``self.target_workspace``, passes the page source to ``self.log`` and
     saves screenshots under ``self.directory_img``.
     """
     min_time = 3600 # 1 hour in seconds
     max_time = 7179 # 2 hours in seconds (less 21)
     tasktime = randint(min_time, max_time)
     threading.Timer(tasktime, self.load).start()
     tasktime_m, tasktime_s = divmod(tasktime, 60)
     tasktime_h, tasktime_m = divmod(tasktime_m, 60)
     output_content = "Load execution - waiting %dh %02dmin %02dsec for the next time." % (tasktime_h, tasktime_m, tasktime_s)
     print("[KeepUp] %s" % output_content)

     from selenium import webdriver
     from selenium.webdriver.common.keys import Keys
     from pyvirtualdisplay import Display

     # Initial
     display = Display(visible=0, size=(1600, 900))
     display.start()
     try:
         profile = webdriver.FirefoxProfile()
         profile.set_preference("browser.cache.disk.enable", False)
         profile.set_preference("browser.cache.memory.enable", False)
         profile.set_preference("browser.cache.offline.enable", False)
         profile.set_preference("network.http.use-cache", False)
         # The profile must be handed to Firefox for the prefs to apply.
         driver = webdriver.Firefox(firefox_profile=profile)
         try:
             driver.get("https://c9.io/dashboard.html")
             driver.save_screenshot(self.directory_img + 'login.png')

             # Username
             username = driver.find_element_by_id("id-username")
             username.click()
             username.clear()
             username.send_keys(self.user, Keys.ARROW_DOWN)

             # Password
             password = driver.find_element_by_id("id-password")
             password.click()
             password.clear()
             password.send_keys(self.password, Keys.ARROW_DOWN)

             # Submit
             submit_button = driver.find_element_by_css_selector("button[type=submit]")
             submit_button.click()
             time.sleep(5)
             driver.save_screenshot(self.directory_img + 'user_profile.png')

             # Target dir
             driver.get(self.target_workspace)
             time.sleep(10)

             self.log({'log_html': driver.page_source, 'log_file': output_content}) #make log
             driver.save_screenshot(self.directory_img + 'final_workspace.png')
         finally:
             driver.quit()
     finally:
         display.stop()
Exemplo n.º 11
0
class BCCVLTestCase(unittest.TestCase):
    """Base test case that opens the BCCVL site in Firefox.

    Configuration is read from environment variables:
    ``BCCVL_TEST_USERNAME``, ``BCCVL_TEST_PASSWORD``, ``BCCVL_TEST_URL``,
    ``BCCVL_TEST_IMPLICIT_WAIT`` (seconds) and ``BCCVL_TEST_VIRTUAL_DISPLAY``
    (``"true"`` runs the browser inside a virtual display).
    """

    def setUp(self):
        # acquire URL, username and password from environment variables, or use default values for dev env.
        self.username = os.getenv("BCCVL_TEST_USERNAME", "admin")
        self.password = os.getenv("BCCVL_TEST_PASSWORD", "admin")
        self.url = os.getenv("BCCVL_TEST_URL", "https://192.168.100.200/")

        # The amount of time selenium will potentially wait in searching for elements. This is blocking.
        implicit_wait = int(os.getenv("BCCVL_TEST_IMPLICIT_WAIT", "15"))

        # Run tests in a virtual display (xvfb)
        virtual_display = os.getenv("BCCVL_TEST_VIRTUAL_DISPLAY", "false") == "true"

        self.driver = None
        self.display = None

        # Setup the virtual display
        if virtual_display:
            self.display = Display(visible=0, size=(1920, 1080))
            self.display.start()

        try:
            # Setup the Firefox Profile and webdriver
            self.driver = webdriver.Firefox()
            self.driver.implicitly_wait(implicit_wait)

            # Use a fixed window size rather than maximizing the window.
            self.driver.set_window_size(1200, 800)

            # Go to the bccvl homepage
            self.driver.get(self.url)
        except Exception:
            # unittest skips tearDown() when setUp() raises, so release the
            # browser and the display here.
            self._release_browser()
            raise

    def _release_browser(self):
        """Quit the browser first, then stop the display it renders into."""
        try:
            if self.driver is not None:
                self.driver.quit()
        finally:
            if self.display:
                self.display.stop()

    def tearDown(self):
        self._release_browser()
Exemplo n.º 12
0
def rzhd():
    """Poll for train tickets until every requested direction is bought.

    Directions are collected interactively with ``create_url()``.  Each
    direction is then checked with ``find_train``; on a hit an e-mail is sent
    and the user is asked whether the ticket was bought, in which case the
    direction is dropped.  The function sleeps 60 seconds after each check.

    Returns:
        ``True`` once all directions have been bought.
    """
    directions = [create_url()]

    while raw_input('Want to add more directions? y/n ') == 'y':
        directions.append(create_url())
        print("------------------")
    # Seconds to wait between checks.
    n = 60

    place = choose_place()
    i = 0
    display = Display(visible=0, size=(5, 5))
    display.start()  # start the virtual display
    try:
        while directions:
            i += 1
            print("")
            print("----------------->Searching for PLATSKART<-----------------")

            print("try # %d" % i)
            print(time.asctime())
            print("")

            # Iterate over a copy: entries are removed from the list below.
            for url in list(directions):
                if find_train(url, place) == True:
                    send_email('*****@*****.**', url)
                    if raw_input('Did you buy ticket? y/n ') == 'y':
                        directions.remove(url)
                        if not directions:
                            print("Successfully bought all tickets!")
                            return True
                print(str(n) + " seconds until next try...")
                time.sleep(float(n))  # give the browser time to shut down cleanly
    finally:
        display.stop()  # stop the virtual display on every exit path
Exemplo n.º 13
0
def get_screenshot(site_id, update_id):
    """
    Load a site's URL in a headless Firefox with a page-load timeout.

    The ``Site`` and ``Update`` rows are fetched by id, a 1680x1050 virtual
    display and a Firefox browser are started, and the site URL (with a
    random ``x`` query parameter appended) is requested.  A timeout is logged
    and otherwise ignored.

    NOTE(review): the visible code neither stores a screenshot nor releases
    the browser and the display.
    """
    # Look up the database rows this run refers to
    site = Site.objects.get(id=site_id)
    update = Update.objects.get(id=update_id)

    # Headless display for the browser to render into
    display = Display(visible=0, size=(1680, 1050))
    display.start()

    browser = webdriver.Firefox()

    # Register and invoke the page-load timeout command by hand
    seconds = 15
    timeout_command = ('POST', '/session/$sessionId/timeouts')
    browser.command_executor._commands['setPageLoadTimeout'] = timeout_command
    timeout_params = {'ms': 1000*seconds, 'type': 'page load'}
    browser.execute("setPageLoadTimeout", timeout_params)

    # Request the target site
    logger.debug("Opening %s" % site.url)
    timestamp = timezone.now()
    try:
        target = site.url + "?x=" + get_random_string()
        browser.get(target)
        logger.debug("Response received for %s" % site.url)
    except TimeoutException:
        logger.error("Request for %s timed out" % site.url)
Exemplo n.º 14
0
def main(param):
    """Take a screenshot of every target URL with a headless Firefox.

    Args:
        param: Two-element sequence ``(paths, targets)``.  ``paths`` must
            offer ``get('path', 'output.shotsdir')`` returning the output
            directory (surrounding double quotes are stripped).  ``targets``
            is a non-empty sequence of ``(url, name)`` pairs; each page is
            saved as ``<shotsdir>/<name>.png``.

    Exits the process with status -9 when ``param`` does not have exactly two
    elements and with -8 when ``targets`` is empty.  The browser and the
    display are released even when a capture fails.
    """
    if len(param) != 2:
        sys.exit(-9)
    if len(param[1]) <= 0:
        sys.exit(-8)
    paths = param[0]
    shotsdir = paths.get('path', 'output.shotsdir').strip('"')
    targets = param[1]

    display = Display(visible=0, size=(800, 600))
    display.start()
    try:
        binary = FirefoxBinary('/opt/firefox/firefox')
        browser = webdriver.Firefox(firefox_binary=binary)
        try:
            tgt_len = len(targets)
            for i, tgt in enumerate(targets):
                browser.get(tgt[0])
                browser.save_screenshot(shotsdir+'/'+tgt[1]+'.png')
                print('( %3d / %3d ) Took %s.png' % (i+1, tgt_len, tgt[1]))
        finally:
            browser.quit()
    finally:
        display.stop()
Exemplo n.º 15
0
class TestContext(object):
    """Holds the Firefox browser (and optional virtual display) for a test."""

    def open_browser(self):
        """Start Firefox, inside a virtual display when configured to."""

#         if test_config.SELENIUM_USE_REMOTE:
#             dc = getattr(DesiredCapabilities, self.driver.upper())
#             dc['name'] = test_config.SELENIUM_TEST_NAME
#             cmd_exec = test_config.SELENIUM_REMOTE_CMD_EXEC
#             self.browser = webdriver.Remote(desired_capabilities=dc, command_executor=cmd_exec)

        if test_config.SELENIUM_USE_VIRTUALDISPLAY:
            virtual = Display(backend=test_config.SELENIUM_VIRTUALDISPLAY_BACKEND, size=(600, 800))
            self.virtualdisplay = virtual.start()

        firefox_binary = FirefoxBinary(test_config.SELENIUM_FIREFOX_PATH)
        self.browser = webdriver.Firefox(firefox_binary=firefox_binary)
        self.browser.implicitly_wait(test_config.SELENIUM_PAGE_WAIT)

    def close(self):
        """Quit the browser and stop the virtual display if one was started."""
        self.browser.quit()
        # The attribute exists only when open_browser() started a display.
        if hasattr(self, 'virtualdisplay'):
            self.virtualdisplay.stop()

    def get(self, url):
        """Navigate to ``url`` and remember it as the current URL."""
        self.browser.get(url)
        self.url = url

    def follow_link(self, link):
        """Click ``link`` and record the URL the browser ends up on."""
        link.click()
        landed_on = self.browser.current_url
        self.url = landed_on

    def wait_for(self, by, thing):
        """Block until the element located by ``(by, thing)`` is present."""
        locator = (by, thing)
        waiter = WebDriverWait(self.browser, test_config.SELENIUM_PAGE_WAIT)
        waiter.until(EC.presence_of_element_located(locator))
Exemplo n.º 16
0
def loadSite(url):
    """Fetch ``url`` through a proxied headless Firefox and print table cells.

    The page is loaded through the HTTP proxy 74.84.131.34:80, and the text
    of every ``<td>`` inside ``table.network-info`` is printed.

    Args:
        url: Address of the page to load.

    Returns:
        Always ``1``.

    The browser and the display are released even when loading fails.
    """
    profile = webdriver.FirefoxProfile()
    profile.set_preference("network.proxy.type", 1)
    profile.set_preference("network.proxy.http", "74.84.131.34")
    profile.set_preference("network.proxy.http_port", 80)
    profile.update_preferences()

    display = Display(visible=0, size=(800, 600))
    display.start()
    try:
        geckodriver_path = '/home/alexandr/www/html/python/prs/files/geckodriver'
        browser = webdriver.Firefox(firefox_profile=profile, executable_path=geckodriver_path)
        try:
            browser.delete_all_cookies()
            browser.get(url)
            tree = etree.HTML(browser.page_source)
        finally:
            browser.quit()
    finally:
        display.stop()

    nodes = tree.xpath('//table[@class="network-info"]//tr/td')
    for node in nodes:
        print(node.text)
    return 1
    def process_install_form(self):
        """Fill in and submit the Omeka install form in Firefox.

        Opens ``http://localhost/omeka/<folder_name>/install``, enters the
        administrator credentials, e-mail addresses and site title, submits
        the form and waits up to 10 seconds for a link containing "Tableau".
        When ``self.args.xvfb`` is set the browser runs inside a virtual
        display.  The browser and the display are released even when a step
        fails.
        """
        display = None
        if self.args.xvfb:
            print("Omeka is being installed in: " + self.folder_name)
            display = Display(visible=0, size=(800, 600))
            display.start()
        try:
            driver = webdriver.Firefox()
            try:
                driver.get("http://localhost/omeka/" + self.folder_name + "/install")
                form_values = (
                    ("username", self.omeka_user),
                    ("password", self.omeka_passwd),
                    ("password_confirm", self.omeka_passwd),
                    ("super_email", "*****@*****.**"),
                    ("administrator_email", "*****@*****.**"),
                    ("site_title", self.omeka_title),
                )
                for field_name, value in form_values:
                    inputElement = driver.find_element_by_name(field_name)
                    inputElement.send_keys(value)
                # Submitting through the last field submits the whole form.
                inputElement.submit()
                WebDriverWait(driver, 10).until(
                    lambda drv: drv.find_element_by_partial_link_text("Tableau"))
            finally:
                driver.quit()
        finally:
            if display is not None:
                display.stop()
Exemplo n.º 18
0
class TestCase(unittest.TestCase):
    """Tests that need an in-memory database and a headless Firefox."""

    def setUp(self):
        """Create the schema, then start the display and the browser."""
        app.config['TESTING'] = True
        app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///:memory:'
        client = app.test_client()
        self.app = client
        db.create_all()

        headless = Display(visible=0, size=(800, 600))
        self.display = headless
        headless.start()
        self.driver = webdriver.Firefox()

    def tearDown(self):
        """Drop the schema, then quit the browser and stop the display."""
        db.session.remove()
        db.drop_all()

        self.driver.quit()
        self.display.stop()

    def test_extract_funds(self):
        """``extract_funds`` returns more than 110 funds."""
        # The page relies on JavaScript that could not be mocked with a local
        # file such as
        # 'file:///%s/t/test_files/list_mutual_funds.html' % basedir,
        # so only the driver is passed.
        funds = extract_funds(self.driver)

        fund_count = len(funds)
        self.assertTrue(fund_count > 110)
Exemplo n.º 19
0
def main(args):
    """Log in to the PATRIC web interface and exercise a few pages.

    Parses ``user``, ``passwd`` and ``--firebug`` from the command line, logs
    in through a headless Firefox, saves a screenshot of the home page to
    ``homepage_after_login.jpg`` and loads the user's workspace twice.

    Args:
        args: NOTE(review): unused; ``parse_args()`` is called without
            arguments, so the options come from ``sys.argv``.  Confirm with
            the callers before passing ``args`` through.

    Returns:
        ``0`` on success.

    The driver and the display are released even when a step fails.
    """
    parser = argparse.ArgumentParser(description="Program for running tests on the PATRIC web interface.")
    parser.add_argument("user", metavar="user", help="Patric login username.")
    parser.add_argument("passwd", metavar="passwd", help="Patric login password.")
    parser.add_argument("--firebug", action="store_true", help="Open Firebug during test.")
    args = parser.parse_args()

    fp = webdriver.FirefoxProfile()
    if args.firebug:
        fp.add_extension(extension='extras/firebug-2.0.9.xpi')
        fp.set_preference("extensions.firebug.currentVersion", "2.0.9") #Avoid startup screen
        fp.set_preference("extensions.firebug.console.enableSites", "true")
        fp.set_preference("extensions.firebug.net.enableSites", "true")
        fp.set_preference("extensions.firebug.script.enableSites", "true")
        fp.set_preference("extensions.firebug.allPagesActivation", "on")

    # Create virtual display
    display = Display(visible=0, size=(1400, 950))
    display.start()
    try:
        # Create webdriver and retrieve url
        driver = webdriver.Firefox(firefox_profile=fp)
        try:
            driver.get(SITE_URL + '/login')

            # Wait for username input box to appear
            WebDriverWait(driver, PAGE_LOAD_TIMEOUT).until(EC.presence_of_element_located((By.ID, "dijit_form_TextBox_0")))

            # Set username and password, click login button
            userElement = driver.find_element_by_id("dijit_form_TextBox_0")
            pwdElement = driver.find_element_by_id("dijit_form_TextBox_1")
            userElement.send_keys(args.user)
            pwdElement.send_keys(args.passwd)
            loginElement = driver.find_element_by_id("dijit_form_Button_1")
            loginElement.click()
            time.sleep(3)

            # Retrieve home page, wait for an expected page element to load, take a screenshot
            driver.get(SITE_URL + '/portal/portal/patric/Home')
            WebDriverWait(driver, PAGE_LOAD_TIMEOUT).until(EC.presence_of_element_located((By.ID, "cart")))
            driver.set_window_size(1400, 950)
            driver.execute_script("window.scrollTo(0,0);")
            driver.get_screenshot_as_file("homepage_after_login.jpg")
            print("Saved screenshot to: homepage_after_login.jpg\n")

            # Retrieve ws url, wait for create folder button to appear
            ws_url = SITE_URL + '/workspace/' + args.user + '@patricbrc.org/home'
            driver.get(ws_url)
            WebDriverWait(driver, PAGE_LOAD_TIMEOUT).until(EC.presence_of_element_located((By.CLASS_NAME, "ActionButtonContainer")))
            time.sleep(5)

            # Have to reload page, because often time the workspace is empty on first load
            driver.get(ws_url)
            WebDriverWait(driver, PAGE_LOAD_TIMEOUT).until(EC.presence_of_element_located((By.CLASS_NAME, "ActionButtonContainer")))
            time.sleep(30)
        finally:
            driver.quit()
    finally:
        display.stop()
    return 0
Exemplo n.º 20
0
Arquivo: f.py Projeto: kamekame/alpha
def get_news():
    """Return the headlines currently shown on deutschlandfunk.de.

    Returns:
        A list with the text of every ``<h3>`` found in the page section that
        is scraped, or ``None`` (after printing an error) when
        ``check_wlan()`` reports no connection.

    The browser and the display are released even when scraping fails.
    """
    if not check_wlan():
        print("Error: Not connected to the internet")
        return None

    from pyvirtualdisplay import Display
    import re

    display = Display(visible=0, size=(800, 600))
    display.start()
    try:
        driver = webdriver.Firefox()
        try:
            url = "http://www.deutschlandfunk.de/"
            driver.get(url)
            source = driver.find_element_by_xpath('//*[@id="wrapper"]/div/section[2]/div[1]').get_attribute('innerHTML')
        finally:
            driver.quit()
    finally:
        display.stop()

    n_articles = source.count('<article')
    print(str(n_articles) + " articles found.")

    return re.findall('<h3>(.+)</h3>', source)
Exemplo n.º 21
0
class FunctionalTest(StaticLiveServerTestCase):
    """Browser tests that run against a live or an external server.

    When a command-line argument containing ``liveserver`` is present, the
    part after ``=`` is used as the server address and the built-in live
    server is not started.
    """

    @classmethod
    def setUpClass(cls):
        """Pick the server URL, starting the live server only if needed."""
        external = [arg for arg in sys.argv if 'liveserver' in arg]
        if external:
            cls.server_url = 'http://' + external[0].split('=')[1]
            return
        super().setUpClass()
        cls.server_url = cls.live_server_url

    @classmethod
    def tearDownClass(cls):
        """Shut the live server down only when the tests are using it."""
        using_live_server = cls.server_url == cls.live_server_url
        if using_live_server:
            super().tearDownClass()

    def setUp(self):
        """Start a 1024x768 virtual display and a Firefox browser."""
        headless = Display(visible=0, size=(1024, 768))
        self.display = headless
        headless.start()
        self.browser = webdriver.Firefox()
        # self.browser.implicitly_wait(3)

    def tearDown(self):
        """Quit the browser, then stop the display."""
        self.browser.quit()
        self.display.stop()

    def check_for_row_in_list_table(self, row_text):
        """Assert that ``row_text`` is the text of a row in the list table."""
        list_table = self.browser.find_element_by_id('id_list_table')
        row_texts = [row.text for row in list_table.find_elements_by_tag_name('tr')]
        self.assertIn(row_text, row_texts)
Exemplo n.º 22
0
class AdminTestCase(LiveServerTestCase):
    """Live-server tests driven through a headless Firefox."""

    def setUp(self):
        """Start the virtual display and the browser before each test."""
        headless = Display(visible=0, size=(800, 600))
        self.display = headless
        headless.start()

        self.selenium = webdriver.Firefox()

        super(AdminTestCase, self).setUp()

    def tearDown(self):
        """Quit the browser and stop the display after each test."""
        self.selenium.quit()
        self.display.stop()
        super(AdminTestCase, self).tearDown()

    def test_payment(self):
        """
        Paying an amount and returning to the site reports success.
        """
        browser = self.selenium

        pay_url = "%s/pay" % self.live_server_url
        browser.get(pay_url)
        browser.implicitly_wait(20)
        browser.maximize_window()

        amount_field = browser.find_element_by_name("amount")
        amount_field.send_keys("100000")

        browser.find_element_by_xpath('//input[@value="pay"]').click()

        # "btn3" is the button that leads back to the site.
        browser.find_element_by_id("btn3").click()

        self.assertIn("successful", browser.page_source)
Exemplo n.º 23
0
	def get_image(self):
		"""Find a new picture on ``self.scrape_site`` and save it.

		Every ``<img>`` on the page is considered in order.  Images whose
		``src`` is missing, contains ``adsrvr`` or has no ``/photo/<id>/``
		segment are skipped.  The first image accepted by ``_check_id`` and
		``_check_ratios`` sets ``self.img_id`` and ``self.description`` and is
		stored with ``_save_hd_image``.

		Raises:
			Exception: when ``self.img_id`` is falsy after the scan.
		"""
		url = self.scrape_site
		# virtual display for headless runs
		display = Display(visible=0, size=(800, 600))
		display.start()

		try:
			with closing(Firefox()) as browser:
				browser.get(url)
				time.sleep(5) # TODO: fix with something less static, but still
				# multipurpose considering scrape_site as a db var
				for img in browser.find_elements_by_tag_name('img'):
					src = img.get_attribute('src')
					# TODO: fix this temporary workaround that prevents ad
					# server data from reaching the image checks
					if not src or 'adsrvr' in src:
						continue
					match = re.search("/photo/(.+?)/", src)
					if match is None:
						# Not a photo URL, so it cannot be checked or saved.
						continue
					image_id = match.group(1)
					if self._check_id(image_id) and self._check_ratios(src):
						self.img_id = image_id
						self.description = img.get_attribute('alt')
						self._save_hd_image()
						break
		finally:
			# Stop the display even when scraping raised.
			display.stop()
		if self.img_id:
			return
		raise Exception('Failed to find a suitable image: all out or bugged')
Exemplo n.º 24
0
def get_all_items():
    """Scrape every report form listed on the Federal Reserve forms page.

    Each option of the report-form drop-down (except the first) is selected
    and submitted, and the resulting detail page is read.

    Returns:
        A list with one dict per form.  Each dict has the keys
        ``Description``, ``OMB``, ``Background``, ``RespondentPanel``,
        ``Frequency`` and ``PublicRelease`` (``None`` when the page has no
        such field) plus ``FormNumber`` holding the option text.

    The browser and the display are released even when scraping fails.
    """
    start_url = "http://www.federalreserve.gov/apps/reportforms/default.aspx"
    fields = ['Description', 'OMB', 'Background', 'RespondentPanel', 'Frequency', 'PublicRelease']
    all_items = list()

    def load_form_options():
        # Open the start page and return the drop-down's <option> elements.
        driver.get(start_url)
        main_menu = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "#MainContent_ddl_ReportForms")))
        return main_menu.find_elements_by_tag_name("option")

    display = Display(visible=0, size=(1024, 768))
    display.start()
    try:
        driver = webdriver.Firefox()
        try:
            form_options = load_form_options()
            option_count = len(form_options)
            # Index 0 is skipped, as in the original scrape.
            for form_i in range(1, option_count):
                form = form_options[form_i]
                form.click()
                # Read the text now; the element is gone after the submit.
                form_id = form.text
                submit_button = WebDriverWait(driver, 3).until(EC.presence_of_element_located((By.CSS_SELECTOR, "#MainContent_btn_GetForm")))
                submit_button.click()

                a = dict.fromkeys(fields)
                for el in fields:
                    try:
                        item = driver.find_element_by_css_selector("#MainContent_lbl_"+el+"_data")
                        a[el] = item.text
                    except Exception:
                        # The field is absent on this page; leave it as None.
                        pass
                a['FormNumber'] = form_id
                all_items.append(a)

                # The old option elements are stale after navigating away, so
                # reload the start page and look them up again.
                form_options = load_form_options()
        finally:
            driver.quit()
    finally:
        display.stop()

    return all_items
Exemplo n.º 25
0
class Spider(scrapy.Spider):
    """Scrapes mayoral election results from www.cec.gov.tw.

    The index page is rendered with a Selenium-driven Chrome inside a virtual
    display; the per-county result pages are fetched by Scrapy itself.
    """
    name = "mayors"
    allowed_domains = ["www.cec.gov.tw"]
    start_urls = ["https://www.cec.gov.tw/pc/zh_TW/IDX/indexC.html",]
    download_delay = 1

    def __init__(self, ad=None, *args, **kwargs):
        super(Spider, self).__init__(*args, **kwargs)
        self.display = Display(visible=0, size=(800, 600))
        self.display.start()
        self.driver = webdriver.Chrome("/var/chromedriver/chromedriver")

    def spider_closed(self, spider):
        """Quit the browser and stop the virtual display.

        NOTE(review): nothing in this class connects this method to the
        ``spider_closed`` signal; confirm that it is wired up elsewhere.
        """
        try:
            self.driver.quit()
        finally:
            # Display is shut down with stop().
            self.display.stop()

    def parse(self, response):
        """Follow every county link found on the rendered index page."""
        self.driver.get(response.url)
        nodes = scrapy.Selector(text=self.driver.page_source).xpath('//a[@target="_top"]')
        for node in nodes:
            county = node.xpath('text()').extract_first()
            print(county)
            yield response.follow(node, callback=self.parse_list, meta={'meta': county})

    def parse_list(self, response):
        """Yield one result dict per candidate row of a county page."""
        for tr in response.css(u'table.tableT tr.trT'):
            d = {}
            d['type'] = 'mayors'
            d['county'] = response.meta['meta']
            d['constituency'] = 0
            d['elected'] = tr.xpath('td[1]/text()').extract_first().strip()
            d['number'] = int(tr.xpath('td[2]/text()').extract_first())
            # Drop every non-digit character before converting the count.
            d['votes'] = int(re.sub(r'\D', '', tr.xpath('td[5]/text()').extract_first()))
            d['votes_percentage'] = tr.xpath('td[6]/text()').extract_first()
            yield d
Exemplo n.º 26
0
def openurl(companyname=first_arg):
    """Save the page behind the first Google hit for "<company> crunchbase".

    The search is typed into Google with long random pauses, the first
    ``<h3>`` result is clicked and the resulting page source is written to
    ``0515<name>.html``; that file name is also appended to
    ``smallname.txt``.  When any step fails, the company name is appended to
    ``missedname.txt`` instead.

    Args:
        companyname: Company to search for.

    Returns:
        The page source, or the string ``'none'`` when the lookup failed.

    The browser and the display are released on every exit path.
    """
    display = Display(visible=0, size=(1024, 768))
    display.start()
    try:
        browser = webdriver.Firefox()
        try:
            time.sleep(randint(8, 10))
            try:
                browser.get('http://www.google.com')
                time.sleep(5)
                search = browser.find_element_by_name('q')
                input_text = companyname + " crunchbase"
                search.send_keys(input_text)
                time.sleep(randint(10, 15))
                search.send_keys(Keys.RETURN)
                time.sleep(randint(10, 15))
                gn = browser.find_element_by_tag_name('h3').text
                gnc = str(gn).split(' | ')[0].replace(" ", "")
                output_file = '0515' + gnc + '.html'
                browser.find_element_by_link_text(gn).click()
                time.sleep(randint(55, 60))
                company_html = browser.page_source
                time.sleep(randint(5, 10))
                with open("smallname.txt", 'a') as myfile:
                    json.dump(output_file, myfile)
                with open(output_file, 'a+') as myfile:
                    myfile.write(company_html)
            except Exception:
                # Best effort: record the miss and carry on.  Ctrl-C is not
                # caught here, so an interrupt still ends the run.
                company_html = 'none'
                with open("missedname.txt", "a") as myfile:
                    json.dump(companyname, myfile)
            time.sleep(1)
        finally:
            browser.quit()
            time.sleep(1)
    finally:
        display.stop()
    return company_html
Exemplo n.º 27
0
def process_screenshots(app, env):
    """Run the external screenshot command for all collected screenshots.

    Does nothing when ``env`` has no ``screenshot_all_screenshots``, when
    ``app.config['screenshots_create']`` is falsy, or when "gettext" appears
    in ``sys.argv`` (message-collection builds).

    When the environment variable ``SPHINX_SS_USE_PVD`` equals ``"true"``,
    a headless virtual display is started around the command and always
    stopped afterwards.

    Raises:
        Exception: if the screenshot process exits with a nonzero code.
    """
    if not hasattr(env, 'screenshot_all_screenshots'):
        return

    if not app.config['screenshots_create']:
        print("Not doing screenshots on maggies farm no more")
        return

    # Don't bother building screenshots if we're just collecting messages.
    # This is checked before the display is started so the early return
    # cannot leave a display running.
    if "gettext" in sys.argv:
        return

    # A real list (not a lazy map object) so json.dumps can serialize it.
    all_args = [shot['from_str_arg'] for shot in env.screenshot_all_screenshots]
    command = SCREENSHOT_COMMAND + SCREENSHOT_COMMAND_OPTS + \
        [re.sub(r"\s", r"", "--from-str={0}".format(json.dumps(all_args)))]
    # If building in a different language, start the server in that language.
    language = env.config.language
    if language:
        command += ["--lang={0}".format(language)]

    display = None
    if os.environ.get('SPHINX_SS_USE_PVD') == "true":
        from pyvirtualdisplay import Display
        # Start a virtual headless display
        display = Display(visible=0, size=(1024, 768))
        display.start()

    try:
        process = Popen(command)
        process.wait()
        if process.returncode:
            raise Exception("Screenshot process had nonzero return code: {0}".format(process.returncode))
    finally:
        if display:
            display.stop()
def run_selenium(landmark):
    """Load ``http://www.<landmark>`` in Firefox while tcpdump captures traffic.

    The network interface name is read from the first line of
    ``<HOME_DIR>/Desktop/one-time-test-suite/iface.txt``. The browser's
    ``window.performance.timing`` data is pickled into ``EXP_DIR``, next to
    the tcpdump capture file.

    The virtual display, the browser and the tcpdump process are released
    even when a step fails.

    Args:
        landmark: Host (optionally with a path) to visit, without the
            ``http://www.`` prefix.
    """
    display = Display(visible=0, size=(800, 600))
    display.start()
    try:
        logTo(TEST_LOG, 'Selenium : Starting Selenium  for ' + landmark, 'INFO', 'a')
        with open(HOME_DIR + '/Desktop/one-time-test-suite/iface.txt', 'r') as interFace:
            tmp = interFace.readlines()
        iface = tmp[0].split('\n')[0]
        # "%s" (epoch seconds) is a platform-specific strftime directive.
        tmpstmp = datetime.now().strftime("%s")
        profile = webdriver.FirefoxProfile()
        profile.update_preferences()
        browser = webdriver.Firefox(firefox_profile=profile)  # assign profile to browser
        try:
            browser.delete_all_cookies()
            logTo(TEST_LOG, ' Selenium : Starting tcpdump .. ', 'INFO', 'a')
            tcpcmd = 'tcpdump -i ' + iface + ' -w ' + EXP_DIR + '/' + 'tcpdump_' + landmark.split('.')[0] + '_' + tmpstmp
            args = shlex.split(tcpcmd)
            ptcpdmp = sub.Popen((args))
            try:
                # Give tcpdump time to start capturing before the page load.
                time.sleep(10)
                logTo(TEST_LOG, ' Selenium : Starting get ' + landmark, 'INFO', 'a')
                browser.get('http://www.' + landmark)
                time.sleep(5)
                perfData = browser.execute_script('return window.performance.timing')
                fname = EXP_DIR + '/' + 'perfdata_' + landmark.split('/')[0]
                fname = fname.replace('.', '-')
                with open(fname, 'wb') as perf_file:
                    pickle.dump(perfData, perf_file)
                logTo(TEST_LOG, 'Selenium : Writing done to ' + EXP_DIR + '/perfdata_' + landmark, 'INFO', 'a')
            finally:
                ptcpdmp.terminate()
        finally:
            browser.quit()
    finally:
        display.stop()
    logTo(TEST_LOG, 'Finished Selenium for ' + landmark, 'INFO', 'a')
Exemplo n.º 29
0
class SeleniumRunner(object):
    """Context manager and decorator providing Chrome on a virtual display.

    As a context manager it starts a hidden 800x600 display, creates a
    Chrome driver and yields it; both are torn down on exit.

    As a decorator (``@SeleniumRunner()``) it wraps a method so that the
    driver is passed as the first argument after ``self``.
    """

    def __call__(self, f):
        """Wrap ``f`` so each call runs inside this runner's context."""
        @functools.wraps(f)
        def decorated(_self, *args, **kwargs):
            with self as driver:
                return f(_self, driver, *args, **kwargs)
        return decorated

    def __enter__(self):
        """Start the display and the driver; return the driver."""
        self.display = Display(visible=0, size=(800, 600))
        self.display.start()
        try:
            self.driver = webdriver.Chrome()
        except Exception:
            # __exit__ is not invoked when __enter__ raises, so the display
            # has to be released here.
            self.display.stop()
            raise
        return self.driver

    def __exit__(self, *args, **kwargs):
        """Quit the driver and stop the display.

        A missing or replaced ``driver``/``display`` attribute is tolerated.
        Any other error from ``driver.quit()`` propagates, but only after
        the display has been stopped.
        """
        try:
            self.driver.quit()
        except AttributeError:
            # Someone has messed with our browser
            pass
        finally:
            try:
                self.display.stop()
            except AttributeError:
                # Someone has messed with our display
                pass
def work():
    """Take the weekly screenshot using the configured credentials.

    When the module-level ``DISPLAY`` flag is falsy, a hidden virtual
    display is started for the duration of the call and stopped on every
    exit path.

    Returns:
        True on success; False when the configuration cannot be loaded or
        the screenshot step fails.
    """
    logging.info("start weeklys screenshot work")
    print ("start ... ")

    display = None
    if not DISPLAY:
        print ("hide display ... ")
        display = Display(visible=0, size=(1366, 768))
        display.start()

    try:
        config = getConfigObj()
        if config is None:
            return False
        userName = config.get("USER", "UserName")
        userPWD = config.get("USER", "userPWD")

        ret = getTowerWeeklyScreenshot(userName, userPWD, DEFAULT_SAVE_PATH)

        if not ret:
            print ('Error, abort. Please check the log file "%s"' % LOG_FILE)
            return False

        logging.info("finish all work, exit.")
        return True
    finally:
        # Runs on the early returns too, so the display never outlives the call.
        if display is not None:
            display.stop()
Exemplo n.º 31
0
    def setUp(self):
        """Create the Selenium driver and start a fresh frontend for a test.

        The driver and the frontend address depend on the module-level
        ``TEST_ENV`` value; an unrecognised value skips the test. The test
        database is dropped before the frontend is started.

        Raises:
            SkipTest: when ``TEST_ENV`` selects no supported environment.
        """
        docker_daemon = {
            "remote_host": "192.168.59.103",
            "remote_docker_port": 2375,
            "remote_agent_port": 63456
        }
        demo_auth_plugin = {
            "plugin_module": "inginious.frontend.webapp.plugins.auth.demo_auth",
            "users": {"test": "test", "test2": "test", "test3": "test"}
        }
        self.frontend_config = {
            "backend": "remote",
            "docker_daemons": [docker_daemon],
            "mongo_opt": {"host": "localhost", "database": "INGIniousFrontendTest"},
            "tasks_directory": "./inginious/tasks",
            "containers": {
                "default": "ingi/inginious-c-default",
                "sekexe": "ingi/inginious-c-sekexe",
            },
            "superadmins": ["test"],
            "plugins": [demo_auth_plugin]
        }

        if TEST_ENV == "boot2docker":
            # Remote Selenium hub; no local display is needed.
            self.display = None
            executor = CUSTOM_SELENIUM_EXECUTOR or 'http://192.168.59.103:4444/wd/hub'
            self.driver = webdriver.Remote(
                command_executor=executor,
                desired_capabilities=DesiredCapabilities.FIREFOX)
            self.base_url = CUSTOM_SELENIUM_BASE_URL or "http://192.168.59.3:8081"
            self.frontend_host = "192.168.59.3"
        elif TEST_ENV == "boot2docker-local":
            # Local Firefox on the existing display.
            self.display = None
            self.driver = webdriver.Firefox()
            self.base_url = CUSTOM_SELENIUM_BASE_URL or "http://127.0.0.1:8081"
            self.frontend_host = "127.0.0.1"
        elif False and TEST_ENV == "jenkins":
            # This branch is switched off by the leading ``False``.
            self.display = Display(visible=0, size=(1920, 1080))
            self.display.start()
            self.driver = webdriver.Firefox()
            self.base_url = CUSTOM_SELENIUM_BASE_URL or "http://localhost:8081"
            self.frontend_host = "localhost"
            self.frontend_config["backend"] = "local"
        else:
            raise SkipTest(
                "Env variable TEST_ENV is not properly configured. Please take a look a the documentation to properly configure your "
                "test environment.")

        # Every supported environment uses the same frontend ports.
        self.frontend_port = 8081
        self.frontend_ssh_port = 8082

        self.driver.maximize_window()
        self.driver.implicitly_wait(30)
        self.verificationErrors = []
        self.accept_next_alert = True

        # Start from an empty database, then launch the frontend.
        _drop_database(self.frontend_config["mongo_opt"])
        started = _start_frontend(
            self.frontend_config, self.frontend_host, self.frontend_port,
            self.frontend_ssh_port)
        self.frontend_thread, self.frontend_server, self.close_app_func = started
Exemplo n.º 32
0
 def __init__(self):
     """Start a hidden 800x800 display and open Firefox on it.

     The display is kept on ``self.display`` so it can be stopped later.
     """
     self.display = Display(visible=0, size=(800, 800))
     self.display.start()
     # Placeholder value; replace with the real PATH entries for the driver.
     os.environ["PATH"] = "YOUR PATHS"
     capa = DesiredCapabilities.FIREFOX
     try:
         self.browser = webdriver.Firefox(capabilities=capa)
     except Exception:
         # Do not leave the display running if the browser cannot start.
         self.display.stop()
         raise
Exemplo n.º 33
0
class QQRobot(object):
    """Drive the QQ Mail web UI through Selenium/Firefox.

    Covers logging in (with captcha handling), checking which addresses the
    compose form flags as invalid, and sending mail. On Linux a hidden
    virtual display is started together with the browser.
    """

    LOGIN_URL = "https://xui.ptlogin2.qq.com/cgi-bin/xlogin?appid=522005705&daid=4&s_url=https://mail.qq.com/cgi-bin/login?vt=passport%26vm=wpt%26ft=loginpage%26target=&style=25&low_login=1&proxy_url=https://mail.qq.com/proxy.html&need_qr=0&hide_border=1&border_radius=0&self_regurl=http://zc.qq.com/chs/index.html?type=1&app_id=11005?t=regist&pt_feedback_link=http://support.qq.com/discuss/350_1.shtml&css=https://res.mail.qq.com/zh_CN/htmledition/style/ptlogin_input24e6b9.css"

    def __init__(self, username, passwd, proxy_ip=None, proxy_port=None):
        """
        :param username:     account user name
        :param passwd:       account password
        :param proxy_ip:     proxy IP used to reach QQ Mail; when empty no
                             proxy is configured (the local IP is used)
        :param proxy_port:   proxy port used when ``proxy_ip`` is set;
                             defaults to 3128, the default port of the squid
                             proxy service
        """
        self.username = username
        self.passwd = passwd
        self.proxy_ip = proxy_ip
        # 3128 is the documented default (squid's default port).
        self.proxy_port = proxy_port or 3128
        self.is_login = False
        self.platform = platform
        if self.platform == "win32":
            # Raw string so the backslashes are taken literally.
            self.geckopath = r"F:\software\geckodriver\geckodriver.exe"
        else:
            self.geckopath = "/usr/bin/geckodriver"

    def refresh(self):
        """Reload the current page in the browser."""
        log.info("refresh firefox, user: {}, proxy_ip: {}".format(self.username, self.proxy_ip))
        self.driver.refresh()

    def quit(self):
        """Quit the browser and, on Linux, stop the virtual display.

        Errors from either step are logged and not raised, so this is safe
        to call when the driver or display was never created.
        """
        log.info("quit user: {}, proxy_ip: {}".format(self.username, self.proxy_ip))
        try:
            self.driver.quit()
        except BaseException as e:
            log.info(e)
        if self.platform == "linux":
            try:
                self.display.stop()
            except BaseException as e:
                log.info(e)

    def login(self):
        """Log in, solving a captcha if the first attempt is not enough.

        :return: True once the mailbox page is detected
        :raises ValueError: when login still fails; the browser is quit first
        """
        self.set_driver()
        self.set_login()
        if self.set_login_check(timeout=1):
            return True

        self.set_login_verify()
        if self.set_login_check(timeout=3):
            return True
        self.quit()
        raise ValueError(u"不能登录QQ邮箱,重试")

    def set_profile(self):
        """Build a Firefox profile that routes HTTP/SSL through the proxy.

        :return: the profile, or None when no ``proxy_ip`` is configured
        """
        profile = None
        if self.proxy_ip:
            profile = webdriver.FirefoxProfile()
            profile.set_preference('network.proxy.type', 1)
            profile.set_preference('network.proxy.http', self.proxy_ip)
            profile.set_preference('network.proxy.http_port', self.proxy_port)
            profile.set_preference('network.proxy.ssl', self.proxy_ip)
            profile.set_preference('network.proxy.ssl_port', self.proxy_port)
            profile.update_preferences()
        return profile

    def set_driver(self):
        """Start the (virtual display and) browser and open the login page.

        :raises LoginError: on any failure; everything started so far is
            shut down via :meth:`quit` first
        """
        try:
            if self.platform == "linux":
                self.display = Display(visible=0, size=(800, 600))
                self.display.start()
            self.driver = webdriver.Firefox(executable_path=self.geckopath, firefox_profile=self.set_profile())
            self.driver.delete_all_cookies()
            # Keep a page from loading forever
            self.driver.set_page_load_timeout(300)
            self.driver.implicitly_wait(10)
            self.wait = WebDriverWait(self.driver, 30)

            # Open the initial login page
            self.driver.get(self.LOGIN_URL)
        except BaseException:
            self.quit()
            log.error(traceback.format_exc())
            raise LoginError("WebDriverException, can not set driver...")

    def set_login(self):
        """Submit the login form, retrying once if the first attempt fails."""
        try:
            self.set_login_submit()
            # Assert that login succeeded (the page contains the logout text)
            assert "退出" in self.driver.page_source
            # self.driver.find_element_by_xpath('''//div[@id="newVcodeIframe"]/iframe[1]''')
        except BaseException:
            try:
                log.info("login user: {}, retry login...".format(self.username))
                self.set_login_submit()
            except Exception:
                # Best effort: the caller re-checks the login state, so the
                # failure is only recorded here.
                log.error(traceback.format_exc())

    def set_login_check(self, timeout=5):
        """Check up to three times whether the mailbox page is showing.

        :param timeout: seconds to sleep between checks
        :return: True (and ``is_login`` set) when the title matches, else False
        """
        index = 3
        while index:
            if self.driver.title.strip() == u"QQ邮箱":
                self.is_login = True
                return True
            index -= 1
            time.sleep(timeout)
        return False

    def set_login_submit(self):
        """Fill in user name and password and click the login button."""
        self.driver.find_element_by_id("switcher_plogin").click()
        # self.wait.until(EC.presence_of_element_located((By.ID, 'u')))
        elem_user = self.driver.find_element_by_name("u")
        elem_user.clear()
        time.sleep(0.1)
        elem_user.send_keys(self.username)

        elem_pwd = self.driver.find_element_by_name("p")
        elem_pwd.clear()
        time.sleep(0.1)
        elem_pwd.send_keys(self.passwd)
        elem_but = self.driver.find_element_by_id("login_button")
        # elem_pwd.send_keys(Keys.RETURN)
        time.sleep(0.1)
        elem_but.click()

    def set_login_verify(self):
        """Handle a login that is blocked by a captcha.

        Switches into the captcha iframe, saves and decodes the captcha
        image and submits the answer.
        """
        # NOTE(review): ``index`` is only decremented on the failure paths;
        # after a successful submit the loop appears to rely on a later
        # iteration raising to terminate - confirm this is intended.
        index = 3
        while index:
            try:
                time.sleep(0.5)
                log.info("get captcha_img user: {}, index: {}".format(self.username, index))
                newVcodeIframe = self.driver.find_element_by_xpath('''//div[@id="newVcodeIframe"]/iframe[1]''')
                self.driver.switch_to.frame(newVcodeIframe)

                captcha_img = self.set_login_save_img('capImg')
                rs, verify_code = get_qq_captcha_code(captcha_img)
                log.info(
                    'login user: {} captcha_img: {}, verifycode: {}'.format(self.username, captcha_img, verify_code))
                if not rs:
                    log.error('login user: {}, verify img fail'.format(self.username))
                    index -= 1
                    continue

                ele_verifycode = self.driver.find_element_by_id("capAns")
                ele_verifycode.send_keys(verify_code)
                self.driver.find_element_by_id("submit").click()
            except BaseException as e:
                log.error('user: %s, verifycode err, msg: %s' % (self.username, e))
                # log.error(traceback.format_exc())
                index -= 1
                if index == 1:
                    log.info("verify_login user: {}, retry login...".format(self.username))
                    self.set_login()

    def set_login_save_img(self, imgid, uid=None):
        """Save a screenshot and crop the captcha image out of it.

        :param imgid: element id of the captcha image
        :param uid:   suffix for the file names; a fresh uuid1 when omitted
        :return: path of the cropped captcha image inside ``IMG_DIR``
        """
        if not uid:
            uid = str(uuid.uuid1())
        screenshot_img = os.path.join(IMG_DIR, "screenshot_{}.png".format(uid))
        captcha_img = os.path.join(IMG_DIR, "captcha_{}.png".format(uid))

        self.driver.save_screenshot(screenshot_img)
        img = self.driver.find_element_by_id(imgid)
        loc = img.location
        print("loc:")
        print(loc)

        image = cv2.imread(screenshot_img, True)
        # Crop a fixed 130x48 pixel region starting at the element's top-left corner.
        # roi = image[int(loc['y']):int(loc['y']) + 40, int(loc['x']):int(loc['x']) + 138]
        roi = image[int(loc['y']):int(loc['y'])+48, int(loc['x']):int(loc['x'])+130]
        cv2.imwrite(captcha_img, roi)
        return captcha_img

    @login_required
    def check(self, addrs):
        """Type addresses into the compose form and collect the rejected ones.

        A sentinel address is appended after ``addrs``; polling stops once
        the last flagged entry equals the expected sentinel text or after
        30 polls of 0.5 s. Up to three attempts are made (the second after a
        page refresh, the third after a 5 s pause).

        :param addrs: ';'-separated addresses to check (may be empty)
        :return: list of address texts flagged as errors, or None when every
            attempt failed (``is_login`` is then reset to False)
        """
        res = None
        index = 3
        while index:
            try:
                if index == 2:
                    self.refresh()
                if index == 1:
                    time.sleep(5)
                # Leave every frame
                self.driver.switch_to.default_content()

                # Click "compose"
                # self.wait.until(EC.presence_of_element_located((By.ID, 'composebtn')))
                elem_but_w = self.driver.find_element_by_id("composebtn")
                elem_but_w.click()

                # Switch to the main iframe on the right
                main_Frame1 = self.driver.find_element_by_id("mainFrame")
                self.driver.switch_to.frame(main_Frame1)

                # Fill in the address field (input inside "toAreaCtrl")
                check_addrs = "{};[email protected];".format(addrs) if addrs else "[email protected];"
                self.driver.find_element_by_xpath('''//div[@id="toAreaCtrl"]/div[2]/input''').send_keys(check_addrs)

                count = 30
                while count:
                    _t = self.driver.find_element_by_xpath('''//div[@id="toAreaCtrl"]''')
                    errors = _t.find_elements_by_css_selector("div.addr_base.addr_error")
                    res = [e.text.strip().replace(";", "") for e in errors]
                    if res and res[-1] == '*****@*****.**':
                        break
                    count -= 1
                    time.sleep(0.5)
                index = 0
            except BaseException as e:
                log.error('user: %s, check err, msg: %s' % (self.username, e))
                log.error(traceback.format_exc())
                index -= 1
        if res is None:
            self.is_login = False
        return res

    @login_required
    def send_email(self, addrs, subject, content, subtype="html"):
        """Compose and send one mail through the web UI.

        On any failure (including the "sent" confirmation not appearing) the
        error is logged and the page is refreshed; nothing is raised.

        :param addrs:   text typed into the recipient field
        :param subject: mail subject
        :param content: mail body typed into the editor
        :param subtype: accepted but not used by this method
        """
        try:
            self.driver.switch_to.default_content()

            # Click "compose"
            # self.wait.until(EC.presence_of_element_located((By.ID, 'composebtn')))
            elem_but_w = self.driver.find_element_by_id("composebtn")
            elem_but_w.click()

            # Switch to the main iframe on the right
            main_Frame1 = self.driver.find_element_by_id("mainFrame")
            self.driver.switch_to.frame(main_Frame1)

            # Fill in the address field (input inside "toAreaCtrl")
            self.driver.find_element_by_xpath('''//div[@id="toAreaCtrl"]/div[2]/input''').send_keys(addrs)
            # Enter the subject
            self.driver.find_element_by_id('subject').send_keys(subject)

            # Enter the body
            o = self.driver.find_elements_by_class_name("qmEditorIfrmEditArea")
            o[0].click()  # the editor must be clicked before typing
            o[0].send_keys(content)

            time.sleep(1)

            # Click the send button
            self.driver.find_element_by_xpath("//*[@id='toolbar']/div/a[1]").click()
            # driver.find_element_by_xpath('//a[@name="sendbtn" and @tabindex="9"]').click()

            time.sleep(3)
            # Assert that sending succeeded ("write another" text is shown)
            assert "再写一封" in self.driver.page_source

        except Exception:
            # The logged message reads "verification dialog popped up". The
            # dialog's captcha is not solved here; the page is refreshed and
            # the send is abandoned.
            log.error("弹出验证框")
            self.refresh()
            return
Exemplo n.º 34
0
# Log file location; the file handler is set to DEBUG level.
logfile = './logger.txt'
fh = logging.FileHandler(logfile, mode='w')
fh.setLevel(logging.DEBUG)

# Console handler is set to WARNING level.
ch = logging.StreamHandler()
ch.setLevel(logging.WARNING)

# Both handlers share one format and are attached to the same logger.
formatter = logging.Formatter(
    "%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s")
for _handler in (fh, ch):
    _handler.setFormatter(formatter)
    logger.addHandler(_handler)

# Start the hidden virtual display used by the browser.
display = Display(visible=0, size=(1440, 900))
display.start()


class MyWeb:
    """Holds the driver and output paths and opens the QQ group member page."""

    def __init__(self):
        """Set the driver location and the output directories."""
        # Output locations: result CSV files and group files.
        self.groupSavePath = '/opt/scripts/qqspider/groupfile'
        self.qqSavePath = '/opt/scripts/qqspider/qqfile'
        # Driver binary location; despite the attribute name this points at
        # geckodriver.
        self.chromedirverPath = '/Users/Homosum/Downloads/geckodriver'
        self.groupDict = dict()

    def get_group(self, user, password):
        """Open the QQ group member page in a new Firefox window.

        ``user`` and ``password`` are accepted but not used here.
        """
        member_page = "https://qun.qq.com/member.html"
        browser = webdriver.Firefox()
        browser.get(member_page)
        print('i got it')
Exemplo n.º 35
0
from queue import Queue
from selenium import webdriver
import requests
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import os
import re

# Module-wide lock object.
lock = threading.Lock()
import shutil

from pyvirtualdisplay import Display

# Start a hidden 1280x1024 virtual display at import time.
# NOTE(review): nothing in this span stops the display again - confirm it is
# stopped elsewhere.
display = Display(visible=0, size=(1280, 1024))
display.start()

# Database connection settings.
# NOTE(review): the address and credentials are hard-coded in source;
# consider loading them from the environment or a config file.
DB_ADDRESS = "35.154.103.232"
DB_USER = "******"
DB_PASS = "******"
DB_NAME = "office_nw"

# Login credentials (the names suggest the Karvy and CAMS services).
KARVY_USER = '******'
KARVY_PASS = '******'

# Alternative CAMS credentials, currently disabled.
#CAMS_USER = '******'
#CAMS_PASS = '******'

CAMS_USER = '******'
CAMS_PASS = '******'
Exemplo n.º 36
0
def get_browser():
    """Start a hidden 1024x768 virtual display and return a Firefox driver.

    The display is stored in the module-level ``display`` global so that it
    can be stopped later. If Firefox cannot be started, the display is
    stopped again before the error propagates.
    """
    global display
    display = Display(visible=0, size=(1024, 768))
    display.start()
    try:
        return webdriver.Firefox()
    except Exception:
        display.stop()
        raise
Exemplo n.º 37
0
def before_all(context):
    """Set up the browser, display, server and helpers for the test run.

    In order, this:

    * registers ``cleanup`` with ``atexit`` and initialises every context
      field that ``cleanup`` inspects;
    * reads the ``SELENIUM_QUIT``, ``BEHAVE_KEEP_TEMPDIRS`` and
      ``SELENIUM_VISIBLE`` environment variables;
    * creates the ``Builder`` and derives the active tag values (browser,
      platform) from its configuration;
    * for a local run starts a display and the ``openbox`` window manager,
      for a remote run sets up the tunnel;
    * obtains the driver, starts the server process connected through two
      FIFOs, and installs a ``SIGCHLD`` handler;
    * collects the log names into ``context.log_checkers``.

    Raises:
        Exception: when the context object has no ``step_registry``.
    """

    atexit.register(cleanup, context, True)

    if not hasattr(context, "step_registry"):
        raise Exception("you must use a test runner that exposes "
                        "step_registry on the context object")

    # We need to set these to None so that cleanup does not fail. It
    # expects to be able to check these fields without having to check
    # first for their existence.
    context.driver = None
    context.wm = None
    context.display = None
    context.server = None
    context.server_tempdir = None
    context.download_dir = None
    context.tunnel_id = None
    context.tunnel = None
    context.builder = None
    context.created_documents = {}

    context.selenium_quit = os.environ.get("SELENIUM_QUIT")
    context.behave_keep_tempdirs = os.environ.get("BEHAVE_KEEP_TEMPDIRS")
    context.visible = os.environ.get("SELENIUM_VISIBLE")

    userdata = context.config.userdata
    context.builder = builder = Builder(conf_path, userdata)

    # With the "check_selenium_config" userdata flag set, only dump the
    # configuration and exit.
    dump_config(builder)
    if userdata.get("check_selenium_config", False):
        exit(0)

    setup_screenshots(context)

    # Maps the configured browser name to the short value used in tags.
    browser_to_tag_value = {
        "INTERNETEXPLORER": "ie",
        "CHROME": "ch",
        "FIREFOX": "ff",
        "EDGE": "edge"
    }

    values = {
        'browser': browser_to_tag_value[builder.config.browser],
    }

    platform = builder.config.platform
    if platform.startswith("OS X "):
        values['platform'] = 'osx'
    elif platform.startswith("WINDOWS "):
        values['platform'] = 'win'
    elif platform == "LINUX" or platform.startswith("LINUX "):
        values['platform'] = 'linux'

    # We have some cases that need to match a combination of platform
    # and browser
    # NOTE(review): values['platform'] is only set for the three platform
    # families above; any other platform string would raise KeyError on the
    # next line - confirm that is intended.
    values['platform_browser'] = values['platform'] + "," + values['browser']

    context.active_tag_matcher = ActiveTagMatcher(values)

    # Without this, window sizes vary depending on the actual browser
    # used.
    initial_window_size = context.initial_window_size = \
        {"width": 1366, "height": 768}

    if not builder.remote:
        # Local run: the display is visible when SELENIUM_VISIBLE is set or
        # when SELENIUM_QUIT is one of the values listed below.
        visible = context.visible or \
            context.selenium_quit in ("never", "on-success", "on-enter")
        context.display = Display(visible=visible,
                                  size=(initial_window_size["width"],
                                        initial_window_size["height"]))
        context.display.start()
        print("Display started")
        builder.update_ff_binary_env('DISPLAY')
        context.wm = subprocess.Popen(["openbox", "--sm-disable"])
        print("Window manager started")

        chrome_options = builder.local_conf.get("CHROME_OPTIONS", None)
        if chrome_options:
            # We set a temporary directory for Chrome downloads. Even if
            # we do not test downloads, this will prevent Chrome from
            # polluting our *real* download directory.
            context.download_dir = tempfile.mkdtemp()
            prefs = {"download.default_directory": context.download_dir}
            chrome_options.add_experimental_option("prefs", prefs)
    else:
        # Remote run: no local display or window manager.
        context.display = None
        context.wm = None

        # Reuse the tunnel named by TUNNEL_ID if given, otherwise start one.
        tunnel_id = os.environ.get("TUNNEL_ID")
        if not tunnel_id:
            context.tunnel_id = builder.start_tunnel()
        else:
            builder.set_tunnel_id(tunnel_id)

    driver = builder.get_driver()
    context.driver = driver
    print("Obtained driver")
    context.util = selenic.util.Util(driver, 5)

    # Optional delay between steps, in seconds; stays falsy when unset.
    behave_wait = os.environ.get("BEHAVE_WAIT_BETWEEN_STEPS")
    context.behave_wait = behave_wait and float(behave_wait)

    # Two FIFOs in a temporary directory carry messages to and from the
    # server process.
    context.server_tempdir = tempfile.mkdtemp()
    server_write_fifo = os.path.join(context.server_tempdir, "fifo_to_server")
    os.mkfifo(server_write_fifo)
    server_read_fifo = os.path.join(context.server_tempdir, "fifo_from_server")
    os.mkfifo(server_read_fifo)

    nginx_port = str(builder.get_unused_port())
    server = subprocess.Popen([
        "utils/start_server", server_write_fifo, server_read_fifo, nginx_port
    ],
                              close_fds=True)
    context.server = ServerControl(server, server_read_fifo, server_write_fifo)
    # Invoke sigchld(context) whenever a child process changes state.
    signal.signal(signal.SIGCHLD, lambda *_: sigchld(context))

    # We must add the port to the server
    context.builder.SERVER += ":" + nginx_port

    context.selenium_logs = os.environ.get("SELENIUM_LOGS", False)

    remove_server_limit()

    # One LogChecker per log name reported by the management command.
    lognames = subprocess.check_output(
        ["./manage.py", "btwworker", "lognames"])

    context.log_checkers = [LogChecker(name) for name in lognames.splitlines()]
Exemplo n.º 38
0
class Agent:

    def __init__(self, sess, n_agent, agent_to_env, env_to_agent, replay_buffer, writer, filename, learner_policy_parameters, agent_to_learner, learner_to_agent):
        """Store the agent's collaborators and build its TensorFlow operations.

        Args:
            sess: session object used later to run the built operations.
            n_agent: number identifying this agent; agent 1 gets extra
                summaries and, when recording video, a hidden display.
            agent_to_env: queue used to send data to the environment.
            env_to_agent: queue used to receive data from the environment.
            replay_buffer: buffer that run() adds transitions to.
            writer: summary writer used for TensorBoard logging.
            filename: name used when building the trajectory save path.
            learner_policy_parameters: the learner's policy variables, copied
                into this agent's policy by the update operation.
            agent_to_learner: queue used to send data to the learner.
            learner_to_agent: queue used to receive data from the learner.
        """
        print("Initializing agent " + str(n_agent) + "...")

        # Identity and shared resources
        self.n_agent = n_agent
        self.sess = sess
        self.writer = writer
        self.filename = filename
        self.replay_buffer = replay_buffer
        self.learner_policy_parameters = learner_policy_parameters

        # Communication channels with the environment and the learner
        self.agent_to_env = agent_to_env
        self.env_to_agent = env_to_agent
        self.agent_to_learner = agent_to_learner
        self.learner_to_agent = learner_to_agent

        # Actor network, then the operation that refreshes it from the
        # learner, then the TensorBoard summary operations.
        self.build_actor()
        self.build_actor_update_operation()
        self.create_summary_functions()

        # Only one hidden display is needed for video recording: agent 1 owns it
        if Settings.RECORD_VIDEO:
            if self.n_agent == 1:
                self.display = Display(size = (1400,900), visible = False)
                self.display.start()

        print("Agent %i initialized!" % self.n_agent)


    
    def create_summary_functions(self):
        """Create the placeholders and merged summaries used for TensorBoard.

        Every agent gets a per-agent summary of timesteps and reward per
        episode. Agent 1 additionally gets a "Test_agent" summary that is fed
        from the same two placeholders.
        """
        tag_prefix = "Agent_" + str(self.n_agent)

        # Values fed in at the end of each episode
        self.timestep_number_placeholder = tf.placeholder(tf.float32)
        self.episode_reward_placeholder = tf.placeholder(tf.float32)

        # Per-agent episode statistics
        timesteps_summary = tf.summary.scalar(tag_prefix + "/Number_of_timesteps", self.timestep_number_placeholder)
        reward_summary = tf.summary.scalar(tag_prefix + "/Episode_reward", self.episode_reward_placeholder)
        self.regular_episode_summary = tf.summary.merge([timesteps_summary, reward_summary])

        # Only agent 1 is used to test performance, so only it logs test results
        if self.n_agent != 1:
            return

        test_reward_summary = tf.summary.scalar("Test_agent/Episode_reward", self.episode_reward_placeholder)
        test_timesteps_summary = tf.summary.scalar("Test_agent/Number_of_timesteps", self.timestep_number_placeholder)
        self.test_time_episode_summary = tf.summary.merge([test_reward_summary, test_timesteps_summary])


    def build_actor(self):
        """Create the state placeholder and this agent's policy network."""
        # Scope under which the network is built, e.g. 'agent_3'
        scope_name = 'agent_' + str(self.n_agent)

        # Batch of observations: any number of rows, OBSERVATION_SIZE columns
        self.state_placeholder = tf.placeholder(
            dtype = tf.float32,
            shape = [None, Settings.OBSERVATION_SIZE],
            name = 'state_placeholder',
        )

        # The actor's policy neural network
        self.policy = BuildActorNetwork(self.state_placeholder, scope = scope_name)


    def build_actor_update_operation(self):
        # Update agent's policy network parameters from the most up-to-date version from the learner
        update_operations = []
        source_variables = self.learner_policy_parameters
        destination_variables = self.policy.parameters

        # For each parameters in the network
        for source_variable, destination_variable in zip(source_variables, destination_variables):
            # Directly copy from the learner to the agent
            update_operations.append(destination_variable.assign(source_variable))

        # Save the operation that performs the actor update
        self.update_actor_parameters = update_operations
    
    def reset_action_augment_log(self):
        """Recreate the past-action queue and fill it with zero actions."""
        history_length = Settings.AUGMENT_STATE_WITH_ACTION_LENGTH

        # State-augmentation queue (holds previous actions)
        self.past_actions = queue.Queue(maxsize = history_length)

        # Start from an all-zero action history, never blocking on insert
        for _ in range(history_length):
            self.past_actions.put_nowait(np.zeros(Settings.ACTION_SIZE))
            
    def augment_state_with_actions(self, total_state):
        # Just received a total_state from the environment, need to augment 
        # it with the past action data and return it
        
        past_action_data = np.asarray(self.past_actions.queue).reshape([-1]) # past actions reshaped into a column
        augmented_state = np.concatenate([total_state, past_action_data])
        
        # Remove the oldest entry from the action log queue
        self.past_actions.get(False)
        
        return augmented_state

    def run(self, stop_run_flag, replay_buffer_dump_flag, starting_episode_number):
        """Run this agent in its own environment until training is finished.

        Episodes are run until Settings.NUMBER_OF_EPISODES is exceeded or
        stop_run_flag is set. The hidden display started in __init__ (agent 1
        with Settings.RECORD_VIDEO) is stopped on every exit path, including
        exceptions and the SystemExit raised when the learner does not answer
        a render request in time.

        Args:
            stop_run_flag: object exposing is_set(); once set, no further
                episode is started.
            replay_buffer_dump_flag: object exposing wait(); waited on before
                every insertion into the replay buffer.
            starting_episode_number: sequence indexed by (n_agent - 1) that
                gives the first episode number for this agent.
        """
        try:
            last_episode_number = self._run_episodes(stop_run_flag, replay_buffer_dump_flag, starting_episode_number)
        finally:
            #################################
            ##### All episodes complete #####
            #################################
            # If we were recording video, stop the display. Doing this in a
            # finally block keeps the display from outliving a failed run.
            if Settings.RECORD_VIDEO and self.n_agent == 1:
                self.display.stop()

        # Notify user of completion
        print("Actor %i finished after running %i episodes!" % (self.n_agent, last_episode_number))

    def _pop_n_step_return(self):
        """Pop the oldest N-step memory entry and compute its discounted return.

        Returns:
            (observation_0, action_0, n_step_reward, discount_factor) where
            n_step_reward is reward_0 plus the discounted rewards still in the
            N-step memory, and discount_factor is the discount that applies to
            the step following the last reward included.
        """
        # Grab the oldest data from the n-step memory
        observation_0, action_0, reward_0 = self.n_step_memory.popleft()
        # N-step reward starts with reward_0
        n_step_reward = reward_0
        # Initialize gamma
        discount_factor = Settings.DISCOUNT_FACTOR
        for (_, _, reward_i) in self.n_step_memory:
            # Calculate the n-step reward
            n_step_reward += reward_i*discount_factor
            discount_factor *= Settings.DISCOUNT_FACTOR # for the next step, gamma**(i+1)

        return observation_0, action_0, n_step_reward, discount_factor

    def _run_episodes(self, stop_run_flag, replay_buffer_dump_flag, starting_episode_number):
        """Episode loop behind run(); takes the same arguments.

        Returns:
            episode_number - 1 at the time the loop ends, i.e. the number of
            the last episode that was run.
        """
        # Runs the agent in its own environment
        # Runs for a specified number of episodes or until told to stop
        print("Starting to run agent %i at episode %i." % (self.n_agent, starting_episode_number[self.n_agent -1]))

        # Initializing parameters for agent network
        self.sess.run(self.update_actor_parameters)

        # Getting the starting episode number. If we are restarting a training
        # run that has crashed, the starting episode number will not be 1.
        episode_number = starting_episode_number[self.n_agent - 1]

        # Resetting the noise scale
        noise_scale = 0.

        # Start time
        start_time = time.time()

        # Creating the temporary memory space for calculating N-step returns
        self.n_step_memory = deque()

        # For all requested episodes or until user flags for a stop (via Ctrl + C)
        while episode_number <= Settings.NUMBER_OF_EPISODES and not stop_run_flag.is_set():

            ####################################
            #### Getting this episode ready ####
            ####################################

            # Clearing the N-step memory for this episode
            self.n_step_memory.clear()

            # Reset the action_log, if applicable
            if Settings.AUGMENT_STATE_WITH_ACTION_LENGTH > 0:
                self.reset_action_augment_log()

            # Checking if this is a test time (when we run an agent in a
            # noise-free environment to see how the training is going).
            # Only agent_1 is used for test time
            test_time = (self.n_agent == 1) and (episode_number % Settings.CHECK_GREEDY_PERFORMANCE_EVERY_NUM_EPISODES == 0 or episode_number == 1)

            # Resetting the environment for this episode by sending a boolean
            if test_time and Settings.TEST_ON_DYNAMICS:
                self.agent_to_env.put((True, test_time)) # Reset into a dynamics environment only if it's test time and desired
            else:
                self.agent_to_env.put((False, test_time)) # Reset into a kinematics environment
            total_state = self.env_to_agent.get()

            # Augment total_state with past actions, if appropriate
            if Settings.AUGMENT_STATE_WITH_ACTION_LENGTH > 0:
                total_state = self.augment_state_with_actions(total_state)

            # Decide once per episode whether it is recorded and rendered.
            # video_due: agent 1 is recording and this episode is on the video schedule.
            # render_episode: additionally, the environment is not 'gym'.
            video_due = self.n_agent == 1 and Settings.RECORD_VIDEO and (episode_number % (Settings.CHECK_GREEDY_PERFORMANCE_EVERY_NUM_EPISODES*Settings.VIDEO_RECORD_FREQUENCY) == 0 or episode_number == 1)
            render_episode = video_due and not Settings.ENVIRONMENT == 'gym'

            # Calculating the noise scale for this episode. The noise scale
            # allows for changing the amount of noise added to the actor during training.
            if test_time:
                # It's test time! Run this episode without noise (if desired) to evaluate performance.
                if Settings.NOISELESS_AT_TEST_TIME:
                    noise_scale = 0

                # Additionally, if this episode is going to be rendered, start
                # logging the states & actions encountered in it.
                # NOTE(review): the logs are only created on test-time episodes, so
                # this presumably relies on every rendered episode also being a
                # test-time episode - confirm VIDEO_RECORD_FREQUENCY is a whole number.
                if video_due:
                    raw_total_state_log = []
                    observation_log = []
                    action_log = []
                    next_observation_log = []
                    instantaneous_reward_log = []
                    cumulative_reward_log = []
                    done_log = []
                    discount_factor_log = []
                    guidance_position_log = []
                    raw_total_state_log.append(total_state)

            else:
                # Regular training episode, use noise.
                # Noise is decayed during the training
                noise_scale = Settings.NOISE_SCALE * Settings.NOISE_SCALE_DECAY ** episode_number

            # Normalizing the total_state to 1 separately along each dimension
            # to avoid the 'vanishing gradients' problem
            if Settings.NORMALIZE_STATE:
                total_state = (total_state - Settings.STATE_MEAN)/Settings.STATE_HALF_RANGE

            # Discarding irrelevant states to obtain the observation
            observation = np.delete(total_state, Settings.IRRELEVANT_STATES)

            # Resetting items for this episode
            episode_reward = 0
            timestep_number = 0
            done = False

            # Stepping through time until episode completes.
            while not done:
                ##############################
                ##### Running the Policy #####
                ##############################
                action = self.sess.run(self.policy.action_scaled, feed_dict = {self.state_placeholder: np.expand_dims(observation,0)})[0] # Expanding the observation into a batch of one

                # Calculating random action to be added to the noise chosen from the policy to force exploration.
                if Settings.UNIFORM_OR_GAUSSIAN_NOISE:
                    # Uniform noise (sampled between -/+ the action range)
                    exploration_noise = np.random.uniform(low = -Settings.ACTION_RANGE, high = Settings.ACTION_RANGE, size = Settings.ACTION_SIZE)*noise_scale
                else:
                    # Gaussian noise (standard normal distribution scaled by the action range)
                    exploration_noise = np.random.normal(size = Settings.ACTION_SIZE)*Settings.ACTION_RANGE*noise_scale # random number multiplied by the action range

                # Add exploration noise to original action, and clip it in case we've exceeded the action bounds
                action = np.clip(action + exploration_noise, Settings.LOWER_ACTION_BOUND, Settings.UPPER_ACTION_BOUND)

                # Adding the action taken to the past_actions log
                if Settings.AUGMENT_STATE_WITH_ACTION_LENGTH > 0:
                    self.past_actions.put(action)

                ################################################
                #### Step the dynamics forward one timestep ####
                ################################################
                # Send the action to the environment process
                self.agent_to_env.put((np.concatenate([action, np.zeros([1])]),)) # The concatenated 0 is to command 0 altitude acceleration

                # Receive results from stepped environment
                next_total_state, reward, done, *guidance_position = self.env_to_agent.get() # Anything after the first three items is collected into guidance_position

                # Add reward we just received to running total for this episode
                episode_reward += reward

                # Augment total_state with past actions, if appropriate
                if Settings.AUGMENT_STATE_WITH_ACTION_LENGTH > 0:
                    next_total_state = self.augment_state_with_actions(next_total_state)

                # Keep the raw (un-normalized) state for rendering
                if render_episode and not done:
                    raw_total_state_log.append(next_total_state)

                # Normalize the state
                if Settings.NORMALIZE_STATE:
                    next_total_state = (next_total_state - Settings.STATE_MEAN)/Settings.STATE_HALF_RANGE

                # Discarding irrelevant states
                next_observation = np.delete(next_total_state, Settings.IRRELEVANT_STATES)

                # Store the data in this temporary buffer until we calculate the n-step return
                self.n_step_memory.append((observation, action, reward))

                # If the n-step memory is full enough
                if (len(self.n_step_memory) >= Settings.N_STEP_RETURN):
                    observation_0, action_0, n_step_reward, discount_factor = self._pop_n_step_return()

                    # Dump data into large replay buffer, unless we are testing performance.
                    # If the prioritized replay buffer is currently dumping data,
                    # wait until that is done before adding more data to the buffer
                    if not test_time:
                        replay_buffer_dump_flag.wait() # blocks until replay_buffer_dump_flag is True
                        self.replay_buffer.add((observation_0, action_0, n_step_reward, next_observation, done, discount_factor))

                    # If this episode is being rendered, log the state for rendering later
                    if render_episode:
                        observation_log.append(observation_0)
                        action_log.append(action_0)
                        next_observation_log.append(next_observation)
                        cumulative_reward_log.append(episode_reward)
                        instantaneous_reward_log.append(n_step_reward)
                        done_log.append(done)
                        discount_factor_log.append(discount_factor)
                        guidance_position_log.append(guidance_position)

                # End of timestep -> next state becomes current state
                observation = next_observation
                timestep_number += 1

                # If this episode is done, drain the N-step buffer, calculate
                # returns, and dump in replay buffer unless it is test time.
                if done:
                    # Episode has just finished, calculate the remaining N-step entries
                    while len(self.n_step_memory) > 0:
                        observation_0, action_0, n_step_reward, discount_factor = self._pop_n_step_return()

                        # dump data into large replay buffer
                        if not test_time:
                            replay_buffer_dump_flag.wait()
                            self.replay_buffer.add((observation_0, action_0, n_step_reward, next_observation, done, discount_factor))

                        # If this episode is being rendered, log the state for rendering later
                        if render_episode:
                            observation_log.append(observation_0)
                            action_log.append(action_0)
                            next_observation_log.append(next_observation)
                            cumulative_reward_log.append(episode_reward)
                            instantaneous_reward_log.append(n_step_reward)
                            done_log.append(done)
                            discount_factor_log.append(discount_factor)
                            guidance_position_log.append(guidance_position)

            ################################
            ####### Episode Complete #######
            ################################
            # If this episode is being rendered, render it now.
            if render_episode:
                print("Rendering Actor %i at episode %i" % (self.n_agent, episode_number))

                os.makedirs(os.path.dirname(Settings.MODEL_SAVE_DIRECTORY + self.filename + '/trajectories/'), exist_ok=True)
                np.savetxt(Settings.MODEL_SAVE_DIRECTORY + self.filename + '/trajectories/' + str(episode_number) + '.txt',np.asarray(raw_total_state_log))

                # Ask the learner to tell us the value distributions of the state-action pairs encountered in this episode
                self.agent_to_learner.put((np.asarray(observation_log), np.asarray(action_log), np.asarray(next_observation_log), np.asarray(instantaneous_reward_log), np.asarray(done_log), np.asarray(discount_factor_log)))

                # Wait for the results
                try:
                    critic_distributions, target_critic_distributions, projected_target_distribution, loss_log = self.learner_to_agent.get(timeout = 3)

                    bins = np.linspace(Settings.MIN_V, Settings.MAX_V, Settings.NUMBER_OF_BINS)

                    # Render the episode
                    environment_file.render(np.asarray(raw_total_state_log), np.asarray(action_log), np.asarray(instantaneous_reward_log), np.asarray(cumulative_reward_log), critic_distributions, target_critic_distributions, projected_target_distribution, bins, np.asarray(loss_log), np.squeeze(np.asarray(guidance_position_log)), episode_number, self.filename, Settings.MODEL_SAVE_DIRECTORY)

                except queue.Empty:
                    # NOTE(review): the message says the animation is skipped, but
                    # SystemExit ends this agent's run - confirm which is intended.
                    print("Skipping this animation!")
                    raise SystemExit

            # Periodically update the agent with the learner's most recent version of the actor network parameters
            if episode_number % Settings.UPDATE_ACTORS_EVERY_NUM_EPISODES == 0:
                self.sess.run(self.update_actor_parameters)

            # Periodically print to screen how long it's taking to run these episodes
            if episode_number % Settings.DISPLAY_ACTOR_PERFORMANCE_EVERY_NUM_EPISODES == 0:
                print("Actor " + str(self.n_agent) + " ran " + str(Settings.DISPLAY_ACTOR_PERFORMANCE_EVERY_NUM_EPISODES) + " episodes in %.1f minutes, and is now at episode %i" % ((time.time() - start_time)/60, episode_number))
                start_time = time.time()

            ###################################################
            ######## Log training data to tensorboard #########
            ###################################################
            # Logging the number of timesteps and the episode reward.
            feed_dict = {self.episode_reward_placeholder:  episode_reward, self.timestep_number_placeholder: timestep_number}
            if test_time:
                summary = self.sess.run(self.test_time_episode_summary, feed_dict = feed_dict)
            else:
                summary = self.sess.run(self.regular_episode_summary,   feed_dict = feed_dict)
            self.writer.add_summary(summary, episode_number)

            # Increment the episode counter
            episode_number += 1

        return episode_number - 1
Exemplo n.º 39
0
def AdjustResolution():
    """Start an invisible 800x800 virtual display."""
    virtual_screen = Display(size=(800, 800), visible=0)
    virtual_screen.start()
Exemplo n.º 40
0
        }
      }
      return elemInfo;
    }
    
    
    return JSON.stringify(getElemInfo());
    '''
    
    returned = driver.execute_script(js_script)
    with open('page_elements.json','w',encoding='utf-8') as f:
        f.write(returned)

# Bind both handles up front so the cleanup below never hits an unbound name
# (and hides the real error) when start-up fails part-way through.
display = None
driver = None
try:
    print("Initializing...")
    display = Display(visible=0, size=(1280, 720))
    display.start()

    firefox_profile = FirefoxProfile()
    driver = webdriver.Firefox(firefox_profile)
    driver.set_page_load_timeout(90)
    driver.implicitly_wait(30)
    print("Initialization completed")

    # Load jQuery
    with open('jquery.min.js') as f:
        driver.execute_script(f.read())
    main()
finally:
    print("Running Garbage Collection")
    try:
        if driver is not None:
            driver.quit()
    finally:
        # Shut the virtual display down as well, even if quitting the
        # browser failed.
        if display is not None:
            display.stop()
Exemplo n.º 41
0
class Deamonizer:
    """Run a configurable task in an endless loop, optionally headless.

    Callers fill ``pre_functionality`` (called once) and
    ``main_functionality`` (called on every loop iteration, followed by a
    five minute sleep) with a ``function`` and its ``args`` / ``kwargs``, then
    call :meth:`run`.
    """

    def __init__(self):
        # Task repeated by run(): a callable plus its positional and keyword
        # arguments. "args"/"kwargs" left as None mean "no arguments".
        self.main_functionality = {
            "function": None,
            "args": None,
            "kwargs": None
        }
        # Task called once by run() before the loop starts.
        self.pre_functionality = {
            "function": None,
            "args": None,
            "kwargs": None
        }
        # Output callable; set by parse_command_line().
        self.print_text = None
        # True when the command line asked for a visible browser.
        self.visibility = False
        # Virtual display handle; only set when running non-visible.
        self.display = None

    def format_log(self, priority, description, text):
        """Return one formatted, optionally colored, log line.

        The line holds the current time, ``priority``, ``description``
        (shortened to 30 characters) and ``text``. The color is chosen from
        ``priority`` without regard to case: "info" is blue, "warn" uses the
        warning color, "error" and "fatal" use the failure color; any other
        priority (e.g. "debug") is not colored.

        Guidance on choosing a priority
        (http://stackoverflow.com/questions/7486596/commons-logging-priority-best-practices,
        retrieved 1453177472):

        DEBUG - genuinely debug-level info; not seen in production, where
            INFO is the minimum level. Good for timings, event counts, etc.
        INFO - minimum level for production; data useful for forensic
            investigation and for confirming successful outcomes. Must be
            safe for end users to see (no secrets, no profanity).
        WARN - not an error as such, but the system may be entering dodgy
            territory, e.g. business logic such as "number of ordered
            products < 0".
        ERROR - exceptions (unless there is a good reason to reduce to WARN
            or INFO); app/system errors, not bad business logic.
        FATAL - only for errors so severe that the app cannot start or
            continue.
        """
        # Match on a lowercased copy so 'INFO' and 'info' are colored alike,
        # while the caller's spelling is kept in the output.
        level = priority.lower()
        if 'info' in level:
            start_color = bcolors.OKBLUE
        elif 'warn' in level:
            start_color = bcolors.WARNING
        elif 'error' in level or 'fatal' in level:
            start_color = bcolors.FAIL
        else:
            start_color = None

        if start_color is None:
            # Uncolored line: no color code and no reset code either.
            start_color = ''
            end_color = ''
        else:
            end_color = bcolors.ENDC

        if len(description) > 30:
            description = description[:27] + '...'
        return '{4}{0!s:30} {1!s:20} {2!s:30} {3}{5}'.format(
            time.ctime(time.time()), priority, description, text, start_color,
            end_color)

    def _log_to_file(self, path):
        """Append all further standard output to the file at ``path``."""
        # The file stays open on purpose: it replaces sys.stdout from now on.
        sys.stdout = open(path, 'a')
        self.print_text = print_optional('file')

    def parse_command_line(self):
        """Configure output and display mode from ``sys.argv``.

        Usage: python name_of_file.py [stdout|file_name] [visible|nonvisible]

        With the single argument ``compile`` a ``run_<name>.sh`` launcher and
        a ``crontab_<name>.txt`` entry are written to the current directory
        and the process exits. Otherwise output goes to standard output or is
        appended to the named file, standard error is redirected to the same
        place, and a virtual display is started unless ``visible`` was given.
        """
        self.print_text = print_optional('stdout')
        if len(sys.argv) == 2:
            if sys.argv[1] == 'compile':
                fileTmpName = os.path.basename(__file__)
                base_name = fileTmpName.replace('.py', '')
                script_dir = os.path.dirname(os.path.abspath(fileTmpName))

                # Saving of the run file
                filenameRun = 'run_' + base_name + '.sh'
                result_string = ('cd ' + script_dir + ' && ' + sys.executable +
                                 ' ' + fileTmpName + ' ' + base_name +
                                 '.log nonvisible 2>&1')
                with open(filenameRun, 'w') as f:
                    f.write(result_string)

                # Saving of the crontab script
                filename = 'crontab_' + base_name + '.txt'
                result_string = '30 7 * * * sh ' + script_dir + '/' + filenameRun
                # Parenthesised so it works as a statement (Python 2) and as
                # a function call (Python 3).
                print(result_string)
                with open(filename, 'w') as f:
                    f.write(result_string)
                sys.exit()
            else:
                self._log_to_file(sys.argv[1])
        elif len(sys.argv) == 3:
            if sys.argv[1] != 'stdout':
                self._log_to_file(sys.argv[1])
            self.visibility = sys.argv[2] == 'visible'
        sys.stderr = sys.stdout
        if not self.visibility:
            from pyvirtualdisplay import Display
            self.display = Display(visible=0, size=(1024, 768))
            self.display.start()
            self.print_text(
                self.format_log('debug', 'message', 'Using virtual display.'))
        else:
            self.print_text(
                self.format_log('debug', 'message',
                                'Not using virtual display.'))

    @staticmethod
    def _invoke(functionality):
        """Call the configured function, if any, with its stored arguments."""
        function = functionality["function"]
        if function is None:
            return
        # "args"/"kwargs" default to None in __init__; treat that as "none
        # given" instead of failing on *None / **None.
        args = functionality["args"]
        kwargs = functionality["kwargs"]
        function(*(() if args is None else args),
                 **({} if kwargs is None else kwargs))

    def run(self):
        """Parse the command line, run the pre-task, then loop forever.

        Each iteration waits for an internet connection, calls the main task
        and sleeps five minutes. Exceptions raised by the main task are
        logged and the loop continues. When the loop is left (for example on
        KeyboardInterrupt or SystemExit) the virtual display, if one was
        started, is stopped.
        """
        self.parse_command_line()
        # Try to set up a counter for exceptions.
        exceptionsTimeouts = 0
        try:
            # Run Pre
            self._invoke(self.pre_functionality)
            while True:
                self.print_text(
                    self.format_log('debug', 'message',
                                    'Inside the infinite loop.'))
                # Escape the fate of no internet
                self.print_text(
                    self.format_log('info', 'current time',
                                    str(datetime.now())))
                while not connected_to_internet():
                    self.print_text(
                        self.format_log(
                            'warning', 'connection error',
                            'Not connected to the internet. Going to sleep for five minutes'
                        ))
                    time.sleep(60 * 5)
                self.print_text(
                    self.format_log('debug', 'message',
                                    'About to start the try except'))
                try:
                    self._invoke(self.main_functionality)
                except TimeoutException:
                    self.print_text(
                        self.format_log(
                            'error', 'exception',
                            'Timeout exception of selenium. Trying again.'))
                    exceptionsTimeouts += 1
                    # if exceptionsTimeouts % 6 == 0:
                    # 	os.system("python send_text.py \"Error in quickbooks Too many timeouts. "+str(e)+"\"")
                except Exception as e:
                    # Keep the loop alive, but say what went wrong.
                    self.print_text(
                        self.format_log('fatal', 'exception',
                                        'Unrecognized exception: %r' % (e,)))
                self.print_text(
                    self.format_log('debug', 'message',
                                    'Going to sleep for five minutes'))
                time.sleep(60 * 5)
        finally:
            # The loop above never ends normally, so cleanup has to live here.
            if not self.visibility and self.display is not None:
                self.display.stop()
Exemplo n.º 42
0
def deploy_firefox(status_queue, browser_params, manager_params, crash_recovery):
    """Launch a Firefox instance configured from the input dictionaries.

    Progress is reported by putting ``('STATUS', <stage>, <payload>)`` tuples
    on ``status_queue`` after each major step (profile creation, profile tar
    loading, display start, launch attempt, browser launch).

    Args:
        status_queue: queue-like object with a ``put`` method.
        browser_params: per-browser settings; the keys read here are
            ``mobile_platform``, ``profile_tar``, ``crawl_id``,
            ``disable_flash``, ``random_attributes``, ``headless``,
            ``extension_enabled`` and ``proxy``.
        manager_params: manager-wide settings; the keys read here are
            ``logger_address``, ``aggregator_address``, ``testing`` and the
            optional ``ldb_address``.
        crash_recovery: when true, a profile tar is loaded as a recovered
            profile (without the ``load_flash`` argument).

    Returns:
        A ``(driver, browser_profile_path, profile_settings)`` tuple.

    Raises:
        ValueError: if no profile settings were loaded or randomised and
            ``mobile_platform`` is neither ``"android"`` nor ``"iphone"``.
    """
    root_dir = os.path.dirname(__file__)  # directory of this file
    logger = loggingclient(*manager_params['logger_address'])

    display_pid = None
    display_port = None
    fp = webdriver.FirefoxProfile()
    browser_profile_path = fp.path + '/'
    status_queue.put(('STATUS','Profile Created',browser_profile_path))

    # Set all prefs related to mobile js
    mobile_platform = browser_params['mobile_platform']
    configure_firefox.set_mobile_prefs(fp, mobile_platform)

    profile_settings = None  # Imported browser settings
    if browser_params['profile_tar'] and not crash_recovery:
        logger.debug("BROWSER %i: Loading initial browser profile from: %s" % (browser_params['crawl_id'], browser_params['profile_tar']))
        profile_settings = load_profile(browser_profile_path, manager_params, browser_params,
                                        browser_params['profile_tar'],
                                        load_flash=browser_params['disable_flash'] is False)
    elif browser_params['profile_tar']:
        logger.debug("BROWSER %i: Loading recovered browser profile from: %s" % (browser_params['crawl_id'], browser_params['profile_tar']))
        profile_settings = load_profile(browser_profile_path, manager_params, browser_params,
                                        browser_params['profile_tar'])
    status_queue.put(('STATUS','Profile Tar',None))

    if browser_params['random_attributes'] and profile_settings is None:
        logger.debug("BROWSER %i: Loading random attributes for browser" % browser_params['crawl_id'])
        profile_settings = dict()

        # choose a random screen-res from list
        resolutions = list()
        with open(os.path.join(root_dir, 'screen_resolutions.txt'), 'r') as f:
            for line in f:
                resolutions.append(tuple(line.strip().split(',')))
        profile_settings['screen_res'] = random.choice(resolutions)

        # set a random user agent from list
        ua_strings = list()
        with open(os.path.join(root_dir, 'user_agent_strings.txt'), 'r') as f:
            for line in f:
                ua_strings.append(line.strip())
        profile_settings['ua_string'] = random.choice(ua_strings)

    # If profile settings still not set - set defaults
    if profile_settings is None:
        profile_settings = dict()
        if mobile_platform == "android":
            profile_settings['screen_res'] = ANDROID_SCREEN_RES
            profile_settings['ua_string'] = "Mozilla/5.0 (Android 7.0; Mobile; rv:55.0) Gecko/55.0 Firefox/55.0"
            profile_settings['color_depth'] = 24
        elif mobile_platform == "iphone":
            profile_settings['screen_res'] = IPHONE_SCREEN_RES
            profile_settings['ua_string'] = "Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_2 like Mac OS X) AppleWebKit/603.2.4 (KHTML, like Gecko) FxiOS/7.5b3349 Mobile/14F89 Safari/603.2.4"
            profile_settings['color_depth'] = 32
        else:
            raise ValueError("Mobile mobile_platform value is not recognized")
    if profile_settings['ua_string'] is not None:
        logger.debug("BROWSER %i: Overriding user agent string with the following: %s" % (browser_params['crawl_id'], profile_settings['ua_string']))
        fp.set_preference("general.useragent.override", profile_settings['ua_string'])

    if browser_params['headless']:
        display = Display(visible=0, size=profile_settings['screen_res'])#,color_depth=profile_settings['color_depth'])
        display.start()
        display_pid = display.pid
        # NOTE(review): relies on the layout of pyvirtualdisplay's command
        # line; element 5 is presumably the ":<port>" argument - confirm.
        display_port = display.cmd_param[5][1:]
    status_queue.put(('STATUS','Display',(display_pid, display_port)))

    # Write extension configuration
    if browser_params['extension_enabled']:
        ext_loc = os.path.join(root_dir + "/../", 'Extension/firefox/openwpm.xpi')
        ext_loc = os.path.normpath(ext_loc)
        fp.add_extension(extension=ext_loc)
        fp.set_preference("*****@*****.**", "all")
        extension_config = dict()
        extension_config.update(browser_params)
        extension_config['logger_address'] = manager_params['logger_address']
        extension_config['sqlite_address'] = manager_params['aggregator_address']
        # dict.has_key() only exists on Python 2; .get() yields the same
        # value-or-None result on both Python 2 and Python 3.
        extension_config['leveldb_address'] = manager_params.get('ldb_address')
        extension_config['testing'] = manager_params['testing']
        with open(browser_profile_path + 'browser_params.json', 'w') as f:
            json.dump(extension_config, f)
        logger.debug("BROWSER %i: OpenWPM Firefox extension loaded" % browser_params['crawl_id'])

    if browser_params['proxy']:
        logger.warning("BROWSER %i: Use of the proxy is DEPRECATED and will be "
                       "removed from future releases. Use http_instrument." %
                       browser_params['crawl_id'])
        PROXY_HOST = "localhost"
        PROXY_PORT = browser_params['proxy']

        # Direct = 0, Manual = 1, PAC = 2, AUTODETECT = 4, SYSTEM = 5
        fp.set_preference("network.proxy.type", 1)
        fp.set_preference("network.proxy.http", PROXY_HOST)
        fp.set_preference("network.proxy.http_port", PROXY_PORT)
        fp.set_preference("network.proxy.ssl", PROXY_HOST)  # https sites
        fp.set_preference("network.proxy.ssl_port", PROXY_PORT)

        # set this to exclude sites from using proxy
        # http://kb.mozillazine.org/Network.proxy.no_proxies_on
        fp.set_preference("network.proxy.no_proxies_on", "")

        # copy the dbs into temp profile
        # these were created by manually adding the cert to
        # a previous tmp selenium profile
        shutil.copy(os.path.join(root_dir + "/../", 'Proxy/key3.db'), fp.path + '/key3.db')
        shutil.copy(os.path.join(root_dir + "/../", 'Proxy/cert8.db'), fp.path + '/cert8.db')

    # Disable flash
    if browser_params['disable_flash']:
        fp.set_preference('plugin.state.flash', 0)

    # Configure privacy settings
    configure_firefox.privacy(browser_params, fp, root_dir, browser_profile_path)

    # Set various prefs to improve speed and eliminate traffic to Mozilla
    configure_firefox.optimize_prefs(fp)

    # Launch the webdriver
    status_queue.put(('STATUS','Launch Attempted',None))
    fb = FirefoxBinary(root_dir  + "/../../firefox-bin/firefox")
    driver = webdriver.Firefox(firefox_profile=fp, firefox_binary=fb)
    status_queue.put(('STATUS','Browser Launched',(int(driver.binary.process.pid), profile_settings)))

    # set window size
    driver.set_window_size(*profile_settings['screen_res'])

    return driver, browser_profile_path, profile_settings
Exemplo n.º 43
0
 def __init__(self):
     """Start a hidden virtual display and a cookie-free Firefox session."""
     # Keep the display on the instance: with only a local reference the
     # virtual display could never be stopped once __init__ returned.
     self.display = Display(visible=0, size=(1600, 1024))
     self.display.start()
     try:
         self.driver = webdriver.Firefox()
     except Exception:
         # Do not leave the display running if the browser cannot start.
         self.display.stop()
         raise
     self.driver.delete_all_cookies()
Exemplo n.º 44
0
# У$ - Economics
# С$ - Sociology
# Ч$ - CultureС$

# Deployment switch: 'PRODUCTION' selects the Chrome setup that runs inside a
# hidden virtual display; any other value uses the bundled win32 chromedriver.
ENV = 'PRODUCTION'
# ENV = ''

if ENV == 'PRODUCTION':
    # Imported lazily so the non-production branch does not need
    # pyvirtualdisplay installed.
    from pyvirtualdisplay import Display
    options = webdriver.ChromeOptions()
    options.add_argument('--disable-dev-shm-usage')
    options.add_argument('--no-sandbox')
    options.binary_location = '/usr/bin/google-chrome-stable'

    # The display must be started before Chrome is launched.
    # NOTE(review): it is never stopped in the visible part of this script.
    display = Display(visible=0, size=(1024, 768))
    display.start()

    driver = webdriver.Chrome(
        executable_path=
        '/srv/graduate-work-table/graduate_report/parser/unix/chromedriver',
        service_args=['--verbose'],
        chrome_options=options)
else:
    driver = webdriver.Chrome('win32/chromedriver.exe')

# Open the search form of the ARD_EX database on irbis-nbuv.gov.ua.
driver.get(
    'http://irbis-nbuv.gov.ua/cgi-bin/irbis64r_81/cgiirbis_64.exe?C21COM=F&I21DBN=ARD_EX&P21DBN=ARD&S21FMT=&S21ALL=&Z21ID='
)
# Fixed pause after navigation; presumably gives the page time to load.
time.sleep(5)
Exemplo n.º 45
0
def generate_map(config):
    """Render a map page in headless Firefox and return a screenshot of it.

    ``config`` is consumed destructively: ``renderer`` (default ``'leaflet'``),
    ``format`` (default ``'byte'``), ``width`` and ``height`` are popped, an
    ``image_name`` key is added, and the remainder is serialised as JSON into
    the ``<renderer>.html`` template read from the current directory.

    Args:
        config: dict with at least ``width`` and ``height``.

    Returns:
        The screenshot as raw bytes (``format == 'byte'``), as
        ASCII-encoded base64 bytes (``format == 'base64'``), or ``None`` when
        rendering failed, timed out, or the format is not recognised.

    Raises:
        KeyError: if ``width`` or ``height`` is missing from ``config``.
        Exception: errors raised while starting the browser or reading the
            template propagate; errors during page rendering are logged and
            result in ``None``.
    """
    import logging

    renderer = config.pop('renderer', 'leaflet')
    output_format = config.pop('format', 'byte')
    width = config.pop('width')
    height = config.pop('height')

    display = Display(visible=0, size=(width, height))
    display.start()

    browser = None
    html_path = None
    image = None
    try:
        fp = webdriver.FirefoxProfile()
        fp.set_preference('browser.download.folderList', 2)
        fp.set_preference('browser.download.manager.showWhenStarting', False)
        fp.set_preference('browser.download.dir', TMP_DIR)
        fp.set_preference('browser.helperApps.neverAsk.saveToDisk',
                          'octet/stream')

        browser = webdriver.Firefox(firefox_profile=fp)
        # Grow the window by the size of the browser chrome so the viewport
        # itself is exactly width x height.
        dx, dy = browser.execute_script(
            'let w=window; return [w.outerWidth - w.innerWidth, w.outerHeight - w.innerHeight];'
        )
        browser.set_window_size(width + dx, height + dy)

        with open('{}.html'.format(renderer), 'r') as f:
            html = f.read()

        try:
            html_fd, html_path = tempfile.mkstemp(suffix='.html', dir=WORK_DIR)
            image_name = str(uuid.uuid4())
            config['image_name'] = image_name

            # Write through the descriptor returned by mkstemp so it is
            # closed even if formatting or writing fails.
            with os.fdopen(html_fd, 'w') as f:
                f.write(
                    html % {
                        'WD': os.getcwd(),
                        'WIDTH': width,
                        'HEIGHT': height,
                        'CONFIG': json.dumps(config)
                    })

            browser.get('file://{}'.format(html_path))

            delay = 10
            tries = 3

            # One initial attempt plus `tries` retries on timeout.
            for _ in range(tries + 1):
                try:
                    WebDriverWait(browser, delay).until(
                        EC.presence_of_element_located((By.ID, 'Ready')))
                except TimeoutException:
                    continue
                screenshot = browser.get_screenshot_as_base64()
                if output_format == 'byte':
                    image = base64.b64decode(screenshot)
                elif output_format == 'base64':
                    image = bytes(screenshot, 'ascii')
                break
        except Exception:
            # Rendering stays best-effort (the caller gets None), but the
            # failure is logged instead of being silently discarded.
            logging.getLogger(__name__).exception('Map rendering failed')
    finally:
        # Release every resource regardless of how rendering ended.
        try:
            if browser is not None:
                browser.quit()
        finally:
            try:
                display.stop()
            finally:
                if html_path is not None:
                    os.remove(html_path)

    return image
Exemplo n.º 46
0
def multi_mode(cli_parsed):
    """Run the web, user-agent-cycling and VNC/RDP scans, then write reports.

    Targets are stored in the ``ew.db`` database under ``cli_parsed.d``; web
    targets are processed by ``worker_thread`` processes, VNC/RDP targets via
    Twisted connections driven by a Qt application. When ``cli_parsed.resume``
    is set, the database is reused instead of being initialised and filled.

    Args:
        cli_parsed: parsed command-line options (attributes read here include
            ``d``, ``resume``, ``web``, ``headless``, ``show_selenium``,
            ``threads``, ``cycle``, ``ua_init``, ``vnc``, ``rdp``, ``timeout``).
    """
    dbm = db_manager.DB_Manager(cli_parsed.d + '/ew.db')
    dbm.open_connection()
    if not cli_parsed.resume:
        dbm.initialize_db()
    dbm.save_options(cli_parsed)
    # Manager-backed objects shared with the worker processes.
    m = Manager()
    targets = m.Queue()
    lock = m.Lock()
    multi_counter = m.Value('i', 0)
    display = None

    def exitsig(*args):
        # SIGINT handler: close the database and exit immediately; only the
        # main process prints the resume hint.
        dbm.close()
        if current_process().name == 'MainProcess':
            print ''
            print 'Resume using ./EyeWitness.py --resume {0}'.format(cli_parsed.d + '/ew.db')
        os._exit(1)

    signal.signal(signal.SIGINT, exitsig)
    if cli_parsed.resume:
        pass
    else:
        # Fresh run: record every target in the database.
        url_list, rdp_list, vnc_list = target_creator(cli_parsed)
        if any((cli_parsed.web, cli_parsed.headless)):
            for url in url_list:
                dbm.create_http_object(url, cli_parsed)
        for rdp in rdp_list:
            dbm.create_vnc_rdp_object('rdp', rdp, cli_parsed)
        for vnc in vnc_list:
            dbm.create_vnc_rdp_object('vnc', vnc, cli_parsed)

    if any((cli_parsed.web, cli_parsed.headless)):
        # A hidden virtual display is only used for --web runs where the
        # browser should not be shown.
        if cli_parsed.web and not cli_parsed.show_selenium:
            display = Display(visible=0, size=(1920, 1080))
            display.start()

        multi_total = dbm.get_incomplete_http(targets)
        if multi_total > 0:
            if cli_parsed.resume:
                print 'Resuming Web Scan ({0} Hosts Remaining)'.format(str(multi_total))
            else:
                print 'Starting Web Requests ({0} Hosts)'.format(str(multi_total))

        # Never start more workers than there are targets.
        if multi_total < cli_parsed.threads:
            num_threads = multi_total
        else:
            num_threads = cli_parsed.threads
        # One None per worker; presumably the sentinel telling a worker to
        # stop - verify against worker_thread.
        for i in xrange(num_threads):
            targets.put(None)
        try:
            workers = [Process(target=worker_thread, args=(
                cli_parsed, targets, lock, (multi_counter, multi_total))) for i in xrange(num_threads)]
            for w in workers:
                w.start()
            for w in workers:
                w.join()
        except Exception as e:
            print str(e)

        # Set up UA table here
        if cli_parsed.cycle is not None:
            ua_dict = get_ua_values(cli_parsed.cycle)
            if not cli_parsed.ua_init:
                # First pass: create one UA entry per (completed host without
                # an error state, user agent) pair, then persist ua_init so
                # this is not repeated.
                dbm.clear_table("ua")
                completed = dbm.get_complete_http()
                completed[:] = [x for x in completed if x.error_state is None]
                for item in completed:
                    for browser, ua in ua_dict.iteritems():
                        dbm.create_ua_object(item, browser, ua)

                cli_parsed.ua_init = True
                dbm.clear_table("opts")
                dbm.save_options(cli_parsed)

            # Run a separate pool of workers for each user agent.
            for browser, ua in ua_dict.iteritems():
                targets = m.Queue()
                multi_counter.value = 0
                multi_total = dbm.get_incomplete_ua(targets, browser)
                if multi_total > 0:
                    print("[*] Starting requests for User Agent {0}"
                          " ({1} Hosts)").format(browser, str(multi_total))
                if multi_total < cli_parsed.threads:
                    num_threads = multi_total
                else:
                    num_threads = cli_parsed.threads
                for i in xrange(num_threads):
                    targets.put(None)
                workers = [Process(target=worker_thread,
                                   args=(cli_parsed, targets, lock,
                                         (multi_counter, multi_total),
                                         (browser, ua)))
                           for i in xrange(num_threads)]
                for w in workers:
                    w.start()
                for w in workers:
                    w.join()

    if any((cli_parsed.vnc, cli_parsed.rdp)):
        log._LOG_LEVEL = log.Level.ERROR
        multi_total, targets = dbm.get_incomplete_vnc_rdp()
        if multi_total > 0:
            print ''
            print 'Starting VNC/RDP Requests ({0} Hosts)'.format(str(multi_total))

            app = QtGui.QApplication(sys.argv)
            # NOTE(review): a 10 ms no-op timer; presumably keeps the Qt event
            # loop returning to Python so signals are handled - confirm.
            timer = QTimer()
            timer.start(10)
            timer.timeout.connect(lambda: None)

            # add qt4 reactor
            # The reactor is installed before twisted.internet.reactor is
            # imported below.
            import qt4reactor
            qt4reactor.install()
            from twisted.internet import reactor

            for target in targets:
                # Re-point the target's paths when the output directory
                # differs from the one stored with the target.
                if os.path.dirname(cli_parsed.d) != os.path.dirname(target.screenshot_path):
                    target.set_paths(cli_parsed.d)
                # Each connection factory gets its own DB_Manager.
                tdbm = db_manager.DB_Manager(cli_parsed.d + '/ew.db')
                if target.proto == 'vnc':
                    reactor.connectTCP(
                        target.remote_system, target.port,
                        vnc_module.RFBScreenShotFactory(
                            target.screenshot_path, reactor, app,
                            target, tdbm))
                else:
                    reactor.connectTCP(
                        target.remote_system, int(target.port),
                        rdp_module.RDPScreenShotFactory(
                            reactor, app, 1200, 800,
                            target.screenshot_path, cli_parsed.timeout,
                            target, tdbm))
            reactor.runReturn()
            app.exec_()

    # Tear down and write the reports from everything marked complete.
    if display is not None:
        display.stop()
    results = dbm.get_complete_http()
    vnc_rdp = dbm.get_complete_vnc_rdp()
    dbm.close()
    m.shutdown()
    write_vnc_rdp_data(cli_parsed, vnc_rdp)
    sort_data_and_write(cli_parsed, results)
Exemplo n.º 47
0
from pyvirtualdisplay import Display
from selenium import webdriver

print 'start'

display = Display(visible=0, size=(800, 600))
display.start()

browser = webdriver.Firefox()
browser.get('http://www.baidu.com')
print browser.title
browser.quit()

display.stop()
Exemplo n.º 48
0
 def setUpClass(cls):
     """Start a hidden virtual display, then run the parent class set-up."""
     cls.display = Display(visible=0, size=(800, 600))
     cls.display.start()
     try:
         super(FMTestCase, cls).setUpClass()
     except Exception:
         # unittest does not call tearDownClass when setUpClass raises, so
         # the display has to be stopped here to avoid leaking it.
         cls.display.stop()
         raise
Exemplo n.º 49
0
def get_urls(query,
             url,
             verbose=False,
             warning=True,
             user_agent=None,
             proxy=None,
             **kwargs):
    """
      Bypass Google captchas and Google API by using selenium-webdriver to gather
      the Google URL. This will open a robot controlled browser window and attempt
      to get a URL from Google that will be used for scraping afterwards.

      Only downside to this method is that your IP and user agent will be visible
      until the application pulls the URL.

      :param query: text typed into the search box (the element named 'q')
      :param url: address of the search engine page to open
      :param verbose: emit additional debug log messages
      :param warning: emit the automation warning message
      :param user_agent: value written to ``general.useragent.override``
      :param proxy: optional single-entry dict mapping proxy type to address
      :param kwargs: accepted for compatibility; not used here
      :return: the browser's current URL three seconds after submitting the query

      The browser and the virtual display are always shut down, including when
      a selenium call raises; such exceptions propagate to the caller.
    """
    if verbose:
        logger.debug(
            set_color("setting up the virtual display to hide the browser...",
                      level=10))
    ff_display = Display(visible=0, size=(800, 600))
    ff_display.start()
    browser = None
    try:
        logger.info(
            set_color(
                "firefox browser display will be hidden while it performs the query..."
            ))
        if warning:
            logger.warning(
                set_color(
                    "your web browser will be automated in order for Zeus to successfully "
                    "bypass captchas and API calls. this is done in order to grab the URL "
                    "from the search and parse the results. please give selenium time to "
                    "finish it's task...",
                    level=30))
        if verbose:
            logger.debug(
                set_color("running selenium-webdriver and launching browser...",
                          level=10))

        if verbose:
            logger.debug(
                set_color(
                    "adjusting selenium-webdriver user-agent to '{}'...".format(
                        user_agent),
                    level=10))
        if proxy is not None:
            # The dict is expected to hold one entry; joining the keys yields
            # that single proxy type as a string.
            proxy_type = ''.join(proxy.keys())
            proxy_to_use = Proxy({proxy_type: proxy[proxy_type]})
            if verbose:
                logger.debug(
                    set_color("setting selenium proxy to '{}'...".format(
                        proxy_type + "://" + ''.join(proxy.values())),
                              level=10))
        else:
            proxy_to_use = None
        profile = webdriver.FirefoxProfile()
        profile.set_preference("general.useragent.override", user_agent)
        browser = webdriver.Firefox(profile, proxy=proxy_to_use)

        logger.info(set_color("browser will open shortly..."))
        browser.get(url)
        if verbose:
            logger.debug(
                set_color(
                    "searching search engine for the 'q' element (search button)...",
                    level=10))
        search = browser.find_element_by_name('q')
        logger.info(
            set_color("searching '{}' using query '{}'...".format(url, query)))
        search.send_keys(query)
        search.send_keys(Keys.RETURN)  # hit return after you enter search text
        time.sleep(3)
        if verbose:
            logger.debug(set_color("obtaining URL from selenium..."))
        retval = browser.current_url
        if verbose:
            logger.debug(
                set_color("found current URL from selenium browser '{}'...".format(
                    retval),
                          level=10))
        logger.info(set_color("closing the browser and continuing process.."))
        return retval
    finally:
        # Always end the WebDriver session and stop the display, so a failed
        # lookup (e.g. no 'q' element) does not leave either one running.
        try:
            if browser is not None:
                browser.quit()
        finally:
            ff_display.stop()
from pyvirtualdisplay import Display
import scipy.optimize
import random
import scipy.misc
import torch
from scripts.rl_zforcing import ZForcing
import os

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np


# Start a hidden virtual display at import time.
# NOTE(review): presumably needed so rendering works on a machine without a
# screen - confirm against the environment code.
display_ = Display(visible=0, size=(550, 500))
display_.start()



# Parse arguments

parser = argparse.ArgumentParser()
parser.add_argument("--env", required=True,
                    help="name of the environment to be run (REQUIRED)")
# argparse itself does not enforce that one of --demos-origin / --model is
# given; both default to None here.
parser.add_argument("--demos-origin", default=None,
                    help="origin of the demonstrations: human | agent (REQUIRED or --model REQUIRED)")
# The effective seed default described in the help text is not applied here
# (the argparse default is None).
parser.add_argument("--seed", type=int, default=None,
                    help="random seed (default: 0 if model agent, 1 if demo agent)")
parser.add_argument("--model", default=None,
                    help="name of the trained model (REQUIRED or --demos-origin REQUIRED)")
Exemplo n.º 51
0
### Grab the information from our configuration file
config = scraperfunctions.load_config()

### Get the current time if we don't already have one (and transform into a date object)
curr_time = scraperfunctions.get_curr_time(curr_time, parsefile)

### Establish our MySQL Connection (for logging, etc.)
engine, connection, metadata, mysql_table_name, mysql_log_name = scraperfunctions.create_mysql_engine(config)

########### Download actions
if download_desktop == 1:
    try:
        ### Initiate our virtual display
        print("Initiating virtual display")
        display = Display(visible=0, size=(1920, 1080))
        display.start()
    
        ### Let's start our browser
        browser = scraperfunctions.create_browser()
        
        ### Let's load the page work
        scraperfunctions.load_homepage(browser, pubshort, puburl)
        
        ### See if the MV list requires extra actions
        if puburl_mv_extraactions != None:
            ### Actions for acquiring MV List
            pass
        
        ### Let's first store the source code
        html_code = browser.page_source
Exemplo n.º 52
0
def setup_func():
    """Create the test fixtures: a hidden display and a gnumeric process.

    Both objects are published through the module-level globals ``screen``
    and ``process``.
    """
    global process, screen
    screen = Display(visible=0)
    screen.start()
    # Launch gnumeric in the background and wait 3 seconds for it to come up.
    launcher = EasyProcess('gnumeric')
    process = launcher.start().sleep(3)
Exemplo n.º 53
0
    start = time.process_time()
    env = gym.make('Pong-v4')
    print("Number of obswervations: {}".format(env.observation_space))
    print("Number of allowed actions: {}".format(env.action_space))
    print(tf.__version__)
    print(tf.keras.__version__)
    optimizer = tf.train.AdamOptimizer(learning_rate)
    model = create_model()
    # model.load_weights('model/agentcycle1750-agent99gamma1kepochs')
    # print(model.summary())
    # print('Model loaded successfully!')
    memory = Memory()

    import skvideo.io
    from pyvirtualdisplay import Display
    display = Display(visible=0)
    display.start()
    out = skvideo.io.FFmpegWriter(filename+'.mp4')
    start_training = time.process_time()
    for cycle in range(num_cycles):
        observation = env.reset()
        previous_frame = pre_process(observation)
        while True:
            frame = env.render(mode='rgb_array')
            out.writeFrame(np.array(frame))
            current_frame = pre_process(observation)  
            delta_frame = current_frame - previous_frame   
            action = next_action(observations = delta_frame, model = model)
            next_observation, reward, done, info = env.step(action)
            memory.add_to_memory(delta_frame, action, reward)
            if done:
Exemplo n.º 54
0
def find_data():
    """Search the FSSAI food-licensing site and return the matches as JSON.

    The search parameters are read from the JSON body of the current request:
    ``state``, ``district``, ``business_name``, ``registration_num``,
    ``business_kind`` and ``product_desc``. Missing or empty optional values
    are skipped.

    Returns:
        ``jsonify`` of a list of ``[name, state, registration]`` rows, where
        ``registration`` is the 14-digit number found in the row, or 0 when
        the row does not contain exactly one.

    The Chrome session and the virtual display are always shut down before
    returning, including when a selenium call raises.
    """
    # Recieve JSON data as dictionary
    inputs = request.get_json()

    def text_input(key):
        # A missing key must behave like an empty field; str(None) would
        # otherwise become the literal search value 'None'.
        value = inputs.get(key)
        return '' if value is None else str(value)

    # Parse Inputs
    instate = text_input('state')
    inbizname = text_input('business_name')
    indist = text_input('district')
    inregnum = inputs.get('registration_num')
    inbizkind = text_input('business_kind')
    inproddesc = text_input('product_desc')

    url = 'https://foodlicensing.fssai.gov.in/index.aspx'

    display = Display(visible=0, size=(1920, 1080))
    display.start()
    driver = None
    try:
        # Init headless ChromeDriver
        chrome_options = Options()
        chrome_options.add_argument("--headless")
        driver = webdriver.Chrome(chrome_options=chrome_options)
        driver.set_window_size(1920, 1080)

        def fill_and_submit(xpath, text):
            # Click the field, type the text and submit the form. A stale
            # element is looked up again and retried once; a second failure
            # propagates.
            for attempt in range(2):
                try:
                    field = driver.find_element_by_xpath(xpath)
                    field.click()
                    field.send_keys(text)
                    field.submit()
                    return
                except StaleElementReferenceException:
                    if attempt == 1:
                        raise

        # Go to URL
        try:
            driver.get(url)
            driver.find_element_by_xpath(
                '//*[@id="demo-tabs-vertical"]/ul[2]/li[2]').click()
        except NoSuchElementException:
            driver.find_element_by_link_text('Click here to Refresh').click()

        # Click on the 'FBO Search' tab
        try:
            WebDriverWait(driver, 5).until(
                EC.presence_of_element_located((By.ID, 'demo-tabs-vertical')))
            driver.find_element_by_xpath(
                '//*[@id="demo-tabs-vertical"]/ul[2]/li[2]').click()
        except StaleElementReferenceException:
            print('Its taking too long!')

        ## Fill in Form with provided inputs

        # Find the matching State
        select0 = Select(
            driver.find_element_by_xpath('//*[@id="ctl00_content_ddlState"]'))
        print([o.text for o in select0.options])
        select0.select_by_visible_text(instate)
        # Find the matching District
        if indist != '':
            select1 = Select(
                driver.find_element_by_xpath(
                    '//*[@id="ctl00_content_ddlDistrict"]'))
            print([o.text for o in select1.options])
            select1.select_by_visible_text(indist)
        # Enter Company Name
        if inbizname != '':
            fill_and_submit('//*[@id="ctl00_content_txtName"]', inbizname)
        # Enter License/Registration number (the field is now actually
        # clicked; the call parentheses on click were missing before)
        if inregnum is not None and inregnum != '':
            fill_and_submit('//*[@id="ctl00_content_txtLicense"]', inregnum)
        # Select Kind of Business
        if inbizkind != '':
            select4 = Select(
                driver.find_element_by_xpath('//*[@id="ctl00_content_ddlKOB"]'))
            print([o.text for o in select4.options])
            select4.select_by_visible_text(inbizkind)
        # Enter Product Description
        if inproddesc != '':
            fill_and_submit('//*[@id="ctl00_content_txtProduct"]', inproddesc)
        # Click search
        driver.find_element_by_xpath(
            '//*[@id="ctl00_content_btnsearch"]').click()
        # Wait for new tab to load
        try:
            WebDriverWait(driver, 7).until(
                EC.presence_of_element_located((By.ID, 'ctl00_content_update')))
        except TimeoutException:
            print('Results took too long to load!')

        # Switch active tab to search results
        window_after = driver.window_handles[1]
        driver.switch_to.window(window_after)
        # Handle Pagination
        try:
            select6 = Select(
                driver.find_element_by_xpath(
                    '//*[@id="ctl00_content_ddlPage"]'))
            print([o.text for o in select6.options])
            select6.select_by_visible_text('300')
            try:
                WebDriverWait(driver, 5).until(
                    EC.presence_of_element_located(
                        (By.ID, 'ctl00_content_update')))
            except TimeoutException:
                print('Results took too long to load!')
        except NoSuchElementException:
            print('No pagination!')

        # Scrape Data
        info = []
        rows = driver.find_element_by_xpath(
            '//*[@id="ctl00_content_gvDetails"]/tbody').find_elements_by_tag_name(
                'tr')
        for row_number, row in enumerate(rows, 1):
            cells = row.find_element_by_xpath(
                '//*[@id="ctl00_content_gvDetails"]/tbody/tr[' +
                str(row_number) + ']').text
            name = cells.split(", ")[0]
            name = name[2:]
            registration = 0
            # The registration number is the only 14-digit token in the row.
            regis = [int(s) for s in cells.split() if s.isdigit()]
            registr = [word for word in regis if len(str(word)) == 14]
            if len(registr) == 1:
                registration = registr[0]
            info.append([name, instate, registration])

        # The first table row is dropped (presumably the header row).
        info = info[1:]

        return jsonify(info)
    finally:
        # Each call starts its own browser and display, so both have to be
        # released here or every request leaks a pair of processes.
        try:
            if driver is not None:
                driver.quit()
        finally:
            display.stop()
Exemplo n.º 55
0
from easyprocess import EasyProcess
from pyvirtualdisplay import Display
#~ from pyvirtualdisplay.smartdisplay import SmartDisplay # needs pyscreenshot
import logging
logging.basicConfig(level=logging.DEBUG)
import time

# Size in pixels of the display created below.
_W = 700
_H = 600
# height percents
# (two fractions that sum to 1; not used in the visible part of this script)
hp1 = 0.6
hp2 = 1 - hp1

# visible=1 asks for a display that is shown on screen rather than hidden.
# The Display object is not kept, so it cannot be stopped later.
Display(visible=1, size=(_W, _H)).start()

# EasyProcess.start() # spawns process in background
# EasyProcess.check() # loops process in foreground

# Start the awesome window manager with the local rc.lua; a failure is
# printed and the script carries on.
try:
    EasyProcess('awesome -c rc.lua').start()
except Exception, detail:
    print detail

# Fixed pause; presumably lets the window manager come up first.
time.sleep(2)

# Start the SciTE editor from the user's home directory.
try:
    EasyProcess('bash -c "cd $HOME && scite"').start()
except Exception, detail:
    print detail

time.sleep(2)
Exemplo n.º 56
0
def codechef():
    """Submit the source file named by ``sys.argv[2]`` to CodeChef.

    Prompts for an optional contest id, the question id and the account
    credentials, logs in through a Firefox session running inside a hidden
    virtual display, pastes the source code, submits it and prints the
    verdict once it appears.

    Raises:
        ValueError: if the file extension does not map to a language that
            this script knows a CodeChef option value for.

    The browser and the virtual display are always shut down on exit.
    """
    import getpass

    # File suffix -> language name.
    language_by_suffix = {
        ".py": "python3",
        ".cpp": "cpp",
        ".java": "java",
        ".c": "c",
        ".php": "php",
        ".pl": "perl",
        ".rb": "ruby",
        ".go": "go",
        ".sh": "bash",
        ".sql": "sql",
        ".pas": "pascal",
        ".cs": "csharp",
        ".r": "r",
        ".js": "rhino",
        ".m": "octave",
        ".coffee": "coffeescript",
        ".b": "brainfuck",
        ".swift": "swift",
        ".lua": "lua",
        ".kt": "kotlin",
    }
    # Language name -> value of the matching <option> in the language
    # drop-down of the submit page.
    option_value_by_language = {
        "cpp": "44",
        "java": "10",
        "python3": "116",
        "c": "11",
        "rhino": "35",
    }

    def check_exists_by_tag_name(scope_to_search, tag):
        """Return True if ``scope_to_search`` contains an element ``tag``."""
        try:
            scope_to_search.find_element_by_tag_name(tag)
        except NoSuchElementException:
            return False
        return True

    def get_lang_for_submission(File_Path):
        """Return the language name for ``File_Path``, or None if unknown."""
        for suffix, language in language_by_suffix.items():
            if File_Path.endswith(suffix):
                return language
        return None

    def codechef_login(user, passw):
        """Log in, submit the source file and print the verdict."""
        username = driver.find_element_by_id("edit-name")
        username.send_keys(user)
        password = driver.find_element_by_id("edit-pass")
        password.send_keys(passw)
        submit = driver.find_element_by_id("edit-submit")
        submit.click()
        url = driver.current_url
        if url == "https://www.codechef.com/session/limit":
            # Too many active sessions: tick every box, then click the last
            # one again so that it ends up unticked.
            box = driver.find_elements_by_xpath("//input[@type='checkbox']")
            for check in box:
                check.click()
            box[len(box) - 1].click()
            submit_session = driver.find_element_by_id("edit-submit")
            submit_session.click()

        language_choice = get_lang_for_submission(sys.argv[2])
        option_value = option_value_by_language.get(language_choice)
        if option_value is None:
            # Fail with a clear message instead of passing None on to
            # select_by_value further down.
            raise ValueError(
                "Unsupported submission language for file: {}".format(
                    sys.argv[2]))
        # Close the source file deterministically after reading it.
        with open(sys.argv[2], 'r') as source_file:
            code_script = source_file.read()
        time.sleep(5)
        text_area = driver.find_element_by_id("edit-program")
        text_area.send_keys(code_script)
        select = Select(driver.find_element_by_id("edit-language"))
        select.select_by_value(option_value)
        code_submit = driver.find_element_by_id("edit-submit-1")
        driver.execute_script("arguments[0].click();", code_submit)
        time.sleep(4)
        # Poll every 4 seconds until the verdict (<strong>) is rendered.
        while True:
            result = driver.find_element_by_id("display_result")
            if check_exists_by_tag_name(result, "strong") == False:
                time.sleep(4)
            else:
                result_has_come = result.find_element_by_tag_name("strong")
                print(result_has_come.text)
                break

    GRAY = '\033[90m'
    CYAN = '\033[36m'
    RED = '\033[31m'
    YELLOW = '\033[33m'
    END = '\033[0m'
    BOLD = '\033[1m'

    display = Display(visible=0, size=(800, 600))
    display.start()
    driver = None
    try:
        driver = webdriver.Firefox()

        print(RED + BOLD + "Contest[Y/N]" + END, end=' ')
        choice = input()
        contest_id = ""
        # Slicing tolerates an empty answer, which is treated as "no".
        if choice[:1] == "Y":
            print(YELLOW + BOLD + "Enter contest_id" + END, end=' ')
            contest_id = input()
        codechef_link = "https://www.codechef.com/"
        if contest_id != "":
            codechef_link = codechef_link + contest_id + "/submit/"
        else:
            codechef_link = codechef_link + "submit/"
        print(RED + BOLD + "Enter question id:" + END, end=' ')
        question_id = input()
        # NOTE(review): the original credential prompts were mangled by
        # redaction (leaving a syntax error and user/passw undefined); they
        # are reconstructed here from the surviving prompt strings - confirm
        # the prompt colours.
        print(GRAY + BOLD + "Enter your username:" + END, end=' ')
        user = input()
        passw = getpass.getpass(CYAN + BOLD + "Enter your password: " + END)
        codechef_link = codechef_link + question_id
        driver.get(codechef_link)
        codechef_login(user, passw)
    finally:
        # Release the browser and the display even when the submission fails.
        try:
            if driver is not None:
                driver.quit()
        finally:
            display.stop()


# codechef()
Exemplo n.º 57
0
class Scraper(object):
    """Crawl the Redfin sitemap and dump JSON-LD for recently sold homes.

    Constructing an instance runs the whole crawl: it starts a virtual
    display and a headless Chrome session, opens the sitemap, and walks
    state -> county -> "Recently Sold Homes" result pages, appending every
    ``application/ld+json`` script payload found on each result page to
    ``results.csv`` inside ``output_dir_path``.

    The browser and display stay open after a successful crawl; call
    :meth:`close` to release them. If the crawl raises, they are released
    automatically before the exception propagates.
    """

    # Links clicked on the sitemap page to enter a state.
    _STATE_LINKS_XPATH = (
        "//div[@class='sitemap-section']/div[2]/ul[@class='list']/li/div/a")
    # Links clicked on a state page to enter a county.
    _COUNTY_LINKS_XPATH = (
        "//div[@class='sitemap-section'][1]/div[2]/ul[@class='list']/li/div/a")
    # "Recently Sold Homes" link on a county page.
    _RECENTLY_SOLD_XPATH = (
        "//div[@class='sitemap-section'][2]//ul/li//a"
        "[contains(text(), 'Recently Sold Homes')]")
    # Enabled paging buttons on a result page.
    _PAGING_BUTTONS_XPATH = (
        "//div[@class='PagingControls']"
        "/button[@class='clickable buttonControl button-text']")
    # Returns the text of every JSON-LD <script>, with the first '[' and ']'
    # of each one removed.
    _LD_JSON_SCRIPT = (
        'var list = document.querySelectorAll(\'script[type="application/ld+json"]\');\n'
        'ld_list = Array(list.length).join(",").split(",").map((i, index) => '
        'list[index].text.replace(\'[\', \'\').replace(\']\',\'\'));\n'
        'return ld_list;'
    )

    def __init__(self):
        """Set up paths and browser preferences, then run the crawl."""
        # Two directory levels above the file that defines this class.
        self.project_directory = os.path.dirname(
            os.path.dirname(
                os.path.abspath(inspect.getfile(inspect.currentframe()))))
        self.output_dir_path = '/home/InternResults'
        self.download_dir_path = os.path.join(self.project_directory,
                                              "proxy_download_directory")
        self.headers = {
            "User-Agent":
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.81 Safari/537.36"
        }
        self.prefs = {
            "profile.default_content_settings.popups": 0,
            "download.default_directory": self.download_dir_path,
        }
        try:
            self.initialize_driver()
            self.get_states()
        except BaseException:
            # A failed crawl never hands the instance back to the caller, so
            # nobody else could release the browser and the display.
            self.close()
            raise

    def close(self):
        """Quit the browser and stop the virtual display if they exist."""
        driver = getattr(self, "driver", None)
        display = getattr(self, "display", None)
        self.driver = None
        self.display = None
        try:
            if driver is not None:
                driver.quit()
        finally:
            if display is not None:
                display.stop()

    def initialize_driver(self):
        """Start the display and Chrome, then load the Redfin sitemap."""
        self.display = Display(visible=0, size=(800, 600))
        self.display.start()
        # Set driver options
        self.driver_options = webdriver.ChromeOptions()
        self.driver_options.add_experimental_option("prefs", self.prefs)
        for argument in ('--headless', '--no-sandbox',
                         '--disable-dev-shm-usage'):
            self.driver_options.add_argument(argument)
        self.driver = webdriver.Chrome(options=self.driver_options)
        # Element lookups wait up to 30 seconds before failing
        self.driver.implicitly_wait(30)
        self.driver.get('http://www.redfin.com/sitemap')

    def get_states(self):
        """Open the first state link on the sitemap and crawl its counties."""
        state_count = len(
            self.driver.find_elements_by_xpath(self._STATE_LINKS_XPATH))
        for index in range(state_count):
            # Look the links up again on every pass instead of reusing the
            # elements found above (presumably because references go stale
            # once the browser has navigated away and back).
            self.driver.find_elements_by_xpath(
                self._STATE_LINKS_XPATH)[index].click()
            self.timeout(10)
            self.get_county_for_state()
            # Only the first state is processed.
            break

    def get_county_for_state(self):
        """Visit every county link of the current state page, then go back."""
        county_count = len(
            self.driver.find_elements_by_xpath(self._COUNTY_LINKS_XPATH))
        for index in range(county_count):
            print(index)
            # Fresh lookup per county, for the same reason as in get_states.
            self.driver.find_elements_by_xpath(
                self._COUNTY_LINKS_XPATH)[index].click()
            self.timeout(10)
            self.get_listings_for_each_county()
        self.driver.back()
        self.timeout(10)

    def get_listings_for_each_county(self):
        """Save all "Recently Sold Homes" result pages of the current county.

        Best effort: a missing link, missing pagination or any other failure
        is reported on stdout and the crawl carries on. The method always
        finishes with one ``back()`` navigation, plus an earlier one when the
        "Recently Sold Homes" link could be opened.
        """
        try:
            recently_sold_url = self.driver.find_element_by_xpath(
                self._RECENTLY_SOLD_XPATH)
            recently_sold_url.click()
            self.timeout(4)
            try:
                self._scrape_paginated_results()
            except Exception as error:
                # Also the normal exit for result sets without pagination.
                print("Stopped paging through results: {}".format(error))

            self.driver.back()
            self.timeout(5)
        except Exception as error:
            print("Skipping recently sold listings: {}".format(error))
        self.driver.back()
        self.timeout(5)

    def _scrape_paginated_results(self):
        """Save the current result page and every following page."""
        print(self.driver.current_url)
        # Save the results of the first page
        self._save_page_results()

        # find_element (singular) raises when no paging button is present,
        # which ends the scrape for single-page result sets.
        self.driver.find_element_by_xpath(self._PAGING_BUTTONS_XPATH).click()
        forward_count = 1
        paging_buttons = self.driver.find_elements_by_xpath(
            self._PAGING_BUTTONS_XPATH)
        # While two buttons are enabled, the second one is clicked to advance.
        while len(paging_buttons) == 2:
            self._save_page_results()
            paging_buttons[1].click()
            self.timeout(4)
            paging_buttons = self.driver.find_elements_by_xpath(
                self._PAGING_BUTTONS_XPATH)
            forward_count += 1

        # Save the results of the last page reached
        self._save_page_results()

        # Walk the browser history back towards the first result page
        while forward_count != 1:
            self.driver.back()
            forward_count -= 1
            self.timeout(4)

    def _save_page_results(self):
        """Append the current page's JSON-LD payloads to ``results.csv``.

        One payload is written per line and the block ends with a newline,
        so the last record of one page cannot fuse with the first record of
        the next page appended to the same file.
        """
        if not os.path.isdir(self.output_dir_path):
            os.mkdir(self.output_dir_path)
        page_text = "\n".join(self.driver.execute_script(self._LD_JSON_SCRIPT))
        with open('{}/results.csv'.format(self.output_dir_path),
                  'a') as file:
            if page_text:
                file.write(page_text + "\n")

    def timeout(self, explicit_time=0):
        """Sleep for ``explicit_time`` seconds, or a random delay if falsy."""
        if explicit_time:
            time.sleep(explicit_time)
            return
        self.random_delay = scraper_utils.return_randomized_delay()
        time.sleep(self.random_delay)
Exemplo n.º 58
0
def startBrowsing():
    """Start a hidden 800x600 virtual display and return a Chrome webdriver.

    The display is not returned, so it keeps running after the call; only
    the driver is handed to the caller. If Chrome cannot be started, the
    display is stopped again before the error propagates, so a failed call
    leaves nothing running.

    Returns:
        The ``webdriver.Chrome`` instance.
    """
    display = Display(visible=0, size=(800, 600))
    display.start()
    try:
        driver = webdriver.Chrome()
    except Exception:
        # No driver means no caller will ever use this display.
        display.stop()
        raise
    return driver
Exemplo n.º 59
0
def real_scrapping():
    """Scrape affiliate statistics from the realdealbet partner site.

    Logs in through Chrome on a hidden virtual display, reads the dashboard
    "quick stats" row twice (before and after choosing the second option of
    the first dashboard select), then runs the quick summary report for the
    day before the report's default end date.

    Returns:
        list: the text of the quick-stats cells from the first read, the
        cells from the second read (skipping ``'Total -'``), the first
        numeric token of report rows 1, 2, 4, 7 and the last row
        (presumably impressions, clicks, registrations, new depositors and
        commission -- verify against the report layout), and finally the
        report date as ``YYYY/MM/DD``.

    The browser and the display are always released, including when Chrome
    itself fails to start.
    """
    display = Display(visible=0, size=(1200, 900))
    display.start()

    # Bound before the try block so the cleanup below can tell whether the
    # browser was ever created.
    Real = None
    try:
        Real = webdriver.Chrome(
            executable_path=os.path.abspath("/usr/bin/chromedriver"))
        Real.get("http://affiliates.realdealbet.com/")
        # NOTE(review): credentials are hard-coded in source; they should be
        # moved to configuration or the environment.
        Real.find_element_by_name("username").send_keys("id_betfyuk")
        pwd = Real.find_element_by_name("password")
        pwd.send_keys("dontfuckwithme")
        pwd.send_keys(Keys.RETURN)
        Real.implicitly_wait(10)
        # The code expects a second window after login and switches to it.
        window_after = Real.window_handles[1]
        Real.switch_to_window(window_after)

        mtd_valArr = []
        table = Real.find_element(by=By.ID, value="dashboard_quick_stats")
        mtds_val = table.find_element(by=By.CLASS_NAME,
                                      value="row_light_color")
        mtd_valArr.extend(
            cell.text for cell in mtds_val.find_elements_by_tag_name("td"))
        time.sleep(2)
        Real.find_element_by_xpath(
            '//*[@id="dashboard"]/div[1]/div[1]/div/div[1]/div/div/select[1]/option[2]'
        ).click()
        # Fixed wait for the dashboard to refresh with the new selection.
        time.sleep(40)
        table = Real.find_element(by=By.ID, value="dashboard_quick_stats")
        mtds_val = table.find_element(by=By.CLASS_NAME,
                                      value="row_light_color")
        for mtd_val in mtds_val.find_elements_by_tag_name("td"):
            if mtd_val.text != 'Total -':
                mtd_valArr.append(mtd_val.text)

        Real.get(
            "https://partners.realdealbet.com/reporting/quick_summary_report.asp"
        )
        toDate = Real.find_element_by_id('enddate').get_attribute('value')
        toDateObj = datetime.datetime.strptime(toDate, '%Y/%m/%d').date()
        aDayAgo = toDateObj - datetime.timedelta(days=1)
        aDayAgoObj = aDayAgo.strftime("%Y/%m/%d")
        reportDiv = Real.find_element_by_id("reportcriteria")
        merchantDiv = reportDiv.find_elements_by_tag_name("tr")[3]
        merchantId = merchantDiv.find_element_by_tag_name("select")
        merchant = merchantId.find_elements_by_tag_name("option")[0]

        # Both ends of the date range are set to the same day.
        for field_id in ('startdate', 'enddate'):
            Real.execute_script(
                "document.getElementById('" + field_id +
                "').value = '{0}'".format(aDayAgoObj))
        merchant.click()
        time.sleep(5)
        Real.find_element_by_class_name("button").click()
        time.sleep(20)
        tableDiv = Real.find_element_by_id("internalreportdata")
        table = tableDiv.find_element_by_tag_name("table")
        todayVal = table.find_elements_by_tag_name("tr")

        pattern = re.compile(r'[\-\d.\d]+')
        # First numeric token of each report row of interest, in this order.
        for row_index in (1, 2, 4, 7, -1):
            mtd_valArr.append(
                pattern.search(todayVal[row_index].text).group(0))
        mtd_valArr.append(aDayAgoObj)
        print(mtd_valArr)
        return mtd_valArr
    finally:
        try:
            if Real is not None:
                Real.quit()
        finally:
            # Runs even when quitting the browser fails.
            display.stop()
Exemplo n.º 60
0
def _crawl_listing_pages(driver, db, links, table_name):
    """Insert one row per search result found on each page in ``links``."""
    count = 0
    for link in links:
        print("Link : %s" % (link,))

        driver.get(link)
        time.sleep(5)
        medium_results = driver.find_element_by_class_name("medium-results")
        for tag in medium_results.find_elements_by_tag_name("li"):
            count += 1
            print("Count : %s" % (count,))
            link_tag = tag.find_element_by_tag_name("a")
            title = link_tag.text.strip()
            url = link_tag.get_attribute("href")
            types = tag.find_elements_by_class_name("type")
            # Two "type" elements mean (type, institution); otherwise only
            # the institution is taken, from the first element.
            if len(types) == 2:
                resource_type = types[0].text.strip()
                institution = types[1].text.strip()
            else:
                resource_type = None
                institution = types[0].text.strip()
            description = tag.find_element_by_class_name("abstract").text.strip()
            print("title : %s" % (title,))
            print("url : %s" % (url,))
            print("type : %s" % (resource_type,))
            print("institution : %s" % (institution,))
            print("description : %s" % (description,))

            data = {
                'title': title,
                'institution': institution,
                'url': url,
                'type': resource_type,
                'description': description,
            }
            db.insert(table_name, data)


def _collect_meta(col):
    """Return ``{label: value}`` for every ``<dl>`` entry under ``col``.

    A value is a comma-joined string when the ``<dd>`` holds ``<li>`` items,
    a list ``[dd_text, href, href, ...]`` (distinct hrefs, in page order)
    when it holds links, and the stripped ``<dd>`` text otherwise.
    """
    meta = {}
    for dl in col.find_elements_by_tag_name("dl"):
        for div in dl.find_elements_by_tag_name("div"):
            dd = div.find_element_by_tag_name("dd")
            items = dd.find_elements_by_tag_name("li")
            anchors = dd.find_elements_by_tag_name("a")
            if items:
                value = ",".join(item.text.strip() for item in items)
            elif anchors:
                value = [dd.text.strip()]
                seen = []
                for anchor in anchors:
                    href = anchor.get_attribute("href")
                    if href not in seen:
                        seen.append(href)
                        value.append(href)
            else:
                value = dd.text.strip()
            label = div.find_element_by_tag_name("dt").text
            meta[label.replace(":", "").strip()] = value
    return meta


def _store_meta(db, table_name, document_id, meta):
    """Insert the collected metadata, one row per value or per linked URL."""
    for meta_title, value in meta.items():
        print('%s : %s' % (meta_title, value))
        if isinstance(value, list):
            # value is [text, url, url, ...]: pair the text with each URL.
            rows = [(value[0], meta_url) for meta_url in value[1:]]
        else:
            rows = [(value, None)]
        for meta_value, meta_url in rows:
            print("meta_title : %s" % (meta_title,))
            print("meta_value : %s" % (meta_value,))
            print("meta_url : %s" % (meta_url,))
            data = {
                "document_id": document_id,
                "meta_title": meta_title,
                "meta_value": meta_value,
                "meta_url": meta_url,
            }
            db.insert(table_name, data)


def _crawl_landing_pages(driver, db, options):
    """Crawl every not-yet-crawled URL and store its details."""
    import codecs

    count = 0
    # Select from the same table that is marked as crawled below
    # (the default table name is "skill").
    skill = db.query(
        "select distinct url from %s where crawled=0" % (options.table_name,))
    print("Number of urls to crawl  %s" % (len(skill),))
    for (src_url,) in skill:
        print("source url : %s" % (src_url,))
        print("count %s" % count)
        count += 1
        driver.get(src_url)
        author = None
        col = driver.find_element_by_class_name("col-sm-8")
        title = col.find_element_by_tag_name("h1").text.strip()

        # md5 needs bytes; encoding explicitly also copes with non-ASCII
        # titles. The digest is unchanged for ASCII input.
        digest_input = title + src_url
        if not isinstance(digest_input, bytes):
            digest_input = digest_input.encode("utf-8")
        document_id = hashlib.md5(digest_input).hexdigest()

        toc_html = "/mnt/data/kendavar/skillscommons/%s.html" % document_id
        with codecs.open(toc_html, "w", "utf8") as page_file:
            page_file.write(driver.page_source)

        authors = col.find_element_by_class_name("authors")
        # The author text is only taken when the block has no nested <div>.
        if not authors.find_elements_by_tag_name("div"):
            author = authors.text.strip()
        description = col.find_element_by_class_name("abstract").text
        files = col.find_element_by_class_name("files")
        attachment = [
            (attach.text.strip(),
             attach.find_element_by_tag_name("a").get_attribute("href"))
            for attach in files.find_elements_by_class_name("file-information")
        ]
        meta = _collect_meta(col)

        print("title : %s" % (title,))
        print("author : %s" % (author,))
        print("description : %s" % (description,))
        print("toc_path %s" % (toc_html,))
        data = {
            "document_id": document_id,
            "title": title,
            "author": author,
            "description": description,
            "toc_path": toc_html,
        }
        db.insert(options.main_table_name, data)

        for (attachment_title, attachment_url) in attachment:
            print("document_id : %s" % (document_id,))
            print("attachment_title : %s" % (attachment_title,))
            print("attachment_url : %s" % (attachment_url,))
            data = {
                "document_id": document_id,
                "attachment_title": attachment_title,
                "attachment_url": attachment_url,
            }
            db.insert(options.attachment_table_name, data)

        _store_meta(db, options.meta_table_name, document_id, meta)

        # NOTE(review): the WHERE clause is built by string formatting from a
        # scraped URL; a quote in the URL breaks or alters the statement.
        # Switch to a parameterized form if the db helper offers one.
        db.update(options.table_name, {"crawled": 1}, "url='%s'" % src_url)
        print("updated the table")


def Main():
    """Command-line entry point of the skillscommons.org crawler.

    ``--crawl`` scrapes three discovery pages into ``--table-name``.
    ``--crawl-landing`` visits every URL not yet marked as crawled, saves the
    page HTML to disk and stores document, attachment and metadata rows.

    A virtual display is started when ``pyvirtualdisplay`` can be imported
    and started; otherwise the crawl continues without one. Errors raised
    while crawling are printed with a traceback and a screenshot is saved to
    ``<working-dir>/error.png``. The driver and the display are always
    released.

    All ``print`` calls pass a single pre-formatted string, which produces
    the same output under Python 2 and Python 3.
    """
    parser = OptionParser()
    parser.add_option("--crawl", dest="crawl", action="store_true", help="crawl url", default=False)
    parser.add_option("--crawl-landing", dest="crawl_landing", action="store_true", help="crawl url", default=False)
    parser.add_option("--working-dir", dest="workingdir", type="string", help="working directory", default='.')
    parser.add_option("--db-name", dest="db_name", type="string", help="database name", default='skillscommons')
    parser.add_option("--table-name", dest="table_name", type="string", help="table name", default='skill')

    parser.add_option("--main-table-name", dest="main_table_name", type="string", help="main table name", default='skillscommons')
    parser.add_option("--attachment-table-name", dest="attachment_table_name", type="string", help="attachment table name", default='attachment')
    parser.add_option("--meta-table-name", dest="meta_table_name", type="string", help="meta table name", default='meta_data')
    # NOTE(review): store_true with default=True means this flag can never
    # switch Firefox off.
    parser.add_option("--use-firefox", dest="use_firefox", action="store_true", help="use-firefox", default=True)

    (options, args) = parser.parse_args()
    workingdir = options.workingdir.rstrip('/')

    if not os.path.exists(workingdir):
        parser.error("workingdir not exists")

    display = None
    try:
        from pyvirtualdisplay import Display
        display = Display(visible=0, size=(1000, 900))
        display.start()
    except Exception:
        # Run without a virtual display; never stop one that did not start.
        display = None
        print('No Xvfb!')

    try:
        db = mysql.DB(db=options.db_name)
        db.set_autocommit(True)
        driver = crawlutils.open_driver(use_firefox=options.use_firefox)
    except Exception:
        # Setup failures still propagate, but the display is not left behind.
        if display:
            display.stop()
        raise

    links = ["https://www.skillscommons.org/discover?rpp=2000&page=1&group_by=none&etal=0",
             "https://www.skillscommons.org/discover?rpp=2000&page=2&group_by=none&etal=0",
             "https://www.skillscommons.org/discover?rpp=2000&page=3&group_by=none&etal=0"]
    try:
        if options.crawl:
            _crawl_listing_pages(driver, db, links, options.table_name)

        if options.crawl_landing:
            _crawl_landing_pages(driver, db, options)

    except Exception:
        traceback.print_exc()
        if driver:
            driver.save_screenshot(workingdir + '/error.png')
            print(workingdir + '/error.png')

    finally:
        try:
            if driver:
                driver.quit()
        finally:
            if display:
                display.stop()