def scrap_item_details(self, current_win_handle):
    # Switch to every window other than *current_win_handle* and scrape the
    # item's contact details (name, phone, email, street) off that page.
    # Each field is scraped best-effort: a missing element leaves it ''.
    # NOTE(review): the outer try has no except/finally in this view, and
    # locality/country are never assigned — the function appears truncated
    # here; confirm against the full file before editing.
    logger.info('Scraping item details')
    all_win_handles = self.driver.window_handles
    try:
        for win_handle in all_win_handles:
            if win_handle != current_win_handle:
                logger.info('switching window handle')
                self.driver.switch_to.window(win_handle)
                # Fixed pause to let the item page render before scraping.
                sleep(7)
                logger.info('collecting item details')
                name = ''
                phone = ''
                street = ''
                locality = ''  # NOTE(review): never assigned below
                country = ''   # NOTE(review): never assigned below
                email = ''
                # Item name from the page heading (10 s wait).
                try:
                    name = WebDriverWait(
                        self.driver, 10).until(lambda x: x.find_element_by_css_selector(
                            'h1#HEADING.heading_title'))
                    name = name.text
                    name = name.encode('latin-1')
                except Exception, e:
                    pass
                # Phone from the direct-contact block.
                try:
                    phone = WebDriverWait(
                        self.driver, 10
                    ).until(lambda x: x.find_element_by_css_selector(
                        'div.phone.directContactInfo span:nth-child(2)'))
                    phone = phone.text
                    phone = phone.encode('latin-1')
                except Exception, e:
                    pass
                # Email from the details tab's mailto: link.
                # NOTE(review): encode('latin-1') on an already-utf-8-encoded
                # str triggers an implicit ascii decode on Python 2 and can
                # raise for non-ascii addresses — verify intent.
                try:
                    email = WebDriverWait(
                        self.driver, 10
                    ).until(lambda x: x.find_element_by_css_selector(
                        'div.details_tab div.additional_info div.content ul.detailsContent li a'
                    ))
                    email = email.get_attribute("href").encode(
                        'utf-8').replace('mailto:', '')
                    email = email.encode('latin-1')
                except Exception, e:
                    pass
                # Street: first matching address span, if any.
                try:
                    street = WebDriverWait(self.driver, 10).until(
                        lambda x: x.find_elements_by_css_selector(
                            'div.address span.street-address'))
                    street = street[0].text.encode('latin-1')
                except Exception, e:
                    pass
def Results_Search(driver):
    """Walk the ResearcherID search-result list and crawl each author.

    For every result link found on the current page, opens the author's
    RID page in a new window, runs Work_Search() on it, then switches
    back to the result window.  Individual failures are logged and the
    loop continues with the next author.
    """
    namelist = []
    nameUrlist = []
    # Fix: these were previously created only inside the try blocks, so a
    # wait timeout caused a NameError at the loop below instead of a no-op.
    title_list = []
    authorSetsNums = []
    try:
        WebDriverWait(driver, 30).until(
            EC.presence_of_all_elements_located(
                (By.XPATH, '//td[@class="summary_data"]//a')))
        title_list = driver.find_elements_by_xpath(
            '//td[@class="summary_data"]//a')
    except Exception:
        logger.error('連結抓取錯誤')  # failed to fetch result links
    try:
        WebDriverWait(driver, 30).until(
            EC.presence_of_all_elements_located(
                (By.CLASS_NAME, 'authorSetsNum')))
        authorSetsNums = driver.find_elements_by_class_name('authorSetsNum')
    except Exception:
        logger.error('編號抓取錯誤')  # failed to fetch author numbers
    for index, title in enumerate(title_list):
        nameUrlist.append('http://www.researcherid.com/rid/' +
                          title.get_attribute('title'))
        namelist.append(authorSetsNums[index].text + " : " + title.text)
    for index, element in enumerate(namelist):
        logger.info(
            '--------------------------------------------------------------------------'
        )
        # encode/decode dance renders the name on a cp950 (Big5) console;
        # characters outside cp950 are silently dropped.
        logger.info("NAME : " + element.encode("utf8").decode("cp950",
                                                              "ignore"))
        try:
            script = "window.open('" + nameUrlist[index] + "', 'new_window')"
            logger.info(script)
            driver.execute_script(script)
            # Fix: switch_to.window replaces the deprecated switch_to_window.
            # NOTE(review): the opened window is never closed here — looks
            # like Work_Search is expected to close it; confirm, otherwise
            # window_handles[1] stops pointing at the newest window.
            driver.switch_to.window(driver.window_handles[1])
            Work_Search(nameUrlist[index], driver)
            time.sleep(3)
            driver.switch_to.window(driver.window_handles[0])
        except Exception as e:
            # Recover to the result window, log full context, keep going.
            driver.switch_to.window(driver.window_handles[0])
            logger.info("NAME : " + element + ' Crawl Failed ' + str(e))
            tb = sys.exc_info()[2]  # renamed: 'traceback' shadowed the stdlib module
            logger.error(sys.exc_info())
            logger.error(tb.tb_lineno)
            logger.error(e)
            continue
        logger.info("NAME : " +
                    element.encode("utf8").decode("cp950", "ignore") +
                    ' Crawl Over')
        logger.info(
            '--------------------------------------------------------------------------'
        )
def test_platform(self):
    """Create an ad targeted at the Odnoklassniki mobile platform and
    verify the edit page shows the selected platform label.

    Flow: choose product -> choose platform -> fill the ad form ->
    create the company -> open the edit view -> read the platform label.
    """
    advertise = ThatAdvertise(self.driver)
    advertise.set_product()
    platform = Platform(self.driver)
    platform.set_platform()
    create_ads = CreateAds(self.driver)
    create_ads.set_title('test')
    create_ads.set_text('test')
    create_ads.set_image_small(self.IMAGE_SMALL)
    create_ads.set_image_big(self.IMAGE_BIG)
    create_ads.set_link('http://www.odnoklassniki.ru/event/ID')
    create_ads.add()
    create = CreateCompany(self.driver)
    create.click()
    edit = Edit(self.driver)
    edit.click_edit()
    # Poll every 0.1 s (up to 30 s) until the platform label is present.
    platform_result = WebDriverWait(self.driver, 30, 0.1).until(
        lambda d: d.find_element_by_css_selector(
            '.base-setting__pads-item__label').text
    )
    # Fix: assertEquals is a deprecated alias of assertEqual.
    self.assertEqual('Одноклассники: мобильная версия',
                     platform_result.encode('utf-8'))
def add_data_to_list(driver):
    """Scrape one complaint page and return its fields as a CSV-ready row.

    Returns a tuple ``(row, lang_flag, date_of_application)`` where
    ``lang_flag`` is 1 for a Marathi complaint (scraped from the PRINT
    popup) and 0 for an English one (scraped from the report-viewer
    iframe).  The Marathi path is tried first; any scraping failure
    there falls through to the English path.
    """

    def _txt(xpath):
        # Text of the element at *xpath*, utf-8 encoded and stripped.
        return driver.find_element_by_xpath(
            xpath).text.encode('utf-8').strip()

    temp = []
    sub_cat = ''
    Category = ''  # stays '' on the English path (not shown there)

    complaint_number = _txt(
        "//div[@id='content']//form/table[1]/tbody/tr[1]/td[2]/b")
    Date_of_Application = _txt(
        "//div[@id='content']//form/table[1]/tbody/tr[1]/td[4]")

    # The description row shifts by one depending on whether the page
    # carries a Hindi block; fall back to the alternate row when missing.
    try:
        Complaint_description = _txt(
            "//div[@id='content']//form/table[1]/tbody/tr[3]/td[2]")
    except NoSuchElementException:
        Complaint_description = _txt(
            "//div[@id='content']//form/table[1]/tbody/tr[2]/td[2]")

    Department_name = _txt(
        "//div[@id='content']//form/table[2]/tbody/tr[2]/td[2]")
    Status = _txt(
        "//div[@id='content']//form/table[2]/tbody/tr[3]/td[2]")

    temp.append(complaint_number)
    temp.append(Date_of_Application)
    temp.append(Complaint_description)
    temp.append(Department_name)
    temp.append(Status)

    # ---- Marathi complaint: open the PRINT view and scrape its tables ----
    try:
        Print = driver.find_element_by_id('PRINT')
        driver.execute_script(
            "window.scrollTo(0, document.body.scrollHeight);")
        hover = ActionChains(driver).move_to_element(Print)
        hover.click(Print).perform()
        driver.implicitly_wait(5)

        # Probe the print layout; a lookup failure here deliberately
        # routes control to the English branch (the value is unused).
        driver.find_element_by_xpath(
            "/html/body/form/div/table[1]/tbody/tr[3]/td[2]/b").text

        Category = WebDriverWait(driver, 5).until(
            EC.presence_of_element_located(
                (By.XPATH,
                 "//div[@id='content']//table[1]/tbody/tr[4]/td[2]"))
        ).text.encode('utf-8').strip()
        sub_cat = _txt(
            "//div[@id='content']//table[1]/tbody/tr[5]/td[2]")

        # Location of the complaint (table 5, upper rows).
        loc_parts = [
            _txt("//div[@id='content']//table[5]/tbody/tr[2]/td[2]"),  # house name
            _txt("//div[@id='content']//table[5]/tbody/tr[2]/td[4]"),  # house no
            _txt("//div[@id='content']//table[5]/tbody/tr[3]/td[2]"),  # street 1
            _txt("//div[@id='content']//table[5]/tbody/tr[3]/td[4]"),  # street 2
            _txt("//div[@id='content']//table[5]/tbody/tr[4]/td[2]"),  # area 1
            _txt("//div[@id='content']//table[5]/tbody/tr[4]/td[4]"),  # area 2
            _txt("//div[@id='content']//table[5]/tbody/tr[5]/td[2]"),  # city
            _txt("//div[@id='content']//table[5]/tbody/tr[5]/td[4]"),  # pincode
        ]

        # Address of the applicant (table 5, lower rows; the print view
        # has no pincode cell, so a blank placeholder keeps the columns
        # aligned with the English branch).
        add_parts = [
            _txt("/html/body/form/div/table[5]/tbody/tr[10]/td[2]"),  # house no
            _txt("/html/body/form/div/table[5]/tbody/tr[10]/td[4]"),  # house name
            _txt("/html/body/form/div/table[5]/tbody/tr[11]/td[2]"),  # street 1
            _txt("/html/body/form/div/table[5]/tbody/tr[11]/td[4]"),  # street 2
            _txt("/html/body/form/div/table[5]/tbody/tr[12]/td[2]"),  # area 1
            _txt("/html/body/form/div/table[5]/tbody/tr[12]/td[4]"),  # area 2
            _txt("/html/body/form/div/table[5]/tbody/tr[13]/td[2]"),  # city
            ' ',                                                      # pincode (not printed)
            _txt("/html/body/form/div/table[5]/tbody/tr[14]/td[2]"),  # state
            _txt("/html/body/form/div/table[5]/tbody/tr[14]/td[4]"),  # country
        ]

        temp.insert(2, '######')  # no application date on the Marathi print
        temp.insert(3, Category)
        temp.insert(4, sub_cat)
        temp.insert(5, ' ')       # no landmark on the Marathi print
        temp.append(','.join(loc_parts))
        temp.append(','.join(add_parts))
        driver.switch_to.default_content()
        return temp, 1, Date_of_Application

    # ---- English complaint: scrape the report-viewer iframe ----
    # Fix: was a bare 'except:', which also swallowed KeyboardInterrupt
    # and SystemExit; Exception keeps the same fall-through behavior.
    except Exception:
        frame = driver.find_element_by_xpath(
            "//div[@id='content']//iframe[2]")
        driver.switch_to.frame(frame)

        def _viewer_html(idx):
            # innerHTML of the idx-th div inside the report viewer,
            # waiting up to 5 s for it to appear.
            el = WebDriverWait(driver, 5).until(
                EC.presence_of_element_located(
                    (By.XPATH,
                     "//div[@id='viewer']/div/div[2]/div[" + str(idx) +
                     "]")))
            return el.get_attribute('innerHTML')

        # Sub-category label, rendered as ': <value>' -> keep '<value>'.
        Val = _viewer_html(11).strip(':')
        sub_cat = Val[2:]
        extra = _viewer_html(12).encode('utf-8').strip()
        if extra != 'Description':
            sub_cat = sub_cat + ' ' + extra

        # Divs 22..84 hold an arbitrary-length label/value run; collect
        # until one is missing, then search the run linearly below.
        location = list()
        for idx in range(22, 85):
            try:
                location.append(
                    _viewer_html(idx).encode('utf-8').strip(' '))
            except Exception:
                break

        # Location of complaint: the values between the 'House Name' and
        # 'Pincode' labels, with the label strings themselves removed.
        i = location.index('House Name')
        n = location.index('Pincode')
        label1 = [
            'House Name', 'House No.', 'Street1', 'Street2', 'Area1',
            'Area2', 'City', 'Pincode'
        ]
        List1 = location[i:n + 2]
        for lbl in label1:
            try:
                List1.remove(lbl)
            except ValueError:
                continue

        Landmark = ' '
        try:
            i = location.index('Landmark')
            lm = location[i + 1].strip(':')
            Landmark = lm[2:]
        except Exception:
            Landmark = ' '

        # Address of applicant: values between its header and the
        # 'Telephone(O)' label, labels removed.
        i = location.index('Address of Applicant :')
        n = location.index('Telephone(O)')
        label2 = [
            'House No', 'House Name', 'Street1', 'Street2', 'Area1',
            'Area 2', 'City', 'Pin Code', 'State', 'Country'
        ]
        List2 = location[i + 1:n]
        for lbl in label2:
            try:
                List2.remove(lbl)
            except ValueError:
                continue

        # Application date: the entry right after 'Responsible :', of the
        # form '...Date : dd/mm/yyyy'.  (Fix: the old '!= -1' guard was
        # dead code — list.index raises ValueError, it never returns -1.)
        date = '######'
        try:
            date_ = location[location.index('Responsible :') + 1]
            date = date_[date_.index('Date') + 7:]
        except Exception:
            date = '######'

        temp.insert(2, date)
        temp.insert(3, Category)
        temp.insert(4, sub_cat)
        temp.insert(5, Landmark)
        temp.append(','.join(List1))
        temp.append(','.join(List2))
        return temp, 0, Date_of_Application