# NOTE(review): line breaks and indentation were lost in this fragment, so the
# statements below are fused onto one physical line (and everything after the
# first '#' is, as written, a comment).  The nesting described here is the
# presumed original layout -- confirm against the original file.
#
# 1. Tail of a function whose header is not visible here: stores `links`,
#    `reviews` and `salaries` under the 'Link', 'Review' and 'Salary' keys of
#    `df` and returns `df`.
# 2. Top-level script: rebinds `df` to an empty dict keyed by job title, then
#    for every `job` in `list_1` starts an Edge driver from 'msedgedriver.exe',
#    opens https://indeed.com, clicks the two elements located by the XPaths
#    below (named `initial_search` and `advanced_search`), and stores
#    `get_jobs(position=job, pages=10)` in `df[job]`.
# 3. If `get_jobs` raises, the job is reported with 'Error in: ' and skipped.
# 4. `df_1` is the `pd.concat` of all collected values (presumably executed
#    after the loop -- TODO confirm).
#
# NOTE(review): the bare `except:` also catches KeyboardInterrupt/SystemExit.
# NOTE(review): a new Edge driver is created per job and no quit()/close() is
# visible in this span -- verify that `get_jobs` or later code releases it.
df['Link'] = links df['Review'] = reviews df['Salary'] = salaries return df df = {} # empty dictionary to store data for each job title print('Scraping Data...') for job in list_1: print('Scraping: ', job) driver = Edge(executable_path='msedgedriver.exe') driver.get('https://indeed.com') initial_search = driver.find_element_by_xpath( '//*[@id="whatWhereFormId"]/div[3]/button') initial_search.click() advanced_search = driver.find_element_by_xpath( '//*[@id="jobsearch"]/table/tbody/tr/td[4]/div/a') advanced_search.click() try: df[job] = get_jobs(position=job, pages=10) except: print('Error in: ', job) continue df_1 = pd.concat(df.values())
# NOTE(review): line breaks and indentation were lost in this fragment; because
# the fused line starts with '#', the whole physical line is a comment as
# written.  The steps below describe the presumed original statements.
#
# 1. End of a loop whose header is not visible here: appends `src` to
#    `images_men` (the base64 encoding step is commented out).
# 2. "women": finds every element matching the CSS selector
#    [data-gender=women]:not([data-int=undefined]) and appends each element's
#    `src` attribute to `images_women`; the download / JPEG / base64 steps are
#    commented out, so only the URL is kept.
# 3. Loads `url_names`, clears the form input located by XPath and types "95".
# 4. "men": picks 'male' in the #gender <select>, waits 2 s, clicks a button
#    inside the element with id "qc-cmp2-ui" (presumably a consent dialog --
#    TODO confirm), waits 3 s, scrolls the window to y=1080, clicks the form's
#    input[4] and waits 5 s.
# 5. Appends the text of every element with class 'name_heading' to
#    `names_men`.
#img_str = base64.b64encode(buffered.getvalue()) images_men.append(src) # women images = driver.find_elements_by_css_selector("[data-gender=women]:not([data-int=undefined])") for img_tag in images: src = img_tag.get_attribute('src') #img = Image.open(requests.get(src, stream = True).raw) #buffered = BytesIO() #img.save(buffered, format="JPEG") #img_str = base64.b64encode(buffered.getvalue()) images_women.append(src) driver.get(url_names) txt_box = driver.find_element_by_xpath('//*[@id="main"]/div/form/input[3]') txt_box.clear() txt_box.send_keys("95") # men select = Select(driver.find_element_by_xpath('//*[@id="gender"]')) select.select_by_visible_text('male') time.sleep(2) driver.find_element_by_xpath('//*[@id="qc-cmp2-ui"]/div[2]/div/button[2]').click() time.sleep(3) driver.execute_script("window.scrollTo(0, 1080)") driver.find_element_by_xpath('//*[@id="main"]/div/form/input[4]').click() time.sleep(5) names = driver.find_elements_by_class_name('name_heading') for name in names: names_men.append(name.text)
import random
import time

# Launch a Chromium-based Edge session and open Facebook.
options = EdgeOptions()
options.use_chromium = True
driver = Edge(options=options)
driver.get("https://www.facebook.com/")
driver.maximize_window()
time.sleep(1)

# Send four TABs followed by ENTER to the page body (presumably to dismiss an
# overlay shown before the login form -- TODO confirm).
actions = driver.find_element_by_tag_name('body')
actions.send_keys(Keys.TAB * 4, Keys.ENTER)
# actions.click()
# actions.send_keys(Keys.ENTER)
time.sleep(1)

# Fill in the first login-form input.
inputElement = driver.find_element_by_xpath(
    "/html/body/div[1]/div[2]/div[1]/div/div/div/div[2]/div/div[1]/form/div[1]/div[1]/input"
)
inputElement.send_keys('*****@*****.**')
time.sleep(3)

# Fill in the second login-form input and submit with ENTER.
# NOTE(review): the credential is hard-coded in the source; consider loading
# it from the environment or a secrets store.
inputElement = driver.find_element_by_xpath(
    "/html/body/div[1]/div[2]/div[1]/div/div/div/div[2]/div/div[1]/form/div[1]/div[2]/div/input"
)
inputElement.send_keys('passwordfraca', Keys.ENTER)
time.sleep(3)

# Reload the home page and click on the body once it has had time to load.
driver.get("https://www.facebook.com/")
time.sleep(8)
actions = driver.find_element_by_tag_name('body')
actions.click()
time.sleep(8)

# Scroll in steps, pausing 4 s after each one.
for _scroll_js in (
    "window.scrollTo(0, 300)",
    "window.scrollTo(300, 600)",
):
    driver.execute_script(_scroll_js)
    time.sleep(4)
# Configure Edge: hide the "controlled by automation" switch and driver
# logging.  The headless / kiosk variants are kept below, disabled.
options = EdgeOptions()
# options.add_argument("headless")
# options.add_argument("disable-gpu")
options.add_experimental_option(
    "excludeSwitches",
    ['enable-automation', 'enable-logging'],
)
# options.add_argument('-kiosk')  # open in full-screen (kiosk) mode
wd = Edge(options=options)
wd.get('https://kyfw.12306.cn/otn/resources/login.html')
time.sleep(1)

# Full screen
wd.maximize_window()

# Click the "account login" tab
wd.find_element_by_xpath('/html/body/div[2]/div[2]/ul/li[2]/a').click()
time.sleep(1)

# Screenshot of the page; the <img> element located next is used to compute
# the crop box.
wd.save_screenshot('./10.screenshot.png')
code_img_ele = wd.find_element_by_xpath(
    '/html/body/div[2]/div[2]/div[1]/div[2]/div[3]/div/div[4]/img'
)

# Crop box for the screenshot: (left, top, right, bottom) built from the
# element's reported location and size.
location = code_img_ele.location
print('location:', location)
size = code_img_ele.size
print('size:', size)
rangle = (
    location['x'],
    location['y'],
    location['x'] + size['width'],
    location['y'] + size['height'],
)
i = Image.open('./10.screenshot.png')
import time

from msedge.selenium_tools import Edge, EdgeOptions
from selenium.webdriver.common.keys import Keys

# Opens YouTube in a Chromium-based Edge session, loads a fixed search-results
# page, clicks the first video result and leaves it playing for 400 seconds.
options = EdgeOptions()
options.use_chromium = True
driver = Edge(options=options)

# Everything after the driver is created runs under try/finally so the browser
# and the driver process are released even when a lookup or click raises.
try:
    driver.maximize_window()
    driver.get("https://www.youtube.com/")
    time.sleep(1)
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

    # Four TABs + ENTER on the page body (presumably to dismiss a dialog shown
    # on first load -- TODO confirm).
    actions = driver.find_element_by_tag_name('body')
    actions.send_keys(Keys.TAB * 4, Keys.ENTER)
    time.sleep(1)

    driver.get("https://www.youtube.com/results?search_query=Drive+Drive+Drive+song+(Impractical+Jokers)+-+2+HOUR+VERSION")
    time.sleep(5)

    # First <ytd-video-renderer> in the search results.
    inputElement = driver.find_element_by_xpath("/html/body/ytd-app/div/ytd-page-manager/ytd-search/div[1]/ytd-two-column-search-results-renderer/div/ytd-section-list-renderer/div[2]/ytd-item-section-renderer/div[3]/ytd-video-renderer[1]")
    inputElement.click()

    # Keep the session open while the video plays.
    time.sleep(400)
finally:
    # quit() ends the whole WebDriver session (all windows plus the driver
    # process); close() would only close the current window.
    driver.quit()
import random
import time

from selenium.webdriver.common.keys import Keys

# Launch a Chromium-based Edge session and open Instagram.
options = EdgeOptions()
options.use_chromium = True
driver = Edge(options=options)
driver.get("https://www.instagram.com/")
driver.maximize_window()
time.sleep(3)

# Send three TABs followed by ENTER to the page body (presumably to dismiss an
# overlay shown before the login form -- TODO confirm).
actions = driver.find_element_by_tag_name('body')
actions.send_keys(Keys.TAB * 3, Keys.ENTER)
time.sleep(3)

# Type into the first login-form input, TAB to the next field, type again and
# submit with ENTER.
# NOTE(review): both values are hard-coded in the source; consider loading
# them from the environment or a secrets store.
inputElement = driver.find_element_by_xpath(
    "/html/body/div[1]/section/main/article/div[2]/div[1]/div/form/div/div[1]/div/label/input"
)
inputElement.send_keys("armaldoFransico", Keys.TAB, "criptografia", Keys.ENTER)
time.sleep(5)

# One TAB + ENTER on the body after the form has been submitted.
actions = driver.find_element_by_tag_name('body')
actions.send_keys(Keys.TAB, Keys.ENTER)
time.sleep(4)

# Scroll in steps, pausing 4 s after each one.
for _scroll_js in (
    "window.scrollTo(0, 300)",
    "window.scrollTo(300, 600)",
    "window.scrollTo(600, 900)",
    "window.scrollTo(900, 1200)",
):
    driver.execute_script(_scroll_js)
    time.sleep(4)
# NOTE(review): line breaks and indentation were lost in this fragment, and it
# starts inside a `try:` (within a retry loop indexed by `i`) whose header is
# not visible here.  The nesting described below is the presumed original
# layout -- confirm against the original file.
#
# 1. Waits up to 5 s for an element matching `target_xpath` to be present; on
#    success prints the "attempt N succeeded" message and breaks out of the
#    retry loop.
# 2. On any exception, clears the input located by the XPath below; when
#    `i == 10` prints the "all 10 attempts failed, please log in manually"
#    message.  Whether that check and `sleep(0.8)` sit inside the `except`
#    cannot be determined from this span.
# 3. Afterwards clicks the first element matching `target_xpath`, waits 1 s,
#    resets `temp` and `i`, and starts a loop that runs until `temp` equals
#    `province` (the loop body continues past this span).
#
# NOTE(review): the bare `except:` also catches KeyboardInterrupt/SystemExit.
wait = WebDriverWait(driver, 5) wait_tp = wait.until( EC.presence_of_element_located( (By.XPATH, target_xpath))) # enter the second page # target = driver.find_element_by_xpath(target_xpath) # driver.execute_script("arguments[0].scrollIntoView();", target) print('第%i次尝试成功' % (i + 1)) break except: # clear the dialog box driver.find_element_by_xpath( '//*[@id="app"]/div/div[3]/div[3]/div/input' ).clear() if i == 10: print("10次尝试都失败,请您手动登录!") sleep(0.8) # select the region (province, city, county/district) driver.find_elements_by_xpath(target_xpath)[0].click() sleep(1) temp = '' i = 0 while temp != province: i += 1