Exemplo n.º 1
0
    df['Link'] = links
    df['Review'] = reviews
    df['Salary'] = salaries

    return df


# Scrape results keyed by job title; each value is whatever get_jobs returns.
df = {}

print('Scraping Data...')
for job in list_1:
    print('Scraping: ', job)
    # A fresh browser session is opened for every job title.
    driver = Edge(executable_path='msedgedriver.exe')
    try:
        driver.get('https://indeed.com')

        initial_search = driver.find_element_by_xpath(
            '//*[@id="whatWhereFormId"]/div[3]/button')
        initial_search.click()

        advanced_search = driver.find_element_by_xpath(
            '//*[@id="jobsearch"]/table/tbody/tr/td[4]/div/a')
        advanced_search.click()

        try:
            df[job] = get_jobs(position=job, pages=10)
        except Exception:
            # Best effort: report the failing title and move on to the next.
            # Catching Exception (not a bare except) lets Ctrl+C and
            # SystemExit still stop the run.
            print('Error in: ', job)
    finally:
        # Always release this iteration's browser and driver process;
        # otherwise one Edge instance per job title is left running.
        driver.quit()

# Combine the per-title results into a single object.
df_1 = pd.concat(df.values())
Exemplo n.º 2
0
    #img_str = base64.b64encode(buffered.getvalue())
    images_men.append(src)

# Women: collect the `src` of every element matching data-gender=women whose
# data-int attribute is not the literal value "undefined".
images = driver.find_elements_by_css_selector("[data-gender=women]:not([data-int=undefined])")
images_women.extend(img_tag.get_attribute('src') for img_tag in images)

driver.get(url_names)

# Overwrite the content of the form's third input with "95".
txt_box = driver.find_element_by_xpath('//*[@id="main"]/div/form/input[3]')
txt_box.clear()
txt_box.send_keys("95")

# Men: pick "male" in the gender dropdown.
gender_dropdown = driver.find_element_by_xpath('//*[@id="gender"]')
select = Select(gender_dropdown)
select.select_by_visible_text('male')
time.sleep(2)

# Click the second button inside the element with id "qc-cmp2-ui"
# (presumably a consent dialog; confirm against the page).
driver.find_element_by_xpath('//*[@id="qc-cmp2-ui"]/div[2]/div/button[2]').click()
time.sleep(3)

# Scroll down, then click the form's fourth input.
driver.execute_script("window.scrollTo(0, 1080)")
driver.find_element_by_xpath('//*[@id="main"]/div/form/input[4]').click()
time.sleep(5)

# Store the text of every element carrying the "name_heading" class.
names = driver.find_elements_by_class_name('name_heading')
names_men.extend(name.text for name in names)
Exemplo n.º 3
0
import time
import random

# Start Edge with the Chromium flag enabled.
options = EdgeOptions()
options.use_chromium = True

driver = Edge(options=options)
driver.get("https://www.facebook.com/")
driver.maximize_window()
time.sleep(1)

# Send four TABs followed by ENTER to the page body.
actions = driver.find_element_by_tag_name('body')
actions.send_keys(Keys.TAB * 4, Keys.ENTER)
time.sleep(1)

# Type into the two inputs of the form; ENTER after the second one submits.
# NOTE(review): both values are hard-coded in the source.
inputElement = driver.find_element_by_xpath(
    "/html/body/div[1]/div[2]/div[1]/div/div/div/div[2]/div/div[1]/form/div[1]/div[1]/input"
)
inputElement.send_keys('*****@*****.**')
time.sleep(3)
inputElement = driver.find_element_by_xpath(
    "/html/body/div[1]/div[2]/div[1]/div/div/div/div[2]/div/div[1]/form/div[1]/div[2]/div/input"
)
inputElement.send_keys('passwordfraca', Keys.ENTER)
time.sleep(3)

# Reload the home page and click on the body.
driver.get("https://www.facebook.com/")
time.sleep(8)
actions = driver.find_element_by_tag_name('body')
actions.click()
time.sleep(8)

# Run each scroll script in turn, pausing four seconds after each.
for scroll_script in ("window.scrollTo(0, 300)", "window.scrollTo(300, 600)"):
    driver.execute_script(scroll_script)
    time.sleep(4)
Exemplo n.º 4
0

# Browser options: exclude the 'enable-automation' and 'enable-logging'
# switches. The flags below are left disabled.
options = EdgeOptions()
# options.add_argument("headless")
# options.add_argument("disable-gpu")
# options.add_argument('-kiosk')  # open in full screen
options.add_experimental_option("excludeSwitches", ['enable-automation', 'enable-logging'])

wd = Edge(options=options)
wd.get('https://kyfw.12306.cn/otn/resources/login.html')
time.sleep(1)
# Maximize the window.
wd.maximize_window()

# Click the account-login link.
account_login_link = wd.find_element_by_xpath('/html/body/div[2]/div[2]/ul/li[2]/a')
account_login_link.click()

time.sleep(1)
# Save a screenshot of the page to disk.
screenshot_path = './10.screenshot.png'
wd.save_screenshot(screenshot_path)

code_img_ele = wd.find_element_by_xpath(
    '/html/body/div[2]/div[2]/div[1]/div[2]/div[3]/div/div[4]/img')

# Build the crop box (left, top, right, bottom) from the image element's
# reported position and size.
location = code_img_ele.location
print('location:', location)
size = code_img_ele.size
print('size:', size)
left, top = location['x'], location['y']
rangle = (left, top, left + size['width'], top + size['height'])
i = Image.open(screenshot_path)
Exemplo n.º 5
0
from msedge.selenium_tools import Edge, EdgeOptions
from selenium.webdriver.common.keys import Keys
import time

# Start Edge with the Chromium flag enabled.
options = EdgeOptions()
options.use_chromium = True

driver = Edge(options=options)
try:
    driver.maximize_window()
    driver.get("https://www.youtube.com/")
    time.sleep(1)
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    # Send four TABs followed by ENTER to the page body.
    actions = driver.find_element_by_tag_name('body')
    actions.send_keys(Keys.TAB * 4, Keys.ENTER)

    time.sleep(1)

    # Open the search results page and click the first video renderer.
    driver.get("https://www.youtube.com/results?search_query=Drive+Drive+Drive+song+(Impractical+Jokers)+-+2+HOUR+VERSION")
    time.sleep(5)
    inputElement = driver.find_element_by_xpath("/html/body/ytd-app/div/ytd-page-manager/ytd-search/div[1]/ytd-two-column-search-results-renderer/div/ytd-section-list-renderer/div[2]/ytd-item-section-renderer/div[3]/ytd-video-renderer[1]")
    inputElement.click()

    # Keep the session open for 400 seconds after the click.
    time.sleep(400)
finally:
    # quit() ends the whole WebDriver session (browser and driver process),
    # even when a step above raised; close() would only close the window.
    driver.quit()
Exemplo n.º 6
0
from selenium.webdriver.common.keys import Keys
import time
import random

# Start Edge with the Chromium flag enabled.
options = EdgeOptions()
options.use_chromium = True

driver = Edge(options=options)
driver.get("https://www.instagram.com/")
driver.maximize_window()
time.sleep(3)

# Send three TABs followed by ENTER to the page body.
actions = driver.find_element_by_tag_name('body')
actions.send_keys(Keys.TAB * 3, Keys.ENTER)
time.sleep(3)

# Type the first value, TAB to the next field, type the second, then ENTER.
# NOTE(review): both values are hard-coded in the source.
inputElement = driver.find_element_by_xpath("/html/body/div[1]/section/main/article/div[2]/div[1]/div/form/div/div[1]/div/label/input")
inputElement.send_keys("armaldoFransico", Keys.TAB, "criptografia", Keys.ENTER)
time.sleep(5)

# Send TAB then ENTER to the body of the page that follows.
actions = driver.find_element_by_tag_name('body')
actions.send_keys(Keys.TAB, Keys.ENTER)
time.sleep(4)

# Run each scroll script in turn, pausing four seconds after each.
for scroll_script in (
    "window.scrollTo(0, 300)",
    "window.scrollTo(300, 600)",
    "window.scrollTo(600, 900)",
    "window.scrollTo(900, 1200)",
):
    driver.execute_script(scroll_script)
    time.sleep(4)
Exemplo n.º 7
0
                        wait = WebDriverWait(driver, 5)
                        wait_tp = wait.until(
                            EC.presence_of_element_located(
                                (By.XPATH, target_xpath)))

                        # 进入第二个页面

                        # target = driver.find_element_by_xpath(target_xpath)
                        # driver.execute_script("arguments[0].scrollIntoView();", target)
                        print('第%i次尝试成功' % (i + 1))
                        break

                    except:
                        # 清空对话框
                        driver.find_element_by_xpath(
                            '//*[@id="app"]/div/div[3]/div[3]/div/input'
                        ).clear()
                if i == 10:
                    print("10次尝试都失败,请您手动登录!")

                sleep(0.8)

                # 选择地区(省、市、县区)
                driver.find_elements_by_xpath(target_xpath)[0].click()

                sleep(1)

                temp = ''
                i = 0
                while temp != province:
                    i += 1