Exemplo n.º 1
0
def auto_data_exploration(driver):
    """Drive the web UI through the data-exploration workflow.

    Steps: open the exploration page from the side menu, trigger the
    exploration run, then scroll down/up so the per-feature distribution
    charts become visible.

    :param driver: selenium WebDriver already logged in to the app.
    """
    wait = WebDriverWait(driver, 30)
    # 1. Enter the data-exploration page (2nd item of the side menu).
    entry_li = driver.find_element_by_xpath(
        '/html/body/div[1]/div/div[2]/ul/li[2]/a')
    entry_li.click()
    wait_for(1)
    # 2. Kick off the exploration run.
    wait.until(EC.element_to_be_clickable(
        (By.XPATH, '//*[@id="exploration"]')))
    data_exploration_bt = driver.find_element_by_xpath(
        '//*[@id="exploration"]')
    data_exploration_bt.click()
    wait_for(1)
    wait.until(
        EC.visibility_of_element_located(
            (By.XPATH, '//*[@id="pagecontent"]/ul/li/a')))
    # Locating the result tab doubles as an existence check; the element
    # itself is not interacted with afterwards.
    driver.find_element_by_xpath('//*[@id="pagecontent"]/ul/li/a')
    # 3. Scroll so each feature's detailed distribution is shown.
    # (A fresh TouchActions chain is built per gesture.)
    touch_actions = TouchActions(driver)
    touch_actions.scroll(0, 800).perform()
    wait_for(1)
    touch_actions = TouchActions(driver)
    touch_actions.scroll(0, -400).perform()
    # NOTE(review): an unrelated, syntactically broken fragment (a nested
    # triple-quoted docstring copied from another example) used to follow
    # here; it made the module unparseable and has been removed.
 def scrolling_actions(self):
     """Scroll the application window via WinAppDriver touch actions.

     Kept for reference only: WinAppDriver does not currently appear to
     support mouse scrolling, so this implementation is unused.
     :return:
     """
     gesture = TouchActions(self.SABL.ret_driver())
     gesture.scroll(10, 10)
     gesture.perform()
Exemplo n.º 4
0
class URLImageCrawler:
    """Crawl photo URLs from Flickr search results using Selenium Chrome.

    For a search keyword the crawler scrolls the result page (clicking
    the "load more" button when present), then parses the page with lxml
    and extracts each tile's CSS ``background-image`` URL.
    """

    def __init__(self):
        self.is_close_driver = False   # guards against double-closing
        self.btn_show_more = None      # cached "load more" button
        # lxml selector for the photo tiles of the search results.
        self.div_selector = CSSSelector(
            "#search-unified-content .photo-list-photo-view")
        self.init_driver()
        self.ta = TA(self.driver)

    def init_driver(self):
        """Start a maximized Chrome and keep it on ``self.driver``."""
        option = webdriver.ChromeOptions()
        option.add_argument("start-maximized")
        self.driver = webdriver.Chrome(chrome_options=option)

    def close_driver(self):
        """Kill chromedriver once; later calls are no-ops."""
        if not self.is_close_driver:
            # driver.close() leaves chromedriver.exe behind on Windows,
            # so the process is killed explicitly instead.
            os.system("taskkill /f /im chromedriver.exe")
            print("[ * ] Close driver done")
            self.is_close_driver = True

    def is_display_btn_show_more(self):
        """Return True and cache the button if a "show more" button exists."""
        try:
            self.btn_show_more = self.driver.find_element(
                By.CSS_SELECTOR, ".infinite-scroll-load-more button")
            return True
        except Exception:  # was a bare except; NoSuchElementException expected
            return False

    def write_urls(self, keyword, urls, path='./Data'):
        """Write one URL per line to ``<path>/<keyword>.txt``."""
        out_path = os.path.abspath(os.path.join(path, keyword + ".txt"))
        with open(out_path, 'w') as f:
            f.write('\n'.join(urls))
        print("[ + ] Finish write {} urls to {}".format(len(urls), out_path))

    def crawl_background_links(self, keyword_search, num_scroll=5):
        """Search Flickr for *keyword_search* and save the tile image URLs.

        :param keyword_search: text to search for.
        :param num_scroll: fallback scroll count used when the total image
            count cannot be read from the "View all N" link.
        """
        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for interval timing.
        start_time = time.perf_counter()
        driver = self.driver
        url = "https://www.flickr.com/search/?text={}".format(keyword_search)
        driver.get(url)

        base_url = "https:"
        urls = []

        try:
            WebDriverWait(driver, 2).until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR,
                     "#search-unified-content .photo-list-photo-view")))
        except Exception as e:
            print("[ @ ] Exception: ", e)
            return

        try:
            # The "View all N,NNN" link gives the total result count;
            # roughly 300 images load per scroll.
            a_elm = WebDriverWait(driver, 2).until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR,
                     "#search-unified-content a[class~='view-more-link']")))
            text_str = a_elm.get_attribute("innerHTML")[len("View all "):]
            total_images_str = ''.join(text_str.split(','))
            total_images = int(total_images_str)
            num_scroll = int(math.floor(total_images / 300))
            # BUG FIX: this print used the undefined name ``keyword``
            # (NameError); ``keyword_search`` is the actual parameter.
            print("[ * ] key word {}: {} total images - {} scroll".format(
                keyword_search, total_images, num_scroll))
        except Exception as e:
            print("[ - ] Exception: ", e)

        # Scroll the page, clicking "show more" whenever it appears.
        for i in range(num_scroll):
            print("[ * ] {}: scroll {}".format(keyword_search, i + 1))
            self.ta.scroll(0, 2000).perform()
            if self.is_display_btn_show_more():
                self.btn_show_more.click()

        root = html.fromstring(driver.page_source)
        div_elms = self.div_selector(root)
        for div_elm in div_elms:
            # Tiles carry their image as an inline CSS background:
            # style='...background-image: url("//live.staticflickr...")'
            style = div_elm.attrib['style']
            start_index = style.find("url(")
            if start_index >= 0:
                end_index = style.find('")', start_index)
                # +5 skips past 'url(' plus the opening double quote.
                url = style[start_index + 5:end_index]
                urls.append(base_url + url)

        end_time = time.perf_counter()
        print("[ * ] {}: Collected {} urls => {} seconds".format(
            keyword_search, len(urls), end_time - start_time))

        self.write_urls(keyword_search, urls)
# Carmax sedan scraper (fragment): ``driver``, ``wait``, ``url`` and
# ``time`` are bound elsewhere; this chunk is truncated mid-statement
# at its end (the scrape cut the source off).
touch_actions = TouchActions(driver)

#Change these two for what to scrape and name of final file
filename = 'carmax_sedans'
car_type = 'sedans'

driver.get(url + car_type)
# XPaths for the car tiles, the "see more" button and the store dropdown.
car_tile_xpath = "//a[@class='kmx-typography--font-alt kmx-typography--weight-400 kmx-elevation-02']"
see_more_button = "//button[@class='see-more see-more__cta-all']"

dropdown_xpath = "//div[@class='mdc-select__surface mdc-ripple-upgraded']"
wait.until(EC.presence_of_all_elements_located((By.XPATH, dropdown_xpath)))
dropdown_menu = driver.find_element_by_xpath(dropdown_xpath)
# Property access scrolls the element into view as a side effect.
dropdown_menu.location_once_scrolled_into_view
touch_actions.scroll(0, -100)
touch_actions.perform()
dropdown_menu.click()
# Select the "Nationwide" store option.
wait.until(
    EC.presence_of_all_elements_located(
        (By.XPATH, "//li[text()='Nationwide']")))
driver.find_element_by_xpath("//li[text()='Nationwide']").click()

# Finding all relevant makes
wait.until(EC.presence_of_element_located((By.XPATH, "//div[@id='Make']")))
driver.find_element_by_xpath("//div[@id='Make']").click()
wait.until(
    EC.presence_of_all_elements_located(
        (By.XPATH, "//span[@class='refinements--value--name']")))
time.sleep(1)
# NOTE(review): the source is truncated here, mid-call.
car_makes = driver.find_elements_by_xpath(
Exemplo n.º 6
0
 def scroll_up(self):
     """Touch-scroll the page down by 500 px, then pause for a second."""
     TouchActions(self.driver).scroll(0, 500).perform()
     time.sleep(1)
Exemplo n.º 7
0
 def scroll_to(cls, web_element):
     """Touch-scroll to *web_element*'s on-screen coordinates.

     Reading ``location_once_scrolled_into_view`` already scrolls the
     element into view; the coordinates it yields are then replayed as
     a touch scroll gesture.
     """
     point = web_element.location_once_scrolled_into_view
     TouchActions(cls.driver).scroll(point.get("x"), point.get("y")).perform()
Exemplo n.º 8
0
class URLImageCrawler:
    """Crawl original-image URLs from Google Images using Selenium Chrome.

    Workflow: read keywords from a text file, type each into the search
    box, scroll until the result grid stops growing (clicking "show more
    results" when offered), then parse the page with lxml and write the
    'ou' (original URL) field of each tile's JSON metadata to a
    per-keyword output file.
    """

    def __init__(self):
        self.keywords = []
        self.wait_time_per_img = 0.5  # seconds to wait for 1 image to load
        # Stop once this many consecutive scrolls fail to load a new image.
        self.max_num_continuous_scroll_per_img = 1

        self.init_driver()
        self.btn_show_more = None   # "Show more results" button (id="smb")
        self.father_div = None      # result-grid container (id="rg")
        self.country_div = None     # footer element marking the page bottom
        # Each result tile stores its metadata as JSON in a .rg_meta node.
        self.div_url_selector = CSSSelector("#rg .rg_di .rg_meta")
        print("[ * ] Init URLImageCrawler object done")

    def init_driver(self, url='https://images.google.com/'):
        """Start Chrome, open *url* and cache the search box and button."""
        try:
            option = webdriver.ChromeOptions()
            option.add_argument("start-maximized")

            self.driver = webdriver.Chrome(chrome_options=option)
            driver = self.driver
            self.ta = TA(self.driver)

            driver.get(url)
            print("[ * ] Current driver url : ", url)

            self.txt_search = driver.find_element_by_id("lst-ib")
            self.btn_search = driver.find_elements_by_css_selector(
                "#searchform button[name='btnG'][type='submit']")[0]

        except Exception as e:
            print("[@@@] Exception : ", e)

    def init_again(self):
        """Re-find and clear the search box before the next keyword."""
        self.txt_search = self.driver.find_element_by_id("lst-ib")
        self.txt_search.clear()

    def close_driver(self):
        """Kill chromedriver (Windows); driver.quit() leaves it behind."""
        os.system("taskkill /f /im chromedriver.exe")
        print("[ * ] Close driver done")

    def read_file(self, intput_path='./Data/Search_List.txt'):
        '''Read newline-separated keywords and extend ``self.keywords``.

        :param intput_path: path to the keyword-list file.  (The parameter
            name keeps the original typo for backward compatibility.)
        :return: list of the newly read keywords.
        '''
        with open(intput_path, 'r', encoding='utf-8') as f:
            keywords = f.read().strip().split('\n')

        self.keywords.extend(keywords)
        print("[ + ] Read file {} done => Collected {} new keywords".format(
            intput_path, len(keywords)))
        print("New keywords : ", keywords)
        print('[ * ] Current keywords : ', self.keywords)
        return keywords

    def write_file(self, keyword, urls, output_path='./Data/'):
        """Write *urls* one per line to
        ``<output_path>/<keyword>/<keyword>-<timestamp>.txt``."""
        dir_path = os.path.join(os.path.abspath(output_path), keyword)
        # BUG FIX: os.mkdir failed when output_path itself was missing;
        # makedirs also creates parents and tolerates an existing dir.
        os.makedirs(dir_path, exist_ok=True)
        path = os.path.join(dir_path, keyword + '-{}.txt'.format(time.time()))

        print("[ * ] Write file {} started".format(path))
        with open(path, 'w') as f:
            f.write('\n'.join(urls))
        print("[ + ] Write file {} : {} urls".format(path, len(urls)))

    def crawl_url_image(self,
                        crawl_more=True,
                        search_path='./Data/Search_List.txt',
                        output_path='./Data'):
        """Crawl image URLs for the first keyword in *search_path*.

        NOTE(review): only ``keywords[0]`` is processed — this looks like
        a leftover debugging limit; confirm before widening it.  The
        driver is always killed in the ``finally`` block.
        """
        try:
            driver = self.driver
            keywords = self.read_file(search_path)
            keywords = [keywords[0]]  # debug limit: first keyword only

            for keyword in keywords:
                # time.clock() was removed in Python 3.8; perf_counter()
                # is its documented replacement for interval timing.
                start_time = time.perf_counter()

                self.txt_search.send_keys(keyword)
                self.btn_search.click()
                if self.btn_show_more is None:
                    self.btn_show_more = driver.find_element(By.ID, "smb")
                if self.father_div is None:
                    self.father_div = driver.find_element(By.ID, "rg")

                urls = []
                num_continuous_scroll = 0

                # Scroll until the "show more" button becomes visible.
                self.ta.scroll(0, 14000).perform()
                while not self.btn_show_more.is_displayed():
                    self.ta.scroll(0, 2000).perform()
                if crawl_more:
                    self.btn_show_more.click()
                    self.ta.scroll(0, 3000).perform()

                    root = html.fromstring(driver.page_source)
                    div_url_images = self.div_url_selector(root)
                    print("[ * ] Current loaded {} urls".format(
                        len(div_url_images)))
                    wait_img_id = len(div_url_images)

                    # Keep scrolling while new tiles keep appearing; give
                    # up after max_num_continuous_scroll_per_img misses.
                    while num_continuous_scroll <= self.max_num_continuous_scroll_per_img:
                        try:
                            print("Wait id ", wait_img_id)
                            WebDriverWait(
                                driver, self.wait_time_per_img
                            ).until(
                                EC.presence_of_element_located((
                                    By.CSS_SELECTOR,
                                    "div#rg [data-ri='{}'][class~='rg_di'] [class~='rg_meta']"
                                    .format(wait_img_id))))
                            # New tile appeared: scroll on and probe ~70
                            # tiles further ahead.
                            self.ta.scroll(0, 4000).perform()
                            num_continuous_scroll = 0
                            wait_img_id += 70

                        except Exception:  # was bare except; timeout expected
                            num_continuous_scroll += 1
                            # Vietnamese: "couldn't load the image"
                            print("[@@@] Khong load dc image")

                print("[ * ] Crawl url of {} done".format(keyword))

                root = html.fromstring(driver.page_source)
                div_url_images = self.div_url_selector(root)
                # Each tile's text is JSON; 'ou' holds the original URL.
                urls = [
                    json.loads(div.text).get('ou', [])
                    for div in div_url_images
                ]
                self.write_file(keyword, urls, output_path)

                end_time = time.perf_counter()
                print("[ * ] Crawl Time for {} is : {} seconds".format(
                    keyword, (end_time - start_time)))

        finally:
            self.close_driver()

    def crawl_url_image1(self,
                         crawl_more=True,
                         search_path='./Data/Search_List.txt',
                         output_path='./Data'):
        """Variant of :meth:`crawl_url_image` that processes every keyword
        and waits for the page footer instead of probing single tiles."""
        try:
            driver = self.driver
            keywords = self.read_file(search_path)

            for keyword in keywords:
                start_time = time.perf_counter()
                self.init_again()
                self.txt_search.send_keys(keyword)
                self.txt_search.send_keys(Keys.ENTER)

                # Re-find "show more" for every keyword (new results page).
                self.btn_show_more = driver.find_element(By.ID, "smb")
                if self.father_div is None:
                    self.father_div = driver.find_element(By.ID, "rg")
                if self.country_div is None:
                    self.country_div = driver.find_element_by_id("fbarcnt")

                urls = []

                self.ta.scroll(0, 14000).perform()
                while not self.btn_show_more.is_displayed():
                    self.ta.scroll(0, 2000).perform()
                if crawl_more:
                    self.btn_show_more.click()
                    # The footer (country div) only shows at page bottom.
                    while not self.country_div.is_displayed():
                        self.ta.scroll(0, 2000).perform()
                    # BUG FIX: the original bare ``WebDriverWait(driver, 1)``
                    # waits for nothing without .until(); sleep instead so
                    # the last tiles can settle.
                    time.sleep(1)

                print("[ * ] Crawl url of {} done".format(keyword))

                root = html.fromstring(driver.page_source)
                div_url_images = self.div_url_selector(root)
                urls = [
                    json.loads(div.text).get('ou', [])
                    for div in div_url_images
                ]
                self.write_file(keyword, urls, output_path)

                end_time = time.perf_counter()
                print("[ * ] Crawl Time for {} is : {} seconds".format(
                    keyword, (end_time - start_time)))

        finally:
            self.close_driver()
Exemplo n.º 9
0
def similarity_process(driver):
    """Drive the similarity-analysis workflow end to end in the web UI.

    Stages: (1) data registration, (2) feature selection, (3) patient
    clustering, (4) rule mining.  After each submit the page is scrolled
    so the generated charts become visible.

    NOTE(review): the ``st(context=3)`` calls look like interactive
    breakpoints (ipdb/pdb ``set_trace``) left in deliberately to pause
    between stages — confirm before removing them.

    :param driver: selenium WebDriver already on the application.
    """
    wait = WebDriverWait(driver, 30)
    wait_for(2)
    # 1. Data acquisition: register the csv file and its label column.
    wait.until(
        EC.element_to_be_clickable(
            (By.XPATH, '/html/body/div[1]/div/div[2]/ul/li[1]/a')))
    data_register = driver.find_element_by_xpath(
        '/html/body/div[1]/div/div[2]/ul/li[1]/a')
    data_register.click()
    wait_for(2)
    wait.until(
        EC.visibility_of_element_located((By.XPATH, '//*[@id="file_name"]')))
    wait.until(
        EC.visibility_of_element_located((By.XPATH, '//*[@id="label_name"]')))
    file_name = driver.find_element_by_xpath('//*[@id="file_name"]')
    label_name = driver.find_element_by_xpath('//*[@id="label_name"]')
    file_name.clear()
    wait_for(1)
    file_name.send_keys('df_heart_output_DATA.csv')
    wait_for(1)
    label_name.clear()
    wait_for(1)
    label_name.send_keys('label')
    wait_for(1)
    submit_bt = driver.find_element_by_xpath('//*[@id="submit"]')
    submit_bt.click()
    wait_for(2)
    # 2. Feature selection: keep at most 25 features.
    st(context=3)
    feature_selection = driver.find_element_by_xpath(
        '/html/body/div[1]/div/div[2]/ul/li[2]/a')
    feature_selection.click()
    wait_for(2)
    max_select_feature = driver.find_element_by_xpath(
        '//*[@id="selection_parameter_value"]')
    max_select_feature.clear()
    wait_for(1)
    max_select_feature.send_keys('25')
    wait_for(1)
    submit_bt = driver.find_element_by_xpath('//*[@id="submit"]')
    submit_bt.click()
    wait_for(1)
    # Scroll down then back up so the selection results are visible.
    touch_actions = TouchActions(driver)
    touch_actions.scroll(0, 2000).perform()
    wait_for(2)
    touch_actions = TouchActions(driver)
    touch_actions.scroll(0, -1000).perform()
    wait_for(2)
    touch_actions = TouchActions(driver)
    touch_actions.scroll(0, -500).perform()
    wait_for(2)
    # 3. Patient clustering: reduce to 5 dimensions, 2 clusters.
    st(context=3)
    patient_cluster = driver.find_element_by_xpath(
        '/html/body/div[1]/div/div[2]/ul/li[3]/a')
    patient_cluster.click()
    wait_for(2)
    touch_actions = TouchActions(driver)
    touch_actions.scroll(0, 400).perform()
    wait_for(2)
    touch_actions = TouchActions(driver)
    touch_actions.scroll(0, -200).perform()
    wait_for(2)
    feature_reduce_dimension = driver.find_element_by_xpath(
        '//*[@id="selection_parameter_value"]')
    feature_reduce_dimension.clear()
    wait_for(1)
    feature_reduce_dimension.send_keys('5')
    cluster_number = driver.find_element_by_xpath('//*[@id="n_clusters"]')
    cluster_number.clear()
    wait_for(1)
    cluster_number.send_keys('2')
    wait_for(1)
    submit_bt = driver.find_element_by_xpath('//*[@id="submit"]')
    submit_bt.click()
    wait_for(2)
    touch_actions = TouchActions(driver)
    touch_actions.scroll(0, 2000).perform()
    wait_for(2)
    touch_actions = TouchActions(driver)
    touch_actions.scroll(0, -1000).perform()
    wait_for(2)
    touch_actions = TouchActions(driver)
    touch_actions.scroll(0, -500).perform()
    wait_for(2)
    # 4. Rule mining on the cluster labels.
    st(context=3)
    rule_mining = driver.find_element_by_xpath(
        '/html/body/div[1]/div/div[2]/ul/li[4]/a')
    rule_mining.click()
    wait_for(2)
    label = driver.find_element_by_xpath('//*[@id="labels"]')
    label.clear()
    wait_for(1)
    label.send_keys('labels')
    wait_for(2)
    submit_bt = driver.find_element_by_xpath('//*[@id="submit"]')
    submit_bt.click()
    wait_for(2)
    touch_actions = TouchActions(driver)
    touch_actions.scroll(0, 2000).perform()
    wait_for(2)
Exemplo n.º 10
0
def auto_concat_dataframe(driver):
    """Concatenate two dataframes through the web UI and export a csv.

    :param driver: selenium WebDriver already logged in to the app.
    """
    wait = WebDriverWait(driver, 30)
    # 1. Return to the 'feature engineering' page.
    entry_li = driver.find_element_by_xpath(
        '/html/body/div[1]/div/div[2]/ul/li[4]/a')
    entry_li.click()
    wait_for(1)
    touch_actions = TouchActions(driver)
    touch_actions.scroll(0, 800).perform()
    wait_for(2)
    # 2. Pick the left/right dataframes and generate the merged dataframe.
    left_dataframe = driver.find_element_by_xpath(
        '//*[@id="concat_dataframe_action"]/div/div[2]/div[1]/div/div[2]/select[1]/option[2]'
    )
    left_dataframe.click()
    wait_for(1)
    left_dataframe = driver.find_element_by_xpath(
        '//*[@id="concat_dataframe_action"]/div/div[2]/div[1]/div/div[2]/select[1]/option[1]'
    )
    left_dataframe.click()
    wait_for(1)
    right_dataframe = driver.find_element_by_xpath(
        '//*[@id="concat_dataframe_action"]/div/div[2]/div[1]/div/div[2]/select[2]/option[2]'
    )
    right_dataframe.click()
    wait_for(1)
    # NOTE(review): this lookup (and the identical one further down)
    # re-locates the option without clicking it — looks like dead code
    # left behind; kept byte-identical here.
    right_dataframe = driver.find_element_by_xpath(
        '//*[@id="concat_dataframe_action"]/div/div[2]/div[1]/div/div[2]/select[2]/option[2]'
    )
    axis = driver.find_element_by_xpath(
        '//*[@id="concat_dataframe_action"]/div/div[2]/div[1]/div/div[3]/select[1]/option[2]'
    )
    axis.click()
    wait_for(1)
    right_dataframe = driver.find_element_by_xpath(
        '//*[@id="concat_dataframe_action"]/div/div[2]/div[1]/div/div[2]/select[2]/option[2]'
    )
    new_dataframe_name = driver.find_element_by_xpath(
        '//*[@id="concat_dataframe_action"]/div/div[2]/div[1]/div/div[3]/input[2]'
    )
    new_dataframe_name.clear()
    wait_for(1)
    new_dataframe_name.send_keys('df_heart_output')
    wait_for(1)
    concat_submit = driver.find_element_by_xpath('//*[@id="concat_dataframe"]')
    concat_submit.click()
    wait_for(2)
    # 3. Back to the 'data preview' page to inspect the new dataframe.
    entry_li = driver.find_element_by_xpath(
        '/html/body/div[1]/div/div[2]/ul/li[1]/a')
    entry_li.click()
    wait_for(1)
    # (The element id really is "df_sumbit" in the page markup.)
    wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="df_sumbit"]')))
    df_submit = driver.find_element_by_xpath('//*[@id="df_sumbit"]')
    df_submit.click()
    wait_for(1)
    wait.until(
        EC.element_to_be_clickable(
            (By.XPATH, '//*[@id="accordion"]/div[5]/div[1]/a')))
    df_heart_output = driver.find_element_by_xpath(
        '//*[@id="accordion"]/div[5]/div[1]/a')
    df_heart_output.click()
    touch_actions = TouchActions(driver)
    touch_actions.scroll(0, 600).perform()
    wait_for(5)
    # 4. Export the result as a csv file.
    create_csv = driver.find_element_by_xpath(
        '//*[@id="storecsvdf_heart_output_DATA"]')
    create_csv.click()
    wait_for(2)
Exemplo n.º 11
0
def auto_factor_feature_binning(driver):
    """Bin (scatter) the factor-type features of df_heart through the UI.

    Steps: run factor binning into a derived dataframe, preview the
    result, drop the original factor columns, then re-run data
    exploration.

    NOTE(review): the ``st(context=3)`` calls look like interactive
    breakpoints (ipdb/pdb ``set_trace``) used to pause between steps.

    :param driver: selenium WebDriver already logged in to the app.
    """
    wait = WebDriverWait(driver, 30)
    entry_li = driver.find_element_by_xpath(
        '/html/body/div[1]/div/div[2]/ul/li[4]/a')
    entry_li.click()
    wait_for(1)
    # 1. Run the binning for the factor-type features.
    wait.until(
        EC.element_to_be_clickable(
            (By.XPATH, '//*[@id="meta_factor"]/div/div/div[1]/h4/a')))
    df_heart = driver.find_element_by_xpath(
        '//*[@id="meta_factor"]/div/div/div[1]/h4/a')
    df_heart.click()
    wait_for(1)
    wait.until(
        EC.element_to_be_clickable(
            (By.XPATH, '//*[@id="collapseprodf_heart"]/div/input[2]')))
    new_dataframe_name = driver.find_element_by_xpath(
        '//*[@id="collapseprodf_heart"]/div/input[2]')
    new_dataframe_name.clear()
    wait_for(1)
    new_dataframe_name.send_keys('df_heart_derived_factor')
    wait_for(1)
    # Tick the checkboxes of the factor features to scatter (rows 1-8).
    feature_indexs = [1, 2, 3, 4, 5, 6, 7, 8]
    for index in feature_indexs:
        xpath = '//*[@id="collapseprodf_heart"]/div/table/tbody/tr[' + str(
            index) + ']/td[1]/input'
        check_box = driver.find_element_by_xpath(xpath)
        check_box.click()
        wait_for(0.5)
    factor_binning_bt = driver.find_element_by_xpath(
        '//*[@id="featureengineerdf_heart"]')
    factor_binning_bt.click()
    wait_for(1)
    # 2. Inspect the dataframe generated by the binning.
    st(context=3)
    # Link text is the app's Chinese label for "data preview".
    wait.until(EC.visibility_of_element_located((By.LINK_TEXT, '数据预览')))
    data_presee = driver.find_element_by_link_text('数据预览')
    data_presee.click()
    wait_for(1)
    store_path = driver.find_element_by_xpath('//*[@id="store_path"]')
    store_path.clear()
    wait_for(1)
    store_path.send_keys('data_curation.h5')
    wait_for(1)
    df_submit = driver.find_element_by_xpath('//*[@id="df_sumbit"]')
    df_submit.click()
    wait_for(2)
    df_heart_derived_factor_show = driver.find_element_by_xpath(
        '//*[@id="accordion"]/div[3]/div[1]/a')
    df_heart_derived_factor_show.click()
    wait_for(1)
    touch_actions = TouchActions(driver)
    touch_actions.scroll(0, 800).perform()
    wait_for(2)
    df_heart_derived_factor_show.click()
    wait_for(2)
    # 3. Back to the feature-engineering page; drop the original factor
    #    features that were just scattered.
    st(context=3)
    entry_li = driver.find_element_by_xpath(
        '/html/body/div[1]/div/div[2]/ul/li[3]/a')
    entry_li.click()
    wait_for(2)
    wait.until(
        EC.element_to_be_clickable(
            (By.XPATH, '//*[@id="accordion"]/form/div/div[1]/h4/a')))
    df_heart = driver.find_element_by_xpath(
        '//*[@id="accordion"]/form/div/div[1]/h4/a')
    df_heart.click()
    wait_for(2)
    feature_indexs = [2, 3, 6, 7, 9, 11, 12, 13]
    for index in feature_indexs:
        xpath = '//*[@id="collapsedf_heart"]/div/table/tbody/tr[' + str(
            index) + ']/td[1]/input'
        check_box = driver.find_element_by_xpath(xpath)
        check_box.click()
        wait_for(0.5)
    filter_bt = driver.find_element_by_xpath(
        '//*[@id="collapsedf_heart"]/div/input')
    filter_bt.click()
    wait_for(2)
    # 4. Re-run data exploration on the updated dataframe.
    st(context=3)
    entry_li = driver.find_element_by_xpath(
        '/html/body/div[1]/div/div[2]/ul/li[2]/a')
    entry_li.click()
    wait_for(1)
    wait.until(EC.element_to_be_clickable(
        (By.XPATH, '//*[@id="exploration"]')))
    data_exploration_bt = driver.find_element_by_xpath(
        '//*[@id="exploration"]')
    data_exploration_bt.click()
    wait_for(1)
    wait.until(
        EC.element_to_be_clickable(
            (By.XPATH, '//*[@id="pagecontent"]/ul/li[1]/a')))
    touch_actions = TouchActions(driver)
    touch_actions.scroll(0, 800).perform()
    wait_for(2)
Exemplo n.º 12
0
                    for m in scroll:
                        measure_soup = BeautifulSoup(str(m), 'lxml')
                        #print(measure_soup.prettify())
                        fill_in_copy = measure_soup.find(
                            'div', "v-list-item__title").text
                        ex_copy = measure_soup.find(
                            'div', "v-list-item__subtitle").text
                        print(fill_in_copy)
                        print(ex_copy)
                        fill_data.append(fill_in_copy)
                        ex_data.append(ex_copy)
                old_number = number_tag['style'][-len(number_tag['style']):-9]
                i = 0

            taction = TouchActions(driver)
            taction.scroll(0, 200)
            taction.perform()
            print(i)
            i += 1
            # end_card = timer.until(EC.presence_of_element_located((By.XPATH, f'//*[@id="app"]/div[1]/header/div/button[1]')))
            # actions = ActionChains(driver)
            # actions.move_to_element(end_card)
            # actions.click()
            # actions.perform()
        data = list(zip(fill_data, ex_data))
        print(data)
        with open(f'{topic}.csv', 'w', newline='') as csvfile:
            writer = csv.writer(csvfile, delimiter=',')
            print('hi')
            writer.writerow(['Fill_in Copy', 'Example Copy'])
            print('hello')
Exemplo n.º 13
0
    def build_driver(self, url):
        """Open *url* in an emulated-mobile Chrome, scroll through the
        article a random number of times, then move the url from
        unread.txt to read.txt.

        :param url: article URL to read; a falsy value marks the end of
            the queue and sets ``stopReadFlag``.
        :return: None
        """
        if not url:
            self.stopReadFlag = True
            self.print_log("已全部阅读完成\n")
            return

        self.readNum = self.readNum + 1
        self.print_log("开始阅读第 %d 篇,剩余 %d 篇" % (self.readNum, self.unReadNum))
        self.print_log("当前:%s" % url)
        self.c_service = Service(self.chromeDriverPath)
        self.c_service.command_line_args()
        self.c_service.start()

        # Emulate a small mobile device so the page renders its mobile
        # layout.
        mobileEmulation = {
            "deviceMetrics": {
                "width": 320,
                "height": 640,
                "pixelRatio": 3.0
            },
            "userAgent":
            'Mozilla/5.0 (Linux; Android 4.1.1; GT-N7100 Build/JRO03C) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/35.0.1916.138 Mobile Safari/537.36 T7/6.3'
        }
        # mobileEmulation = {'deviceName': 'Apple iPhone 5'}
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--window-size=250,640')
        chrome_options.add_argument('--disable-gpu')
        chrome_options.add_argument('--hide-scrollbars')
        chrome_options.add_argument('--disable-javascript')
        chrome_options.add_argument('--log-level=3')
        chrome_options.binary_location = self.chromeLocationEdit.text()
        chrome_options.add_experimental_option('mobileEmulation',
                                               mobileEmulation)
        # Hide the "controlled by automated software" infobar.
        chrome_options.add_experimental_option("excludeSwitches",
                                               ['enable-automation'])
        # NOTE(review): disabling W3C mode is presumably needed for the
        # legacy TouchActions protocol — confirm against the selenium
        # version in use.
        chrome_options.add_experimental_option('w3c', False)
        self.driver = webdriver.Chrome(options=chrome_options)

        # Drive the page through this driver object.
        self.driver.get(url)
        # Random number of scroll-downs, bounded by the UI spin boxes.
        num = random.randint(int(self.slipTimesFromEdit.text()),
                             int(self.slipTimesToEdit.text()))
        # Number of scrolls actually performed.
        hasNum = 0
        for n in range(num):
            if not self.stopReadFlag:
                holdTime = random.randint(int(self.pauseTimeFromEdit.text()),
                                          int(self.pauseTimeToEdit.text()))
                px = random.randint(int(self.pxFromEdit.text()),
                                    int(self.pxToEdit.text()))
                self.print_log("第 %d 次下滑,等待 %d 秒, 下滑 %d 像素" %
                               (n + 1, holdTime, px))
                # Pause before every scroll so the reading looks human.
                sleep(holdTime)
                action = TouchActions(self.driver)
                action.scroll(0, 200).perform()
                hasNum = hasNum + 1
            else:
                break

        # Cleanup is best-effort: failures are deliberately ignored.
        try:
            if self.driver is not None:
                self.driver.quit()
                self.driver = None
        except Exception as msg:
            pass

        try:
            if self.c_service is not None:
                self.c_service.stop()
                self.c_service = None
        except Exception as msg:
            pass

        if self.stopReadFlag:
            self.print_log("第 %d 篇阅未完成,共下滑 %d 次\n" % (self.readNum, hasNum))
            return
        else:
            self.print_log("第 %d 篇阅读完成,共下滑 %d 次\n" % (self.readNum, hasNum))

        try:
            # Drop the first line of unread.txt (the url just read).
            with open(os.path.abspath('unread.txt'), 'r',
                      encoding='utf-8') as f:
                content = f.readlines()
                with open(os.path.abspath('unread.txt'),
                          'w+',
                          encoding='utf-8') as f1:
                    f1.writelines(content[1:])
                    f1.flush()
                    f1.close()
                f.close()

            # Append the url to the end of read.txt.
            with open(os.path.abspath('read.txt'), 'a', encoding='utf-8') as f:
                f.write(url)
                f.close()
        except Exception as e:
            print(e)
        pass
Exemplo n.º 14
0
'''
Created on May 18, 2018

@author: aranjan

Demo of the selenium TouchActions API.  Gestures are only *queued* by
the builder methods; calling .perform() is what actually executes them.
The original script never called .perform(), so no gesture ever ran.
'''
from selenium import webdriver
from selenium.webdriver.common.touch_actions import TouchActions
driver = webdriver.Chrome()
element = driver.find_element_by_class_name("name")
# NOTE(review): "namem" looks like a typo for "name" — confirm against
# the target page's markup.
element1 = driver.find_element_by_class_name("namem")
touchaction = TouchActions(driver)
#Double taps on a given element.
touchaction.double_tap(element)
#Flicks, starting anywhere on the screen.
touchaction.flick(100, 100)
#Long press on an element.
touchaction.long_press(element1)
#Touch and scroll, moving by xoffset and yoffset.
touchaction.scroll(100, 200)
# BUG FIX: execute the queued gestures in order (missing originally).
touchaction.perform()