Пример #1
0
def _crawl(detail_link: str, driver: WebDriver) -> Info:
    """Open one detail page and return its scraped content as an ``Info``.

    :param detail_link: URL of the detail page to load.
    :param driver: browser session used for loading and scraping.
    :return: the result of ``_convert`` applied to the fields collected by
        ``_get_all_present_fields``.
    """
    driver.get(detail_link)
    time.sleep(5)  # presumably gives the page time to render -- TODO confirm

    accordeon = driver.find_elements_by_class_name('accordeon')

    try:
        # Get rid of the cookie banner, which blocks all clicks on this
        # website.
        driver.find_element_by_class_name(
            'moove-gdpr-infobar-allow-all').click()
    except Exception:
        # Best effort: the banner may be absent or not clickable. Catching
        # Exception (instead of a bare except) still lets KeyboardInterrupt
        # and SystemExit propagate.
        pass

    info = _get_all_present_fields(accordeon, driver)

    return _convert(info)
Пример #2
0
def _get_detail_links(driver: WebDriver) -> List[str]:
    """Return the ``href`` of every anchor inside the element with id 'toplist'.

    :param driver: browser session that already has the list page open.
    :return: one entry per ``<a>`` element, in document order as reported by
        the driver.
    """
    time.sleep(10)  # website is very slow

    anchors = driver.find_element_by_id('toplist').find_elements_by_tag_name(
        'a')
    return [anchor.get_attribute("href") for anchor in anchors]
Пример #3
0
def _crawl_detail_page(driver: WebDriver) -> List[DetailPageInfo]:
    """Visit the detail page of entries 1..314 and collect their information.

    For every index the overview page at ``URL`` is reloaded, the table row
    whose first cell carries that index is located, its link is clicked and
    the detail page is extracted. Indices without a matching row are skipped.

    :param driver: browser session used for navigation.
    :return: the collected detail infos after ``_clean_detail_pages`` has
        been applied to the list.
    """
    collected = []

    for idx in range(1, 315):
        driver.get(URL)

        for row in _get_table_rows(driver):
            first_cell = row.find_elements_by_tag_name('td')[0]
            if extract_idx(first_cell) != idx:
                continue

            # Found the row for this index: follow its link and scrape.
            first_cell.find_element_by_tag_name('a').click()
            collected.append(_extract_detail_info(driver, idx))
            break

    _clean_detail_pages(collected)
    return collected
Пример #4
0
def ie(path='./IEDriverServer.exe') -> WebDriver:
    """
    Start an Internet Explorer session with the default settings applied.

    The page-load timeout is set to 30, the implicit wait to 10, and the
    window is maximized before the driver is handed back.

    :param path: path to the IE driver executable
    :return: the configured WebDriver
    """
    browser = IE(executable_path=path)
    browser.set_page_load_timeout(30)
    browser.implicitly_wait(10)
    browser.maximize_window()
    return browser
Пример #5
0
def _extract_soft_facts(driver: WebDriver) -> SoftFacts:
    """Read the descriptive ("soft") facts from the currently open page.

    The container is addressed purely by position: it is the ``<div>`` at
    index 36 on the page, and the individual facts are the ``<p>`` elements
    at fixed indices inside it.

    :param driver: browser session showing the detail page.
    :return: a ``SoftFacts`` built from aims, activities, accreditations,
        areas of competence and geographical representation.
    """
    container = driver.find_elements_by_tag_name('div')[36]
    paragraphs = container.find_elements_by_tag_name('p')

    def text_of(position: int) -> str:
        # Stripped text of the paragraph at the given fixed position.
        return paragraphs[position].text.strip()

    aims = text_of(0)
    activities = text_of(1)
    accreditations = text_of(3)
    competence_areas = _split_into_areas(text_of(7))
    countries = _split_into_countries(text_of(8))

    return SoftFacts(aims, activities, accreditations, competence_areas,
                     countries)
Пример #6
0
def _extract_hard_facts(driver: WebDriver, idx: int) -> HardFacts:
    """Read the structured ("hard") facts from the currently open page.

    Website and president name come from fixed positions inside the
    ``<div>`` at index 12; the remaining values are delegated to
    ``_extract_quick_facts``.

    :param driver: browser session showing the detail page.
    :param idx: index of the entry, passed through to
        ``_extract_quick_facts``.
    :return: a ``HardFacts`` with website, president name, founding year,
        staff number, members number and languages.
    """
    container = driver.find_elements_by_tag_name('div')[12]

    spans = container.find_elements_by_tag_name('span')
    website = spans[1].text.strip()

    rows = container.find_elements_by_class_name('row')
    president_heading = rows[4].find_element_by_tag_name('h4')
    president_name = president_heading.text.strip()

    # Four values: founding year, staff number, members number, languages.
    quick_facts = _extract_quick_facts(driver, idx)

    return HardFacts(website, president_name, *quick_facts)
Пример #7
0
    def getWebDriver(self, browser):
        """Create a maximized driver for the named browser.

        The name is matched case-insensitively against "IE", "CHROME" and
        "FIREFOX". Creation is recorded through ``self.logger``. Any other
        name yields ``None``.
        """
        print(browser)

        kind = browser.upper()

        if kind == "IE":
            driver = InternetExplorerDriver()
            log_line = "新建IE驱动"
        elif kind == "CHROME":
            options = ChromeOptions()
            options.add_argument("test-type")
            driver = ChromeDriver(chrome_options=options)
            log_line = "新建chrome驱动"
        elif kind == "FIREFOX":
            # NOTE(review): "FirefoxDriverDriver" looks like a typo next to
            # the other driver names -- confirm against the file's imports.
            driver = FirefoxDriverDriver()
            log_line = "新建FireFox驱动"
        else:
            return None

        driver.maximize_window()
        self.logger.appendContent(log_line)
        return driver
Пример #8
0
def batchAccept(browser: WebDriver):
    """Tick every pending ticket tagged '[数据网]' and batch-accept them.

    Opens the waiting-list page, reads the total ticket count from the
    pager text, ticks the checkbox of each ticket whose title contains
    '[数据网]' and finally runs the page's own ``batchAccept()`` script.
    Returns early without doing anything when the pager reports no tickets.

    :param browser: browser session used for navigation.
    :raises AttributeError: if the pager text does not contain the expected
        count pattern (``re.search`` returns ``None``).
    """
    # Batch acceptance
    u = 'http://10.204.14.35/eoms4/sheetBpp/myWaitingDealSheetQueryty.action?baseSchema=WF4_EL_TTM_TTH_EQU&var_pagesize=100'
    browser.get(u)
    total_ele = 'form#form1 span.pagenumber'
    total_element = browser.find_element(By.CSS_SELECTOR, total_ele)
    # The regex group is a string; convert it before the numeric comparison
    # (comparing str with int raises TypeError on Python 3).
    total = int(re.search(r'共([0-9]+)条数据', total_element.text).group(1))
    if total < 1:
        return

    # Skip the first row of the table; after that each ticket occupies two
    # rows: an even row holding the checkbox and an odd row holding the title.
    todo_list = browser.find_elements(By.CSS_SELECTOR, 'table#tab tr')[1:]

    # Never index past the rows that are actually on the page, even if the
    # reported total is larger (e.g. more tickets than one page shows).
    ticket_count = min(total, len(todo_list) // 2)
    for x in range(ticket_count):
        title = todo_list[2 * x + 1].text
        if '[数据网]' in title:
            todo_list[2 * x].find_element(By.NAME, 'checkid').click()

    browser.execute_script('batchAccept();')
Пример #9
0
def jumpToEOMS(browser: WebDriver, keyword='') -> tuple:
    """Collect pending fault tickets matching ``keyword`` and push a summary.

    Steps: open the EOMS bridge page, open the waiting list, gather every
    ticket whose title contains ``keyword``, visit each ticket to read its
    alarm clear time, send a markdown summary via ``send_msg`` and finally
    navigate back to the bridge page.

    :param browser: browser session used for navigation.
    :param keyword: substring a ticket title must contain; the default ''
        matches every ticket.
    :return: a tuple ``(data, data2, data3)`` of ticket dicts: all matching
        tickets, those with a non-empty clear time, and those with an empty
        clear time.
    :raises AttributeError: if the pager text does not contain the expected
        count pattern (``re.search`` returns ``None``).
    """
    eoms_url = 'http://uip.ln.cmcc/_layouts/Document/BridgeToSPControl.aspx?skipcode=emoss'
    browser.get(eoms_url)
    # Fault-handling tickets (equipment):
    todo_list_url = 'http://10.204.14.35/eoms4/sheetBpp/myWaitingDealSheetQueryGlobalTemplate.action?baseSchema=WF4_EL_TTM_TTH_EQU&var_pagesize=100'
    browser.get(todo_list_url)
    total_ele = 'form#form1 span.pagenumber'
    total_element = browser.find_element(By.CSS_SELECTOR, total_ele)
    total = int(re.search(r'共([0-9]+)条数据', total_element.text).group(1))
    # Skip the first row of the table; after that each ticket occupies two
    # rows: an even row with the detail cells and an odd row with the title.
    todo_list = browser.find_elements(By.CSS_SELECTOR, 'table#tab tr')[1:]

    def get_url(title_ele):
        # The ticket link carries its parameters as quoted arguments of the
        # onclick handler; rebuild the direct ticket URL from them.
        onclick = title_ele.find_element_by_tag_name('a').get_attribute(
            'onclick')
        args = re.findall(r'\'(.*?)\'', onclick)
        url = 'http://10.204.14.35/eoms4/sheet/openWaittingSheet.action?baseSchema={}&taskid={}&baseId={}&entryId=&version=&processType={}'
        return url.format(args[0], args[2], args[1], args[3])

    # Pending tickets matching the keyword.
    data = []
    # Never index past the rows that are actually on the page, even if the
    # reported total is larger (e.g. more tickets than one page shows).
    for x in range(min(total, len(todo_list) // 2)):
        title = todo_list[2 * x + 1]
        title_text = title.text
        if keyword not in title_text:
            continue
        content = todo_list[2 * x].find_elements_by_tag_name('td')
        data.append({
            'title': title_text[21:],
            'end_time': datep(content[4].text),
            'find_time': datep(content[5].text),
            'status': content[6].text,
            'url': get_url(title),
        })

    # Tickets that already have a clear time.
    data2 = []
    # Tickets without a clear time.
    data3 = []
    for ticket in data:
        browser.get(ticket['url'])
        ticket['clear_time'] = browser.find_element_by_id(
            'INC_Alarm_ClearTime').get_attribute('value')
        title_match = re.search(r'([A-Z\-0-9]+) 上报 (.+)', ticket['title'])
        # Titles that do not follow the "<device> 上报 <event>" shape simply
        # get no 'device'/'event' keys.
        if title_match:
            ticket['device'] = title_match.group(1)
            ticket['event'] = title_match.group(2)
        if ticket['clear_time'] != '':
            data2.append(ticket)
        else:
            data3.append(ticket)

    # TODO: detailed push notification
    msg_title = '{}故障工单 {} 个,已上清除 {} 个。'
    msg_title = msg_title.format(keyword, len(data), len(data2))
    find_time2 = [datef(ticket['find_time']) for ticket in data2]
    find_time3 = [datef(ticket['find_time']) for ticket in data3]
    msg_text = '### {}\n\n**已上清除建单时间分别为**:\n\n{}\n\n**未上清除建单时间分别为**:\n\n{}\n\n> 推送时间:{}'
    t2, t3 = '\n\n'.join(find_time2), '\n\n'.join(find_time3)
    msg_text = msg_text.format(msg_title, t2, t3, datef())
    send_msg(msg_markdown(msg_title, msg_text, True))

    browser.get(eoms_url)
    return data, data2, data3
Пример #10
0
 def __init__(self, *args, **kwargs):
     """Run the IeWebDriver and WaitUntil initializers on this instance.

     All positional and keyword arguments are forwarded to
     ``IeWebDriver.__init__``; ``WaitUntil.__init__`` is called without
     extra arguments.
     """
     # Both initializers are invoked explicitly by class name, in this
     # order (presumably these are the class's bases -- confirm at the
     # class header).
     IeWebDriver.__init__(self, *args, **kwargs)
     WaitUntil.__init__(self)
Пример #11
0
 def cleanUpCookiesAndLaunchIE(self):
     """Start a new driver, delete its cookies, open Yahoo and return it."""
     # NOTE(review): the capabilities are looked up but never passed to the
     # driver below -- confirm whether they were meant to be used.
     capabilities = DesiredCapabilities.INTERNETEXPLORER
     browser = WebDriver()
     browser.delete_all_cookies()
     browser.get("http://www.yahoo.com")
     return browser
Пример #12
0
 def cleanUpCookiesAndLaunchIE(self):
     """Start a new driver, delete its cookies, open Yahoo and return it."""
     # NOTE(review): the capabilities are looked up but never passed to the
     # driver below -- confirm whether they were meant to be used.
     capabilities = DesiredCapabilities.INTERNETEXPLORER
     browser = WebDriver()
     browser.delete_all_cookies()
     browser.get("http://www.yahoo.com")
     return browser
Пример #13
0
 def __init__(self):
     """Create the IE driver wrapper.

     Runs ``WebDriver.__init__`` and then ``DriverWrapper.__init__`` on
     this instance, both without extra arguments.
     """
     # Both initializers are invoked explicitly by class name, in this
     # order (presumably these are the class's bases -- confirm at the
     # class header).
     WebDriver.__init__(self)
     DriverWrapper.__init__(self)
Пример #14
0
def _get_table_rows(driver: WebDriver) -> List[WebElement]:
    """Return the ``<tr>`` elements of the first table's ``<tbody>``.

    :param driver: browser session showing the page with the table.
    :return: the body rows as reported by the driver.
    """
    # Without this short pause the table might not be loaded yet.
    time.sleep(1)

    tbody = driver.find_element_by_tag_name('table').find_element_by_tag_name(
        'tbody')
    return tbody.find_elements_by_tag_name('tr')
Пример #15
0
# Free-text staff counts that int() cannot parse (spaces already removed),
# mapped to the number that should be recorded for them.
_STAFF_OVERRIDES = {
    '6fulltime+2interns': 6,
    '2.8': 3,
    '3employeesand7volunteers': 3,
    '5employees': 5,
}

# Free-text member counts that int() cannot parse (spaces and '+' already
# removed), mapped to the number that should be recorded for them.
_MEMBERS_OVERRIDES = {
    '384organisations': 384,
    '50delegations': 50,
    '120,000': 120000,
    'Around35': 35,
    'about5000': 5000,
    '46nationaluniversitysportsgoverningbodies': 46,
    'around1000membersin34branches': 1000,
    '44associations': 44,
    'approx.700': 700,
    '5,000,000': 5000000,
    '250associations': 250,
    '28memberorganisations': 28,
}


def _extract_quick_facts(
        driver: WebDriver, idx: int
) -> Tuple[Optional[int], Optional[int], Optional[int], List[str]]:
    """Read founding year, staff, members and languages from the open page.

    Candidates are the elements of class 'ng-scope' that contain at least one
    ``<strong>``; each fact is taken from the ``<strong>`` of the first
    candidate whose text matches the fact's label.

    :param driver: browser session showing the detail page.
    :param idx: index of the entry; only used in the diagnostic print for
        unparsable staff values.
    :return: ``(founding_year, staff_number, members_number, languages)``.
        A number is ``None`` when its fact is missing, or when its text is
        neither an integer nor a known override. ``languages`` is ``[]``
        when the fact is missing.
    :raises ValueError: if a 'Founded' value is present but is not an
        integer.
    """
    founding_year = None
    staff_number = None
    members_number = None
    languages: List[str] = []

    # Read every candidate's text once; each read is a driver call.
    candidates = [
        (element, element.text)
        for element in driver.find_elements_by_class_name('ng-scope')
        if element.find_elements_by_tag_name('strong')
    ]

    def strong_text(matches) -> Optional[str]:
        # Stripped <strong> text of the first candidate whose own text
        # satisfies ``matches``; None when no candidate does.
        for element, text in candidates:
            if matches(text):
                return element.find_element_by_tag_name(
                    'strong').text.strip()
        return None

    founded_string = strong_text(lambda text: text.startswith('Founded'))
    if founded_string is not None:
        founding_year = int(founded_string)

    staff_string = strong_text(lambda text: text.strip().endswith('staff'))
    if staff_string is not None:
        staff_string = staff_string.replace(' ', '')
        try:
            staff_number = int(staff_string)
        except ValueError:
            # Unknown strings leave the value at None.
            staff_number = _STAFF_OVERRIDES.get(staff_string)
            # Printed for every unparsable value, including overridden ones
            # (kept as in the original behaviour).
            print(f'SKIPPING STAFF ({idx}) - {staff_string}')

    members_string = strong_text(
        lambda text: text.strip().endswith('members'))
    if members_string is not None:
        members_string = members_string.replace(' ', '').replace('+', '')
        try:
            members_number = int(members_string)
        except ValueError:
            # Unknown strings leave the value at None.
            members_number = _MEMBERS_OVERRIDES.get(members_string)

    languages_string = strong_text(
        lambda text: text.startswith('Working languages'))
    if languages_string is not None:
        # NOTE(review): this is the raw string, not a list, although the
        # annotation says List[str]; kept because callers may rely on it.
        languages = languages_string

    return founding_year, staff_number, members_number, languages