def get_plain_text(self, url):
    """Fetch *url* and return its visible text content.

    Script and style elements are stripped, runs of whitespace inside each
    line are collapsed to single spaces, and blank lines are dropped.

    :param url: URL of the page to fetch
    :return: extracted plain text, or '' when the page cannot be fetched
    """
    text = ''
    try:
        page_content = WebHelper.get_page_content_from_url(url)
        if page_content is None:
            print('[Error]', url)
            return ''
        page_content = page_content.decode('utf-8')
        soup = BeautifulSoup(page_content, 'lxml')
        # Remove all script and style elements so only human-readable
        # text remains.
        for script in soup(["script", "style"]):
            script.extract()
        text = soup.get_text()
        # Strip each line and collapse internal whitespace runs to a
        # single space.
        lines = (' '.join(line.strip().split()) for line in text.splitlines())
        text = '\n'.join(lines)
        # Drop empty lines.
        text = os.linesep.join([s for s in text.splitlines() if s])
        # Crude rate limiting between successive requests.
        time.sleep(random.randint(1, 3))
    except KeyboardInterrupt:
        # BUG FIX: KeyboardInterrupt derives from BaseException, not
        # Exception, in Python 3, so the original
        # `isinstance(e, KeyboardInterrupt)` check inside
        # `except Exception` could never fire. Catch it explicitly and
        # exit, as the original code intended (exit() raises SystemExit).
        raise SystemExit
    except Exception as e:
        # Best-effort scraping: report the failure and fall through to
        # return whatever text was collected so far (usually '').
        print(e)
    return text
def get_search_page_by_name(cls, name):
    """Download the search-engine result page for *name*.

    :param name: query to search for; spaces are encoded as '+'
    :return: html content of the search result page
    """
    query = '+'.join(str(name).split(' '))
    search_url = cls.__SEARCH_ROOT_URL__ + query
    return WebHelper.get_page_content_from_url(search_url)
__RESULT_DIR_PATH__ = '../google_result/'
__SEARCH_ROOT_URL__ = 'https://www.google.com/search?hl=en&safe=off&q='
# NOTE(review): the two assignments above look like the tail of a Google
# helper class body that was flattened during extraction — confirm their
# indentation against the original file.


class BingHelper(SearchHelper):
    # Bing-specific configuration consumed by the shared SearchHelper
    # machinery.
    __parser__ = BingPageHTMLParser
    __RESULT_DIR_PATH__ = '../bing_result/'
    __SEARCH_ROOT_URL__ = 'https://cn.bing.com/search?q='


if __name__ == '__main__':
    # Ad-hoc smoke test: fetch one search page and dump it to disk.
    content = WebHelper.get_page_content_from_url(
        'http://www.google.com/search?q=jie+tang+tsinghua+email')
    # BUG FIX: get_page_content_from_url returns bytes (it is decoded with
    # .decode('utf-8') elsewhere in this file) and may return None on
    # failure. Writing bytes to a text-mode ('w') file raises TypeError in
    # Python 3, and the handle was never closed. Guard against None, open
    # in binary mode, and use a context manager.
    if content is None:
        print('[Error] failed to fetch search page')
    else:
        with open('bing_result.html', 'wb') as result:
            result.write(content)
__parser__ = GooglePageHTMLParser
__RESULT_DIR_PATH__ = '../google_result/'
__SEARCH_ROOT_URL__ = 'https://www.google.com/search?hl=en&safe=off&q='
# NOTE(review): this entire span duplicates the preceding chunk (Google
# helper attributes, BingHelper, and the __main__ script) — likely an
# extraction artifact; confirm against the original file and deduplicate.
# The three assignments above also look like a flattened class body —
# confirm their indentation against the original file.


class BingHelper(SearchHelper):
    # Bing-specific configuration consumed by the shared SearchHelper
    # machinery.
    __parser__ = BingPageHTMLParser
    __RESULT_DIR_PATH__ = '../bing_result/'
    __SEARCH_ROOT_URL__ = 'https://cn.bing.com/search?q='


if __name__ == '__main__':
    # Ad-hoc smoke test: fetch one search page and dump it to disk.
    content = WebHelper.get_page_content_from_url(
        'http://www.google.com/search?q=jie+tang+tsinghua+email')
    # BUG FIX: get_page_content_from_url returns bytes (it is decoded with
    # .decode('utf-8') elsewhere in this file) and may return None on
    # failure. Writing bytes to a text-mode ('w') file raises TypeError in
    # Python 3, and the handle was never closed. Guard against None, open
    # in binary mode, and use a context manager.
    if content is None:
        print('[Error] failed to fetch search page')
    else:
        with open('bing_result.html', 'wb') as result:
            result.write(content)