Exemplo n.º 1
0
 def __init__(self):
     """Set the crawl URLs and counters, start Chrome and set up storage.

     Side effects: launches a Chrome WebDriver using a hard-coded
     chromedriver path and calls ``init_db('zhihu', 'zhihu_all')`` on a new
     SqlHelper.
     """
     # Plain configuration values.
     self.base_url = 'https://www.zhihu.com'
     self.start_url = 'https://zhuanlan.zhihu.com/yinjiaoshou886/answer'
     # Zhihu's "unhuman" check page; the URL-encoded message says abnormal
     # traffic was detected and asks the visitor to type a captcha.
     self.black_page = 'https://www.zhihu.com/account/unhuman?type=unhuman&message=%E7%B3%BB%E7%BB%9F%E6%A3%80%E6%B5%8B%E5%88%B0%E6%82%A8%E7%9A%84%E5%B8%90%E5%8F%B7%E6%88%96IP%E5%AD%98%E5%9C%A8%E5%BC%82%E5%B8%B8%E6%B5%81%E9%87%8F%EF%BC%8C%E8%AF%B7%E8%BE%93%E5%85%A5%E4%BB%A5%E4%B8%8B%E5%AD%97%E7%AC%A6%E7%94%A8%E4%BA%8E%E7%A1%AE%E8%AE%A4%E8%BF%99%E4%BA%9B%E8%AF%B7%E6%B1%82%E4%B8%8D%E6%98%AF%E8%87%AA%E5%8A%A8%E7%A8%8B%E5%BA%8F%E5%8F%91%E5%87%BA%E7%9A%84'
     self.user_home_url = ''
     self.current = 1

     # Browser first, storage second -- same order of side effects as before.
     driver_path = '/home/caidong/developProgram/selenium/chromedriver'
     self.browser = webdriver.Chrome(executable_path=driver_path)

     storage = SqlHelper()
     storage.init_db('zhihu', 'zhihu_all')
     self.SqlH = storage
Exemplo n.º 2
0
 def __init__(self):
     """Initialise URLs, the type list, paging state and the storage helper.

     Side effect: creates a SqlHelper and calls ``init_db('zhihu')`` on it.
     """
     self.base_url = 'https://www.zhihu.com'
     self.start_url = 'https://www.zhihu.com/people/kaifulee/activities'

     # NOTE(review): 'fashionbang' and 'technology' each appear twice --
     # confirm whether the repetition is intentional.
     type_names = [
         'hot',
         'local',
         'shehui',
         'guonei',
         'guoji',
         'recomment',
         'junshi',
         'finance',
         'technology',
         'sports',
         'fashionbang',
         'fashionbang',
         'auto_moto',
         'fangcan',
         'technology',
         'yangshengtang',
     ]
     self.type = type_names
     self.current_type = ''
     self.page = 2

     # NOTE(review): "totla" looks like a misspelling of "total"; the name
     # is kept because code outside this block may reference it.
     self.totla_url_set = set()
     self.wait_use_url_set = set()

     storage = SqlHelper()
     storage.init_db('zhihu')
     self.SqlH = storage
Exemplo n.º 3
0
 def __init__(self):
     """Set up URLs, storage, a single Chrome driver and the page range.

     Side effects: creates a SqlHelper and calls
     ``init_db('zhihu', 'zhihu_48000')`` on it, launches a Chrome WebDriver
     from a hard-coded chromedriver path, sleeps for 5 seconds and prints
     the cookies reported by the browser.
     """
     # Zhihu's "unhuman" check page; the URL-encoded message says abnormal
     # traffic was detected and asks the visitor to type a captcha.
     self.black_page = 'https://www.zhihu.com/account/unhuman?type=unhuman&message=%E7%B3%BB%E7%BB%9F%E6%A3%80%E6%B5%8B%E5%88%B0%E6%82%A8%E7%9A%84%E5%B8%90%E5%8F%B7%E6%88%96IP%E5%AD%98%E5%9C%A8%E5%BC%82%E5%B8%B8%E6%B5%81%E9%87%8F%EF%BC%8C%E8%AF%B7%E8%BE%93%E5%85%A5%E4%BB%A5%E4%B8%8B%E5%AD%97%E7%AC%A6%E7%94%A8%E4%BA%8E%E7%A1%AE%E8%AE%A4%E8%BF%99%E4%BA%9B%E8%AF%B7%E6%B1%82%E4%B8%8D%E6%98%AF%E8%87%AA%E5%8A%A8%E7%A8%8B%E5%BA%8F%E5%8F%91%E5%87%BA%E7%9A%84'
     self.start_url = 'https://www.zhihu.com/people/kaifulee/followers?page=25583'
     self.base_url = 'https://www.zhihu.com'

     self.SqlH = SqlHelper()
     self.SqlH.init_db('zhihu', 'zhihu_48000')

     # Start exactly one browser. Creating a second driver and then
     # rebinding self.browser would orphan the first one's process, since
     # nothing would be left holding a reference to call quit() on it.
     self.browser = webdriver.Chrome(
         executable_path='/home/caidong/developProgram/selenium/chromedriver'
     )

     # Presumably gives the freshly started browser time to settle before
     # its cookies are dumped for debugging -- TODO confirm.
     time.sleep(5)
     print('cookie', self.browser.get_cookies())

     # NOTE(review): start_page is larger than end_page, so the pages are
     # presumably walked downwards -- confirm against the crawl loop.
     self.start_page = 48000
     self.end_page = 47000
Exemplo n.º 4
0
 def __init__(self):
     """Store the site base URL and set up the storage helper for 'zhihu'."""
     storage = SqlHelper()
     storage.init_db('zhihu')
     self.SqlH = storage
     self.base_url = 'https://www.zhihu.com'
Exemplo n.º 5
0
 def __init__(self):
     """Create the storage helper and call ``init_db('baiduNews')`` on it."""
     storage = SqlHelper()
     storage.init_db('baiduNews')
     self.SqlH = storage
Exemplo n.º 6
0
 def __init__(self):
     """Create a SqlHelper and keep it on the instance as ``self.sqlhelper``."""
     self.sqlhelper = SqlHelper()
Exemplo n.º 7
0
        return results
        print(items)
        return items

    def close_client(self):
        """Close ``self.client`` by calling its ``close()`` method."""
        self.client.close()

    def count(self, condition=None):
        """Return the count of entries in ``self.collection`` for *condition*.

        Args:
            condition: Mapping (or anything ``dict()`` accepts) used as the
                query filter. ``None``, the default, is treated as the
                empty filter ``{}``.

        Returns:
            Whatever ``self.collection.find(filter).count()`` returns.
        """
        # dict(None) raises TypeError, so the advertised default has to be
        # mapped to an empty filter; a supplied mapping is still copied.
        condition = {} if condition is None else dict(condition)
        # NOTE(review): if self.collection is a PyMongo collection,
        # Cursor.count() no longer exists in PyMongo 4 (count_documents()
        # replaces it) -- confirm the driver version in use.
        return self.collection.find(condition).count()


if __name__ == '__main__':
    # Manual check: print the count for 'zhihu' / 'zhihu_all', wait ten
    # seconds, then print how much the count changed in that window.
    from MongoHelp import MongoHelper as SqlHelper
    sqlhelper = SqlHelper()
    sqlhelper.init_db('zhihu', 'zhihu_all')
    pre = sqlhelper.count({})
    # Reuse the sample just taken instead of querying a second time.
    print('sum:', str(pre))
    time.sleep(10)
    now = sqlhelper.count({})
    # Both samples used to be taken and then discarded; report the delta
    # so the ten second wait actually produces a result.
    print('added in 10s:', str(now - pre))
Exemplo n.º 8
0
 def __init__(self):
     """Initialise the type list, paging state and the 'weixin' storage helper.

     Side effect: creates a SqlHelper and calls ``init_db('weixin')`` on it.
     """
     # NOTE(review): 'fashionbang' and 'technology' each appear twice --
     # confirm whether the repetition is intentional.
     type_names = [
         'hot',
         'local',
         'shehui',
         'guonei',
         'guoji',
         'recomment',
         'junshi',
         'finance',
         'technology',
         'sports',
         'fashionbang',
         'fashionbang',
         'auto_moto',
         'fangcan',
         'technology',
         'yangshengtang',
     ]
     self.type = type_names
     self.current_type = ''
     self.page = 2

     storage = SqlHelper()
     storage.init_db('weixin')
     self.SqlH = storage
Exemplo n.º 9
0
from MongoHelp import MongoHelper as SqlHelper

import csv, time

# Presumably the CSV column names (csv is imported above) -- TODO confirm.
# NOTE(review): 'answer' and 'article' each appear twice; verify that the
# repeated columns are intended.
headers = [
    'user_name',
    'answer_comment_1',
    'answer_comment_2',
    'answer_comment_3',
    'article_comment_1',
    'article_comment_2',
    'article_comment_3',
    'answer',
    'user_home_url',
    'article',
    'flowing',
    'followers',
    'collect',
    'answer',
    'article',
]

# Module-level storage helper, initialised with 'zhiHu' / 'zhihu_all'.
# NOTE(review): the first argument is spelled 'zhiHu' with a capital H --
# confirm this is the intended name.
SqlH = SqlHelper()
SqlH.init_db('zhiHu', 'zhihu_all')
con = {
    "$and": [
        {
            'article_comment': {
                "$exists": True
            }
        },
        {
            'answer_comment': {
                "$exists": True
            }
        },
        {
            'flowing': {
                "$exists": True
            }
        },
        # {'export_flag': {"$exists": False}}
    ]