Пример #1
0
def getDetail():
    """Poll the ``redios`` table and run the detail scraper on pending rows.

    Repeatedly selects up to 10 documents that have no ``imgs`` field
    (highest ``_id`` first), calls ``videoDemo.vdetail`` with each
    document's page URL and ``id``, then sets ``imgs`` to ``''`` on that
    document so it is not selected again.  When nothing is pending the
    loop backs off for 10 seconds.  The function never returns.
    """
    getObj = videoDemo()
    dbObj = Dbobj('redio', 're_')
    curTableObj = dbObj.getTbname('redios')
    base = 'https://www.xvideos.com/'
    pending_filter = {"imgs": {"$exists": False}}
    while True:
        # find() hands back a cursor object, never None, so the batch is
        # materialised and tested for emptiness to detect "nothing to do".
        batch = list(
            curTableObj.find(pending_filter)
            .sort('_id', pymongo.DESCENDING)
            .limit(10)
        )
        if not batch:
            time.sleep(10)
            continue
        for v in batch:
            # Prefer the relative path; fall back to ``url`` when ``rel``
            # is absent (or stored as 0).
            _rel = v.get('rel', 0)
            if _rel == 0:
                _rel = v.get('url')
            curUrl = base + _rel.strip('/')
            getObj.vdetail(curUrl, v['id'])
            # Mark the row as handled regardless of what vdetail returned.
            curTableObj.update({"id": v['id']}, {"$set": {'imgs': ''}})
        time.sleep(0.5)
Пример #2
0
def getList(obj, num):
    """Scrape one listing page and store the star entries it contains.

    Args:
        obj: fetcher exposing ``mainContent(url)``, which returns the page
            HTML or ``None``.
        num: page number inserted into the listing URL.

    Returns:
        ``False`` when the page could not be fetched or no entry matched;
        ``None`` after the parsed entries were inserted into ``stars``.
    """
    url = "https://www.123fanhao.com/special-show-p-" + str(num) + ".html"
    content = obj.mainContent(url)
    if content is None:
        return False
    # Raw string so the regex escapes are not treated as (invalid) string
    # escapes; the pattern itself is unchanged.
    pattern = r'<div class="col-xs-6 col-sm-2 placeholder">([.|\s|\S|\n]*?)<a href="([.|\s|\S|\n]*?)" target="_blank"><img src="([.|\s|\S|\n]*?)" title="([.|\s|\S|\n]*?)" class="img-thumbnail"></a><h4><a href="([.|\s|\S|\n]*?)" target="_blank">([.|\s|\S|\n]*?)</a>([.|\s|\S|\n]*?)</div>'
    listCates = re.findall(pattern, content)
    # re.findall returns a (possibly empty) list, never None; an empty
    # list must not reach insert_many.
    if not listCates:
        return False

    # Only open the database once there is something to store.
    dbObj = Dbobj('redio', 're_')
    curTableObj = dbObj.getTbname('stars')
    data = []
    for v in listCates:
        # The title attribute is split on '/' and its second segment is
        # used as ``ename``; titles without a slash get an empty value.
        title_parts = v[3].split('/')
        data.append({
            '_id': dbObj.getNextValue('stars'),
            'ename': title_parts[1].strip() if len(title_parts) > 1 else '',
            'rel': v[1],
            'img': v[2],
            'cname': v[5].strip(),
            'status': 0,
        })
    curTableObj.insert_many(data)
Пример #3
0
def getDetail():
    """Print one randomly sampled ad record from the ``ads`` table.

    The aggregation matches ``status`` 0 / ``device`` 2 / ``type`` 1,
    groups per ``_id`` (collecting the ad fields into ``data``), samples
    one group and sorts by ``_id`` descending.  The first ``data`` entry
    of the last returned group is printed, followed by its ``url``; when
    nothing is returned an empty dict and ``None`` are printed.
    """
    scraper = videoDemo()  # constructed as before; not used further here
    db = Dbobj('redio', 're_')
    ads_table = db.getTbname('ads')

    match_stage = {"$match": {'status': 0, 'device': 2, 'type': 1}}
    ad_fields = {
        "url": "$url",
        "status": "$status",
        "_id": "$_id",
        'device': "$device",
        'type': "$type",
    }
    group_stage = {
        "$group": {
            "_id": "$_id",
            "count": {"$sum": 1},
            "data": {"$push": ad_fields},
        }
    }
    sample_stage = {"$sample": {"size": 1}}
    sort_stage = {"$sort": {"_id": -1}}

    cursor = ads_table.aggregate(
        [match_stage, group_stage, sample_stage, sort_stage]
    )

    picked = {}
    for group in cursor:
        picked = group.get('data')[0]
    print(picked)
    print(picked.get('url'))
Пример #4
0
def deRange(num):
    """Scrape one listing page and upsert its videos into ``redios``.

    Args:
        num: page index; values below 1 select the base URL, otherwise
            ``/new/<num>`` is fetched.

    Each item yielded by ``videoDemo.getVedioUrl`` that carries a
    non-blank ``id`` is inserted if no document with that ``id`` exists
    yet; existing documents are left untouched (``$setOnInsert``).
    """
    getObj = videoDemo()
    dbObj = Dbobj('redio', 're_')
    curTableObj = dbObj.getTbname('redios')
    if num < 1:
        url = "https://www.qq.com/"
    else:
        url = "https://www.qq.com/new/" + str(num)

    for v in getObj.getVedioUrl(url):
        video_id = v['id'].strip()
        if not video_id:
            # Blank or whitespace-only ids cannot be used as a lookup key.
            continue
        # Store the same normalised id the filter matches on; otherwise a
        # padded id would never match on later runs and be inserted again.
        v['id'] = video_id
        v['_id'] = dbObj.getNextValue('redios')
        v['tags'] = v['cates'] = ''
        v['status'] = 0
        curTableObj.update({"id": video_id}, {"$setOnInsert": v}, upsert=True)
Пример #5
0
def poolDetail(start, end):
    """Process pending ``redios`` rows whose ``_id`` lies in ``[start, end)``.

    Args:
        start: inclusive lower bound for ``_id``.
        end: exclusive upper bound for ``_id``.

    Up to 10 documents with ``status`` 0 are selected at a time (highest
    ``_id`` first).  For each one ``videoDemo.vdetail`` is called with the
    page URL and ``id``; the document is then updated with ``status`` 1
    and, unless ``vdetail`` returned ``False``, with the returned value as
    ``tags``.  The function returns once no matching document remains.
    """
    getObj = videoDemo()
    dbObj = Dbobj('redio', 're_')
    curTableObj = dbObj.getTbname('redios')
    base = 'https://www.xvideos.com/'
    pending_filter = {
        "status": 0,
        "_id": {
            "$gte": start,
            "$lt": end
        }
    }
    while True:
        # find() hands back a cursor object, never None, so the batch is
        # materialised and tested for emptiness to know when to stop.
        batch = list(
            curTableObj.find(pending_filter)
            .sort('_id', pymongo.DESCENDING)
            .limit(10)
        )
        if not batch:
            break
        for v in batch:
            # Prefer the relative path; fall back to ``url`` when ``rel``
            # is absent (or stored as 0).
            _rel = v.get('rel', 0)
            if _rel == 0:
                _rel = v.get('url')
            curUrl = base + _rel.strip('/')
            tags = getObj.vdetail(curUrl, v['id'])

            _curUpData = {'status': 1}
            # Equality (not identity) on purpose: only a False-equal
            # result suppresses the tags update.
            if tags != False:
                _curUpData['tags'] = tags
            curTableObj.update({"id": v['id']}, {"$set": _curUpData})
        time.sleep(0.5)
Пример #6
0
def getDetail():
    """Download profile pictures for ``local_stars`` rows lacking ``imgs``.

    Repeatedly selects up to 10 documents without an ``imgs`` field
    (highest ``_id`` first), fetches each profile page, extracts the
    ``src`` of the image inside the ``profile-pic`` block, passes it to
    ``videoDemo.getOtherSource`` together with the path from
    ``starDir(_id)``, and then sets ``imgs`` to ``''`` on the document.
    When nothing is pending the loop backs off for 10 seconds.  The
    function never returns.
    """
    getObj = videoDemo()
    dbObj = Dbobj('redio', 're_')
    curTableObj = dbObj.getTbname('local_stars')
    base = 'https://www.xvideos.com/'
    # Groups: 0 = markup before the <img>, 1 = image src, 2 = remainder.
    profile_pic = re.compile(
        r'<div class="profile-pic">([.|\s|\S|\n]*?)<img src="(.*)" onerror=([.|\s|\S|\n]*?)</div>'
    )
    pending_filter = {"imgs": {"$exists": False}}
    while True:
        # find() hands back a cursor object, never None, so the batch is
        # materialised and tested for emptiness to detect "nothing to do".
        batch = list(
            curTableObj.find(pending_filter)
            .sort('_id', pymongo.DESCENDING)
            .limit(10)
        )
        if not batch:
            time.sleep(10)
            continue
        for v in batch:
            _rel = v.get('rel')
            if not _rel:
                # No profile path to fetch.
                # NOTE(review): such rows stay pending and are selected
                # again on the next pass - confirm the desired handling.
                continue
            curUrl = base + _rel.strip('/') + '#_tabAboutMe'
            content = getObj.mainContent(curUrl)
            if content is None:
                continue
            # findall returns a (possibly empty) list, never None.
            matches = profile_pic.findall(content)
            if not matches:
                continue
            # First match, second group: the image src.
            img_src = matches[0][1]
            target = starDir(v['_id'])
            getObj.getOtherSource(target, img_src)
            curTableObj.update({"_id": v['_id']}, {"$set": {'imgs': ''}})
        time.sleep(0.5)
Пример #7
0
 def getLocalStar(self, obj):
     """Call ``obj.getList`` for each pornstar index page.

     The first page in the list is passed with flag ``1``; every later
     page is passed with flag ``0``.
     """
     fetcher = videoDemo()  # constructed as before; not used further here
     db = Dbobj('redio', 're_')  # likewise unused below
     index_pages = (
         'https://www.xvideos.com/pornstars-index/japan',
         'https://www.xvideos.com/pornstars-index/china',
         'https://www.xvideos.com/pornstars-index/hong_kong',
     )
     for position, page in enumerate(index_pages):
         flag = 1 if position == 0 else 0
         obj.getList(page, flag)
Пример #8
0
 def failedLog(self, content):
     """Insert ``content`` into the ``failedLog`` table."""
     log_table = Dbobj('redio', 're_').getTbname('failedLog')
     log_table.insert(content)
Пример #9
0
 def __init__(self):
     """Construct a ``Dbobj`` for ``'redio'`` with prefix ``'re_'``.

     NOTE(review): the handle is only bound to a local name and is not
     stored on the instance - confirm whether an attribute was intended.
     """
     db = Dbobj('redio', 're_')
Пример #10
0
# Script fragment: load the country-switch page in Chrome, then pull the
# links out of the "secondary" <nav> block of the returned HTML.
chrome_options.add_argument('--disable-gpu')  # without this option, element locating sometimes fails

# Launch the browser and fetch the page source
browser = webdriver.Chrome('D:\\soft\\chromedriver_win32\\chromedriver.exe',
                           chrome_options=chrome_options)
#print(type(browser))

url = 'https://www.xvideos.com/change-country/cn'
#url = 'https://www.xvideos.com/'
get_url = browser.get(url)
htmlContent = browser.page_source
# The HTML has been captured, so the browser can be closed before parsing.
browser.quit()
if htmlContent is None:
    exit(1)
#print(htmlContent.encode('gbk', 'ignore').decode('gbk'));exit('3');
dbObj = Dbobj('redio', 're_')
#print(dbObj.getNextValue('trends'));exit('3')
# Step 1: isolate the inner markup of the secondary navigation block.
pattern = '<nav aria-label="secondary">([.|\s|\S|\n]*?)</nav>'
trends = re.findall(pattern, htmlContent)
# NOTE(review): re.findall returns a list (possibly empty), never None, so
# this check cannot trigger; an empty result makes trends[0] below raise
# IndexError.
if trends is None:
    exit(1)
trends = trends[0]
# Step 2: extract (href, link text) pairs from the navigation buttons.
pattern = '<li><a class="btn btn-default" href="([.|\s|\S|\n]*?)">([.|\s|\S|\n]*?)</a></li>'
trends = re.findall(pattern, trends)
#print(trends);#.encode('utf-8', 'ignore').decode('utf-8')
# Exit code 2: no links were found inside the navigation block.
if trends is None or len(trends) < 1:
    exit(2)
curTableObj = dbObj.getTbname('trends')
reTrends = []
for i in trends:
    # i[1] is the link text (second capture group).
    curTitle = i[1].strip()
Пример #11
0
 def __dbInfo(self, tableName):
     """Return the table object for ``tableName`` from a fresh ``Dbobj``."""
     fetcher = videoDemo()  # constructed as before; not used further here
     return Dbobj('redio', 're_').getTbname(tableName)
Пример #12
0
                        'cates_id': rel['_id'],
                        'url': v[0]
                    }
                    self.addCates(curData)
            del listCates
            del content
            if i % 10:
                time.sleep(0.5)
            i += 1

    def runTreands(self):
        """Call ``getTrendsDetail`` for every item of ``getTrendsList()``.

        The per-item results are discarded; nothing is returned.
        """
        for trend in self.getTrendsList():
            self.getTrendsDetail(trend)

    def runCates(self):
        """Call ``getCateDetail`` for every item of ``getCateList()``.

        The per-item results are discarded; nothing is returned.
        """
        for cate in self.getCateList():
            self.getCateDetail(cate)


if __name__ == '__main__':
    # Run the trends crawl and the category crawl as two pool tasks.
    curlObj = videoDemo()
    dbObj = Dbobj('redio', 're_')
    obj = getTrendsAndCate(dbObj, curlObj)
    p = Pool(2)
    # Submit the bound methods themselves. Calling one here would run it
    # in the parent and hand its return value (None) to the pool as the
    # task.
    # NOTE(review): with a process pool the bound methods (and thus
    # ``obj``) must be picklable, and worker errors stay hidden because
    # the async results are never read - confirm both.
    p.apply_async(obj.runTreands, args=())
    p.apply_async(obj.runCates, args=())
    p.close()
    p.join()