def run(self):
    """Loop forever, submitting randomly generated form data to ``targ_url``.

    Each pass builds a random User-Agent string and a random dotted address,
    issues a GET to ``targ_url[0]``, then a POST with a randomly filled form
    body to ``targ_url[1]``.  The module-level ``cnt`` is incremented after a
    POST whose status is not above 403, and ``srv_cnt`` is set from an id
    found in the response body.  The method never returns.

    NOTE(review): the original indentation was lost in this view; the
    nesting below is the most plausible reading - confirm against the file.
    NOTE(review): this sends fabricated form values and forged
    client-address headers to a third-party host in an unbounded loop -
    confirm it is authorised before running it.
    """
    global cnt
    global srv_cnt
    while True:
        # rrange(a, b) -> str of random.randrange(a, b); rrange(a, b, c) ->
        # str of a float random.randrange(a*c, b*c) / c.
        # NOTE(review): because of the `and ... or` form, a draw of 0 with
        # c == 1 falls through to the float branch and yields e.g. '17.0'.
        rrange = lambda a, b, c=1: str(c == 1 and random.randrange(
            a, b) or float(random.randrange(a * c, b * c)) / c)
        # Random dotted address, used only for the two address headers below.
        ip = '%s.%s.%s.%s' % (rrange(0, 255), rrange(0, 255), rrange(
            0, 255), rrange(0, 255))
        ua='Mozilla/'+rrange(4,7,10)+'.0 (Windows NT '+rrange(5,7)+'.'+rrange(0,3)+') AppleWebKit/'+rrange(535,538,10)+\
            ' (KHTML, like Gecko) Chrome/'+rrange(21,27,10)+'.'+rrange(0,9999,10)+' Safari/'+rrange(535,538,10)
        headers = {
            'User-Agent': ua,
            'Accept-Language': 'zh-CN,zh;q=0.8',
            'Accept-Charset': 'utf-8;q=0.7,*;q=0.7',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Connection': 'keep-alive',
            'Cookie': 'ASP.NET_SessionId=ymc1eq55feaarv55mvhm3jv0',
            'Accept-Encoding': 'gzip,deflate',
            'X-Forward-For': ip,
            'Client_IP': ip
        }
        # NOTE(review): `length` is never read in this method; the password
        # loop below draws its own length.
        length = random.randrange(6, 15)
        # NOTE(review): 1e20 / 1e19 / 1e10 / 1e12 are floats; recent Python
        # versions reject non-integer arguments to randrange - confirm the
        # interpreter this runs on.
        cardnum = str(random.randrange(0, 1e20))
        # NOTE(review): `cardid` and `extra` are only referenced by the
        # commented-out body below.
        cardid = str(random.randrange(0, 1e19))
        cardtel = '13' + str(random.randrange(0, 1e10))
        # Random string of 6..19 characters drawn from the module-level
        # `alphabet`.
        pwd = ''
        for i in range(random.randrange(6, 20)):
            pwd += alphabet[random.randrange(0, len(alphabet))]
        extra = '13' + str(random.randrange(0, 1e12))
        randomid = random.randrange(1000, 9999)
        #logindata='CardNum=%s&Cardid=%s&Cardtel=%s&netType=%s&randomId=%s&randm2=%s&isDesktop=%B1%EA%D7%BC%B0%E6&submit.x=39&submit.y=20&id=&randomIdAppendToFormForMacSafari=%s'%(cardnum,cardid,cardtel,pwd,randomid,randomid,extra)
        # The percent-escapes in the trailing literal are appended AFTER the
        # % formatting, so they are not treated as format specifiers.
        # `pwd` is what fills the netType field.
        logindata = 'CardNum=%s&Cardtel=%s&netType=%s&randomId=%s&randm2=%s' % (
            cardnum, cardtel, pwd, randomid, randomid
        ) + '&isDesktop=%B1%EA%D7%BC%B0%E6&submit.x=39&submit.y=20&actionid=ok&id='
        # NOTE(review): the dict now holds both 'Cookie' and 'cookie'; which
        # one is sent depends on how httplib2 normalises header names -
        # verify.
        headers['cookie'] = ''
        headers['referer'] = ''
        # Step 1: fetch the first URL to obtain a cookie.
        resp, content = httplib2.Http().request(targ_url[0], headers=headers)
        if int(resp['status']) > 403:
            print('Thread %s got error 1' % self.name)
            continue
        # Text between the first '=' and the following ';' of Set-Cookie.
        # Raises KeyError / IndexError if the header is absent or has no ';'.
        headers['cookie'] = re.findall('=(.*?);', resp['set-cookie'])[0]
        headers['referer'] = targ_url[0]
        # Step 2: post the generated form body to the second URL.
        resp, content = httplib2.Http().request(targ_url[1], method='POST', headers=headers, body=logindata)
        if int(resp['status']) > 403:
            print('Thread %s got error 2' % self.name)
            continue
        cnt += 1
        # NOTE(review): this unguarded lookup raises IndexError when the
        # pattern is absent, before the guarded duplicate below can run.
        srv_cnt = re.findall('src=\"sxsh\.asp\?id\=(\d+)\"', content)[0]
        try:
            srv_cnt = re.findall('src\=\"sxsh\.asp\?id\=(\d+)\"', content)[0]
        except:
            # Bare except: every failure of the lookup is ignored.
            pass
        time.sleep(1)
def run(self):
    """Loop forever, posting a randomly filled ASP.NET form to a fixed URL.

    Each pass GETs the page, extracts its ``__VIEWSTATE`` value and the
    cookie from ``Set-Cookie``, POSTs random number / password / check-code
    fields back to the same URL, increments the module-level ``cnt`` and
    sleeps one second.  The method never returns.

    NOTE(review): the original indentation was lost in this view; the
    nesting below is the most plausible reading - confirm against the file.
    NOTE(review): this submits fabricated form values to a third-party host
    in an unbounded loop - confirm it is authorised before running it.
    """
    global cnt
    while True:
        # rrange(a, b) -> str of random.randrange(a, b); rrange(a, b, c) ->
        # str of a float random.randrange(a*c, b*c) / c.
        # NOTE(review): because of the `and ... or` form, a draw of 0 with
        # c == 1 falls through to the float branch and yields e.g. '17.0'.
        rrange = lambda a, b, c=1: str(c == 1 and random.randrange(
            a, b) or float(random.randrange(a * c, b * c)) / c)
        # NOTE(review): this value of `url` is overwritten further down
        # before any request is made with it.
        url = 'http://jiyhfvsd.2288.org/i/m1.aspx?/591/%s' % (rrange(
            0, 591))
        # NOTE(review): `ip` is only referenced by the commented-out tail of
        # the headers literal below.
        ip = '%s.%s.%s.%s' % (rrange(0, 255), rrange(0, 255), rrange(
            0, 255), rrange(0, 255))
        ua='Mozilla/'+rrange(4,7,10)+'.0 (Windows NT '+rrange(5,7)+'.'+rrange(0,3)+') AppleWebKit/'+rrange(535,538,10)+\
            ' (KHTML, like Gecko) Chrome/'+rrange(21,27,10)+'.'+rrange(0,9999,10)+' Safari/'+rrange(535,538,10)
        headers = {'User-Agent':ua,'Accept-Language':'zh-CN,zh;q=0.8','Accept-Charset':'utf-8;q=0.7,*;q=0.7',\
                   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'\
                   ,'Connection': 'keep-alive','Cookie':'ASP.NET_SessionId=ymc1eq55feaarv55mvhm3jv0'}#,'X-Forward-For':ip,'Client_IP':ip}
        # Random string of 6..14 characters drawn from the module-level
        # `alphabet`.
        length = random.randrange(6, 15)
        pwd = ''
        for i in range(length):
            pwd += alphabet[random.randrange(0, len(alphabet))]
        # Four-character check code.
        # NOTE(review): indexes with a hard-coded 62 rather than
        # len(alphabet); raises IndexError if `alphabet` is shorter.
        chk = ''
        for i in range(4):
            chk += alphabet[random.randrange(0, 62)]
        url = "http://jiyhfvsd.2288.org/i/m1.aspx?%2f591%2f168"
        # The GET is sent without the headers dict built above.
        resp, content = httplib2.Http().request(url, method='GET')
        # Greedy (.+): captures up to the LAST double quote on the matched
        # line.  Raises IndexError when the field is not present.
        vs = re.findall('id="__VIEWSTATE" value="(.+)"', content)[0]
        logindata = {
            '__VIEWSTATE': vs,
            'txtNumber': rrange(30000000, 2000000000),
            'txtPwd': pwd,
            'txtCheck': chk,
            'imbBtn.x': rrange(0, 100),
            'imbBtn.y': rrange(0, 100)
        }
        # Text between the first '=' and the following ';' of Set-Cookie.
        # NOTE(review): the dict now holds both 'Cookie' and 'cookie'; which
        # one is sent depends on how httplib2 normalises header names -
        # verify.
        headers['cookie'] = re.findall('=(.*?);', resp['set-cookie'])[0]
        resp, content = httplib2.Http().request(
            url, method='POST', headers=headers, body=urllib.urlencode(logindata))
        if int(resp['status']) > 403:
            print('Got error')
        # NOTE(review): assumed to sit at loop level, i.e. counted even after
        # an error status - confirm against the original indentation.
        cnt += 1
        time.sleep(1)
GlobalLock.restype = ctypes.c_void_p lpGlobalMem = GlobalLock(hGlobalMem) memcpy(lpGlobalMem, ctypes.addressof(buffer), bufferSize) GlobalUnlock(hGlobalMem) if OpenClipboard(0): EmptyClipboard() SetClipboardData(win32con.CF_TEXT, hGlobalMem) CloseClipboard() def unquote(str): return urllib.unquote(str).replace('&', '&') home = "http://www.youiv.com/" http = httplib2.Http() pagest = 1 #pageend=467#467 baseurl = "http://www.youiv.com/forum.php?mod=forumdisplay&fid=279&&filter=typeid&typeid=%s&page=%d" cat = {'289': 'IV', '290': 'U15', '293': 'MAG'} totl = {'289': 356, '290': 105, '293': 7} convms = lambda a: time.strftime('%M:%S', time.localtime(a)) for c in cat: file = 'youiv\\%s.txt' % cat[c] analyzed = open(file, 'r').read() time0 = time.time() for i in range(pagest, totl[c] + 1): print('Page %d. Time %s ETA %s'%(i,\ convms(time.time()-time0),\ convms((time.time()-time0)*1.0/(i-pagest+1)*(totl[c]+1-pagest)))) if os.path.exists('youiv\\%d.txt' % (i)):
#coding:utf-8 import httplib2plus as httplib2, re, os, sys urllist = [ 'http://acm.hdu.edu.cn/QQ2013/user_view_single.php?oldcid=442&page=', 'http://acm.hdu.edu.cn/QQ2013/user_view_single.php?oldcid=437&page=', 'http://acm.hdu.edu.cn/QQ2013/user_view_single.php?oldcid=438&page=', 'http://acm.hdu.edu.cn/QQ2013/user_view_single.php?oldcid=439&page=', 'http://acm.hdu.edu.cn/QQ2013/user_view_single.php?oldcid=440&page=', 'http://acm.hdu.edu.cn/QQ2013/user_view_single.php?oldcid=441&page=' ] pageindex = [0, 0, 0, 0, 0, 0] if __name__ == '__main__': ht2 = httplib2.Http() reload(sys) sys.setdefaultencoding('utf-8') for url in urllist: while 1: urlfull = url + str(pageindex[urllist.index(url)]) print urlfull resp, content = ht2.request(urlfull) sectors = re.findall('ight_item(.*?)highl', content + 'highl', re.DOTALL) mon, date = re.findall('(\d)月(\d+)日', content + 'highl', re.DOTALL)[0] f = open('%s月%s日.csv' % (mon, date), 'a') if os.path.getsize('%s月%s日.csv' % (mon, date)) == 0: f.write('GID,姓名,性别,学号,学校,学院,专业,报名时间\n') for sec in sectors: for i in re.findall('TABLE_TEXT" align="center">(.*?)</td>', sec)[:8]: f.write(i + ',')
#coding:gbk import httplib2plus as httplib2,re,urllib,sys import _headers import clipboard if len(sys.argv)>1:topicid=sys.argv[1] else:topicid=raw_input('输入topicid或者网址 >') if topicid.startswith('http'):topicid=re.findall('.+/(\d+)/*',topicid.rstrip('/'))[0] url='http://verycd.gdajie.com/topics/%s' %topicid resp, content = httplib2.Http().request(url, method='GET',headers=headers) list=re.findall('<font color="red".*?href="(.*?)".*?</font>',content,re.DOTALL) str='' for li in list: if not li.startswith('http://www.verycd.gdajie.com/detail.htm'):continue content = httplib2.Http().request(li, method='GET',headers=_headers.get())[1] strp=re.findall("var ed2k_links = '(.+)';",content)[0] str+=(strp+'\n') print urllib.unquote(strp).decode('utf-8') clipboard.SetClipboardText(str) raw_input('\n抓取完成并已复制到剪贴板XD')