示例#1
0
def urllib2_yzm_cookie():
    """Log in with OCR-solved captchas and save the grade page to ``1.txt``.

    A cookie-aware opener is built and the login URL is requested once to
    obtain a cookie.  The function then loops: download the captcha, OCR it
    with pytesser, post the login form, fetch the grade page and write it to
    ``1.txt``.  The loop ends as soon as the fetched page no longer contains
    the text ``RANDOMCODE``; there is no retry limit.

    Side effects: writes ``yzm.jpg`` and ``1.txt`` in the current directory
    and prints every recognised captcha.
    """
    url = 'http://202.119.81.112:8080/Logon.do?method=logon'

    headers = {
        'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36'
    }

    data = {'USERNAME': '******', 'PASSWORD': '******', 'RANDOMCODE': ''}

    postdata = urllib.urlencode(data)

    # Cookie-aware opener shared by every request below.
    cookie = cookielib.CookieJar()
    handler = urllib2.HTTPCookieProcessor(cookie)
    opener = urllib2.build_opener(handler)

    # First request: made only so the cookie jar receives the site's cookie.
    request = urllib2.Request(url, postdata, headers=headers)
    opener.open(request)

    # OCR is unreliable, so keep fetching a captcha and logging in until the
    # grade page comes back.
    while True:
        # Download a fresh captcha image.
        yzm = opener.open('http://202.119.81.112:8080/verifycode.servlet')
        with open('yzm.jpg', 'wb') as yzmfile:
            yzmfile.write(yzm.read())

        # Recognise it; drop spaces/newlines and keep the first four characters.
        image = Image.open('yzm.jpg')
        yzmtext = pytesser.image_to_string(image)
        yzmtext = yzmtext.replace(' ', '').replace('\n', '')[:4]
        print(yzmtext)

        # Submit the login form with the recognised code.
        data['RANDOMCODE'] = yzmtext
        postdata = urllib.urlencode(data)
        request = urllib2.Request(url, postdata, headers=headers)
        opener.open(request)

        # Fetch the grade page and keep a copy on disk.
        grade_url = 'http://202.119.81.112:9080/njlgdx/kscj/cjcx_list'
        html = opener.open(grade_url).read()
        with open('1.txt', 'w') as f:
            f.write(html)

        # Stop once the page no longer mentions RANDOMCODE (presumably the
        # login form is only served again when the captcha was wrong).
        if re.search('RANDOMCODE', html) is None:
            break
示例#2
0
def get_webtoken():
    """Log in to e.oppo.cn with an OCR-solved captcha and return the token cookie text.

    Each outer iteration starts with a fresh cookie jar, downloads captchas
    until pytesser returns exactly four characters, posts the login form and
    then inspects ``str(cookie)``.  The loop repeats until the second
    space-separated token of that text starts with ``WEBTOKEN``.

    Returns:
        That token, i.e. the text of the first cookie listed in the jar.

    Side effects: overwrites ``/data/rx/image/image.jpg`` on every attempt.
    There is no retry limit.
    """
    # Captcha image URL and login POST URL.
    CaptchaUrl = "http://e.oppo.cn/loginCaptcha"
    LoginUrl = "http://e.oppo.cn/login"
    while True:
        # Bind a new cookie jar to an opener; cookielib manages the cookies.
        cookie = cookielib.CookieJar()
        handler = urllib2.HTTPCookieProcessor(cookie)
        opener = urllib2.build_opener(handler)
        while True:
            # Fetching the captcha through the opener also collects its cookie.
            picture = opener.open(CaptchaUrl)
            with open('/data/rx/image/image.jpg', 'wb') as local:
                local.write(picture.read())
            im = Image.open('/data/rx/image/image.jpg')
            text = pytesser.image_to_string(im).strip()
            # Only a four-character reading is worth submitting.
            if len(text) == 4:
                break
        body = {'name': '扣费', 'passwd': '123456', 'captcha': text}
        data = urllib.urlencode(body)
        request = urllib2.Request(url=LoginUrl, data=data)
        request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        opener.open(request)
        parts = str(cookie).split(' ')
        # An empty jar renders without any space; treat that as a failed
        # attempt and retry instead of raising IndexError.
        if len(parts) > 1 and parts[1].startswith('WEBTOKEN'):
            return parts[1]
示例#3
0
def recognize(pic_name):
    """OCR the picture ``pic_name`` and return a four-letter code in upper case.

    The picture is first passed through ``pre_operation``; the OCR text is
    stripped of surrounding whitespace.  When the text is exactly four
    characters long and ``isalpha`` accepts it, the text is printed and its
    upper-cased form is returned.  Otherwise ``None`` is returned.
    """
    im = pre_operation(pic_name)
    text = pytesser.image_to_string(im).strip()

    # NOTE(review): isalpha is not a builtin; presumably a helper defined or
    # imported elsewhere in the file — confirm (text.isalpha() may be intended).
    if len(text) != 4 or not isalpha(text):
        return None

    print(text)
    return text.upper()
示例#4
0
 def __ImagePriceExtract(self, file):
     """Read a price from an image by OCR and return it as a float.

     The image is opened and split into four bands; the first three are
     merged into an RGB image, which is passed to ``image_to_string``.
     Spaces, newlines and carriage returns are removed from the OCR text
     before it is converted with ``float``.

     Raises:
         DropItem: if any step fails, e.g. the image does not split into
             exactly four bands or the cleaned text is not a number.
     """
     try:
         img = Image.open(file)
         # Unpacking requires exactly four bands; the fourth is discarded.
         r, g, b, _alpha = img.split()
         img = Image.merge('RGB', (r, g, b))
         text = image_to_string(img)
         text = text.replace(" ", '').replace('\n', '').replace('\r', '')
         return float(text)
     except Exception as e:
         # "as" form works on Python 2.6+ and 3, unlike "except Exception, e".
         raise DropItem("item price can't not extracted successfully: %s" % e)
示例#5
0
def captchaRecognize(captchaImg):
    """OCR a captcha image and return its digits when exactly four were read.

    Returns:
        The four-digit string, or the integer ``0`` when the number of
        digits found in the OCR output is not four.
    """
    # Recognise the (already modified) picture.
    ocr_output = pytesser.image_to_string(captchaImg)
    # Throw away everything in the OCR output that is not a digit 0-9.
    digits = re.sub('[^0-9]', '', ocr_output)
    if len(digits) == 4:
        return digits
    return 0
示例#6
0
 def solve_captcha(self, img):
     """Download the captcha at URL ``img`` and return up to four OCR'd characters.

     The image is resized to 116x56 and re-encoded as TIFF before OCR: via a
     ``code.tif`` file under the path yielded by ``tmanage()`` when
     ``self.tmp`` is truthy, otherwise through an in-memory buffer.  Spaces
     and dots are removed from the OCR text and its first four characters
     are returned.
     """
     req = urllib2.Request(img)
     img_file = Image.open(StringIO(urllib2.urlopen(req).read()))
     # Image.resize returns a new image rather than resizing in place, so the
     # result has to be kept; previously it was discarded.
     img_file = img_file.resize((116, 56), Image.NEAREST)
     if self.tmp:
         with tmanage() as t:
             n = ''.join([t, "/code.tif"])
             img_file.save(n)
             img_file = Image.open(n)
             s = image_to_string(img_file)
             img_file.close()
     else:
         output = StringIO()
         img_file.save(output, format="TIFF")
         s = image_to_string(Image.open(StringIO(output.getvalue())))
         output.close()
     s = s.replace(' ', '')
     s = s.replace('.', '')
     return s[:4]
示例#7
0
    def solve_captcha(self,img):
        """Download the captcha at URL ``img`` and return up to four OCR'd characters.

        The image is resized to 116x56 and re-encoded as TIFF before OCR: via
        a ``code.tif`` file under the path yielded by ``tmanage()`` when
        ``self.tmp`` is truthy, otherwise through an in-memory buffer.  Spaces
        and dots are removed from the OCR text and its first four characters
        are returned.
        """
        req = urllib2.Request(img)
        img_file = Image.open(StringIO(urllib2.urlopen(req).read()))
        # Image.resize returns a new image rather than resizing in place, so
        # the result has to be kept; previously it was discarded.
        img_file = img_file.resize((116, 56), Image.NEAREST)
        if self.tmp:
            with tmanage() as t:
                n = ''.join([t, "/code.tif"])
                img_file.save(n)
                # This line was indented with a tab; spaces only now.
                img_file = Image.open(n)
                s = image_to_string(img_file)
                img_file.close()
        else:
            output = StringIO()
            img_file.save(output, format="TIFF")
            s = image_to_string(Image.open(StringIO(output.getvalue())))
            output.close()
        s = s.replace(' ', '')
        s = s.replace('.', '')
        return s[:4]
示例#8
0
 def access_company_url(self,company_url):
     """Scrape one company page and return ``"company,contact,number"``.

     The page at ``base_url + company_url`` is downloaded and decoded as GBK.
     When it contains no ``/AspNet/StrToImg.ashx`` image reference an empty
     string is returned.  Otherwise the company name and contact name are cut
     out of the first matching ``<td>`` cells, the number image is downloaded
     and OCR'd, and the three values are joined with commas.
     """
     base_url ='http://bj.597rcw.com'
     page = urllib2.urlopen(base_url+company_url).read().decode("gbk")
     number_urls = re.findall('/AspNet/StrToImg.ashx.+?/>', page, re.S)
     if not number_urls:
         return ''

     # Matches look like '<td><b>NAME</b></td>'; the slice strips the tags.
     name_cell = re.findall('<td><b>.+?</b></td>', page, re.S)[0]
     commpany_name = name_cell[7:-9]

     # Matches look like '<td height="25" width="85%">CONTACT</td>'.
     contact_cell = re.findall('<td height="25" width="85%">.+?</td>', page, re.S)[0]
     contacts_name = contact_cell[28:-5]

     # Drop the last four characters of the match to leave the image URL.
     number_url = number_urls[0][:-4]

     # Turn the number image into text.
     image_bytes = urllib2.urlopen(base_url+number_url).read()
     number = image_to_string(Image.open(cStringIO.StringIO(image_bytes)))
     return ",".join([commpany_name, contacts_name, number])
示例#9
0
# -*- coding: utf-8 -*-

from pytesser.pytesser import image_to_string

from PIL import *
import Image
import ImageEnhance

# Open the sample picture (path is relative to the working directory).
image = Image.open(r"TB2965Lb46I8KJjSszfXXaZVXXa_!!646445699.jpg.jpg")
# Build a contrast enhancer for the picture and apply it with factor 4.
enhancer = ImageEnhance.Contrast(image)
image_enhancer = enhancer.enhance(4)

# OCR the contrast-enhanced picture and print the recognised text.
print(image_to_string(image_enhancer))
示例#10
0
def ocr(image):
    """Return the text pytesser recognises in ``image`` after converting it to RGB."""
    rgb_image = image.convert('RGB')
    recognised = pytesser.image_to_string(rgb_image)
    return recognised
示例#11
0
def ocr_cropped(image, box):
    """Crop ``image`` to ``box``, convert the crop to RGB and return its OCR text."""
    rgb_region = image.crop(box).convert('RGB')
    return pytesser.image_to_string(rgb_region)
示例#12
0
def ocr(image):
    """Convert ``image`` to RGB and return what pytesser reads from it."""
    as_rgb = image.convert('RGB')
    return pytesser.image_to_string(as_rgb)
示例#13
0
                        'referer': 'http://hub.hust.edu.cn/index.jsp'
                    }).content
    global k1, k2
    k1, k2 = eval(content)
    verify_image_url = 'http://hub.hust.edu.cn/randomImage.action?k1=%s&k2=%s&uno=%s&time=%d' % (
        k1, k2, username, TIME)
    content = s.get(verify_image_url).content
    global vimg
    vimg = Image.open(StringIO(content))


# Run the key/captcha fetch; the code below reads the module-level ``vimg``.
get_rand_key()

try:
    # OCR is best-effort: if pytesser cannot be imported or fails, fall back
    # to asking the user below.
    from pytesser.pytesser import image_to_string
    vcode = image_to_string(vimg).strip()
    vimg.save('vcode.jpg')
    print(vcode)
except Exception:
    # Exception rather than a bare except, so Ctrl-C and SystemExit still
    # stop the script instead of opening the manual prompt.
    import sys
    import traceback
    traceback.print_exc(file=sys.stdout)
    # Show the captcha and let the user type the code.
    vimg.show()
    vcode = raw_input('verify code:')

data = {
    'usertype': 'xs',
    'username': username,
    'password': password,
    'rand': '',
    'ln': server,
    'random_key1': '',
示例#14
0
def resolver(path):
    """Pass ``path`` to ``pytesser.image_to_string`` and return its result."""
    return pytesser.image_to_string(path)
示例#15
0
def extractText(left, top, right, bottom, scale = 2):
    """Capture the given rectangle, enlarge it by ``scale`` and return its OCR text.

    The capture comes from ``capture(left, top, right, bottom)``; each of its
    dimensions is multiplied by ``scale`` before ``tesser.image_to_string``
    is run on the resized image.
    """
    shot = capture(left, top, right, bottom)
    enlarged_size = [scale * dim for dim in shot.size]
    enlarged = shot.resize(enlarged_size)
    return tesser.image_to_string(enlarged)
示例#16
0
# print headers['Cookie']

# 输入用户名和密码
login_info['USERNAME'] = raw_input('Username: '******'PASSWORD'] = raw_input('Password: '******'yzm.jpg', 'wb')
    yzmfile.write(yzm_response.content)
    yzmfile.close()
    # 识别验证码
    image = Image.open('yzm.jpg')
    yzmtext = pytesser.image_to_string(image)
    tmp = list(yzmtext)
    while ' ' in tmp:
        tmp.remove(' ')
    while '\n' in tmp:
        tmp.remove('\n')
    yzmtext = ''.join(tmp)[:4]
    # print yzmtext
    login_info['RANDOMCODE'] = yzmtext

    # 重新登陆,若验证码正确则登陆成功
    # should_success_response = requests.get(login_url, params=urllib.urlencode(login_info), headers=headers)
    should_success_response = requests.post(login_url,
                                            data=login_info,
                                            headers=headers)
    html = should_success_response.content
示例#17
0
def ocr_cropped(image, box):
    """Return the OCR text of the ``box`` region of ``image``.

    The region is cropped out and converted to RGB before being handed to
    ``pytesser.image_to_string``.
    """
    cropped = image.crop(box)
    return pytesser.image_to_string(cropped.convert('RGB'))
示例#18
0
文件: main.py 项目: jackey/boomcode
run_from_console = __name__ == '__main__'

# Load the captcha and convert it to 8-bit greyscale.
img = Image.open('code.png')
imgry = img.convert('L')

# Binarise: grey levels below the threshold map to 0, all others to 1.
threshold = 150
table = [0 if i < threshold else 1 for i in range(256)]
out = imgry.point(table, '1')

# Write the binarised image to a TIFF file and OCR it from there.
fname = 'test.tiff'
out.save(fname, dpi=(72, 72))
try:
    tiff = Image.open(fname)
    code = pytesser.image_to_string(tiff)
finally:
    # Remove the temporary file even when opening it or the OCR step fails.
    os.remove(fname)

# Keep only ASCII letters and digits from the OCR output.
clean_code = re.sub(r'[^0-9a-zA-Z]', '', code)

print(clean_code)
示例#19
0
def extractText(left, top, right, bottom, scale=2):
    """OCR a captured rectangle after scaling it up.

    ``capture(left, top, right, bottom)`` supplies the image; its size is
    multiplied by ``scale`` in both dimensions and the resized image is
    passed to ``tesser.image_to_string``, whose result is returned.
    """
    snapshot = capture(left, top, right, bottom)
    target_size = [scale * side for side in snapshot.size]
    return tesser.image_to_string(snapshot.resize(target_size))
示例#20
0
# Placeholder binding; the pytesser import inside the try block further down
# rebinds this name when that import succeeds.
image_to_string = None
def get_rand_key():
  """Fetch the random key pair and the captcha image for the login form.

  Requests ``randomKey.action`` for ``username``/``TIME`` through the session
  ``s``, stores the two values it yields in the globals ``k1`` and ``k2``,
  then downloads the matching ``randomImage.action`` picture and stores the
  opened image in the global ``vimg``.  Returns nothing.
  """
  global k1, k2, vimg
  referer_header = {'referer': 'http://hub.hust.edu.cn/index.jsp'}
  key_url = 'http://hub.hust.edu.cn/randomKey.action?username=%s&time=%d' % (username, TIME)
  key_payload = s.get(key_url, headers=referer_header).content
  # NOTE(review): eval() executes whatever the server returns; if the payload
  # is a plain literal pair, ast.literal_eval would be the safer choice —
  # confirm the response format before changing it.
  k1, k2 = eval(key_payload)
  image_url = 'http://hub.hust.edu.cn/randomImage.action?k1=%s&k2=%s&uno=%s&time=%d' % (k1, k2, username, TIME)
  image_bytes = s.get(image_url).content
  vimg = Image.open(StringIO(image_bytes))

# Fetch the keys and the captcha; get_rand_key() stores the image in ``vimg``.
get_rand_key()

try:
  # OCR is best-effort: if pytesser cannot be imported or fails, fall back to
  # asking the user below.
  from pytesser.pytesser import image_to_string
  vcode = image_to_string(vimg).strip()
  vimg.save('vcode.jpg')
  print(vcode)
except Exception:
  # Exception rather than a bare except, so Ctrl-C and SystemExit still stop
  # the script instead of opening the manual prompt.
  import sys
  import traceback
  traceback.print_exc(file=sys.stdout)
  # Show the captcha and let the user type the code.
  vimg.show()
  vcode = raw_input('verify code:')

data = {
    'usertype': 'xs',
    'username': username,
    'password': password,
    'rand': '',
    'ln': server,
    'random_key1' : '',