-
Notifications
You must be signed in to change notification settings - Fork 0
/
map_function.py
114 lines (77 loc) · 2.78 KB
/
map_function.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
from Cryptodome.Cipher import AES
from Cryptodome import Random
import binascii
import requests
import re
from lxml import etree
class MyAES(object):
    """Small AES wrapper that exchanges ciphertext as hexadecimal text.

    The key, IV and mode are fixed at construction time and a new cipher
    object is built from them for every encrypt/decrypt call.
    """

    def __init__(self, key, iv=None, mode=AES.MODE_CFB):
        """Remember the cipher parameters.

        Args:
            key: AES key, passed unchanged to ``AES.new``.
            iv: Initialization vector. When omitted (or falsy) a random one
                of ``AES.block_size`` bytes is generated; read it back from
                ``self.iv`` if the ciphertext has to be decrypted by a
                different instance.
            mode: Cipher mode constant; defaults to ``AES.MODE_CFB``.
        """
        self.key = key
        self.mode = mode
        if iv:
            self.iv = iv
        else:
            self.iv = Random.new().read(AES.block_size)

    def encrypt(self, data):
        """Encrypt ``data`` and return the ciphertext hex-encoded.

        Args:
            data: ``bytes``, or a ``str`` which is encoded with the default
                codec before encryption.

        Returns:
            The ciphertext as hexadecimal ``bytes``.
        """
        if not isinstance(data, bytes):
            data = data.encode()
        cipher = AES.new(self.key, self.mode, self.iv)
        return binascii.b2a_hex(cipher.encrypt(data))

    def decrypt(self, data):
        """Decrypt hex-encoded ciphertext produced by :meth:`encrypt`.

        Args:
            data: Hexadecimal ciphertext as ``bytes`` or ``str``.

        Returns:
            The decrypted plaintext decoded to ``str``.
        """
        if isinstance(data, bytes):
            data = data.decode()
        raw = binascii.a2b_hex(data)
        cipher = AES.new(self.key, self.mode, self.iv)
        return cipher.decrypt(raw).decode()

    # The method was originally published under this misspelled name; keep it
    # so existing callers continue to work.
    decrtper = decrypt
class JianshuHomePage:
    """Fetch the article feed of a Jianshu-style home page.

    The first page is fetched as plain HTML; later pages are requested from
    the same URL with AJAX-style headers, the CSRF token scraped from the
    first page, and the note ids collected so far.

    Attributes are filled in as pages are fetched:
        articles: list of ``(title, passage)`` tuples.
        token: value of the ``csrf-token`` meta tag, ``None`` until
            ``get_homepage`` has run.
        note_id: note ids gathered from the parsed pages.
    """

    def __init__(self, url, timeout=10):
        """Set up the scraper.

        Args:
            url: Address used for both the initial and the follow-up requests.
            timeout: Seconds handed to ``requests.get`` so a stalled server
                cannot block the caller indefinitely.
        """
        self.url = url
        self.timeout = timeout
        # NOTE(review): the stray spaces in this User-Agent look like a
        # copy/paste artefact; the value is left unchanged here.
        self.ua = {
            'User-Agent': 'Mozilla / 5.0(Windows NT 10.0;Win64;x64) AppleWebKit / 537.36(KHTML, likeGecko) Chrome / 72.0.3626.96Safari / 537.36'
        }
        self.articles = []
        self.token = None
        self.note_id = []

    def get_homepage(self):
        """Download the first page, remember its CSRF token and parse it.

        Raises:
            IndexError: if the page contains no ``csrf-token`` meta tag.
        """
        resp = requests.get(self.url, headers=self.ua, timeout=self.timeout).text
        self.token = re.findall(r'<meta name="csrf-token" content="(.*?)" />', resp)[0]
        self.parse(resp)

    def get_ajax(self, page):
        """Request a follow-up page of the feed and parse it.

        Args:
            page: Page number sent as the ``page`` query parameter.
        """
        headers = {
            'x-csrf-token': self.token,
            'x-requested-with': 'XMLHttpRequest'
        }
        # Pages up to 3 are requested with the infinite-scroll header, later
        # ones with the pjax header (presumably mirroring what the site's own
        # front end sends -- confirm against the site).
        if page <= 3:
            headers['x-infinitescroll'] = 'true'
        else:
            headers['x-pjax'] = 'true'
        headers.update(self.ua)
        params = {
            'seen_snote_ids[]': self.note_id,
            'page': page
        }
        resp = requests.get(
            self.url, headers=headers, params=params, timeout=self.timeout
        ).text
        self.parse(resp)

    def parse(self, resp):
        """Extract titles, passages and note ids from an HTML response.

        Args:
            resp: HTML text of a fetched page.
        """
        page = etree.HTML(resp)
        title = page.xpath('//div[@class="content"]/a/text()')
        passage = page.xpath('//div[@class="content"]/p/text()')
        self.articles.extend(zip(title, passage))
        # Attribute nodes have no text() children, so the former
        # '/@data_note_id/text()' expression could never match and
        # 'seen_snote_ids[]' was always sent empty. Select the attribute
        # itself; HTML custom data attributes are hyphenated (data-*).
        # NOTE(review): confirm the attribute name against the live markup.
        note_id = page.xpath('//li[@class="have-img"]/@data-note-id')
        self.note_id.extend(note_id)

    def run(self):
        """Fetch the home page plus pages 2 and 3, then print every article."""
        self.get_homepage()
        for page_number in range(2, 4):
            self.get_ajax(page_number)
        for article in self.articles:
            print(article)
if __name__ == '__main__':
    # Demo 1: encrypt a sample string with a fixed 16-byte key (the IV is
    # generated by MyAES) and show the hex-encoded ciphertext.
    cipher = MyAES(b'aaaabbbbccccdddd')
    print(cipher.encrypt('ASD是什么意思_ASD在线翻译、解释、发音、同义词、反义词_英语...'))

    # Demo 2: scrape the Jianshu home page feed and print what was found.
    JianshuHomePage('https://www.jianshu.com').run()