/
sber-mailparser.py
188 lines (166 loc) · 6.74 KB
/
sber-mailparser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
import email
from email.utils import parseaddr
from email.header import decode_header, make_header
import imaplib
from bs4 import BeautifulSoup
import re
import requests
from datetime import datetime
import zipfile
import configparser
import json
import argparse
'''
Python 3 program for parsing Sberbank-specific email with file link.
Settings are read from the INI file passed with -cfg/--config; adjust that file before use.
Program uses IMAP to retrieve messages, so make sure account is allowed to connect with IMAP
Requirements:
requests, bs4 (beautifulsoup 4), lxml (HTML parser used by bs4), imaplib, re, datetime, email, zipfile
Author: Ivan Tyurin
tyurin.su | tg: @san3ko
'''
# --- Command line -----------------------------------------------------------
parser = argparse.ArgumentParser()
parser.add_argument("-cfg", "--config", help="full path to config file")
args = parser.parse_args()

# --- Configuration file -----------------------------------------------------
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. An empty result therefore means the path is
# missing or unreadable; stop here with a clear message instead of failing
# later with NoSectionError.
if not args.config or not config.read(str(args.config)):
    parser.error("config file not found or unreadable: {}".format(args.config))


def _read_flag(section, option):
    """
    Read an on/off option from the config as a real bool.

    config.get() returns the raw string, and any non-empty string (including
    "False") is truthy, so flags must be interpreted explicitly. An empty
    value means off; values unknown to ConfigParser.BOOLEAN_STATES keep the
    legacy meaning of "on".
    """
    raw = config.get(section, option).strip().lower()
    if not raw:
        return False
    return config.BOOLEAN_STATES.get(raw, True)


# mail settings
server = config.get('mail', 'server')
user = config.get('mail', 'user')
pwd = config.get('mail', 'pwd')

# program settings
sender = config.get('program', 'sender')
match_regex = config.get('program', 'match_regex')
file_path = config.get('program', 'file_path')
extract_dir = config.get('program', 'extract_dir')
file_prefix = config.get('program', 'file_prefix')
target_file = config.get('program', 'target_file')
use_proxy = _read_flag('program', 'use_proxy')

# telegram settings
notify_admin = _read_flag('telegram', 'notify_admin')  # Set False for disabling notifications
notify_tg = json.loads(config.get('telegram', 'notify_tg'))  # List of chat_id's (make sure it is a list)
notify_tg_botkey = config.get('telegram', 'notify_tg_botkey')  # bot key
def admin_notify(message):
    """
    Send a Telegram notification to every configured admin chat.

    Does nothing when ``notify_admin`` is falsy. Otherwise ``message`` is
    prefixed with a fixed bold header and posted (Markdown parse mode) to each
    chat id in ``notify_tg`` through the Bot API ``sendMessage`` method.
    Delivery problems are printed, never raised, so a broken notification
    channel cannot abort mail processing.

    :param message: notification text (Telegram Markdown).
    """
    if not notify_admin:
        return

    # No module-level proxy mapping exists in this file, so build it here the
    # same way download_file() does.
    proxies = None
    if use_proxy:
        try:
            proxies = {'https': 'socks5://{}:{}@{}:{}'.format(
                config.get('proxy', 'proxy_user'),
                config.get('proxy', 'proxy_password'),
                config.get('proxy', 'proxy_server'),
                config.get('proxy', 'proxy_port'),
            )}
        except configparser.Error as err:
            print("Telegram error:\n{}".format(err))
            return

    url = "https://api.telegram.org/bot{}/sendMessage".format(notify_tg_botkey)
    msg = "*Авто-загрузка | Сбербанк* \n" + message

    # One loop covers zero, one or many recipients; each answer is checked on
    # its own so a failure for one chat is not hidden by a later success.
    for tg_user in notify_tg:
        params = {
            'chat_id': tg_user,
            'text': msg,
            'parse_mode': 'Markdown',
        }
        try:
            # Form-encoded parameters belong in a POST body; the Bot API
            # accepts both GET and POST.
            tg = requests.post(url, proxies=proxies, data=params, timeout=30)
            answer = tg.json()
        except (requests.RequestException, ValueError) as err:
            print("Telegram error:\n{}".format(err))
            continue
        if not answer.get('ok'):
            print("Telegram error:\n{}".format(answer))
def parse_html(mail):
    """
    Search the HTML part of a message for the download link.

    Every ``<a>`` tag is inspected in document order; the first one whose
    ``href`` matches ``match_regex`` wins.

    :param mail: HTML markup of the message body.
    :return: ``[True, href]`` for the first matching anchor,
        ``[False, None]`` when no anchor matches.
    """
    document = BeautifulSoup(mail, "lxml")
    for anchor in document.find_all('a'):
        href = anchor.get('href')
        # The href is stringified before matching, so an anchor without one
        # is tested as the text "None".
        if re.search(match_regex, str(href)) is not None:
            return [True, href]
    return [False, None]
def get_messages():
    """
    Return the ids of unread messages in INBOX.

    Uses the module-level IMAP connection ``m``. The search response carries
    all ids in its first element as one space-separated value, which is split
    into a list.
    """
    m.select('INBOX')
    _status, found = m.search(None, '(UNSEEN)')
    return found[0].split()
def download_file(url, timeout=60):
    """
    Download the archive behind ``url`` into the configured download folder.

    The target name is ``file_path + file_prefix + "_" + <YYYY-MM-DD> + ".zip"``.
    When ``use_proxy`` is truthy the request goes through the SOCKS5 proxy
    described in the ``[proxy]`` config section.

    :param url: link extracted from the mail.
    :param timeout: seconds to wait for the server before giving up.
    :return: ``[True, path]`` on success; ``[False, None]`` when the request
        fails or the server answers with an HTTP error status.
    """
    name = file_path + file_prefix + "_" + datetime.today().strftime('%Y-%m-%d') + '.zip'
    headers = {'User-Agent': str(config.get('other', 'user_agent'))}
    try:
        proxies = None
        if use_proxy:
            proxies = {'https': 'socks5://{}:{}@{}:{}'.format(
                config.get('proxy', 'proxy_user'),
                config.get('proxy', 'proxy_password'),
                config.get('proxy', 'proxy_server'),
                config.get('proxy', 'proxy_port'),
            )}
        response = requests.get(url, allow_redirects=True, headers=headers,
                                proxies=proxies, timeout=timeout)
        # Without this an HTTP error page would be saved as if it were the
        # archive and only fail later, during extraction.
        response.raise_for_status()
    except Exception as err:
        # Deliberately broad: any failure here means "link unavailable" to the
        # caller. The cause is printed so it is not lost.
        print("Download error: {}".format(err))
        return [False, None]

    # The context manager closes the file before the caller reopens it as a
    # ZIP archive.
    with open(name, 'wb') as archive:
        archive.write(response.content)
    return [True, name]
def extract(file):
    """
    Extract ``target_file`` from the archive ``file`` into ``extract_dir``.

    The member is written under the name
    ``file_prefix + "_" + <YYYY-MM-DD> + ".txt"``.

    :param file: path of the downloaded ZIP archive.
    :return: ``[True, extract_dir + new_name]`` on success; ``[False, None]``
        when the archive cannot be opened, does not contain ``target_file``,
        or the extraction itself fails.
    """
    name = file_prefix + "_" + datetime.today().strftime('%Y-%m-%d') + '.txt'
    try:
        # Opening is inside the try as well: a corrupt or non-ZIP download
        # must be reported as a failed extraction, not crash the script.
        with zipfile.ZipFile(file, 'r') as archive:
            for finfo in archive.infolist():
                if finfo.filename != target_file:
                    continue
                # extract() takes the output path from ZipInfo.filename, so
                # renaming the entry writes the member under the new name.
                finfo.filename = name
                archive.extract(finfo, path=extract_dir)
                return [True, "{}{}".format(extract_dir, name)]
    except Exception as err:
        print("Extract error: {}".format(err))
        return [False, None]
    # The archive was readable but holds no member named target_file. Always
    # return a pair so the caller's tuple unpacking cannot fail.
    return [False, None]
def _message_text(message):
    """
    Return the decoded body text of ``message``, preferring an HTML part.

    Multipart messages are walked recursively and attachments are skipped.
    The charset declared by the chosen part is used for decoding, with UTF-8
    as fallback; undecodable bytes are replaced rather than raising. Returns
    an empty string when the message has no text part.
    """
    if message.is_multipart():
        leaves = [part for part in message.walk() if not part.is_multipart()]
    else:
        leaves = [message]
    texts = [
        part for part in leaves
        if part.get_content_maintype() == 'text'
        and part.get_content_disposition() != 'attachment'
    ]
    html = [part for part in texts if part.get_content_subtype() == 'html']
    candidates = html or texts
    if not candidates:
        return ""
    chosen = candidates[0]
    payload = chosen.get_payload(decode=True) or b""
    charset = chosen.get_content_charset() or 'utf-8'
    try:
        return payload.decode(charset, errors='replace')
    except LookupError:
        # The declared charset is unknown to Python.
        return payload.decode('utf-8', errors='replace')


if __name__ == "__main__":
    # get_messages() reads the connection from the module-level name ``m``.
    # The context manager logs out and closes the socket even on errors.
    with imaplib.IMAP4_SSL(server) as m:
        m.login(user, pwd)
        items = get_messages()
        if not items:
            admin_notify('❌ Новых сообщений не найдено')
        for item in items:
            _status, data = m.fetch(item, '(RFC822)')
            mail_full = email.message_from_bytes(data[0][1])

            # X-Envelope-From takes precedence; fall back to the parsed From
            # address when that header is absent.
            mail_from = (mail_full['X-Envelope-From']
                         or parseaddr(mail_full['From'] or '')[1])
            # A message without Subject must not crash decode_header().
            mail_subj = str(make_header(decode_header(mail_full['Subject'] or '')))

            if mail_from != sender:
                admin_notify("📧 Новое сообщение от {} \n`Тема: {}`".format(mail_from, mail_subj))
                continue

            # Only mails from the expected sender are decoded and parsed.
            found, link = parse_html(_message_text(mail_full))
            if not found:
                admin_notify("❌ `Ссылка не найдена в последнем обработанном письме.`")
                continue

            download_res, name = download_file(link)
            if not download_res:
                admin_notify("‼ *Ошибка!* `Ссылка недоступна для скачивания.`")
                continue

            # Tolerate a None result so the unpacking cannot fail.
            extract_res, txt_name = extract(name) or [False, None]
            if extract_res:
                admin_notify("✅ Файл-выписка успешно загружена и распакована! \n`{} \n{}`".format(str(name), str(txt_name)))
            else:
                admin_notify("‼ *Ошибка!* `Не удалось распаковать файл.`")