def __download_mails(self, folderid, mailid):
    """Request one Sina mail through the classic ``read_mid.php`` endpoint.

    Args:
        folderid: Folder identifier, sent as the ``fid`` query parameter.
        mailid: Mail identifier, sent as the ``mid`` query parameter.

    Returns:
        A ``(eml, stream_length)`` tuple: ``eml`` is a ``ResponseIO`` wrapping
        the response, ``stream_length`` is the response's ``Content-Length``
        header value, or ``0`` when the header is absent.
    """
    endpoint = "http://m0.mail.sina.com.cn/classic/read_mid.php"

    # The ``ts`` parameter is the current Asia/Shanghai time in milliseconds.
    shanghai_now = datetime.datetime.now(pytz.timezone('Asia/Shanghai'))
    millis = int(shanghai_now.timestamp() * 1000)
    query = {
        "fid": f"{folderid}",
        "mid": f"{mailid}",
        "ts": f"{millis}",
    }

    request_headers = '''
        Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
        Accept-Encoding: gzip, deflate
        Accept-Language: zh-CN,zh;q=0.9,en;q=0.8
        Cache-Control: no-cache
        Host: m0.mail.sina.com.cn
        Pragma: no-cache
        Proxy-Connection: keep-alive
        Referer: http://m0.mail.sina.com.cn/classic/index.php
        Upgrade-Insecure-Requests: 1
        User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36
        '''

    response = self._ha.get_response(
        endpoint, headers=request_headers, params=query)
    content_length = response.headers.get('Content-Length', 0)
    return ResponseIO(response), content_length
def __download_eml(self, mail_id):
    """Request the raw EML of one yeah.net mail.

    When ``self.sid`` is ``None`` an "Invalid cookie" error is logged, but the
    request is still sent.

    Args:
        mail_id: Mail identifier, sent as the ``mid`` query parameter.

    Returns:
        A ``(eml, stream_length)`` tuple: ``eml`` is a ``ResponseIO`` wrapping
        the response, ``stream_length`` is the response's ``Content-Length``
        header value, or ``0`` when the header is absent.
    """
    endpoint = "https://mail.yeah.net/js6/read/readdata.jsp"

    if self.sid is None:
        self._logger.error("Invalid cookie")

    query = dict(
        sid=self.sid,
        mid=mail_id,
        mode="download",
        action="download_eml",
    )

    request_headers = f'''
        Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
        Accept-Encoding: gzip, deflate, br
        Accept-Language: zh-CN,zh;q=0.9,en;q=0.8
        Cache-Control: no-cache
        Connection: keep-alive
        Host: mail.yeah.net
        Pragma: no-cache
        Referer: https://mail.yeah.net/js6/main.jsp?sid={self.sid}&df=mail163_letter
        Upgrade-Insecure-Requests: 1
        User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36
        '''

    response = self._ha.get_response(
        endpoint, headers=request_headers, params=query)
    content_length = response.headers.get('Content-Length', 0)
    return ResponseIO(response), content_length
def _mail_detail(self, folder, message: dict):
    """Build an ``EML`` for one Yahoo message and attach its raw-content stream.

    ``subject``, ``provider`` and ``owner`` are best-effort: when the
    corresponding entry is missing from ``message["headers"]`` the field is
    left untouched. The ``date`` header is required.

    Args:
        folder: Folder the message belongs to; passed through to ``EML`` and
            used (``folder.folderid``) in the error log.
        message: Decoded message entry; must contain ``id`` and
            ``headers.date`` (epoch seconds).

    Returns:
        The populated ``EML``, or ``None`` when any required step fails (the
        traceback is logged).
    """
    try:
        mailid = message['id']
        res = EML(self._clientid, self.task, self._userid, mailid, folder,
                  self.task.apptype)

        # Optional fields: only lookup failures are tolerated here, so that
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        try:
            res.subject = message["headers"]["subject"]
        except (KeyError, IndexError, TypeError):
            pass
        try:
            res.provider = message["headers"]["from"][0]["email"]
        except (KeyError, IndexError, TypeError):
            pass
        try:
            res.owner = message["headers"]["to"][0]["email"]
        except (KeyError, IndexError, TypeError):
            pass

        t = message["headers"]["date"]
        res.sendtime = datetime.datetime.fromtimestamp(int(t))

        url = f'https://apis.mail.yahoo.com/ws/v3/mailboxes/@.id=={self.boxesid}/messages/@.id=={mailid}/content/rawplaintext?appId=YMailNorrin&ymreqid=5b0965ee-2f08-96e1-1cbc-4800cc01b300&wssid={self.mailWssid}'
        headers = """
            accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
            accept-encoding: gzip, deflate, br
            accept-language: zh-CN,zh;q=0.9
            cache-control: no-cache
            pragma: no-cache
            referer: https://mail.yahoo.com/
            upgrade-insecure-requests: 1
            user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"""

        resp = self._ha.get_response(url, headers=headers)
        res.stream_length = resp.headers.get('Content-Length', 0)
        res.io_stream = ResponseIO(resp)
        return res
    except Exception:
        self._logger.error('Mail: {} got fail: {}'.format(
            folder.folderid, traceback.format_exc()))
def _get_mails(self, folder: Folder) -> iter:
    """Yield the mails of one Aliyun folder (first 75 entries of the listing).

    Posts a ``queryMailList`` request for the folder, then for every entry of
    ``dataList`` requests the ``downloadMail`` link and yields an ``EML``
    carrying the response stream. Any exception is logged with its traceback
    and ends the iteration.

    Args:
        folder: Folder to list; ``folder.folderid`` selects the query. Id
            ``'11'`` is queried by ``tagIds``, every other id by ``folderIds``.

    Yields:
        ``EML`` objects with ``owner``, ``provider``, ``sendtime``,
        ``stream_length`` and ``io_stream`` set.
    """
    try:
        _csrf_token_, _root_token_ = self._get_csrf()
        folder_id = folder.folderid
        url = 'https://mail.aliyun.com/alimail/ajax/mail/queryMailList.txt?_timestamp_={}'.format(
            int(datetime.datetime.now(pytz.timezone('Asia/Shanghai')).timestamp() * 1000))

        # The two request bodies differ only in the name of the query key.
        query_key = 'tagIds' if folder_id == '11' else 'folderIds'
        postdata = f'showFrom=0&query=%7B%22{query_key}%22%3A%5B%22{folder_id}%22%5D%7D&fragment=1&offset=0&length=75&curIncrementId=0&forceReturnData=1&_csrf_token_={_csrf_token_}&_root_token_={_root_token_}&_refer_hash_=h%3DWyJmbV8yIixbIjIiLCIiLHsiZklkIjoiMSJ9LHsibGFiZWwiOiLpgq7ku7YifV1d&_tpl_=v5'

        html = self._ha.getstring(url, req_data=postdata, headers="""
            Accept:*/*
            Accept-Encoding:gzip, deflate, br
            Accept-Language:zh-CN,zh;q=0.8
            Cache-Control:no-cache
            Connection:keep-alive
            Content-Type:application/x-www-form-urlencoded
            Origin:https://mail.aliyun.com
            Pragma:no-cache
            Referer:https://mail.aliyun.com/alimail/
            X-Requested-With:XMLHttpRequest""")
        if 'dataList' not in html:
            return

        download_headers = """
            Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
            Accept-Encoding: gzip, deflate, br
            Accept-Language: zh-CN,zh;q=0.9
            Cache-Control: no-cache
            Connection: keep-alive
            Host: mail.aliyun.com
            Pragma: no-cache
            Referer: https://mail.aliyun.com/alimail/
            Upgrade-Insecure-Requests: 1
            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"""

        jshtml = json.loads(html)
        for data in jshtml['dataList']:
            mailid = data['mailId']
            res_one = EML(self._clientid, self.task, self.userid, mailid,
                          folder, self.task.apptype)
            res_one.owner = data['owner']
            res_one.provider = data['from']['email']
            # 'timestamp' is in milliseconds. Stored on ``sendtime`` (the
            # original wrote to a misspelled ``sendetime`` attribute).
            res_one.sendtime = datetime.datetime.fromtimestamp(
                data['timestamp'] / 1000)

            url = 'https://mail.aliyun.com/alimail/internalLinks/downloadMail?id={}&charset='.format(
                quote_plus(mailid))
            resp = self._ha.get_response(url, headers=download_headers)

            # The response carries no Content-Length header, so derive one
            # from the listing's display size. The last two characters are
            # dropped as the unit suffix. float() accepts fractional sizes and
            # the multiplication happens before truncation.
            display_size = data['clientExtraInfo']['displaySize']
            if 'K' in display_size:
                resp.headers['Content-Length'] = int(
                    float(display_size[:-2]) * 1024)
            elif 'M' in display_size:
                resp.headers['Content-Length'] = int(
                    float(display_size[:-2]) * 1024 * 1024)
            res_one.stream_length = resp.headers.get('Content-Length', 0)

            response = ResponseIO(resp)
            if response:
                res_one.io_stream = response
                yield res_one
    except Exception:
        self._logger.error('{} Got mails fail: {}'.format(
            self.userid, traceback.format_exc()))
def _mail_detai(self, html, folder):
    """Yield an ``EML`` for every mail entry found in a 126 VIP list page.

    Entries are the fragments of ``html`` between ``'id'`` and ``'hmid'``.
    Nothing is yielded when ``html`` does not contain ``var``.

    Args:
        html: Text of the mail-list response.
        folder: Folder the mails belong to; passed through to ``EML``.

    Yields:
        ``EML`` objects with ``provider``, ``subject``, ``state``,
        ``sendtime``, ``stream_length`` and ``io_stream`` set.
    """
    if 'var' not in html:
        return None

    for entry in re.findall(r"('id'.*?)'hmid'", html):
        mailid = substring(entry, "'id':'", "'")
        res_one = EML(self._clientid, self.task, self.userid, mailid, folder,
                      self.task.apptype)
        res_one.provider = substring(entry, "'from':'", "'")
        res_one.subject = substring(entry, "'subject':'", "'")
        raw_date = substring(entry, "'sentDate':new Date(", "),")
        res_one.state = 1 if "'read':true" in entry else 0

        # The date arrives as the comma-separated arguments of a
        # ``new Date(...)`` call. The second field (month) is incremented by
        # one and single-character fields are left-padded with '0'.
        fields = []
        for position, piece in enumerate(raw_date.split(','), start=1):
            if position == 2:
                piece = str(int(piece) + 1)
            if len(piece) == 1:
                piece = '0' + piece
            fields.append(piece)
        stamp = f'{fields[0]}-{fields[1]}-{fields[2]} {fields[3]}:{fields[4]}:{fields[5]}'
        res_one.sendtime = datetime.datetime.strptime(
            stamp, "%Y-%m-%d %H:%M:%S")

        # Download the mail.
        download_url = f'http://webmail.vip.126.com/js6/read/readdata.jsp?sid={self.sid}&mid={mailid}&mode=download&l=read&action=download_eml'
        request_headers = f"""
            Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
            Accept-Encoding: gzip, deflate
            Accept-Language: zh-CN,zh;q=0.9
            Cache-Control: no-cache
            Connection: keep-alive
            Host: webmail.vip.126.com
            Pragma: no-cache
            Referer: http://webmail.vip.126.com/js6/main.jsp?sid={self.sid}&df=mailvip
            Upgrade-Insecure-Requests: 1
            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"""
        response = self._ha.get_response(download_url, headers=request_headers)
        res_one.stream_length = response.headers.get('Content-Length', 0)
        res_one.io_stream = ResponseIO(response)
        yield res_one
def _get_mail(self, mailinfo: dict, sid):
    """Request one 10086 mail via ``mbox:downloadMessages``.

    Sleeps 30 seconds before sending the request.

    Args:
        mailinfo: Mail entry; its ``mid`` value identifies the mail.
        sid: Session id placed in the URL.

    Returns:
        A ``(eml, stream_length)`` tuple: ``eml`` is a ``ResponseIO`` wrapping
        the response, ``stream_length`` is the response's ``Content-Length``
        header value, or ``0`` when the header is absent.
    """
    mid = mailinfo['mid']
    download_url = (
        'https://appmail.mail.10086.cn/RmWeb/mail?func=mbox:downloadMessages&'
        f'sid={sid}&&comefrom=54&mid={mid}'
    )
    request_headers = '''
        accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
        accept-encoding: gzip, deflate, br
        accept-language: zh-CN,zh;q=0.9,en;q=0.8
        cache-control: no-cache,no-cache
        pragma: no-cache
        upgrade-insecure-requests: 1
        user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36
        '''

    time.sleep(30)
    response = self._ha.get_response(download_url, headers=request_headers)
    content_length = response.headers.get('Content-Length', 0)
    return ResponseIO(response), content_length
def _download_eml(self, mailid):
    """Request one mail from ``/service/home/~/`` on the configured host.

    Args:
        mailid: Mail identifier, sent as the ``id`` query parameter.

    Returns:
        A ``(eml, stream_length)`` tuple: ``eml`` is a ``ResponseIO`` wrapping
        the response, ``stream_length`` is the response's ``Content-Length``
        header value, or ``0`` when the header is absent. When an exception
        occurs, the traceback is logged and ``None`` is returned implicitly.
    """
    try:
        download_url = f'{self._scheme}://{self._host}/service/home/~/?auth=co&id={mailid}'
        request_headers = f'''
            Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9
            Accept-Encoding: gzip, deflate
            Accept-Language: zh-CN,zh;q=0.9
            Cache-Control: max-age=0
            Connection: keep-alive
            Host: {self._host}
            Upgrade-Insecure-Requests: 1
            User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36
            '''
        response = self._ha.get_response(download_url, headers=request_headers)
        content_length = response.headers.get('Content-Length', 0)
        return ResponseIO(response), content_length
    except Exception:
        failure = traceback.format_exc()
        self._logger.error('Download mail fail: {}'.format(failure))
def download_mail(self, mailid):
    """Request one QQ mail through ``readmail`` with ``action=downloademl``.

    Args:
        mailid: Mail identifier placed in the URL together with ``self._sid``.

    Returns:
        A ``(eml, stream_length)`` tuple: ``eml`` is a ``ResponseIO`` wrapping
        the response, ``stream_length`` is the response's ``Content-Length``
        header value, or ``0`` when the header is absent.
    """
    download_url = (
        "https://mail.qq.com/cgi-bin/readmail?"
        f"sid={self._sid}&mailid={mailid}&action=downloademl"
    )
    request_headers = '''
        accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
        accept-encoding: gzip, deflate, br
        accept-language: zh-CN,zh;q=0.9,en;q=0.8
        cache-control: no-cache,no-cache
        pragma: no-cache
        referer: https://mail.qq.com/
        upgrade-insecure-requests: 1
        user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36
        '''

    response = self._ha.get_response(download_url, headers=request_headers)
    content_length = response.headers.get('Content-Length', 0)
    return ResponseIO(response), content_length
def __get_mail_streams(self, mailid, msid):
    """Request one 189.cn mail via ``downLoadAttachNormal.do``.

    The request always uses ``partid=9999``.

    Args:
        mailid: Sent as the ``messageid`` query parameter.
        msid: Sent as the ``msid`` query parameter.

    Returns:
        A ``(eml, stream_length)`` tuple: ``eml`` is a ``ResponseIO`` wrapping
        the response, ``stream_length`` is the response's ``Content-Length``
        header value, or ``0`` when the header is absent.
    """
    endpoint = "https://webmail30.189.cn/w2/downLoadAttachNormal.do"
    query = dict(messageid=mailid, msid=msid, partid="9999")
    request_headers = '''
        Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
        Accept-Encoding: gzip, deflate, br
        Accept-Language: zh-CN,zh;q=0.9,en;q=0.8
        Cache-Control: no-cache
        Connection: keep-alive
        Host: webmail30.189.cn
        Pragma: no-cache
        Referer: https://webmail30.189.cn/w2/logon/signOn.do
        Upgrade-Insecure-Requests: 1
        User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36
        '''

    response = self._ha.get_response(
        endpoint, headers=request_headers, params=query)
    content_length = response.headers.get('Content-Length', 0)
    return ResponseIO(response), content_length
def _get_mails(self, folder: Folder) -> iter:
    """Get mails in given folder.

    Walks the folder's list pages starting at ``folder.folderurl``. For every
    ``tr[@bgcolor]`` row it opens the mail's "show original" page (``v=om``),
    extracts the send time and the download link, requests the download and
    yields an ``EML`` wrapping the response.

    Errors are logged, never raised. A failing mail is skipped. A page that
    fails before the next-page URL has been resolved ends the paging loop, so
    a persistent fetch/parse failure cannot retry the same URL forever.

    Args:
        folder: Folder to walk; ``None`` is logged and yields nothing.

    Yields:
        ``EML`` objects with ``owner``, ``state``, ``sendtime``,
        ``downloadurl``, ``stream_length`` and ``io_stream`` set.
    """
    list_headers = """
        accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
        accept-encoding: gzip, deflate, br
        accept-language: zh-CN,zh;q=0.9
        cache-control: no-cache
        pragma: no-cache
        upgrade-insecure-requests: 1
        user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"""
    # Same headers plus a referer placeholder; used for the source page and
    # for the download request.
    referer_headers = """
        accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8
        accept-encoding: gzip, deflate, br
        accept-language: zh-CN,zh;q=0.9
        cache-control: no-cache
        pragma: no-cache
        referer: {}
        upgrade-insecure-requests: 1
        user-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"""

    try:
        if folder is None:
            self._logger.error("Given folder is None")
            return

        urlnext: str = folder.folderurl
        next_: bool = True
        maillastidx: int = 0
        mailidx: int = 0

        while next_:
            # Assume this is the last page until _get_next_page_url says
            # otherwise. If fetching or parsing raises before that call, the
            # loop then ends instead of retrying the same URL forever.
            next_ = False
            try:
                html = self._ha.getstring(urlnext, headers=list_headers)
                hdoc = etree.HTML(html, etree.HTMLParser())
                if hdoc is None:
                    self._logger.info("No mail find in folder '{}'".format(
                        folder.name))
                    return

                # Is there a next page?
                next_, urlnext, mailidx, maillastidx = self._get_next_page_url(
                    hdoc, folder, mailidx, maillastidx)
                self._logger.info("Enter folder '{}' {}-{}".format(
                    folder.name, maillastidx,
                    str(mailidx) if mailidx > maillastidx else ""))

                # Parse the mail list.
                mailnodes = hdoc.xpath('.//tr[@bgcolor]')
                if not mailnodes:
                    self._logger.info(
                        "No mail found in folder '{}'".format(folder.name))
                    continue

                for mailnode in mailnodes:
                    try:
                        if mailnode is None:
                            continue

                        # Read/unread status: rows with this background
                        # colour are treated as read.
                        bgcolors = mailnode.xpath('./@bgcolor')
                        isread = bool(bgcolors) and \
                            str(bgcolors[0]).strip() == '#E8EEF7'

                        # Link to the mail detail page, e.g.
                        # href="?&th=1671b647a79e227b&v=c"
                        xas = mailnode.xpath('.//td/a[@href]')
                        if not xas:
                            self._logger.error(
                                "Get mail content url failed, skip this mail")
                            continue
                        xa = xas[0]
                        xhref = xa.xpath('./@href')
                        if not xhref:
                            self._logger.error(
                                "Get mail content url failed, skip this mail1")
                            continue
                        strhref = str(xhref[0]).strip()
                        if helper_str.is_none_or_empty(strhref):
                            self._logger.error(
                                "Get mail content url failed, skip this mail2")
                            continue

                        mailurl = "{}/{}".format(self._hpurlbase, strhref)
                        succ, mailid = helper_str.substringif(
                            mailurl, 'th=', '&')
                        if not succ or helper_str.is_none_or_empty(mailid):
                            self._logger.error(
                                "Get mail id failed, skip this mail")
                            continue

                        xsubjs = xa.xpath('.//text()')
                        if not xsubjs:
                            self._logger.error(
                                "Get mail subject failed: {}".format(mailid))
                            continue
                        # The subject is only used in log messages below.
                        subj = ''.join(str(xs) for xs in xsubjs)
                        if '-' in subj:
                            idx = subj.find('-')
                            subj = subj[0:idx - 1].strip()

                        # Open the "show original" page.
                        urlsrc = "{}/?&th={}&v=om".format(
                            self._hpurlbase, mailid)
                        srchtml = self._ha.getstring(
                            urlsrc, headers=referer_headers.format(mailurl))
                        if helper_str.is_none_or_empty(srchtml):
                            self._logger.error(
                                "Get source mail page failed: {}".format(
                                    urlsrc))
                            continue
                        hsrc = etree.HTML(srchtml, etree.HTMLParser())
                        if hsrc is None:
                            self._logger.error(
                                "Parse mail source document failed: {}".format(
                                    urlsrc))
                            continue

                        # Send time; stays at the epoch when it cannot be
                        # determined.
                        sendtime = datetime.datetime(1970, 1, 1, 0, 0, 0)
                        strsendtime = None
                        m = self._resendtime.search(srchtml)
                        if m is None:
                            self._logger.warn(
                                "Get mail sendtime failed: {} {}".format(
                                    mailid, subj))
                        else:
                            strsendtime = m.group('date').strip()
                        if not helper_str.is_none_or_empty(strsendtime):
                            # dateparser.parse returns None (rather than
                            # raising) when it cannot parse, so treat both
                            # outcomes as "try the strptime fallback".
                            parsed = None
                            try:
                                parsed = dateparser.parse(strsendtime)
                            except Exception:
                                parsed = None
                            if parsed is None:
                                try:
                                    # Keep the datetime itself; the original
                                    # converted it back to a str here.
                                    parsed = datetime.datetime.strptime(
                                        strsendtime,
                                        '%a, %d %b %Y %H:%M:%S %z')
                                except Exception:
                                    self._logger.warn(
                                        "Get mail sendtime failed: {} {}".format(
                                            mailid, subj))
                            if parsed is not None:
                                sendtime = parsed

                        # Find the "download mail" button.
                        xbtns = hsrc.xpath('.//a[@class="download-buttons"]')
                        if not xbtns:
                            self._logger.error(
                                "Get mail download url failed: {} {}".format(
                                    mailid, subj))
                            continue
                        xdurls = xbtns[0].xpath('.//@href')
                        if not xdurls:
                            self._logger.error(
                                "Get mail download url failed: {} {}".format(
                                    mailid, subj))
                            continue
                        downurl = "{}/{}".format(
                            self._hpurlbase,
                            str(xdurls[0]).strip().lstrip('/'))

                        # NOTE (from the original author): using uname_str
                        # here is problematic; a single fixed value should be
                        # used instead.
                        mail = EML(self._clientid, self.task, self.uname_str,
                                   mailid, folder, self.task.apptype)
                        mail.owner = self.uname_str
                        # 1 = read, 0 = unread, matching the other mailbox
                        # spiders; the flag was computed but never stored.
                        mail.state = 1 if isread else 0
                        mail.sendtime = sendtime
                        mail.downloadurl = downurl

                        resp = self._ha.get_response(
                            mail.downloadurl,
                            headers=referer_headers.format(urlsrc))
                        mail.stream_length = resp.headers.get(
                            'Content-Length', 0)
                        mail.io_stream = ResponseIO(resp)
                        yield mail
                    except Exception:
                        self._logger.error("Parse one mail error: {}".format(
                            traceback.format_exc()))
            except Exception:
                self._logger.error(
                    "Get mails of folder '{}' error: {}".format(
                        folder.name, traceback.format_exc()))
    except Exception:
        self._logger.error("Get mails of folder '{}' error: {}".format(
            folder.name, traceback.format_exc()))
def _get_mails(self, folder: Folder):
    """Yield the mails of one 21cn folder, page by page.

    Requests ``listMail.do`` with an increasing ``pageNum`` until the listing
    reports no further mails, or until a response is empty or lacks
    ``"code":0``. Each listed mail is requested via
    ``downLoadAttachNormal.do`` and yielded only when that response has
    status 200. Any exception is logged with its traceback and ends the
    iteration.

    Args:
        folder: Folder to list; ``folder.folderid`` is sent as ``labelId``.

    Yields:
        ``EML`` objects with ``provider``, ``owner``, ``subject``,
        ``sendtime``, ``state``, ``stream_length`` and ``io_stream`` set.
    """
    # The Referer previously read "ttp://..." (missing the leading "h").
    list_headers = """
        Accept:*/*
        Accept-Encoding:gzip, deflate, sdch
        Accept-Language:zh-CN,zh;q=0.8
        Cache-Control:no-cache
        Connection:keep-alive
        Pragma:no-cache
        Referer:http://mail.21cn.com/w2/logon/signOn.do
        X-Requested-With:XMLHttpRequest"""
    down_headers = """
        Accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8
        Accept-Encoding:gzip, deflate, sdch
        Accept-Language:zh-CN,zh;q=0.8
        Cache-Control:no-cache
        Connection:keep-alive
        Pragma:no-cache
        Referer:http://mail.21cn.com/w2/logon/signOn.do
        Upgrade-Insecure-Requests:1"""

    try:
        page_num = 0
        has_next = True
        while has_next:
            page_num += 1
            url = f'http://mail.21cn.com/w2/mail/listMail.do?labelId={folder.folderid}&pageNum={page_num}&excludeFlag=&mailFlag=&noCache={random.random()}'
            html = self._ha.getstring(url, headers=list_headers)
            if not html or "\"code\":0" not in html:
                # Listing failed; stop paging.
                break

            jshtml = json.loads(html)
            # More pages remain while the total exceeds what has been listed.
            has_next = jshtml['mailCount'] > (
                jshtml['pageNum'] * jshtml['pageSize'])

            for mail in jshtml['mailList'] or ():
                message_id = mail['messageId']
                res_one = EML(self._clientid, self.task, self.userid,
                              message_id, folder, self.task.apptype)
                res_one.provider = json.dumps(mail['sendersAddress'])
                res_one.owner = json.dumps(mail['toAddress'])
                res_one.subject = mail['subject']
                # 'sendDate' is in milliseconds; keep whole seconds, local
                # time, as the previous strftime/strptime round trip did.
                res_one.sendtime = datetime.datetime.fromtimestamp(
                    int(mail['sendDate']) // 1000)
                # state: 1 = already read, 0 = new mail.
                res_one.state = 0 if mail['newMail'] else 1

                ms_id = mail['msId']
                download_url = f'http://mail.21cn.com/w2/downLoadAttachNormal.do?messageid={message_id}&msid={ms_id}&partid=9999'
                resp = self._ha.get_response(download_url,
                                             headers=down_headers)
                if resp.status_code == 200:
                    res_one.stream_length = resp.headers.get(
                        'Content-Length', 0)
                    res_one.io_stream = ResponseIO(resp)
                    yield res_one
    except Exception:
        self._logger.error('{} Got mails fail: {}'.format(
            self.userid, traceback.format_exc()))