def mangareader(url, download_chapters, args):
    html = get_html(url)
    global last
    if hasattr(args, 'last'):
        last = args.last
    series = title(re.search('<td.*?>\\s*Name:.*?<h2.*?>\\s*(.*?)\\s*</h2>\\s*</td>',
                             html.replace('\n', '')).group(1))
    status = re.search('<td.*?>\\s*Status:.*?<td>\\s*(.*?)\\s*</td>',
                       html.replace('\n', '')).group(1)
    author = re.search('<td.*?>\\s*Author:.*?<td>\\s*(.*?)\\s*</td>',
                       html.replace('\n', '')).group(1).partition('(')[0].strip()
    tags = re.findall('<a.*?><span class="genretags">(.*?)</span></a>', html)
    for j in range(len(tags)):
        for k in tag_dict:
            tags[j] = re.sub(k, tag_dict[k], tags[j])
    chapters = []
    for j in re.findall('<tr>\\s*<td>\\s*<div.*?</div>(.*?)</tr>', html,
                        re.DOTALL | re.MULTILINE):
        match = re.search('<a.*?([\\d.,-]+)</a>(\\s*:\\s*)(.*?)\\s*</td>', j)
        num = float(match.group(1))
        name = match.group(3)
        link = 'http://www.mangareader.net' + re.search('<a\\s*href="(/.*?)">', j).group(1)
        date = re.search('<td>(\\d{2})/(\\d{2})/(\\d{4})</td>', j)
        date = '{:04}-{:02}-{:02}'.format(int(date.group(3)), int(date.group(1)),
                                          int(date.group(2)))
        if name:
            name = '{} - {} : {}'.format(series, '{:3.1f}'.format(num).zfill(5), name)
        else:
            name = '{} - {}'.format(series, '{:3.1f}'.format(num).zfill(5))
        if (download_chapters and num in download_chapters) or \
                (not download_chapters and num > last):
            if args.debug or args.verbose:
                print(' Gathering info: "{}"'.format(name))
            chap_html = get_html(link)
            links = ['http://www.mangareader.net' + i for i in
                     re.findall('<option value="(.*?)".*?>\\d+</option>', chap_html)]
            chapters.append({'name': name, 'links': links, 'backup_links': links,
                             'date': date, 'pages': len(links), 'num': num})
    if chapters:
        function_name(chapters, series, tags, author, status, args)
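# Every scraper in this module leans on a get_html(url) helper defined
# elsewhere. For reference, here is a minimal sketch of such a fetcher; the
# name _example_get_html, the shared requests session, and the timeout are
# illustrative assumptions, not the project's actual implementation.
import requests

_example_session = requests.Session()

def _example_get_html(url):
    # Fetch a page and return its body as text; raise on HTTP errors.
    resp = _example_session.get(url, timeout=30)
    resp.raise_for_status()
    return resp.text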
def set_desc(self, chat_id, i, desc):
    try:
        self.plans[str(chat_id)][i]["desc"] = util.title(desc)
        iomgr.save(PLANS_JSON, self.plans)
        return "*Description is updated!* 😎"
    except (IndexError, KeyError):
        return False
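# set_desc above (and new_plan below) persist state through an
# iomgr.save(path, obj) helper defined elsewhere. A minimal sketch of what it
# is assumed to do, given that PLANS_JSON names a JSON file; the encoding and
# indent choices are assumptions.
import json

def _example_save(path, obj):
    # Serialize obj to path as UTF-8 JSON (ensure_ascii=False keeps emoji readable).
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(obj, f, ensure_ascii=False, indent=2)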
def scan_fr(url, download_chapters, args):
    print("getting url " + url)
    html = get_html(url)
    global last
    if hasattr(args, 'last'):
        last = args.last
    series = title(re.search(
        '(<h2 class="widget-title" style="display: inline-block;">)([^<]*)(</h2>)',
        html.replace('\n', '')).group(2))
    print("series: " + series)
    # info_gen = re.findall('(<div class="cell">\\s*(.*?)\\s*</div>)', html.replace('\n', ''))
    status = ""  # not set in this source
    author = ""  # not set in this source
    tags = ""    # not set in this source
    # for j in range(len(tags)):
    #     for k in tag_dict:
    #         tags[j] = re.sub(k, tag_dict[k], tags[j])
    chapters = []
    # grab the chapter list
    chapitres = re.search('(<ul class="chapters">(.*)</ul>)',
                          html.replace('\n', '').replace('\r', ''))
    # non-greedy matching keeps chapter entries from running together
    for j in re.findall('<h5 class="chapter-title-rtl">(.*?)</h5>',
                        chapitres.group(1), re.DOTALL | re.MULTILINE)[::-1]:
        print("line found: " + j)
        match = re.search('<a.*[-/]([0-9.]+).*>(.*) ([0-9.]+)</a>', j,
                          re.DOTALL | re.MULTILINE)
        num = float(match.group(1))
        link = "http://" + re.search('href=".*(www.*?)"', j).group(1)
        # no chapter names in this source; the series title is used instead
        name = ''
        date = "01/01/2000"
        serie_short = match.group(2)
        if name:
            name = '{} - {} : {}'.format(serie_short,
                                         '{:3.1f}'.format(num).zfill(5), name)
        else:
            name = '{} - {}'.format(series, '{:3.1f}'.format(num).zfill(5))
        if (download_chapters and num in download_chapters) or \
                (not download_chapters and num > last):
            if args.debug or args.verbose:
                print(' Gathering info: "{}"'.format(series))
            print(' downloading chapter ' + link)
            chap_html = get_html(link)
            image_regex = "data-src='(.*?) '"
            links = re.findall(image_regex, chap_html)
            chapters.append({'name': name, 'links': links, 'backup_links': links,
                             'date': date, 'pages': len(links), 'num': num})
    args.url = url
    if chapters:
        function_name(chapters, series, tags, author, status, args)
def goodmanga(url, download_chapters, args):
    html = get_html(url)
    global last
    if hasattr(args, 'last'):
        last = args.last
    series = title(re.search('<h1>([^<>]*?)</h1>', html.replace('\n', '')).group(1))
    status = re.search('<span>Status:</span>\\s*(.*?)\\s*</div>',
                       html.replace('\n', '')).group(1)
    author = re.search('<span>Authors?:</span>\\s*(.*?)\\s*</div>',
                       html.replace('\n', '')).group(1)
    tags = re.findall('<a.*?>(.*?)</a>',
                      re.search('<span>Genres:</span>(.*?)\\s*</div>', html,
                                re.DOTALL | re.MULTILINE).group(1))
    for j in range(len(tags)):
        for k in tag_dict:
            tags[j] = re.sub(k, tag_dict[k], tags[j])
    chapters = []
    while True:
        for j in re.findall('<li>\\s*(.{1,300}?\\d{4}</span>)\\s*</li>', html,
                            re.DOTALL | re.MULTILINE):
            match = re.search('<a.*?>.*?([\\d,.]+)\\s*</a>\\s*<span.*?>\\s*(.*?)\\s*</span>',
                              j, re.DOTALL | re.MULTILINE)
            name = match.group(2)
            num = float(match.group(1))
            link = re.search('href="(.*?)"', j).group(1)
            try:
                date = datetime.strptime(
                    re.search('([A-Za-z]*? \\d{1,2}, \\d{4})</span>', j).group(1),
                    '%b %d, %Y').strftime('%Y-%m-%d')
            except Exception:
                date = datetime.today().strftime('%Y-%m-%d')
            if name:
                name = '{} - {} : {}'.format(series,
                                             '{:3.1f}'.format(num).zfill(5), name)
            else:
                name = '{} - {}'.format(series, '{:3.1f}'.format(num).zfill(5))
            if (download_chapters and num in download_chapters) or \
                    (not download_chapters and num > last):
                if args.debug or args.verbose:
                    print(' Gathering info: "{}"'.format(name))
                chap_html = get_html(link)
                img_url = re.sub(
                    '1\\.([jpgnig]{3})', '{}.\\1',
                    re.search('</div>\\s*<a.*?>\\s*<img[^<]*?src="(.*?)".*?>\\s*</a>',
                              chap_html, re.DOTALL | re.MULTILINE).group(1))
                pages = max(int(i) for i in re.findall(
                    '<option value=".*?".*?>\\s*(\\d+)\\s*</option>', chap_html))
                b_links = {float(i[1]): i[0] for i in re.findall(
                    '<option value="(.*?)".*?>\\s*(\\d+)\\s*</option>', chap_html)}
                b_links = [b_links[i + 1] for i in range(pages)]
                links = [img_url.format(i + 1) for i in range(pages)]
                chapters.insert(0, {'name': name, 'links': links, 'backup_links': b_links,
                                    'date': date, 'pages': pages, 'num': num})
        match = re.search('<a href="(.*?)">Next</a>', html)
        if match:
            html = get_html(match.group(1))
        else:
            break
    if chapters:
        function_name(chapters, series, tags, author, status, args)
def new_plan(self, chat_id, desc, place, time):
    chat_id = str(chat_id)
    if chat_id not in self.plans:
        self.plans[chat_id] = []
    # util.insert_new_plan(self.plans[chat_id], {"desc": util.title(desc),
    #                                            "loc": place,
    #                                            "dt": time})
    self.plans[chat_id].append({"desc": util.title(desc),
                                "loc": place,
                                "dt": time})
    self.plans[chat_id] = sort_plans(self.plans[chat_id])
    print("Added a new event!")
    iomgr.save(PLANS_JSON, self.plans)
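# new_plan delegates ordering to a sort_plans helper defined elsewhere. A
# minimal sketch of what it is assumed to do: order a chat's plans by their
# "dt" field so the next upcoming event comes first. Sorting on "dt" is an
# assumption inferred from the plan dicts built above.

def _example_sort_plans(plans):
    # Return a new list of plan dicts ordered chronologically by "dt".
    return sorted(plans, key=lambda plan: plan["dt"])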
def japscan(url, download_chapters, args):
    print("getting url " + url)
    html = get_html(url)
    global last
    if hasattr(args, 'last'):
        last = args.last
    series = title(re.search('(<h1 class="bg-header">).*>(.*)</a>(</h1>)',
                             html.replace('\n', '')).group(2))
    info_gen = re.findall('(<div class="cell">\\s*(.*?)\\s*</div>)',
                          html.replace('\n', ''))
    status = info_gen[7][1]
    author = info_gen[5][1]
    tags = info_gen[7][1]
    # for j in range(len(tags)):
    #     for k in tag_dict:
    #         tags[j] = re.sub(k, tag_dict[k], tags[j])
    chapters = []
    # grab the chapter list
    chapitres = re.search(
        '(<div id="liste_chapitres">(.*)</div>.*<div class="col-1-3")',
        html.replace('\n', ''))
    for j in re.findall('<li>(.*?)</li>', chapitres.group(1),
                        re.DOTALL | re.MULTILINE)[::-1]:
        match = re.search('<a.*[-/]([0-9.]+).*>Scan (.*) ([0-9.]+) VF( : )?(.*)?<.*',
                          j, re.DOTALL | re.MULTILINE)
        num = float(match.group(1))
        link = "http://" + re.search('href=".*(www.*?)"', j).group(1)
        name = match.group(5)
        date = "01/01/2000"
        serie_short = match.group(2)
        if name:
            name = '{} - {} : {}'.format(serie_short,
                                         '{:3.1f}'.format(num).zfill(5), name)
        else:
            name = '{} - {}'.format(series, '{:3.1f}'.format(num).zfill(5))
        if (download_chapters and num in download_chapters) or \
                (not download_chapters and num > last):
            if args.debug or args.verbose:
                print(' Gathering info: "{}"'.format(series))
            chap_html = get_html(link)
            links = []
            # HACK: every page has to be fetched to find its image URL
            for content in re.findall('<option .* value="(.*?)".*?>.*</option>',
                                      chap_html)[::-1]:
                content_html = get_html("http://www.japscan.com" + content)
                search = ('<div itemscope itemtype="http://schema.org/Article">'
                          '.*src="(.*[.][a-z]{0,4})" />')
                link_page = re.search(search, content_html.replace('\n', ''),
                                      re.MULTILINE)
                try:
                    links.append(link_page.group(1))
                except AttributeError:
                    print('An error occurred: unable to find the page image')
                    print(content_html.replace('\n', ''))
            links = list(reversed(links))
            chapters.append({'name': name, 'links': links, 'backup_links': links,
                             'date': date, 'pages': len(links), 'num': num})
    args.url = url
    if chapters:
        function_name(chapters, series, tags, author, status, args)
def mymanga(url, download_chapters, args):
    html = get_html(url)
    global last
    if hasattr(args, 'last'):
        last = args.last
    series = title(re.search('(<h2 class="text-border">)(.*)(</h2>)',
                             html.replace('\n', '')).group(2))
    # info_gen = re.findall('(<div class="cell">\\s*(.*?)\\s*</div>)', html.replace('\n', ''))
    status = 'default'  # info_gen[7][1]
    author = 'default'  # info_gen[5][1]
    tags = 'default'    # info_gen[7][1]
    # for j in range(len(tags)):
    #     for k in tag_dict:
    #         tags[j] = re.sub(k, tag_dict[k], tags[j])
    chapters = []
    # grab the chapter list
    chapitres = re.search(
        '(<section class="listchapseries fiche block sep">(.*)</section>)',
        html.replace('\n', ''))
    for j in re.findall('<li>(.*?)</li>', chapitres.group(1),
                        re.DOTALL | re.MULTILINE)[::-1]:
        match = re.search('<a.* href=".*/chapitre-([0-9.]+)/', j,
                          re.DOTALL | re.MULTILINE)
        num = float(match.group(1))
        link = "http://" + re.search('href=".*(www.*?)" title', j).group(1)
        name = match.group(1)
        date = "01/01/2000"
        serie_short = series
        if name:
            name = '{} - {} : {}'.format(serie_short,
                                         '{:3.1f}'.format(num).zfill(5), name)
        else:
            name = '{} - {}'.format(series, '{:3.1f}'.format(num).zfill(5))
        if (download_chapters and num in download_chapters) or \
                (not download_chapters and num > last):
            if args.debug or args.verbose:
                print(' Gathering info: "{}"'.format(series))
            chap_html = get_html(link)
            links = []
            # HACK: every page has to be fetched to find its image URL
            for content in re.findall('<option value="(.*[0-9]?)" ', chap_html)[::-1]:
                content_html = get_html("http://www.hitmanga.eu/" + content)
                search = '<table id="picture">.*src="(.*[.][a-z]{0,4}?)" alt=.*</table>'
                link_page = re.search(search, content_html.replace('\n', ''),
                                      re.MULTILINE)
                try:
                    links.append(link_page.group(1))
                except AttributeError:
                    print('An error occurred: unable to find the page image')
                    print(content_html.replace('\n', ''))
            links = list(reversed(links))
            chapters.append({'name': name, 'links': links, 'backup_links': links,
                             'date': date, 'pages': len(links), 'num': num})
    args.url = url
    if chapters:
        function_name(chapters, series, tags, author, status, args)
def batoto(url, download_chapters, args):
    batoto_username = args.username
    batoto_password = args.password
    login()
    for i in range(3):
        try:
            html = get_html(url + '/')
            break
        except Exception:
            if i == 2:
                raise
    global last
    global session
    if hasattr(args, 'last'):
        last = args.last
    series = title(re.search('<h1.*?>[\\s\n]*(.*?)[\\s\n]*</h1>', html,
                             re.DOTALL | re.MULTILINE).group(1))
    status = re.search('<td.*?>Status:</td>\\s*<td>\\s*(.*?)\\s*</td>',
                       html.replace('\n', '')).group(1)
    author = ', '.join(re.findall(
        '<a.*?>(.*?)</a>',
        re.search('<td.*?>\\s*Authors?\\s*:?\\s*</td>\\s*<td>(.*?)</td>',
                  html.replace('\n', '')).group(1)))
    tags = re.findall(
        '<a.*?>\\s*<span.*?>\\s*([A-Za-z]*?)\\s*</span>\\s*</a>',
        re.search('<td.*?>\\s*Genres?\\s*:?\\s*</td>\\s*<td>(.*?)</td>',
                  html.replace('\n', '')).group(1))
    for j in range(len(tags)):
        for k in tag_dict:
            tags[j] = re.sub(k, tag_dict[k], tags[j])
    chapters = []
    for j in re.findall('<tr class="row lang_([A-Za-z]*?) chapter_row".*?>(.*?)</tr>',
                        html, re.DOTALL | re.MULTILINE)[::-1]:
        if j[0] != batoto_lang:
            continue
        match = re.search(
            '<a href="([^"]*?)".*?>\\s*<img.*?>\\s*([^"<>]*)(\\s*:\\s*)?(.*?)\\s*</a>',
            j[1], re.DOTALL | re.MULTILINE)
        name = match.group(4)
        m2 = re.search(
            '[Cc]h(ap)?(ter)?\\.?\\s*([Ee]xtra:?)?\\s*([\\d\\.]+)\\s*(-\\s*[\\d\\.]+)?',
            match.group(2))
        try:
            num = float(m2.group(4))
        except Exception:
            if args.debug:
                print(j[1])
            raise
        # TODO: number "Extra" chapters relative to the previous one
        # if m2.group(3):
        #     if chapters:
        #         num = chapters[-1]['num'] + .4
        #     else:
        #         num = last + .4
        try:
            vol = int(re.search('[Vv]ol(ume)?\\.\\s*(\\d+)', match.group(2)).group(2))
        except Exception:
            vol = 0
        link = match.group(1)
        uuid = link.rpartition('#')[2]
        ref = link.rpartition('/')[0] + '/reader#' + uuid + '_1'
        head = {'Referer': ref, 'supress_webtoon': 't'}
        link = link.rpartition('/')[0] + '/areader?id=' + uuid + '&p=1'
        session.headers.update(head)
        try:
            date = datetime.strptime(
                re.search('<td.*?>(\\d{2} [A-Za-z]* \\d{4}.*?([Aa][Mm]|[Pp][Mm])).*?</td>',
                          j[1]).group(1),
                '%d %B %Y - %I:%M %p').strftime('%Y-%m-%dT%H:%M:00')
        except Exception:
            # fall back to relative timestamps ("N minutes ago", "An hour ago", ...)
            t, unit = '0', '%M'
            for plural, singular, fmt in (('[Mm]inutes', 'A minute', '%M'),
                                          ('[Hh]ours', 'An hour', '%H'),
                                          ('[Dd]ays', 'A day', '%d'),
                                          ('[Ww]eeks', 'A week', '%W')):
                m = re.search('(\\d+) ' + plural + ' ago', j[1])
                if m:
                    t, unit = m.group(1), fmt
                    break
                if re.search(singular + ' ago', j[1]):
                    t, unit = '1', fmt
                    break
            date = datetime.fromtimestamp(
                (datetime.today() - datetime.strptime(t, unit)).total_seconds()
            ).strftime('%Y-%m-%dT%H:%M:00')
        if name:
            name = '{} - {} : {}'.format(series, '{:3.1f}'.format(num).zfill(5), name)
        else:
            name = '{} - {}'.format(series, '{:3.1f}'.format(num).zfill(5))
        if (download_chapters and num in download_chapters) or \
                (not download_chapters and num > last):
            if args.debug or args.verbose:
                print(' Gathering info: "{}"'.format(name))
            chap_html = get_html(link)
            img_url = re.sub(
                '001\\.([A-Za-z]{3})', '{:03}.\\1',
                re.search('<div.*?>\\s*<a.*?>\\s*<img[^<]*?src="([^"]*?)"[^>]*?/>\\s*</div>',
                          chap_html, re.DOTALL | re.MULTILINE).group(1))
            zero = False
            if '{:03}' not in img_url:
                # first page numbered 000 rather than 001
                img_url = re.sub('000\\.([A-Za-z]{3})', '{:03}.\\1', img_url)
                zero = True
            if '{:03}' not in img_url:
                # fall back to two-digit page numbers
                img_url = re.sub('01\\.([A-Za-z]{3})', '{:02}.\\1', img_url)
                zero = False
                if '{:02}' not in img_url:
                    img_url = re.sub('00\\.([A-Za-z]{3})', '{:02}.\\1', img_url)
                    zero = True
            if re.findall('<option value=".*?".*?>page (\\d+)</option>', chap_html):
                pages = max(int(i) for i in re.findall(
                    '<option value=".*?".*?>page (\\d+)</option>', chap_html))
            else:
                continue
            b_links = {float(i[1]): i[0] for i in re.findall(
                '<option value="(.*?)".*?>page (\\d+)</option>', chap_html)}
            b_links = [b_links[i + 1] for i in range(pages)]
            if zero:
                links = [img_url.format(i) for i in range(pages)]
            else:
                links = [img_url.format(i + 1) for i in range(pages)]
            chapters.append({'name': name, 'links': links, 'backup_links': b_links,
                             'date': date, 'pages': pages, 'num': num})
    if chapters:
        function_name(chapters, series, tags, author, status, args)
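# batoto above (like goodmanga and mangahere) builds its page URLs by turning
# the first page's image URL into a template and filling in each page number.
# A self-contained illustration of that technique; the sample URL in the
# comment is made up.
import re

def _example_page_links(first_page_url, pages):
    # '.../001.jpg' -> '.../{:03}.jpg', then fill in page numbers 1..pages.
    template = re.sub('001\\.([A-Za-z]{3})', '{:03}.\\1', first_page_url)
    return [template.format(i + 1) for i in range(pages)]

# _example_page_links('http://example.com/ch1/001.jpg', 3) yields
# ['http://example.com/ch1/001.jpg', 'http://example.com/ch1/002.jpg',
#  'http://example.com/ch1/003.jpg']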
def mangahere(url, download_chapters, args):
    html = get_html(url)
    global last
    if hasattr(args, 'last'):
        last = args.last
    series = title(re.search(
        '<h1 class="title"><span class="title_icon"></span>(.*?)</h1>',
        html.replace('\n', '')).group(1))
    status = re.search('<li><label>Status:</label>(.*?)<',
                       html.replace('\n', '')).group(1)
    author = ', '.join(re.findall(
        '<a.*?>(.*?)</a>',
        re.search('<li><label>Author\\(?s?\\)?:</label>(.*?)</li>',
                  html.replace('\n', '')).group(1)))
    tags = re.search('<li><label>Genre\\(s\\):</label>(.*?)</li>',
                     html).group(1).split(', ')
    for j in range(len(tags)):
        for k in tag_dict:
            tags[j] = re.sub(k, tag_dict[k], tags[j])
    chapters = []
    for j in re.findall('<li>\\s*<span class="left">\\s*(.*?\\d{4}</span>)\\s*</li>',
                        html, re.DOTALL | re.MULTILINE)[::-1]:
        match = re.search('<a.*?>.*?([\\d,.]+)\\s*</a>\\s*<span.*?>\\s*(.*?)\\s*</span>',
                          j, re.DOTALL | re.MULTILINE)
        name = match.group(2)
        num = float(match.group(1))
        link = re.search('href="(.*?)"', j).group(1)
        try:
            date = datetime.strptime(
                re.search('([A-Za-z]*? \\d{1,2}, \\d{4})</span>', j).group(1),
                '%b %d, %Y').strftime('%Y-%m-%d')
        except Exception:
            date = datetime.today().strftime('%Y-%m-%d')
        if name:
            name = '{} - {} : {}'.format(series, '{:3.1f}'.format(num).zfill(5), name)
        else:
            name = '{} - {}'.format(series, '{:3.1f}'.format(num).zfill(5))
        if (download_chapters and num in download_chapters) or \
                (not download_chapters and num > last):
            if args.debug or args.verbose:
                print(' Gathering info: "{}"'.format(name))
            chap_html = get_html(link)
            img_url = re.sub(
                '001\\.([A-Za-z]{3})', '{:03}.\\1',
                re.search('<a.*?>\\s*<img[^<]*?src="(.*?)".*?>\\s*</a>',
                          chap_html, re.DOTALL | re.MULTILINE).group(1))
            if '{:03}' not in img_url:
                img_url = re.sub('01\\.([A-Za-z]{3})', '{:02}.\\1', img_url)
            pages = max(int(i) for i in re.findall(
                '<option value=".*?".*?>(\\d+)</option>', chap_html))
            b_links = {float(i[1]): i[0] for i in re.findall(
                '<option value="(.*?)".*?>(\\d+)</option>', chap_html)}
            b_links = [b_links[i + 1] for i in range(pages)]
            links = [img_url.format(i + 1) for i in range(pages)]
            chapters.append({'name': name, 'links': links, 'backup_links': b_links,
                             'date': date, 'pages': pages, 'num': num})
    if chapters:
        function_name(chapters, series, tags, author, status, args)
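# Hedged usage sketch: every scraper takes the series URL, an optional
# collection of chapter numbers, and an argparse-style namespace. The
# attributes below (last, debug, verbose) are the ones these functions read;
# the URL is made up.
from argparse import Namespace

def _example_run():
    args = Namespace(last=0.0, debug=False, verbose=True)
    # Download chapters 1 and 2 explicitly...
    mangahere('http://www.mangahere.co/manga/example/', [1.0, 2.0], args)
    # ...or pass a falsy value to fetch everything newer than args.last.
    mangahere('http://www.mangahere.co/manga/example/', None, args)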