def _scrape_photo_info_from_source_3(page_url): scraper = cfscrape.create_scraper() scraped_content = scraper.get(page_url).content soup = BeautifulSoup(scraped_content, "lxml") photos = soup.find_all("img", class_="main-image") photo_url = photos[0]["src"] # Scrape the aircraft model and airline aircraft_model, airline = None, None info_section = soup.find("section", class_="additional-info aircraft") p_elems = info_section.select("p") for p_elem in p_elems: text = p_elem.text.strip() if len(text) > 0: if "Aircraft: " in text: aircraft_model = text.split(":")[1].strip() if "Airline: " in text: airline = text.split(":")[1].strip() # Scrape the photographer's name photographer_name = None info_section = soup.find("section", class_="additional-info photographer") p_elems = info_section.select("p") for i, p_elem in enumerate(p_elems): text = p_elem.text.strip() if len(text) > 0: if i == 0: photographer_name = text.strip() size = "" # Placeholder - we set it after we download the photo return ((airline, page_url, photo_url, aircraft_model, size, "No", photographer_name))
def __init__(self, un, pw, session_path=None): ''' Params: un: account username (required) pw: account password (required) session_path: the path to the actual file you want to persist your cookies in If blank, saves to $HOME/.32p_cookies.dat ''' self.module = '[32P-AUTHENTICATION]' try: self.ses = cfscrape.create_scraper() except Exception as e: logger.error(self.module + " Can't create session with cfscrape") self.session_path = session_path if session_path is not None else os.path.join(mylar.CACHE_DIR, ".32p_cookies.dat") self.ses.cookies = LWPCookieJar(self.session_path) if not os.path.exists(self.session_path): logger.fdebug(self.module + ' Session cookie does not exist. Signing in and Creating.') self.ses.cookies.save() else: logger.fdebug(self.module + ' Session cookie found. Attempting to load...') self.ses.cookies.load(ignore_discard=True) self.un = un self.pw = pw self.authkey = None self.passkey = None self.uid = None self.inkdrops = None
def test_http_link_active(content, link=None): "link URL must be active" import cfscrape from requests.exceptions import RequestException from rfc3986 import is_valid_uri, uri_reference _verify_valid_link_entry(link) key, value = list(link.items())[0] if not is_valid_uri(value, require_scheme=True): return parsed_value = uri_reference(value) if parsed_value.scheme not in ("http", "https"): return # Hooray. if parsed_value.host.endswith("linkedin.com"): raise SkipTest("linkedin.com won't let us see {} anyway".format(value)) try: r = cfscrape.create_scraper().get(value, timeout=30.0, headers={"User-Agent": USER_AGENT}) except RequestException as exc: assert False, "error while checking {}: {}".format(value, exc) else: assert 200 <= r.status_code < 300, \ "expected {} link {} to be active, but got {}".format(key, value, r.status_code)
def fetch(): url = environ.get('URL') root_url = environ.get('ROOT_URL') scraper = cfscrape.create_scraper() html = scraper.get(url).content soup = BeautifulSoup(html, 'html.parser') posts = list() for link in soup.select('#threads a.title'): post = dict() try: post['title'] = link.text post['href'] = root_url + link.get('href') post['uid'] = post['href'].replace(root_url + 'threads/', '')[:6] #TODO posts.append(post) except Exception as e: print(e) pass return posts
def enter_raffle(url): """Enters raffle at given URL.""" headers = { "Host" : "csgorage.com", "Origin" : "http://csgorage.com", "Referer" : service_url + url, "Accept" : "application/json, text/javascript, */*; q=0.01", "Content-Type" : "application/x-www-form-urlencoded; charset=UTF-8", "Accept-Encoding" : "gzip, deflate", "Accept-Language" : "en-US,en;q=0.8" } r = cfscrape.create_scraper() s = scrape(url) raffleId = url[-5:] token_tag = s.find("span", { "class" : "hide tok"}).contents[1] token = str(token_tag)[6:-7] ticketId = randint(900,1350) payload = { 'rid' : raffleId, 'slots[]' : ticketId, '_token' : token, 'rnd' : 1 } t = r.post(service_url + "/getslotfree", data=payload, cookies=cookies, headers=headers) if t.status_code == 200: print("200") else: print("Not 200")
def Bookmarks(title): oc = ObjectContainer(title1 = title) post_values = { 'username' : username, 'password' : password } if username and password: sess = requests.session() s = cfscrape.create_scraper(sess) page = s.post("http://kissanime.com/Login", post_values) #bookmarks = s.get(BASE_URL + '/BookmarkList') #pagehtml = html.fromstring(bookmarks.text) return MessageContainer( "Success", page.text ) for each in pagehtml.xpath("//a[@class='aAnime']"): url = each.xpath("./@href")[0] title = each.xpath("./text()")[0] thumb = "" oc.add(DirectoryObject( key = Callback(EpisodeDetail, title = title, url = url), title = title, thumb = Resource.ContentsOfURLWithFallback(url = thumb, fallback='icon-cover.png') ) ) return oc else: return MessageContainer( "Error", "You need to provide a username and password" )
def __init__(self): self.anime = sys.argv[1] self.anime_url = 'http://kissanime.to/Anime/' self.scraper = cfscrape.create_scraper() self.s_check = ['{}/Episode'.format(self.anime), '?id='] self.audited_links = [] self.decoded_links = []
def get_url_headers(url, configfile, dbfile, headers): config = RssConfig('RSScrawler', configfile) proxy = config.get('proxy') scraper = cfscrape.create_scraper(delay=10) agent = fake_user_agent() headers.update({'User-Agent': agent}) if proxy: sj = decode_base64("c2VyaWVuanVua2llcy5vcmc=") mb = decode_base64("bW92aWUtYmxvZy50bw==") db = RssDb(dbfile, 'proxystatus') if sj in url: if db.retrieve("SJ") and config.get("fallback"): return scraper.get(url, headers=headers, timeout=30) elif mb in url: if db.retrieve("MB") and config.get("fallback"): return scraper.get(url, headers=headers, timeout=30) proxies = {'http': proxy, 'https': proxy} try: response = scraper.get(url, headers=headers, proxies=proxies, timeout=30) return response except Exception as e: print(u"Fehler beim Abruf von: " + url + " " + str(e)) return "" else: try: response = scraper.get(url, headers=headers, timeout=30) return response except Exception as e: print(u"Fehler beim Abruf von: " + url + " " + str(e)) return ""
def boerse_refresh(self): FILE = open(self.boerse_entries, "r") filetext = FILE.read() FILE.close() scraper = cfscrape.create_scraper() url = scraper.get(feeds['boerse_url']).content boerse = BeautifulSoup(url) for entry in boerse.findAll('item'): items = entry.find('title') title = '{}'.format(items).replace('<title>', '')\ .replace('</title>', '')\ .replace(' ', '.')\ .replace('.-.', '') if title not in filetext and\ any([x in title for x in whitelist['boerse']]) and\ any([x not in title for x in blacklist['boerse']]): FILE = open(self.boerse_entries, "a") FILE.write("{}\n".format(title)) FILE.close() self.on_rss_entry( '{0}{1}[BOERSE]{2} {3}'.format( self.BOLD, self.RED, self.END, title)) threading.Timer(feeds['boerse_delay'], self.boerse_refresh).start()
def __init__(self): self.items = 0 self.pages = 0 self.time = 0 self.memory = 0 self.scraper = cfscrape.create_scraper() self.scraper.headers.update(HEADERS)
def scrape(): try: purge() # Connect to the site scrp = cfscrape.create_scraper() rqst = scrp.get('http://800notes.com/').content soup = BeautifulSoup(rqst, 'lxml') # Connect to the database with sql.connect('complaint-scraper.db') as con: with con as cur: for div in soup.findAll('div', class_='oos_preview'): cnt = div.find('div', class_='oos_previewSide') wrp = div.find('div', class_='oos_previewMain') num = wrp.find('div', class_='oos_previewHeader') lnk = num.find('a', class_='oos_previewTitle') txt = wrp.find('div', class_='oos_previewBody') areaCode = lnk.text[:3] fullNmbr = areaCode + lnk.text[4:7] + lnk.text[8:] cmntText = txt.text numCmnts = cnt.text cur.execute(''' INSERT INTO Comments( Area_Code, Full_Number, Comment, Num_Comments) VALUES(?,?,?,?) ''', (areaCode, fullNmbr, cmntText, numCmnts)) except sql.IntegrityError, e: print "Error: %s" % e.args[0]
def cms_identifier(self): """ Identifies the target's content management system. """ engine.setup(self) targets = [target for target in self.args.target if target.strip()] error_count = 0 for url in targets: self.sanitize_url(url) msg = "Getting source for {}".format(self.url); report.low(msg) headers = {'User-Agent': "Mozilla/5.0 (X11; Fedora; Linux i686;" +\ "rv:40.0) Gecko/20100101 Firefox/40.1"} response = None try: response = requests.get(self.url, headers=headers, verify=False) if "Checking your browser before accessing" in response.content: msg ="Site: {} is using cloudflare. "\ "Trying to bypass cloudflare protection.".format(self.url);report.medium(msg) #damn cloudflare, lets see if how to circumvert it. #TODO: Ask for permision since executing JS might be a security issue. # https://github.com/Anorov/cloudflare-scrape cfscraper = cfscrape.create_scraper() response = cfscraper.get(self.url) except Exception as e: #print e error_count += 1 msg="Something went wrong while getting ({}), moving on...".format(self.url);report.error(msg) if error_count > 3: msg = "Too many error. Exiting..."; report.error(msg) sys.exit() framework, site = engine.pwn(self,response) if framework: report.info("This is a website based on: {0} from {1}".format(framework, site)) else: report.high("Failed to determine CMS of site.")
def scrape(url): """Connects to raffle url and returns a BeautifulSoup object.""" fullUrl = service_url + url r = cfscrape.create_scraper() s = r.get(fullUrl, cookies=cookies) t = BeautifulSoup(s.text, "html5lib") return t
def __init__(self, params): for param in params: print(param) # create a webdriver instance with a lenient timeout duration self.scraper = cfscrape.create_scraper() self.rootPage = "" self.file_extension = "" self.download(params)
def on_task_start(self, task, config): try: import cfscrape except ImportError as e: log.debug('Error importing cfscrape: %s' % e) raise plugin.DependencyError('cfscraper', 'cfscrape', 'cfscrape module required. ImportError: %s' % e) if config is True: task.requests = cfscrape.create_scraper(task.requests)
def scraper(): try: import cfscrape except ImportError as e: log.debug('Error importing cfscrape: %s', e) raise plugin.DependencyError('cfscraper', 'cfscrape', 'cfscrape module required. ImportError: %s' % e) else: return cfscrape.create_scraper()
def create_epdict(ep_dict, url): try: scraper = cfscrape.create_scraper() content = scraper.get(url).content soup = bs(content) epdict = {} eplist = [] titlelist = [] if "Drama" in url: for link in soup.find_all('a'): if 'href' in str(link): try: if "Episode-" in link['href']: possible = link['href'].split("Episode-")[1] possible = "/Episode-" + possible fullurl = url + possible fulltitle = link['title'].split("Episode ")[1][:9] integers = [str(i) for i in range(0,10)] if possible: episode = possible[:7] if fulltitle[0] in integers and fulltitle[1] in integers and fulltitle[2] in integers: title = fulltitle[:3] elif fulltitle[0] in integers and fulltitle[1] in integers: title = fulltitle[:2] else: title = fulltitle[0] eplist.append(fullurl.encode('ascii')) titlelist.append(title.encode('ascii')) except TypeError: pass epdict = od((zip(titlelist[::-1][0:], eplist[::-1][0:]))) return epdict else: for link in soup.find_all('a'): try: if "Episode-" in link['href']: possible = link['href'].split("Episode-")[1] possible = "/Episode-" + possible fullurl = url + possible fulltitle = link['title'].split("Episode ")[1][:9] integers = [str(i) for i in range(0,10)] if fulltitle[4] in integers and fulltitle[5] in integers and fulltitle[6] in integers: title = fulltitle[:7] elif fulltitle[3:6] == " - " and fulltitle[6::9] in integers and fulltitle[7::9] in integers and fulltitle[8::9] in integers: title = fulltitle[:9] elif 'v' in fulltitle[:4].lower(): title = fulltitle[0:5] else: title = fulltitle[:3] eplist.append(fullurl.encode('ascii')) titlelist.append(title.encode('ascii')) except TypeError: pass epdict = od((zip(sorted(titlelist),sorted(eplist)))) return epdict except: pass
def url_to_decklist(url): sess = requests.session() sess = cfscrape.create_scraper(sess) r = sess.get(url) data = BeautifulSoup(r.content, "html.parser") tbls = data.find_all('table', id="cards") #print(tbl.prettify().encode('utf-8')) decklist = get_cards_from_tables(tbls) return decklist
def latest_version(): scraper = cfscrape.create_scraper() data = scraper.get('http://teamspeak.com/downloads').content soup = BeautifulSoup(data, 'html.parser') def search(search_string): return soup.find_all(text=re.compile(search_string))[0].parent.\ find(class_='version').text return search(r'Client\ 64\-bit'), search(r'Server\ 64\-bit')
def __init__(self): self.dub_url = 'http://www.animeland.tv' self.dl_url = 'http://www.animeland.tv/download.php?id=' self.scraper = cfscrape.create_scraper() self.start = 1 self.end = sys.argv[2] self.build_args() if not os.path.exists(self.series): os.makedirs(self.series)
def get_episode_list(url): url_base = '{url.scheme}://{url.netloc}'.format(url=urlparse(url)) page = bs(cfscrape.create_scraper().get(url).content,'lxml') urls = page.find('table', {'class': 'listing'}).find_all('a') ep_list = [] for a in reversed(urls): urlep = a['href'] if a['href'].startswith('http') else url_base + a['href'] ep_list.append((urlep, a.string.strip())) return ep_list
def on_start(self, c, e): self.checktimer = int(self.settings["check_timer"]) self.shitmons = self.settings["shitmons"].split() self.alert_chan = self.settings["alert_chan"] self.lat = self.settings["lat"] self.lon = self.settings["lon"] self.session = cfscrape.create_scraper() self.last_scan = [] if self.checktimer > 0: self.search_schedule(self.checktimer)
def __init__(self, args, wait=(5, 10)): self.args = self._parse_args(args) _url = self.args.get('url') self.scraper = cfscrape.create_scraper() # Protection against possible scraping countermeasures. # Better safe than sorry. # You can't watch a 25-min ep in 10 seconds anyway. self.WAIT_RANGE = wait # other config self.URL_BASE = '{url.scheme}://{url.netloc}'.format(url=_url)
def get_coeff_list(self): scraper = cfscrape.create_scraper(js_engine='Node') response = None if os.path.isfile('data.json'): with open('data.json') as data_file: response = json.load(data_file) else: data = scraper.get('http://egb.com/ajax.php?act=UpdateTableBets&ajax=update&fg=1&ind=tables&limit=0&st=0&type=modules&ut=0').content # data = self.invoke_url('/ajax.php?act=UpdateTableBets&ajax=update&fg=1&ind=tables&limit=0&st=0&type=modules&ut=0') response = json.loads(data) return self.process_response(response)
def main(email): req = requests.get("https://hacked-emails.com/api?q=%s" % email) if "jschl-answer" in req.text: print "Cloudflare detected... Solving challenge." scraper = cfscrape.create_scraper() req = scraper.get("https://hacked-emails.com/api?q=%s" % email) print req.text if "jschl-answer" in req.text: return {} data = json.loads(req.text.encode('UTF-8')) return data
def JSCrawl(this, url): js_scraper = cfscrape.create_scraper() response = js_scraper.get(url, headers=this.Header, timeout=15.0) redirect_count = 0 while 'http-equiv="Refresh"' in response.text: response = js_scraper.get(response.url, headers=this.Header, timeout=15.0) redirect_count = redirect_count + 1 if redirect_count == 5: this.PrintError('REDIRECT COUNT OF 5 REACHED! ' + response.url) break return response
def getYearIMDB(imdbid = None): if not imdbid: return None ano = None url = "http://www.imdb.com/title/" + imdbid scraper = cfscrape.create_scraper() html = (scraper.get(url).content).split("\n") for line in range(0,len(html)): if " <script>(function(t){ (t.events = t.events || {})[\"csm_head_pre_title\"] = new Date().getTime(); })(IMDbTimer);</script>" in html[line]: ano = html[line+1].strip().split()[-3] break return ano
def get_src(url): r = requests.get(url) if r.status_code == 503: print "test" scraper = cfscrape.create_scraper() s = scraper.get(url) s.encoding = 'utf-8' return s.text elif r.status_code == 200: r.encoding = 'utf-8' return r.text exit()
def download(url,save=True,prnt=True): if prnt: print "Passing Cloufrare browser validation!!!" print "Please wait " scraper = cfscrape.create_scraper() if save: name = url.split("/")[-1] fs = open(name,"wb") fs.write(scraper.get(url).content) fs.close() return name else: return (scraper.get(url).content).split("\n")
def loadsite(self, id, link): title = os.path.join(mylar.CONFIG.CACHE_DIR, 'getcomics-' + id) with cfscrape.create_scraper() as s: self.cf_cookievalue, cf_user_agent = s.get_tokens(link, headers=self.headers) t = s.get(link, verify=True, cookies=self.cf_cookievalue, headers=self.headers, stream=True) with open(title+'.html', 'wb') as f: for chunk in t.iter_content(chunk_size=1024): if chunk: # filter out keep-alive new chunks f.write(chunk) f.flush()
def getHTML(link): scraper = cfscrape.create_scraper(delay = 10) req = scraper.get(link).content # print('Sleeping...') # time.sleep(5) return bs(req)
def wholeShow(self, url, cookie, token, language, resolution, skipper, episode_range): # print("Check my patreon for this : http://patreon.com/Xonshiz") headers = { 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/601.2.7 (KHTML, like Gecko) Version/9.0.1 Safari/601.2.7', 'Upgrade-Insecure-Requests': '1', 'Accept-Encoding': 'gzip, deflate' } sess = requests.session() sess = cfscrape.create_scraper(sess) page_source = sess.get(url=url, headers=headers, cookies=cookie).text.encode("utf-8") # with open("New_way.html", "w") as wf: # wf.write(page_source) dub_list = [] ep_sub_list = [] for episode_link, episode_type in re.findall( r'\<a href\=\"\/(.*?)\"\ title\=\"(.*?)', str(page_source)): if "(Dub)" in str(episode_type): dub_list.append(str(url) + "/" + str(str(episode_link).split("/")[-1])) else: ep_sub_list.append(str(url) + "/" + str(str(episode_link).split("/")[-1])) if len(dub_list) == 0 and len(ep_sub_list) == 0: print("Could not find the show links. Report on https://github.com/Xonshiz/anime-dl/issues/new") sys.exit() if episode_range != "All": # -1 to shift the episode number accordingly to the INDEX of it. List starts from 0 xD! starting = int(str(episode_range).split("-")[0]) - 1 ending = int(str(episode_range).split("-")[1]) indexes = [x for x in range(starting, ending)] # [::-1] in sub_list in beginning to start this from the 1st episode and at the last, it is to reverse the list again, becasue I'm reverting it again at the end. sub_list = [ep_sub_list[::-1][x] for x in indexes][::-1] else: sub_list = ep_sub_list if skipper == "yes": # print("DLing everything") print("Total Subs to download : %s" % len(sub_list)) for episode_url in sub_list[::-1]: # cookies, Token = self.webpagedownloader(url=url) # print("Sub list : %s" % sub_list) self.onlySubs(url=episode_url, cookies=cookie) print("-----------------------------------------------------------") print("\n") else: if str(language).lower() in ["english", "eng", "dub"]: # If the "dub_list" is empty, that means there are no English Dubs for the show, or CR changed something. if len(dub_list) == 0: print("No English Dub Available For This Series.") print( "If you can see the Dubs, please open an Issue on https://github.com/Xonshiz/anime-dl/issues/new") sys.exit() else: print("Total Episodes to download : %s" % len(dub_list)) for episode_url in dub_list[::-1]: # cookies, Token = self.webpagedownloader(url=url) # print("Dub list : %s" % dub_list) try: self.singleEpisode(url=episode_url, cookies=cookie, token=token, resolution=resolution) except Exception as SomeError: print("Error Downloading : {0}".format(SomeError)) pass print("-----------------------------------------------------------") print("\n") else: print("Total Episodes to download : %s" % len(sub_list)) for episode_url in sub_list[::-1]: # cookies, Token = self.webpagedownloader(url=url) # print("Sub list : %s" % sub_list) try: self.singleEpisode(url=episode_url, cookies=cookie, token=token, resolution=resolution) except Exception as SomeError: print("Error Downloading : {0}".format(SomeError)) pass print("-----------------------------------------------------------") print("\n")
def search(self): try: with cfscrape.create_scraper() as s: cf_cookievalue, cf_user_agent = s.get_tokens( self.url, headers=self.headers) t = s.get( self.url + '/', params={'s': self.query}, verify=True, cookies=cf_cookievalue, headers=self.headers, stream=True, timeout=30, ) with open(self.local_filename, 'wb') as f: for chunk in t.iter_content(chunk_size=1024): if chunk: # filter out keep-alive new chunks f.write(chunk) f.flush() except requests.exceptions.Timeout as e: logger.warn('Timeout occured fetching data from DDL: %s' % e) return 'no results' except requests.exceptions.ConnectionError as e: logger.warn( '[WARNING] Connection refused to DDL site, stopped by a small tank.' ' Error returned as : %s' % e) if any([ errno.ETIMEDOUT, errno.ECONNREFUSED, errno.EHOSTDOWN, errno.EHOSTUNREACH, ]): helpers.disable_provider('DDL', 'Connection Refused.') return 'no results' except Exception as err: logger.warn( '[WARNING] Unable to scrape remote site, stopped by a small tank.' ' Error returned as : %s' % err) if 'Unable to identify Cloudflare IUAM' in str(err): helpers.disable_provider( 'DDL', 'Unable to identify Cloudflare IUAM Javascript on website') # since we're capturing exceptions here, searches from the search module # won't get capture. So we need to do this so they get tracked. exc_type, exc_value, exc_tb = sys.exc_info() filename, line_num, func_name, err_text = traceback.extract_tb( exc_tb)[-1] tracebackline = traceback.format_exc() except_line = { 'exc_type': exc_type, 'exc_value': exc_value, 'exc_tb': exc_tb, 'filename': filename, 'line_num': line_num, 'func_name': func_name, 'err': str(err), 'err_text': err_text, 'traceback': tracebackline, 'comicname': None, 'issuenumber': None, 'seriesyear': None, 'issueid': self.issueid, 'comicid': self.comicid, 'mode': None, 'booktype': None, } helpers.log_that_exception(except_line) return 'no results' else: return self.search_results()
def create_scraper(self): self.cfscraper = cfscrape.create_scraper() self.cfscraper.headers = {'user-agent' : self.USER_AGENT}
from os import get_terminal_size from cfscrape import create_scraper from tldextract import TLDExtract from tldextract import extract from bs4 import BeautifulSoup from requests.exceptions import SSLError from requests.exceptions import MissingSchema from requests.exceptions import ConnectTimeout from . import konfigurasi as konfig HEADERS = {'Accept': 'image/jpg'} TIMEOUT = konfig.timeout DEFAULT_PATH = konfig.default_path browser = create_scraper() domainExtract = TLDExtract(cache_file=DEFAULT_PATH + "\\cache_domain_TLD.txt") LEBAR_PDF = konfig.lebar_pdf ##=======================================================================## class URL(): def __init__(self, url): self.website = domainExtract(url).registered_domain try: url = self.normalisasi_url(url) self.__url = url self.website = domainExtract(url).registered_domain self.lebar_pdf = LEBAR_PDF except Exception as msg: raise Exception(msg)
class Scraper: # Class variables scraper = create_scraper() data = scraper.get("https://toc.qidianunderground.org/").content titles = [] list_of_grouped_links = [] combined = {} # Create soup soup = BeautifulSoup(data, "html.parser") # Argument parser parser = ArgumentParser(description="A scraper for the QidianUnderground ToC") parser.add_argument( "-q", "--quiet", action="store_true", default=False, help="Print only success or failure", ) args = parser.parse_args() quiet = args.quiet # Main method def scrape(self): if not self.quiet: center_heading("Titles") self.titles = self.get_titles() center_heading("Chapters") self.list_of_grouped_links = self.get_list_of_grouped_links() center_heading("Combining titles and links") self.combine_titles_and_links() center_heading("Writing to CSV") self.write_to_csv() else: self.titles = self.get_titles() self.list_of_grouped_links = self.get_list_of_grouped_links() self.combine_titles_and_links() self.write_to_csv() @star_wrapper(quiet) def get_titles(self): p = self.soup.select(".content p") titles = [] # Print out how many items in p if not self.quiet: print("{} titles found".format(len(p))) try: for n, title in enumerate(p): # Get p text title = p[n].get_text() # Seperate title from "updated x time ago" try: title = title[0 : title.index("\n")] # If it can't it's not a valid title except ValueError: pass titles.append(title) print("Titles: Success") return titles # If somehow something goes wrong except Exception as error: raise Exception("Something went wrong:\n {}".format(error)) @star_wrapper(quiet) def get_list_of_grouped_links(self): # one novel = one li # Get all li from site unparsed_lists = self.soup.select(".content ul li") parsed_lists = [] missing = 0 for n, li in enumerate(unparsed_lists): # Get all chapter links unformatted_links = li.findAll() formatted_links = [] if not self.quiet: print( '* Getting {} chapter links for "{}"'.format( len(unformatted_links), self.titles[n] ) ) # For each link for n, link in enumerate(unformatted_links): # If it has a href it's a link try: link = unformatted_links[n]["href"] linkText = unformatted_links[n].get_text() # If the first 3 chars can't be converted to int it's not a chapter link try: int(linkText[0:3]) # So ignore it except: continue # If it's not a link except KeyError: missing += 1 link = "Missing" # Remove "Missing " and get missing chapter number linkText = unformatted_links[n].get_text()[8:] # Add (chapter length, chapter link) to book list formatted_links.append((linkText, link)) # Add book list to scraper list parsed_lists.append(formatted_links) if not missing: print("Chapters: Success") else: print("Chapters: {} missing".format(missing)) return parsed_lists @star_wrapper(quiet) def combine_titles_and_links(self): for i, group_of_links in enumerate(self.list_of_grouped_links): # For every book in titles create a dictionary entry and assign to it a list of tuples self.combined[self.titles[i]] = group_of_links if self.quiet: print("Combining: Success") else: print("* Success") @star_wrapper(quiet) def write_to_csv(self): with open("qidianunderground.csv", "w") as csv_file: csv_writer = writer(csv_file) headers = ["Title", "Chapter Range", "Link"] csv_writer.writerow(headers) # For every book in scraper list for title, links in self.combined.items(): # For every link in book list for link in links: # Title Chapter | Length Chapter | Links csv_writer.writerow([title, link[0], link[1]]) if self.quiet: print("CSV Write: Success") else: print("* Success")
import cfscrape import requests import os from recaptcha import * from bs4 import BeautifulSoup import re # Requests wrapper #url = 'https://www.acgnx.se/' url = 'https://www.acgnx.se/show-8A7C71BCBEB854DDF0880AF26FB4504A47F50B2D.html' session = requests.session() session.headers = 'content-type' session.mount("http://", cfscrape.CloudflareScraper()) scraper = cfscrape.create_scraper(sess=session) req = scraper.get(url).content #print req ### Save request as HTML named as 'Result.html' f_name = '\Result.html' f = open(f_name, 'w') f.write(req.encode('UTF-8')) f.close ### Excute JavaScript file start = time() driver = webdriver.Chrome( os.getcwd() + "\chromedriver.exe" ) # Optional argument, if not specified will search path. driver.get(os.getcwd() + f_name) #print driver.page_source
def test_download_chapter(self): scraper = cfscrape.create_scraper() chapter_url = 'https://www.japscan.to/lecture-en-ligne/hajime-no-ippo/1255/' download_helper.download_chapter(scraper, chapter_url)
def get_tokens_page(self, page: int = 1, elements: Literal[10, 25, 50, 100] = 100) -> Sequence[EtherscanToken]: import cfscrape scraper = cfscrape.create_scraper() # Bypass cloudfare response = scraper.get(f'{self.tokens_url}?ps={elements}&p={page}') return self._parse_tokens_page(response.content)
def content(url, searched=False): try: scraper = cfscrape.create_scraper() c = scraper.get(url).content r = dom_parser2.parse_dom(c, 'div', {'class': 'video'}) r = [(dom_parser2.parse_dom(i, 'a'),\ dom_parser2.parse_dom(i, 'img', req=['src','width','height'])) \ for i in r if i] r = [(i[0][0].attrs['href'], i[0][0].attrs['title'], i[1][0].attrs['src']) for i in r if i] if (not r) and (not searched): log_utils.log( 'Scraping Error in %s:: Content of request: %s' % (base_name.title(), str(c)), log_utils.LOGERROR) kodi.notify(msg='Scraping Error: Info Added To Log File', duration=6000, sound=True) except Exception as e: if (not searched): log_utils.log( 'Fatal Error in %s:: Error: %s' % (base_name.title(), str(e)), log_utils.LOGERROR) kodi.notify(msg='Fatal Error', duration=4000, sound=True) quit() else: pass dirlst = [] for i in r: try: name = i[1].title() if searched: description = 'Result provided by %s' % base_name.title() else: description = name content_url = i[0] + '|SPLIT|%s' % base_name fanarts = xbmc.translatePath( os.path.join('special://home/addons/script.xxxodus.artwork', 'resources/art/%s/fanart.jpg' % filename)) dirlst.append({ 'name': name, 'url': content_url, 'mode': player_mode, 'icon': i[2], 'fanart': i[2], 'description': description, 'folder': False }) except Exception as e: log_utils.log( 'Error adding menu item %s in %s:: Error: %s' % (i[1].title(), base_name.title(), str(e)), log_utils.LOGERROR) if dirlst: buildDirectory(dirlst, stopend=True, isVideo=True, isDownloadable=True) else: if (not searched): kodi.notify(msg='No Content Found') quit() if searched: return str(len(r)) if not searched: search_pattern = '''rel\=['"]next['"]\s*href=['"]([^'"]+)\s*''' helper.scraper().get_next_page(content_mode, url, search_pattern, filename)
def scrape_data(): with open('./coinprice.json', 'r+', encoding='utf-8') as file: oldsoup = json.load(file) #------------------------------ASSIGNING OLD VALUES----------------------------# try: #---------------ERROR VALUES--------------------------# oldvalue['old_btcx_error'] = oldsoup['btcx_error'] oldvalue['old_buyucoin_error'] = oldsoup['buyucoin_error'] oldvalue['old_coindelta_error'] = oldsoup['coindelta_error'] oldvalue['old_coinome_error'] = oldsoup['coinome_error'] oldvalue['old_coinsecure_error'] = oldsoup['coinsecure_error'] oldvalue['old_ethex_error'] = oldsoup['ethex_error'] oldvalue['old_koinex_error'] = oldsoup['koinex_error'] oldvalue['old_pocketbits_error'] = oldsoup['pocketbits_error'] oldvalue['old_unocoin_error'] = oldsoup['unocoin_error'] oldvalue['old_zebpay_error'] = oldsoup['zebpay_error'] except Exception as e: oldvalue['old_btcx_error'] = 'false' oldvalue['old_buyucoin_error'] = 'false' oldvalue['old_coindelta_error'] = 'false' oldvalue['old_coinome_error'] = 'false' oldvalue['old_coinsecure_error'] = 'false' oldvalue['old_ethex_error'] = 'false' oldvalue['old_koinex_error'] = 'false' oldvalue['old_pocketbits_error'] = 'false' oldvalue['old_unocoin_error'] = 'false' oldvalue['old_zebpay_error'] = 'false' error(e) #------------------------BEST BUY FROM-------------------------# try: oldvalue['old_best_btc_buy_from'] = oldsoup['best_btc_buy_from'] oldvalue['old_best_eth_buy_from'] = oldsoup['best_eth_buy_from'] oldvalue['old_best_bch_buy_from'] = oldsoup['best_bch_buy_from'] oldvalue['old_best_ltc_buy_from'] = oldsoup['best_ltc_buy_from'] oldvalue['old_best_xrp_buy_from'] = oldsoup['best_xrp_buy_from'] oldvalue['old_best_dash_buy_from'] = oldsoup['best_dash_buy_from'] #------------------------BEST SELL TO-------------------------# oldvalue['old_best_btc_sell_to'] = oldsoup['best_btc_sell_to'] oldvalue['old_best_eth_sell_to'] = oldsoup['best_eth_sell_to'] oldvalue['old_best_bch_sell_to'] = oldsoup['best_bch_sell_to'] oldvalue['old_best_ltc_sell_to'] = oldsoup['best_ltc_sell_to'] oldvalue['old_best_xrp_sell_to'] = oldsoup['best_xrp_sell_to'] oldvalue['old_best_dash_sell_to'] = oldsoup['best_dash_sell_to'] except Exception as e: oldvalue['old_best_btc_buy_from'] = '---' oldvalue['old_best_eth_buy_from'] = '---' oldvalue['old_best_bch_buy_from'] = '---' oldvalue['old_best_ltc_buy_from'] = '---' oldvalue['old_best_xrp_buy_from'] = '---' oldvalue['old_best_btc_sell_to'] = '---' oldvalue['old_best_eth_sell_to'] = '---' oldvalue['old_best_bch_sell_to'] = '---' oldvalue['old_best_ltc_sell_to'] = '---' oldvalue['old_best_xrp_sell_to'] = '---' oldvalue['old_best_dash_sell_to'] = '---' error(e) #---------------------COINS BUY/SELL-------------------------# oldvalue['old_unocoin_btc_buy'] = oldsoup['unocoin_btc_buy'] oldvalue['old_unocoin_btc_sell'] = oldsoup['unocoin_btc_sell'] oldvalue['old_unocoin_timestamp'] = oldsoup['unocoin_timestamp'] oldvalue['old_zebpay_btc_buy'] = oldsoup['zebpay_btc_buy'] oldvalue['old_zebpay_btc_sell'] = oldsoup['zebpay_btc_sell'] oldvalue['old_zebpay_timestamp'] = oldsoup['zebpay_timestamp'] oldvalue['old_koinex_btc_buy'] = oldsoup['koinex_btc_buy'] oldvalue['old_koinex_btc_sell'] = oldsoup['koinex_btc_sell'] oldvalue['old_koinex_eth_buy'] = oldsoup['koinex_eth_buy'] oldvalue['old_koinex_eth_sell'] = oldsoup['koinex_eth_sell'] oldvalue['old_koinex_ltc_buy'] = oldsoup['koinex_ltc_buy'] oldvalue['old_koinex_ltc_sell'] = oldsoup['koinex_ltc_sell'] oldvalue['old_koinex_bch_buy'] = oldsoup['koinex_bch_buy'] oldvalue['old_koinex_bch_sell'] = 
oldsoup['koinex_bch_sell'] oldvalue['old_koinex_xrp_buy'] = oldsoup['koinex_xrp_buy'] oldvalue['old_koinex_xrp_sell'] = oldsoup['koinex_xrp_sell'] oldvalue['old_koinex_timestamp'] = oldsoup['koinex_timestamp'] oldvalue['old_buyucoin_btc_buy'] = oldsoup['buyucoin_btc_buy'] oldvalue['old_buyucoin_btc_sell'] = oldsoup['buyucoin_btc_sell'] oldvalue['old_buyucoin_timestamp'] = oldsoup['buyucoin_timestamp'] oldvalue['old_coinsecure_btc_buy'] = oldsoup['coinsecure_btc_buy'] oldvalue['old_coinsecure_btc_sell'] = oldsoup['coinsecure_btc_sell'] oldvalue['old_coinsecure_timestamp'] = oldsoup['coinsecure_timestamp'] oldvalue['old_coinome_btc_buy'] = oldsoup['coinome_btc_buy'] oldvalue['old_coinome_btc_sell'] = oldsoup['coinome_btc_sell'] oldvalue['old_coinome_bch_buy'] = oldsoup['coinome_bch_buy'] oldvalue['old_coinome_bch_sell'] = oldsoup['coinome_bch_sell'] oldvalue['old_coinome_ltc_buy'] = oldsoup['coinome_ltc_buy'] oldvalue['old_coinome_ltc_sell'] = oldsoup['coinome_ltc_sell'] oldvalue['old_coinome_dash_buy'] = oldsoup['coinome_dash_buy'] oldvalue['old_coinome_dash_sell'] = oldsoup['coinome_dash_sell'] oldvalue['old_coinome_timestamp'] = oldsoup['coinome_timestamp'] oldvalue['old_pocketbits_btc_buy'] = oldsoup['pocketbits_btc_buy'] oldvalue['old_pocketbits_btc_sell'] = oldsoup['pocketbits_btc_sell'] oldvalue['old_pocketbits_timestamp'] = oldsoup['pocketbits_timestamp'] oldvalue['old_ethex_eth_buy'] = oldsoup['ethex_eth_buy'] oldvalue['old_ethex_eth_sell'] = oldsoup['ethex_eth_sell'] oldvalue['old_ethex_timestamp'] = oldsoup['ethex_timestamp'] oldvalue['old_btcx_xrp_buy'] = oldsoup['btcx_xrp_buy'] oldvalue['old_btcx_xrp_sell'] = oldsoup['btcx_xrp_sell'] oldvalue['old_btcx_timestamp'] = oldsoup['btcx_timestamp'] oldvalue['old_coindelta_btc_buy'] = oldsoup['coindelta_btc_buy'] oldvalue['old_coindelta_btc_sell'] = oldsoup['coindelta_btc_sell'] oldvalue['old_coindelta_eth_buy'] = oldsoup['coindelta_eth_buy'] oldvalue['old_coindelta_eth_sell'] = oldsoup['coindelta_eth_sell'] oldvalue['old_coindelta_ltc_buy'] = oldsoup['coindelta_ltc_buy'] oldvalue['old_coindelta_ltc_sell'] = oldsoup['coindelta_ltc_sell'] oldvalue['old_coindelta_bch_buy'] = oldsoup['coindelta_bch_buy'] oldvalue['old_coindelta_bch_sell'] = oldsoup['coindelta_bch_sell'] oldvalue['old_coindelta_xrp_buy'] = oldsoup['coindelta_xrp_buy'] oldvalue['old_coindelta_xrp_sell'] = oldsoup['coindelta_xrp_sell'] oldvalue['old_coindelta_timestamp'] = oldsoup['coindelta_timestamp'] #-----------------BEST BUY SELL VALUE------------------# try: oldvalue['old_best_btc_buy'] = oldsoup['best_btc_buy'] oldvalue['old_best_btc_sell'] = oldsoup['best_btc_sell'] oldvalue['old_best_eth_buy'] = oldsoup['best_eth_buy'] oldvalue['old_best_eth_sell'] = oldsoup['best_eth_sell'] oldvalue['old_best_bch_buy'] = oldsoup['best_bch_buy'] oldvalue['old_best_bch_sell'] = oldsoup['best_bch_sell'] oldvalue['old_best_ltc_buy'] = oldsoup['best_ltc_buy'] oldvalue['old_best_ltc_sell'] = oldsoup['best_ltc_sell'] oldvalue['old_best_xrp_buy'] = oldsoup['best_xrp_buy'] oldvalue['old_best_xrp_sell'] = oldsoup['best_xrp_sell'] oldvalue['old_best_dash_buy'] = oldsoup['best_dash_buy'] oldvalue['old_best_dash_sell'] = oldsoup['best_dash_sell'] except exception as e: oldvalue['old_best_btc_buy'] = 0 oldvalue['old_best_btc_sell'] = 0 oldvalue['old_best_eth_buy'] = 0 oldvalue['old_best_eth_sell'] = 0 oldvalue['old_best_bch_buy'] = 0 oldvalue['old_best_bch_sell'] = 0 oldvalue['old_best_ltc_buy'] = 0 oldvalue['old_best_ltc_sell'] = 0 oldvalue['old_best_xrp_buy'] = 0 oldvalue['old_best_xrp_sell'] = 
0 oldvalue['old_best_dash_buy'] = 0 oldvalue['old_best_dash_sell'] = 0 error(e) #--------------------------FEES IN JSON---------------------------------------# try: coinsoup['buyucoin_buy_fees'] = buyucoin_buy_fees coinsoup['buyucoin_sell_fees'] = buyucoin_sell_fees coinsoup['btcx_buy_fees'] = btcx_buy_fees coinsoup['btcx_sell_fees'] = btcx_sell_fees coinsoup['coindelta_buy_fees'] = coindelta_buy_fees coinsoup['coindelta_sell_fees'] = coindelta_sell_fees coinsoup['coinome_buy_fees'] = coinome_buy_fees coinsoup['coinome_sell_fees'] = coinome_sell_fees coinsoup['coinsecure_buy_fees'] = coinsecure_buy_fees coinsoup['coinsecure_sell_fees'] = coinsecure_sell_fees coinsoup['ethex_buy_fees'] = ethex_buy_fees coinsoup['ethex_sell_fees'] = ethex_sell_fees coinsoup['koinex_buy_fees'] = koinex_buy_fees coinsoup['koinex_sell_fees'] = koinex_sell_fees coinsoup['pocketbits_buy_fees'] = pocketbits_buy_fees coinsoup['pocketbits_sell_fees'] = pocketbits_sell_fees coinsoup['unocoin_buy_fees'] = unocoin_buy_fees coinsoup['unocoin_sell_fees'] = unocoin_sell_fees coinsoup['zebpay_buy_fees'] = zebpay_buy_fees coinsoup['zebpay_sell_fees'] = zebpay_sell_fees except Exception as e: coinsoup['buyucoin_buy_fees'] = 0 coinsoup['buyucoin_sell_fees'] = 0 coinsoup['btcx_buy_fees'] = 0 coinsoup['btcx_sell_fees'] = 0 coinsoup['coindelta_buy_fees'] = 0 coinsoup['coindelta_sell_fees'] = 0 coinsoup['coinome_buy_fees'] = 0 coinsoup['coinome_sell_fees'] = 0 coinsoup['coinsecure_buy_fees'] = 0 coinsoup['coinsecure_sell_fees'] = 0 coinsoup['ethex_buy_fees'] = 0 coinsoup['ethex_sell_fees'] = 0 coinsoup['koinex_buy_fees'] = 0 coinsoup['koinex_sell_fees'] = 0 coinsoup['pocketbits_buy_fees'] = 0 coinsoup['pocketbits_sell_fees'] = 0 coinsoup['unocoin_buy_fees'] = 0 coinsoup['unocoin_sell_fees'] = 0 coinsoup['zebpay_buy_fees'] = 0 coinsoup['zebpay_sell_fees'] = 0 #----------------------- BEST DICTS-----------------------------# btc_buy_dict = {} eth_buy_dict = {} bch_buy_dict = {} ltc_buy_dict = {} xrp_buy_dict = {} dash_buy_dict = {} btc_sell_dict = {} eth_sell_dict = {} bch_sell_dict = {} ltc_sell_dict = {} xrp_sell_dict = {} dash_sell_dict = {} #--------------------------FETCHING APIS-----------------------------# try: driver1.get('https://www.unocoin.com/trade?all') html1 = driver1.page_source soup1 = BeautifulSoup(html1, "html.parser") json_soup = json.loads(soup1.get_text()) unocoin_btc_buy = float(json_soup.pop('buy')) unocoin_btc_sell = float(json_soup.pop('sell')) coinsoup['unocoin_btc_buy'] = float(unocoin_btc_buy) coinsoup['unocoin_btc_sell'] = float(unocoin_btc_sell) coinsoup['unocoin_timestamp'] = int( (datetime.datetime.utcnow() - datetime.datetime(1970, 1, 1, 0, 0, 0, 0)).total_seconds()) print('unocoin working') except Exception as e: print('some exception occured in unocoin') unocoin_btc_buy = float(oldvalue['old_unocoin_btc_buy']) unocoin_btc_sell = float(oldvalue['old_unocoin_btc_sell']) coinsoup['unocoin_btc_buy'] = float(oldvalue['old_unocoin_btc_buy']) coinsoup['unocoin_btc_sell'] = float(oldvalue['old_unocoin_btc_sell']) coinsoup['unocoin_timestamp'] = int(oldvalue['old_unocoin_timestamp']) error(e) try: html22 = requests.get( 'https://www.zebapi.com/api/v1/market/ticker-new/BTC/INR') soup22 = html22.json() soup_zebpay = html22.json() zebpay_btc_buy = float(soup_zebpay['buy']) zebpay_btc_sell = float(soup_zebpay['sell']) coinsoup['zebpay_btc_buy'] = float(zebpay_btc_buy) coinsoup['zebpay_btc_sell'] = float(zebpay_btc_sell) coinsoup['zebpay_timestamp'] = int( (datetime.datetime.utcnow() - datetime.datetime(1970, 
1, 1, 0, 0, 0, 0)).total_seconds()) print('zebpay working') except Exception as e: print('some exception occured in zebpay') zebpay_btc_buy = float(oldvalue['old_zebpay_btc_buy']) zebpay_btc_sell = float(oldvalue['old_zebpay_btc_sell']) coinsoup['zebpay_btc_buy'] = float(oldvalue['old_zebpay_btc_buy']) coinsoup['zebpay_btc_sell'] = float(oldvalue['old_zebpay_btc_sell']) coinsoup['zebpay_timestamp'] = int(oldvalue['old_zebpay_timestamp']) error(e) try: kscraper = cfscrape.create_scraper() html_koinex = kscraper.get("https://koinex.in/api/ticker") soup_koinex = html_koinex.json() koinex_eth = (soup_koinex['stats']['ETH']) koinex_btc = (soup_koinex['stats']['BTC']) koinex_ltc = (soup_koinex['stats']['LTC']) koinex_bch = (soup_koinex['stats']['BCH']) koinex_xrp = (soup_koinex['stats']['XRP']) koinex_eth_buy = float(koinex_eth['lowest_ask']) koinex_eth_sell = float(koinex_eth['highest_bid']) koinex_btc_buy = float(koinex_btc['lowest_ask']) koinex_btc_sell = float(koinex_btc['highest_bid']) koinex_ltc_buy = float(koinex_ltc['lowest_ask']) koinex_ltc_sell = float(koinex_ltc['highest_bid']) koinex_bch_buy = float(koinex_bch['lowest_ask']) koinex_bch_sell = float(koinex_bch['highest_bid']) koinex_xrp_buy = float(koinex_xrp['lowest_ask']) koinex_xrp_sell = float(koinex_xrp['highest_bid']) coinsoup['koinex_eth_buy'] = float(koinex_eth_buy) coinsoup['koinex_eth_sell'] = float(koinex_eth_sell) coinsoup['koinex_btc_buy'] = float(koinex_btc_buy) coinsoup['koinex_btc_sell'] = float(koinex_btc_sell) coinsoup['koinex_ltc_buy'] = float(koinex_ltc_buy) coinsoup['koinex_ltc_sell'] = float(koinex_ltc_sell) coinsoup['koinex_bch_buy'] = float(koinex_bch_buy) coinsoup['koinex_bch_sell'] = float(koinex_bch_sell) coinsoup['koinex_xrp_buy'] = float(koinex_xrp_buy) coinsoup['koinex_xrp_sell'] = float(koinex_xrp_sell) print('koinex working') coinsoup['koinex_timestamp'] = int( (datetime.datetime.utcnow() - datetime.datetime(1970, 1, 1, 0, 0, 0, 0)).total_seconds()) except Exception as e: print('some exception occured in koinex') koinex_btc_buy = float(oldvalue['old_koinex_btc_buy']) koinex_btc_sell = float(oldvalue['old_koinex_btc_sell']) koinex_eth_buy = float(oldvalue['old_koinex_eth_buy']) koinex_eth_sell = float(oldvalue['old_koinex_eth_sell']) koinex_bch_buy = float(oldvalue['old_koinex_bch_buy']) koinex_bch_sell = float(oldvalue['old_koinex_bch_sell']) koinex_ltc_buy = float(oldvalue['old_koinex_ltc_buy']) koinex_ltc_sell = float(oldvalue['old_koinex_ltc_sell']) koinex_xrp_buy = float(oldvalue['old_koinex_xrp_buy']) koinex_xrp_sell = float(oldvalue['old_koinex_xrp_sell']) coinsoup['koinex_btc_buy'] = float(oldvalue['old_koinex_btc_buy']) coinsoup['koinex_btc_sell'] = float(oldvalue['old_koinex_btc_sell']) coinsoup['koinex_eth_buy'] = float(oldvalue['old_koinex_eth_buy']) coinsoup['koinex_eth_sell'] = float(oldvalue['old_koinex_eth_sell']) coinsoup['koinex_bch_buy'] = float(oldvalue['old_koinex_bch_buy']) coinsoup['koinex_bch_sell'] = float(oldvalue['old_koinex_bch_sell']) coinsoup['koinex_ltc_buy'] = float(oldvalue['old_koinex_ltc_buy']) coinsoup['koinex_ltc_sell'] = float(oldvalue['old_koinex_ltc_sell']) coinsoup['koinex_xrp_buy'] = float(oldvalue['old_koinex_xrp_buy']) coinsoup['koinex_xrp_sell'] = float(oldvalue['old_koinex_xrp_sell']) coinsoup['koinex_timestamp'] = int(oldvalue['old_koinex_timestamp']) error(e) try: bscraper = cfscrape.create_scraper() html_buyucoin = bscraper.get("https://www.buyucoin.com/api/v1/btc") soup_buyucoin = html_buyucoin.json() for sub_obj in soup_buyucoin["BuyUcoin_data"]: 
buyucoin_btc_buy = float(sub_obj.pop('btc_buy_price')) buyucoin_btc_sell = float(sub_obj.pop('btc_sell_price')) coinsoup['buyucoin_btc_buy'] = float(buyucoin_btc_buy) coinsoup['buyucoin_btc_sell'] = float(buyucoin_btc_sell) coinsoup['buyucoin_timestamp'] = int( (datetime.datetime.utcnow() - datetime.datetime(1970, 1, 1, 0, 0, 0, 0)).total_seconds()) print('buyucoin working') except Exception as e: print('some exception occured in buyucoin') buyucoin_btc_buy = float(oldvalue['old_buyucoin_btc_buy']) buyucoin_btc_sell = float(oldvalue['old_buyucoin_btc_sell']) coinsoup['buyucoin_btc_buy'] = float(oldvalue['old_buyucoin_btc_buy']) coinsoup['buyucoin_btc_sell'] = float( oldvalue['old_buyucoin_btc_sell']) coinsoup['buyucoin_timestamp'] = int( oldvalue['old_buyucoin_timestamp']) error(e) try: csscraper = cfscrape.create_scraper() html_coinsecure = csscraper.get( "https://api.coinsecure.in/v1/exchange/ticker") soup_coinsecure = html_coinsecure.json() #soup_coinsecure = xmltodict.parse(html5,process_namespaces=True) coinsecure_btc_buy = float(soup_coinsecure['message']['ask'] / 100) coinsecure_btc_sell = float(soup_coinsecure['message']['bid'] / 100) coinsoup['coinsecure_btc_buy'] = float(coinsecure_btc_buy) coinsoup['coinsecure_btc_sell'] = float(coinsecure_btc_sell) coinsoup['coinsecure_timestamp'] = int( (datetime.datetime.utcnow() - datetime.datetime(1970, 1, 1, 0, 0, 0, 0)).total_seconds()) print('coinsecure working') except Exception as e: print('some exception occured in coinsecure') coinsecure_btc_buy = float(oldvalue['old_coinsecure_btc_buy']) coinsecure_btc_sell = float(oldvalue['old_coinsecure_btc_sell']) coinsoup['coinsecure_btc_buy'] = float( oldvalue['old_coinsecure_btc_buy']) coinsoup['coinsecure_btc_sell'] = float( oldvalue['old_coinsecure_btc_sell']) coinsoup['coinsecure_timestamp'] = int( oldvalue['old_coinsecure_timestamp']) curtime = datetime.datetime.now(tz) fulltime = datetime.datetime.now() error(e) try: html6 = requests.get('https://api.ethexindia.com/ticker') soup6 = html6.json() json_soup6 = soup6 ethex_eth_buy = float(json_soup6.pop('ask')) ethex_eth_sell = float(json_soup6.pop('bid')) coinsoup['ethex_eth_buy'] = float(ethex_eth_buy) coinsoup['ethex_eth_sell'] = float(ethex_eth_sell) coinsoup['ethex_timestamp'] = int( (datetime.datetime.utcnow() - datetime.datetime(1970, 1, 1, 0, 0, 0, 0)).total_seconds()) print('ethex working') except Exception as e: print('some exception occured in ethex') ethex_eth_buy = float(oldvalue['old_ethex_eth_buy']) ethex_eth_sell = float(oldvalue['old_ethex_eth_sell']) coinsoup['ethex_eth_buy'] = float(oldvalue['old_ethex_eth_buy']) coinsoup['ethex_eth_sell'] = float(oldvalue['old_ethex_eth_sell']) coinsoup['ethex_timestamp'] = int(oldvalue['old_ethex_timestamp']) error(e) try: bscraper = cfscrape.create_scraper() html_btcx = bscraper.get("https://api.btcxindia.com/ticker/") soup_btcx = html_btcx.json() btcx_xrp_buy = float(soup_btcx['ask']) btcx_xrp_sell = float(soup_btcx['bid']) coinsoup['btcx_xrp_buy'] = float(btcx_xrp_buy) coinsoup['btcx_xrp_sell'] = float(btcx_xrp_sell) print('Btcx Working') coinsoup['btcx_timestamp'] = int( (datetime.datetime.utcnow() - datetime.datetime(1970, 1, 1, 0, 0, 0, 0)).total_seconds()) except Exception as e: print('some exception occured in btcx') btcx_xrp_buy = float(oldvalue['old_btcx_xrp_buy']) btcx_xrp_sell = float(oldvalue['old_btcx_xrp_sell']) coinsoup['btcx_xrp_buy'] = float(oldvalue['old_btcx_xrp_buy']) coinsoup['btcx_xrp_sell'] = float(oldvalue['old_btcx_xrp_sell']) coinsoup['btcx_timestamp'] = 
int(oldvalue['old_btcx_timestamp']) error(e) try: html8 = requests.get('https://www.coinome.com/api/v1/ticker.json') soup8 = html8.json() json_soup8 = soup8 coinome_btc_buy = float(json_soup8['btc-inr']['lowest_ask']) coinome_btc_sell = float(json_soup8['btc-inr']['highest_bid']) coinome_bch_buy = float(json_soup8['bch-inr']['lowest_ask']) coinome_bch_sell = float(json_soup8['bch-inr']['highest_bid']) coinome_ltc_buy = float(json_soup8['ltc-inr']['lowest_ask']) coinome_ltc_sell = float(json_soup8['ltc-inr']['highest_bid']) coinome_dash_buy = float(json_soup8['dash-inr']['lowest_ask']) coinome_dash_sell = float(json_soup8['dash-inr']['highest_bid']) coinsoup['coinome_btc_buy'] = float(coinome_btc_buy) coinsoup['coinome_btc_sell'] = float(coinome_btc_sell) coinsoup['coinome_bch_buy'] = float(coinome_bch_buy) coinsoup['coinome_bch_sell'] = float(coinome_bch_sell) coinsoup['coinome_ltc_buy'] = float(coinome_ltc_buy) coinsoup['coinome_ltc_sell'] = float(coinome_ltc_sell) coinsoup['coinome_dash_buy'] = float(coinome_dash_buy) coinsoup['coinome_dash_sell'] = float(coinome_dash_sell) coinsoup['coinome_timestamp'] = int( (datetime.datetime.utcnow() - datetime.datetime(1970, 1, 1, 0, 0, 0, 0)).total_seconds()) print('coinome working') print(coinome_dash_buy) print(coinome_dash_sell) except Exception as e: print('some exception occured in coinome') coinome_btc_buy = float(oldvalue['old_coinome_btc_buy']) coinome_btc_sell = float(oldvalue['old_coinome_btc_sell']) coinome_bch_buy = float(oldvalue['old_coinome_bch_buy']) coinome_bch_sell = float(oldvalue['old_coinome_bch_sell']) coinome_ltc_buy = float(oldvalue['old_coinome_ltc_buy']) coinome_ltc_sell = float(oldvalue['old_coinome_ltc_sell']) coinsoup['coinome_btc_buy'] = float(oldvalue['old_coinome_btc_buy']) coinsoup['coinome_btc_sell'] = float(oldvalue['old_coinome_btc_sell']) coinsoup['coinome_bch_buy'] = float(oldvalue['old_coinome_bch_buy']) coinsoup['coinome_bch_sell'] = float(oldvalue['old_coinome_bch_sell']) coinsoup['coinome_ltc_buy'] = float(oldvalue['old_coinome_ltc_buy']) coinsoup['coinome_ltc_sell'] = float(oldvalue['old_coinome_ltc_sell']) coinsoup['coinome_dash_buy'] = float(oldvalue['old_coinome_dash_buy']) coinsoup['coinome_dash_sell'] = float( oldvalue['old_coinome_dash_sell']) coinsoup['coinome_timestamp'] = int(oldvalue['old_coinome_timestamp']) error(e) try: html9 = requests.get('https://pocketbits.in/api/ticker') soup9 = html9.json() json_soup9 = soup9 pocketbits_btc_buy = float(json_soup9.pop('buy')) pocketbits_btc_sell = float(json_soup9.pop('sell')) coinsoup['pocketbits_btc_buy'] = float(pocketbits_btc_buy) coinsoup['pocketbits_btc_sell'] = float(pocketbits_btc_sell) coinsoup['pocketbits_timestamp'] = int( (datetime.datetime.utcnow() - datetime.datetime(1970, 1, 1, 0, 0, 0, 0)).total_seconds()) print('pocketbits working') except Exception as e: print('some exception occured in pocketbits') pocketbits_btc_buy = float(oldvalue['old_pocketbits_btc_buy']) pocketbits_btc_sell = float(oldvalue['old_pocketbits_btc_sell']) coinsoup['pocketbits_btc_buy'] = float( oldvalue['old_pocketbits_btc_buy']) coinsoup['pocketbits_btc_sell'] = float( oldvalue['old_pocketbits_btc_sell']) coinsoup['pocketbits_timestamp'] = int( oldvalue['old_pocketbits_timestamp']) error(e) try: cscraper = cfscrape.create_scraper() cdrbtc = cscraper.get( 'https://coindelta.com/api/v1/public/getticker/?market=btc-inr') cdreth = cscraper.get( 'https://coindelta.com/api/v1/public/getticker/?market=eth-inr') cdrltc = cscraper.get( 
'https://coindelta.com/api/v1/public/getticker/?market=ltc-inr') cdrbch = cscraper.get( 'https://coindelta.com/api/v1/public/getticker/?market=bch-inr') cdrxrp = cscraper.get( 'https://coindelta.com/api/v1/public/getticker/?market=xrp-inr') cdrbtc_soup = cdrbtc.json() coindelta_buy_btc = float(cdrbtc_soup[0]['Ask']) coinsoup['coindelta_btc_buy'] = float(coindelta_buy_btc) coindelta_sell_btc = float(cdrbtc_soup[0]['Bid']) coinsoup['coindelta_btc_sell'] = float(coindelta_sell_btc) cdreth_soup = cdreth.json() coindelta_buy_eth = float(cdreth_soup[0]['Ask']) coinsoup['coindelta_eth_buy'] = float((coindelta_buy_eth)) coindelta_sell_eth = float(cdreth_soup[0]['Bid']) coinsoup['coindelta_eth_sell'] = float(coindelta_sell_eth) cdrltc_soup = cdrltc.json() coindelta_buy_ltc = float(cdrltc_soup[0]['Ask']) coinsoup['coindelta_ltc_buy'] = float(coindelta_buy_ltc) coindelta_sell_ltc = float(cdrltc_soup[0]['Bid']) coinsoup['coindelta_ltc_sell'] = float(coindelta_sell_ltc) cdrbch_soup = cdrbch.json() coindelta_buy_bch = float(cdrbch_soup[0]['Ask']) coinsoup['coindelta_bch_buy'] = float(coindelta_buy_bch) coindelta_sell_bch = float(cdrbch_soup[0]['Bid']) coinsoup['coindelta_bch_sell'] = float(coindelta_sell_bch) cdrxrp_soup = cdrxrp.json() coindelta_buy_xrp = float(cdrxrp_soup[0]['Ask']) coinsoup['coindelta_xrp_buy'] = float(coindelta_buy_xrp) coindelta_sell_xrp = float(cdrxrp_soup[0]['Bid']) coinsoup['coindelta_xrp_sell'] = float(coindelta_sell_xrp) coinsoup['coindelta_timestamp'] = int( (datetime.datetime.utcnow() - datetime.datetime(1970, 1, 1, 0, 0, 0, 0)).total_seconds()) print('coindelta working') except Exception as e: print('some exception occured in coindelta') coindelta_buy_btc = float(oldvalue['old_coindelta_btc_buy']) coindelta_sell_btc = float(oldvalue['old_coindelta_btc_sell']) coindelta_buy_eth = float(oldvalue['old_coindelta_eth_buy']) coindelta_sell_eth = float(oldvalue['old_coindelta_eth_sell']) coindelta_buy_bch = float(oldvalue['old_coindelta_bch_buy']) coindelta_sell_bch = float(oldvalue['old_coindelta_bch_sell']) coindelta_buy_ltc = float(oldvalue['old_coindelta_ltc_buy']) coindelta_sell_ltc = float(oldvalue['old_coindelta_ltc_sell']) coindelta_buy_xrp = float(oldvalue['old_coindelta_xrp_buy']) coindelta_sell_xrp = float(oldvalue['old_coindelta_xrp_sell']) coinsoup['coindelta_btc_buy'] = float( oldvalue['old_coindelta_btc_buy']) coinsoup['coindelta_btc_sell'] = float( oldvalue['old_coindelta_btc_sell']) coinsoup['coindelta_eth_buy'] = float( oldvalue['old_coindelta_eth_buy']) coinsoup['coindelta_eth_sell'] = float( oldvalue['old_coindelta_eth_sell']) coinsoup['coindelta_bch_buy'] = float( oldvalue['old_coindelta_bch_buy']) coinsoup['coindelta_bch_sell'] = float( oldvalue['old_coindelta_bch_sell']) coinsoup['coindelta_ltc_buy'] = float( oldvalue['old_coindelta_ltc_buy']) coinsoup['coindelta_ltc_sell'] = float( oldvalue['old_coindelta_ltc_sell']) coinsoup['coindelta_xrp_buy'] = float( oldvalue['old_coindelta_xrp_buy']) coinsoup['coindelta_xrp_sell'] = float( oldvalue['old_coindelta_xrp_sell']) coinsoup['coindelta_timestamp'] = int( oldvalue['old_coindelta_timestamp']) error(e) with open('./coinprice.json', 'w', encoding='utf-8') as f: json.dump(coinsoup, f) current_time = int(datetime.datetime.now().timestamp()) json_content = json.load(open('coinprice.json')) try: if (int(current_time - json_content['btcx_timestamp'] <= 120)): btcx_err = False xrp_buy_dict['btcxindia'] = btcx_xrp_buy xrp_sell_dict['btcxindia'] = btcx_xrp_sell else: btcx_err = True if (int(current_time - 
json_content['coindelta_timestamp'] <= 120)): coindelta_err = False btc_buy_dict['coindelta'] = coindelta_buy_btc eth_buy_dict['coindelta'] = coindelta_buy_eth bch_buy_dict['coindelta'] = coindelta_buy_bch ltc_buy_dict['coindelta'] = coindelta_buy_ltc xrp_buy_dict['coindelta'] = coindelta_buy_xrp btc_sell_dict['coindelta'] = coindelta_sell_btc eth_sell_dict['coindelta'] = coindelta_sell_eth bch_sell_dict['coindelta'] = coindelta_sell_bch ltc_sell_dict['coindelta'] = coindelta_sell_ltc xrp_sell_dict['coindelta'] = coindelta_sell_xrp else: coindelta_err = True if (int(current_time - json_content['coinome_timestamp'] <= 120)): coinome_err = False btc_buy_dict['coinome'] = coinome_btc_buy bch_buy_dict['coinome'] = coinome_bch_buy ltc_buy_dict['coinome'] = coinome_ltc_buy dash_buy_dict['coinome'] = coinome_dash_buy btc_buy_dict['coinome'] = coinome_btc_sell bch_sell_dict['coinome'] = coinome_bch_sell ltc_sell_dict['coinome'] = coinome_ltc_sell dash_sell_dict['coinome'] = coinome_dash_sell else: coinome_err = True if (int(current_time - json_content['coinsecure_timestamp'] <= 120)): coinsecure_err = False btc_buy_dict['coinsecure'] = coinsecure_btc_buy btc_sell_dict['coinsecure'] = coinsecure_btc_sell else: coinsecure_err = True if (int(current_time - json_content['ethex_timestamp'] <= 120)): ethex_err = False eth_buy_dict['ethexindia'] = ethex_eth_buy eth_sell_dict['ethexindia'] = ethex_eth_sell else: ethex_err = True if (int(current_time - json_content['koinex_timestamp'] <= 120)): koinex_err = False btc_buy_dict['koinex'] = koinex_btc_buy bch_buy_dict['koinex'] = koinex_bch_buy ltc_buy_dict['koinex'] = koinex_ltc_buy eth_buy_dict['koinex'] = koinex_eth_buy xrp_buy_dict['koinex'] = koinex_xrp_buy btc_buy_dict['koinex'] = koinex_btc_sell bch_sell_dict['koinex'] = koinex_bch_sell ltc_sell_dict['koinex'] = koinex_ltc_sell eth_sell_dict['koinex'] = koinex_eth_sell xrp_sell_dict['koinex'] = koinex_xrp_sell else: koinex_err = True if (int(current_time - json_content['pocketbits_timestamp'] <= 120)): pocketbits_err = False btc_buy_dict['pocketbits'] = pocketbits_btc_buy btc_sell_dict['pocketbits'] = pocketbits_btc_sell else: pocketbits_err = True if (int(current_time - json_content['buyucoin_timestamp'] <= 120)): buyucoin_err = False btc_buy_dict['buyucoin'] = buyucoin_btc_buy btc_sell_dict['buyucoin'] = buyucoin_btc_sell else: buyucoin_err = True if (int(current_time - json_content['unocoin_timestamp'] <= 120)): unocoin_err = False btc_buy_dict['unocoin'] = unocoin_btc_buy btc_sell_dict['unocoin'] = unocoin_btc_sell else: unocoin_err = True if (int(current_time - json_content['zebpay_timestamp'] <= 120)): zebpay_err = False btc_buy_dict['zebpay'] = zebpay_btc_buy btc_sell_dict['zebpay'] = zebpay_btc_sell else: zebpay_err = True except Exception as e: buyucoin_err = oldvalue['old_buyucoin_error'] btcx_err = oldvalue['old_btcx_error'] coindelta_err = oldvalue['old_coindelta_error'] coinome_err = oldvalue['old_coinome_error'] coinsecure_err = oldvalue['old_coinsecure_error'] ethex_err = oldvalue['old_ethex_error'] koinex_err = oldvalue['old_koinex_error'] pocketbits_err = oldvalue['old_pocketbits_error'] unocoin_err = oldvalue['old_unocoin_error'] zebpay_err = oldvalue['old_zebpay_error'] error(e) try: best_btc_buy = min(btc_buy_dict.values()) best_btc_sell = max(btc_sell_dict.values()) best_eth_buy = min(eth_buy_dict.values()) best_eth_sell = max(eth_sell_dict.values()) best_bch_buy = min(bch_buy_dict.values()) best_bch_sell = max(bch_sell_dict.values()) best_ltc_buy = min(ltc_buy_dict.values()) 
best_ltc_sell = max(ltc_sell_dict.values()) best_xrp_buy = min(xrp_buy_dict.values()) best_xrp_sell = max(xrp_sell_dict.values()) best_dash_buy = min(dash_buy_dict.values()) best_dash_sell = max(dash_sell_dict.values()) print(best_dash_buy) print(best_dash_sell) except Exception as e: print("some exception occ in best values") best_btc_buy = oldvalue['old_best_btc_buy'] best_eth_buy = oldvalue['old_best_eth_buy'] best_bch_buy = oldvalue['old_best_bch_buy'] best_ltc_buy = oldvalue['old_best_ltc_buy'] best_xrp_buy = oldvalue['old_best_xrp_buy'] best_dash_buy = oldvalue['old_best_dash_buy'] best_btc_sell = oldvalue['old_best_btc_sell'] best_eth_sell = oldvalue['old_best_eth_sell'] best_bch_sell = oldvalue['old_best_bch_sell'] best_ltc_sell = oldvalue['old_best_ltc_sell'] best_xrp_sell = oldvalue['old_best_xrp_sell'] best_dash_sell = oldvalue['old_best_dash_sell'] error(e) try: best_btc_buy_from = (min(btc_buy_dict, key=btc_buy_dict.get)) best_btc_sell_to = (max(btc_sell_dict, key=btc_sell_dict.get)) best_eth_buy_from = (min(eth_buy_dict, key=eth_buy_dict.get)) best_eth_sell_to = (max(eth_sell_dict, key=eth_sell_dict.get)) best_bch_buy_from = (min(bch_buy_dict, key=bch_buy_dict.get)) best_bch_sell_to = (max(bch_sell_dict, key=bch_sell_dict.get)) best_ltc_buy_from = (min(ltc_buy_dict, key=ltc_buy_dict.get)) best_ltc_sell_to = (max(ltc_sell_dict, key=ltc_sell_dict.get)) best_xrp_buy_from = (min(xrp_buy_dict, key=xrp_buy_dict.get)) best_xrp_sell_to = (max(xrp_sell_dict, key=xrp_sell_dict.get)) best_dash_buy_from = (min(dash_buy_dict, key=dash_buy_dict.get)) best_dash_sell_to = max(dash_sell_dict, key=dash_sell_dict.get) except Exception as e: best_btc_buy_from = oldvalue['old_best_btc_buy_from'] best_eth_buy_from = oldvalue['old_best_eth_buy_from'] best_bch_buy_from = oldvalue['old_best_bch_buy_from'] best_ltc_buy_from = oldvalue['old_best_ltc_buy_from'] best_xrp_buy_from = oldvalue['old_best_xrp_buy_from'] best_dash_buy_from = oldvalue['old_best_dash_buy_from'] best_btc_sell_to = oldvalue['old_best_btc_sell_to'] best_eth_sell_to = oldvalue['old_best_eth_sell_to'] best_bch_sell_to = oldvalue['old_best_bch_sell_to'] best_ltc_sell_to = oldvalue['old_best_ltc_sell_to'] best_xrp_sell_to = oldvalue['old_best_xrp_sell_to'] best_dash_sell_to = oldvalue['old_best_dash_sell_to'] error(e) coinsoup['btcx_error'] = btcx_err coinsoup['buyucoin_error'] = buyucoin_err coinsoup['coindelta_error'] = coindelta_err coinsoup['coinome_error'] = coinome_err coinsoup['coinsecure_error'] = coinsecure_err coinsoup['ethex_error'] = ethex_err coinsoup['koinex_error'] = koinex_err coinsoup['pocketbits_error'] = pocketbits_err coinsoup['unocoin_error'] = unocoin_err coinsoup['zebpay_error'] = zebpay_err coinsoup['best_btc_buy'] = best_btc_buy coinsoup['best_btc_sell'] = best_btc_sell coinsoup['best_eth_buy'] = best_eth_buy coinsoup['best_eth_sell'] = best_eth_sell coinsoup['best_bch_buy'] = best_bch_buy coinsoup['best_bch_sell'] = best_bch_sell coinsoup['best_ltc_buy'] = best_ltc_buy coinsoup['best_ltc_sell'] = best_ltc_sell coinsoup['best_xrp_buy'] = best_xrp_buy coinsoup['best_xrp_sell'] = best_xrp_sell coinsoup['best_dash_buy'] = best_dash_buy coinsoup['best_dash_sell'] = best_dash_sell coinsoup['best_btc_buy_from'] = best_btc_buy_from coinsoup['best_btc_sell_to'] = best_btc_sell_to coinsoup['best_eth_buy_from'] = best_eth_buy_from coinsoup['best_eth_sell_to'] = best_eth_sell_to coinsoup['best_bch_buy_from'] = best_bch_buy_from coinsoup['best_bch_sell_to'] = best_bch_sell_to coinsoup['best_ltc_buy_from'] = 
best_ltc_buy_from coinsoup['best_ltc_sell_to'] = best_ltc_sell_to coinsoup['best_xrp_buy_from'] = best_xrp_buy_from coinsoup['best_xrp_sell_to'] = best_xrp_sell_to coinsoup['best_dash_buy_from'] = best_dash_buy_from coinsoup['best_dash_sell_to'] = best_dash_sell_to try: with open('./coinprice.json', 'w', encoding='utf-8') as f: json.dump(coinsoup, f) cur_time = datetime.datetime.now(tz) print(cur_time.strftime(timefmt)) #print(best_btc_sell_to) except Exception as e: print('some exception occurred while writing the file') error(e)
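# The best-price selection above is just min() over buy quotes and max() over sell quotes.
# A tiny self-contained illustration with made-up numbers (INR), not real market data.
btc_buy_quotes = {"coindelta": 701000.0, "koinex": 698500.0, "zebpay": 702300.0}
btc_sell_quotes = {"coindelta": 699000.0, "koinex": 697000.0, "zebpay": 700500.0}

best_buy = min(btc_buy_quotes.values())                       # cheapest place to buy
best_buy_from = min(btc_buy_quotes, key=btc_buy_quotes.get)   # exchange offering it
best_sell = max(btc_sell_quotes.values())                     # highest bid to sell into
best_sell_to = max(btc_sell_quotes, key=btc_sell_quotes.get)

print(best_buy, best_buy_from)    # 698500.0 koinex
print(best_sell, best_sell_to)    # 700500.0 zebpay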
#!/usr/bin/env python2.7
from __future__ import absolute_import

import cfscrape
from bs4 import BeautifulSoup
import json, re

from .classes import WebEpisode, WebSeries, WebVideo, WebFetcher

base = "http://www.masterani.me/anime"
api_base = "http://www.masterani.me/api/anime/"
scraper = cfscrape.create_scraper()  # to get past cloudflare


def fetch_masterani(search_query):
    params = {
        "search": search_query,
        "sb": "true"
    }  # masterani will use a much better string matcher with this
    jsonresults = scraper.get(api_base + "search", params=params).content
    # json results has the actual results we care about
    # the html just contains some additional metadata we can use
    videolist = []
    for anime in json.loads(jsonresults.decode('utf-8')):
        link = base + "/info/" + anime['slug']
        name = anime['title']
        animeid = str(anime['id'])
        detailjson = scraper.get(api_base + animeid + "/detailed").content
        d = json.loads(detailjson.decode('utf-8'))
        episode_count = d['info']['episode_count']
        synopsis = d['info']['synopsis']
from bs4 import BeautifulSoup
import urllib
from IPython.display import Image, display
import cfscrape

scraper = cfscrape.create_scraper()  # returns a CloudflareScraper instance
# Or: scraper = cfscrape.CloudflareScraper()  # CloudflareScraper inherits from requests.Session
# print scraper.get("https://www.fxp.co.il/forumdisplay.php?f=626").content
# url = 'https://www.fxp.co.il/forumdisplay.php?f=626'
# r = urllib.urlopen(url).read()

soup = BeautifulSoup(
    scraper.get("https://www.fxp.co.il/forumdisplay.php?f=626").content, 'lxml')
sublinks = [
    'https://www.fxp.co.il/' + x['href']
    for x in soup.find_all('a', class_='title')[:]
]
for link in sublinks:
    soup = BeautifulSoup(scraper.get(link).content, 'lxml')
    soup.find_all('blockquote', recursive=True)
    print()
    print()
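# The loop above fetches each thread page but discards the find_all() result. A hedged
# sketch of what extracting the quoted post bodies could look like; the selector is an
# assumption about fxp.co.il's markup, not verified against the site.
for link in sublinks:
    thread_soup = BeautifulSoup(scraper.get(link).content, 'lxml')
    for quote in thread_soup.find_all('blockquote', recursive=True):
        print(quote.get_text(strip=True))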
def do_head_request(self, lin_and_par_tup): print() print('-- in do_head_request() ') if not lin_and_par_tup: #nothing correct passed in return print("here is the tuple in do_head_request(): " + str(lin_and_par_tup)) rdone_singles = self.MAIN_DICT.get(self.rdonesingles) if lin_and_par_tup[0] in rdone_singles: print( '!!!!!============================found dupe in do_head_request() - skipping. ' ) return parent = "empty" head_response = None link_to_ck, parent = lin_and_par_tup[0], lin_and_par_tup[1] self.myprint("trying THIS_LN: " + link_to_ck + " parent: " + parent + " in do_head_request") try: #--------------- simple request of head only -------------------------------!!------- head_response = requests.head(link_to_ck) # --------------- simple request of head only -----------------------------!!!--------- self.MAIN_DICT.get(self.rdonesingles).append( link_to_ck) # record we did this one head_stat = head_response.status_code self.myprint("do_head_request is : " + str(head_stat)) if head_stat == 404: scraper = cfscrape.create_scraper( ) # returns a CloudflareScraper instance print("Trying Cloudflare unit ---------!!!") scp = scraper.get(link_to_ck) head_stat = scp.status_code except Exception as e: self.myprint("Exception inside do_head_request. " + str(e)) self.handle_exc(link_to_ck, e, parent) return try: if head_stat < 301: return elif head_stat > 300: if head_stat == 301: # perm redirect follow_url = head_response._next.url # only for 301 errs self.MAIN_DICT.get(self.redirs).append( (link_to_ck, parent, follow_url)) return elif head_stat in [400, 404, 408, 409, 410]: self.add_err_to_errlinks(link_to_ck, head_stat, parent) else: return # else: # no status means no regular result # self.add_err_to_errlinks(link_to_ck, 000 , parent) except Exception as e: self.myprint("Exception inside do_head_request 2: " + str(e))
import cfscrape import re import json from random import choice from bs4 import BeautifulSoup from pyjsparser import parse cfscraper = cfscrape.create_scraper(delay=12) user_agents = ['Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36', 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/602.2.14 (KHTML, like Gecko) Version/10.0.1 Safari/602.2.14', 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36', 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36', 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36'] def randomUserAgent(): return { 'user-agent': choice(user_agents) } def scrapeLastAnimeAdded(): response = cfscraper.get('https://animeflv.net/', headers=randomUserAgent()) if response.status_code != 200: return [] html_file = response.content
def test_download_page(self):
    scraper = cfscrape.create_scraper()
    page_url = 'https://www.japscan.to/lecture-en-ligne/hajime-no-ippo/1255/1.html'
    download_helper.download_page(scraper, page_url)
import logging
import tempfile
import os

import cfscrape
import requests

from anime_downloader import session
from anime_downloader.const import get_random_header

__all__ = [
    'get',
    'post',
    'soupify',
]

logger = logging.getLogger(__name__)

cf_session = cfscrape.create_scraper()
default_headers = get_random_header()
temp_dir = tempfile.mkdtemp(prefix='animedl')
logger.debug(f"HTML file temp_dir: {temp_dir}")


def setup(func):
    """
    setup is a decorator which takes a function and
    converts it into a request method
    """
    def setup_func(url: str,
                   cf: bool = False,
                   sel: bool = False,
                   referer: str = None,
                   cache: bool = True,
import sys
import http.cookiejar

import cfscrape
import requests
from bs4 import BeautifulSoup

headers = {
    'Host': 'www.crunchyroll.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': '*',
    'Referer': 'https://www.crunchyroll.com/login',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Connection': 'keep-alive'
}

sess = requests.session()
sess = cfscrape.create_scraper(sess)

username = sys.argv[1]
password = sys.argv[2]
cpath = sys.argv[3]

response = sess.get("https://www.crunchyroll.com/login", headers=headers)

cj = http.cookiejar.MozillaCookieJar(cpath)
for c in sess.cookies:
    cj.set_cookie(c)

soup = BeautifulSoup(response.text, features="html.parser")
token = soup.find('input', {'name': 'login_form[_token]'})['value']

payload = {
    "login_form[name]": username,
def checkProxy(proxy, url): # with open(originalWorkingDirectory+os.sep+"proxies.txt","rb") as f: # proxies=pickle.load(f) # proxies = createProxyList() # proxy_pool = cycle(proxies) # print(proxies) # url = 'https://httpbin.org/ip' # url="https://google.com" # print("Changing Proxy...") # for i in range(0,80): # Get a proxy from the pool # proxy = next(proxy_pool) # print("Request #%d"%i) try: # print(next(proxy_pool)) # proxy = next(proxy_pool) # print("Request #%d"%i) # print(url) if not checkInternet(): print( "Could not connect, trying again in 3 seconds! " ) time.sleep(3) checkProxy(proxy, url) return scraper = cfscrape.create_scraper() # requests.packages.urllib3.disable_warnings() # response = scraper.get(url,proxies={"http": proxy, "https": proxy},headers={'User-Agent': 'Chrome'}, timeout=5) response = scraper.get(url, proxies={"https": proxy}, headers={'User-Agent': 'Chrome'}, timeout=5) # response = requests.get(url,proxies={"http": proxy, "https": proxy}) # print(response.json()) except: # print(proxy+" Failed.") # proxies.remove(proxy) stdout.write("%s \r" % proxy) stdout.flush() # time.sleep(.1) # print("Bad Proxy", sep=' ', end='', flush=True) return (-1) else: # we will never run same proxy again # proxies.remove(proxy) stdout.write("XXX---Bad proxy---XXX\r") stdout.flush() # time.sleep(.1) # print(proxy, sep=' ', end='', flush=True) return (proxy) # with open(originalWorkingDirectory+os.sep+"proxies.txt","wb") as f: # pickle.dump(proxies, f) # break # print("Proxy found : "+proxy) # return(proxy) # def removeProxy(proxy,originalWorkingDirectory): # with open(originalWorkingDirectory+os.sep+"proxies.txt","rb") as f: # proxies=pickle.load(f) # proxies.remove(proxy) # with open(originalWorkingDirectory+os.sep+"proxies.txt","wb") as f: # pickle.dump(proxies, f) # url="https://readcomiconline.to/Comic/" # rotateProxy.createProxyList(url)
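# A minimal driver sketch for checkProxy() above, assuming a createProxyList() helper
# exists elsewhere in the project (it only appears in the commented-out code). checkProxy()
# returns -1 when the request fails and the proxy itself when the request goes through.
def find_working_proxy(url, proxies):
    """Return the first proxy that survives checkProxy(), or None if none do."""
    for proxy in proxies:
        if checkProxy(proxy, url) != -1:
            return proxy
    return None

# Hypothetical usage; the target URL mirrors the one in the commented-out notes above.
# proxies = createProxyList()
# print(find_working_proxy("https://readcomiconline.to/Comic/", proxies))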
def handle(msg): print msg chat_id = msg['chat']['id'] command = msg['text'] print 'Got command: %s' % command if command == '/chat_id': #get user chat-id print chat_id elif command == command: if chat_id != 1578421667: #can delete it num = 1 for i in range(1, 2): # number of pages ############ get web page source ####################### scraper = cfscrape.create_scraper() url = "http://movie98.net/page/" + str(i) + "?s=" + command get = scraper.get( url ).content # use scraper for bypass some bot blockers :-\ soup = BeautifulSoup(get, "html.parser") soup.prettify() # print soup ########## get title ############## for post in soup.find_all("a", attrs={ "rel": "bookmark", "class": "more" }): # print post ti = post['title'] seri = ti.split() if u'سریال' not in seri: href = post['href'] tit2 = ti.encode('utf-8') href8 = href.encode('utf-8') hashtag = "\n" + "#" + "movie" + "_" + str(num) tit = tit2 + hashtag ########### get continue link ############## bot.sendMessage(chat_id, "######## Next Video ########") bot.sendMessage(chat_id, tit) get_two = scraper.get(href).text soup_two = BeautifulSoup(get_two, "html.parser") soup_two.prettify("utf-8") num = num + 1 ############# get images ############## for cont in soup_two.find_all( "div", attrs={"class": "context"}): for pis in cont.find_all("p"): for img in pis.find_all( "img", attrs={ "class": "size-full", "height": "500" } ): # find images and send to user ,but some pictures were not sent(I could not fix it)because of this i disable it co = 1 image = img['src'] # image8 = image.encode('utf-8') # print image8 # print image # de = image.decode('utf-8') # bot.sendPhoto(chat_id, str(de)) # print "aaaaaa" for data in cont.find_all( "p", attrs={"style": "text-align: justify;"}): if co <= 1: co = co + 1 data2 = data.getText() # print data2 # print "ccccccc" for download in soup_two.find_all( "div", attrs={"class": "downloadmovie" }): # get the download links ############ get informations ######################## data4 = data2.encode('utf-8') data8 = data4.replace(':', '') # print data8 data5 = data8.replace('منتشر کننده فایل Movie98', '') # print data5 data6 = data5.replace('زیرنویس در ادامه مطلب', 'زیرنویس : در ادامه') # print data6 # print "bbbbbb" bot.sendMessage(chat_id, data6) qu = 0 for li in download.find_all("li"): lili = li.getText() lili = lili.encode('utf-8') # print "fffff" ############ remove other texts ####################### del1 = lili.replace('دانلود', '') info = del1.replace('زیرنویس', '') # print info ########## get download link ########################## link_sub2 = '<a href="#"></a>' for sub in li.find_all( "a", attrs={"class": "msubtitle"}): sub = sub['href'] link_sub = '<a href="%s">download subtitle...</a>' % sub link_sub2 = link_sub.encode('utf-8') # print "hhhhh" for a in li.find_all("a", attrs={"class": "mlink"}): lin = a['href'] qu = 1 + qu text = "لینک دانلود شماره" # text2 = text.encode('utf-8') qul = text + str(qu) ### make link with parsing html link = '<a href="%s">%s</a>' % (lin, ti) link2 = link.encode('utf-8') full = qul + '\n' + link2 + '\n' + info + '\n' + link_sub2 bot.sendMessage( chat_id, full, parse_mode="HTML", disable_web_page_preview='true')
def singleEpisode(self, url, cookies, token, resolution): video_id = str(url.split('-')[-1]).replace("/", "") logging.debug("video_id : %s", video_id) headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36', 'Upgrade-Insecure-Requests': '1', 'Accept-Encoding': 'gzip, deflate' } sess = requests.session() sess = cfscrape.create_scraper(sess) info_url = "" resolution_to_find = None if str(resolution).lower() in ['1080p', '1080', 'fhd', 'best']: info_url = "http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=108&video_quality=80¤t_page=%s" % ( video_id, url) resolution_to_find = "1920x1080" elif str(resolution).lower() in ['720p', '720', 'hd']: info_url = "http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=106&video_quality=62¤t_page=%s" % ( video_id, url) resolution_to_find = "1280x720" elif str(resolution).lower() in ['480p', '480', 'sd']: info_url = "http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=106&video_quality=61¤t_page=%s" % ( video_id, url) resolution_to_find = "848x480" elif str(resolution).lower() in ['360p', '360', 'cancer']: info_url = "http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=106&video_quality=60¤t_page=%s" % ( video_id, url) resolution_to_find = "640x360" elif str(resolution).lower() in ['240p', '240', 'supracancer']: info_url = "http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=106&video_quality=60¤t_page=%s" % ( video_id, url) resolution_to_find = "428x240" logging.debug("info_url : %s", info_url) if resolution_to_find is None: print('Unknown requested resolution %s' % str(resolution).lower()) return xml_page_connect = sess.get(url=info_url, headers=headers, cookies=cookies) if xml_page_connect.status_code == 200: xml_page = xml_page_connect.text.encode("utf-8") try: m3u8_file_link = str(re.search(r'<file>(.*?)</file>', xml_page).group(1)).replace("&", "&") logging.debug("m3u8_file_link : %s", m3u8_file_link) if not m3u8_file_link: # If no m3u8 found, try the rtmpdump... 
try: host_link = re.search(r'<host>(.*?)</host>', xml_page).group(1) logging.debug("Found RTMP DUMP!") print("RTMP streams not supported currently...") except Exception as NoRtmpDump: print("No RTMP Streams Found...") print(NoRtmpDump) else: anime_name = re.sub(r'[^A-Za-z0-9\ \-\' \\]+', '', str( re.search(r'<series_title>(.*?)</series_title>', xml_page).group(1))).title().strip() episode_number = re.search(r'<episode_number>(.*?)</episode_number>', xml_page.decode("utf-8")).group(1) #video_width = re.search(r'<width>(.*?)</width>', xml_page.decode("utf-8")).group(1) #video_height = re.search(r'<height>(.*?)</height>', xml_page.decode("utf-8")).group(1) video_width, video_height = resolution_to_find.split("x") video_resolution = str(video_width) + "x" + str(video_height) file_name = animeName.animeName().nameEdit(animeName=anime_name, episodeNumber=episode_number, resolution=video_resolution) output_directory = os.path.abspath("Output" + os.sep + str(anime_name) + "/") # print("output_directory : {0}".format(output_directory)) if not os.path.exists("Output"): os.makedirs("Output") if not os.path.exists(output_directory): os.makedirs(output_directory) file_location = str(output_directory) + os.sep + str(file_name).replace(".mp4", ".mkv") logging.debug("anime_name : %s", anime_name) logging.debug("episode_number : %s", episode_number) logging.debug("video_resolution : %s", video_resolution) logging.debug("file_name : %s", file_name) if os.path.isfile(file_location): print('[Anime-dl] File Exists! Skipping %s\n' % file_name) pass else: self.subFetcher( xml=str(xml_page), episode_number=episode_number, file_name=file_name) m3u8_file_connect = sess.get(url=m3u8_file_link, cookies=cookies, headers=headers) try: #m3u8_file_text = m3u8_file_connect.text.splitlines()[2] m3u8_file_text = None available_resolutions = [] next_line_is_good = False for i, currentLine in enumerate(m3u8_file_connect.text.splitlines()): if next_line_is_good: m3u8_file_text = currentLine logging.debug("file to download : %s", m3u8_file_text) break elif currentLine.startswith("#EXT-X"): currentLineResolution = re.search(r'RESOLUTION=[0-9]+x([0-9]+)', currentLine) if currentLineResolution: currentLineResolution = currentLineResolution.group(1) + 'p' if currentLineResolution not in available_resolutions: available_resolutions.append(currentLineResolution) if resolution_to_find in currentLine: next_line_is_good = True if m3u8_file_text is None: print('Could not find the requested resolution [%s] in the master.m3u8 file\n' % resolution_to_find) if len(available_resolutions) > 0: print('Here are all the available resolutions: [%s]' % ', '.join(sorted(available_resolutions))) return logging.debug("m3u8_file_text : %s", m3u8_file_text) ffmpeg_command = 'ffmpeg -i "{0}" -c copy -bsf:a aac_adtstoasc "{1}/{2}"'.format( m3u8_file_text, os.getcwd(), file_name) logging.debug("ffmpeg_command : %s", ffmpeg_command) subprocess.call(ffmpeg_command, shell=True) subtitles_files = [] for sub_file in glob("*.ass"): if sub_file.endswith(".enUS.ass"): subtitles_files.insert(0, "--track-name 0:English(US) --ui-language en --language 0:eng --default-track 0:yes --sub-charset 0:utf-8 " + '"' + str( os.path.realpath(sub_file)) + '" ') elif sub_file.endswith(".enGB.ass"): subtitles_files.append( "--track-name 0:English(UK) --ui-language en --language 0:eng --default-track 0:no --sub-charset 0:utf-8 " + '"' + str( os.path.realpath(sub_file)) + '" ') elif sub_file.endswith(".esLA.ass"): subtitles_files.append( "--track-name 0:Espanol --ui-language es 
--language 0:spa --default-track 0:no --sub-charset 0:utf-8 " + '"' + str( os.path.realpath(sub_file)) + '" ') elif sub_file.endswith(".esES.ass"): subtitles_files.append( "--track-name 0:Espanol(Espana) --ui-language es --language 0:spa --default-track 0:no --sub-charset 0:utf-8 " + '"' + str( os.path.realpath(sub_file)) + '" ') elif sub_file.endswith(".ptBR.ass"): subtitles_files.append( "--track-name 0:Portugues(Brasil) --ui-language pt --language 0:por --default-track 0:no --sub-charset 0:utf-8 " + '"' + str( os.path.realpath(sub_file)) + '" ') elif sub_file.endswith(".ptPT.ass"): subtitles_files.append( "--track-name 0:Portugues(Portugal) --ui-language pt --language 0:por --default-track 0:no --sub-charset 0:utf-8 " + '"' + str( os.path.realpath(sub_file)) + '" ') elif sub_file.endswith(".frFR.ass"): subtitles_files.append( "--track-name 0:Francais(France) --ui-language fr --language 0:fre --default-track 0:no --sub-charset 0:utf-8 " + '"' + str( os.path.realpath(sub_file)) + '" ') elif sub_file.endswith(".deDE.ass"): subtitles_files.append( "--track-name 0:Deutsch --ui-language de --language 0:ger --default-track 0:no --sub-charset 0:utf-8 " + '"' + str( os.path.realpath(sub_file)) + '" ') elif sub_file.endswith(".arME.ass"): subtitles_files.append( "--track-name 0:Arabic --language 0:ara --default-track 0:no --sub-charset 0:utf-8 " + '"' + str( os.path.realpath(sub_file)) + '" ') elif sub_file.endswith(".itIT.ass"): subtitles_files.append( "--track-name 0:Italiano --ui-language it --language 0:ita --default-track 0:no --sub-charset 0:utf-8 " + '"' + str( os.path.realpath(sub_file)) + '" ') elif sub_file.endswith(".trTR.ass"): subtitles_files.append( "--track-name 0:Turkce --ui-language tr --language 0:tur --default-track 0:no --sub-charset 0:utf-8 " + '"' + str( os.path.realpath(sub_file)) + '" ') else: subtitles_files.append( "--track-name 0:und --default-track 0:no --sub-charset 0:utf-8 " + '"' + str( os.path.realpath(sub_file)) + '" ') subs_files = self.duplicate_remover(subtitles_files) logging.debug("subs_files : %s", subs_files) font_files = [os.path.realpath(font_file) for font_file in glob(str(os.getcwd()) + "/Fonts/*.*")] fonts = '--attachment-mime-type application/x-truetype-font --attach-file "' + str( '" --attachment-mime-type application/x-truetype-font --attach-file "'.join( font_files)) + '"' if len(font_files) == 0: fonts = '' mkv_merge_command = 'mkvmerge --ui-language en --output "%s" ' % str(file_name).replace( ".mp4", ".mkv") + '"' + str( file_name) + '" ' + ' '.join(subs_files) + ' ' + str(fonts) logging.debug("mkv_merge_command : %s", mkv_merge_command) try: subprocess.call(mkv_merge_command, shell=True) for video_file in glob("*.mkv"): try: move(video_file, output_directory) except Exception as e: print(str(e)) pass for video in glob("*.mp4"): os.remove(os.path.realpath(video)) for sub_file_delete in glob("*.ass"): os.remove(os.path.realpath(sub_file_delete)) except Exception as FileMuxingException: print("Sees like I couldn't mux the files.") print("Check whether the MKVMERGE.exe is in PATH or not.") print(FileMuxingException) for video_file in glob("*.mp4"): try: move(video_file, output_directory) except Exception as e: print(str(e)) pass for sub_files in glob("*.ass"): try: move(sub_files, output_directory) except Exception as e: print(str(e)) pass except Exception as NoM3u8File: print("Couldn't connect to the m3u8 file download link...") print(NoM3u8File) sys.exit(1) except Exception as NotAvailable: print("Seems like this video isn't available...") 
print(NotAvailable) else: print("Could not connect to Crunchyroll's media page.") print("It returned: {0}".format(xml_page_connect.status_code))
def scrapeURL(url):
    cookies = pycookiecheat.chrome_cookies(url)
    return cfscrape.create_scraper().get(url, cookies=cookies).content
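# Usage sketch for scrapeURL() above (an assumption, not part of the original module):
# pycookiecheat lifts Chrome's cookies for the domain, so any Cloudflare clearance the
# browser already earned is reused by cfscrape. The URL and parsing step are placeholders.
from bs4 import BeautifulSoup

page_html = scrapeURL("https://example.com/protected-page")
page_soup = BeautifulSoup(page_html, "lxml")
print(page_soup.title)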
def bloomberg(site): """ live forex rates scraped from bloomberg.com """ uri = "https://www.bloomberg.com/markets/api/bulk-time-series/price/" endpoint = "USDCNY%3ACUR,USDRUB%3ACUR,USDJPY%3ACUR,USDEUR%3ACUR,USDKRW%3ACUR,XAUUSD%3ACUR,XAGUSD%3ACUR" url = uri + endpoint headers = { "authority": "www.bloomberg.com", "method": "GET", "path": ("/markets/api/comparison/data?securities=" + "USDCNY%3ACUR,USDRUB%3ACUR,USDJPY%3ACUR,USDEUR%3ACUR,USDKRW%3ACUR,XAUUSD%3ACUR,XAGUSD%3ACUR" + "&securityType=CURRENCY&locale=en"), "scheme": "https", "accept": ("text/html,application/xhtml+xml,application/xml;q=0.9,image/" + "webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9"), "accept-encoding": "gzip, deflate, br", "accept-language": "en-US,en;q=0.9", "cache-control": "max-age=0", "cookie": ("bbAbVisits=1; _pxhd=e24b47c64d37711c147cfb3c4b35c845563d2f9831b" + "03d9189f8cd761bc2be4f:d78eeb01-34c9-11ea-8f86-51d2aad9afb3; _px" + "vid=d78eeb01-34c9-11ea-8f86-51d2aad9afb3; _reg-csrf=s%3Ab0pWvbcs" + "UtrjYeJ0T2GrTaaD.8kaQlvHchJ1D%2FZZMaQWQiTizJTxrqqyzzuEZHEvlQNw;" + " agent_id=7989385a-d6d9-4446-b7aa-3c937407862b;" + " session_id=5702901e-d5fe-41e7-b259-df46322015e0;" + " session_key=3179869387f4c4ec4385e0d16222f0e59f48c47f;" + " _user-status=anonymous; _is-ip-whitelisted=false;" + " _user-ip=91.132.137.116; trc_cookie_storage=taboola%2520global%253" + "Auser-id%3D2f4acdc6-7c3c-412c-8766-d9c80dcffc38-tuct513df3e;" + " bdfpc=004.0586371899.1578785723722;" + " _reg-csrf-token=4ZxUa9q8-fkNXQkoHHXhnobWne1sDlIVcKEQ"), "dnt": "1", "if-none-match": 'W/"lZU52eQYxjadyNKGCyftEg=="', "sec-fetch-mode": "navigate", "sec-fetch-site": "none", "sec-fetch-user": "******", "upgrade-insecure-requests": "1", "user-agent": ("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36" + " (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36 OPR/66.0.3515.27" ), } try: session = requests.Session() session.headers = headers cfscrape_requests = cfscrape.create_scraper(sess=session) ret = cfscrape_requests.get(url, timeout=(15, 15)).json() data = {} for item in ret: symbol = item["id"].replace(":CUR", "") symbol = symbol[:3] + ":" + symbol[-3:] data[symbol] = float(item["lastPrice"]) data["USD:XAG"] = 1 / data.pop("XAG:USD") data["USD:XAU"] = 1 / data.pop("XAU:USD") data = refine_data(data) print(site, data) race_write(f"{site}_forex.txt", json_dumps(data)) except: print(f"{site} failed to load")
def scraper(self):
    import cfscrape
    return cfscrape.create_scraper()  # Bypass Cloudflare
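# Illustrative only: a stand-in Host class showing how a lazily built scraper like the
# method above is typically consumed. The real provider class and target URL are assumptions.
import cfscrape

class Host:
    def scraper(self):
        return cfscrape.create_scraper()  # behaves like a requests.Session

resp = Host().scraper().get("https://example.com")
print(resp.status_code)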
def downloadit(self, id, link, mainlink, resume=None): # logger.info('[%s] %s -- mainlink: %s' % (id, link, mainlink)) if mylar.DDL_LOCK is True: logger.fdebug( '[DDL] Another item is currently downloading via DDL. Only one item can' ' be downloaded at a time using DDL. Patience.') return else: mylar.DDL_LOCK = True myDB = db.DBConnection() filename = None try: with cfscrape.create_scraper() as s: if resume is not None: logger.info( '[DDL-RESUME] Attempting to resume from: %s bytes' % resume) self.headers['Range'] = 'bytes=%d-' % resume cf_cookievalue, cf_user_agent = s.get_tokens( mainlink, headers=self.headers, timeout=30) t = s.get( link, verify=True, cookies=cf_cookievalue, headers=self.headers, stream=True, timeout=30, ) filename = os.path.basename(urllib.parse.unquote( t.url)) # .decode('utf-8')) if 'GetComics.INFO' in filename: filename = re.sub('GetComics.INFO', '', filename, re.I).strip() try: remote_filesize = int(t.headers['Content-length']) logger.fdebug('remote filesize: %s' % remote_filesize) except Exception as e: if 'run.php-urls' not in link: link = re.sub('run.php-url=', 'run.php-urls', link) link = re.sub('go.php-url=', 'run.php-urls', link) t = s.get( link, verify=True, cookies=cf_cookievalue, headers=self.headers, stream=True, timeout=30, ) filename = os.path.basename(urllib.parse.unquote( t.url)) # .decode('utf-8')) if 'GetComics.INFO' in filename: filename = re.sub('GetComics.INFO', '', filename, re.I).strip() try: remote_filesize = int(t.headers['Content-length']) logger.fdebug('remote filesize: %s' % remote_filesize) except Exception as e: logger.warn( '[WARNING] Unable to retrieve remote file size - this' ' is usually due to the page being behind a different' ' click-bait/ad page. Error returned as : %s' % e) logger.warn( '[WARNING] Considering this particular download as' ' invalid and will ignore this result.') remote_filesize = 0 mylar.DDL_LOCK = False return { "success": False, "filename": filename, "path": None, } else: logger.warn( '[WARNING] Unable to retrieve remote file size - this is' ' usually due to the page being behind a different' ' click-bait/ad page. Error returned as : %s' % e) logger.warn( '[WARNING] Considering this particular download as invalid' ' and will ignore this result.') remote_filesize = 0 mylar.DDL_LOCK = False return { "success": False, "filename": filename, "path": None } # write the filename to the db for tracking purposes... myDB.upsert( 'ddl_info', { 'filename': filename, 'remote_filesize': remote_filesize }, {'id': id}, ) if mylar.CONFIG.DDL_LOCATION is not None and not os.path.isdir( mylar.CONFIG.DDL_LOCATION): checkdirectory = mylar.filechecker.validateAndCreateDirectory( mylar.CONFIG.DDL_LOCATION, True) if not checkdirectory: logger.warn( '[ABORTING] Error trying to validate/create DDL download' ' directory: %s.' 
% mylar.CONFIG.DDL_LOCATION) return { "success": False, "filename": filename, "path": None } path = os.path.join(mylar.CONFIG.DDL_LOCATION, filename) # if t.headers.get('content-encoding') == 'gzip': # buf = StringIO(t.content) # f = gzip.GzipFile(fileobj=buf) if resume is not None: with open(path, 'ab') as f: for chunk in t.iter_content(chunk_size=1024): if chunk: f.write(chunk) f.flush() else: with open(path, 'wb') as f: for chunk in t.iter_content(chunk_size=1024): if chunk: f.write(chunk) f.flush() except Exception as e: logger.error('[ERROR] %s' % e) mylar.DDL_LOCK = False return {"success": False, "filename": filename, "path": None} else: mylar.DDL_LOCK = False if os.path.isfile(path): if path.endswith('.zip'): new_path = os.path.join( mylar.CONFIG.DDL_LOCATION, re.sub('.zip', '', filename).strip()) logger.info( 'Zip file detected.' ' Unzipping into new modified path location: %s' % new_path) try: zip_f = zipfile.ZipFile(path, 'r') zip_f.extractall(new_path) zip_f.close() except Exception as e: logger.warn( '[ERROR: %s] Unable to extract zip file: %s' % (e, new_path)) return { "success": False, "filename": filename, "path": None } else: try: os.remove(path) except Exception as e: logger.warn( '[ERROR: %s] Unable to remove zip file from %s after' ' extraction.' % (e, path)) filename = None else: new_path = path return { "success": True, "filename": filename, "path": new_path }
def fxempire1(site): """ live forex rates scraped from fxempire.com (backdoor to xignite) """ url = "https://www.fxempire.com/api/v1/en/markets/list" headers = { "authority": "www.fxempire.com", "method": "GET", "path": "/api/v1/en/markets/list", "scheme": "https", "accept": ("text/html,application/xhtml+xml,application/xml;q=0.9,image/webp," + "image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9"), "accept-encoding": "gzip, deflate, br", "accept-language": "en-US,en;q=0.9", "cache-control": "max-age=0", "dnt": "1", "sec-fetch-mode": "navigate", "sec-fetch-site": "none", "sec-fetch-user": "******", "upgrade-insecure-requests": "1", "user-agent": ("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36" + " (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36 OPR/66.0.3515.27" ), } try: session = requests.Session() session.headers = headers cfscrape_requests = cfscrape.create_scraper(sess=session) ret = cfscrape_requests.get(url, timeout=(15, 15)).json() data = {} for item in ret["forex"]: if item: try: pair = item["name"].replace("/", ":") price = item["value"] data[pair] = float(price) except: pass for item in ret["commodities"]: try: if item["symbol"] in ["XAUUSD", "XAGUSD"]: pair = "USD:" + item["symbol"].replace("USD", "") price = 1 / float(item["value"]) data[pair] = price except: pass data = {k: v for k, v in data.items() if "RUB" not in k} # RUBLE is stale data = refine_data(data) print(site, data) race_write(f"{site}_forex.txt", json_dumps(data)) except: print(f"{site} failed to load")
import packages.requests as requests
import re

# New Cloudflare 28.Sep.2019
import cfscrape
scrapper = cfscrape.create_scraper(
    delay=10)  # returns a CloudflareScraper instance


# Site splitter
def findZiploc(addonpage):
    # Curse
    if addonpage.startswith('https://mods.curse.com/addons/wow/'):
        return curse(convertOldCurseURL(addonpage))
    elif addonpage.startswith('https://www.curseforge.com/wow/addons/'):
        return curse(addonpage)

    # Curse Project
    elif addonpage.startswith('https://wow.curseforge.com/projects/'):
        if addonpage.endswith('/files'):
            # Remove /files from the end of the URL, since it gets added later
            return curseProject(addonpage[:-6])
        else:
            return curseProject(addonpage)

    # WowAce Project
    elif addonpage.startswith('https://www.wowace.com/projects/'):
        if addonpage.endswith('/files'):
            # Remove /files from the end of the URL, since it gets added later
            return wowAceProject(addonpage[:-6])
def fxcm(site): """ live forex rates scraped from fxcm.com """ timestamp = int(time.time() * 1000) - 1000 url = f"https://ratesjson.fxcm.com/DataDisplayer?t={timestamp}" headers = { "authority": "www.fxcm.com", "method": "GET", "path": "/api/v1/en/markets/list", "scheme": "https", "accept": ("text/html,application/xhtml+xml,application/xml;q=0.9,image/webp," + "image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9"), "accept-encoding": "gzip, deflate, br", "accept-language": "en-US,en;q=0.9", "cache-control": "max-age=0", "dnt": "1", "sec-fetch-mode": "navigate", "sec-fetch-site": "none", "sec-fetch-user": "******", "upgrade-insecure-requests": "1", "user-agent": ("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, " + "like Gecko) Chrome/79.0.3945.79 Safari/537.36 OPR/66.0.3515.27"), } try: # fails during some hours of day session = requests.Session() session.headers = headers cfscrape_requests = cfscrape.create_scraper(sess=session) ret = cfscrape_requests.get(url, timeout=(15, 15)).text # print (ret) data = (ret.replace(" ", "").replace('null({"Rates":', "").replace(",}]});", "}]").replace(",}", "}")) # print(data) # {"Symbol":"CHFJPY","Bid":"1.1","Ask":"1.2","Spread":"0.1","ProductType":"1",} raw = json_loads(data) data = {} for item in raw: symbol = item["Symbol"] if symbol.isupper() and (len(symbol) == 6): symbol = symbol[:3] + ":" + symbol[-3:] data[symbol] = (float(item["Ask"]) + float(item["Bid"])) / 2 data = refine_data(data) print(site, data) race_write(f"{site}_forex.txt", json_dumps(data)) except: print(f"{site} failed to load")
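# The forex scrapers above (bloomberg, fxempire1, fxcm) each write their quotes via
# race_write(), which suggests they are meant to run side by side and race to disk.
# A hedged driver sketch under that assumption; refine_data() and race_write() are
# project helpers defined elsewhere and are not reproduced here.
from threading import Thread

def gather_forex(scrapers):
    """Run each (label, function) pair in its own thread; each writes <label>_forex.txt."""
    threads = [Thread(target=func, args=(label,)) for label, func in scrapers]
    for t in threads:
        t.start()
    for t in threads:
        t.join()

# Hypothetical invocation mirroring the site labels used inside each scraper:
# gather_forex([("bloomberg", bloomberg), ("fxempire1", fxempire1), ("fxcm", fxcm)])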
import urllib.parse
import cfscrape
from requests.exceptions import RequestException
import zipfile
from modules import defines
import os
import re
import time
import tempfile
from _thread import start_new_thread
from threading import Lock
from subprocess import check_output, check_call
import hashlib
import json

scraper = cfscrape.create_scraper()


# Debug helper: caches html page to not hammer server while testing/debugging/coding
class CachedResponse:
    content = ""

    def __init__(self, data):
        self.content = data

    def read(self):
        return self.content


# Debug helper: caches html page to not hammer server while testing/debugging/coding
class CacheDecorator(object):
async def read_logins(syncTarget): values = ('user', 'password', 'serverid', 'webserviceurl', 'loc', 'folder', 'admin_file', 'admin_line', 'chat_file', 'chat_line', 'kill_file', 'kill_line', 'login_file', 'login_line', 'violations_file', 'violations_line') print( 'scumlogs v1.0, scum server logs downloader from gportal\nby htttps://GAMEBotLand.com' ) try: loadConfigini() except: global configini configini = {} for value in values: if value not in configini: configini[value] = '' if configini['folder'] != '': if configini['folder'][-1:] != '/' and configini['folder'][-1:] != '\\': configini['folder'] = configini['folder'] + '/' saveConfigini() if configini['loc'] == 'com': loc = 'com' else: loc = 'us' URL_LOGIN = '******'.format( configini['loc']) URL_LOGS = 'https://www.g-portal.{0}/server/scum/{1}/logs'.format( configini['loc'], configini['serverid']) headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)' } with cfscrape.create_scraper() as session: try: log('connecting g-portal...') payload = { '_method': 'POST', 'login': configini['user'], 'password': configini['password'], 'rememberme': 1 } raw_response = session.post(URL_LOGIN, headers=headers, data=payload) raw_response = session.get(URL_LOGS, headers=headers) response = raw_response.text html = BeautifulSoup(response, 'html.parser') select = html.find('div', {'class': 'wrapper logs'}) logList = select['data-logs'] logs = json.loads(logList) for i in range(len(logs)): getid = logs["file_" + str(i + 1)] id = (getid[int(getid.find('Logs')) + 5:]) type = id.split('_')[0] if type == syncTarget: if configini[type + '_file'] != '': if id < configini[type + '_file']: continue payload = { '_method': 'POST', 'load': 'true', 'ExtConfig[config]': getid } raw_response = session.post(URL_LOGS, headers=headers, data=payload) response = raw_response.text content = json.loads(response) lines = content["ExtConfig"]["content"].splitlines() filename = configini['folder'] + id file = open(filename, "a+", encoding='utf-8') found = False writing = False for line in lines: if id == configini[type + '_file'] and not found: if line == configini[type + '_line']: found = True continue else: file.write(line + '\n') table = id.split('_') table_1 = table[0] if table_1 == 'admin': await sendToServer(line, 'admin') if table_1 == 'chat': await sendToServer(line, 'chat') if table_1 == 'kill': await sendToServer(line, 'kill') if table_1 == 'login': await sendToServer(line, 'login') if table_1 == 'violations': await sendToServer(line, 'violations') writing = True if writing: if found: log('updating {}'.format(id)) else: log('creating {}'.format(id)) file.close() configini[type + '_file'] = id configini[type + '_line'] = lines[-1] else: continue saveConfigini() except Exception as e: print(e) log('error connecting, check connectivity and scumlogs.ini') help()