def createItemList():
    request = urllib.request.urlopen('http://api.walmartlabs.com/v1/taxonomy?format=json&apiKey=tkbnu8astb9xxtn2ux9vw73b')
    response = request.read()
    jdict = json.loads(response.decode())
    categories = []
    items = {}
    for i in jdict['categories']:
        categories.append(i['id'])
    # Pick three random categories and collect items from each.
    nums = random.sample(range(0, len(categories)), 3)
    for num in nums:
        reqStr = ('http://api.walmartlabs.com/v1/paginated/items?format=json'
                  '&category=' + categories[num] +
                  '&apiKey=tkbnu8astb9xxtn2ux9vw73b')
        request = urllib.request.urlopen(reqStr)
        response = request.read()
        jdict = json.loads(response.decode())
        addToItemList(jdict, items)
    return items
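# addToItemList() is called above but defined elsewhere in the original
# module. A minimal sketch of what it plausibly does; the response field
# names ('items', 'itemId', 'name') are assumptions, not taken from the
# original code.
def addToItemList(jdict, items):
    for item in jdict.get('items', []):
        items[item['itemId']] = item['name']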
def _access_project():
    """
    Call the homepage of the project for the given branch if a url is set.

    This is a cheap way to fill the lru cache.
    """
    if hasattr(env, 'url'):
        # wait for uwsgi-restart after touch.
        time.sleep(10)
        for lang in settings.LANGUAGES:
            url = urllib.request.urlopen(env.url.format(lang[0]))
            with contextlib.closing(url) as request:
                request.read()
                print('Read response from: {}'.format(request.url))
def get_nhl_live_games(self, e, webCall=False):
    if e.input:
        today = e.input
    else:
        today = datetime.date.today().strftime("%Y-%m-%d")
    url = "http://live.nhle.com/GameData/GCScoreboard/{}.jsonp".format(today)
    request = urllib.request.urlopen(url)
    # Strip the JSONP wrapper ("loadScoreboard(...)") before parsing.
    data = request.read().decode()[15:-2]
    data = json.loads(data)
    games = []
    for game in data['games']:
        if not game['bsc']:
            start = game['bs'].replace(':00 ', ' ')
            gametxt = "{} - {} ({} ET)".format(game['atcommon'].title(),
                                               game['htcommon'].title(),
                                               start)
        else:
            gametxt = "{} {} - {} {} ({})".format(game['atcommon'].title(),
                                                  game['ats'],
                                                  game['hts'],
                                                  game['htcommon'].title(),
                                                  game['bs'])
        games.append(gametxt)
    if webCall:
        return " | ".join(games)
    e.output = " | ".join(games)
    return e
def get_zillow_estimate_iter_test():
    key = config.get('zillow', 'api_key')
    houseCode = config.get('zillow', 'property_id')
    request = urllib.request.urlopen(zillow_service + "?zws-id=" + key + "&zpid=" + houseCode)
    data = request.read()
    f = open('housedata.xml', 'wb')
    f.write(data)
    f.close()

    # ET.parse() creates an ElementTree
    tree1 = ET.parse('housedata.xml')
    root = tree1.getroot()
    # ET.fromstring() creates an Element;
    # this element is equivalent to the root of the above ElementTree
    tree2 = ET.fromstring(data)
    print("Tree1: " + str(type(tree1)))
    print("Tree2: " + str(type(tree2)))
    print()
    print(ET.tostring(root))
    for element in root:
        print(element.tag)
        for child in element:
            print(child.tag, child.attrib, child.text)
    print()
    print("now the iter")
    for value in root.iter('amount'):
        print(value.text)
def read_weather():
    """ Reads the current weather state, if enabled, and stores it. """
    # Only when explicitly enabled in settings.
    weather_settings = WeatherSettings.get_solo()
    if not weather_settings.track:
        return

    # Fetch XML from API.
    request = urllib.request.urlopen(BUIENRADAR_API_URL)
    response_bytes = request.read()
    request.close()
    response_string = response_bytes.decode("utf8")

    # Use simplified XPath engine to extract current temperature.
    root = ET.fromstring(response_string)
    xpath = BUIENRADAR_XPATH.format(
        weather_station_id=weather_settings.buienradar_station
    )
    temperature_element = root.find(xpath)
    temperature = temperature_element.text

    # Gas readings trigger these readings, so the 'read at' timestamp should
    # be somewhat in sync. Therefore we align temperature readings with them,
    # grouping them by hour.
    read_at = timezone.now().replace(minute=0, second=0, microsecond=0)
    TemperatureReading.objects.create(read_at=read_at, degrees_celcius=temperature)
def SABnzbd(title=None, nzburl=None):
    # Prepend "http://" if the host does not already include a scheme.
    HOST = lazylibrarian.SAB_HOST + ":" + lazylibrarian.SAB_PORT
    if not str(HOST)[:4] == "http":
        HOST = "http://" + HOST

    params = {}
    # Login for user
    params["mode"] = "addurl"
    params["name"] = nzburl
    # Checks that all are defined and nothing is missing
    if lazylibrarian.SAB_USER:
        params["ma_username"] = lazylibrarian.SAB_USER
    if lazylibrarian.SAB_PASS:
        params["ma_password"] = lazylibrarian.SAB_PASS
    if lazylibrarian.SAB_API:
        params["apikey"] = lazylibrarian.SAB_API
    if lazylibrarian.SAB_CAT:
        params["cat"] = lazylibrarian.SAB_CAT
    if lazylibrarian.USENET_RETENTION:
        params["maxage"] = lazylibrarian.USENET_RETENTION

    ## FUTURE-CODE
    # if lazylibrarian.SAB_PRIO:
    #     params["priority"] = lazylibrarian.SAB_PRIO
    # if lazylibrarian.SAB_PP:
    #     params["script"] = lazylibrarian.SAB_SCRIPT

    # Encode parameters
    URL = HOST + "/api?" + urllib.parse.urlencode(params)

    # to debug because of api
    logger.debug('Request url for <a href="%s">SABnzbd</a>' % URL)
    try:
        request = urllib.request.urlopen(URL)
    except (EOFError, IOError) as e:
        logger.error("Unable to connect to SAB with URL: %s" % URL)
        return False
    except http.client.InvalidURL as e:
        logger.error("Invalid SAB host, check your config. Current host: %s" % HOST)
        return False

    result = request.read().strip().decode("utf-8")
    if not result:
        logger.error("SABnzbd didn't return anything.")
        return False

    logger.debug("Result text from SAB: " + result)
    if result == "ok":
        logger.info("NZB sent to SAB successfully.")
        return True
    elif result == "Missing authentication":
        logger.error("Incorrect username/password.")
        return False
    else:
        logger.error("Unknown error: " + result)
        return False
def Challenge13():
    import xmlrpc.client
    import urllib.request

    startAddr = 'http://www.pythonchallenge.com/pc/return/evil4.jpg'
    resultAddr = 'http://www.pythonchallenge.com/pc/return/'
    XMLRPCserver = xmlrpc.client.Server(
        'http://www.pythonchallenge.com/pc/phonebook.php'
    )

    auth_handler = urllib.request.HTTPBasicAuthHandler()
    auth_handler.add_password(realm='inflate',
                              uri=startAddr,
                              user='******',
                              passwd='file')
    opener = urllib.request.build_opener(auth_handler)
    urllib.request.install_opener(opener)

    request = urllib.request.urlopen(startAddr)
    rData = request.read().decode()
    evilName = rData.split()[0]
    resultAddr += XMLRPCserver.phone(evilName).split('-')[1].lower() + '.html'
    print(resultAddr)
def listen(self):
    logging.debug(u'OnigiriAlert.listen() started.')

    url = TWITCASTING_API_LIVE_STATUS + '?type=json&user='******

    # The polling body is redacted in the original snippet; the surviving
    # fragments ("...'test exception')", the except clause and the sleep)
    # suggest a try/except inside a polling loop, roughly:
    while True:
        try:
            # ...redacted; ends by raising Exception('test exception')...
            pass
        except Exception as error:
            logging.error("caught exception in polling loop, error: [{}]".format(error))
            # os.sys.exit()
        time.sleep(POLLING_INTERVAL)

    logging.debug(u'OnigiriAlert.listen() ended.')
def hook_callback(request, *args, **kwargs):
    import json
    import urllib.request
    print("hook here")
    data = request.read().decode('utf-8')
    res = json.loads(data)
    email = res['commits'][0]['author']['email']
    u = User.objects.filter(email__exact=email).first()
    p = Project.objects.filter(repository_url__exact=res['repository']['html_url']).first()

    from AutoDoApp.Manager import ManagerThread
    m = ManagerThread()
    m.put_request(req=res['repository']['html_url'], desc=p.description)
    token = u.access_token

    import time
    time.sleep(10)  # Temporary sleep

    branch_id = p.branch_count
    autodo_prefix_branch_name = "AutoDo_" + str(branch_id)
    branch_name = "refs/heads/" + autodo_prefix_branch_name
    create_a_branch(access_token=token, branch_name=branch_name, request=request)
    create_file_commit(token, branch_name, request)  # OAuth call back token
    create_pull_request(token, autodo_prefix_branch_name, request)
    p.update()
    return HttpResponse(res)
def pywget_inside_crawler(url, depth, start_dir, start_file, root_dir_name):
    """
    Crawl the given url, find all <a href> and <img src> tags,
    get the information inside the tags and apply pywget_recursive()
    on each of them.

    Arguments:
    url -- the url that is to be crawled
    depth -- total number of recursions
    start_dir -- the directory of this py file
    start_file -- the first file that was downloaded, stored to avoid cycles
    root_dir_name -- the root directory for downloading files
    """
    depth -= 1

    content = ''
    try:
        request = urllib.request.urlopen(url)
        content = request.read().decode("utf-8")
    except:
        pass

    # all the information that's inside <a href> and <img src> tags
    match = re.findall(r'<a href="(.*?)"', content) + \
            re.findall(r'<a href = "(.*?)"', content) + \
            re.findall(r'<img src="(.*?)"', content) + \
            re.findall(r'<img src = "(.*?)"', content)

    # a prefix of the link; useful to check if a link is under the same domain
    prefix = url[0:url.rfind('/')]
    all_item_list = add_item_to_list(match, prefix)  # add information to a list

    for item in all_item_list:
        # recursively download the information
        pywget_recursive(item, depth, start_dir, start_file, root_dir_name)
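# add_item_to_list() is used above but defined elsewhere in the original
# module. A plausible sketch, assuming it resolves relative links against
# the page prefix so each entry is an absolute URL (an assumption):
def add_item_to_list(match, prefix):
    items = []
    for link in match:
        if link.startswith('http'):
            items.append(link)
        else:
            items.append(prefix + '/' + link.lstrip('/'))
    return items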
def main():
    """Main function"""

    # PARSE OPTIONS ###########################################################
    parser = argparse.ArgumentParser(description='A BeautifulSoup snippet.')
    parser.add_argument("url", nargs=1, metavar="URL",
                        help="The URL of the webpage to parse.")
    args = parser.parse_args()
    url = args.url[0]
    #print("url:", url)

    # GET HTML ################################################################
    request = urllib.request.urlopen(url)
    #print("STATUS:", request.status)
    html = request.read()
    #print(html)

    # PARSE HTML ##############################################################
    soup = BeautifulSoup(html)
    #print(soup.prettify())

    for img in soup.find_all('img'):
        print(img.get('src'))
def __init__(self, force_update):
    self.web_version = ""
    self.web_files = []

    response = ""
    try:
        update_info_url = _url_prefix + "update.info"
        request = urlopen(update_info_url)
        response = request.read().decode("utf-8")
    except urllib.error.HTTPError as e:
        logging.exception("Unable to get latest version info - HTTPError = %s" % e.reason)
    except urllib.error.URLError as e:
        logging.exception("Unable to get latest version info - URLError = %s" % e.reason)
    except http.client.HTTPException as e:
        logging.exception("Unable to get latest version info - HTTPException")
    except Exception as e:
        import traceback
        logging.exception("Unable to get latest version info - Exception = %s" % traceback.format_exc())

    if len(response) > 0:
        updateInfo = json.loads(response)
        self.web_version = updateInfo["version"]
        self.web_files = updateInfo["files"]
        logging.info("Cnchi Internet version: %s" % self.web_version)

    self.force = force_update
def download(self, path):
    """Download the file, or keep the existing copy."""
    target_path = self._generate_path(path)
    target_file = os.path.join(target_path, self.name)
    downf = not os.path.exists(target_file)
    if not downf:
        # At this point the file already exists
        self.path = target_file
        self.directory = target_path
        downf = downf or (self.size != os.path.getsize(target_file))
    if downf:
        try:
            request = urllib.request.urlopen(self.url)
            f = open(target_file, 'wb')
            while True:
                data = request.read(100 * 1024)
                if data:
                    print("downloading %s (%d/%d)\r" %
                          (self.name, os.path.getsize(target_file), self.size))
                    f.write(data)
                else:
                    break
            print("%s completed" % self.name)
            f.close()
            self.path = target_file
            self.directory = target_path
        except urllib.error.HTTPError:
            path = None
def get_webpage(section):
    web_page = "http://redesign.swahilipothub.co.ke/{}".format(section)
    try:
        with urllib.request.urlopen(web_page) as request:
            return request.read()
    except urllib.error.HTTPError:
        return None
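# A minimal usage sketch for get_webpage(); the "about" section name is a
# hypothetical example, not taken from the original code:
html = get_webpage("about")
if html is not None:
    print(html.decode("utf-8"))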
def main():
    """Main function"""

    # PARSE OPTIONS ###########################################################
    parser = argparse.ArgumentParser(description='A BeautifulSoup snippet.')
    parser.add_argument("url", nargs=1, metavar="URL",
                        help="The URL of the webpage to parse.")
    args = parser.parse_args()
    url = args.url[0]
    print("url:", url)

    # GET HTML ################################################################
    request = urllib.request.urlopen(url)
    print("STATUS:", request.status)
    html = request.read()
    #print(html)

    # PARSE HTML ##############################################################
    soup = BeautifulSoup(html)
    print(soup.prettify())

    print("Element name:", soup.title.name)
    print("Element value:", soup.title.string)
    print()
    for anchor in soup.find_all('a'):
        print(anchor.get('href'))
def hook_callback(request, *args, **kwargs):
    import json
    # print("hook here")
    data = request.read().decode('utf-8')
    res = json.loads(data)
    print(res)
    name = res['repository']['owner']['login']
    u = User.objects.filter(account_ID__exact=name).first()
    repository_url = "https://github.com/" + res['repository']['full_name']
    print(repository_url)
    p = Project.objects.filter(repository_url__exact=repository_url).first()

    from AutoDoApp.Manager import ManagerThread
    m = ManagerThread()
    m.put_request(req=repository_url, desc=p.description)
    token = u.access_token

    import time
    time.sleep(10)  # Temporary sleep

    branch_id = p.branch_count
    autodo_prefix_branch_name = "AutoDo_" + str(branch_id)
    branch_name = "refs/heads/" + autodo_prefix_branch_name
    project_name = res['repository']['full_name'].split('/')[1]
    create_a_branch(access_token=token, branch_name=branch_name,
                    user_name=name, project_name=project_name)
    create_file_commit(token, branch_name, name, project_name)  # OAuth call back token
    create_pull_request(token, autodo_prefix_branch_name, name, project_name)
    p.update()
    return HttpResponse(res)
def callAPI(self, resourcePath, method, queryParams, postData,
            headerParams=None):
    url = self.apiServer + resourcePath
    headers = {}
    if headerParams:
        for param, value in headerParams.items():
            headers[param] = value

    #headers['Content-type'] = 'application/json'
    headers['api_key'] = self.apiKey

    if self.cookie:
        headers['Cookie'] = self.cookie

    data = None

    if queryParams:
        # Need to remove None values, these should not be sent
        sentQueryParams = {}
        for param, value in queryParams.items():
            if value is not None:
                sentQueryParams[param] = value
        url = url + '?' + urllib.parse.urlencode(sentQueryParams)

    if method in ['GET']:
        # Options to add statements later on and for compatibility
        pass
    elif method in ['POST', 'PUT', 'DELETE']:
        if postData:
            headers['Content-type'] = 'application/json'
            data = self.sanitizeForSerialization(postData)
            data = json.dumps(data)
    else:
        raise Exception('Method ' + method + ' is not recognized.')

    if data:
        data = data.encode('utf-8')

    requestParams = MethodRequest(method=method, url=url,
                                  headers=headers, data=data)

    # Make the request
    request = urllib.request.urlopen(requestParams)
    encoding = request.headers.get_content_charset()
    if not encoding:
        encoding = 'iso-8859-1'
    response = request.read().decode(encoding)

    try:
        data = json.loads(response)
    except ValueError:  # PUT requests don't return anything
        data = None

    return data
def __init__(self, color):
    Segment.__init__(self)
    self.set_icon('mail')
    self.build_module('N/A')

    unread = []
    hl = False
    try:
        for account in open(os.environ['XDG_CONFIG_HOME'] + '/gmailaccounts',
                            encoding='utf-8'):
            (url, user, passwd) = account.split('|')

            auth_handler = urllib.request.HTTPBasicAuthHandler()
            auth_handler.add_password(realm='New mail feed',
                                      uri='https://mail.google.com/',
                                      user=user, passwd=passwd)
            opener = urllib.request.build_opener(auth_handler)
            urllib.request.install_opener(opener)

            request = urllib.request.urlopen(url)
            dom = xml.dom.minidom.parseString(request.read())
            count = dom.getElementsByTagName('fullcount')[0].childNodes[0].data
            if int(count) > 0:
                hl = True
            unread.append(count)
    except (IOError, ValueError, KeyError):
        return

    if hl:
        self.set_icon('mail')
    self.build_module(' / '.join(unread))
def get_data_source_one(self):
    """Retrieves Data from the first Yahoo Finance source"""
    data = ('http://finance.yahoo.com/webservice/v1/symbols/' +
            self.stock + '/quote?format=json&view=detail')
    request = urllib.request.urlopen(data)
    response = request.read()
    charset = request.info().get_content_charset('utf-8')
    self.data_s1 = json.loads(response.decode(charset))
def family_download_json(self, family):
    """
    Download json information from the internet.

    It does not save any data anywhere.
    """
    request = urllib.request.urlopen(self.family_download_url(family))
    return json.loads(request.read().decode('utf-8'))
def download(self, name, md5):
    url = url_prefix + name
    try:
        request = urlopen(url)
        txt = request.read()  # .decode('utf-8')
    except urllib.error.HTTPError as e:
        print('Unable to get %s - HTTPError = %s' % (name, e.reason))
        return False
    except urllib.error.URLError as e:
        print('Unable to get %s - URLError = %s' % (name, e.reason))
        return False
    except http.client.HTTPException as e:
        print('Unable to get %s - HTTPException' % name)
        return False
    except Exception as e:
        import traceback
        print('Unable to get %s - Exception = %s' % (name, traceback.format_exc()))
        return False

    web_md5 = self.get_md5(txt)
    if web_md5 != md5:
        print("Checksum error in %s. Download aborted" % name)
        return False

    new_name = os.path.join(base_dir, name + "." + self.web_version.replace(".", "_"))
    with open(new_name, "wb") as f:
        f.write(txt)

    return True
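# self.get_md5() is defined elsewhere in the original class; a minimal
# sketch of the obvious hashlib-based implementation over the downloaded
# bytes (an assumption):
import hashlib

def get_md5(self, data):
    return hashlib.md5(data).hexdigest()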
def get_more_links(more_parameters=()):
    parameters = {"format": "json",
                  "action": "query",
                  "prop": "links",
                  "pllimit": 500,
                  "plnamespace": 0,
                  "continue": "",
                  "titles": urllib.parse.quote(start_page.encode("utf8"))}
    parameters.update(more_parameters)

    queryString = "&".join("%s=%s" % (k, v) for k, v in parameters.items())
    # This ensures that redirects are followed automatically, documented here:
    # http://www.mediawiki.org/wiki/API:Query#Resolving_redirects
    queryString = queryString + "&redirects"

    url = "http://%s.wikipedia.org/w/api.php?%s" % (wikipedia_language, queryString)

    # get json data and make a dictionary out of it:
    request = urllib.request.urlopen(url)
    encoding = request.headers.get_content_charset()
    jsonData = request.read().decode(encoding)
    data = json.loads(jsonData)

    pageId = list(data['query']['pages'])[0]
    if int(pageId) <= 0:
        sys.exit("Page doesn't exist.")

    link_list = data['query']['pages'][str(pageId)]['links']

    return [entry["title"] for entry in link_list], data
def __init__(self, force_update):
    self.remote_version = ""
    self.md5s = {}

    # Get local info (local update.info)
    with open("/usr/share/cnchi/update.info", "r") as local_update_info:
        response = local_update_info.read()

    if len(response) > 0:
        updateInfo = json.loads(response)
        self.local_files = updateInfo['files']

    # Download update.info (contains info of all Cnchi's files)
    request = download.url_open(_update_info_url)
    if request is not None:
        response = request.read().decode('utf-8')
        if len(response) > 0:
            updateInfo = json.loads(response)
            self.remote_version = updateInfo['version']
            for remote_file in updateInfo['files']:
                self.md5s[remote_file['name']] = remote_file['md5']
            logging.info(_("Cnchi Internet version: %s"), self.remote_version)

    self.force = force_update
def __init__(self, force_update=False):
    self.web_version = ""
    self.web_files = []

    response = ""
    try:
        update_info_url = url_prefix + "update.info"
        request = urlopen(update_info_url)
        response = request.read().decode('utf-8')
    except urllib.error.HTTPError as e:
        print('Unable to get latest version info - HTTPError = %s' % e.reason)
    except urllib.error.URLError as e:
        print('Unable to get latest version info - URLError = %s' % e.reason)
    except http.client.HTTPException as e:
        print('Unable to get latest version info - HTTPException')
    except Exception as e:
        import traceback
        print('Unable to get latest version info - Exception = %s' % traceback.format_exc())

    if len(response) > 0:
        updateInfo = json.loads(response)
        self.web_version = updateInfo['version']
        self.web_files = updateInfo['files']
        print("web version: %s" % self.web_version)

    self.force = force_update
def getJSON(self, url):
    try:
        request = urllib.request.urlopen(url)
        data = json.loads(request.read().decode('UTF-8'))
        return data
    except urllib.error.URLError as e:
        logging.warning("Error: TWITCH API connection")
def dnsHistory(domain):
    rows = ''
    print("\n-- Checking dns history --")
    url = 'http://toolbar.netcraft.com/site_report?url=' + domain
    try:
        request = urllib.request.urlopen(url)
        html = request.read().decode(errors='ignore')
    except:
        html = ''
    soup = BeautifulSoup(html)
    tables = soup.findAll(attrs={'class': 'TBtable'})
    try:
        table = tables[1]
    except:
        table = ''  # Prevents errors if no history returned
    rows = ''
    if table:
        rows = soup.table.findAll('tr')  # Need to edit out again
    x = -1
    try:
        for tr in rows:
            columns = tr.findAll('td')
            for td in columns:
                text = ''.join(td.find(text=True))
                if x % 5 == 0:  # Only ip addresses are checked
                    if dns.query(text):  # Finds last ip that's not CloudFlare
                        print(output("The last known ip address is: %s" % text))
                        if text not in iplist:
                            iplist.append(text)
                        raise End  # Breaks from multiple loops
                x += 1
    except End:
        pass
    print("\n#" + "-" * 77 + "#")
def start_http_session(observatory):
    today_utc = datetime.datetime.utcnow()
    deltas = runtimeConfigs["delays"]

    requestString = "{url}/{observatory}/{type}/{file}"
    url = requestString.format(
        url=runtimeConfigs["url"],
        observatory=observatory,
        type="OneMinute",
        file=form_file_name(observatory.lower(), today_utc))
    url_sec = requestString.format(
        url=runtimeConfigs["url"],
        observatory=observatory,
        type="OneSecond",
        file=form_file_name_sec(observatory.lower(), today_utc))

    try:
        request = urllib.request.urlopen(url)
        request_sec = urllib.request.urlopen(url_sec)

        regex_string = "{year}-{month:02d}-{day:02d} {hour:02d}:{minute:02d}:{second:02d}.*"
        data_regex_string = "(-?\\d{1,5}\\.\\d{2}\\s*){4}"

        geo_data = request.read().decode("utf-8")
        geo_data_s = request_sec.read().decode("utf-8")

        for dtime in deltas:
            today_date = today_utc - dtime
            search_regex = re.compile(
                regex_string.format(year=today_date.year,
                                    month=today_date.month,
                                    day=today_date.day,
                                    hour=today_date.hour,
                                    minute=today_date.minute,
                                    second=0) + data_regex_string)
            search_regex_s = re.compile(
                regex_string.format(year=today_date.year,
                                    month=today_date.month,
                                    day=today_date.day,
                                    hour=today_date.hour,
                                    minute=today_date.minute,
                                    second=today_date.second) + data_regex_string)
            process_data(geo_data, search_regex, "min", dtime, observatory)
            process_data(geo_data_s, search_regex_s, "sec", dtime, observatory)

    #### On Error, insert missing data point to database ####
    except urllib.error.HTTPError:
        print("Error connecting to ", url)
        for dtime in deltas:
            data_map_m = {"h": 1, "d": 1, "z": 1, "f": 1,
                          "delay": dtime.seconds,
                          "timestamp": datetime.date.today(),
                          "res": "min", "obs": observatory}
            data_map_s = {"h": 1, "d": 1, "z": 1, "f": 1,
                          "delay": dtime.seconds,
                          "timestamp": datetime.date.today(),
                          "res": "sec", "obs": observatory}
            insert_record(data_map_m)
            insert_record(data_map_s)
    except http.client.IncompleteRead:
        print("Incomplete Read, Something went wrong network side")
def update_best_videos(recent_videos):
    global best_videos
    for best_video in best_videos:
        # minimal_date = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(minutes=BIG_DELAY)
        # seconds_minimal = time.mktime(minimal_date.timetuple())
        # seconds_best = time.mktime(best_video.published_at.timetuple())
        # if seconds_minimal > seconds_best:
        #     continue
        recent_videos.append(best_video)
    best_videos = []

    ids = ",".join(str(video.video_id) for video in recent_videos)
    GET_QUERY = ("https://www.googleapis.com/youtube/v3/videos"
                 "?part=statistics&id=" + ids + "&key=" + API_KEY)
    request = urllib.request.urlopen(GET_QUERY)
    videos = json.loads(request.read().decode(
        request.info().get_param("charset") or "utf-8"))

    for video in videos["items"]:
        index = -1
        for i in range(len(recent_videos)):
            if recent_videos[i].video_id == video["id"]:
                index = i
                break
        assert index != -1
        rvideo = recent_videos[index]
        new_video = Video(rvideo.title, rvideo.published_at, rvideo.video_id,
                          int(video["statistics"]["commentCount"]))
        best_videos.append(new_video)

    best_videos.sort()
    best_videos = best_videos[:MAX_BEST_VIDEOS_COUNT]
def dl_extra_infos(year, month):
    """ Download extra infos from CollecTor. """
    url = "https://collector.torproject.org/archive/relay-descriptors/extra-infos"
    filename = "extra-infos-%s-%s.tar.xz" % (year, month)
    save_dir_path = "extra-infos"
    if not os.path.isdir(save_dir_path):
        os.mkdir(save_dir_path)
    save_path = "%s/%s" % (save_dir_path, filename)
    if os.path.isfile(save_path):
        print("  [+] Extra infos %s found" % (save_path))
        return save_path
    # Check if the directory exists.
    if os.path.isdir("%s" % (save_path[:-7])):
        print("  [+] Extra infos %s found" % (save_path[:-7]))
        return save_path
    print("  [+] Downloading extra infos %s/%s" % (url, filename))
    try:
        request = urllib.request.urlopen("%s/%s" % (url, filename))
        if request.code != 200:
            print("  [-] Unable to fetch extra infos %s at %s" % (filename, url))
            return None
    except Exception as e:
        print("  [-] Unable to fetch %s/%s" % (url, filename))
        return None
    fp = open(save_path, "wb+")
    fp.write(request.read())
    fp.close()
    return save_path
def getJSON_text(url):
    request = urllib.request.urlopen(url)
    data = request.read()
    data_string = data.decode('UTF-8')
    print(data_string)
    return data_string
def make_request(self, method, **kwargs):
    request = urlopen(
        self.api.url,
        data=json.dumps(
            dict(
                {
                    "request": {
                        "interface": self.name,
                        "method": method,
                        "parameters": kwargs,
                    }
                },
                **({"token": self.api.token} if self.api.token else {}),
            )).encode("utf-8"),
    )
    response = json.loads(request.read().decode("utf-8"))
    exception = response.get("response", {}).get("exception", None)
    if exception:
        raise Exception(exception["message"])
    else:
        return response["response"]["result"]
def get_declarations(cls):
    '''
    Lists snow emergency declarations throughout the state.
    Returns a JSON or None if no data found.
    '''
    # container to hold results
    declarations = {'declarations': []}

    # submit request for data
    request = urllib.request.urlopen(cls.service_url)
    response = request.read()

    # parse xml response; use a distinct name for the element so it is not
    # shadowed by the dict built from its children
    events = ET.fromstring(response)
    for event_element in events:
        event = {}
        for declaration in event_element:
            event[declaration.tag] = declaration.text
        declarations['declarations'].append(event)

    if len(declarations['declarations']) > 0:
        return declarations
    return None
def log_in(request):
    if request.method == 'POST':
        data = json.loads(request.read().decode('utf8'))
        user = authenticate(username=data['uid'],
                            password=settings.DEFAULT_PASSWORD)
        if user is not None:
            if user.is_active:
                login(request, user)
                return HttpResponse('LOGIN OK POST')
            else:
                return HttpResponse('USER NOT ACTIVE')
        else:
            result = sign_up(data)
            if result:
                return HttpResponse('NEW SIGN UP')
            else:
                return HttpResponse('SIGN UP FAIL')
    else:
        return HttpResponse('POST REQUESTED')
def login_info(request):
    if request.method == 'GET':
        login_info = AuthenticationForm(request.POST)
        return render(request, 'login.html', {'login': login_info})
    if request.method == 'POST':
        response = request.read().decode().split('&')
        username = response[1][response[1].find('=') + 1:]
        password = response[2][response[2].find('=') + 1:]
        #request.session = get_user_model()._meta.pk.to_python(request.session[SESSION_KEY])
        #username = request.POST['username']
        #password = request.POST['password']
        user = authenticate(username=username, password=password)
        if user:
            # Redacted in the original snippet: print("User: "******
            # "Password: "****** -- presumably prints the credentials and
            # logs the user in before responding.
            login(request, user)
        else:
            try:
                user = User.objects.create_user(username, password=password)
                login(request, user)
            except:
                username = '******'
        return HttpResponse("Hello World. You're " + username)
def get_html(url):
    req = urllib.request.Request(url, None, headers=headers)
    try:
        request = urllib.request.urlopen(req)
    except:
        print("urlopen error")
        return get_html(url)
    html = request.read()

    # Detect the encoding
    enc = chardet.detect(html)
    #print(enc)

    decoded = ""
    if enc["encoding"]:
        try:
            decoded = codecs.decode(html, encoding=enc["encoding"], errors='strict')
        # If decoding fails, fall back to decoding as utf-8
        except UnicodeDecodeError as e:
            print("UnicodeDecodeError {0}".format(e.reason))
            decoded = codecs.decode(html, encoding="utf-8", errors='strict')
    return decoded
def extract_well_type(lease_query_result):
    if 'detail_link_rgx' not in extract_well_type.__dict__:
        extract_well_type.detail_link_rgx = re.compile(
            r'href="(leaseDetailAction.do[^"]+)"', re.IGNORECASE)

    match = extract_well_type.detail_link_rgx.search(lease_query_result)
    if not match:
        raise RuntimeError('No detail link found!')

    detail_url = URL_BASE + match.group(1)
    request = urllib.request.urlopen(detail_url)
    if request.status != 200:
        raise RuntimeError('HTTP request failed.')
    lease_detail = request.read().decode()

    if 'well_type_rgx' not in extract_well_type.__dict__:
        extract_well_type.well_type_rgx = re.compile(
            r'Well Type:\s+<[^>]+>\s+(\w+)', re.IGNORECASE)

    match = extract_well_type.well_type_rgx.search(lease_detail)
    if not match:
        raise RuntimeError('Unable to find well type!')
    return match.group(1)
def load_remote_manifest(url: str) -> Dict[str, Any]:
    """
    Converts a remote yaml file into a Python dictionary
    """
    tmp_dir, _ = get_tmp_dir()
    try:
        request = urllib.request.urlopen(url, timeout=30)
    except urllib.error.HTTPError as e:  # type: ignore
        e.msg += " " + url
        raise
    manifest_path = os.path.join(tmp_dir, str(uuid.uuid4()) + ".yaml")
    with open(manifest_path, "wb") as manifest:
        while True:
            buffer = request.read(BLOCK_SIZE)
            if not buffer:
                # There is nothing more to read
                break
            manifest.write(buffer)
    try:
        result = load_local_manifest(manifest_path)
    finally:
        os.remove(manifest_path)
    return result
def get():
    """
    Returns properly formatted weather for Rochester, NY
    City can be changed by grabbing the proper openweathermap.org url.
    """
    weather_string = "Weather Unavailable"
    weather_url = "http://api.openweathermap.org/data/2.1/weather/city/5134086"
    request = urllib.request.urlopen(weather_url)
    weather_info = json.loads(request.read().decode("utf-8"))
    if request.getcode() not in range(200, 300):
        request.close()
        return weather_string
    request.close()
    if weather_info is not None:
        temp = str(k_to_f(weather_info['main']['temp']))
        #state = str(weather_info['weather'][0]['main'])
        desc = str(weather_info['weather'][0]['description'])
        #weather_string = temp + " degrees, " + desc
        weather_string = temp + "°F, " + desc
    return weather_string
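# k_to_f() is defined elsewhere in the original module; a sketch of the
# Kelvin-to-Fahrenheit conversion it presumably performs (an assumption):
def k_to_f(kelvin):
    return round((kelvin - 273.15) * 9 / 5 + 32)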
def get_meals(self):
    menus = []
    try:
        date = datetime.datetime.now().strftime("%Y-%m-%d")
        url = "https://www.webservices.ethz.ch/gastro/v1/RVRI/Q1E1/meals/de/{}/lunch".format(date)
        with urllib.request.urlopen(url) as request:
            mensas = json.loads(request.read().decode())
        for mensa in mensas:
            if mensa["mensa"] == self.api_name:
                for meal in mensa["meals"]:
                    menu = Meal()
                    menu.label = meal['label']
                    menu.price_student = meal['prices']['student']
                    menu.price_staff = meal['prices']['staff']
                    menu.price_extern = meal['prices']['extern']
                    menu.description = meal['description']
                    menus.append(menu)
        return menus
    except Exception as e:
        print(e)
        return menus  # we failed, but let's pretend nothing ever happened
def openweather(city=NAME, lang=LANG, unit=UNIT, api_key=OPENWEATHER_API_KEY):
    try:
        request = urllib.request.urlopen(
            f"{OPENWEATHER_URL}?q={city.replace(' ', '+')}"
            f"&lang={lang}&units={unit}&appid={api_key}")
        if request.getcode() == 200:
            data = json.loads(request.read())
            temp = int(data["main"]["temp"])
            return {
                "name": data["name"],
                "country": iso3().get(data["sys"]["country"]),
                "temp": temp,
                "unit": check_unit(unit),
                "description": data["weather"][0]["description"],
            }
        else:
            print(f"E: {request.getcode()}")
    except:
        pass
def download(self, name, md5):
    url = _url_prefix + name
    try:
        request = urlopen(url)
        txt = request.read()  # .decode('utf-8')
    except urllib.error.HTTPError as e:
        logging.exception('Unable to get %s - HTTPError = %s' % (name, e.reason))
        return False
    except urllib.error.URLError as e:
        logging.exception('Unable to get %s - URLError = %s' % (name, e.reason))
        return False
    except http.client.HTTPException as e:
        logging.exception('Unable to get %s - HTTPException' % name)
        return False
    except Exception as e:
        import traceback
        logging.exception('Unable to get %s - Exception = %s' % (name, traceback.format_exc()))
        return False

    web_md5 = self.get_md5(txt)
    if web_md5 != md5:
        logging.error("Checksum error in %s. Download aborted" % name)
        return False

    new_name = os.path.join(
        _base_dir, name + "." + self.web_version.replace(".", "_"))
    with open(new_name, "wb") as f:
        f.write(txt)

    return True
def scrape():
    """This function scrapes the quote of the day from WikiQuotes"""
    # Setting the URL
    my_url = 'https://en.wikiquote.org/wiki/Wikiquote:Quote_of_the_day'

    try:
        # Open connection with the url, get the page and close
        request = urllib.request.urlopen(my_url)
        page_html = request.read()
        request.close()

        # HTML parsing
        page_soup = soup(page_html, 'html.parser')

        # Get date, quote and author
        date = page_soup.find_all('center')[1]
        quote = page_soup.find('i')
        author = page_soup.find('td', style='font-size:smaller;')

        # Get text from the elements
        date_txt = date.get_text()
        quote_txt = quote.get_text()
        author_txt = author.get_text()
    except Exception as e:
        print(e, e.args)
        return 1

    # Remove '~\n' & double spaces from str for formatting
    author_txt = author_txt.replace('~\n', '')
    date_txt = date_txt.replace('~\n', '')
    quote_txt = quote_txt.replace('~\n', '').replace('  ', ' ')

    # Result
    message = date_txt + ' *** ' + quote_txt + ' *** ' + author_txt
    return message
def storage(prefs):
    import sqlite3
    result = research(addon)["results"][0]
    request = urllib.request.urlopen(result["url"])
    contents = request.read().decode("utf-8")
    guid = find_guid(contents)
    home = os.environ["HOME"]
    profiles = glob.glob(f"{home}/.mozilla/firefox/**.default-release/")
    for profile in profiles:
        conn = sqlite3.connect(profile + "storage-sync-v2.sqlite")
        c = conn.cursor()
        # Target schema:
        # CREATE TABLE storage_sync_data (
        #     ext_id TEXT NOT NULL PRIMARY KEY,
        #     data TEXT,
        #     sync_change_counter INTEGER NOT NULL DEFAULT 1
        # );
        c.execute(
            "INSERT OR REPLACE INTO storage_sync_data VALUES (?,?,?)",
            (guid, json.dumps(prefs, separators=(",", ":")), 1))
        conn.commit()
def get_html(url):
    tries = 5
    req = urllib.request.Request(url)
    req.add_header('User-agent', 'Mozilla/5.0 (Linux x86_64)')
    # Add DoNotTrack header, do the right thing even if nobody cares
    req.add_header('DNT', '1')
    while tries > 0:
        try:
            request = urllib.request.urlopen(req)
            tries = 0
        except socket.timeout:
            if debug:
                raise
            tries -= 1
        except urllib.error.HTTPError as e:
            if debug:
                raise
            print("HTTP Error " + str(e.code) + ": " + e.reason)
            print("Aborting...")
            exit()

    # html.parser generates problems; I could fix them, but switching to lxml
    # is easier and faster
    soup = BeautifulSoup(request.read(), "lxml")
    return soup
def send_covid_tweet(self, data, msg):
    # gets covid data
    with urllib.request.urlopen(
            'https://api.covidtracking.com/v1/us/daily.json') as request:
        covid_data = json.loads(request.read().decode())
    days_ago = 0
    if len(msg) > 2:
        days_ago = int(msg[2])
    stats = covid_data[days_ago]
    raw_date = str(stats['date'])
    date = raw_date[:4] + '-' + raw_date[4:6] + '-' + raw_date[6:]
    # sends the status
    api.update_status(
        'COVID STATISTICS' +
        '\n--------------------' +
        '\nDate: ' + date +
        '\nConfirmed cases: ' + str(stats['positive']) +
        '\nNew cases: ' + str(stats['positiveIncrease']) +
        '\nDeaths: ' + str(stats['death']) +
        '\nNew deaths: ' + str(stats['deathIncrease']) +
        '\nCurrently hospitalized: ' + str(stats['hospitalizedCurrently']) +
        '\nTotal hospitalized: ' + str(stats['hospitalized']),
        in_reply_to_status_id=data['id'])
def get_weather_data(station_code='KNYC'):
    """Function to query the website for data based on the user-submitted
    station code, update the dictionary values and get the image url"""
    url_general = 'http://www.weather.gov/xml/current_obs/{}.xml'
    url = url_general.format(station_code)
    request = urllib.request.urlopen(url)
    content = request.read().decode()

    # Using ElementTree to retrieve specific tags from the xml
    import xml.etree.ElementTree as ET
    xml_root = ET.fromstring(content)

    # Update the dictionary values with data from the xml
    for data_point in weather_data_tags_dict.keys():
        try:
            weather_data_tags_dict[data_point] = xml_root.find(data_point).text
        except:
            # handle the case where certain data points are not available
            # for a station
            weather_data_tags_dict[data_point] = "-"

    # Get the url for the image representing the weather icon
    icon_url_base = xml_root.find('icon_url_base').text
    icon_url_name = xml_root.find('icon_url_name').text
    icon_url = icon_url_base + icon_url_name

    return weather_data_tags_dict, icon_url
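# weather_data_tags_dict is a module-level dict defined elsewhere in the
# original; a plausible shape, keyed by NWS current-observation XML tag
# names (the exact tag list here is an assumption):
weather_data_tags_dict = {
    'observation_time': '-',
    'weather': '-',
    'temp_f': '-',
    'relative_humidity': '-',
    'wind_string': '-',
}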
def torrent_search(self, query):
    logging.info('Searching matching movie torrents for "{}"'.format(query))
    request = urllib.request.urlopen(
        urllib.request.Request(
            'https://api.apidomain.info/list?' + urllib.parse.urlencode({
                'sort': 'relevance',
                'quality': '720p,1080p,3d',
                'page': 1,
                'keywords': query,
            }),
            headers={
                'User-Agent':
                    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ' +
                    '(KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36'
            }))
    results = [{
        'url': _['items'][0]['torrent_magnet'],
        'title': _['title'],
    } for _ in json.loads(request.read())['MovieList']]
    return Response(output=results)
def getOMDB(movie):
    try:
        movie_web = os.path.splitext(movie)[0]
        request_url = "https://www.omdbapi.com/" + \
                      "?s={}".format(urllib.parse.quote(movie_web)) + \
                      "&apikey=2df782b"
        request = urllib.request.urlopen(request_url)
        r = json.loads(request.read().decode("utf-8"))
    except urllib.error.HTTPError as error:
        print(error)
        return
    except urllib.error.URLError as error:
        print(error)
        return

    try:
        movies = [movie["Title"] + " (" + movie["Year"] + ")"
                  for movie in r["Search"]]
    except KeyError:
        return
    if not movies:
        return

    movies.insert(0, "Skip")
    movies.insert(1, "Manual search")
    return movies
def saveMp3(items, path):
    for item in items:
        if 'lexicalEntries' in item:
            if 'pronunciations' in item['lexicalEntries'][0]:
                if 'audioFile' in item['lexicalEntries'][0]['pronunciations'][0]:
                    result = {'id': item['id'],
                              'url': item['lexicalEntries'][0]['pronunciations'][0]['audioFile']}
                    print(result['url'])
                    request = urllib.request.urlopen(result['url'], timeout=10)
                    with open(path + result['id'] + '.mp3', 'wb') as f:
                        try:
                            f.write(request.read())
                        except:
                            print("error")
def get_city_station_codes(state='ca'):
    """Function to obtain the list of cities and their station codes
    based on the selected state"""
    # generic url missing state code
    url_general = "http://w1.weather.gov/xml/current_obs/seek.php?state={}&Find=Find"
    state = state.lower()
    url = url_general.format(state)  # format the url to include the selected state code

    request = urllib.request.urlopen(url)  # open the url
    content = request.read().decode()  # read, decode the HTML data and store it

    parser = WeatherHTMLParser()  # create a parser object
    parser.feed(content)  # feed the content from the webpage to the parser

    if len(parser.stations) != len(parser.cities):  # check for data inconsistency
        print("Error: discrepancy between expected number of stations and actual")
        exit()  # exit the app

    scr.delete('1.0', tk.END)  # clear scrolledText widget for next button click

    for i in range(len(parser.stations)):
        city_station = parser.cities[i] + ' (' + parser.stations[i] + ')'
        scr.insert(tk.INSERT, city_station + '\n')
def url_filter(url, lock, save_file_path):
    try:
        print(url)
        timeout = 50
        socket.setdefaulttimeout(timeout)
        sleep_download_time = 10
        time.sleep(sleep_download_time)
        # context = ssl._create_unverified_context()
        request = urllib.request.urlopen(url)
        sauce = request.read()
        request.close()
    except (urllib.error.URLError, urllib.error.HTTPError, socket.timeout) as e:
        print('URL Error!', url)
        print(e)
        return

    soup = bs.BeautifulSoup(sauce, 'lxml')

    # check highlight
    highlight = soup.select('.el__storyhighlights__item')
    if len(highlight) == 0:  # there is no highlight in website
        return

    # check video
    video = soup.select('.el__video')
    if len(video) == 0:  # there is no video in website
        return

    lock.acquire()
    save_file = codecs.open(save_file_path, 'a')
    save_file.write(str(url) + "\n")
    save_file.close()
    lock.release()
def _strip_playlist(self, url: str):
    self.text_output.insert(tk.END, "Playlist detected, Stripping links..\n")
    self.showEnd_output()

    # Make a list of all youtube links in the playlist
    final_urls = []  # results

    # Gather info about the search
    info = self.ytdl.extract_info(url, download=False, process=False)
    with urllib.request.urlopen(info["webpage_url"]) as request:
        webpage = request.read()
    soup = BeautifulSoup(webpage, 'html.parser')

    vid_url_pat = re.compile(r'watch\?v=\S+?list=')
    vid_url_matches = list(set(re.findall(vid_url_pat, str(soup))))

    # If url is a video, append it to the results
    for vid_url in vid_url_matches:
        if '&' in vid_url:
            url_amp = vid_url.index('&')
            final_urls.append('http://www.youtube.com/' + vid_url[:url_amp])
    for item in final_urls:
        self.songs.append(item)

    # Notify user
    self.text_output.insert(tk.END, "\tLinks found:\n")
    self.text_output.insert(
        tk.END, '\n'.join([f'\t\t- {x[:55]}' for x in final_urls]))
    self.text_output.insert(
        tk.END, "\n\nReady for download, or add another link..\n\n")
    self.showEnd_output()
    self.entitie_total_vids["text"] = f'Videos to download: {len(self.songs)}'
from urllib import request

from bs4 import BeautifulSoup

url = "https://www.baidu.com"
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'
}
req = request.Request(url=url, headers=headers)
response = request.urlopen(req)  # named to avoid shadowing the request module
soup = BeautifulSoup(response.read(), 'lxml')


def child(soup, level, order, l):
    level = level + 1
    for node in soup.contents:
        if not node.name:
            continue
        l.append(soup.name + str(level - 1) + '_' + str(order) + '->' +
                 node.name + str(level) + '_' + str(soup.contents.index(node)))
        child(node, level, soup.contents.index(node), l)


sideList = []
child(soup, 0, 0, sideList)

outputFile = open('test.dot', 'w')
outputFile.write('digraph G{\n')
outputFile.write('rankdir="LR"\n')
outputFile.write('node[fontname = "Consolas Italic", fontcolor="red",color="cyan"]\n')
os.chdir("build") urls = { Path("cairo.txz"): "https://archive.org/download/archlinux_pkg_cairo/" "cairo-1.17.2%2B17%2Bg52a7c79fd-2-x86_64.pkg.tar.xz", Path("fontconfig.txz"): "https://archive.org/download/archlinux_pkg_fontconfig/" "fontconfig-2%3A2.13.91%2B24%2Bg75eadca-1-x86_64.pkg.tar.xz", Path("freetype.zip"): "https://github.com/ubawurinna/freetype-windows-binaries/" "releases/download/v2.9.1/freetype-2.9.1.zip", } for archive_path, url in urls.items(): if not archive_path.exists(): with urllib.request.urlopen(url) as request: archive_path.write_bytes(request.read()) dest = archive_path.stem shutil.rmtree(dest, ignore_errors=True) shutil.unpack_archive(archive_path, dest) # Get cairo.dll (normally loaded by pycairo), checking that it include # FreeType support. Path("cairo/win64").mkdir(parents=True) cairo_dll, = enum_process_modules(b"cairo_ft_font_face_create_for_ft_face") shutil.copyfile(cairo_dll, "cairo/win64/cairo.dll") # Get hold of a CCompiler object, by creating a dummy Distribution with a list # of extension modules that claims to be truthy (but is actually empty) and # running its build_ext command. Prior to the deprecation of distutils, this # was just ``cc = distutils.ccompiler.new_compiler(); cc.initialize()``. class L(list): __bool__ = lambda self: True be = setuptools.Distribution({"ext_modules": L()}).get_command_obj("build_ext")
#!/usr/bin/python3
"""
Write a Python script that fetches https://intranet.hbtn.io/status
"""
import urllib.request


if __name__ == "__main__":
    with urllib.request.urlopen("https://intranet.hbtn.io/status") as request:
        html = request.read()
    print("Body response:")
    print("\t- type: {}".format(type(html)))
    print("\t- content: {}".format(html))
    print("\t- utf8 content: {}".format(html.decode("utf-8")))
def build_extensions(self):
    try:
        import importlib.metadata as importlib_metadata
    except ImportError:
        import importlib_metadata

    ext, = self.distribution.ext_modules

    ext.depends += [
        "setup.py",
        *map(str, Path("src").glob("*.h")),
        *map(str, Path("src").glob("*.cpp")),
    ]
    if UNITY_BUILD:
        ext.sources += ["src/_unity_build.cpp"]
    else:
        ext.sources += [*map(str, Path("src").glob("*.cpp"))]
        ext.sources.remove("src/_unity_build.cpp")
    ext.language = "c++"

    # pybind11.get_include() is brittle (pybind #1425).
    pybind11_include_path = next(
        path for path in importlib_metadata.files("pybind11")
        if path.name == "pybind11.h").locate().parents[1]
    if not (pybind11_include_path / "pybind11/pybind11.h").exists():
        # egg-install from setup_requires:
        # importlib-metadata thinks the headers are at
        #   .eggs/pybind11-VER-TAG.egg/pybind11-VER.data/headers/pybind11.h
        # but they're actually at
        #   .eggs/pybind11-VER-TAG.egg/pybind11.h
        # pybind11_include_path is
        #   /<...>/.eggs/pybind11-VER-TAG.egg/pybind11-VER.data
        # so just create the proper structure there.
        try:
            is_egg = (pybind11_include_path.relative_to(
                Path(__file__).resolve().parent).parts[0] == ".eggs")
        except ValueError:
            # Arch Linux ships completely wrong metadata, but the headers
            # are in the default include paths, so just leave things as is.
            is_egg = False
        if is_egg:
            shutil.rmtree(pybind11_include_path / "pybind11",
                          ignore_errors=True)
            for file in [*pybind11_include_path.parent.glob("**/*")]:
                if file.is_dir():
                    continue
                dest = (pybind11_include_path / "pybind11" /
                        file.relative_to(pybind11_include_path.parent))
                dest.parent.mkdir(parents=True, exist_ok=True)
                shutil.copy2(file, dest)
    ext.include_dirs += [pybind11_include_path]

    tmp_include_dir = Path(
        self.get_finalized_command("build").build_base, "include")
    tmp_include_dir.mkdir(parents=True, exist_ok=True)
    ext.include_dirs += [tmp_include_dir]
    try:
        get_pkg_config(f"--atleast-version={MIN_RAQM_VERSION}", "raqm")
    except (FileNotFoundError, CalledProcessError):
        (tmp_include_dir / "raqm-version.h").write_text("")  # Touch it.
        with urllib.request.urlopen(
                f"https://raw.githubusercontent.com/HOST-Oman/libraqm/"
                f"v{MIN_RAQM_VERSION}/src/raqm.h") as request, \
             (tmp_include_dir / "raqm.h").open("wb") as file:
            file.write(request.read())

    if sys.platform == "linux":
        import cairo
        get_pkg_config(f"--atleast-version={MIN_CAIRO_VERSION}", "cairo")
        ext.include_dirs += [cairo.get_include()]
        ext.extra_compile_args += [
            "-std=c++1z", "-fvisibility=hidden", "-flto",
            "-Wall", "-Wextra", "-Wpedantic",
            *get_pkg_config("--cflags", "cairo"),
        ]
        ext.extra_link_args += ["-flto"]
        if MANYLINUX:
            ext.extra_link_args += ["-static-libgcc", "-static-libstdc++"]

    elif sys.platform == "darwin":
        import cairo
        get_pkg_config(f"--atleast-version={MIN_CAIRO_VERSION}", "cairo")
        ext.include_dirs += [cairo.get_include()]
        # On OSX<10.14, version-min=10.9 avoids deprecation warning wrt.
        # libstdc++, but assumes that the build uses non-Xcode-provided LLVM.
        # On OSX>=10.14, assume that the build uses the normal toolchain.
        macosx_min_version = (
            "10.14"
            if LooseVersion(platform.mac_ver()[0]) >= "10.14" else "10.9")
        ext.extra_compile_args += [
            "-std=c++1z", "-fvisibility=hidden", "-flto",
            f"-mmacosx-version-min={macosx_min_version}",
            *get_pkg_config("--cflags", "cairo"),
        ]
        ext.extra_link_args += [
            # version-min needs to be repeated to avoid a warning.
            "-flto", f"-mmacosx-version-min={macosx_min_version}",
        ]

    elif sys.platform == "win32":
        # Windows conda path for FreeType.
        ext.include_dirs += [Path(sys.prefix, "Library/include")]
        ext.extra_compile_args += [
            "/std:c++17", "/Zc:__cplusplus", "/experimental:preprocessor",
            "/EHsc", "/D_USE_MATH_DEFINES",
            "/wd4244", "/wd4267",  # cf. gcc -Wconversion.
        ]
        ext.libraries += ["psapi", "cairo", "freetype"]
        # Windows conda path for FreeType -- needs to be str, not Path.
        ext.library_dirs += [str(Path(sys.prefix, "Library/lib"))]

    # Workaround https://bugs.llvm.org/show_bug.cgi?id=33222 (clang +
    # libstdc++ + std::variant = compilation error) and pybind11 #1604
    # (-fsized-deallocation). Note that `.compiler.compiler` only exists
    # for UnixCCompiler.
    if os.name == "posix":
        compiler_macros = subprocess.check_output(
            [*self.compiler.compiler, "-dM", "-E", "-x", "c", "/dev/null"],
            universal_newlines=True)
        if "__clang__" in compiler_macros:
            ext.extra_compile_args += [
                "-stdlib=libc++", "-fsized-deallocation"]
            # Explicitly linking to libc++ is required to avoid picking up
            # the system C++ library (libstdc++ or an outdated libc++).
            ext.extra_link_args += ["-lc++"]

    super().build_extensions()

    if sys.platform == "win32":
        for dll in ["cairo.dll", "freetype.dll"]:
            for path in paths_from_link_libpaths():
                if (path / dll).exists():
                    shutil.copy2(path / dll,
                                 Path(self.build_lib, "mplcairo"))
                    break
#!/usr/bin/env python3

from html.parser import HTMLParser
import urllib.request


class myParser(HTMLParser):
    def handle_starttag(self, tag, attrs):
        if tag == "a":
            for a in attrs:
                if a[0] == 'href':
                    link = a[1]
                    if link.find('http') >= 0:
                        print(link)
                        newParse = myParser()
                        newParse.feed(link)


url = "http://www.packtpub.com"
request = urllib.request.urlopen(url)
parser = myParser()
parser.feed(request.read().decode('utf-8'))
def downloadTemplate(templateUrl):
    request = urllib.request.urlopen(templateUrl)
    response = request.read().decode('utf-8')
    return response
def main(args):
    try:
        destination_ipv4address = ipaddress.ip_address(args.address[0])
    except ValueError:
        print('[error] Destination address is invalid ({})'.format(
            args.address[0]), file=sys.stderr)
        exit(1)

    request_params = {
        'ip': str(destination_ipv4address),
        'port': args.port,
    }

    opener = urllib.request.build_opener()
    # With a proxy
    if args.http_proxy:
        proxy_handler = urllib.request.ProxyHandler({
            "http": args.http_proxy,
            "https": args.http_proxy
        })
        opener.add_handler(proxy_handler)

    request = urllib.request.Request('{}?{}'.format(
        args.torbulkexitlist, urllib.parse.urlencode(request_params)))
    try:
        with opener.open(request) as request:
            response = request.read().decode('utf-8')
    except urllib.error.HTTPError as ex:
        print('[error] HTTP access error code:{}'.format(ex.code),
              file=sys.stderr)
        exit(2)
    except urllib.error.URLError as ex:
        print('[error] HTTP access error', file=sys.stderr)
        print('[error] {}'.format(ex.reason), file=sys.stderr)
        exit(2)

    exit_list_strings = []
    for line in response.split('\n'):
        if re.match(r'^#', line):
            # skip comment lines
            continue
        elif re.match(r'^$', line):
            # skip empty lines too
            continue
        # validate the IPv4 address format
        try:
            ipaddress.ip_address(line)
        except ValueError:
            print('[error] address is invalid({})'.format(line),
                  file=sys.stderr)
            continue
        except Exception as ex:
            print('[error] ', end='', file=sys.stderr)
            print(ex, file=sys.stderr)
            continue
        # build the deny list
        exit_list_strings.append('    Require not ip {}\n'.format(line))

    print('[info] Tor exit list count : {:d}'.format(len(exit_list_strings)))

    try:
        # write .htaccess under the specified directory
        with open('{}/.htaccess'.format(args.export_dir), 'w') as file_htaccess:
            file_htaccess.write('<RequireAll>\n')
            file_htaccess.write('    Require all granted\n')
            file_htaccess.writelines(exit_list_strings)
            file_htaccess.write('</RequireAll>\n')
            file_htaccess.flush()
    except Exception as ex:
        print('[error] ', end='', file=sys.stderr)
        print(ex, file=sys.stderr)
        exit(3)
filetypes = {'pdf'}

for homepage in homepages_list:
    url = homepage[0]
    target_folder = homepage[1]
    # "base url", i.e. url without trailing index.html or similar
    url_base = str.join('/', url.split('/')[:-1])
    try:
        request = urllib.request.urlopen(url)
    except:
        e = sys.exc_info()[0]
        print('Exception:\n {}\n at url: {}'.format(e, url))
    str_html = request.read().decode('UTF-8')

    if not os.path.exists(target_folder):
        os.mkdir(target_folder)

    matches = list()
    for filetype in filetypes:
        # regex matches '"[AnyNumberOfCharactersExceptWhitespace].filetype"'
        matches += re.findall(r'"[^\s]*\.{}"'.format(filetype), str_html)

    for match in matches:
        match = match.strip('"')
        storepath = os.path.join(target_folder, match.split('/')[-1])
        if os.path.exists(storepath) and not force:
            print('skipping {}, as {} exists'.format(match, storepath))