def createItemList():
    request = urllib.request.urlopen('http://api.walmartlabs.com/v1/taxonomy?format=json&apiKey=tkbnu8astb9xxtn2ux9vw73b')
    response = request.read()
    jdict = json.loads(response.decode())
    categories = []
    items = {}
    for i in jdict['categories']:
        categories.append(i['id'])
    # Pick three random categories and collect items from each.
    nums = random.sample(range(0, len(categories)), 3)
    for num in nums:
        reqStr = ('http://api.walmartlabs.com/v1/paginated/items?format=json'
                  '&category=' + categories[num] +
                  '&apiKey=tkbnu8astb9xxtn2ux9vw73b')
        request = urllib.request.urlopen(reqStr)
        response = request.read()
        jdict = json.loads(response.decode())
        addToItemList(jdict, items)
    return items
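# addToItemList() is called above but defined elsewhere in the original
# module. A minimal sketch of what it plausibly does; the response field
# names ('items', 'itemId', 'name') are assumptions, not taken from the
# original code.
def addToItemList(jdict, items):
    for item in jdict.get('items', []):
        items[item['itemId']] = item['name']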
def _access_project():
    """
    Call the homepage of the project for the given branch if a url is set.

    This is a cheap way to fill the lru cache.
    """
    if hasattr(env, 'url'):
        # wait for uwsgi-restart after touch.
        time.sleep(10)
        for lang in settings.LANGUAGES:
            url = urllib.request.urlopen(env.url.format(lang[0]))
            with contextlib.closing(url) as request:
                request.read()
                print('Read response from: {}'.format(request.url))
def get_nhl_live_games(self, e, webCall=False):
    if e.input:
        today = e.input
    else:
        today = datetime.date.today().strftime("%Y-%m-%d")
    url = "http://live.nhle.com/GameData/GCScoreboard/{}.jsonp".format(today)
    request = urllib.request.urlopen(url)
    # Strip the JSONP wrapper ("loadScoreboard(...)") before parsing.
    data = request.read().decode()[15:-2]
    data = json.loads(data)
    games = []
    for game in data['games']:
        if not game['bsc']:
            start = game['bs'].replace(':00 ', ' ')
            gametxt = "{} - {} ({} ET)".format(game['atcommon'].title(),
                                               game['htcommon'].title(),
                                               start)
        else:
            gametxt = "{} {} - {} {} ({})".format(game['atcommon'].title(),
                                                  game['ats'],
                                                  game['hts'],
                                                  game['htcommon'].title(),
                                                  game['bs'])
        games.append(gametxt)
    if webCall:
        return " | ".join(games)
    e.output = " | ".join(games)
    return e
def get_zillow_estimate_iter_test():
    key = config.get('zillow', 'api_key')
    houseCode = config.get('zillow', 'property_id')
    request = urllib.request.urlopen(zillow_service + "?zws-id=" + key + "&zpid=" + houseCode)
    data = request.read()
    f = open('housedata.xml', 'wb')
    f.write(data)
    f.close()

    # ET.parse() creates an ElementTree
    tree1 = ET.parse('housedata.xml')
    root = tree1.getroot()
    # ET.fromstring() creates an Element;
    # this element is equivalent to the root of the above ElementTree
    tree2 = ET.fromstring(data)
    print("Tree1: " + str(type(tree1)))
    print("Tree2: " + str(type(tree2)))
    print()
    print(ET.tostring(root))
    for element in root:
        print(element.tag)
        for child in element:
            print(child.tag, child.attrib, child.text)
    print()
    print("now the iter")
    for value in root.iter('amount'):
        print(value.text)
def read_weather():
    """ Reads the current weather state, if enabled, and stores it. """
    # Only when explicitly enabled in settings.
    weather_settings = WeatherSettings.get_solo()
    if not weather_settings.track:
        return

    # Fetch XML from API.
    request = urllib.request.urlopen(BUIENRADAR_API_URL)
    response_bytes = request.read()
    request.close()
    response_string = response_bytes.decode("utf8")

    # Use simplified XPath engine to extract current temperature.
    root = ET.fromstring(response_string)
    xpath = BUIENRADAR_XPATH.format(
        weather_station_id=weather_settings.buienradar_station
    )
    temperature_element = root.find(xpath)
    temperature = temperature_element.text

    # Gas readings trigger these readings, so the 'read at' timestamp should
    # be somewhat in sync. Therefore we align temperature readings with them,
    # grouping them by hour.
    read_at = timezone.now().replace(minute=0, second=0, microsecond=0)
    TemperatureReading.objects.create(read_at=read_at, degrees_celcius=temperature)
def SABnzbd(title=None, nzburl=None):
    # Prepend "http://" if the host does not already include a scheme.
    HOST = lazylibrarian.SAB_HOST + ":" + lazylibrarian.SAB_PORT
    if not str(HOST)[:4] == "http":
        HOST = "http://" + HOST

    params = {}
    # Login for user
    params["mode"] = "addurl"
    params["name"] = nzburl
    # Checks that all are defined and nothing is missing
    if lazylibrarian.SAB_USER:
        params["ma_username"] = lazylibrarian.SAB_USER
    if lazylibrarian.SAB_PASS:
        params["ma_password"] = lazylibrarian.SAB_PASS
    if lazylibrarian.SAB_API:
        params["apikey"] = lazylibrarian.SAB_API
    if lazylibrarian.SAB_CAT:
        params["cat"] = lazylibrarian.SAB_CAT
    if lazylibrarian.USENET_RETENTION:
        params["maxage"] = lazylibrarian.USENET_RETENTION

    ## FUTURE-CODE
    # if lazylibrarian.SAB_PRIO:
    #     params["priority"] = lazylibrarian.SAB_PRIO
    # if lazylibrarian.SAB_PP:
    #     params["script"] = lazylibrarian.SAB_SCRIPT

    # Encode parameters
    URL = HOST + "/api?" + urllib.parse.urlencode(params)

    # to debug because of api
    logger.debug('Request url for <a href="%s">SABnzbd</a>' % URL)
    try:
        request = urllib.request.urlopen(URL)
    except (EOFError, IOError) as e:
        logger.error("Unable to connect to SAB with URL: %s" % URL)
        return False
    except http.client.InvalidURL as e:
        logger.error("Invalid SAB host, check your config. Current host: %s" % HOST)
        return False

    result = request.read().strip().decode("utf-8")
    if not result:
        logger.error("SABnzbd didn't return anything.")
        return False

    logger.debug("Result text from SAB: " + result)
    if result == "ok":
        logger.info("NZB sent to SAB successfully.")
        return True
    elif result == "Missing authentication":
        logger.error("Incorrect username/password.")
        return False
    else:
        logger.error("Unknown error: " + result)
        return False
def Challenge13():
    import xmlrpc.client
    import urllib.request

    startAddr = 'http://www.pythonchallenge.com/pc/return/evil4.jpg'
    resultAddr = 'http://www.pythonchallenge.com/pc/return/'
    XMLRPCserver = xmlrpc.client.Server(
        'http://www.pythonchallenge.com/pc/phonebook.php'
    )

    auth_handler = urllib.request.HTTPBasicAuthHandler()
    auth_handler.add_password(realm='inflate',
                              uri=startAddr,
                              user='******',
                              passwd='file')
    opener = urllib.request.build_opener(auth_handler)
    urllib.request.install_opener(opener)

    request = urllib.request.urlopen(startAddr)
    rData = request.read().decode()
    evilName = rData.split()[0]
    resultAddr += XMLRPCserver.phone(evilName).split('-')[1].lower() + '.html'
    print(resultAddr)
def listen(self):
    logging.debug(u'OnigiriAlert.listen() started.')

    url = TWITCASTING_API_LIVE_STATUS + '?type=json&user='******

    # The polling body is redacted in the original snippet; the surviving
    # fragments ("...'test exception')", the except clause and the sleep)
    # suggest a try/except inside a polling loop, roughly:
    while True:
        try:
            # ...redacted; ends by raising Exception('test exception')...
            pass
        except Exception as error:
            logging.error("caught exception in polling loop, error: [{}]".format(error))
            # os.sys.exit()
        time.sleep(POLLING_INTERVAL)

    logging.debug(u'OnigiriAlert.listen() ended.')
def hook_callback(request, *args, **kwargs):
    import json
    import urllib.request
    print("hook here")
    data = request.read().decode('utf-8')
    res = json.loads(data)
    email = res['commits'][0]['author']['email']
    u = User.objects.filter(email__exact=email).first()
    p = Project.objects.filter(repository_url__exact=res['repository']['html_url']).first()

    from AutoDoApp.Manager import ManagerThread
    m = ManagerThread()
    m.put_request(req=res['repository']['html_url'], desc=p.description)
    token = u.access_token

    import time
    time.sleep(10)  # Temporary sleep

    branch_id = p.branch_count
    autodo_prefix_branch_name = "AutoDo_" + str(branch_id)
    branch_name = "refs/heads/" + autodo_prefix_branch_name
    create_a_branch(access_token=token, branch_name=branch_name, request=request)
    create_file_commit(token, branch_name, request)  # OAuth call back token
    create_pull_request(token, autodo_prefix_branch_name, request)
    p.update()
    return HttpResponse(res)
def pywget_inside_crawler(url, depth, start_dir, start_file, root_dir_name):
    """
    Crawl the given url, find all <a href> and <img src> tags,
    get the information inside the tags and apply pywget_recursive()
    on each of them.

    Arguments:
    url -- the url that is to be crawled
    depth -- total number of recursions
    start_dir -- the directory of this py file
    start_file -- the first file that was downloaded, stored to avoid cycles
    root_dir_name -- the root directory for downloading files
    """
    depth -= 1

    content = ''
    try:
        request = urllib.request.urlopen(url)
        content = request.read().decode("utf-8")
    except:
        pass

    # all the information that's inside <a href> and <img src> tags
    match = re.findall(r'<a href="(.*?)"', content) + \
            re.findall(r'<a href = "(.*?)"', content) + \
            re.findall(r'<img src="(.*?)"', content) + \
            re.findall(r'<img src = "(.*?)"', content)

    # a prefix of the link; useful to check if a link is under the same domain
    prefix = url[0:url.rfind('/')]
    all_item_list = add_item_to_list(match, prefix)  # add information to a list

    for item in all_item_list:
        # recursively download the information
        pywget_recursive(item, depth, start_dir, start_file, root_dir_name)
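# add_item_to_list() is used above but defined elsewhere in the original
# module. A plausible sketch, assuming it resolves relative links against
# the page prefix so each entry is an absolute URL (an assumption):
def add_item_to_list(match, prefix):
    items = []
    for link in match:
        if link.startswith('http'):
            items.append(link)
        else:
            items.append(prefix + '/' + link.lstrip('/'))
    return items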
def main():
    """Main function"""

    # PARSE OPTIONS ###########################################################
    parser = argparse.ArgumentParser(description='A BeautifulSoup snippet.')
    parser.add_argument("url", nargs=1, metavar="URL",
                        help="The URL of the webpage to parse.")
    args = parser.parse_args()
    url = args.url[0]
    #print("url:", url)

    # GET HTML ################################################################
    request = urllib.request.urlopen(url)
    #print("STATUS:", request.status)
    html = request.read()
    #print(html)

    # PARSE HTML ##############################################################
    soup = BeautifulSoup(html)
    #print(soup.prettify())

    for img in soup.find_all('img'):
        print(img.get('src'))
def __init__(self, force_update):
    self.web_version = ""
    self.web_files = []

    response = ""
    try:
        update_info_url = _url_prefix + "update.info"
        request = urlopen(update_info_url)
        response = request.read().decode("utf-8")
    except urllib.error.HTTPError as e:
        logging.exception("Unable to get latest version info - HTTPError = %s" % e.reason)
    except urllib.error.URLError as e:
        logging.exception("Unable to get latest version info - URLError = %s" % e.reason)
    except http.client.HTTPException as e:
        logging.exception("Unable to get latest version info - HTTPException")
    except Exception as e:
        import traceback
        logging.exception("Unable to get latest version info - Exception = %s" % traceback.format_exc())

    if len(response) > 0:
        updateInfo = json.loads(response)
        self.web_version = updateInfo["version"]
        self.web_files = updateInfo["files"]
        logging.info("Cnchi Internet version: %s" % self.web_version)

    self.force = force_update
def download(self, path):
    """Download the file, or keep the existing copy."""
    target_path = self._generate_path(path)
    target_file = os.path.join(target_path, self.name)
    downf = not os.path.exists(target_file)
    if not downf:
        # At this point the file already exists
        self.path = target_file
        self.directory = target_path
        downf = downf or (self.size != os.path.getsize(target_file))
    if downf:
        try:
            request = urllib.request.urlopen(self.url)
            f = open(target_file, 'wb')
            while True:
                data = request.read(100 * 1024)
                if data:
                    print("downloading %s (%d/%d)\r" %
                          (self.name, os.path.getsize(target_file), self.size))
                    f.write(data)
                else:
                    break
            print("%s completed" % self.name)
            f.close()
            self.path = target_file
            self.directory = target_path
        except urllib.error.HTTPError:
            path = None
def get_webpage(section):
    web_page = "http://redesign.swahilipothub.co.ke/{}".format(section)
    try:
        with urllib.request.urlopen(web_page) as request:
            return request.read()
    except urllib.error.HTTPError:
        return None
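# A minimal usage sketch for get_webpage(); the "about" section name is a
# hypothetical example, not taken from the original code:
html = get_webpage("about")
if html is not None:
    print(html.decode("utf-8"))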
def main():
    """Main function"""

    # PARSE OPTIONS ###########################################################
    parser = argparse.ArgumentParser(description='A BeautifulSoup snippet.')
    parser.add_argument("url", nargs=1, metavar="URL",
                        help="The URL of the webpage to parse.")
    args = parser.parse_args()
    url = args.url[0]
    print("url:", url)

    # GET HTML ################################################################
    request = urllib.request.urlopen(url)
    print("STATUS:", request.status)
    html = request.read()
    #print(html)

    # PARSE HTML ##############################################################
    soup = BeautifulSoup(html)
    print(soup.prettify())

    print("Element name:", soup.title.name)
    print("Element value:", soup.title.string)
    print()
    for anchor in soup.find_all('a'):
        print(anchor.get('href'))
def hook_callback(request, *args, **kwargs):
    import json
    # print("hook here")
    data = request.read().decode('utf-8')
    res = json.loads(data)
    print(res)
    name = res['repository']['owner']['login']
    u = User.objects.filter(account_ID__exact=name).first()
    repository_url = "https://github.com/" + res['repository']['full_name']
    print(repository_url)
    p = Project.objects.filter(repository_url__exact=repository_url).first()

    from AutoDoApp.Manager import ManagerThread
    m = ManagerThread()
    m.put_request(req=repository_url, desc=p.description)
    token = u.access_token

    import time
    time.sleep(10)  # Temporary sleep

    branch_id = p.branch_count
    autodo_prefix_branch_name = "AutoDo_" + str(branch_id)
    branch_name = "refs/heads/" + autodo_prefix_branch_name
    project_name = res['repository']['full_name'].split('/')[1]
    create_a_branch(access_token=token, branch_name=branch_name,
                    user_name=name, project_name=project_name)
    create_file_commit(token, branch_name, name, project_name)  # OAuth call back token
    create_pull_request(token, autodo_prefix_branch_name, name, project_name)
    p.update()
    return HttpResponse(res)
def callAPI(self, resourcePath, method, queryParams, postData,
            headerParams=None):
    url = self.apiServer + resourcePath
    headers = {}
    if headerParams:
        for param, value in headerParams.items():
            headers[param] = value

    #headers['Content-type'] = 'application/json'
    headers['api_key'] = self.apiKey

    if self.cookie:
        headers['Cookie'] = self.cookie

    data = None

    if queryParams:
        # Need to remove None values, these should not be sent
        sentQueryParams = {}
        for param, value in queryParams.items():
            if value is not None:
                sentQueryParams[param] = value
        url = url + '?' + urllib.parse.urlencode(sentQueryParams)

    if method in ['GET']:
        # Options to add statements later on and for compatibility
        pass
    elif method in ['POST', 'PUT', 'DELETE']:
        if postData:
            headers['Content-type'] = 'application/json'
            data = self.sanitizeForSerialization(postData)
            data = json.dumps(data)
    else:
        raise Exception('Method ' + method + ' is not recognized.')

    if data:
        data = data.encode('utf-8')

    requestParams = MethodRequest(method=method, url=url,
                                  headers=headers, data=data)

    # Make the request
    request = urllib.request.urlopen(requestParams)
    encoding = request.headers.get_content_charset()
    if not encoding:
        encoding = 'iso-8859-1'
    response = request.read().decode(encoding)

    try:
        data = json.loads(response)
    except ValueError:  # PUT requests don't return anything
        data = None

    return data
def __init__(self, color):
    Segment.__init__(self)
    self.set_icon('mail')
    self.build_module('N/A')

    unread = []
    hl = False
    try:
        for account in open(os.environ['XDG_CONFIG_HOME'] + '/gmailaccounts',
                            encoding='utf-8'):
            (url, user, passwd) = account.split('|')

            auth_handler = urllib.request.HTTPBasicAuthHandler()
            auth_handler.add_password(realm='New mail feed',
                                      uri='https://mail.google.com/',
                                      user=user, passwd=passwd)
            opener = urllib.request.build_opener(auth_handler)
            urllib.request.install_opener(opener)

            request = urllib.request.urlopen(url)
            dom = xml.dom.minidom.parseString(request.read())
            count = dom.getElementsByTagName('fullcount')[0].childNodes[0].data
            if int(count) > 0:
                hl = True
            unread.append(count)
    except (IOError, ValueError, KeyError):
        return

    if hl:
        self.set_icon('mail')
    self.build_module(' / '.join(unread))
def get_data_source_one(self):
    """Retrieves Data from the first Yahoo Finance source"""
    data = ('http://finance.yahoo.com/webservice/v1/symbols/' +
            self.stock + '/quote?format=json&view=detail')
    request = urllib.request.urlopen(data)
    response = request.read()
    charset = request.info().get_content_charset('utf-8')
    self.data_s1 = json.loads(response.decode(charset))
def family_download_json(self, family):
    """
    Download json information from the internet.

    It does not save any data anywhere.
    """
    request = urllib.request.urlopen(self.family_download_url(family))
    return json.loads(request.read().decode('utf-8'))
def download(self, name, md5):
    url = url_prefix + name
    try:
        request = urlopen(url)
        txt = request.read()  # .decode('utf-8')
    except urllib.error.HTTPError as e:
        print('Unable to get %s - HTTPError = %s' % (name, e.reason))
        return False
    except urllib.error.URLError as e:
        print('Unable to get %s - URLError = %s' % (name, e.reason))
        return False
    except http.client.HTTPException as e:
        print('Unable to get %s - HTTPException' % name)
        return False
    except Exception as e:
        import traceback
        print('Unable to get %s - Exception = %s' % (name, traceback.format_exc()))
        return False

    web_md5 = self.get_md5(txt)
    if web_md5 != md5:
        print("Checksum error in %s. Download aborted" % name)
        return False

    new_name = os.path.join(base_dir, name + "." + self.web_version.replace(".", "_"))
    with open(new_name, "wb") as f:
        f.write(txt)

    return True
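# self.get_md5() is defined elsewhere in the original class; a minimal
# sketch of the obvious hashlib-based implementation over the downloaded
# bytes (an assumption):
import hashlib

def get_md5(self, data):
    return hashlib.md5(data).hexdigest()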
def get_more_links(more_parameters=()):
    parameters = {"format": "json",
                  "action": "query",
                  "prop": "links",
                  "pllimit": 500,
                  "plnamespace": 0,
                  "continue": "",
                  "titles": urllib.parse.quote(start_page.encode("utf8"))}
    parameters.update(more_parameters)

    queryString = "&".join("%s=%s" % (k, v) for k, v in parameters.items())
    # This ensures that redirects are followed automatically, documented here:
    # http://www.mediawiki.org/wiki/API:Query#Resolving_redirects
    queryString = queryString + "&redirects"

    url = "http://%s.wikipedia.org/w/api.php?%s" % (wikipedia_language, queryString)

    # get json data and make a dictionary out of it:
    request = urllib.request.urlopen(url)
    encoding = request.headers.get_content_charset()
    jsonData = request.read().decode(encoding)
    data = json.loads(jsonData)

    pageId = list(data['query']['pages'])[0]
    if int(pageId) <= 0:
        sys.exit("Page doesn't exist.")

    link_list = data['query']['pages'][str(pageId)]['links']

    return [entry["title"] for entry in link_list], data
def __init__(self, force_update):
    self.remote_version = ""
    self.md5s = {}

    # Get local info (local update.info)
    with open("/usr/share/cnchi/update.info", "r") as local_update_info:
        response = local_update_info.read()

    if len(response) > 0:
        updateInfo = json.loads(response)
        self.local_files = updateInfo['files']

    # Download update.info (contains info of all Cnchi's files)
    request = download.url_open(_update_info_url)
    if request is not None:
        response = request.read().decode('utf-8')
        if len(response) > 0:
            updateInfo = json.loads(response)
            self.remote_version = updateInfo['version']
            for remote_file in updateInfo['files']:
                self.md5s[remote_file['name']] = remote_file['md5']
            logging.info(_("Cnchi Internet version: %s"), self.remote_version)

    self.force = force_update
def __init__(self, force_update=False):
    self.web_version = ""
    self.web_files = []

    response = ""
    try:
        update_info_url = url_prefix + "update.info"
        request = urlopen(update_info_url)
        response = request.read().decode('utf-8')
    except urllib.error.HTTPError as e:
        print('Unable to get latest version info - HTTPError = %s' % e.reason)
    except urllib.error.URLError as e:
        print('Unable to get latest version info - URLError = %s' % e.reason)
    except http.client.HTTPException as e:
        print('Unable to get latest version info - HTTPException')
    except Exception as e:
        import traceback
        print('Unable to get latest version info - Exception = %s' % traceback.format_exc())

    if len(response) > 0:
        updateInfo = json.loads(response)
        self.web_version = updateInfo['version']
        self.web_files = updateInfo['files']
        print("web version: %s" % self.web_version)

    self.force = force_update
def getJSON(self, url):
    try:
        request = urllib.request.urlopen(url)
        data = json.loads(request.read().decode('UTF-8'))
        return data
    except urllib.error.URLError as e:
        logging.warning("Error: TWITCH API connection")
def dnsHistory(domain):
    rows = ''
    print("\n-- Checking dns history --")
    url = 'http://toolbar.netcraft.com/site_report?url=' + domain
    try:
        request = urllib.request.urlopen(url)
        html = request.read().decode(errors='ignore')
    except:
        html = ''
    soup = BeautifulSoup(html)
    tables = soup.findAll(attrs={'class': 'TBtable'})
    try:
        table = tables[1]
    except:
        table = ''  # Prevents errors if no history returned
    rows = ''
    if table:
        rows = soup.table.findAll('tr')  # Need to edit out again
    x = -1
    try:
        for tr in rows:
            columns = tr.findAll('td')
            for td in columns:
                text = ''.join(td.find(text=True))
                if x % 5 == 0:  # Only ip addresses are checked
                    if dns.query(text):  # Finds last ip that's not CloudFlare
                        print(output("The last known ip address is: %s" % text))
                        if text not in iplist:
                            iplist.append(text)
                        raise End  # Breaks from multiple loops
                x += 1
    except End:
        pass
    print("\n#" + "-" * 77 + "#")
def start_http_session(observatory):
    today_utc = datetime.datetime.utcnow()
    deltas = runtimeConfigs["delays"]

    requestString = "{url}/{observatory}/{type}/{file}"
    url = requestString.format(
        url=runtimeConfigs["url"],
        observatory=observatory,
        type="OneMinute",
        file=form_file_name(observatory.lower(), today_utc))
    url_sec = requestString.format(
        url=runtimeConfigs["url"],
        observatory=observatory,
        type="OneSecond",
        file=form_file_name_sec(observatory.lower(), today_utc))

    try:
        request = urllib.request.urlopen(url)
        request_sec = urllib.request.urlopen(url_sec)

        regex_string = "{year}-{month:02d}-{day:02d} {hour:02d}:{minute:02d}:{second:02d}.*"
        data_regex_string = "(-?\\d{1,5}\\.\\d{2}\\s*){4}"

        geo_data = request.read().decode("utf-8")
        geo_data_s = request_sec.read().decode("utf-8")

        for dtime in deltas:
            today_date = today_utc - dtime
            search_regex = re.compile(
                regex_string.format(year=today_date.year,
                                    month=today_date.month,
                                    day=today_date.day,
                                    hour=today_date.hour,
                                    minute=today_date.minute,
                                    second=0) + data_regex_string)
            search_regex_s = re.compile(
                regex_string.format(year=today_date.year,
                                    month=today_date.month,
                                    day=today_date.day,
                                    hour=today_date.hour,
                                    minute=today_date.minute,
                                    second=today_date.second) + data_regex_string)
            process_data(geo_data, search_regex, "min", dtime, observatory)
            process_data(geo_data_s, search_regex_s, "sec", dtime, observatory)

    #### On Error, insert missing data point to database ####
    except urllib.error.HTTPError:
        print("Error connecting to ", url)
        for dtime in deltas:
            data_map_m = {"h": 1, "d": 1, "z": 1, "f": 1,
                          "delay": dtime.seconds,
                          "timestamp": datetime.date.today(),
                          "res": "min", "obs": observatory}
            data_map_s = {"h": 1, "d": 1, "z": 1, "f": 1,
                          "delay": dtime.seconds,
                          "timestamp": datetime.date.today(),
                          "res": "sec", "obs": observatory}
            insert_record(data_map_m)
            insert_record(data_map_s)
    except http.client.IncompleteRead:
        print("Incomplete Read, Something went wrong network side")
def update_best_videos(recent_videos):
    global best_videos
    for best_video in best_videos:
        # minimal_date = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(minutes=BIG_DELAY)
        # seconds_minimal = time.mktime(minimal_date.timetuple())
        # seconds_best = time.mktime(best_video.published_at.timetuple())
        # if seconds_minimal > seconds_best:
        #     continue
        recent_videos.append(best_video)
    best_videos = []

    ids = ",".join(str(video.video_id) for video in recent_videos)
    GET_QUERY = ("https://www.googleapis.com/youtube/v3/videos"
                 "?part=statistics&id=" + ids + "&key=" + API_KEY)
    request = urllib.request.urlopen(GET_QUERY)
    videos = json.loads(request.read().decode(
        request.info().get_param("charset") or "utf-8"))

    for video in videos["items"]:
        index = -1
        for i in range(len(recent_videos)):
            if recent_videos[i].video_id == video["id"]:
                index = i
                break
        assert index != -1
        rvideo = recent_videos[index]
        new_video = Video(rvideo.title, rvideo.published_at, rvideo.video_id,
                          int(video["statistics"]["commentCount"]))
        best_videos.append(new_video)

    best_videos.sort()
    best_videos = best_videos[:MAX_BEST_VIDEOS_COUNT]
def dl_extra_infos(year, month):
    """ Download extra infos from CollecTor. """
    url = "https://collector.torproject.org/archive/relay-descriptors/extra-infos"
    filename = "extra-infos-%s-%s.tar.xz" % (year, month)
    save_dir_path = "extra-infos"
    if not os.path.isdir(save_dir_path):
        os.mkdir(save_dir_path)
    save_path = "%s/%s" % (save_dir_path, filename)
    if os.path.isfile(save_path):
        print("  [+] Extra infos %s found" % (save_path))
        return save_path
    # Check if the directory exists.
    if os.path.isdir("%s" % (save_path[:-7])):
        print("  [+] Extra infos %s found" % (save_path[:-7]))
        return save_path
    print("  [+] Downloading extra infos %s/%s" % (url, filename))
    try:
        request = urllib.request.urlopen("%s/%s" % (url, filename))
        if request.code != 200:
            print("  [-] Unable to fetch extra infos %s at %s" % (filename, url))
            return None
    except Exception as e:
        print("  [-] Unable to fetch %s/%s" % (url, filename))
        return None
    fp = open(save_path, "wb+")
    fp.write(request.read())
    fp.close()
    return save_path
def getJSON_text(url):
    request = urllib.request.urlopen(url)
    data = request.read()
    data_string = data.decode('UTF-8')
    print(data_string)
    return data_string
def make_request(self, method, **kwargs):
    request = urlopen(
        self.api.url,
        data=json.dumps(
            dict(
                {
                    "request": {
                        "interface": self.name,
                        "method": method,
                        "parameters": kwargs,
                    }
                },
                **({"token": self.api.token} if self.api.token else {}),
            )).encode("utf-8"),
    )
    response = json.loads(request.read().decode("utf-8"))
    exception = response.get("response", {}).get("exception", None)
    if exception:
        raise Exception(exception["message"])
    else:
        return response["response"]["result"]
def get_declarations(cls):
    '''
    Lists snow emergency declarations throughout the state.
    Returns a JSON or None if no data found.
    '''
    # container to hold results
    declarations = {'declarations': []}

    # submit request for data
    request = urllib.request.urlopen(cls.service_url)
    response = request.read()

    # parse xml response; use a distinct name for the element so it is not
    # shadowed by the dict built from its children
    events = ET.fromstring(response)
    for event_element in events:
        event = {}
        for declaration in event_element:
            event[declaration.tag] = declaration.text
        declarations['declarations'].append(event)

    if len(declarations['declarations']) > 0:
        return declarations
    return None
def log_in(request):
    if request.method == 'POST':
        data = json.loads(request.read().decode('utf8'))
        user = authenticate(username=data['uid'],
                            password=settings.DEFAULT_PASSWORD)
        if user is not None:
            if user.is_active:
                login(request, user)
                return HttpResponse('LOGIN OK POST')
            else:
                return HttpResponse('USER NOT ACTIVE')
        else:
            result = sign_up(data)
            if result:
                return HttpResponse('NEW SIGN UP')
            else:
                return HttpResponse('SIGN UP FAIL')
    else:
        return HttpResponse('POST REQUESTED')
def login_info(request):
    if request.method == 'GET':
        login_info = AuthenticationForm(request.POST)
        return render(request, 'login.html', {'login': login_info})
    if request.method == 'POST':
        response = request.read().decode().split('&')
        username = response[1][response[1].find('=') + 1:]
        password = response[2][response[2].find('=') + 1:]
        #request.session = get_user_model()._meta.pk.to_python(request.session[SESSION_KEY])
        #username = request.POST['username']
        #password = request.POST['password']
        user = authenticate(username=username, password=password)
        if user:
            # Redacted in the original snippet: print("User: "******
            # "Password: "****** -- presumably prints the credentials and
            # logs the user in before responding.
            login(request, user)
        else:
            try:
                user = User.objects.create_user(username, password=password)
                login(request, user)
            except:
                username = '******'
        return HttpResponse("Hello World. You're " + username)
def get_html(url):
    req = urllib.request.Request(url, None, headers=headers)
    try:
        request = urllib.request.urlopen(req)
    except:
        print("urlopen error")
        return get_html(url)
    html = request.read()

    # Detect the encoding
    enc = chardet.detect(html)
    #print(enc)

    decoded = ""
    if enc["encoding"]:
        try:
            decoded = codecs.decode(html, encoding=enc["encoding"], errors='strict')
        # If decoding fails, fall back to decoding as utf-8
        except UnicodeDecodeError as e:
            print("UnicodeDecodeError {0}".format(e.reason))
            decoded = codecs.decode(html, encoding="utf-8", errors='strict')
    return decoded
def extract_well_type(lease_query_result):
    if 'detail_link_rgx' not in extract_well_type.__dict__:
        extract_well_type.detail_link_rgx = re.compile(
            r'href="(leaseDetailAction.do[^"]+)"', re.IGNORECASE)

    match = extract_well_type.detail_link_rgx.search(lease_query_result)
    if not match:
        raise RuntimeError('No detail link found!')

    detail_url = URL_BASE + match.group(1)
    request = urllib.request.urlopen(detail_url)
    if request.status != 200:
        raise RuntimeError('HTTP request failed.')
    lease_detail = request.read().decode()

    if 'well_type_rgx' not in extract_well_type.__dict__:
        extract_well_type.well_type_rgx = re.compile(
            r'Well Type:\s+<[^>]+>\s+(\w+)', re.IGNORECASE)

    match = extract_well_type.well_type_rgx.search(lease_detail)
    if not match:
        raise RuntimeError('Unable to find well type!')
    return match.group(1)
def load_remote_manifest(url: str) -> Dict[str, Any]:
    """
    Converts a remote yaml file into a Python dictionary
    """
    tmp_dir, _ = get_tmp_dir()
    try:
        request = urllib.request.urlopen(url, timeout=30)
    except urllib.error.HTTPError as e:  # type: ignore
        e.msg += " " + url
        raise
    manifest_path = os.path.join(tmp_dir, str(uuid.uuid4()) + ".yaml")
    with open(manifest_path, "wb") as manifest:
        while True:
            buffer = request.read(BLOCK_SIZE)
            if not buffer:
                # There is nothing more to read
                break
            manifest.write(buffer)
    try:
        result = load_local_manifest(manifest_path)
    finally:
        os.remove(manifest_path)
    return result
def get():
    """
    Returns properly formatted weather for Rochester, NY
    City can be changed by grabbing the proper openweathermap.org url.
    """
    weather_string = "Weather Unavailable"
    weather_url = "http://api.openweathermap.org/data/2.1/weather/city/5134086"
    request = urllib.request.urlopen(weather_url)
    weather_info = json.loads(request.read().decode("utf-8"))
    if request.getcode() not in range(200, 300):
        request.close()
        return weather_string
    request.close()
    if weather_info is not None:
        temp = str(k_to_f(weather_info['main']['temp']))
        #state = str(weather_info['weather'][0]['main'])
        desc = str(weather_info['weather'][0]['description'])
        #weather_string = temp + " degrees, " + desc
        weather_string = temp + "°F, " + desc
    return weather_string
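# k_to_f() is defined elsewhere in the original module; a sketch of the
# Kelvin-to-Fahrenheit conversion it presumably performs (an assumption):
def k_to_f(kelvin):
    return round((kelvin - 273.15) * 9 / 5 + 32)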
def get_meals(self):
    menus = []
    try:
        date = datetime.datetime.now().strftime("%Y-%m-%d")
        url = "https://www.webservices.ethz.ch/gastro/v1/RVRI/Q1E1/meals/de/{}/lunch".format(date)
        with urllib.request.urlopen(url) as request:
            mensas = json.loads(request.read().decode())
        for mensa in mensas:
            if mensa["mensa"] == self.api_name:
                for meal in mensa["meals"]:
                    menu = Meal()
                    menu.label = meal['label']
                    menu.price_student = meal['prices']['student']
                    menu.price_staff = meal['prices']['staff']
                    menu.price_extern = meal['prices']['extern']
                    menu.description = meal['description']
                    menus.append(menu)
        return menus
    except Exception as e:
        print(e)
        return menus  # we failed, but let's pretend nothing ever happened
def openweather(city=NAME, lang=LANG, unit=UNIT, api_key=OPENWEATHER_API_KEY):
    try:
        request = urllib.request.urlopen(
            f"{OPENWEATHER_URL}?q={city.replace(' ', '+')}"
            f"&lang={lang}&units={unit}&appid={api_key}")
        if request.getcode() == 200:
            data = json.loads(request.read())
            temp = int(data["main"]["temp"])
            return {
                "name": data["name"],
                "country": iso3().get(data["sys"]["country"]),
                "temp": temp,
                "unit": check_unit(unit),
                "description": data["weather"][0]["description"],
            }
        else:
            print(f"E: {request.getcode()}")
    except:
        pass
def download(self, name, md5):
    url = _url_prefix + name
    try:
        request = urlopen(url)
        txt = request.read()  # .decode('utf-8')
    except urllib.error.HTTPError as e:
        logging.exception('Unable to get %s - HTTPError = %s' % (name, e.reason))
        return False
    except urllib.error.URLError as e:
        logging.exception('Unable to get %s - URLError = %s' % (name, e.reason))
        return False
    except http.client.HTTPException as e:
        logging.exception('Unable to get %s - HTTPException' % name)
        return False
    except Exception as e:
        import traceback
        logging.exception('Unable to get %s - Exception = %s' % (name, traceback.format_exc()))
        return False

    web_md5 = self.get_md5(txt)
    if web_md5 != md5:
        logging.error("Checksum error in %s. Download aborted" % name)
        return False

    new_name = os.path.join(
        _base_dir, name + "." + self.web_version.replace(".", "_"))
    with open(new_name, "wb") as f:
        f.write(txt)

    return True
def scrape():
    """This function scrapes the quote of the day from WikiQuotes"""
    # Setting the URL
    my_url = 'https://en.wikiquote.org/wiki/Wikiquote:Quote_of_the_day'

    try:
        # Open connection with the url, get the page and close
        request = urllib.request.urlopen(my_url)
        page_html = request.read()
        request.close()

        # HTML parsing
        page_soup = soup(page_html, 'html.parser')

        # Get date, quote and author
        date = page_soup.find_all('center')[1]
        quote = page_soup.find('i')
        author = page_soup.find('td', style='font-size:smaller;')

        # Get text from the elements
        date_txt = date.get_text()
        quote_txt = quote.get_text()
        author_txt = author.get_text()
    except Exception as e:
        print(e, e.args)
        return 1

    # Remove '~\n' & double spaces from str for formatting
    author_txt = author_txt.replace('~\n', '')
    date_txt = date_txt.replace('~\n', '')
    quote_txt = quote_txt.replace('~\n', '').replace('  ', ' ')

    # Result
    message = date_txt + ' *** ' + quote_txt + ' *** ' + author_txt
    return message
def storage(prefs):
    import sqlite3
    result = research(addon)["results"][0]
    request = urllib.request.urlopen(result["url"])
    contents = request.read().decode("utf-8")
    guid = find_guid(contents)
    home = os.environ["HOME"]
    profiles = glob.glob(f"{home}/.mozilla/firefox/**.default-release/")
    for profile in profiles:
        conn = sqlite3.connect(profile + "storage-sync-v2.sqlite")
        c = conn.cursor()
        # Target schema:
        # CREATE TABLE storage_sync_data (
        #     ext_id TEXT NOT NULL PRIMARY KEY,
        #     data TEXT,
        #     sync_change_counter INTEGER NOT NULL DEFAULT 1
        # );
        c.execute(
            "INSERT OR REPLACE INTO storage_sync_data VALUES (?,?,?)",
            (guid, json.dumps(prefs, separators=(",", ":")), 1))
        conn.commit()
def get_html(url):
    tries = 5
    req = urllib.request.Request(url)
    req.add_header('User-agent', 'Mozilla/5.0 (Linux x86_64)')
    # Add DoNotTrack header, do the right thing even if nobody cares
    req.add_header('DNT', '1')
    while tries > 0:
        try:
            request = urllib.request.urlopen(req)
            tries = 0
        except socket.timeout:
            if debug:
                raise
            tries -= 1
        except urllib.error.HTTPError as e:
            if debug:
                raise
            print("HTTP Error " + str(e.code) + ": " + e.reason)
            print("Aborting...")
            exit()

    # html.parser generates problems; I could fix them, but switching to lxml
    # is easier and faster
    soup = BeautifulSoup(request.read(), "lxml")
    return soup
def send_covid_tweet(self, data, msg):
    # gets covid data
    with urllib.request.urlopen(
            'https://api.covidtracking.com/v1/us/daily.json') as request:
        covid_data = json.loads(request.read().decode())
    days_ago = 0
    if len(msg) > 2:
        days_ago = int(msg[2])
    stats = covid_data[days_ago]
    raw_date = str(stats['date'])
    date = raw_date[:4] + '-' + raw_date[4:6] + '-' + raw_date[6:]
    # sends the status
    api.update_status(
        'COVID STATISTICS' +
        '\n--------------------' +
        '\nDate: ' + date +
        '\nConfirmed cases: ' + str(stats['positive']) +
        '\nNew cases: ' + str(stats['positiveIncrease']) +
        '\nDeaths: ' + str(stats['death']) +
        '\nNew deaths: ' + str(stats['deathIncrease']) +
        '\nCurrently hospitalized: ' + str(stats['hospitalizedCurrently']) +
        '\nTotal hospitalized: ' + str(stats['hospitalized']),
        in_reply_to_status_id=data['id'])
def get_weather_data(station_code='KNYC'):
    """Function to query the website for data based on the user-submitted
    station code, update the dictionary values and get the image url"""
    url_general = 'http://www.weather.gov/xml/current_obs/{}.xml'
    url = url_general.format(station_code)
    request = urllib.request.urlopen(url)
    content = request.read().decode()

    # Using ElementTree to retrieve specific tags from the xml
    import xml.etree.ElementTree as ET
    xml_root = ET.fromstring(content)

    # Update the dictionary values with data from the xml
    for data_point in weather_data_tags_dict.keys():
        try:
            weather_data_tags_dict[data_point] = xml_root.find(data_point).text
        except:
            # handle the case where certain data points are not available
            # for a station
            weather_data_tags_dict[data_point] = "-"

    # Get the url for the image representing the weather icon
    icon_url_base = xml_root.find('icon_url_base').text
    icon_url_name = xml_root.find('icon_url_name').text
    icon_url = icon_url_base + icon_url_name

    return weather_data_tags_dict, icon_url
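# weather_data_tags_dict is a module-level dict defined elsewhere in the
# original; a plausible shape, keyed by NWS current-observation XML tag
# names (the exact tag list here is an assumption):
weather_data_tags_dict = {
    'observation_time': '-',
    'weather': '-',
    'temp_f': '-',
    'relative_humidity': '-',
    'wind_string': '-',
}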
def torrent_search(self, query):
    logging.info('Searching matching movie torrents for "{}"'.format(query))
    request = urllib.request.urlopen(
        urllib.request.Request(
            'https://api.apidomain.info/list?' + urllib.parse.urlencode({
                'sort': 'relevance',
                'quality': '720p,1080p,3d',
                'page': 1,
                'keywords': query,
            }),
            headers={
                'User-Agent':
                    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ' +
                    '(KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36'
            }))
    results = [{
        'url': _['items'][0]['torrent_magnet'],
        'title': _['title'],
    } for _ in json.loads(request.read())['MovieList']]
    return Response(output=results)
def getOMDB(movie):
    try:
        movie_web = os.path.splitext(movie)[0]
        request_url = "https://www.omdbapi.com/" + \
                      "?s={}".format(urllib.parse.quote(movie_web)) + \
                      "&apikey=2df782b"
        request = urllib.request.urlopen(request_url)
        r = json.loads(request.read().decode("utf-8"))
    except urllib.error.HTTPError as error:
        print(error)
        return
    except urllib.error.URLError as error:
        print(error)
        return

    try:
        movies = [movie["Title"] + " (" + movie["Year"] + ")"
                  for movie in r["Search"]]
    except KeyError:
        return
    if not movies:
        return

    movies.insert(0, "Skip")
    movies.insert(1, "Manual search")
    return movies
def saveMp3(items, path):
    for item in items:
        if 'lexicalEntries' in item:
            if 'pronunciations' in item['lexicalEntries'][0]:
                if 'audioFile' in item['lexicalEntries'][0]['pronunciations'][0]:
                    result = {'id': item['id'],
                              'url': item['lexicalEntries'][0]['pronunciations'][0]['audioFile']}
                    print(result['url'])
                    request = urllib.request.urlopen(result['url'], timeout=10)
                    with open(path + result['id'] + '.mp3', 'wb') as f:
                        try:
                            f.write(request.read())
                        except:
                            print("error")
def get_city_station_codes(state='ca'):
    """Function to obtain the list of cities and their station codes
    based on the selected state"""
    # generic url missing state code
    url_general = "http://w1.weather.gov/xml/current_obs/seek.php?state={}&Find=Find"
    state = state.lower()
    url = url_general.format(state)  # format the url to include the selected state code

    request = urllib.request.urlopen(url)  # open the url
    content = request.read().decode()  # read, decode the HTML data and store it

    parser = WeatherHTMLParser()  # create a parser object
    parser.feed(content)  # feed the content from the webpage to the parser

    if len(parser.stations) != len(parser.cities):  # check for data inconsistency
        print("Error: discrepancy between expected number of stations and actual")
        exit()  # exit the app

    scr.delete('1.0', tk.END)  # clear scrolledText widget for next button click

    for i in range(len(parser.stations)):
        city_station = parser.cities[i] + ' (' + parser.stations[i] + ')'
        scr.insert(tk.INSERT, city_station + '\n')
def url_filter(url, lock, save_file_path):
    try:
        print(url)
        timeout = 50
        socket.setdefaulttimeout(timeout)
        sleep_download_time = 10
        time.sleep(sleep_download_time)
        # context = ssl._create_unverified_context()
        request = urllib.request.urlopen(url)
        sauce = request.read()
        request.close()
    except (urllib.error.URLError, urllib.error.HTTPError, socket.timeout) as e:
        print('URL Error!', url)
        print(e)
        return

    soup = bs.BeautifulSoup(sauce, 'lxml')

    # check highlight
    highlight = soup.select('.el__storyhighlights__item')
    if len(highlight) == 0:  # there is no highlight in website
        return

    # check video
    video = soup.select('.el__video')
    if len(video) == 0:  # there is no video in website
        return

    lock.acquire()
    save_file = codecs.open(save_file_path, 'a')
    save_file.write(str(url) + "\n")
    save_file.close()
    lock.release()
def _strip_playlist(self, url: str):
    self.text_output.insert(tk.END, "Playlist detected, Stripping links..\n")
    self.showEnd_output()

    # Make a list of all youtube links in the playlist
    final_urls = []  # results

    # Gather info about the search
    info = self.ytdl.extract_info(url, download=False, process=False)
    with urllib.request.urlopen(info["webpage_url"]) as request:
        webpage = request.read()
    soup = BeautifulSoup(webpage, 'html.parser')

    vid_url_pat = re.compile(r'watch\?v=\S+?list=')
    vid_url_matches = list(set(re.findall(vid_url_pat, str(soup))))

    # If url is a video, append it to the results
    for vid_url in vid_url_matches:
        if '&' in vid_url:
            url_amp = vid_url.index('&')
            final_urls.append('http://www.youtube.com/' + vid_url[:url_amp])
    for item in final_urls:
        self.songs.append(item)

    # Notify user
    self.text_output.insert(tk.END, "\tLinks found:\n")
    self.text_output.insert(
        tk.END, '\n'.join([f'\t\t- {x[:55]}' for x in final_urls]))
    self.text_output.insert(
        tk.END, "\n\nReady for download, or add another link..\n\n")
    self.showEnd_output()
    self.entitie_total_vids["text"] = f'Videos to download: {len(self.songs)}'
from urllib import request

from bs4 import BeautifulSoup

url = "https://www.baidu.com"
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'
}
req = request.Request(url=url, headers=headers)
response = request.urlopen(req)  # named to avoid shadowing the request module
soup = BeautifulSoup(response.read(), 'lxml')


def child(soup, level, order, l):
    level = level + 1
    for node in soup.contents:
        if not node.name:
            continue
        l.append(soup.name + str(level - 1) + '_' + str(order) + '->' +
                 node.name + str(level) + '_' + str(soup.contents.index(node)))
        child(node, level, soup.contents.index(node), l)


sideList = []
child(soup, 0, 0, sideList)

outputFile = open('test.dot', 'w')
outputFile.write('digraph G{\n')
outputFile.write('rankdir="LR"\n')
outputFile.write('node[fontname = "Consolas Italic", fontcolor="red",color="cyan"]\n')
os.chdir("build") urls = { Path("cairo.txz"): "https://archive.org/download/archlinux_pkg_cairo/" "cairo-1.17.2%2B17%2Bg52a7c79fd-2-x86_64.pkg.tar.xz", Path("fontconfig.txz"): "https://archive.org/download/archlinux_pkg_fontconfig/" "fontconfig-2%3A2.13.91%2B24%2Bg75eadca-1-x86_64.pkg.tar.xz", Path("freetype.zip"): "https://github.com/ubawurinna/freetype-windows-binaries/" "releases/download/v2.9.1/freetype-2.9.1.zip", } for archive_path, url in urls.items(): if not archive_path.exists(): with urllib.request.urlopen(url) as request: archive_path.write_bytes(request.read()) dest = archive_path.stem shutil.rmtree(dest, ignore_errors=True) shutil.unpack_archive(archive_path, dest) # Get cairo.dll (normally loaded by pycairo), checking that it include # FreeType support. Path("cairo/win64").mkdir(parents=True) cairo_dll, = enum_process_modules(b"cairo_ft_font_face_create_for_ft_face") shutil.copyfile(cairo_dll, "cairo/win64/cairo.dll") # Get hold of a CCompiler object, by creating a dummy Distribution with a list # of extension modules that claims to be truthy (but is actually empty) and # running its build_ext command. Prior to the deprecation of distutils, this # was just ``cc = distutils.ccompiler.new_compiler(); cc.initialize()``. class L(list): __bool__ = lambda self: True be = setuptools.Distribution({"ext_modules": L()}).get_command_obj("build_ext")
#!/usr/bin/python3
"""
Write a Python script that fetches https://intranet.hbtn.io/status
"""
import urllib.request


if __name__ == "__main__":
    with urllib.request.urlopen("https://intranet.hbtn.io/status") as request:
        html = request.read()
    print("Body response:")
    print("\t- type: {}".format(type(html)))
    print("\t- content: {}".format(html))
    print("\t- utf8 content: {}".format(html.decode("utf-8")))
def build_extensions(self):
    try:
        import importlib.metadata as importlib_metadata
    except ImportError:
        import importlib_metadata

    ext, = self.distribution.ext_modules

    ext.depends += [
        "setup.py",
        *map(str, Path("src").glob("*.h")),
        *map(str, Path("src").glob("*.cpp")),
    ]
    if UNITY_BUILD:
        ext.sources += ["src/_unity_build.cpp"]
    else:
        ext.sources += [*map(str, Path("src").glob("*.cpp"))]
        ext.sources.remove("src/_unity_build.cpp")
    ext.language = "c++"

    # pybind11.get_include() is brittle (pybind #1425).
    pybind11_include_path = next(
        path for path in importlib_metadata.files("pybind11")
        if path.name == "pybind11.h").locate().parents[1]
    if not (pybind11_include_path / "pybind11/pybind11.h").exists():
        # egg-install from setup_requires:
        # importlib-metadata thinks the headers are at
        #   .eggs/pybind11-VER-TAG.egg/pybind11-VER.data/headers/pybind11.h
        # but they're actually at
        #   .eggs/pybind11-VER-TAG.egg/pybind11.h
        # pybind11_include_path is
        #   /<...>/.eggs/pybind11-VER-TAG.egg/pybind11-VER.data
        # so just create the proper structure there.
        try:
            is_egg = (pybind11_include_path.relative_to(
                Path(__file__).resolve().parent).parts[0] == ".eggs")
        except ValueError:
            # Arch Linux ships completely wrong metadata, but the headers
            # are in the default include paths, so just leave things as is.
            is_egg = False
        if is_egg:
            shutil.rmtree(pybind11_include_path / "pybind11",
                          ignore_errors=True)
            for file in [*pybind11_include_path.parent.glob("**/*")]:
                if file.is_dir():
                    continue
                dest = (pybind11_include_path / "pybind11" /
                        file.relative_to(pybind11_include_path.parent))
                dest.parent.mkdir(parents=True, exist_ok=True)
                shutil.copy2(file, dest)
    ext.include_dirs += [pybind11_include_path]

    tmp_include_dir = Path(
        self.get_finalized_command("build").build_base, "include")
    tmp_include_dir.mkdir(parents=True, exist_ok=True)
    ext.include_dirs += [tmp_include_dir]
    try:
        get_pkg_config(f"--atleast-version={MIN_RAQM_VERSION}", "raqm")
    except (FileNotFoundError, CalledProcessError):
        (tmp_include_dir / "raqm-version.h").write_text("")  # Touch it.
        with urllib.request.urlopen(
                f"https://raw.githubusercontent.com/HOST-Oman/libraqm/"
                f"v{MIN_RAQM_VERSION}/src/raqm.h") as request, \
             (tmp_include_dir / "raqm.h").open("wb") as file:
            file.write(request.read())

    if sys.platform == "linux":
        import cairo
        get_pkg_config(f"--atleast-version={MIN_CAIRO_VERSION}", "cairo")
        ext.include_dirs += [cairo.get_include()]
        ext.extra_compile_args += [
            "-std=c++1z", "-fvisibility=hidden", "-flto",
            "-Wall", "-Wextra", "-Wpedantic",
            *get_pkg_config("--cflags", "cairo"),
        ]
        ext.extra_link_args += ["-flto"]
        if MANYLINUX:
            ext.extra_link_args += ["-static-libgcc", "-static-libstdc++"]

    elif sys.platform == "darwin":
        import cairo
        get_pkg_config(f"--atleast-version={MIN_CAIRO_VERSION}", "cairo")
        ext.include_dirs += [cairo.get_include()]
        # On OSX<10.14, version-min=10.9 avoids deprecation warning wrt.
        # libstdc++, but assumes that the build uses non-Xcode-provided LLVM.
        # On OSX>=10.14, assume that the build uses the normal toolchain.
        macosx_min_version = (
            "10.14"
            if LooseVersion(platform.mac_ver()[0]) >= "10.14" else "10.9")
        ext.extra_compile_args += [
            "-std=c++1z", "-fvisibility=hidden", "-flto",
            f"-mmacosx-version-min={macosx_min_version}",
            *get_pkg_config("--cflags", "cairo"),
        ]
        ext.extra_link_args += [
            # version-min needs to be repeated to avoid a warning.
            "-flto", f"-mmacosx-version-min={macosx_min_version}",
        ]

    elif sys.platform == "win32":
        # Windows conda path for FreeType.
        ext.include_dirs += [Path(sys.prefix, "Library/include")]
        ext.extra_compile_args += [
            "/std:c++17", "/Zc:__cplusplus", "/experimental:preprocessor",
            "/EHsc", "/D_USE_MATH_DEFINES",
            "/wd4244", "/wd4267",  # cf. gcc -Wconversion.
        ]
        ext.libraries += ["psapi", "cairo", "freetype"]
        # Windows conda path for FreeType -- needs to be str, not Path.
        ext.library_dirs += [str(Path(sys.prefix, "Library/lib"))]

    # Workaround https://bugs.llvm.org/show_bug.cgi?id=33222 (clang +
    # libstdc++ + std::variant = compilation error) and pybind11 #1604
    # (-fsized-deallocation). Note that `.compiler.compiler` only exists
    # for UnixCCompiler.
    if os.name == "posix":
        compiler_macros = subprocess.check_output(
            [*self.compiler.compiler, "-dM", "-E", "-x", "c", "/dev/null"],
            universal_newlines=True)
        if "__clang__" in compiler_macros:
            ext.extra_compile_args += [
                "-stdlib=libc++", "-fsized-deallocation"]
            # Explicitly linking to libc++ is required to avoid picking up
            # the system C++ library (libstdc++ or an outdated libc++).
            ext.extra_link_args += ["-lc++"]

    super().build_extensions()

    if sys.platform == "win32":
        for dll in ["cairo.dll", "freetype.dll"]:
            for path in paths_from_link_libpaths():
                if (path / dll).exists():
                    shutil.copy2(path / dll,
                                 Path(self.build_lib, "mplcairo"))
                    break
#!/usr/bin/env python3

from html.parser import HTMLParser
import urllib.request


class myParser(HTMLParser):
    def handle_starttag(self, tag, attrs):
        if tag == "a":
            for a in attrs:
                if a[0] == 'href':
                    link = a[1]
                    if link.find('http') >= 0:
                        print(link)
                        newParse = myParser()
                        newParse.feed(link)


url = "http://www.packtpub.com"
request = urllib.request.urlopen(url)
parser = myParser()
parser.feed(request.read().decode('utf-8'))
def downloadTemplate(templateUrl):
    request = urllib.request.urlopen(templateUrl)
    response = request.read().decode('utf-8')
    return response
def main(args):
    try:
        destination_ipv4address = ipaddress.ip_address(args.address[0])
    except ValueError:
        print('[error] Destination address is invalid ({})'.format(
            args.address[0]), file=sys.stderr)
        exit(1)

    request_params = {
        'ip': str(destination_ipv4address),
        'port': args.port,
    }

    opener = urllib.request.build_opener()
    # With a proxy
    if args.http_proxy:
        proxy_handler = urllib.request.ProxyHandler({
            "http": args.http_proxy,
            "https": args.http_proxy
        })
        opener.add_handler(proxy_handler)

    request = urllib.request.Request('{}?{}'.format(
        args.torbulkexitlist, urllib.parse.urlencode(request_params)))
    try:
        with opener.open(request) as request:
            response = request.read().decode('utf-8')
    except urllib.error.HTTPError as ex:
        print('[error] HTTP access error code:{}'.format(ex.code),
              file=sys.stderr)
        exit(2)
    except urllib.error.URLError as ex:
        print('[error] HTTP access error', file=sys.stderr)
        print('[error] {}'.format(ex.reason), file=sys.stderr)
        exit(2)

    exit_list_strings = []
    for line in response.split('\n'):
        if re.match(r'^#', line):
            # skip comment lines
            continue
        elif re.match(r'^$', line):
            # skip empty lines too
            continue
        # validate the IPv4 address format
        try:
            ipaddress.ip_address(line)
        except ValueError:
            print('[error] address is invalid({})'.format(line),
                  file=sys.stderr)
            continue
        except Exception as ex:
            print('[error] ', end='', file=sys.stderr)
            print(ex, file=sys.stderr)
            continue
        # build the deny list
        exit_list_strings.append('    Require not ip {}\n'.format(line))

    print('[info] Tor exit list count : {:d}'.format(len(exit_list_strings)))

    try:
        # write .htaccess under the specified directory
        with open('{}/.htaccess'.format(args.export_dir), 'w') as file_htaccess:
            file_htaccess.write('<RequireAll>\n')
            file_htaccess.write('    Require all granted\n')
            file_htaccess.writelines(exit_list_strings)
            file_htaccess.write('</RequireAll>\n')
            file_htaccess.flush()
    except Exception as ex:
        print('[error] ', end='', file=sys.stderr)
        print(ex, file=sys.stderr)
        exit(3)
filetypes = {'pdf'}

for homepage in homepages_list:
    url = homepage[0]
    target_folder = homepage[1]
    # "base url", i.e. url without trailing index.html or similar
    url_base = str.join('/', url.split('/')[:-1])
    try:
        request = urllib.request.urlopen(url)
    except:
        e = sys.exc_info()[0]
        print('Exception:\n {}\n at url: {}'.format(e, url))
    str_html = request.read().decode('UTF-8')

    if not os.path.exists(target_folder):
        os.mkdir(target_folder)

    matches = list()
    for filetype in filetypes:
        # regex matches '"[AnyNumberOfCharactersExceptWhitespace].filetype"'
        matches += re.findall(r'"[^\s]*\.{}"'.format(filetype), str_html)

    for match in matches:
        match = match.strip('"')
        storepath = os.path.join(target_folder, match.split('/')[-1])
        if os.path.exists(storepath) and not force:
            print('skipping {}, as {} exists'.format(match, storepath))