def parse_products(self, response):
    """Parse a product-grid page.

    Yields a ShoppingdotcomItem for each single-store product, a Request to
    the /prices comparison page for each multi-store product, and a Request
    for the next results page when a pagination link exists.
    """
    print "parse_products", response.url
    sel = Selector(response)
    breadcrumb = sel.xpath('//div[contains(@class,"breadCrumb")]')
    # Category trail: the itemprop="title" spans minus the leading root entry...
    categories = [span for span in breadcrumb.xpath(".//span[@itemprop='title']/text()").extract()[1:]]
    # ...plus the final plain span (current category has no itemprop).
    categories.append(breadcrumb.xpath(".//span/text()").extract()[-1])
    print categories
    for product in sel.xpath('//div[contains(@id,"quickLookItem")]'):
        # check if it is a multistore product
        if product.xpath('.//span[contains(@id, "numStoresQA")]'):
            print product.xpath(".//a/@href").extract()[0]
            url = product.xpath(".//a/@href").extract()[0]
            # Swap the last path segment for "prices" to reach the
            # store-comparison page of this product.
            url = "/".join(url.split("/")[:-1]) + "/prices"
            yield Request(urlparse.urljoin(response.url, url),
                          callback=self.parse_multiple_store_product)
        else:
            # It is not a multistore product. Parse it.
            item = ShoppingdotcomItem()
            item["categories"] = categories
            item["product_name"] = product.xpath(".//span[contains(@id, 'nameQA')]/@title").extract()[0]
            # Lazily-loaded images keep the real URL inside a placeholder span.
            if product.xpath(".//span[@class='placeholderImg']").extract():
                item["image_urls"] = product.xpath(".//span[@class='placeholderImg']/text()").extract()
            else:
                item["image_urls"] = product.xpath(".//div[@class='gridItemTop']//img/@src").extract()
            item["product_urls"] = [urlparse.urljoin(response.url, product.xpath(".//a/@href").extract()[0])]
            item["stores"] = product.xpath(".//a[@class='newMerchantName']/text()").extract()
            item["prices"] = [price.replace("\n", "") for price in product.xpath(".//span[@class='productPrice']/a/text()").extract()]
            yield item
    # Check if Next page link is there then yield request with next URL
    if sel.xpath("//a[@name='PLN']").extract():
        yield Request(urlparse.urljoin(response.url, sel.xpath("//a[@name='PLN']/@href").extract()[0]), self.parse_products)
    pass
def parse_start_url(self, response): print response.url sel = Selector(response) for url in sel.xpath("//a"): #print url.xpath("@href").extract() href = url.xpath("@href").extract()[0] if url.xpath("@href").extract() else None if href and href.split("/")[-1] == "products": yield Request(urlparse.urljoin(response.url, href), callback=self.parse_products) if href and href.find("xFA-") >= 0: href = href.replace("xFA-", "").split("~")[0]+"/products" yield Request(urlparse.urljoin(response.url, href), callback=self.parse_products) pass
def parse_products(self, response):
    """Parse a product-grid page.

    Yields a ShoppingdotcomItem for each single-store product, a Request to
    the /prices comparison page for each multi-store product, and a Request
    for the next results page when a pagination link exists.
    """
    print "parse_products", response.url
    sel = Selector(response)
    breadcrumb = sel.xpath('//div[contains(@class,"breadCrumb")]')
    # Category trail: the itemprop="title" spans minus the leading root entry...
    categories = [
        span for span in breadcrumb.xpath(
            ".//span[@itemprop='title']/text()").extract()[1:]
    ]
    # ...plus the final plain span (current category has no itemprop).
    categories.append(breadcrumb.xpath(".//span/text()").extract()[-1])
    print categories
    for product in sel.xpath('//div[contains(@id,"quickLookItem")]'):
        # check if it is a multistore product
        if product.xpath('.//span[contains(@id, "numStoresQA")]'):
            print product.xpath(".//a/@href").extract()[0]
            url = product.xpath(".//a/@href").extract()[0]
            # Swap the last path segment for "prices" to reach the
            # store-comparison page of this product.
            url = "/".join(url.split("/")[:-1]) + "/prices"
            yield Request(urlparse.urljoin(response.url, url),
                          callback=self.parse_multiple_store_product)
        else:
            # It is not a multistore product. Parse it.
            item = ShoppingdotcomItem()
            item["categories"] = categories
            item["product_name"] = product.xpath(
                ".//span[contains(@id, 'nameQA')]/@title").extract()[0]
            # Lazily-loaded images keep the real URL inside a placeholder span.
            if product.xpath(".//span[@class='placeholderImg']").extract():
                item["image_urls"] = product.xpath(
                    ".//span[@class='placeholderImg']/text()").extract()
            else:
                item["image_urls"] = product.xpath(
                    ".//div[@class='gridItemTop']//img/@src").extract()
            item["product_urls"] = [
                urlparse.urljoin(response.url,
                                 product.xpath(".//a/@href").extract()[0])
            ]
            item["stores"] = product.xpath(
                ".//a[@class='newMerchantName']/text()").extract()
            item["prices"] = [
                price.replace("\n", "") for price in product.xpath(
                    ".//span[@class='productPrice']/a/text()").extract()
            ]
            yield item
    # Check if Next page link is there then yield request with next URL
    if sel.xpath("//a[@name='PLN']").extract():
        yield Request(
            urlparse.urljoin(
                response.url,
                sel.xpath("//a[@name='PLN']/@href").extract()[0]),
            self.parse_products)
    pass
def main(argv=sys.argv): """ Punto de entrada al programa """ url = "http://www.vientonomade.com.ar/index.php?option=com_content&view=category&" "layout=blog&id=8&Itemid=10" fetcher = httplib2.Http() get = partial(obtener_pagina, fetcher) while url: html = get(url) uri, links = buscar_links(html) for link in links: try: print urlparse.urljoin(url, link) except UnicodeEncodeError: pass url = uri and urlparse.urljoin(url, uri) or None
def parse_start_url(self, response): print response.url sel = Selector(response) for url in sel.xpath("//a"): #print url.xpath("@href").extract() href = url.xpath("@href").extract()[0] if url.xpath( "@href").extract() else None if href and href.split("/")[-1] == "products": yield Request(urlparse.urljoin(response.url, href), callback=self.parse_products) if href and href.find("xFA-") >= 0: href = href.replace("xFA-", "").split("~")[0] + "/products" yield Request(urlparse.urljoin(response.url, href), callback=self.parse_products) pass
def parse(url, body, **kwargs):
    """Extract the first post from a xici.net page's embedded ``docData``
    JavaScript object and re-render it as a minimal standalone HTML document.

    Returns GBK-encoded HTML, or '<html/>' when no ``var docData`` line is
    found in the page body.
    """
    for line in body.decode('gbk', errors='ignore').splitlines():
        if line.lstrip().startswith('var docData'):
            # Slice the outermost JSON object out of the JS assignment.
            l, r = line.find('{'), line.rfind('}')
            obj = json.loads(line[l:r + 1])
            doc = obj['result']['docinfo'][0]['foolrinfo']
            doc['title'] = obj['result']['sDocTitle']
            doc['url'] = urlparse.urljoin('http://www.xici.net',
                                          obj['result']['strPageUrl'])
            # Site stores dates without the century prefix.
            doc['date'] = '20' + doc['LongDate']
            # Strip markup from the post body.
            doc['content'] = html.fromstring(
                doc['floorcontent']).text_content()
            tpl = Template('''
<html>
<head>
<meta content="text/html; charset=utf-8" http-equiv="content-type">
<title>{{doc['title']}}</title>
</head>
<body>
<a id="title" href="{{doc['url']}}">{{doc['title']}}</a>
<p id="date">{{doc['date']}}</p>
<div id="content">{{doc['content']}}</div>
</body>
</html>''')
            return tpl.render(doc=doc).encode('gbk', errors='ignore')
    else:
        # for/else: loop completed without finding a docData line
        return '<html/>'
def submit(self, opener, res):
    """Select our IdP on the WAYF page and submit the selection form.

    :param opener: the urllib2 opener
    :param res: the response whose URL the form action is resolved against
    :returns: (request, response) for the submitted selection
    """
    log.info('Submitting form to wayf')
    idp = self.idp
    data = self.data
    if idp.get_idp() not in data['origin']:
        raise WAYFException(
            "Can't find IdP '{0}' in WAYF's IdP list".format(
                idp.get_idp()))
    # Build the selection payload from the parsed form fields.
    wayf_data = {
        'origin': data['origin'][idp.get_idp()],
        'shire': data['shire']['value'],
        'providerId': data['providerId']['value'],
        'target': data['target']['value'],
        'time': data['time']['value'],
        'cache': 'false',
        'action': 'selection',
    }
    url = urlparse.urljoin(res.url, data['form']['action'])
    query = urllib.urlencode(wayf_data)
    request = Request(url + '?' + query)
    log.debug("POST: %s" % request.get_full_url())
    response = opener.open(request)
    return request, response
def parse(url, body, **kwargs):
    """Extract the first post from a xici.net page's embedded ``docData``
    JavaScript object and re-render it as a minimal standalone HTML document.

    Returns GBK-encoded HTML, or '<html/>' when no ``var docData`` line is
    found in the page body.
    """
    for line in body.decode('gbk', errors='ignore').splitlines():
        if line.lstrip().startswith('var docData'):
            # Slice the outermost JSON object out of the JS assignment.
            l, r = line.find('{'), line.rfind('}')
            obj = json.loads(line[l:r+1])
            doc = obj['result']['docinfo'][0]['foolrinfo']
            doc['title'] = obj['result']['sDocTitle']
            doc['url'] = urlparse.urljoin('http://www.xici.net', obj['result']['strPageUrl'])
            # Site stores dates without the century prefix.
            doc['date'] = '20'+doc['LongDate']
            # Strip markup from the post body.
            doc['content'] = html.fromstring(doc['floorcontent']).text_content()
            tpl = Template('''
<html>
<head>
<meta content="text/html; charset=utf-8" http-equiv="content-type">
<title>{{doc['title']}}</title>
</head>
<body>
<a id="title" href="{{doc['url']}}">{{doc['title']}}</a>
<p id="date">{{doc['date']}}</p>
<div id="content">{{doc['content']}}</div>
</body>
</html>''')
            return tpl.render(doc=doc).encode('gbk', errors='ignore')
    else:
        # for/else: loop completed without finding a docData line
        return '<html/>'
def handle_starttag(self, tag, attrs):
    """Collect the absolute URL of every anchor tag's href attribute."""
    if tag != 'a':
        return
    for attribute, value in attrs:
        if attribute == 'href':
            # relative hrefs are resolved against the page's base URL
            self.links.add(urlparse.urljoin(self.base_url, value))
def parse(self, response):
    """Download the delinquent-parcels CSV linked from the page, then yield a
    tax-eligibility lookup request (with a partially-filled ReapItem attached)
    for every data row.
    """
    delinquent_link = Selector(response).xpath(
        '//*[@id="box1"]/td[1]/li/font/i/a/@href').extract()
    urllib.urlretrieve(urlparse.urljoin(response.url, delinquent_link[0]),
                       'delinquent.zip')
    unzip('delinquent.zip', 'delinquent')
    with open(glob.glob('delinquent/*.csv')[0], 'rb') as csvfile:
        csvreader = csv.reader(csvfile, delimiter=',')
        # Discover column indexes from the (quoted) header row.
        # NOTE(review): if a heading is missing, the corresponding index
        # variable stays unbound and the row loop raises NameError -- this
        # assumes the feed's headers are stable; confirm.
        for idx, column in enumerate(csvreader.next()):
            column = re.sub('["]', "", column).strip()
            if column.startswith("PARCELID"):
                parcelidcol = idx
            if column.startswith("OWNERNAME1"):
                ownernamecol = idx
            if column.startswith("PARCELLOCATION"):
                parcellocationcol = idx
            if column.startswith("CLS"):
                parcelclass = idx
            if column.startswith("ASMTBLDG"):
                buildingvalue = idx
        for row in csvreader:
            item = ReapItem()
            # parcel IDs arrive quoted; strip the quotes
            item['parcel_id'] = re.sub('["]', "", row[parcelidcol]).strip()
            item['parcel_location'] = row[parcellocationcol].strip()
            item['parcel_class'] = row[parcelclass].strip()
            item['building_value'] = row[buildingvalue].strip()
            request = scrapy.Request(
                "http://mctreas.org/master.cfm?parid={0}&taxyr={1}&own1={2}".format(
                    item['parcel_id'], str(YEAR), row[ownernamecol]),
                callback=self.get_tax_eligibility)
            request.meta['item'] = item
            yield request
def install_artifacts(artifacts, dirstruct, installdir, basestaticurl):
    """
    Install the artifacts into *installdir* following *dirstruct*.

    Artifacts not present in *artifacts* are assumed to be static files
    retrievable from *basestaticurl*. Returns the list of installed paths.
    """
    assert basestaticurl.endswith("/"), "Basestaticurl should end with /"
    installed = []
    for reldir, artifactnames in dirstruct.items():
        destdir = os.path.join(installdir, reldir)
        if os.path.exists(destdir):
            assert os.path.isdir(destdir)
        else:
            log.warn(msg="Making install directory %s" % destdir)
            os.makedirs(destdir)
        for artifactname in artifactnames:
            destpath = os.path.abspath(os.path.join(destdir, artifactname))
            if artifactname in artifacts.keys():
                # The artifact must be loaded from jenkins
                theartifact = artifacts[artifactname]
            else:
                # Static file fetched from the static collection instead
                staticurl = urlparse.urljoin(basestaticurl, artifactname)
                theartifact = Artifact(artifactname, staticurl)
            theartifact.save(destpath)
            installed.append(destpath)
    return installed
def transform(row, table):
    """Turn the row's relative "link" into a full URL and split "name" into
    city name plus "state" using the module's city/state regexp."""
    data = row._asdict()
    data['link'] = urlparse.urljoin('https://pt.wikipedia.org', data['link'])
    city, state = regexp_city_state.findall(data['name'])[0]
    data['name'] = city
    data['state'] = state
    return data
def submit(self, opener, res):
    """POST the COSign IdP login form with credentials from the manager.

    :param opener: the urllib2 opener
    :param res: the response whose URL the form action is resolved against
    :returns: (request, response) for the submitted login
    """
    cm = self.cm
    form = self.data
    url = urlparse.urljoin(res.url, form["form"]["action"])
    log.info("Form Authentication from: %s" % url)
    fields = {
        self.username_field: cm.get_username(),
        self.password_field: cm.get_password(),
        "service": form["service"]["value"],
        "ref": form["ref"]["value"],
    }
    request = Request(url, data=urllib.urlencode(fields))
    log.info("Submitting login form")
    log.debug("POST: %s" % request.get_full_url())
    response = opener.open(request)
    return request, response
def submit(self, opener, res):
    """Pick our IdP on the WAYF page and submit the selection form.

    :param opener: the urllib2 opener
    :param res: the response whose URL the form action is resolved against
    :returns: (request, response) for the submitted selection
    """
    log.info("Submitting form to wayf")
    idp = self.idp
    data = self.data
    if idp.get_idp() not in data["origin"]:
        raise WAYFException("Can't find IdP '{0}' in WAYF's IdP list".format(idp.get_idp()))
    # selection payload assembled from the parsed WAYF form
    wayf_data = {
        "origin": data["origin"][idp.get_idp()],
        "shire": data["shire"]["value"],
        "providerId": data["providerId"]["value"],
        "target": data["target"]["value"],
        "time": data["time"]["value"],
        "cache": "false",
        "action": "selection",
    }
    url = urlparse.urljoin(res.url, data["form"]["action"])
    encoded = urllib.urlencode(wayf_data)
    request = Request(url + "?" + encoded)
    log.debug("POST: %s" % request.get_full_url())
    response = opener.open(request)
    return request, response
def submit(self, opener, res):
    """POST the IdP login form: hidden fields are carried over untouched and
    the credential manager supplies username and password.

    :param opener: the urllib2 opener
    :param res: the response whose URL the form action is resolved against
    :returns: (request, response) for the submitted login
    """
    cm = self.cm
    form = self.data
    fields = {}
    # carry over every hidden input exactly as the IdP sent it
    for name, spec in form.items():
        if "type" in spec and "value" in spec:
            if spec.get("type") == "hidden":
                fields[name] = spec.get("value")
    url = urlparse.urljoin(res.url, form["form"]["action"])
    log.info("Form Authentication from: %s" % url)
    fields[self.username_field] = cm.get_username()
    fields[self.password_field] = cm.get_password()
    request = Request(url, data=urllib.urlencode(fields))
    log.info("Submitting login form")
    log.debug("POST: %s" % request.get_full_url())
    response = opener.open(request)
    return request, response
def parse(self, response):
    """Download the delinquent-parcels CSV linked from the page, then yield a
    tax-eligibility lookup request (with a partially-filled ReapItem attached)
    for every data row.
    """
    delinquent_link = Selector(response).xpath(
        '//*[@id="box1"]/td[1]/li/font/i/a/@href').extract()
    urllib.urlretrieve(urlparse.urljoin(response.url, delinquent_link[0]),
                       'delinquent.zip')
    unzip('delinquent.zip', 'delinquent')
    with open(glob.glob('delinquent/*.csv')[0], 'rb') as csvfile:
        csvreader = csv.reader(csvfile, delimiter=',')
        # Discover column indexes from the (quoted) header row.
        # NOTE(review): if a heading is missing, the corresponding index
        # variable stays unbound and the row loop raises NameError -- this
        # assumes the feed's headers are stable; confirm.
        for idx, column in enumerate(csvreader.next()):
            column = re.sub('["]', "", column).strip()
            if column.startswith("PARCELID"):
                parcelidcol = idx
            if column.startswith("OWNERNAME1"):
                ownernamecol = idx
            if column.startswith("PARCELLOCATION"):
                parcellocationcol = idx
            if column.startswith("CLS"):
                parcelclass = idx
            if column.startswith("ASMTBLDG"):
                buildingvalue = idx
        for row in csvreader:
            item = ReapItem()
            # parcel IDs arrive quoted; strip the quotes
            item['parcel_id'] = re.sub('["]', "", row[parcelidcol]).strip()
            item['parcel_location'] = row[parcellocationcol].strip()
            item['parcel_class'] = row[parcelclass].strip()
            item['building_value'] = row[buildingvalue].strip()
            request = scrapy.Request(
                "http://mctreas.org/master.cfm?parid={0}&taxyr={1}&own1={2}"
                .format(item['parcel_id'], str(YEAR), row[ownernamecol]),
                callback=self.get_tax_eligibility)
            request.meta['item'] = item
            yield request
def parse(self, response):
    """Download the delinquent-parcels CSV linked from the page and, for every
    R72-prefixed parcel, yield a 2014 tax-eligibility lookup request with a
    partially-filled ReapItem attached.
    """
    delinquentLink = Selector(response).xpath(
        '//*[@id="box1"]/td[1]/li/font/i/a/@href').extract()
    urllib.urlretrieve(urlparse.urljoin(response.url, delinquentLink[0]),
                       'delinquent.zip')
    unzip('delinquent.zip', 'delinquent')
    with open(glob.glob('delinquent/*.csv')[0], 'rb') as csvfile:
        csvreader = csv.reader(csvfile, delimiter=',')
        # Discover column indexes from the (quoted) header row.
        # NOTE(review): a missing heading leaves its index variable unbound
        # and the row loop raises NameError -- assumes stable headers; confirm.
        for idx, column in enumerate(csvreader.next()):
            column = re.sub('["]', "", column).strip()
            if column.startswith("PARCELID"):
                parcelidcol = idx
            if column.startswith("OWNERNAME1"):
                ownernamecol = idx
            if column.startswith("PARCELLOCATION"):
                parcellocationcol = idx
        for row in csvreader:
            item = ReapItem()
            # parcel IDs arrive quoted; strip the quotes
            item['parcelid'] = re.sub('["]', "", row[parcelidcol]).strip()
            item['parcellocation'] = row[parcellocationcol].strip()
            # only residential R72 parcels are of interest here
            if item['parcelid'].startswith('R72'):
                request = scrapy.Request(
                    "http://mctreas.org/master.cfm?parid=" + item['parcelid'] +
                    "&taxyr=2014" + "&own1=" + row[ownernamecol] + '\n',
                    callback=self.getTaxEligibility)
                request.meta['item'] = item
                yield request
def check_config(): """ Check crucial configuration details for existence and workability. Runs checks to see whether bugtracker's URL is reachable, whether backend is available at the right filename, and whether the script has the key arguments it needs to run: URL, backend, and database details. The filename for the backend in the backends/ directory needs to be the same as the configuration argument specifying that backend. For instance, invoking the Launchpad backend uses 'lp', and so the filename is 'lp.py'. """ Config.check_params(['url', 'backend']) if Config.backend + ".py" not in Backend.get_all_backends(): raise InvalidConfig('Backend "' + Config.backend + '" does not exist') url = urlparse.urlparse(Config.url) check_url = urlparse.urljoin(url.scheme + '://' + url.netloc, '') print("Checking URL: " + check_url) req = Request(check_url) try: response = urlopen(req) except HTTPError, e: raise InvalidConfig('The server could not fulfill the request ' + str(e.msg) + '(' + str(e.code) + ')')
def check_config(): """ Check crucial configuration details for existence and workability. Runs checks to see whether bugtracker's URL is reachable, whether backend is available at the right filename, and whether the script has the key arguments it needs to run: URL, backend, and database details. The filename for the backend in the backends/ directory needs to be the same as the configuration argument specifying that backend. For instance, invoking the Launchpad backend uses 'lp', and so the filename is 'lp.py'. """ Config.check_params(['url', 'backend']) if Config.backend + ".py" not in Backend.get_all_backends(): raise InvalidConfig('Backend "' + Config.backend + '" does not exist') url = urlparse.urlparse(Config.url) check_url = urlparse.urljoin(url.scheme + '://' + url.netloc, '') print("Checking URL: " + check_url) req = Request(check_url) if Config.backend != 'github': try: response = urlopen(req) except HTTPError, e: raise InvalidConfig('The server could not fulfill the request ' + str(e.msg) + '(' + str(e.code) + ')') except URLError, e: raise InvalidConfig('We failed to reach a server. ' + str(e.reason))
def bot_send_video(gesture, video_url, video_preview_img, to_mid="u2ef38a8c1f3f1c2c63bdf9c0a629023c"):
    """Push the gesture's video to a LINE trial-bot user.

    The video URL is rebuilt from the gesture's stored file path, resolved
    against the app's public host; the passed-in video_url is superseded.
    Returns the requests.Response from the LINE events API.
    """
    headers = {
        'Content-type': 'application/json; charset=UTF-8',
        'X-Line-ChannelID': settings.CHANNEL_ID,
        'X-Line-ChannelSecret': settings.CHANNEL_SECRET,
        'X-Line-Trusted-User-With-ACL': settings.CHANNEL_MID,
    }
    api = 'https://trialbot-api.line.me/v1/events'
    myurl = 'https://eldertranslator.herokuapp.com/'
    video_url = urlparse.urljoin(myurl, gesture.video.url)
    body = {
        'to': [to_mid],
        'toChannel': 1383378250,
        'eventType': "138311608800106203",
        'content': {
            "contentType": 3,
            "toType": 1,
            "originalContentUrl": video_url,
            "previewImageUrl": video_preview_img,
        },
    }
    # NOTE(review): verify=False disables TLS certificate checking -- confirm
    # this is intentional for the trialbot endpoint.
    req = requests.post(api, data=json.dumps(body), headers=headers, verify=False)
    return req
def submit(self, opener, res):
    """Submit the WAYF discovery form with our IdP selected.

    IdP entries are grouped into sub-dicts inside data["user_idp"]; they are
    flattened before lookup. When the form action is only a query string it
    is appended to the current URL, otherwise it is resolved relatively.

    :param opener: the urllib2 opener
    :param res: the response whose URL the form action is resolved against
    :returns: (request, response) for the submitted selection
    :raises WAYFException: when our IdP is not in the WAYF's list

    Cleanup: removed a dead urlsplit/urlunsplit computation whose result was
    never used (behavior unchanged).
    """
    log.info("Submitting form to wayf")
    # Set IDP to correct IDP
    wayf_data = {}
    idp = self.idp
    data = self.data
    # flatten the grouped IdP listing into one name -> value mapping
    idps = {}
    for d in data["user_idp"]:
        if isinstance(data["user_idp"][d], dict):
            idps.update(data["user_idp"][d])
    if not idp.get_idp() in idps:
        raise WAYFException("Can't find IdP '%s' in WAYF's IdP list" % idp)
    wayf_data["user_idp"] = idps[idp.get_idp()]
    wayf_data["Select"] = "Select"
    if data["form"]["action"].startswith("?"):
        # action is a bare query string: append to the page URL as-is
        url = res.url + data["form"]["action"]
    else:
        url = urlparse.urljoin(res.url, data["form"]["action"])
    data = urllib.urlencode(wayf_data)
    request = Request(url, data)
    log.debug("POST: %s" % request.get_full_url())
    response = opener.open(request)
    return request, response
def notify(cls, alert, *args, **kwargs):
    """Post the alert as a HipChat room notification.

    Green when the alert is resolved, otherwise red (or a caller-supplied
    color). Optionally appends a link to the ZMON alert details page.
    Returns the repeat interval in seconds (0 = no repeat).
    """
    url = cls._config.get('notifications.hipchat.url')
    token = kwargs.get('token', cls._config.get('notifications.hipchat.token'))
    repeat = kwargs.get('repeat', 0)
    notify = kwargs.get('notify', False)
    color = 'green' if alert and not alert.get('is_alert') else kwargs.get('color', 'red')
    message_text = cls._get_subject(alert, custom_message=kwargs.get('message'))
    if kwargs.get('link', False):
        zmon_host = kwargs.get('zmon_host', cls._config.get('zmon.host'))
        alert_id = alert['alert_def']['id']
        alert_url = urlparse.urljoin(zmon_host, '/#/alert-details/{}'.format(alert_id)) if zmon_host else ''
        link_text = kwargs.get('link_text', 'go to alert')
        message_text += ' -- <a href="{}" target="_blank">{}</a>'.format(alert_url, link_text)
    payload = {
        'message': message_text,
        'color': color,
        'notify': notify
    }
    room_path = '{}/v2/room/{}/notification'.format(url, urllib.quote(kwargs['room']))
    try:
        logger.info(
            'Sending to: ' + room_path + '?auth_token={}'.format(token) +
            ' ' + json.dumps(payload))
        resp = requests.post(
            room_path,
            json=payload,
            params={'auth_token': token},
            headers={'Content-type': 'application/json'})
        resp.raise_for_status()
    except:
        # best-effort notification: never let a HipChat failure propagate
        logger.exception('Hipchat write failed!')
    return repeat
def _get_department_urls_from_url(url):
    """Return the set of department URLs linked from the page at *url*."""
    soup = retrieve_soup(url)
    candidates = (urlparse.urljoin(BASE_URL, anchor["href"])
                  for anchor in soup.find_all("a", href=True))
    return set(candidate for candidate in candidates
               if _is_department_url(candidate, url))
def notify(cls, alert, *args, **kwargs):
    """Post the alert as a HipChat room notification, recording tracing tags
    on the current span.

    Supports 'html' (default) and plain message formats; optionally appends a
    link to the ZMON alert-details page. Returns the repeat interval in
    seconds (0 = no repeat).
    """
    current_span = extract_span_from_kwargs(**kwargs)
    url = cls._config.get('notifications.hipchat.url')
    token = kwargs.get('token', cls._config.get('notifications.hipchat.token'))
    repeat = kwargs.get('repeat', 0)
    notify = kwargs.get('notify', False)
    alert_def = alert['alert_def']
    message_format = kwargs.get('message_format', 'html')
    # annotate the trace with the alert context for observability
    current_span.set_tag('alert_id', alert_def['id'])
    entity = alert.get('entity')
    is_changed = alert.get('alert_changed', False)
    is_alert = alert.get('is_alert', False)
    current_span.set_tag('entity', entity['id'])
    current_span.set_tag('alert_changed', bool(is_changed))
    current_span.set_tag('is_alert', is_alert)
    current_span.log_kv({'room': kwargs.get('room')})
    # green once resolved, otherwise red (or caller override)
    color = 'green' if alert and not alert.get('is_alert') else kwargs.get('color', 'red')
    message_text = cls._get_subject(alert, custom_message=kwargs.get('message'))
    if kwargs.get('link', False):
        zmon_host = kwargs.get('zmon_host', cls._config.get('zmon.host'))
        alert_id = alert['alert_def']['id']
        alert_url = urlparse.urljoin(zmon_host, '/#/alert-details/{}'.format(alert_id)) if zmon_host else ''
        link_text = kwargs.get('link_text', 'go to alert')
        # link rendering depends on the room's message format
        if message_format == 'html':
            message_text += ' -- <a href="{}" target="_blank">{}</a>'.format(alert_url, link_text)
        else:
            message_text += ' -- {} - {}'.format(link_text, alert_url)
    message = {
        'message': message_text,
        'color': color,
        'notify': notify,
        'message_format': message_format
    }
    try:
        logger.info(
            'Sending to: ' +
            '{}/v2/room/{}/notification?auth_token={}'.format(url, urllib.quote(kwargs['room']), token) +
            ' ' + json.dumps(message))
        r = requests.post(
            '{}/v2/room/{}/notification'.format(url, urllib.quote(kwargs['room'])),
            json=message,
            params={'auth_token': token},
            headers={'Content-type': 'application/json'})
        r.raise_for_status()
    except Exception:
        # best-effort: record the failure on the span but never propagate
        current_span.set_tag('error', True)
        current_span.log_kv({'exception': traceback.format_exc()})
        logger.exception('Hipchat write failed!')
    return repeat
def get_new_urls(self, new_url, soup):
    """Collect absolute URLs for every '/item/' encyclopedia link in *soup*.

    Relative hrefs (e.g. <a href="/item/...">) are resolved against
    *new_url*. Returns a set of absolute URLs.
    """
    found = set()
    for link in soup.find_all('a', href=re.compile(r"/item/")):
        found.add(urljoin(new_url, link['href']))
    return found
def relative_to_full_url(self, url):
    """Resolve *url* against this object's original_url when it has no host."""
    from urllib2 import urlparse
    parsed = urlparse.urlparse(url)
    if parsed.netloc:
        return url
    # NOTE(review): only the path component is joined here, so any query
    # string or fragment on a relative URL is dropped -- confirm intended.
    return urlparse.urljoin(self.original_url, parsed.path)
def _kick_offline(self, con, condom):
    """Post-back the ASP.NET online-users grid to kick the first session.

    Mimics javascript:__doPostBack('gvOnLineUser','KickOut$0'). Returns True
    when the server navigated back to Navigate.aspx (kick succeeded).
    """
    form = condom.xpath("//form[@id='form1']")[0]
    fields, _ = self.process_form(form)
    fields['__EVENTTARGET'] = 'gvOnLineUser'
    fields['__EVENTARGUMENT'] = 'KickOut$0'
    action = urlparse.urljoin(con.request.url, form.attrib.get('action'))
    con = self.request_url(action, data=fields,
                           headers={'Referer': con.request.url})
    return 'Navigate.aspx' in con.request.url
def relative_to_full_url(original_url, url):
    """Resolve *url* against *original_url* when it has no network location."""
    from urllib2 import urlparse
    pieces = urlparse.urlparse(url)
    if not pieces.netloc:
        # NOTE(review): joining only the path drops any query/fragment on a
        # relative URL -- confirm intended.
        url = urlparse.urljoin(original_url, pieces.path)
    return url
def show(self):
    """Fill the IdP combo box from the SLCS login service (sorted by name),
    pre-select the IdP remembered in settings, and show the window."""
    slcs_login_url = urlparse.urljoin(self.settings.slcs, 'login')
    for name in sorted(list_idps(slcs_login_url).keys()):
        self.idps.append_text(name)
        if name == self.settings.idp:
            # the entry just appended is the last row of the model
            self.idps.set_active(len(self.idps.get_model()) - 1)
    self.window.show_all()
def crawls(url):
    """Fetch *url* and return absolute URLs of every <img> whose src ends in
    .jpg/.jpeg/.png/.gif.

    Bug fix: the previous pattern r'\.(jpe?g)|(png)|(gif)$' bound the $ only
    to the 'gif' branch, so any src merely *containing* 'png' or '.jpg'
    anywhere matched; the alternation is now grouped and anchored.
    """
    urlStream = urlopen(url)
    htmldoc = urlStream.read()
    soup = BeautifulSoup(htmldoc)
    links = []
    # extension must terminate the src string
    images = soup.findAll("img", {"src": re.compile(r'\.(jpe?g|png|gif)$')})
    for img in images:
        links.append(urlparse.urljoin(url, img["src"]))
    return links
def urlIterator(startUrl, nextCssSelector):
    """Yield page URLs, following the "next" link matched by *nextCssSelector*
    until no further page exists.

    Slow by design: every step fetches and parses the page's DOM to locate
    the next link. Matched elements may be the <a> itself or contain one.
    """
    url = startUrl
    while url:
        yield url
        candidates = getElementsFromUrl(url, nextCssSelector)
        url = None
        for node in candidates:
            anchor = node if node.tag == 'a' else node.find('a')
            if anchor is not None:
                # hrefs are resolved against the start URL
                url = urlparse.urljoin(startUrl, anchor.get('href'))
                break
def crawl_courses_from_program_page_url(url, program_code):
    """Crawl every course linked from a program page.

    Returns the crawled course data with falsy (failed) entries filtered out,
    visiting course URLs in sorted, de-duplicated order.
    """
    soup = retrieve_soup(url)
    linked = {urlparse.urljoin(BASE_URL, a["href"])
              for a in soup.find_all("a", href=True)}
    course_urls = sorted(u for u in linked if _is_course_url(u))
    crawled = [_crawl_course_data(course_url, program_code)
               for course_url in course_urls]
    return filter(None, crawled)
def domIterator(startUrl, nextCssSelector):
    """Yield parsed DOMs, following the first <a> matched by *nextCssSelector*
    on each page until none is found."""
    select_next = CSSSelector(nextCssSelector)
    dom = getDOM(startUrl)
    while dom is not None:
        yield dom
        matches = select_next(dom)
        dom = None
        for candidate in matches:
            if candidate.tag == 'a':
                # NOTE(review): hrefs are resolved against startUrl, not the
                # current page's URL -- confirm all links are root-relative.
                next_url = urlparse.urljoin(startUrl, candidate.get('href'))
                dom = getDOM(next_url)
                break
def submit(self, opener, res):
    """Follow the login link on the ESOE chooser page with a plain GET.

    :param opener: the urllib2 opener
    :param res: the response whose URL self.url is resolved against
    :returns: (request, response) for the followed link
    """
    target = urlparse.urljoin(res.url, self.url)
    request = Request(target)
    log.debug("GET: %s" % request.get_full_url())
    response = opener.open(request)
    return request, response
def find_correct_element_url(params, el): els = el.xpath('//div[@class="wx-rb bg-blue wx-rb_v1 _item"]') print '---------------------------' print len(els) print '---------------------------' for cur_el in els: nick_name = cur_el.xpath('//div[@class="txt-box"]/h3/em/text()')[0] print nick_name if params.name == nick_name.encode('utf8'): url = cur_el.xpath('@href')[0] url = urlparse.urljoin(base_url, url) return url return ""
def _get_program_urls_from_department_url(url):
    """Collect program-page URLs reachable from a department page.

    Course links are truncated to their parent directory, since the parent
    of a course URL is its program page.
    """
    soup = retrieve_soup(url)
    program_urls = set()
    for candidate in [urlparse.urljoin(BASE_URL, a["href"])
                      for a in soup.find_all("a", href=True)]:
        if _is_course_url(candidate):
            # drop the trailing course segment to get the program page
            candidate = "/".join(candidate.split("/")[:-1])
        if _is_program_url(candidate, url):
            program_urls.add(candidate)
    return program_urls
def __init__(self, service_url, infrastructure_account, verify=True, oauth2=False):
    """Configure a requests session against the entities service 'api/v1/'
    root, optionally attaching an OAuth2 bearer token.

    :raises ConfigurationError: when service_url is empty/missing
    """
    if not service_url:
        raise ConfigurationError('EntitiesWrapper improperly configured. URL is missing!')
    self.infrastructure_account = infrastructure_account
    self.__service_url = urlparse.urljoin(service_url, 'api/v1/')
    session = requests.Session()
    session.headers.update({'User-Agent': get_user_agent()})
    session.verify = verify
    if oauth2:
        session.headers.update({'Authorization': 'Bearer {}'.format(tokens.get('uid'))})
    self.__session = session
def main(url): # Example URL: http://picturesofshit.com/v/2009/10-15_-_Dudescademy/ img_size_qry_string = '?g2_imageViewsIndex=1' # Go to gallery and grab links to high resolution photos gallery = urlopen(url) soup = BeautifulSoup(gallery.read()) links = [tag.attrMap['href'] + img_size_qry_string for tag in soup.findAll(href=re.compile('JPG.html'))] # Create download directory based on url dir = re.search('[_+]([a-zA-Z0-9]+)/$', url).groups()[0] if not os.path.exists(dir): os.makedirs(dir) # Go to each link, grab the image source, and download links = [urlparse.urljoin(url, link) for link in links] for link in links: gallery_image = urlopen(link) soup = BeautifulSoup(gallery_image.read()) image_url = urlparse.urljoin(url, soup.find('img', 'ImageFrame_none').attrMap['src']) file_name = re.search('([^/]+)$', image_url).groups()[0] file = os.path.join(dir, file_name) print 'Downloading %s' % file_name urlretrieve(image_url, file) print '--- Downloads Complete ---'
def submit(self, opener, res):
    """Relay the hidden SAMLRequest field to the form's action URL.

    :param opener: the urllib2 opener
    :param res: the response whose URL the form action is resolved against
    :returns: (request, response) for the submitted verification form
    """
    log.info("Submitting SAML Verification form")
    form = self.data
    action = urlparse.urljoin(res.url, form["form"]["action"])
    payload = urllib.urlencode({"SAMLRequest": form["SAMLRequest"]["value"]})
    request = Request(action, data=payload)
    log.debug("POST: %s" % request.get_full_url())
    response = opener.open(request)
    return request, response
def submit(self, opener, res):
    """Relay the IdP's SAMLResponse (and RelayState) back to the SP.

    :param opener: the urllib2 opener
    :param res: the response whose URL the form action is resolved against
    :returns: (request, response) for the submitted assertion
    """
    log.info('Submitting IdP SAML form')
    form = self.data
    action = urlparse.urljoin(res.url, form['form']['action'])
    payload = urllib.urlencode({
        'SAMLResponse': form['SAMLResponse']['value'],
        'RelayState': form['RelayState']['value'],
    })
    request = Request(action, data=payload)
    log.debug("POST: %s" % request.get_full_url())
    response = opener.open(request)
    return request, response
def request(self):
    """Poll the real-time quote endpoint for every tracked ticker.

    Yields {ticker: price} per successful fetch, pushes each quote through
    the callback as JSON, and drops tickers that are inactive or whose
    payload cannot be parsed. HTTP failures are logged but the ticker is
    kept for the next round.
    """
    # NOTE(review): mutates self.tickers while iterating .items() -- safe on
    # Python 2 (items() copies); would raise on Python 3.
    for ticker, active in self.tickers.items():
        if not active:
            del self.tickers[ticker]
            continue
        path = self.real_time_path.format(ticker.lower())
        resp = self.sess.get(urlparse.urljoin(self.base_url, path))
        if not resp.ok:
            logging.error(resp.reason)
            continue
        try:
            price = self.parse(resp.text)
            self.callback(json.dumps({ticker.upper(): price}))
            yield {ticker: price}
        except Exception as e:
            # unparseable payload: stop tracking this ticker
            logging.error(e)
            del self.tickers[ticker]
def check_config(): """ """ Config.check_params(['url','backend']) if Config.backend+".py" not in Backend.get_all_backends(): raise InvalidConfig('Backend "'+ Config.backend + '" does not exist') url = urlparse.urlparse(Config.url) check_url = urlparse.urljoin(url.scheme + '://' + url.netloc,'') print("Checking URL: " + check_url) req = Request(check_url) try: response = urlopen(req) except HTTPError, e: raise InvalidConfig('The server could not fulfill the request ' + str(e.msg) + '('+ str(e.code)+')')
def check_config(): """ """ Config.check_params(['url', 'backend']) if Config.backend + ".py" not in Backend.get_all_backends(): raise InvalidConfig('Backend "' + Config.backend + '" does not exist') url = urlparse.urlparse(Config.url) check_url = urlparse.urljoin(url.scheme + '://' + url.netloc, '') print("Checking URL: " + check_url) req = Request(check_url) try: response = urlopen(req) except HTTPError, e: raise InvalidConfig('The server could not fulfill the request ' + str(e.msg) + '(' + str(e.code) + ')')
def dump():
    """Fetch the metadata from every URL path in each configured MIME format
    and write one file per (format, path) under DUMP_DIR/<format>/."""
    for fmt, fxt in MIME_TYPES.iteritems():
        dump_path = path.join(DUMP_DIR, path.basename(fmt))
        makedirs(dump_path)
        for target in [urlparse.urljoin(BASE_URL, p) for p in URL_PATHS]:
            logger.info("Request metadata in '%s' from\n %s\n" % (fmt, target))
            # content negotiation: ask the server for this MIME type
            req = Request(target)
            req.add_header('Accept', fmt)
            res = urlopen(req)
            fname = '%s.%s' % (path.basename(urlparse.urlparse(target).path), fxt)
            fname = path.join(dump_path, fname)
            logger.info("Write metadata into file './%s'\n" % fname)
            with open(fname, 'w') as fout:
                fout.write(res.read())
def scrapOreily(indexUrl, outName): '''Generates an html page from the index located at indexUrl''' links = scraptools.getElementsFromUrl(url, '#bodyContent ol a:nth-child(1)') f = open(outName, 'w') f.write(head) f.write(getHTMLContent(indexUrl)) for link in links: relativeLink = link.get('href') print relativeLink absoluteLink = urlparse.urljoin(url, relativeLink) f.write(getHTMLContent(absoluteLink)) f.write('</body></html>') f.close()
def get_user_json(user, profile):
    """Build the network-tree JSON node for *user*.

    Aggregates the user's package investment and transaction figures from the
    epoch until now, and marks the node icon "inactive" when nothing has been
    invested.

    Bug fix: the activity check used ``investment is 0`` (identity), which is
    unreliable for non-int zero values (Decimal/long from the DB aggregate);
    it now uses ``== 0``. Dead commented-out code removed.
    """
    user_investment = User_packages.objects.filter(user=user).annotate(
        investment=Sum('package__price')).values()
    investment = user_investment[0]['investment'] if user_investment else 0
    today = UTC.normalize(UTC.localize(datetime.datetime.utcnow()))
    pkg = get_package(user)
    pkg_dt = pkg.created_at.strftime("%D") if pkg else None
    return dict(
        id=user.id,
        avi_id=profile.user_auto_id,
        relationship=get_relationship(user),
        name="%s %s" % (user.first_name, user.last_name),
        sponsor_id=None if profile.sponser_id is None else profile.sponser_id.id,
        placement_id=None if profile.placement_id is None else profile.placement_id.id,
        placement_position=profile.placement_position,
        image=ICON,
        link=dict(
            href=urlparse.urljoin("https://www.avicrypto.us", "/network") + "#"),
        # equality (not identity) so Decimal/long zero also counts as inactive
        image_Name="inactive" if investment == 0 else "active",
        investment=investment,
        transaction=tot_txn_vol(user),
        binary=binary_txns(user, EPOCH_BEGIN, today),
        direct=direct_txns(user, EPOCH_BEGIN, today),
        roi=roi_txns(user, EPOCH_BEGIN, today),
        direct_left=direct_child(user, EPOCH_BEGIN, today, leg='l'),
        direct_right=direct_child(user, EPOCH_BEGIN, today, leg='r'),
        binary_left=binary_child(user, EPOCH_BEGIN, today, leg='l'),
        binary_right=binary_child(user, EPOCH_BEGIN, today, leg='r'),
        left_members_count=get_user_count(user, 'l'),
        right_members_count=get_user_count(user, 'r'),
        package_active_date=pkg_dt)
def notify(cls, alert, per_entity=False, include_alert=True, message='', repeat=0, **kwargs):
    """Send a trigger/resolve event for *alert* to PagerDuty.

    alert         -- ZMON alert dict (expects 'entity', 'is_alert',
                     'alert_def' keys).
    per_entity    -- when True, the incident key includes the entity id so
                     each entity opens its own incident.
    include_alert -- attach the full alert JSON as incident details.
    message       -- override for the incident description; defaults to the
                     alert subject.
    repeat        -- repeat interval returned to the scheduler.
    kwargs        -- may supply 'service_key' and 'zmon_host' overrides.

    Returns ``repeat``. Raises NotificationError when no service key is
    configured. HTTP failures are logged, not raised.
    """
    url = 'https://events.pagerduty.com/generic/2010-04-15/create_event.json'
    # BUG FIX: the original re-read ``repeat`` from kwargs with a default of
    # 0. Since ``repeat`` is a named parameter it can never appear in
    # **kwargs, so any caller-supplied value was silently reset to 0.
    # The parameter value is used as-is now.

    service_key = kwargs.get('service_key', cls._config.get('notifications.pagerduty.servicekey'))
    zmon_host = kwargs.get('zmon_host', cls._config.get('zmon.host'))
    if not service_key:
        raise NotificationError('Service key is required!')

    entity = alert.get('entity')
    is_alert = alert.get('is_alert')
    event_type = 'trigger' if is_alert else 'resolve'
    alert_id = alert['alert_def']['id']

    # Incident key determines de-duplication on the PagerDuty side.
    key = 'ZMON-{}'.format(alert_id) if not per_entity else 'ZMON-{}-{}'.format(alert_id, entity['id'])
    description = message if message else cls._get_subject(alert)

    message = {
        'service_key': service_key,
        'event_type': event_type,
        'incident_key': key,
        'description': description,
        'client': 'ZMON',
        'client_url': urlparse.urljoin(zmon_host, '/#/alert-details/{}'.format(alert_id)) if zmon_host else '',
        'details': json.dumps(alert, cls=JsonDataEncoder) if include_alert else '',
    }

    try:
        logger.info('Sending to %s %s', url, message)
        headers = {'User-Agent': get_user_agent(), 'Content-type': 'application/json'}
        r = requests.post(url, json=message, headers=headers, timeout=5)
        r.raise_for_status()
    except Exception as ex:
        # Best-effort delivery: a failed notification must not crash the
        # alerting worker, so we log and carry on.
        logger.exception('Notifying Pagerduty failed %s', ex)

    return repeat
def __init__(self, service_url, infrastructure_account, verify=True, oauth2=False):
    """Configure the HTTP session used to talk to the entities service.

    service_url            -- base URL of the service; 'api/v1/' is appended.
    infrastructure_account -- account identifier stored for later queries.
    verify                 -- TLS certificate verification flag.
    oauth2                 -- when True, attach a Bearer token to every call.

    Raises ConfigurationError when service_url is empty.
    """
    if not service_url:
        raise ConfigurationError(
            'EntitiesWrapper improperly configured. URL is missing!')

    self.infrastructure_account = infrastructure_account
    self.__service_url = urlparse.urljoin(service_url, 'api/v1/')

    session = requests.Session()
    session.verify = verify
    session.headers.update({'User-Agent': get_user_agent()})
    if oauth2:
        session.headers.update(
            {'Authorization': 'Bearer {}'.format(tokens.get('uid'))})
    self.__session = session
def _request(self, endpoint, q, method='get'):
    """Issue a request against the entities service and return decoded JSON.

    endpoint -- path fragment joined onto the service base URL.
    q        -- query structure; sent as the JSON body for POST, otherwise
                JSON-encoded into the 'query' URL parameter.
    method   -- HTTP verb name; resolved to the matching session method.

    Raises CheckError on a non-2xx response, HttpError on timeout or
    connection failure.
    """
    try:
        url = urlparse.urljoin(self.__service_url, endpoint)
        # Dispatch to session.get / session.post / ... by name.
        request = getattr(self.__session, method.lower())
        if method.lower() == 'post':
            response = request(url, json=q)
        else:
            # GET-style calls carry the query serialized in a URL parameter.
            response = request(url, params={'query': json.dumps(q)})
        if response.ok:
            return response.json()
        else:
            raise CheckError(
                'EntitiesWrapper query failed: {} with status {}:{}'.format(q, response.status_code, response.text))
    except requests.Timeout:
        # Python 2 three-expression raise: re-raise as HttpError while
        # preserving the original traceback.
        raise HttpError('timeout', self.__service_url), None, sys.exc_info()[2]
    except requests.ConnectionError:
        raise HttpError('connection failed', self.__service_url), None, sys.exc_info()[2]
def parse_multiple_store_product(self, response):
    """Parse a shopping.com "compare prices" page listing one product
    offered by multiple stores, yielding a single ShoppingdotcomItem whose
    store/price/url lists are index-aligned per offer row.
    """
    print "parse_multiple_store_product", response.url
    sel = Selector(response)
    # Category trail from the breadcrumb; the first span is skipped
    # (presumably the "Home" root — confirm against a live page).
    breadcrumb = sel.xpath('//div[contains(@class,"breadCrumb")]')
    categories = [
        span for span in breadcrumb.xpath(
            ".//span[@itemprop='title']/text()").extract()[1:]
    ]
    print categories
    item = ShoppingdotcomItem()
    item["categories"] = categories
    item["product_name"] = sel.xpath(
        "//h1[@class='productTitle']/text()").extract()[0]
    # set() de-duplicates repeated gallery thumbnails; note this loses the
    # original image ordering.
    item["image_urls"] = list(
        set(sel.xpath("//div[@class='imgBorder']//img/@src").extract()))
    item["product_urls"] = []
    item["stores"] = []
    item["prices"] = []
    # One offerItem div per store; the three lists below stay aligned
    # because each iteration appends exactly one entry to each.
    for div in sel.xpath("//div[contains(@id,'offerItem-')]"):
        item["product_urls"].append(
            urlparse.urljoin(
                response.url,
                div.xpath(".//a[@class='visitBtn']/@href").extract()[0]))
        item["stores"].append(
            div.xpath(".//img[contains(@id,'DCTmerchLogo')]/@title").
            extract()[0])
        # Prefer the discounted price when present, else the regular price.
        # The regex grabs a currency-symbol-prefixed decimal, e.g. "$12.99".
        if div.xpath(".//span[contains(@class,'toSalePrice')]"):
            item["prices"].append(
                re.findall(
                    "\S+\d+\.\d+",
                    div.xpath(
                        ".//span[contains(@class,'toSalePrice')]/text()").
                    extract()[0])[0])
        else:
            item["prices"].append(
                re.findall(
                    "\S+\d+\.\d+",
                    div.xpath(".//span[contains(@id,'priceQA')]/text()").
                    extract()[0])[0])
    yield item
    pass
def validate(amt, src_addr, addr, txn_id, coin="btc"):
    """Validate the given address and transaction of the given crypto
    payment type, which can be any one of BTC, ETH or XRP.

    amt      -- expected payment amount.
    src_addr -- sender address.
    addr     -- receiving address.
    txn_id   -- transaction id/hash to verify.
    coin     -- one of "btc", "eth", "xrp".

    Returns the coin-specific is_valid_*_paid() result.
    """
    # BUG FIX: the original asserted ``coin in (...)`` only AFTER already
    # using ``coin`` as a key into COIN, so an unknown coin raised a bare
    # KeyError instead of a clear validation error. Validate first, and use
    # an explicit exception rather than assert (asserts vanish under -O).
    if coin not in ("btc", "eth", "xrp"):
        raise ValueError("unsupported coin: %r" % (coin,))

    # Each COIN entry fetches (transaction lookup, address lookup) responses.
    txn_res, addr_res = COIN[coin](src_addr, txn_id)

    if coin == "btc":
        return is_valid_btc_paid(amt, addr, src_addr, txn_id, addr_res, txn_res)
    elif coin == "eth":
        txn_res = txn_res.json()
        addr_res = addr_res.json()
        # Etherscan-style API: status "1" means the lookup succeeded.
        if (txn_res.get("status", None) == "1"
                and addr_res.get("status", None) == "1"):
            uri = "api?module=account&action=txlist&address=%s&startblock=0&endblock=99999999&sort=asc&apikey=%s" % (
                src_addr, ETHER_KEY)
            res = requests.get(urlparse.urljoin(ETH_HOST, uri)).json()
            return is_valid_eth_paid(amt, src_addr, addr, txn_id, res)
    elif coin == "xrp":
        j_txn = txn_res.json()
        j_addr = addr_res.json()
        if j_addr['result'] == "success" and j_txn['result'] == "success":
            return is_valid_xrp_paid(amt, txn_id, src_addr, addr, j_txn)
    # Reached only when an API lookup reported failure for eth/xrp.
    raise Exception("you should not be here")
def _request(self, endpoint, q, method='get'):
    """Issue a request against the entities service and return decoded JSON.

    endpoint -- path fragment joined onto the service base URL.
    q        -- query structure; sent as the JSON body for POST, otherwise
                JSON-encoded into the 'query' URL parameter.
    method   -- HTTP verb name; resolved to the matching session method.

    Raises CheckError on a non-2xx response, HttpError on timeout or
    connection failure.
    """
    try:
        url = urlparse.urljoin(self.__service_url, endpoint)
        # Dispatch to session.get / session.post / ... by name.
        request = getattr(self.__session, method.lower())
        if method.lower() == 'post':
            response = request(url, json=q)
        else:
            # GET-style calls carry the query serialized in a URL parameter.
            response = request(url, params={'query': json.dumps(q)})
        if response.ok:
            return response.json()
        else:
            raise CheckError(
                'EntitiesWrapper query failed: {} with status {}:{}'.
                format(q, response.status_code, response.text))
    except requests.Timeout:
        # Python 2 three-expression raise: re-raise as HttpError while
        # preserving the original traceback.
        raise HttpError('timeout', self.__service_url), None, sys.exc_info()[2]
    except requests.ConnectionError:
        raise HttpError('connection failed', self.__service_url), None, sys.exc_info()[2]
def parse_book_0(self, response):
    """Parse a book landing page: extract its metadata and chapter list,
    schedule a content request per chapter, then yield the MetaItem.
    """
    sel = Selector(response)
    item = MetaItem()
    item['title'] = sel.xpath('//h1/text()').extract_first()
    item['category'] = sel.xpath('//span[contains(@itemprop,"category")]/text()').extract_first()
    item['author'] = sel.xpath('//span[contains(@itemprop,"author")]/text()').extract_first()
    # Description kept as raw HTML nodes, not plain text.
    item['desc'] = sel.xpath('//div[contains(@itemprop, "description")]/node()').extract()
    # find chapter
    el_chapter = sel.xpath('//li[contains(@itemprop, "itemListElement")]/node()')
    # NOTE(review): this extract() result is discarded — looks like dead
    # code left from debugging; confirm before removing.
    el_chapter.extract()
    array = []
    for index, s in enumerate(el_chapter):
        ch = dict()
        ch['num'] = index + 1  # chapters are 1-based
        content_url = urlparse.urljoin(self.base_domain, s.xpath('@href').extract()[0])
        ch['url'] = content_url
        ch['name'] = s.xpath('span/text()').extract_first()
        array.append(ch)
        # Fetch the chapter body; the chapter dict travels along in meta.
        yield Request(content_url, meta={'chapter': ch}, callback=self.parse_content_0, priority=PRIORITY_MID)
    item['chapter'] = array
    yield item
class Crawl:
    # NOTE(review): everything below runs at class-DEFINITION time (i.e. on
    # import), not when an instance is created — this class is effectively a
    # script wrapped in a class body. Python 2 syntax (print statements,
    # `except Exc, e`).

    # Truncate the output file so a previous run's results are discarded.
    f = open('employee_detail.html', 'w')
    f.truncate()
    f.close()
    seed = 'http://www.reuters.com/finance/markets/indices'
    all_links = set()
    links = list()
    try:
        r = requests.get(seed)
        if r.status_code == 200:
            print('Fetching in page links...')
            content = r.content
            soup = BeautifulSoup(content, "lxml")
            # soup('a') is shorthand for soup.find_all('a').
            tags = soup('a')
            flg = 0  # NOTE(review): appears unused within this block
            for a in tags:
                href = a.get("href")
                if href is not None:
                    # Resolve relative hrefs against the seed URL and keep
                    # only links whose resolved URL mentions "sector".
                    new_url = urlparse.urljoin(seed, href)
                    if new_url.find("sector") != -1:
                        print new_url
                        links.append(
                            new_url)  # 'links' contains URLs of all 10 sectors
        elif r.status_code == 403:
            print "Error: 403 Forbidden url"
        elif r.status_code == 404:
            print "Error: 404 URL not found"
        else:
            print "Make sure you have everything correct."
    except requests.exceptions.ConnectionError, e:
        print "Oops! Connection Error. Try again"