Python fix_url示例，ahttp.fix_url Python示例

示例#1

0

显示文件

文件： favicon.py 项目： leigh123linux/streamtuner2

def google_find_homepage(row):
    """Search Google for a station's missing homepage URL.

    The row's stream URL is first recorded in the module-level
    ``tried_urls`` list (if not already present).  If the row has a title,
    a Google search for that literal title is issued and the first result
    link accepted by the pattern below is stored in ``row["homepage"]``
    after being passed through ``ahttp.fix_url``.

    Returns True when a homepage was found and stored, otherwise None.
    """
    # Remember this stream URL as attempted.
    # NOTE(review): the list is only appended to here; an already-tried URL
    # does not short-circuit the search -- confirm whether that is intended.
    stream_url = row.get("url")
    if stream_url not in tried_urls:
        tried_urls.append(stream_url)

    # Nothing to search for without a station title.
    title = row.get("title")
    if not title:
        return None

    # Result links appear either as a classic <h3 class="r"> anchor or as a
    # /url?q= redirect; hosts listed in the negative lookahead are skipped.
    rx_u = re.compile(
        r'''
            (?:  <h3\s+class="r"><a\s+href="  |  /url\?q=  )
            (https?://
            (?!www\.google|webcache|google|tunein|streema)
            [^"&]+)''', re.X)

    # Query Google with the literal station title.
    html = ahttp.get("http://www.google.com/search",
                     params=dict(hl="en", q=title, client="streamtuner2"),
                     ajax=1,
                     timeout=3.5)

    # Use the first URL hit, if any.
    hits = rx_u.findall(html)
    if hits:
        row["homepage"] = ahttp.fix_url(hits[0])
        return True
    return None

示例#2

0

显示文件

文件： dirble.py 项目： leigh123linux/streamtuner2

    def unpack(self, r):
        """Flatten one raw station record into a station row dict.

        Picks the "best" entry of ``r["streams"]`` by weighting each
        stream's bitrate with ``self.format_q[content_type]`` and sums the
        listener counts of all streams.  Stream dicts are normalized in
        place: a missing content type becomes "?", a missing bitrate 16,
        and content types are whitespace-stripped.

        Returns an empty dict when the record has no streams.  The keys
        "categories", "name" and "website" of the record and "stream" and
        "status" of the chosen stream are read by direct subscript, so a
        record lacking them raises KeyError.
        """
        streams = r.get("streams")
        if not streams:
            return {}

        listeners = 0
        # Start with the first entry and replace it whenever an alternative
        # scores better.
        s = streams[0]
        for alt in streams:
            listeners += alt.get("listeners", 0)

            # set defaults
            if not alt.get("content_type"):
                alt["content_type"] = "?"
            if not alt.get("bitrate"):
                alt["bitrate"] = 16
            # There's a "\r\n" in nearly every entry
            alt["content_type"] = alt["content_type"].strip()

            # Weight format with bitrate.  The fallback weight must be a
            # number: a string default would turn the product into string
            # repetition and break the comparison below.
            # (assumes self.format_q maps content types to numeric weights
            # -- TODO confirm)
            cur_q = self.format_q.get(s["content_type"], 0.9) \
                    * s.get("bitrate", 32)
            alt_q = self.format_q.get(alt["content_type"], 0.9) \
                    * alt.get("bitrate", 32)

            # swap out for overall better score
            if alt_q > cur_q:
                s = alt

        # Fix absent (or implausibly short) audio type.
        if not s.get("content_type") or len(s["content_type"]) < 7:
            s["content_type"] = "audio/mpeg"

        # Rename fields.
        return dict(
            genre=" ".join(c["slug"] for c in r["categories"]),
            title=r["name"],
            playing="{} {}".format(r.get("country"), r.get("description", "")),
            homepage=ahttp.fix_url(r["website"]),
            url=s["stream"],
            format=s["content_type"],
            bitrate=s["bitrate"],
            listeners=listeners,
            # CDN HTTPS trip up requests.get
            img=r.get("image", {}).get("thumb", {}).get("url", ""),
            img_resize=32,
            state=self.state_map.get(int(s["status"]), ""),
            deleted=s.get("timedout", False),
        )

示例#3

0

显示文件

 def postprocess_filter_homepage(self, row, channel):
     """Derive a missing homepage from a web address found in the title.

     Rows that already carry a homepage are left alone.  Otherwise the
     title is searched with ``self.rx_www_url``; a hit is lower-cased,
     stripped of spaces, prefixed with "www." unless it already starts
     with it, and stored in ``row["homepage"]`` via ``ahttp.fix_url``.
     ``channel`` is not used.  Always returns True.
     """
     if row.get("homepage"):
         return True

     found = self.rx_www_url.search(row.get("title", ""))
     if not found:
         return True

     domain = found.group(0).lower().replace(" ", "")
     if not domain.startswith("www."):
         domain = "www." + domain
     row["homepage"] = ahttp.fix_url(domain)
     return True

示例#4

0

显示文件

文件： internet_radio.py 项目： leigh123linux/streamtuner2

    def with_dom(self, html_list):
        """Extract station rows from HTML pages using DOM queries.

        Every ``<tr>`` of every page in ``html_list`` yields one row dict
        with the keys title, homepage, url, genre, listeners, bitrate,
        format and playing.  The format is always reported as
        "audio/mpeg".  Returns the list of rows.
        """
        log.PROC("internet-radio, dom")
        # Raw strings keep the regex escapes out of Python's own string
        # escape processing; both patterns are compiled once per call.
        rx_numbers = re.compile(r"(\d+)")
        rx_stream = re.compile(r"""(http://[^'">]+)""")
        r = []
        for html in html_list:
            # the streams are arranged in table rows
            doc = pq(html)
            for tr in (pq(e) for e in doc("tr")):

                # The first two numbers found in the <p> text are used as
                # listeners and bitrate; " 0 0" is appended so that two
                # numbers always exist.
                info = tr.find("p")
                if info:
                    bl = rx_numbers.findall(str(info.text()) + " 0 0")
                else:
                    bl = [0, 0]

                # The stream URL is embedded in the onclick handler of the
                # parent of the first <i> element; default to "".
                onclick = tr.find("i").eq(0).parent().attr("onclick")
                found = rx_stream.search(onclick) if onclick else None
                url = found.group(0) if found else ""

                r.append({
                    "title": tr.find("h4").text(),
                    "homepage":
                    ahttp.fix_url(tr.find("a.small").attr("href") or ""),
                    "url": url,
                    "genre": tr.find("a[href^='/stations/']").text() or "",
                    "listeners": int(bl[0]),
                    "bitrate": int(bl[1]),
                    "format": "audio/mpeg",
                    "playing": tr.find("b").text(),
                })
        return r

示例#5

0

显示文件

    def share(self, *w):
        """Upload the currently selected station unless it is already listed.

        Works on a shallow copy of ``self.parent.row()``.  Returns None when
        there is no current row or when the "common" stream cache is empty
        (a warning is logged in the latter case); returns True otherwise,
        whether or not an upload took place.  Extra positional arguments
        are ignored.
        """
        # get data
        selected = self.parent.row()
        if not selected:
            return None
        row = copy.copy(selected)

        # convert PLS/M3U link to direct ICY stream url
        wants_direct = (conf.myoggradio_morph
                        and self.parent.channel().listformat != "url/direct")
        if wants_direct:
            urls = action.convert_playlist(row["url"],
                                           row.get("listformat", "any"),
                                           "srv",
                                           local_file=False,
                                           row=row) or [row["url"]]
            row["url"] = ahttp.fix_url(urls[0])

        # prevent double check-ins
        if not self.streams.get("common"):
            log.WARN(
                "Cache empty. Cannot compare stream info for newness. Please reload MyOggRadio channel first."
            )
            return
        known = self.streams["common"]
        already_listed = (
            row["title"] in (entry.get("title") for entry in known)
            or row["url"] in (entry.get("url") for entry in known)
        )
        if already_listed:
            return True

        # send
        self.status("Sharing station URL...")
        if not self.upload(row):
            self.status()
            return True

        # artificial slowdown, else user will assume it didn't work
        self.status(0.5)
        time.sleep(0.1)

        # tell Gtk we've handled the situation
        self.status("Shared '" + row["title"][:30] + "' on MyOggRadio.org",
                    icon="gtk-save")
        return True

示例#6

0

显示文件

文件： windowsmedia.py 项目： leigh123linux/streamtuner2

    def update_streams(self, cat, search=None):
        """Fetch the station list for category ``cat`` and return its rows.

        The category name is lower-cased and stripped of non-word
        characters, inserted into ``self.base`` together with
        ``conf.windowsmedia_culture``, and the resulting page is scanned
        for ``Listen(...)`` onclick handlers.  Each handler yields a row
        dict with id, title, homepage, url and a bitrate of 0.  ``search``
        is accepted but not used.
        """
        ucat = re.sub(r"\W+", "", cat.lower())
        html = ahttp.get(self.base.format(ucat, conf.windowsmedia_culture))
        # onclick="Listen('31e11281-cf43-4d39-9164-77721604380b', 'DJ Perry Radio', 'http://www.djperryradio.com/', 'More Stations', '20', true);">

        ls = re.findall(r"""
            onclick="Listen\('([\w\-]+)',\s*'(.+?)',\s*'(.+?)',
        """, html, re.X|re.S)
        return [
            dict(
                id=station_id,
                title=unhtml(title),
                homepage=ahttp.fix_url(homepage),
                url=self._url.format(station_id, conf.windowsmedia_culture),
                bitrate=0,
            )
            for station_id, title, homepage in ls
        ]

示例#7

0

显示文件

文件： internet_radio.py 项目： leigh123linux/streamtuner2

    def with_regex(self, html):
        """Extract station rows from HTML pages using regular expressions.

        ``html`` is a sequence of page strings; they are joined with
        newlines and cut into ``<tr>`` blocks.  Blocks containing
        ``id="pagination"`` are skipped.  Every other block is matched
        against the station pattern; a match yields one row dict, and a
        failed match is reported through ``log.DATA``.  Returns the list
        of rows.
        """
        log.PROC("internet-radio, regex")
        page = "\n".join(html)

        # Break up into <tr> blocks before extracting bits
        rx_tr = re.compile("""<tr[^>]*>(.+?)</tr>""", re.S)
        rx_data = re.compile(
            r"""
               playjp',\s*'(https?://[^'">]+)
               .*?   <h4.*?>([^<>]+)</
               .*?   <b>([^<>]*)</b>
         (?:   .*?   href="(.*?)"        )?
         (?:   .*?   Genres:((?:</?a[^>]+>|\w+|\s+)+)    )?
               .*?   (\d+)\s*Listeners
               .*?   (\d+)\s*Kbps
        """, re.S | re.X)

        stations = []
        for block in rx_tr.findall(page):
            if 'id="pagination"' in block:
                continue

            found = rx_data.search(block)
            if not found:
                log.DATA("Regex couldn't decipher entry:", block)
                continue

            (url, title, playing, homepage, genres, listeners,
             bitrate) = found.groups()

            # transform data; optional groups may be None, hence the
            # `or` fallbacks
            stations.append({
                "url": url,
                "genre": strip_tags(genres or ""),
                "homepage": ahttp.fix_url(homepage or ""),
                "title": nl(title or ""),
                "playing": nl(playing or ""),
                "bitrate": int(bitrate or 0),
                "listeners": int(listeners or 0),
                # there is no stream info on that, so the default is assumed
                "format": "audio/mpeg",
            })
        return stations

示例#8

0

显示文件

文件： action.py 项目： leigh123linux/streamtuner2

 def m3u(self, rows):
     """Render ``rows`` as extended M3U playlist text.

     The output starts with the "#EXTM3U" header line; each row adds an
     "#EXTINF:-1,<title>" line followed by a line holding the row's URL
     after ``ahttp.fix_url``.
     """
     lines = ["#EXTM3U\n"]
     for entry in rows:
         lines.append("#EXTINF:-1,%s\n" % entry["title"])
         lines.append("%s\n" % ahttp.fix_url(entry["url"]))
     return "".join(lines)