Exemplo n.º 1
0
class NPR:
    """Regular expressions for npr.org URLs and page HTML."""

    # Matches any URL containing the literal text "npr.org".
    URL_PATT = re.compile(r'npr\.org')
    # Captures the text between <title> and </title>. "." does not match a
    # newline here, so both tags must appear on the same line.
    T_PAT = re.compile(r'<title>(.*)</title>')
    # Captures the href of the <a> that follows the opening
    # "audio-tool-download" <li> tag; only whitespace may separate them.
    M_PAT = re.compile(
        r'<li class="audio-tool audio-tool-download">\s*<a href="([^"]*)"')

class WBUR:
    """Regular expressions for wbur.org URLs and page HTML."""

    # Matches any URL containing the literal text "wbur.org".
    URL_PATT = re.compile(r'wbur\.org')
    # Captures the text between <title> and </title>. "." does not match a
    # newline here, so both tags must appear on the same line.
    T_PAT = re.compile(r'<title>(.*)</title>')
    # Captures the href of the anchor carrying the "article-audio-dl" class
    # and the "Download the audio" title, in exactly this attribute order.
    M_PAT = re.compile(
        r'<a href="([^"]*)" class="article-audio-dl" title="Download the audio"')

# Site pattern holders; the loop below checks each entry's URL against them
# in this order.
PATTERNS = [NPR, WBUR]

# Path of the YAML file handed to YamlPodcast.
yaml = '/home/dlu/public_html/podcast/david_misc.yaml'

podcast = YamlPodcast(yaml)

# The configuration is a JSON file named ".creds" located next to this script.
config_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '.creds')
# Read it through a context manager so the file handle is closed right away
# rather than being left open until garbage collection.
with open(config_path) as config_file:
    config = json.load(config_file)

for key, entry in retrieve(config, verbose=True).iteritems():
    if 'podcast' in entry.get('tags', {}):
        continue
    try:
        url = entry.get('resolved_url', entry.get('given_url', None))
        if url is None:
            continue
        for pattern in PATTERNS:
            if not pattern.URL_PATT.search(url):
                continue
            page = urllib2.urlopen(url).read()