class NPR: URL_PATT = re.compile('npr\.org') T_PAT = re.compile('<title>(.*)</title>') M_PAT = re.compile('<li class="audio-tool audio-tool-download">\s*<a href="([^"]*)"') class WBUR: URL_PATT = re.compile('wbur\.org') T_PAT = re.compile('<title>(.*)</title>') M_PAT = re.compile('<a href="([^"]*)" class="article-audio-dl" title="Download the audio"') PATTERNS = [NPR, WBUR] yaml = '/home/dlu/public_html/podcast/david_misc.yaml' podcast = YamlPodcast(yaml) config_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '.creds') config = json.load(open(config_path)) for key, entry in retrieve(config, verbose=True).iteritems(): if 'podcast' in entry.get('tags', {}): continue try: url = entry.get('resolved_url', entry.get('given_url', None)) if url is None: continue for pattern in PATTERNS: if not pattern.URL_PATT.search(url): continue page = urllib2.urlopen(url).read()