def junk_urls_filter(matches):
    """
    Yield the entries of `matches` whose URL is worth keeping.

    `matches` is an iterable of (key, match, line, lineno) four-tuples where
    `match` is the URL string. Each URL is passed to
    finder_data.classify_url(): entries with a truthy classification are
    yielded as (key, match, line, lineno) tuples in their original order, and
    the others (common uninteresting URLs, hosts or domains such as local,
    non public or example.com URLs) are dropped.

    When the module-level DEBUG flag is set, each dropped URL is printed.
    """
    for key, url, text, line_number in matches:
        if finder_data.classify_url(url):
            yield key, url, text, line_number
        elif DEBUG:
            # Only rejected URLs are reported, and only in debug mode.
            print('junk_url_filter: %(match)r' % {'match': url})
def junk_urls_filter(matches):
    """
    Filter an iterable of URL matches, keeping only the interesting URLs.

    Each item of `matches` is unpacked as a (key, match, line, lineno) tuple.
    The `match` URL is classified with finder_data.classify_url(); a falsy
    result marks the URL as junk (common uninteresting URLs, hosts or domains
    such as local, non public or example.com URLs) and the item is skipped.
    Every other item is yielded again as a (key, match, line, lineno) tuple.

    Skipped URLs are reported through logger_debug() when TRACE is enabled.
    """
    for key, url, text, line_number in matches:
        is_junk = not finder_data.classify_url(url)
        if is_junk:
            if TRACE:
                message = 'junk_url_filter: %(match)r' % {'match': url}
                logger_debug(message)
        else:
            yield key, url, text, line_number