def main():
    '''
    Report validity, canonical form and uniqueness for each URL in a file.

    The input file name is taken from the ``filename`` option returned by
    ``controller()``. If no file name was given, the parser's help text is
    printed and the process exits with status 1.

    For every URL returned by ``reader.read_file`` the following lines are
    printed: the source URL, whether it is valid, its canonicalized form,
    whether the source URL has been seen before in this run, and whether
    the canonicalized URL has been seen before in this run.

    An ``IOError`` raised while opening the file or while calling
    ``reader.read_file`` is passed to ``handle_io_exception``; no URLs are
    processed in that case.
    '''
    (parser, opts, args) = controller()

    if not opts.filename:
        parser.print_help()
        sys.exit(1)

    filename = opts.filename

    try:
        url_file = open(filename, 'r')
    except IOError as e:
        handle_io_exception(filename, e)
        # If the handler returns instead of exiting there is nothing to
        # process; stop here rather than failing later on an unbound name.
        return

    # The loop stays inside the ``with`` block so the file is still open
    # while the URLs are consumed (read_file may produce them lazily) and
    # is closed on every exit path, including sys.exit().
    with url_file:
        try:
            raw_url_list = reader.read_file(url_file)
        except IOError as e:
            handle_io_exception(filename, e)
            return

        unique_raw_urls = set()
        unique_canonicalized_urls = set()

        for raw_url in raw_url_list:
            print("Source: " + raw_url)

            is_raw_valid = url_validator.is_valid(raw_url)
            print("Valid: " + str(is_raw_valid))

            canonicalized_url = url_normalize.url_normalize(raw_url)
            print("Canonical: " + canonicalized_url)

            # A URL is "unique" the first time it is seen in this run.
            is_raw_unique = raw_url not in unique_raw_urls
            if is_raw_unique:
                unique_raw_urls.add(raw_url)
            print("Source unique: " + str(is_raw_unique))

            is_canonical_unique = (
                canonicalized_url not in unique_canonicalized_urls
            )
            if is_canonical_unique:
                unique_canonicalized_urls.add(canonicalized_url)
            print("Canonicalized URL unique: " + str(is_canonical_unique))
def runTest(self):
    """Assert that ``url_validator.is_valid(value)`` equals ``expected``.

    ``value`` and ``expected`` are not defined in this method; they are
    presumably supplied by an enclosing scope -- confirm against the
    surrounding code.
    """
    actual = url_validator.is_valid(value)
    assert actual == expected