def main():
    """Benchmark URL helper functions on links extracted from ``sites.tar.gz``.

    Every regular file in the archive is parsed as an HTML response and all
    ``<a href>`` values are collected.  Each collected URL is then passed to
    ``file_uri_to_path``, ``safe_url_string`` and ``canonicalize_url`` while
    the time spent in each call is accumulated.

    The per-function timings, the total time and the resulting rate
    (items/second) are printed, and the rate is also written to
    ``Benchmark.txt``.
    """
    urls = []
    # The context manager guarantees the archive is closed even if parsing fails.
    with tarfile.open("sites.tar.gz") as tar:
        for member in tar.getmembers():
            f = tar.extractfile(member)
            if f is None:
                # extractfile() returns None for members that are not regular
                # files (e.g. directories); there is nothing to parse.
                continue
            html = f.read()
            response = HtmlResponse(url="local", body=html, encoding='utf8')
            urls.extend(response.css('a::attr(href)').extract())

    # Functions under test, in the order they are run and reported.
    # any_to_uri(url) is left out -- Error on Python 2: KeyError: u'\u9996'
    benchmarks = (
        ("file_uri_to_path", file_uri_to_path),
        ("safe_url_string", safe_url_string),
        ("canonicalize_url", canonicalize_url),
    )
    spent = {name: 0 for name, _ in benchmarks}

    total = 0
    for url in urls:
        for name, func in benchmarks:
            start = timer()
            func(url)
            spent[name] += timer() - start
        total += 1

    total_time = sum(spent[name] for name, _ in benchmarks)
    # Guard against an empty archive (or a timer too coarse to register any
    # elapsed time), which would otherwise raise ZeroDivisionError.
    rate = float(total / total_time) if total_time else 0.0

    print("\nTotal number of items extracted = {0}".format(total))
    for name, _ in benchmarks:
        print("Time spent on {0} = {1}".format(name, spent[name]))
    print("Total time taken = {0}".format(total_time))
    click.secho("Rate of link extraction : {0} items/second\n".format(
        rate), bold=True)

    with open("Benchmark.txt", 'w') as g:
        g.write(" {0}".format(rate))
def read(file_location):
    """
    Read audio file from the local file system or download it from URL.

    Arguments
    ---------
    file_location: str
        Path or URL to the file that will be loaded. Locations starting with
        ``http://`` or ``https://`` are downloaded into a temporary file
        first; anything else is treated as a local path (``~`` is expanded).

    Return
    ------
    data: numpy array
        Audio data as floats, scaled by 1/32768. Multi-channel input is
        averaged down to a single channel.
    fs: int
        Sampling frequency.
    file_name: str
        Base name of the loaded file.
    """
    intmaxabs = 32768.  # maximum value for 16-bit signed integers

    temp_file = None
    try:
        if re.match('https://|http://', file_location):
            url = canonicalize_url(file_location)
            response = urlopen(url)
            try:
                temp_file = NamedTemporaryFile()
                temp_file.write(response.read())
            finally:
                response.close()
            # The file is re-opened by name below, so buffered bytes must be
            # on disk first; otherwise the decoder may see a truncated file.
            temp_file.flush()
            file_path = temp_file.name
            file_name = basename(file_uri_to_path(file_location))
        else:
            file_path = expanduser(file_location)
            file_name = basename(file_path)

        with audioread.audio_open(file_path) as input_file:
            fs = input_file.samplerate
            channels = input_file.channels
            # audioread returns buffers containing 16-bit signed integers
            frames = [np.frombuffer(frame, np.dtype('int16'))
                      for frame in input_file]

        # Join all frames in one go instead of re-allocating per frame.
        if frames:
            data_int = np.concatenate(frames, axis=0)
        else:
            data_int = np.array([], dtype=np.dtype('int16'))

        # convert data to float (np.float64 is what np.float_ aliased before
        # the alias was removed in NumPy 2.0)
        data = data_int.astype(np.float64, casting='safe')

        # Conversion to mono (mix both channels)
        if channels > 1:
            data = data.reshape((-1, channels)).T
            data = np.mean(data, axis=0)

        data = data / intmaxabs
    finally:
        # Closing the NamedTemporaryFile also removes it, including when
        # downloading or decoding failed.
        if temp_file is not None:
            temp_file.close()

    return (data, fs, file_name)
def test_path_to_file_uri(self):
    """Check path_to_file_uri for an absolute and a relative path."""
    if os.name == 'nt':
        # Backslashes are escaped explicitly; "\c" is an invalid escape
        # sequence and triggers a warning on recent Python versions.
        self.assertEqual(path_to_file_uri("C:\\windows\\clock.avi"),
                         "file:///C:/windows/clock.avi")
    else:
        self.assertEqual(path_to_file_uri("/some/path.txt"),
                         "file:///some/path.txt")

    # A relative path must produce a file URI that converts back to the
    # absolute form of that path (compared case-insensitively).
    fn = "test.txt"
    x = path_to_file_uri(fn)
    # assertTrue replaces the deprecated assert_ alias (removed in 3.12).
    self.assertTrue(x.startswith('file:///'))
    self.assertEqual(file_uri_to_path(x).lower(), os.path.abspath(fn).lower())
def test_file_uri_to_path(self):
    """Check file_uri_to_path and its round trip through path_to_file_uri."""
    if os.name == 'nt':
        # Backslashes are escaped explicitly; "\c" is an invalid escape
        # sequence and triggers a warning on recent Python versions.
        self.assertEqual(file_uri_to_path("file:///C:/windows/clock.avi"),
                         "C:\\windows\\clock.avi")
        uri = "file:///C:/windows/clock.avi"
        uri2 = path_to_file_uri(file_uri_to_path(uri))
        self.assertEqual(uri, uri2)
    else:
        self.assertEqual(file_uri_to_path("file:///path/to/test.txt"),
                         "/path/to/test.txt")
        # A plain path (no scheme) is expected to come back unchanged.
        self.assertEqual(file_uri_to_path("/path/to/test.txt"),
                         "/path/to/test.txt")
        uri = "file:///path/to/test.txt"
        uri2 = path_to_file_uri(file_uri_to_path(uri))
        self.assertEqual(uri, uri2)

    self.assertEqual(file_uri_to_path("test.txt"), "test.txt")
def __init__(self, uri):
    """Store the result of ``file_uri_to_path(uri)`` as ``self.path``."""
    resolved_path = file_uri_to_path(uri)
    self.path = resolved_path
def download_request(self, request, spider):
    """Build a response from the local file that ``request.url`` points to.

    The URL is converted with ``file_uri_to_path``, the file is read in
    binary mode, and the response class is chosen by
    ``responsetypes.from_args`` from the file name and body.  ``spider`` is
    not used here.
    """
    local_path = file_uri_to_path(request.url)
    with open(local_path, 'rb') as stream:
        payload = stream.read()
    response_class = responsetypes.from_args(filename=local_path, body=payload)
    return response_class(url=request.url, body=payload)
def __init__(self, uri, *, feed_options=None):
    """Set the target path and the mode used to open it.

    ``self.path`` is ``file_uri_to_path(uri)``.  ``self.write_mode`` is
    ``'wb'`` when ``feed_options`` has a truthy ``'overwrite'`` entry and
    ``'ab'`` otherwise (including when ``feed_options`` is missing or empty).
    """
    self.path = file_uri_to_path(uri)
    options = feed_options if feed_options else {}
    if options.get('overwrite', False):
        self.write_mode = 'wb'
    else:
        self.write_mode = 'ab'
def __init__(self, uri):
    """Store the path derived from ``uri`` and obtain a logger.

    ``self.path`` is ``file_uri_to_path(uri)``; ``self.logger`` is whatever
    ``getLogger()`` returns when called without arguments.
    """
    resolved_path = file_uri_to_path(uri)
    self.path = resolved_path
    self.logger = getLogger()
def download_request(self, request, spider):
    """Build a response from the local file that ``request.url`` points to.

    The URL is converted with ``file_uri_to_path``, the file is read in
    binary mode, and the response class is chosen by
    ``responsetypes.from_args`` from the file name and body.  ``spider`` is
    not used here.
    """
    filepath = file_uri_to_path(request.url)
    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(filepath, 'rb') as fo:
        body = fo.read()
    respcls = responsetypes.from_args(filename=filepath, body=body)
    return respcls(url=request.url, body=body)
def __init__(self, uri, *, feed_options=None):
    """Set the target path and pick between overwrite and append mode.

    ``self.path`` is ``file_uri_to_path(uri)``.  When ``feed_options``
    contains a truthy ``"overwrite"`` value the write mode is ``"wb"``;
    in every other case it is ``"ab"``.
    """
    self.path = file_uri_to_path(uri)
    if not feed_options:
        feed_options = {}
    should_overwrite = feed_options.get("overwrite", False)
    self.write_mode = "wb" if should_overwrite else "ab"
def __init__(self, uri, settings):
    """Store the path derived from ``uri`` and the overwrite setting.

    ``self.path`` is ``file_uri_to_path(uri)`` and ``self.overwrite`` is the
    value stored under ``'FEED_OVERWRITE'`` in ``settings``.
    """
    resolved_path = file_uri_to_path(uri)
    self.path = resolved_path
    overwrite_setting = settings['FEED_OVERWRITE']
    self.overwrite = overwrite_setting