Exemplo n.º 1
0
def main():
    """Benchmark three URL helpers on links extracted from ``sites.tar.gz``.

    Every regular file in the archive is parsed as an HTML response and the
    ``href`` attribute of each ``<a>`` element is collected.  Each collected
    link is then passed to ``file_uri_to_path``, ``safe_url_string`` and
    ``canonicalize_url`` while the time spent in each call is accumulated.

    The per-function timings, the total and the resulting rate are printed,
    and the rate is written to ``Benchmark.txt``.
    """
    total = 0
    total_time = 0
    time_file_uri_to_path = 0
    time_safe_url_string = 0
    time_canonicalize_url = 0

    urls = []

    # The context manager guarantees the archive is closed even if parsing
    # one of the members raises.
    with tarfile.open("sites.tar.gz") as tar:
        for member in tar.getmembers():
            f = tar.extractfile(member)
            if f is None:
                # extractfile() returns None for members that are not regular
                # files or links (e.g. directories); there is nothing to parse.
                continue
            try:
                html = f.read()
            finally:
                f.close()
            response = HtmlResponse(url="local", body=html, encoding='utf8')

            links = response.css('a::attr(href)').extract()
            urls.extend(links)

    for url in urls:
        start = timer()
        file_uri_to_path(url)
        elapsed = timer() - start
        time_file_uri_to_path += elapsed
        total_time += elapsed

        start = timer()
        safe_url_string(url)
        elapsed = timer() - start
        time_safe_url_string += elapsed
        total_time += elapsed

        start = timer()
        canonicalize_url(url)
        elapsed = timer() - start
        time_canonicalize_url += elapsed
        total_time += elapsed

        # any_to_uri(url) # Error on Python 2: KeyError: u'\u9996'

        total += 1

    # With no links (or a timer too coarse to register anything) the elapsed
    # time is zero; report a rate of 0.0 instead of raising ZeroDivisionError.
    rate = float(total / total_time) if total_time else 0.0

    print("\nTotal number of items extracted = {0}".format(total))
    print("Time spent on file_uri_to_path = {0}".format(time_file_uri_to_path))
    print("Time spent on safe_url_string = {0}".format(time_safe_url_string))
    print("Time spent on canonicalize_url = {0}".format(time_canonicalize_url))
    print("Total time taken = {0}".format(total_time))
    click.secho("Rate of link extraction : {0} items/second\n".format(rate),
                bold=True)

    with open("Benchmark.txt", 'w') as g:
        g.write(" {0}".format(rate))
Exemplo n.º 2
0
def read(file_location):
    """
    Read audio file from the local file system or download it from URL.

    Remote files (``http://`` or ``https://``) are downloaded into a
    temporary file which is removed again before returning, whether or not
    decoding succeeds.  Multi-channel audio is mixed down to mono and the
    samples are scaled by 1/32768.

    Arguments
    ---------
    file_location: str
        Path or URL to the file that will be loaded.

    Return
    ------
    data: numpy array
        Audio data.
    fs: int
        Sampling frequency.
    file_name: str
        Base name of the loaded file.
    """
    intmaxabs = 32768.  # maximum value for 16-bit signed integers
    fs = None
    temp_file = None

    try:
        if re.match('https://|http://', file_location):
            url = canonicalize_url(file_location)
            response = urlopen(url)
            try:
                temp_file = NamedTemporaryFile()
                temp_file.write(response.read())
            finally:
                response.close()
            # The file is reopened by name below, so buffered bytes must be
            # pushed to disk first or the decoder may see a truncated file.
            temp_file.flush()
            file_path = temp_file.name
            file_name = basename(file_uri_to_path(file_location))
        else:
            file_path = expanduser(file_location)
            file_name = basename(file_path)

        with audioread.audio_open(file_path) as input_file:
            fs = input_file.samplerate
            channels = input_file.channels

            # audioread returns buffers containing 16-bit signed integers.
            # Collect the frames and join them once: concatenating inside the
            # loop would re-copy all previous samples on every iteration.
            # The empty seed array keeps the result well-defined when the
            # file yields no frames at all.
            chunks = [np.array([], dtype=np.dtype('int16'))]
            for frame in input_file:
                chunks.append(np.frombuffer(frame, np.dtype('int16')))
            data_int = np.concatenate(chunks, axis=0)

            # convert data to float (np.float64 is what the np.float_ alias
            # referred to; the alias was removed in NumPy 2.0)
            data = data_int.astype(np.float64, casting='safe')  # pylint: disable=maybe-no-member

            # Conversion to mono (mix both channels)
            if channels > 1:
                data = data.reshape((-1, channels)).T
                data = np.mean(data, axis=0)

            data = data / intmaxabs
    finally:
        # Closing the NamedTemporaryFile also deletes it; do so even when
        # downloading or decoding failed.
        if temp_file:
            temp_file.close()

    return (data, fs, file_name)
Exemplo n.º 3
0
    def test_path_to_file_uri(self):
        """Check ``path_to_file_uri`` on an absolute and a relative path."""
        if os.name == 'nt':
            # Backslashes are escaped explicitly: "\c" is an invalid escape
            # sequence that newer Python versions warn about.
            self.assertEqual(path_to_file_uri("C:\\windows\\clock.avi"),
                             "file:///C:/windows/clock.avi")
        else:
            self.assertEqual(path_to_file_uri("/some/path.txt"),
                             "file:///some/path.txt")

        # A relative path must yield a file URI that maps back to the
        # absolute path (compared case-insensitively).
        fn = "test.txt"
        x = path_to_file_uri(fn)
        # assertTrue replaces the deprecated assert_ alias (removed in 3.12).
        self.assertTrue(x.startswith('file:///'))
        self.assertEqual(file_uri_to_path(x).lower(), os.path.abspath(fn).lower())
Exemplo n.º 4
0
    def test_path_to_file_uri(self):
        """Check ``path_to_file_uri`` on an absolute and a relative path."""
        if os.name == 'nt':
            # Backslashes are escaped explicitly: "\c" is an invalid escape
            # sequence that newer Python versions warn about.
            self.assertEqual(path_to_file_uri("C:\\windows\\clock.avi"),
                             "file:///C:/windows/clock.avi")
        else:
            self.assertEqual(path_to_file_uri("/some/path.txt"),
                             "file:///some/path.txt")

        # A relative path must yield a file URI that maps back to the
        # absolute path (compared case-insensitively).
        fn = "test.txt"
        x = path_to_file_uri(fn)
        # assertTrue replaces the deprecated assert_ alias (removed in 3.12).
        self.assertTrue(x.startswith('file:///'))
        self.assertEqual(file_uri_to_path(x).lower(), os.path.abspath(fn).lower())
Exemplo n.º 5
0
    def test_file_uri_to_path(self):
        """Check ``file_uri_to_path`` on file URIs, plain paths and round trips."""
        if os.name == 'nt':
            # Backslashes are escaped explicitly: "\c" is an invalid escape
            # sequence that newer Python versions warn about.
            self.assertEqual(file_uri_to_path("file:///C:/windows/clock.avi"),
                             "C:\\windows\\clock.avi")
            uri = "file:///C:/windows/clock.avi"
            # URI -> path -> URI must give back the original URI.
            uri2 = path_to_file_uri(file_uri_to_path(uri))
            self.assertEqual(uri, uri2)
        else:
            self.assertEqual(file_uri_to_path("file:///path/to/test.txt"),
                             "/path/to/test.txt")
            # An input that is already a plain path is returned unchanged.
            self.assertEqual(file_uri_to_path("/path/to/test.txt"),
                             "/path/to/test.txt")
            uri = "file:///path/to/test.txt"
            # URI -> path -> URI must give back the original URI.
            uri2 = path_to_file_uri(file_uri_to_path(uri))
            self.assertEqual(uri, uri2)

        # A bare relative file name is returned unchanged on every platform.
        self.assertEqual(file_uri_to_path("test.txt"), "test.txt")
Exemplo n.º 6
0
    def test_file_uri_to_path(self):
        """Check ``file_uri_to_path`` on file URIs, plain paths and round trips."""
        if os.name == 'nt':
            # Backslashes are escaped explicitly: "\c" is an invalid escape
            # sequence that newer Python versions warn about.
            self.assertEqual(file_uri_to_path("file:///C:/windows/clock.avi"),
                             "C:\\windows\\clock.avi")
            uri = "file:///C:/windows/clock.avi"
            # URI -> path -> URI must give back the original URI.
            uri2 = path_to_file_uri(file_uri_to_path(uri))
            self.assertEqual(uri, uri2)
        else:
            self.assertEqual(file_uri_to_path("file:///path/to/test.txt"),
                             "/path/to/test.txt")
            # An input that is already a plain path is returned unchanged.
            self.assertEqual(file_uri_to_path("/path/to/test.txt"),
                             "/path/to/test.txt")
            uri = "file:///path/to/test.txt"
            # URI -> path -> URI must give back the original URI.
            uri2 = path_to_file_uri(file_uri_to_path(uri))
            self.assertEqual(uri, uri2)

        # A bare relative file name is returned unchanged on every platform.
        self.assertEqual(file_uri_to_path("test.txt"),
                         "test.txt")
Exemplo n.º 7
0
 def __init__(self, uri):
     """Store the result of ``file_uri_to_path(uri)`` as ``self.path``."""
     resolved_path = file_uri_to_path(uri)
     self.path = resolved_path
Exemplo n.º 8
0
 def __init__(self, uri):
     """Store the result of ``file_uri_to_path(uri)`` as ``self.path``."""
     resolved_path = file_uri_to_path(uri)
     self.path = resolved_path
Exemplo n.º 9
0
 def download_request(self, request, spider):
     """Build a response from the file that ``request.url`` points to.

     The URL is converted with ``file_uri_to_path``, the file is read in
     binary mode, and the response class chosen by
     ``responsetypes.from_args`` is instantiated with the request URL and
     the file contents.  ``spider`` is not used.
     """
     local_path = file_uri_to_path(request.url)
     with open(local_path, 'rb') as stream:
         payload = stream.read()
     response_class = responsetypes.from_args(filename=local_path,
                                              body=payload)
     return response_class(url=request.url, body=payload)
Exemplo n.º 10
0
 def __init__(self, uri, *, feed_options=None):
     """Set ``self.path`` from ``uri`` and pick the file write mode.

     ``self.write_mode`` is ``'wb'`` when ``feed_options`` has a truthy
     ``'overwrite'`` entry and ``'ab'`` otherwise (including when
     ``feed_options`` is missing or empty).
     """
     self.path = file_uri_to_path(uri)
     options = feed_options if feed_options else {}
     if options.get('overwrite', False):
         self.write_mode = 'wb'
     else:
         self.write_mode = 'ab'
Exemplo n.º 11
0
 def __init__(self, uri):
     """Set ``self.path`` from ``uri`` and attach a logger as ``self.logger``."""
     resolved_path = file_uri_to_path(uri)
     self.path = resolved_path
     log = getLogger()
     self.logger = log
Exemplo n.º 12
0
 def download_request(self, request, spider):
     """Build a response from the file that ``request.url`` points to.

     The URL is converted with ``file_uri_to_path``, the file is read in
     binary mode, and the response class chosen by
     ``responsetypes.from_args`` is instantiated with the request URL and
     the file contents.  ``spider`` is not used.
     """
     filepath = file_uri_to_path(request.url)
     # Use a context manager so the handle is closed deterministically
     # instead of being left for the garbage collector.
     with open(filepath, 'rb') as fo:
         body = fo.read()
     respcls = responsetypes.from_args(filename=filepath, body=body)
     return respcls(url=request.url, body=body)
Exemplo n.º 13
0
 def __init__(self, uri, *, feed_options=None):
     """Set ``self.path`` from ``uri`` and pick the file write mode.

     ``self.write_mode`` is ``"wb"`` when ``feed_options`` has a truthy
     ``"overwrite"`` entry and ``"ab"`` otherwise (including when
     ``feed_options`` is missing or empty).
     """
     self.path = file_uri_to_path(uri)
     options = feed_options if feed_options else {}
     if options.get("overwrite", False):
         self.write_mode = "wb"
     else:
         self.write_mode = "ab"
Exemplo n.º 14
0
 def __init__(self, uri, settings):
     """Set ``self.path`` from ``uri`` and ``self.overwrite`` from settings.

     ``self.overwrite`` is taken from ``settings['FEED_OVERWRITE']``.
     """
     resolved_path = file_uri_to_path(uri)
     self.path = resolved_path
     overwrite_flag = settings['FEED_OVERWRITE']
     self.overwrite = overwrite_flag