示例#1
0
def test_pair_directories():
    """Pairing two directories is expected to give every SAMPLES_1 x SAMPLES_2 combination."""
    first_dir = SAMPLES_DIR / "sample_1"
    second_dir = SAMPLES_DIR / "sample_2"

    result = files.pair(first_dir, second_dir, extensions=[".log"])

    # Cross product, SAMPLES_1 varying slowest.
    expected = [(left, right) for left in SAMPLES_1 for right in SAMPLES_2]
    assert sort_sublist(result) == expected
示例#2
0
def match(
    *paths: Path,
    length: int = DEFAULT_LENGTH,
    extensions: Optional[Iterable[str]] = None,
) -> Dict[Tuple[Path, Path], float]:
    """
    Find similar audio files among the given paths.

    Args:
        *paths: inputs forwarded unchanged to ``files.pair``, which produces the
            pairs of files to compare.
        length: how many seconds of the input audio to take for analysis.
            Defaults to ``DEFAULT_LENGTH``.
        extensions: take only files with the given extensions. It has no effect
            on paths that already have an extension.

    Returns:
        A dictionary mapping each pair of filepaths to the score between them.
    """
    candidate_pairs = list(files.pair(*paths, extensions=extensions))

    # Deduplicate so each file is fingerprinted once, however many pairs it is in.
    unique_files = list(set(itertools.chain.from_iterable(candidate_pairs)))
    fingerprint = functools.partial(fingerprints.calc, length=length)
    with concurrent.futures.ThreadPoolExecutor() as thread_pool:
        # Executor.map yields results in input order, so zip lines each
        # fingerprint up with the file it was computed from.
        by_file = dict(
            zip(unique_files, thread_pool.map(fingerprint, unique_files))
        )

    # multiprocessing.Pool.starmap would remove the need for a wrapper that
    # unpacks the arguments. However, multiprocessing.Pool doesn't play nicely
    # with coverage and requires an explicit 'pool.join' call, so each pair is
    # passed to _compare as a single tuple instead.
    fingerprint_pairs = (
        (by_file[first], by_file[second]) for first, second in candidate_pairs
    )
    with concurrent.futures.ProcessPoolExecutor() as process_pool:
        scores = process_pool.map(_compare, fingerprint_pairs)

    return dict(zip(candidate_pairs, scores))
示例#3
0
def test_pair_glob():
    """A single wildcard path is expected to pair up the three SAMPLES_1 entries."""
    pattern = SAMPLES_DIR / "sample_1/*.log"

    result = files.pair(pattern)

    first, second, third = SAMPLES_1[0], SAMPLES_1[1], SAMPLES_1[2]
    expected = [(first, second), (first, third), (second, third)]
    assert sort_sublist(result) == expected
示例#4
0
def test_pair_files_in_a_directory():
    """A single directory is expected to pair up the three SAMPLES_1 entries."""
    sample_dir = SAMPLES_DIR / "sample_1"

    result = files.pair(sample_dir, extensions=[".log"])

    first, second, third = SAMPLES_1[0], SAMPLES_1[1], SAMPLES_1[2]
    expected = [(first, second), (first, third), (second, third)]
    assert sort_sublist(result) == expected
示例#5
0
def test_pair_a_file_and_all_files_in_a_directory():
    """A file plus a directory: SAMPLES_1[0] is expected to be paired with each entry.

    The expected result includes SAMPLES_1[0] paired with itself.
    """
    single_file = SAMPLES_DIR / "sample_1/take-1.log"
    sample_dir = SAMPLES_DIR / "sample_1"

    result = files.pair(single_file, sample_dir, extensions=[".log"])

    anchor = SAMPLES_1[0]
    expected = [(anchor, SAMPLES_1[0]), (anchor, SAMPLES_1[1]), (anchor, SAMPLES_1[2])]
    assert sort_sublist(result) == expected
示例#6
0
def test_pair_one_file():
    """A single file is expected to raise NotEnoughFiles with a fixed message."""
    lone_file = SAMPLES_DIR / "sample_1/take-1.log"

    with pytest.raises(NotEnoughFiles) as raised:
        files.pair(lone_file)

    message = str(raised.value)
    assert message == "Not enough input files."
示例#7
0
def test_pair_two_files():
    """Two explicit files are expected to give the single pair (SAMPLES_1[0], SAMPLES_1[1])."""
    sample_dir = SAMPLES_DIR / "sample_1"
    first_file = sample_dir / "take-1.log"
    second_file = sample_dir / "take-2.log"

    result = files.pair(first_file, second_file)

    expected = [(SAMPLES_1[0], SAMPLES_1[1])]
    assert sort_sublist(result) == expected