def fit_model(io: IO, model: Sequential, preprocessed: List[Preprocessed]):
    """Train ``model`` one epoch at a time on the training part of the data.

    The training subset is chosen once by ``gen_split``. For every epoch the
    inputs are regenerated with ``gen_fit_params``, trimmed with
    ``trim_params`` and fed to a single-epoch, unshuffled ``model.fit`` call.
    Model states are reset before the first epoch and after every epoch.

    Options read from ``io``: ``epochs``, ``split`` (only for the log line),
    ``batch_size`` and ``profile``. When ``profile`` is truthy a TensorBoard
    callback writing to a timestamped directory under ``logs/`` is attached
    to every fit call.
    """
    total_epochs = io.get("epochs")
    model.reset_states()

    split_message = "splitting into training set and testing set ({}%)"
    logline(split_message.format(io.get("split")))
    training_items = gen_split(preprocessed, io)

    # A single log directory per fit_model call, shared by all epochs.
    run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_dir = "logs/" + run_stamp

    for epoch in range(1, total_epochs + 1):
        logline("generating input and expected data for epoch {}/{}".format(
            epoch, total_epochs))
        fit_params = gen_fit_params(training_items)
        train_x, train_y = trim_params(fit_params, io)

        logline("training epoch {}/{}".format(epoch, total_epochs))
        fit_callbacks = []
        if io.get("profile"):
            debug("profiling")
            tensorboard = tf.keras.callbacks.TensorBoard(
                log_dir=tensorboard_dir, histogram_freq=1)
            fit_callbacks.append(tensorboard)

        # The epoch loop is driven here, so each fit call runs exactly one
        # epoch and the states are cleared afterwards.
        model.fit(
            train_x,
            train_y,
            batch_size=io.get("batch_size"),
            epochs=1,
            shuffle=False,
            callbacks=fit_callbacks,
        )
        model.reset_states()
示例#2
0
def output_split(all: List[Preprocessed], train: List[Preprocessed], io: IO):
    """Write the training/test split to the ``output_train`` file as JSON.

    The written object has two keys: ``training_set`` holds the file names of
    the items in ``train``; ``test_set`` holds the file names of every item
    of ``all`` that is not in ``train``.
    """
    training_names = [item.file_name for item in train]
    test_names = [item.file_name for item in all if item not in train]
    split_config = {
        "training_set": training_names,
        "test_set": test_names,
    }

    target_path = io.get("output_train")
    with open(target_path, "w+") as out_file:
        json.dump(split_config, out_file)
        logline("wrote training/testing config to {}".format(target_path))
示例#3
0
def read_test_files(io: IO) -> List[Preprocessed]:
    """Load the preprocessed files that belong to the test set.

    The pickled configs are read from the ``input_preprocessed`` file and the
    list of test file names from the ``test_set`` key of the JSON file at
    ``input_train``. Each config is wrapped in ``Preprocessed`` and only
    those whose ``file_name`` is listed in the test set are returned.
    """
    # NOTE(review): pickle.load executes arbitrary code from the file; this
    # assumes the preprocessed file is trusted - confirm.
    with open(io.get("input_preprocessed"), "rb") as preprocessed_file:
        file_configs = pickle.load(preprocessed_file)

    with open(io.get("input_train"), "rb") as train_config_file:
        test_names = json.load(train_config_file)["test_set"]

    candidates = (Preprocessed(config) for config in file_configs)
    return [item for item in candidates if item.file_name in test_names]
示例#4
0
def fit_model(io: IO, model: Sequential, preprocessed: List[Preprocessed]):
    """Train ``model`` one epoch at a time on the training part of the data.

    The training subset is chosen once by ``gen_split``. For every epoch the
    inputs are regenerated with ``gen_fit_params``, trimmed with
    ``trim_params`` and passed to a single-epoch, unshuffled ``model.fit``
    call. Model states are reset before the first epoch and after each one.

    Options read from ``io``: ``epochs``, ``split`` (only for the log line)
    and ``batch_size``.
    """
    total_epochs = io.get("epochs")
    model.reset_states()

    split_message = "splitting into training set and testing set ({}%)"
    logline(split_message.format(io.get("split")))
    training_items = gen_split(preprocessed, io)

    for epoch in range(1, total_epochs + 1):
        logline("generating input and expected data for epoch {}/{}".format(
            epoch, total_epochs))
        fit_params = gen_fit_params(training_items)
        train_x, train_y = trim_params(fit_params, io)

        logline("training epoch {}/{}".format(epoch, total_epochs))
        # The epoch loop is driven here, so each fit call runs one epoch.
        model.fit(
            train_x,
            train_y,
            batch_size=io.get("batch_size"),
            epochs=1,
            shuffle=False,
        )
        model.reset_states()
def output_split(all: List[Preprocessed], train: List[Preprocessed], io: IO):
    """Write the training/test split to the ``output_train`` file as JSON.

    The written object has two keys: ``training_set`` holds the file names of
    the items in ``train``; ``test_set`` holds the file names of every item
    of ``all`` that is not in ``train``. The parent directory of the output
    file is created first if it does not exist yet.
    """
    training_names = [item.file_name for item in train]
    test_names = [item.file_name for item in all if item not in train]
    split_config = {
        "training_set": training_names,
        "test_set": test_names,
    }

    target_path = io.get("output_train")
    parent_dir = pathlib.Path(os.path.dirname(target_path))
    parent_dir.mkdir(parents=True, exist_ok=True)

    with open(target_path, "w+") as out_file:
        json.dump(split_config, out_file)
        logline("wrote training/testing config to {}".format(target_path))
def gen_split(preprocessed: List[Preprocessed], io: IO) -> List[Preprocessed]:
    """Pick the training subset of ``preprocessed`` and record the split.

    ``split`` (read from ``io``) is the percentage of the total feature
    count that should end up in the training set. With a split of exactly
    100 every item is used for training. Otherwise the items are shuffled
    and added to the training set until adding the next one would reach or
    exceed the target feature count. The last shuffled item is never
    considered, so it always stays out of the training set.

    The chosen split is written out through ``output_split`` before
    returning the training items.
    """
    split = io.get("split")
    if split == 100:
        output_split(preprocessed, preprocessed, io)
        return preprocessed

    shuffled = random.sample(preprocessed, len(preprocessed))

    total_len = sum(len(item.features) for item in preprocessed)
    target_len = (total_len / 100.0) * split

    train_items = []
    accumulated = 0
    # Skip the final shuffled item so the loop never consumes everything.
    for candidate in shuffled[:-1]:
        grown = accumulated + len(candidate.features)
        if grown >= target_len:
            break
        accumulated = grown
        train_items.append(candidate)

    output_split(preprocessed, train_items, io)
    return train_items
示例#7
0
def collect_input_paths(io: IO) -> List[str]:
    """Return the unique ``input_files`` entries that have a "wav" extension.

    Duplicates are removed through a set, so the order of the returned paths
    is not guaranteed. The extension check is case-sensitive and looks at the
    text after the last dot.
    """
    unique_paths = set(io.get("input_files"))
    return [path for path in unique_paths if path.split(".")[-1] == "wav"]
示例#8
0
def predictions_to_out_file(predictions: np.array, io: IO):
    """Convert per-interval predictions into the annotation output object.

    Each prediction is unpacked into a ``(beat, melody)`` pair. The running
    time starts at 0 and advances by ``interval`` (read from ``io``) for
    every prediction. Positive beats become ``{"type": "beat", "time": t}``
    items; positive melodies become melody segments one interval long, which
    are passed through ``stitch_melodies`` and appended after the beats.

    Returns a dict with the ``items`` list and a fixed ``genre`` entry.
    """
    interval = io.get("interval")

    beats = []
    melody_segments = []
    timestamp = 0
    for beat, melody in predictions:
        if is_positive_beat(beat):
            beats.append({"type": "beat", "time": timestamp})
        if is_positive_melody(melody):
            melody_segments.append({
                "type": "melody",
                "time": timestamp,
                "duration": interval,
            })
        timestamp += interval

    return {
        "items": beats + stitch_melodies(melody_segments, io),
        "genre": {"hard": 0.5, "uptempo": 0.5},
    }
示例#9
0
def gen_outputs(file: MarkedAudioFile, io: IO) -> List[ExpectedOutput]:
    """Gen a list of marked outputs for given file

    One ``ExpectedOutput(False, False)`` is created per entry of
    ``file.bins_file.bins``. Every timestamp in ``file.json_file.timestamps``
    is scaled by 1000 (presumably seconds to milliseconds - confirm), snapped
    to the closest multiple of ``interval`` and turned into an output index:

    * ``"beat"`` timestamps set ``is_beat`` on that output;
    * ``"melody"`` timestamps set ``is_melody`` on that output and on the
      following ones up to the snapped end of the melody.

    Timestamps that start past the last output are skipped, and melodies
    that run past the last output are cut off there.
    """
    out_len = len(file.bins_file.bins)
    outputs = [ExpectedOutput(False, False) for _ in range(out_len)]

    interval = io.get("interval")
    for timestamp in file.json_file.timestamps:
        # Round it to the range
        timestamp_time = timestamp.timestamp * 1000
        closest = get_closest(timestamp_time, io)

        timestamp_index = int(closest / interval)

        if timestamp_index >= out_len:
            continue

        if timestamp.beat_type == "beat":
            outputs[timestamp_index].is_beat = True
        elif timestamp.beat_type == "melody":
            # get_closest needs the io object to look up the interval.
            closest_end = get_closest(
                timestamp_time + (timestamp.length * 1000), io)
            span = int((closest_end - closest) / interval)
            # Clamp so a melody running past the end of the file cannot
            # index beyond the last output.
            end_index = min(timestamp_index + span, out_len)
            for index in range(timestamp_index, end_index):
                outputs[index].is_melody = True

    return outputs
示例#10
0
def run_tests(io: IO, model: Sequential, test_files: List[Preprocessed]):
    """Run the model on every test file, log accuracy and write annotations.

    For each file the test parameters are generated, predictions are made
    with a batch size of 1 and the model states are reset. The first value
    of each prediction is compared with the first expected value: the
    absolute difference is summed into a score and counted as correct when
    ``is_in_range`` accepts it. The mean squared error of every prediction is
    averaged as well. A summary is logged and the predictions are written as
    JSON to ``<output_annotated>/<file_name>.json``, creating the output
    directory if needed.
    """
    model.reset_states()

    for test_file in test_files:
        logline("creating test params for {}".format(test_file.file_name))
        test_x, test_y = get_test_params(test_file)

        logline("making predictions")
        predictions: List[List[float]] = model.predict(
            test_x, batch_size=1, verbose=1)
        model.reset_states()

        total = len(predictions)
        squared_errors: List[float] = []
        in_range_count = 0
        diff_sum = 0
        for index, prediction in enumerate(predictions):
            actual: List[float] = test_y[index]

            diff = abs(actual[0] - prediction[0])
            diff_sum += diff
            if is_in_range(diff):
                in_range_count += 1

            squared_errors.append(mean_squared_error(actual, prediction))

        summary = "predicted {}/{} within range ({}%) correct, score was {}/{}, mse was {}"
        logline(
            summary.format(
                in_range_count,
                total,
                round(in_range_count / total * 100, 2),
                diff_sum,
                total,
                round(sum(squared_errors) / total, 4),
            ))

        out_obj = predictions_to_out_file(predictions, io)

        output_dir = io.get("output_annotated")
        pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True)
        out_path = os.path.join(output_dir,
                                "{}.json".format(test_file.file_name))
        with open(out_path, "w+") as out_file:
            json.dump(out_obj, out_file)
            logline("wrote object to {}".format(out_path))
def start_server(io: IO):
    """Serve the ``public`` directory over HTTP until interrupted.

    Stores the ``interval`` option in the module-level ``interval`` global
    (presumably read by the request handler - confirm), then starts an
    ``HTTPServer`` on all interfaces at the ``port`` option, using
    ``WebServer`` as the handler with ``CUR_DIR/public`` as its directory.
    The call blocks in ``serve_forever`` until a ``KeyboardInterrupt``.

    The server socket is closed and the log group is exited even when
    serving fails with an unexpected exception, which is then re-raised.
    """
    global interval
    interval = io.get("interval")

    port = io.get("port")
    handler = partial(WebServer, directory=os.path.join(CUR_DIR, "public"))
    httpd = HTTPServer(("", port), handler)
    logline("listening at port", port)
    enter_group()
    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the server.
        pass
    finally:
        # Always release the socket and close the log group, so an
        # unexpected error does not leak either of them.
        httpd.server_close()
        exit_group()
    logline("stopped listening")
示例#12
0
def get_closest(timestamp_time: float, io: IO) -> int:
    """Get the closest multiple of the ``interval`` option to the timestamp

    When the timestamp is exactly halfway between two multiples, the lower
    one is returned. The result has whatever numeric type the arithmetic
    produces, so it is a float when a float timestamp is passed in.
    """
    step = io.get("interval")
    below = (timestamp_time // step) * step
    above = below + step

    distance_down = timestamp_time - below
    distance_up = above - timestamp_time

    if distance_down <= distance_up:
        return below
    return above
示例#13
0
def trim_params(params: Tuple[np.ndarray, np.ndarray], io: IO) -> Tuple[np.ndarray, np.ndarray]:
    """Cut the x/y arrays down to a whole number of batches.

    The length of the first axis of the x array is reduced to the largest
    multiple of the ``batch_size`` option by dropping trailing entries from
    both arrays. When the length already is a multiple, ``params`` is
    returned unchanged.
    """
    batch_size = io.get("batch_size")
    x_param, y_param = params

    leftover = x_param.shape[0] % batch_size
    if leftover:
        return x_param[:-leftover], y_param[:-leftover]
    return params
示例#14
0
def collect_input_paths(io: IO) -> Union[None, List[str]]:
    """Return the "wav" input files accepted by ``has_json_file``.

    Returns ``None`` when ``input_files`` contains no path with a "wav"
    extension at all. When wav files exist but ``has_json_file`` rejects all
    of them, an empty list is returned instead.
    """
    wav_files = [
        path for path in io.get("input_files")
        if path.split(".")[-1] == "wav"
    ]
    if not wav_files:
        return None

    return [path for path in wav_files if has_json_file(path)]
示例#15
0
def stitch_melodies(obj: List[Dict[str, Union[str, float]]],
                    io: IO) -> List[Dict[str, Union[str, float]]]:
    """Merge back-to-back melody segments into single, longer segments.

    ``obj`` is a list of melody items with a ``time`` and (normally) a
    ``duration`` of one ``interval``. A segment that starts exactly one
    interval after the previous segment is merged into the current stitched
    item by extending that item's ``duration``; the item's ``time`` stays at
    the start of the run. Any other segment starts a new item.

    The input dicts are copied, so ``obj`` and its items are not modified.
    Returns the new list of stitched items.
    """
    interval = io.get("interval")

    stitched: List[Dict[str, Union[str, float]]] = []
    # Start time of the segment absorbed most recently; the next segment is
    # adjacent when it starts exactly one interval later.
    last_time = None
    for melody in obj:
        if stitched and last_time == melody["time"] - interval:
            current = stitched[-1]
            # Grow the run instead of moving its start forward.
            current["duration"] = current.get("duration", interval) + interval
        else:
            stitched.append(dict(melody))
        last_time = melody["time"]

    return stitched
示例#16
0
def predictions_to_out_file(predictions: np.array, io: IO):
    """Convert per-interval predictions into the annotation output object.

    The running time starts at 0 and advances by ``interval`` (read from
    ``io``) for every prediction. A ``{"type": "beat", "time": t}`` item is
    emitted whenever ``is_in_range`` accepts the first value of the
    prediction.

    Returns a dict with the ``items`` list and a fixed ``genre`` entry.
    """
    # NOTE(review): is_in_range is applied to the raw confidence here;
    # confirm that is the intended check for a positive beat.
    interval = io.get("interval")

    items = []
    timestamp = 0
    for prediction in predictions:
        confidence: float = prediction[0]
        if is_in_range(confidence):
            items.append({"type": "beat", "time": timestamp})
        timestamp += interval

    result: Dict[str, Any] = {
        "items": items,
        "genre": {"hard": 0.5, "uptempo": 0.5},
    }
    return result
示例#17
0
def match_files(io: IO, input_paths: List[str]):
    """Match found files to analysis file contents

    The analysis file named by the ``analysis`` option is loaded and every
    input path is matched to the first analysed track whose lower-cased name
    occurs in the lower-cased base name of the path (the part after the last
    "/" and before the first "."). The mapping is logged, and a warning is
    emitted for every input file and every analysed track left unmatched.

    When anything is unmatched the user is asked to confirm. Returns
    ``None`` if the user answers "n" or interrupts the prompt, otherwise the
    tuple ``(analysis, mapped)`` where ``mapped`` maps input path to track
    name.
    """
    analysis_file = io.get("analysis")
    logline(analysis_file)

    analysis = AnalysisFile(analysis_file)

    mapped: Dict[str, str] = {}
    matched_tracks = set()
    for in_path in input_paths:
        base_name = in_path.split("/")[-1].split(".")[0].lower()
        for track in analysis.tracks:
            if track.name.lower() in base_name:
                mapped[in_path] = track.name
                matched_tracks.add(track.name)
                break

    logline("came up with the following mapping:")
    logline("")
    for source_path, track_name in mapped.items():
        logline('"{}" -> "{}"'.format(source_path, track_name))

    unmapped_amount: int = 0
    for in_path in input_paths:
        if in_path in mapped:
            continue
        warn('input file "{}" not mapped'.format(in_path))
        unmapped_amount += 1
    for track in analysis.tracks:
        if track.name in matched_tracks:
            continue
        warn('analysed file "{}" not mapped'.format(track.name))
        unmapped_amount += 1
    logline("")

    if unmapped_amount > 0:
        # Anything other than an explicit "n" counts as confirmation.
        try:
            answer = input("is this correct? Y/n")
        except KeyboardInterrupt:
            return None
        if answer.lower() == "n":
            return None

    return analysis, mapped
示例#18
0
def gen_outputs(file: MarkedAudioFile, io: IO) -> List[ExpectedOutput]:
    """Gen a list of marked outputs for given file

    One ``ExpectedOutput(0)`` is created per entry of
    ``file.bins_file.bins``. Every timestamp in ``file.timestamps`` is scaled
    by 1000 (presumably seconds to milliseconds - confirm), snapped to the
    closest multiple of ``interval`` and its ``confidence`` is stored as
    ``beat_confidence`` on the output at that index. Timestamps that fall
    past the last output are skipped.
    """
    out_len = len(file.bins_file.bins)
    # TODO: change
    outputs = [ExpectedOutput(0) for _ in range(out_len)]

    interval = io.get("interval")
    for timestamp in file.timestamps:
        # Round it to the range
        closest = get_closest(timestamp.timestamp * 1000, io)
        slot = int(closest / interval)

        if slot < out_len:
            outputs[slot].beat_confidence = timestamp.confidence

    return outputs
示例#19
0
def run_tests(io: IO, model: Sequential, test_files: List[Preprocessed]):
    """Run the model on every test file, log accuracy and write annotations.

    For each file the test parameters are generated, predictions are made
    with a batch size of 1 and the model states are reset. A prediction
    counts as correct when both its thresholded beat value and its
    thresholded melody value equal the expected values. The mean squared
    error of every prediction is averaged as well. A summary is logged and
    the predictions are written as JSON to
    ``<output_annotated>/<file_name>.json``.
    """
    model.reset_states()

    for test_file in test_files:
        logline("creating test params for {}".format(test_file.file_name))
        test_x, test_y = get_test_params(test_file)

        logline("making predictions")
        predictions = model.predict(test_x, batch_size=1, verbose=1)
        model.reset_states()

        total = len(predictions)
        squared_errors = []
        correct = 0
        for index, prediction in enumerate(predictions):
            actual = test_y[index]

            beat_matches = actual[0] == is_positive_beat(prediction[0])
            if beat_matches and actual[1] == is_positive_melody(
                    prediction[1]):
                correct += 1

            squared_errors.append(mean_squared_error(actual, prediction))

        summary = "predicted {}/{} ({}%) correct, mse was {}"
        logline(
            summary.format(
                correct,
                total,
                round(correct / total * 100, 2),
                round(sum(squared_errors) / total, 4),
            ))

        out_obj = predictions_to_out_file(predictions, io)

        out_name = "{}.json".format(test_file.file_name)
        out_path = os.path.join(io.get("output_annotated"), out_name)
        with open(out_path, "w+") as out_file:
            json.dump(out_obj, out_file)
            logline("wrote object to {}".format(out_path))
def export_model(model: Sequential, io: IO):
    """Save the model weights to the ``output_weights`` path and log it.

    The log line is emitted only after ``save_weights`` has returned, so a
    failed save is never reported as written.
    """
    weights_path = io.get("output_weights")
    model.save_weights(weights_path)
    logline('wrote weights to file "{}"'.format(weights_path))
示例#21
0
def apply_weights(model: Sequential, io: IO) -> Sequential:
    """Load the weights at the ``input_weights`` path into ``model``.

    Returns the same model instance that was passed in.
    """
    weights_path = io.get("input_weights")
    model.load_weights(weights_path)
    return model
def load_preprocessed(io: IO) -> List[Preprocessed]:
    """Load the pickled items at ``input_file`` as ``Preprocessed`` objects.

    Every entry of the unpickled sequence is wrapped in ``Preprocessed``.
    """
    # NOTE(review): pickle.load executes arbitrary code from the file; this
    # assumes the input file is trusted - confirm.
    with open(io.get("input_file"), "rb") as in_file:
        raw_items = pickle.load(in_file)
        return [Preprocessed(raw) for raw in raw_items]