def updateViews(evalAlgos, dName):
    # split the named dataset into train/validation views with a fixed seed
    train, val = DATASET_DIC[dName].randomSplit(CLF_SPLIT_RATIO,
                                                seed=RANDOM_SEED)
    loader_views = {
        "_VAL": val,
        "_TRAIN": train,
    }
    for vName, vLoader in loader_views.items():
        # use a distinct loop variable so the dName parameter is not shadowed
        for dsName, dLoader in DATASET_DIC.items():
            # match the view against datasets backed by the same loader class
            if vLoader.__class__ is dLoader.__class__:
                name = dsName + vName
                logger.info(
                    "----------------------------------------------------------"
                )
                logger.info(f"loader view, name={name}")
                viewSaver = Metrics_Saver(name)
                dSaver = Metrics_Saver(dsName)
                dSaver.load(EVAL_RESULT_DIR)
                # the titles in this view select the matching subset of results
                titles = [pp.title for pp in vLoader.pathPairs]
                for aName in dSaver.algoNames:
                    metrics, _ = dSaver.getResult(aName, titles)
                    viewSaver.addResult(aName, metrics, titles)
                    if aName in evalAlgos:
                        printResult(aName, metrics)
                dumpResult(viewSaver)
def writeMirexOutput(mirexFmt, output):
    intervals, labels = mirexFmt
    mirexOutput = np.array([(x[0], x[1], y)
                            for x, y in zip(intervals, labels)],
                           np.dtype("f, f, U16"))
    np.savetxt(output, mirexOutput, fmt=["%.2f", "%.2f", "%s"], delimiter="\t")
    logger.info(f"mirex format music structure written to {output}")
def convertFileName(basedir, norm="NFD"):
    files = os.listdir(basedir)
    for fileName in files:
        normalName = unicodedata.normalize(norm, fileName)
        src = os.path.join(basedir, fileName)
        dst = os.path.join(basedir, normalName)
        if src != dst:
            os.rename(src, dst)
            logger.info(f"rename, src='{src}' dst='{dst}'")
def build(self, preprocessor, force=False, num_workers=NUM_WORKERS):
    logger.info(
        f"building <{self.__class__.__name__}> from <{self.dataset.__class__.__name__}> with transform identifier=<{self.tid}>"
    )
    self.preprocessor = preprocessor
    self.force_build = force
    # extract and store the feature for every dataset item in parallel
    with Pool(num_workers) as p:
        N = len(self.dataset)
        _ = list(tqdm(p.imap(self.storeFeature, range(N)), total=N))
def writeAveResults(self, dirname):
    aveOutputFile = os.path.join(dirname, f"{self.datasetName}.csv")
    columns = ["algo"] + METRIC_NAMES
    df = pd.DataFrame(columns=columns)
    for algoName, metrics, in zip(self.algoNames, self.metricsList):
        # average each metric over all tracks, then prepend the algorithm name
        data = np.hstack([algoName, np.mean(metrics, axis=0)]).reshape(1, -1)
        df = pd.concat([df, pd.DataFrame(data=data, columns=columns)])
    df.to_csv(aveOutputFile)
    logger.info(f"results written to '{aveOutputFile}'")
def removeResult(self, algoName):
    try:
        # remove every result matching algoName; index() raises ValueError
        # once no occurrence remains
        while True:
            idx = self.algoNames.index(algoName)
            self.algoNames.pop(idx)
            self.metricsList.pop(idx)
            self.titlesList.pop(idx)
    except ValueError:
        logger.info(f"all {algoName} results removed")
def dump(self, dirname):
    dumpFile = os.path.join(dirname, f"{self.datasetName}.pkl")
    with open(dumpFile, "wb") as f:
        pickle.dump(
            (self.datasetName, self.algoNames, self.metricsList,
             self.titlesList),
            f,
            pickle.HIGHEST_PROTOCOL,
        )
    logger.info(f"saver object written to '{dumpFile}'")
    return self
def loadData(self, dataFile):
    if os.path.exists(dataFile):
        with open(dataFile, "rb") as f:
            X, y = pickle.load(f)
            logger.info(
                f"<{self.__class__.__name__}> load data from '{dataFile}'")
            logger.info(
                f"target(chorus)/total={sum(np.array(y)==CLF_TARGET_LABEL)}/{len(y)}"
            )
    else:
        logger.error("build the dataset for the classifier first")
        raise FileNotFoundError(dataFile)
    return X, y
def load(self, dirname):
    dumpFile = os.path.join(dirname, f"{self.datasetName}.pkl")
    try:
        with open(dumpFile, "rb") as f:
            dname, self.algoNames, self.metricsList, self.titlesList = \
                pickle.load(f)
            if dname != self.datasetName:
                logger.warning(
                    f"old name:<{dname}> != new name:<{self.datasetName}>")
        logger.info(f"saver object loaded from '{dumpFile}'")
    except FileNotFoundError:
        logger.warning(
            f"saver object file '{dumpFile}' not found, set to empty")
    return self
def writeFullResults(self, dirname):
    fullOutputFile = os.path.join(dirname, f"{self.datasetName}_full.csv")
    cols = ["title", "algo"] + METRIC_NAMES
    df = pd.DataFrame(columns=cols)
    for algoName, metrics, titles in zip(self.algoNames, self.metricsList,
                                         self.titlesList):
        n = len(titles)
        head = np.array([titles, [algoName] * n]).T
        headDf = pd.DataFrame(data=head, columns=cols[:2])
        metricDf = pd.DataFrame(data=metrics, columns=cols[2:])
        algoDf = pd.concat([headDf, metricDf], axis=1)
        df = pd.concat([df, algoDf], ignore_index=True)
    df.to_csv(fullOutputFile)
    logger.info(f"results written to '{fullOutputFile}'")
def main(force, dataset, algorithm):
    if dataset is None:
        evalLoader = DATASET_DIC
    elif dataset == "auto":
        evalLoader = findLoader(USING_DATASET.__class__)
    else:
        evalLoader = {dataset: DATASET_DIC[dataset]}
    if algorithm is None:
        evalAlgos = algos
    else:
        evalAlgos = {algorithm: algos[algorithm]}

    for dName, loader in evalLoader.items():
        logger.info(
            "-----------------------eval_algos---------------------------")
        logger.info(f"processing datasetloader, name={dName}")
        saver = Metrics_Saver(dName)
        # run incremental evaluation by default
        saver.load(EVAL_RESULT_DIR)
        for aName, algo in evalAlgos.items():
            # avoid duplicate evaluation unless a re-run is forced
            reEval = force and (aName in saver.algoNames)
            if (aName not in saver.algoNames) or force:
                if reEval:
                    logger.info(f"re-eval algo, name={aName}")
                else:
                    logger.info(f"algo, name={aName}")

                if hasattr(algo, "clf"):
                    algo.clf.train()
                ae = AlgoEvaluator(loader, algo)
                metrics, titles = ae()
                printResult(aName, metrics)

                if reEval:
                    saver.reWriteResult(aName, metrics, titles)
                else:
                    saver.addResult(aName, metrics, titles)
                # persist results after every algorithm
                saver.dump(EVAL_RESULT_DIR)
            else:
                logger.info(f"!! skipping algo, name={aName}")
        dumpResult(saver)
        updateViews(evalAlgos, dName)
def writeJsonMetadata(audiofile, predicted, figure, output, gt=None):
    def annotation(mirexFmt):
        # build a list of {begin, end, label} dicts; a separate name for the
        # result list avoids shadowing the function itself
        entries = []
        for intv, label in zip(*mirexFmt):
            entries.append({
                "begin": float("%.2f" % intv[0]),
                "end": float("%.2f" % intv[1]),
                "label": label,
            })
        return entries

    meta = {
        "audio": audiofile,
        "annotation": annotation(predicted),
        "gt_annotation": annotation(gt) if gt is not None else None,
        "figure": figure,
    }
    with open(output, "w") as f:
        json.dump(meta, f)
    logger.info(f"metadata written to {output}")
def buildCCDataset(cpath, baseset, getData, force=True):
    if not os.path.exists(cpath) or force:
        X = []
        y = []
        logger.info(
            f"building clique class data for <{baseset.__class__.__name__}> @ {cpath}"
        )
        # Pool.imap only maps a single-argument callable, so the per-item
        # arguments are zipped into tuples for starGetCliqueClassData to unpack
        with Pool(NUM_WORKERS) as p:
            N = len(baseset)
            results = list(
                tqdm(
                    p.imap(
                        starGetCliqueClassData,
                        zip([getData] * N, [baseset] * N, range(N)),
                    ),
                    total=N,
                )
            )
        for features, clabels in results:
            X.extend(features)
            y.extend(clabels)
        with open(cpath, "wb") as f:
            pickle.dump((X, y), f)
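# starGetCliqueClassData is defined elsewhere; a generic star-unpacking
# wrapper of that shape might look like this minimal sketch (the name
# _starApply is hypothetical):
def _starApply(args):
    # unpack a (callable, *arguments) tuple so Pool.imap can apply it
    fn, *rest = args
    return fn(*rest)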
def saveViolinPlot(self,
                   dirname,
                   plotMetric=PLOT_METRIC_FIELDS,
                   order=None):
    matplotlib.use("Agg")
    pltOutputFile = os.path.join(dirname, f"{self.datasetName}.svg")
    rows, cols = len(plotMetric), len(plotMetric[0])
    axisNames = np.array(plotMetric).flatten()
    metricsList = np.array(self.metricsList)
    if order is not None:
        # reorder algorithms to match the requested plotting order
        algoNames = list(filter(lambda x: x in self.algoNames, order))
        metricsList = metricsList[
            [self.algoNames.index(aName) for aName in algoNames], :, :]
    else:
        algoNames = self.algoNames
    metricsFieldSelector = np.array(
        [METRIC_NAMES.index(name) for name in axisNames])
    metricsList = metricsList[:, :, metricsFieldSelector]

    pos = np.arange(len(algoNames), dtype=int) + 1
    _, axes = plt.subplots(nrows=rows,
                           ncols=cols,
                           figsize=(cols * 4 * len(algoNames) / 10,
                                    rows * 4))
    for i, axis in enumerate(axes.flatten()):
        data = [metrics[:, i] for metrics in metricsList]
        axis.violinplot(data, pos, showmeans=True, showextrema=True)
        axis.set_title(axisNames[i])
        plt.setp(axis.get_xticklabels(), rotation=45)

    # plt.suptitle(self.datasetName, fontsize=20)
    plt.setp(axes, xticks=pos, xticklabels=algoNames)
    plt.tight_layout()
    plt.subplots_adjust(top=0.9)
    plt.savefig(pltOutputFile)
    logger.info(f"violin plot written to '{pltOutputFile}'")
def testCCDataset(method):
    logger.info(f"testCC method:{method}")
    cpath_train = CHORUS_CLASSIFIER_TRAIN_DATA_FILE[method]
    cpath_val = CHORUS_CLASSIFIER_VAL_DATA_FILE[method]
    _clf = ChorusClassifier(cpath_train)
    _clf.train()
    clf = _clf.clf
    Xt, yt = _clf.loadData(cpath_val)
    with np.printoptions(precision=3, suppress=True):
        if hasattr(clf, "feature_importances_"):
            # scale importances by the feature count so 1.0 means average
            ranked = sorted(zip(clf.feature_importances_, _clf.feature_names))
            scaled = [f"{s}={x * len(_clf.feature_names):.3f}" for x, s in ranked]
            logger.info(f"feature importance, {scaled}")
        logger.info(
            f"test classifier on validation data, score={clf.score(Xt, yt):.3f}")
def printResult(aName, metrics):
    logger.info(f"average result, algoName={aName}:")
    logger.info(f"metricNames={METRIC_NAMES}")
    logger.info(f"metric={np.mean(metrics, axis=0)}")
def main(audiofiles, outputdir, metaoutputdir, algo, force, workers):
    logger.debug(f"algo={algo}")
    logger.info(f"preprocess to generate features")
    ddataset = DummyDataset(audiofiles)
    transforms = [
        ExtractMel(),
        GenerateSSM(dataset=ddataset),
        ExtractCliques(dataset=ddataset),
    ]
    for tf in transforms:
        preDataset = Preprocess_Dataset(tf.identifier, ddataset)
        preDataset.build(tf.preprocessor, force=force, num_workers=workers)

    predictor = switchPred(algo)
    predictorStruct = (predictor if algo not in ["mixed", "highlighter"] else
                       AlgoSeqRecur(trainFile=USE_MODEL_DIC["seqRecur"]))
    for i, pair in enumerate(ddataset.pathPairs):
        audioFileName, audiofile, _ = pair
        audiofile = os.path.abspath(audiofile)
        output = os.path.join(outputdir, audioFileName + ".txt")
        metaOutput = os.path.join(metaoutputdir, audioFileName + "_meta.json")

        ssm_f, mels_f = getFeatures(ddataset, i)
        cliques = predictorStruct._process(ddataset, i, ssm_f)
        mirexFmt = chorusDetection(cliques, ssm_f[0], mels_f,
                                   predictorStruct.clf)
        if algo == "multi":
            mirexFmt = tuneIntervals(mirexFmt,
                                     mels_f,
                                     chorusDur=CHORUS_DURATION,
                                     window=TUNE_WINDOW)
        elif algo == "single":
            mirexFmt = maxOverlap(mirexFmt,
                                  chorusDur=CHORUS_DURATION_SINGLE,
                                  centering=False)
            mirexFmt = tuneIntervals(
                mirexFmt,
                mels_f,
                chorusDur=CHORUS_DURATION_SINGLE,
                window=TUNE_WINDOW,
            )

        # plot mats
        tf = ExtractCliques(dataset=ddataset)
        origCliques = Preprocess_Dataset(tf.identifier,
                                         ddataset,
                                         transform=tf.transform)[i]["cliques"]
        olssm = getLabeledSSM(origCliques, ssm_f[1].shape[-1])
        lssm = getLabeledSSM(cliques, ssm_f[1].shape[-1])
        olssm = drawSegments(mirexFmt, mirexFmt, olssm, ssm_f[0])
        mats = np.array([ssm_f[1], lssm, olssm])
        titles = ["fused SSM", "result structure", "low level structure"]
        plotMats(mats, titles, show=False)

        # write output and viewer metadata
        if algo not in ["single", "multi"]:
            mirexFmt = predictor(ddataset, i)
            mirexFmt = removeNumber(mirexFmt)
            mirexFmt = mergeIntervals(mirexFmt)

        writeMirexOutput(mirexFmt, output)
        figurePath = os.path.join(os.getcwd(),
                                  f"data/test/predict_{audioFileName}.svg")
        plt.savefig(figurePath, bbox_inches="tight")
        writeJsonMetadata(audiofile, mergeIntervals(mirexFmt), figurePath,
                          metaOutput)
        if DEBUG:
            plt.show()