示例#1
0
def getSpecs(path):
    """Extract all spectrograms of one audio file and score each one.

    Every spectrogram yielded by ``audio.specsFromFile`` (configured from
    the ``cfg`` module) is paired with the value returned by
    ``audio.signal2noise`` for it.  Both collections are then passed
    together through ``shuffle`` with ``random_state=RANDOM`` so callers
    can pick entries at random later.

    Args:
        path: Location of the audio file handed to ``audio.specsFromFile``.

    Returns:
        A ``(specs, noise)`` pair as returned by ``shuffle``; ``noise[i]``
        is presumably still the score of ``specs[i]`` after shuffling
        (assumes ``shuffle`` permutes its inputs consistently -- confirm).
    """
    # Score each spectrogram right after the extractor yields it, so the
    # order of extractor/scoring calls is the same as in a plain loop.
    scored = [
        (chunk, audio.signal2noise(chunk))
        for chunk in audio.specsFromFile(
            path,
            rate=cfg.SAMPLE_RATE,
            seconds=cfg.SPEC_LENGTH,
            overlap=cfg.SPEC_OVERLAP,
            minlen=cfg.SPEC_MINLEN,
            fmin=cfg.SPEC_FMIN,
            fmax=cfg.SPEC_FMAX,
            spec_type=cfg.SPEC_TYPE,
            shape=(cfg.IM_SIZE[1], cfg.IM_SIZE[0]),
        )
    ]

    # Split the pairs back into two parallel lists.
    specs = [chunk for chunk, _ in scored]
    noise = [ratio for _, ratio in scored]

    # Randomise the order of both lists in one call.
    specs, noise = shuffle(specs, noise, random_state=RANDOM)
    return specs, noise
示例#2
0
def getSpecs(path):
    """Return the shuffled spectrograms of ``path`` and their noise scores.

    Args:
        path: Audio file passed on to ``audio.specsFromFile``.

    Returns:
        Two values ``(specs, noise)`` produced by ``shuffle``: the
        spectrograms of the file and, for each of them, the result of
        ``audio.signal2noise``.
    """
    # Extraction settings all come from the project configuration module.
    extraction_options = {
        'rate': cfg.SAMPLE_RATE,
        'seconds': cfg.SPEC_LENGTH,
        'overlap': cfg.SPEC_OVERLAP,
        'minlen': cfg.SPEC_MINLEN,
        'fmin': cfg.SPEC_FMIN,
        'fmax': cfg.SPEC_FMAX,
        'spec_type': cfg.SPEC_TYPE,
        'shape': (cfg.IM_SIZE[1], cfg.IM_SIZE[0]),
    }

    collected_specs, collected_noise = [], []
    for window in audio.specsFromFile(path, **extraction_options):
        ratio = audio.signal2noise(window)
        collected_specs.append(window)
        collected_noise.append(ratio)

    # Shuffle both lists in a single call with the shared random state.
    specs, noise = shuffle(collected_specs,
                           collected_noise,
                           random_state=RANDOM)
    return specs, noise
示例#3
0
def main():
    """Evaluate a series of model checkpoints on the test-set audio files.

    Parses the command line, lists the non-hidden files in
    ``cfg.TESTSET_PATH`` and, for every checkpoint index in
    ``range(5, 120, 5)``, builds a model via ``model_prediction(opt, inx)``
    and writes one CSV file into the ``resnet_34_2`` directory.  Each CSV
    line has the form ``<median_dict[file]>;<timestamp>;<label>;1`` and is
    emitted for every spectrogram whose signal-to-noise value reaches the
    threshold and whose best class probability exceeds
    ``predict_prob_threshold``.

    Only the first five audio files are processed per checkpoint
    (NOTE(review): this looks like a debugging limit -- confirm before
    relying on the output for a full submission).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataroot', default="", help='path to dataset')
    parser.add_argument('--model_path',
                        default="/media/labhdd/ASAS/Resnet34_2/",
                        help='path to model')
    parser.add_argument('--csv_path',
                        default="./train_v3.csv",
                        help='path to label csv')
    parser.add_argument('--cuda', action='store_true', help='enables cuda')
    parser.add_argument('--ignore_prob', default=1e-4, type=float)
    opt = parser.parse_args()
    print(opt)

    # Every non-hidden entry of the test-set directory, in sorted order.
    wav_files = [
        f for f in sorted(os.listdir(cfg.TESTSET_PATH)) if f[0] != '.'
    ]

    dirpath = 'resnet_34_2'
    if not os.path.exists(dirpath):
        os.makedirs(dirpath)

    SPEC_SIGNAL_THRESHOLD = 0.001  # minimum signal2noise value to classify
    NUM_CLASSES = 659  # number of entries read from the probability vector
    MAX_FILES = 5  # per-checkpoint cap on processed audio files
    predict_prob_threshold = 0.0  # 0.0 accepts every non-zero top score

    for inx in range(5, 120, 5):  # using different epochs
        model = model_prediction(opt, inx)
        label = model.construct_label_dict()

        # NOTE: '%3d' pads with spaces, so the file name contains blanks
        # for indices below 100; kept as-is to preserve output paths.
        filepath = os.path.join(dirpath,
                                'valset_s2n001_resnet34_%3d.csv' % (inx))

        # The context manager guarantees the CSV is flushed and closed even
        # when prediction fails part-way through.
        with open(filepath, 'w+') as f:
            median_dict = load_median()

            # Collected alongside the CSV output; not read again in this
            # function.
            submission = []

            for num, wav_name in enumerate(wav_files[:MAX_FILES]):
                accept_spec_num = 0

                # cnt2 is the 1-based position of the spectrogram in the file.
                spec_iter = audio.specsFromFile(
                    os.path.join(cfg.TESTSET_PATH, wav_name),
                    cfg.SAMPLE_RATE,
                    cfg.SPEC_LENGTH,
                    cfg.SPEC_OVERLAP,
                    cfg.SPEC_MINLEN,
                    shape=(cfg.IM_SIZE[1], cfg.IM_SIZE[0]),
                    fmin=cfg.SPEC_FMIN,
                    fmax=cfg.SPEC_FMAX,
                    spec_type=cfg.SPEC_TYPE)
                for cnt2, spec in enumerate(spec_iter, start=1):
                    s2n = audio.signal2noise(spec)

                    # Written as "not >=" so that a NaN score is skipped,
                    # exactly like the plain ">=" acceptance test.
                    if not (s2n >= SPEC_SIGNAL_THRESHOLD):
                        continue

                    # Prepare as input
                    spec = image.prepare(spec)
                    _confidence, prob = model.get_probability(spec, opt)

                    # Keep only classes that are not below --ignore_prob.
                    probdict = {
                        label[i]: prob[i]
                        for i in range(NUM_CLASSES)
                        if not (prob[i] < opt.ignore_prob)
                    }
                    order = sorted(probdict.items(),
                                   key=lambda x: x[1],
                                   reverse=True)

                    # All classes were filtered out: nothing to report for
                    # this window (previously an IndexError on order[0]).
                    if not order:
                        continue

                    best_label, best_prob = order[0]
                    if best_prob > predict_prob_threshold:  # select all
                        # Snap the window index down to a multiple of five.
                        start = cnt2 - cnt2 % 5
                        end = start + 5
                        timestamp = getTimestamp(start, end)  # type: string
                        line = (median_dict[wav_name] + ';' + timestamp +
                                ';' + best_label + ';' + str(1))
                        print(line, file=f)
                        submission.append([line])
                        accept_spec_num += 1

                print('number of audio file : ' + str(num + 1) +
                      ', containing ' + str(accept_spec_num) + ' specs')

        # Drop the reference before the next checkpoint is loaded.
        del model