def deeplab(num_classes,
            crop_size=None,
            atrous_rates=None,
            output_stride=16,
            fine_tune_batch_norm=False,
            pretrained=True,
            pretained_num_classes=21,
            checkpoint_path='./pretrained_models/deeplabv3_pascal_trainval.pth'):
    """Build a DeepLab v3+ model for semantic segmentation.

    Args:
        num_classes: Number of output classes for the 'semantic' head.
        crop_size: ``[height, width]`` of the input crop. Defaults to
            ``[513, 513]`` when None.
        atrous_rates: Dilation rates for the ASPP module. Defaults to
            ``[6, 12, 18]`` when None.
        output_stride: Ratio of input resolution to final feature resolution.
        fine_tune_batch_norm: Passed through to ``DeepLab``; presumably
            controls whether batch-norm layers are trainable — confirm in
            the ``DeepLab`` class.
        pretrained: If True, load weights from ``checkpoint_path``.
        pretained_num_classes: Number of classes in the pretrained checkpoint
            (parameter name kept misspelled for backward compatibility).
        checkpoint_path: Path to the pretrained checkpoint file.

    Returns:
        A ``DeepLab`` model instance.
    """
    # None sentinels replace mutable list defaults, which would be shared
    # across calls; the effective default values are unchanged.
    if crop_size is None:
        crop_size = [513, 513]
    if atrous_rates is None:
        atrous_rates = [6, 12, 18]
    outputs_to_num_classes = {'semantic': num_classes}
    model_options = common.ModelOptions(
        outputs_to_num_classes,
        crop_size=crop_size,
        atrous_rates=atrous_rates,
        output_stride=output_stride)
    # Backbone is built without its own pretrained weights; whole-model
    # weights are loaded below instead when ``pretrained`` is True.
    feature_extractor = extractor.feature_extractor(
        model_options.model_variant,
        pretrained=False,
        output_stride=model_options.output_stride)
    model = DeepLab(feature_extractor, model_options, fine_tune_batch_norm)
    if pretrained:
        _load_state_dict(model, num_classes, pretained_num_classes,
                         checkpoint_path)
    return model
def generate_data(sub_names, mov_names):
    """Extract speech and noise feature segments from subtitle/movie pairs.

    For each (subtitle file, movie file) pair, parses the ASS/SSA subtitle's
    ``[Events]`` section into time intervals labelled speech/non-speech,
    merges overlapping intervals, then runs the decode -> spectrum ->
    feature-extraction pipeline which appends features into ``speech`` and
    ``noise``.

    Args:
        sub_names: Iterable of subtitle file paths.
        mov_names: Iterable of movie file paths, paired with ``sub_names``.

    Returns:
        Tuple ``(speech, noise)`` of feature lists accumulated over all pairs.
    """
    speech = []
    noise = []
    for sub_name, mov_name in zip(sub_names, mov_names):
        intervals = []
        # ``with`` ensures the subtitle file is closed (original leaked it).
        with open(sub_name) as fsub:
            # Skip the header; iterating (instead of readline() in a
            # ``while True``) terminates at EOF even if '[Events]' is absent,
            # where the original would loop forever.
            for header_line in fsub:
                if header_line.strip() == '[Events]':
                    break
            for line in fsub:
                if not line.startswith('Dialogue:'):
                    continue
                fields = line.strip().split(',')
                # Lines with a non-Default style or an override block
                # ('{...}') are treated as non-speech.
                is_speech = (fields[3] == 'Default'
                             and not fields[9].startswith('{'))
                intervals.append(
                    (parse_time(fields[1]), parse_time(fields[2]), is_speech))
        # Python 3 fix: list.sort() no longer accepts cmp=; sort by start time
        # with key= (the builtin cmp() is gone too).
        intervals.sort(key=lambda iv: iv[0])
        # Merge overlapping intervals; the merged interval counts as speech
        # only if both parts were speech.
        i = 0
        while i < len(intervals) - 1:
            if intervals[i][1] > intervals[i + 1][0]:
                intervals[i] = (intervals[i][0],
                                intervals[i + 1][1],
                                intervals[i][2] and intervals[i + 1][2])
                del intervals[i + 1]
            else:
                i += 1
        # Wire up the streaming pipeline and wait for feature extraction.
        dec = ffmpeg_decoder(mov_name, SAMPLE_RATE)
        spec = spectrum(dec.ostream.get_handle(), squared=False)
        feat = feature_extractor(spec.ostream.get_handle(), intervals,
                                 speech, noise)
        dec.start()
        spec.start()
        feat.start()
        feat.join()
    return speech, noise
def generate_data(sub_names, mov_names):
    """Extract speech and noise feature segments from subtitle/movie pairs.

    For each (subtitle file, movie file) pair, parses the ASS/SSA subtitle's
    ``[Events]`` section into time intervals labelled speech/non-speech,
    merges overlapping intervals, then runs the decode -> spectrum ->
    feature-extraction pipeline which appends features into ``speech`` and
    ``noise``.

    Args:
        sub_names: Iterable of subtitle file paths.
        mov_names: Iterable of movie file paths, paired with ``sub_names``.

    Returns:
        Tuple ``(speech, noise)`` of feature lists accumulated over all pairs.
    """
    speech = []
    noise = []
    for sub_name, mov_name in zip(sub_names, mov_names):
        intervals = []
        # ``with`` ensures the subtitle file is closed (original leaked it).
        with open(sub_name) as fsub:
            # Skip the header; iterating (instead of readline() in a
            # ``while True``) terminates at EOF even if '[Events]' is absent,
            # where the original would loop forever.
            for header_line in fsub:
                if header_line.strip() == '[Events]':
                    break
            for line in fsub:
                if not line.startswith('Dialogue:'):
                    continue
                fields = line.strip().split(',')
                # Lines with a non-Default style or an override block
                # ('{...}') are treated as non-speech.
                is_speech = (fields[3] == 'Default'
                             and not fields[9].startswith('{'))
                intervals.append(
                    (parse_time(fields[1]), parse_time(fields[2]), is_speech))
        # Python 3 fix: list.sort() no longer accepts cmp=; sort by start time
        # with key= (the builtin cmp() is gone too).
        intervals.sort(key=lambda iv: iv[0])
        # Merge overlapping intervals; the merged interval counts as speech
        # only if both parts were speech.
        i = 0
        while i < len(intervals) - 1:
            if intervals[i][1] > intervals[i + 1][0]:
                intervals[i] = (intervals[i][0],
                                intervals[i + 1][1],
                                intervals[i][2] and intervals[i + 1][2])
                del intervals[i + 1]
            else:
                i += 1
        # Wire up the streaming pipeline and wait for feature extraction.
        dec = ffmpeg_decoder(mov_name, SAMPLE_RATE)
        spec = spectrum(dec.ostream.get_handle(), squared=False)
        feat = feature_extractor(spec.ostream.get_handle(), intervals,
                                 speech, noise)
        dec.start()
        spec.start()
        feat.start()
        feat.join()
    return speech, noise
if line[3] != 'Default' or line[9].startswith('{\\an8'): intervels.append( (parse_time(line[1]), parse_time(line[2]), False) ) else: intervels.append( (parse_time(line[1]), parse_time(line[2]), True) ) intervels.sort(cmp=lambda x,y: cmp(x[0], y[0])) i = 0 while i < len(intervels)-1: if intervels[i][1] > intervels[i+1][0]: intervels[i] = (intervels[i][0], intervels[i+1][1], intervels[i][2] and intervels[i+1][2]) del intervels[i+1] else: i = i + 1 dec = ffmpeg_decoder(mov_name, SAMPLE_RATE) spec = spectrum(dec.ostream.get_handle(), squared = False) feat = feature_extractor(spec.ostream.get_handle(), intervels, speech, noise) dec.start() spec.start() feat.start() feat.join() random.shuffle(speech) random.shuffle(noise) for f in speech: plt.imshow(np.log(abs(f)**2).reshape((5, 128))) plt.show()