def __init__(self, feature_hdf5_path, train_csv, validate_csv, holdout_fold,
             batch_size, seed=1234):
    '''Data generator for training and validation.

    Args:
      feature_hdf5_path: string, path of hdf5 feature file
      train_csv: string, path of train csv file
      validate_csv: string, path of validate csv file
      holdout_fold: set to 1 for development; 'none' to train on all data
          without validation
      batch_size: int
      seed: int, random seed
    '''
    self.batch_size = batch_size
    self.random_state = np.random.RandomState(seed)
    self.in_domain_classes_num = len(config.labels)
    self.all_classes_num = len(config.labels)
    self.lb_to_idx = config.lb_to_idx
    self.idx_to_lb = config.idx_to_lb

    # Load training data
    load_time = time.time()
    self.data_dict = self.load_hdf5(feature_hdf5_path)

    train_meta = read_metadata(train_csv)
    validate_meta = read_metadata(validate_csv)

    self.train_audio_indexes = self.get_audio_indexes(
        train_meta, self.data_dict, holdout_fold, 'train')
    self.validate_audio_indexes = self.get_audio_indexes(
        validate_meta, self.data_dict, holdout_fold, 'validate')

    if holdout_fold == 'none':
        self.train_audio_indexes = np.concatenate(
            (self.train_audio_indexes, self.validate_audio_indexes), axis=0)
        self.validate_audio_indexes = np.array([])

    logging.info('Load data time: {:.3f} s'.format(time.time() - load_time))
    logging.info('Training audio num: {}'.format(
        len(self.train_audio_indexes)))
    logging.info('Validation audio num: {}'.format(
        len(self.validate_audio_indexes)))

    self.random_state.shuffle(self.train_audio_indexes)
    self.pointer = 0
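# A minimal sketch (not part of the original code) of how a batch-drawing
# method typically consumes self.pointer and self.train_audio_indexes in a
# generator like the one above; the 'feature' and 'target' keys of
# self.data_dict are assumptions based on the hdf5 layout used elsewhere.
def generate_train(self):
    '''Yield (features, targets) mini-batches indefinitely.'''
    while True:
        # Reshuffle and wrap around once an epoch is exhausted
        if self.pointer + self.batch_size > len(self.train_audio_indexes):
            self.pointer = 0
            self.random_state.shuffle(self.train_audio_indexes)

        batch_indexes = self.train_audio_indexes[
            self.pointer: self.pointer + self.batch_size]
        self.pointer += self.batch_size

        yield (self.data_dict['feature'][batch_indexes],
               self.data_dict['target'][batch_indexes])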
def evaluate_sed(dataset):
    """Evaluate the sound event detection predictions and write results.

    Args:
        dataset: Dataset for retrieving ground truth.
    """
    import evaluation
    import inference

    names, ground_truth = utils.read_metadata(dataset.metadata_path,
                                              weakly_labeled=False)

    # Load and binarize predictions
    path = cfg.predictions_path.format('sed', dataset.name)
    _, y_pred = utils.read_predictions(path)
    threshold = _determine_threshold(cfg.sed_threshold)
    y_pred_b = inference.binarize_predictions_3d(y_pred,
                                                 threshold=threshold,
                                                 n_dilation=cfg.sed_dilation,
                                                 n_erosion=cfg.sed_erosion)

    # Convert to event list format and evaluate SED performance
    resolution = cfg.clip_duration / y_pred.shape[2]
    predictions = inference.generate_event_lists(y_pred_b, resolution)
    metrics = evaluation.evaluate_sed(ground_truth, predictions, names)

    # Ensure output directory exists and write results
    os.makedirs(os.path.dirname(cfg.results_path), exist_ok=True)
    output_path = cfg.results_path.format('sed', dataset.name)
    with open(output_path, 'w') as f:
        f.write(metrics.result_report_overall())
        f.write(metrics.result_report_class_wise())
def evaluate_audio_tagging(dataset, compute_thresholds=False):
    """Evaluate the audio tagging predictions and write results.

    Args:
        dataset: Dataset for retrieving ground truth.
        compute_thresholds (bool): Whether to compute and record
            per-class optimal thresholds.

    See Also:
        :func:`evaluation.compute_thresholds`
    """
    import evaluation

    _, y_true = utils.read_metadata(dataset.metadata_path)
    path = cfg.predictions_path.format('at', dataset.name)
    _, y_pred = utils.read_predictions(path)

    # Compute thresholds if flag is set
    if compute_thresholds:
        thresholds = evaluation.compute_thresholds(y_true, y_pred)
        output_path = os.path.join(os.path.dirname(cfg.predictions_path),
                                   'thresholds.p')
        with open(output_path, 'wb') as f:
            pickle.dump(thresholds, f)

    # Evaluate audio tagging performance
    threshold = _determine_threshold(cfg.at_threshold)
    scores = evaluation.evaluate_audio_tagging(
        y_true, y_pred, threshold=threshold)

    # Ensure output directory exists and write results
    os.makedirs(os.path.dirname(cfg.results_path), exist_ok=True)
    output_path = cfg.results_path.format('at', dataset.name)
    evaluation.write_audio_tagging_results(scores, output_path)
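# _determine_threshold is called by both evaluation functions above but not
# shown. A plausible minimal sketch, assuming the config value is either a
# float or the string 'auto', in which case the per-class thresholds pickled
# by evaluate_audio_tagging are reused; the 'auto' convention is an
# assumption, not confirmed behavior.
def _determine_threshold(threshold):
    if threshold != 'auto':
        return threshold
    thresholds_path = os.path.join(os.path.dirname(cfg.predictions_path),
                                   'thresholds.p')
    with open(thresholds_path, 'rb') as f:
        return pickle.load(f)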
def _load_data(dataset, is_training=False):
    """Load input data, target values and file names for a dataset.

    The input data is assumed to be a dataset of feature vectors. These
    feature vectors are standardized using a scaler that is either
    loaded from disk (if it exists) or computed on-the-fly. The latter
    is only possible if the input data is training data, which is
    indicated by the `is_training` parameter.

    Target values and file names are read from the metadata file.

    Args:
        dataset: Structure encapsulating dataset information.
        is_training (bool): Whether the input data is training data.

    Returns:
        x (np.ndarray): The input data.
        y (np.ndarray): The target values.
        names (list): The associated file names.
    """
    import data_augmentation as aug
    import features

    features_path = os.path.join(cfg.extraction_path, dataset.name + '.h5')
    x = utils.timeit(lambda: features.load_features(features_path),
                     'Loaded features of %s dataset' % dataset.name)

    # Clip dynamic range to 90 dB
    x = np.maximum(x, x.max() - 90.0)

    # Load scaler from file if cached, or else compute it. For non-training
    # data the scaler file must already exist.
    scaler_path = cfg.scaler_path
    if os.path.exists(scaler_path) or not is_training:
        with open(scaler_path, 'rb') as f:
            scaler = pickle.load(f)
    else:
        scaler = utils.timeit(lambda: utils.compute_scaler(x),
                              'Computed standard scaler')
        with open(scaler_path, 'wb') as f:
            pickle.dump(scaler, f)

    x = utils.timeit(lambda: utils.standardize(x, scaler),
                     'Standardized %s features' % dataset.name)

    names, y = utils.timeit(lambda: utils.read_metadata(dataset.metadata_path),
                            'Loaded %s metadata' % dataset.name)
    if dataset == cfg.training_set and cfg.enable_augmentation:
        names, y = aug.expand_metadata((names, y))

    return x, y, names
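# utils.compute_scaler and utils.standardize are not shown in this file. A
# minimal sketch of one common implementation, assuming x has shape
# (n_examples, n_frames, n_mels) and is standardized per feature dimension
# with sklearn's StandardScaler; the actual utils API may differ.
from sklearn.preprocessing import StandardScaler

def compute_scaler(x):
    # Fit mean and std over all frames of all examples
    return StandardScaler().fit(x.reshape(-1, x.shape[-1]))

def standardize(x, scaler):
    shape = x.shape
    return scaler.transform(x.reshape(-1, shape[-1])).reshape(shape)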
def extract(dataset):
    """Extract feature vectors from the given dataset.

    Args:
        dataset: Dataset to extract features from.
    """
    import data_augmentation as aug
    import features

    # Use a logmel representation for feature extraction
    extractor = features.LogmelExtractor(sample_rate=cfg.sample_rate,
                                         n_window=cfg.n_window,
                                         hop_length=cfg.hop_length,
                                         n_mels=cfg.n_mels,
                                         )

    # Prepare for data augmentation if enabled
    file_names, target_values = utils.read_metadata(dataset.metadata_path)
    if dataset == cfg.training_set and cfg.enable_augmentation:
        n_transforms_iter = aug.transform_counts(target_values)
        file_names = aug.expand_metadata((file_names, target_values))[0]
    else:
        n_transforms_iter = None

    # Ensure output directory exists and set file path
    os.makedirs(cfg.extraction_path, exist_ok=True)
    output_path = os.path.join(cfg.extraction_path, dataset.name + '.h5')

    # Save free parameters to disk
    utils.log_parameters(cfg.logmel,
                         os.path.join(cfg.extraction_path, 'parameters.json'))

    # Generate features for each audio clip in the dataset
    features.extract_dataset(dataset.path,
                             file_names,
                             extractor,
                             cfg.clip_duration,
                             output_path,
                             n_transforms_iter=n_transforms_iter,
                             )
def calculate_feature_for_all_audio_files(args):
    '''Calculate features of audio files and write them out to an hdf5 file.

    Args:
      args: object with attributes:
        dataset_dir: string
        workspace: string
        mini_data: bool, set True for debugging on a small part of data
    '''
    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    mini_data = args.mini_data

    sample_rate = config.sample_rate
    window_size = config.window_size
    hop_size = config.hop_size
    mel_bins = config.mel_bins
    fmin = config.fmin
    fmax = config.fmax
    frames_per_second = config.frames_per_second
    frames_num = config.frames_num
    total_samples = config.total_samples
    lb_to_idx = config.lb_to_idx
    audio_duration_clip = config.audio_duration_clip
    audio_stride_clip = config.audio_stride_clip
    audio_duration = config.audio_duration
    audio_num = config.audio_num
    total_frames = config.total_frames

    # Paths
    prefix = 'minidata_' if mini_data else ''

    audios_dir = os.path.join(dataset_dir, 'audio')
    metadata_path = os.path.join(dataset_dir, 'meta', 'esc50.csv')
    feature_path = os.path.join(
        workspace, 'features',
        '{}logmel_{}frames_{}melbins.h5'.format(
            prefix, frames_per_second, mel_bins))
    create_folder(os.path.dirname(feature_path))

    # Feature extractor
    feature_extractor = LogMelExtractor(sample_rate=sample_rate,
                                        window_size=window_size,
                                        hop_size=hop_size,
                                        mel_bins=mel_bins,
                                        fmin=fmin,
                                        fmax=fmax)

    # Read metadata
    meta_dict = read_metadata(metadata_path)

    # Only use a random subset of the data for debugging
    if mini_data:
        mini_num = 10
        total_num = len(meta_dict['filename'])
        random_state = np.random.RandomState(1234)
        indexes = random_state.choice(total_num, size=mini_num, replace=False)
        for key in meta_dict.keys():
            meta_dict[key] = meta_dict[key][indexes]

    print('Extracting features of all audio files ...')
    extract_time = time.time()

    # Hdf5 file for storing features and targets
    hf = h5py.File(feature_path, 'w')

    hf.create_dataset(
        name='filename',
        data=[filename.encode() for filename in meta_dict['filename']],
        dtype='S80')

    if 'fold' in meta_dict.keys():
        hf.create_dataset(name='fold', data=meta_dict['fold'], dtype=np.int64)

    if 'target' in meta_dict.keys():
        hf.create_dataset(name='target', data=meta_dict['target'],
                          dtype=np.int64)

    if 'category' in meta_dict.keys():
        hf.create_dataset(
            name='category',
            data=[category.encode() for category in meta_dict['category']],
            dtype='S80')

    if 'esc10' in meta_dict.keys():
        # np.bool was removed in NumPy 1.24; use the builtin bool instead
        hf.create_dataset(name='esc10', data=meta_dict['esc10'], dtype=bool)

    if 'src_file' in meta_dict.keys():
        hf.create_dataset(name='src_file', data=meta_dict['src_file'],
                          dtype=np.int64)

    if 'take' in meta_dict.keys():
        hf.create_dataset(name='take',
                          data=[take.encode() for take in meta_dict['take']],
                          dtype='S24')

    hf.create_dataset(
        name='feature',
        shape=(0, audio_num, frames_num, mel_bins),
        maxshape=(None, audio_num, frames_num, mel_bins),
        dtype=np.float32)

    for (n, filename) in enumerate(meta_dict['filename']):
        audio_path = os.path.join(audios_dir, filename)
        print(n, audio_path)

        # Read audio
        (audio, _) = read_audio(audio_path=audio_path, target_fs=sample_rate)

        # Pad or truncate audio recording to the same length
        audio = pad_truncate_sequence(audio, total_samples)

        # Extract the log mel spectrogram of the whole recording
        feature = feature_extractor.transform(audio)

        # Remove the extra log mel spectrogram frames caused by padding zero
        feature = feature[0:total_frames]

        # Split the spectrogram into strided clips
        fea_list = []
        for i in range(audio_num):
            feature_clip = feature[
                i * frames_per_second * audio_stride_clip:
                (i + audio_duration_clip) * frames_per_second
                * audio_stride_clip]
            fea_list.append(feature_clip)

        hf['feature'].resize((n + 1, audio_num, frames_num, mel_bins))
        hf['feature'][n] = fea_list

    hf.close()

    print('Write hdf5 file to {} using {:.3f} s'.format(
        feature_path, time.time() - extract_time))
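# LogMelExtractor is defined elsewhere in this repository. A minimal sketch
# of an equivalent extractor built on librosa, matching the constructor and
# transform() usage above; the internals here are an illustration, not the
# repository's exact implementation.
import librosa
import numpy as np

class LogMelExtractor:
    def __init__(self, sample_rate, window_size, hop_size, mel_bins,
                 fmin, fmax):
        self.window_size = window_size
        self.hop_size = hop_size
        # Mel filter bank, shape (mel_bins, window_size // 2 + 1)
        self.melW = librosa.filters.mel(sr=sample_rate, n_fft=window_size,
                                        n_mels=mel_bins, fmin=fmin, fmax=fmax)

    def transform(self, audio):
        # STFT magnitude spectrogram, shape (n_frames, window_size // 2 + 1)
        stft = np.abs(librosa.stft(y=audio, n_fft=self.window_size,
                                   hop_length=self.hop_size,
                                   window='hann', center=True)).T
        # Project onto mel bands and take log, shape (n_frames, mel_bins)
        mel = np.dot(stft ** 2, self.melW.T)
        return np.log(mel + 1e-10).astype(np.float32)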
data = read_csv('https://ocgptweb.azurewebsites.net/CSVDownload')
data = data.drop(columns=['CountryName', 'ConfirmedCases', 'ConfirmedDeaths'])
data = data.drop(
    columns=[col for col in data.columns if col.endswith('_Notes')])
data = data.drop(
    columns=[col for col in data.columns if col.endswith('_IsGeneral')])
data['Date'] = data['Date'].apply(lambda x: datetime_isoformat(x, '%Y%m%d'))

# Join with ISO data
iso = read_csv(ROOT / 'input' / 'ISO-3166-2.csv')[['3166-2-Alpha-2',
                                                   '3166-2-Alpha-3']]
data = data.rename(columns={'CountryCode': '3166-2-Alpha-3'}).merge(iso)

# Join with our metadata
metadata = read_metadata()[['Key', 'CountryCode']]
data = data.rename(columns={'3166-2-Alpha-2': 'CountryCode'}).merge(metadata)

# Use consistent naming convention for columns
data = data[[
    col for col in data.columns
    if '_' in col or col in ('Date', 'Key', 'StringencyIndex')
]]
data.columns = [col.split('_')[-1] for col in data.columns]
data.columns = [
    re.sub(r'\s(\w)', lambda m: m.group(1).upper(), col)
    for col in data.columns
]

# Fix column typo
data = data.rename(columns={'ClosePublicTransport': 'PublicTransportClosing'})
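# For illustration: the substitution above turns the OxCGRT column names into
# CamelCase by uppercasing any character that follows whitespace, e.g.
#   re.sub(r'\s(\w)', lambda m: m.group(1).upper(), 'School closing')
# returns 'SchoolClosing'. The input string is an example, not necessarily a
# column in the live data.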
        'MaximumTemperature', 'Rainfall', 'Snowfall'
    ]
    return data[[col for col in output_columns if col in data.columns]]


# Get all the weather stations with data up until 2020
stations_url = 'https://www1.ncdc.noaa.gov/pub/data/ghcn/daily/ghcnd-inventory.txt'
stations = read_csv(stations_url,
                    sep=r'\s+',
                    names=('id', 'lat', 'lon', 'measurement',
                           'year_start', 'year_end'))
stations = stations[stations.year_end == 2020][[
    'id', 'lat', 'lon', 'measurement'
]]

# Filter stations that at least provide max and min temps
measurements = ['TMIN', 'TMAX']
stations = stations.groupby(['id', 'lat', 'lon']).sum()
stations = stations[stations.measurement.apply(
    lambda x: all(m in x for m in measurements))]
stations = stations.reset_index()

# Get all the POI from metadata and go through each key
metadata = read_metadata()[['Key', 'Latitude', 'Longitude']]

# Bottleneck is the network, so we can use lots of threads in parallel
records = list(
    tqdm(ThreadPool(8).imap_unordered(station_records, metadata.iterrows()),
         total=len(metadata)))
concat(records).sort_values(['Key', 'Date']).to_csv(sys.stdout, index=False)
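# station_records is defined earlier in this script. A minimal sketch of its
# expected contract, assuming it takes one (index, row) pair from
# metadata.iterrows(), picks the nearest station, and returns a DataFrame
# with at least 'Key' and 'Date' columns; fetch_station() is a hypothetical
# download helper, not part of the original script.
def station_records(item):
    _, row = item
    # Nearest station by squared euclidean distance in lat/lon space
    dist = ((stations.lat - row.Latitude) ** 2 +
            (stations.lon - row.Longitude) ** 2)
    nearest = stations.loc[dist.idxmin()]
    records = fetch_station(nearest.id)  # hypothetical GHCN download helper
    records['Key'] = row.Key
    return records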
        root / 'output' / ('%s_latest.json' % name), orient='records')


# Root path of the project
ROOT = Path(os.path.dirname(__file__)) / '..'

# Read the minimal data file and write to JSON output
minimal = read_csv(ROOT / 'output' / 'data_minimal.csv').sort_values(
    ['Date', 'Key'])
dataframe_to_json(minimal, ROOT / 'output' / 'data_minimal.json',
                  orient='records')

# Read the metadata file and write to output
metadata = read_metadata()
metadata = metadata[[
    col for col in metadata.columns if not col.startswith('_')
]]
metadata.to_csv(ROOT / 'output' / 'metadata.csv', index=False)
dataframe_to_json(metadata, ROOT / 'output' / 'metadata.json',
                  orient='records')

# Merge minimal with the metadata file to create the full file and write it
# to output
full = minimal.merge(metadata).sort_values(['Date', 'Key'])
full = full[[
    'Date',
    'Key',
    'CountryCode',
    'CountryName',
parser = argparse.ArgumentParser()
parser.add_argument('pred_path', help='path to predictions directory')
parser.add_argument('output_path', help='output file path')
args = parser.parse_args()

# Trim the time length of the input files
model_dirs = []
for model in MODELS:
    model_dirs.append(os.path.join(args.pred_path, model))
utils.trim_clips(model_dirs)

# Collect predictions for each model
preds = []
feats = []
for model in MODELS:
    df = utils.read_metadata(os.path.join(args.pred_path, model))
    preds.append((df > 0.5).astype(int).unstack())
    feats.append(df)

# Print correlation matrix
print(pd.concat(preds, axis=1).corr())

# Save meta-features to disk
feats = np.stack(feats, axis=1)
feats = np.reshape(feats, (feats.shape[0], -1))
with h5py.File(args.output_path, 'w') as f:
    f.create_dataset('F', data=feats)
    f.create_dataset('names', data=preds[0].index.levels[1],
                     dtype=h5py.special_dtype(vlen=str))
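# For illustration: reading back the stacked meta-features written above. The
# file path is an example; the shapes assume each of the n_models prediction
# frames covers the same n_clips clips and n_labels classes.
import h5py
with h5py.File('meta_features.h5', 'r') as f:
    F = f['F'][:]          # shape: (n_clips, n_models * n_labels)
    names = f['names'][:]  # clip names as variable-length strings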
if config.USING_DLIB_OR_FACE_ALIGNMENT == 'dlib':
    dlib_detector, dlib_predictor = utils.load_dlib_detector_and_predictor()
elif config.USING_DLIB_OR_FACE_ALIGNMENT == 'face_alignment':
    face_alignment_3D_object = utils.load_face_alignment_object(
        d='3D', enable_cuda=config.ENABLE_CUDA)
    face_alignment_2D_object = utils.load_face_alignment_object(
        d='2D', enable_cuda=config.ENABLE_CUDA)

# Clip videos by dialogue times from metadata,
# extract faces and landmarks from video clips,
# blacken mouths and draw mouth polygons,
# save combined frame + frame_with_blackened_mouth_and_polygon
for language in tqdm.tqdm(sorted(os.listdir(
        os.path.join(config.MOVIE_TRANSLATION_DATASET_DIR, 'metadata')))):

    # Read all metadata files in the language, containing the columns:
    # | output_file_name.mp4 | youtubeID | start_time | duration |
    for metadata_txt_file in tqdm.tqdm(sorted(glob.glob(
            os.path.join(config.MOVIE_TRANSLATION_DATASET_DIR, 'metadata',
                         language, "*")))):

        actor = os.path.splitext(os.path.basename(metadata_txt_file))[0]
        print("\n", actor, "\n")
        metadata = utils.read_metadata(metadata_txt_file)

        # Extract video clips
        print("Extracting video clips...")
        extract_video_clips(language, actor, metadata, verbose=False)
        video_clips_dir = os.path.join(
            config.MOVIE_TRANSLATION_DATASET_DIR, 'videos', language, actor)

        # Extract faces and landmarks from video clips
        print("Extracting faces and landmarks from video clips...")
        for v, video_file in enumerate(tqdm.tqdm(sorted(glob.glob(
                os.path.join(video_clips_dir, "*.mp4"))))):
            if config.USING_DLIB_OR_FACE_ALIGNMENT == 'dlib':
                extract_face_frames_and_landmarks_from_video(
                    video_file,
                    config.USING_DLIB_OR_FACE_ALIGNMENT,
                    dlib_detector=dlib_detector,
                    dlib_predictor=dlib_predictor,
                    save_with_blackened_mouths_and_polygons=True,
                    save_gif=False,
                    save_landmarks_as_txt=True)
            elif config.USING_DLIB_OR_FACE_ALIGNMENT == 'face_alignment':
                extract_face_frames_and_landmarks_from_video(
                    video_file,
                    config.USING_DLIB_OR_FACE_ALIGNMENT,
                    face_alignment_3D_object=face_alignment_3D_object,
                    face_alignment_2D_object=face_alignment_2D_object,
                    save_with_blackened_mouths_and_polygons=True,
    dfs = [
        DataFrame.from_dict(data_[name]).rename(columns={'value': name})
        for name in data_.keys()
    ]

    # Merge all datasets together
    data = reduce(lambda df1, df2: df1.merge(df2, on='date'), dfs)

    # Use consistent naming convention for columns
    data.columns = map(lambda name: name[0].upper() + name[1:], data.columns)

    # Add key column and return
    data['Key'] = key
    first_columns = ['Date', 'Key']
    return data[first_columns + list(set(data.columns) - set(first_columns))]


def _get_mobility_report(key: str):
    try:
        return get_mobility_report(key)
    except Exception:
        return DataFrame()


# Load all available keys from metadata and output the mobility report for
# all keys
keys = read_metadata().Key.unique()
data = list(
    tqdm(ThreadPool(4).imap_unordered(_get_mobility_report, keys),
         total=len(keys)))
concat(data).sort_values(['Date', 'Key']).to_csv(sys.stdout, index=False)