def segment():
    """Slice raw PPG text files into per-participant JSON files.

    Raw files are named '<participant>-<label>.txt'; each one becomes a
    labelled entry (sample rate + float signal) in that participant's
    segmented JSON output file.
    """
    raw_data_dir = os.path.join(BASE_DIR, 'data', 'raw')
    segmented_data_dir = os.path.join(BASE_DIR, 'data', 'segmented')

    if not exist(pathname=raw_data_dir):
        return

    per_participant = {}
    for txt_name in fnmatch.filter(os.listdir(raw_data_dir), '*.txt'):
        stem = os.path.splitext(txt_name)[0]
        participant, label = stem.split('-')
        # One dict per participant, keyed by the label from the filename.
        per_participant.setdefault(participant, {})[label] = {
            'sample_rate': PPG_SAMPLE_RATE,
            'signal': map(
                float,
                load_text(pathname=os.path.join(raw_data_dir, txt_name))),
        }

    for participant in per_participant:
        dump_json(data=per_participant[participant],
                  pathname=os.path.join(segmented_data_dir,
                                        '%s.json' % participant),
                  overwrite=True)
# --- Example #2 (示例#2) ---
def split():
    """Partition extracted per-block features into train/test JSON files.

    For every extracted JSON file, block features are grouped by task
    level ('0'/'1'/'2'); the first TRAINING_DATA_RATIO share of each
    level goes to 'train', the remainder to 'test'.
    """
    extracted_data_dir = os.path.join(BASE_DIR, 'data', 'extracted')
    splited_data_dir = os.path.join(BASE_DIR, 'data', 'splited')

    if not exist(pathname=extracted_data_dir):
        return

    for json_name in fnmatch.filter(os.listdir(extracted_data_dir), '*.json'):
        by_level = {'0': [], '1': [], '2': []}
        json_data = load_json(
            pathname=os.path.join(extracted_data_dir, json_name))
        if json_data is None:
            continue
        for session_id in json_data:
            # Change ratios are computed against the session's resting PPG.
            rest_ppg = json_data[session_id]['rest']['ppg']
            for block in json_data[session_id]['blocks']:
                by_level[str(block['level'])].append({
                    'ppg45': block['ppg']['ppg45'],
                    'ppg45_cr': get_change_ratio(
                        data=block['ppg']['ppg45'],
                        baseline=rest_ppg['ppg45']),
                    'svri': block['ppg']['svri'],
                    'svri_cr': get_change_ratio(
                        data=block['ppg']['svri'],
                        baseline=rest_ppg['svri']),
                })
        # Split each level's feature list at the training-ratio boundary.
        cut = dict((level, int(len(by_level[level]) * TRAINING_DATA_RATIO))
                   for level in by_level)
        output_data = {
            'train': dict((level, by_level[level][:cut[level]])
                          for level in by_level),
            'test': dict((level, by_level[level][cut[level]:])
                         for level in by_level),
        }
        dump_json(data=output_data,
                  pathname=os.path.join(splited_data_dir, json_name),
                  overwrite=True)
def extract():
    """Compute PPG-45 and SVRI features from preprocessed waveforms.

    For each preprocessed JSON file, every label's 'single_waveforms'
    list is replaced by derived 'ppg45' and 'svri' feature lists, and
    the result is written to the extracted data directory.
    """
    preprocessed_data_dir = os.path.join(BASE_DIR, 'data', 'preprocessed')
    extracted_data_dir = os.path.join(BASE_DIR, 'data', 'extracted')

    if not exist(pathname=preprocessed_data_dir):
        return

    for json_name in fnmatch.filter(os.listdir(preprocessed_data_dir),
                                    '*.json'):
        json_data = load_json(
            pathname=os.path.join(preprocessed_data_dir, json_name))
        if json_data is None:
            continue
        for label in json_data:
            entry = json_data[label]
            waveforms = entry['single_waveforms']
            entry['ppg45'] = [
                extract_ppg45(single_waveform=waveform,
                              sample_rate=entry['sample_rate'])
                for waveform in waveforms
            ]
            entry['svri'] = [
                extract_svri(single_waveform=waveform)
                for waveform in waveforms
            ]
            # Raw waveforms are no longer needed once features exist.
            del entry['single_waveforms']
        dump_json(data=json_data,
                  pathname=os.path.join(extracted_data_dir, json_name),
                  overwrite=True)
def classify():
    """Train each configured classifier on per-participant signals.

    Flattens each participant's labelled signals into one list, holds
    the last signal out as that participant's test sample (when more
    than one exists) and uses the rest for training, then fits and
    scores every configured classifier.

    Returns a dict mapping classifier name to its test score.
    """
    splited_data_dir = os.path.join(BASE_DIR, 'data', 'splited')
    result_dir = os.path.join(BASE_DIR, 'results')

    classifiers = [
        ('logistic_regression', logistic_regression_classifier, ),
        # ('support_vector', support_vector_classifier, ),
        ('gaussian_naive_bayes', gaussian_naive_bayes_classifier, ),
        ('decision_tree', decision_tree_classifier, ),
        ('random_forest', random_forest_classifier, ),
        ('adaboost', adaboost_classifier, ),
        ('gradient_boosting', gradient_boosting_classifier, ),
        #('voting', voting_classifier, ), # voting classifier has to be the LAST item in the list
    ]

    # BUG FIX: training previously ran even when the data directory was
    # missing, crashing with a NameError on train_features.
    if not exist(pathname=splited_data_dir):
        return {}

    train_features = []
    train_labels = []
    test_features = []
    test_labels = []
    for filename_with_ext in fnmatch.filter(os.listdir(splited_data_dir),
                                            '*.json'):
        pathname = os.path.join(splited_data_dir, filename_with_ext)
        json_data = load_json(pathname=pathname)
        if json_data is None:
            continue
        participant = filename_with_ext.split('.')[0]
        # Flatten all labelled signals for this participant.
        # BUG FIX: the original assigned to signal_list.append
        # (AttributeError) and read an undefined name 'signal'.
        signal_list = []
        for label in json_data:
            signal_list.extend(json_data[label]['signal'])
        if len(signal_list) == 0:
            continue
        # BUG FIX: the original compared the list itself to 1 and put the
        # participant id into train_features instead of train_labels.
        if len(signal_list) == 1:
            # Only one signal: nothing can be held out for testing.
            train_features.append(signal_list[0])
            train_labels.append(participant)
        else:
            # Hold the last signal out as this participant's test sample.
            test_features.append(signal_list[-1])
            test_labels.append(participant)
            train_features.extend(signal_list[:-1])
            train_labels.extend([participant] * (len(signal_list) - 1))

    # Fit and score every classifier.  (Leftover pdb.set_trace() removed.)
    scores = {}
    for classifier_name, classifier_object in classifiers:
        classifier = classifier_object(features=train_features,
                                       labels=train_labels)
        scores[classifier_name] = classifier.score(test_features, test_labels)
    return scores
# --- Example #5 (示例#5) ---
def extract():
    """Derive PPG-45 and SVRI features for every session's PPG data.

    For each preprocessed JSON file, the resting-state PPG and every
    task block's PPG get 'ppg45'/'svri' feature lists computed from
    their single waveforms (or None when no waveforms exist), and the
    raw 'single_waveforms' entry is dropped.
    """
    preprocessed_data_dir = os.path.join(BASE_DIR, 'data', 'preprocessed')
    extracted_data_dir = os.path.join(BASE_DIR, 'data', 'extracted')

    def add_features(ppg):
        # Replace the waveform list with derived features, in place.
        waveforms = ppg['single_waveforms']
        if waveforms is not None:
            ppg['ppg45'] = [
                extract_ppg45(single_waveform=waveform,
                              sample_rate=ppg['sample_rate'])
                for waveform in waveforms
            ]
            ppg['svri'] = [
                extract_svri(single_waveform=waveform)
                for waveform in waveforms
            ]
        else:
            ppg['ppg45'] = None
            ppg['svri'] = None
        del ppg['single_waveforms']

    if not exist(pathname=preprocessed_data_dir):
        return

    for json_name in fnmatch.filter(os.listdir(preprocessed_data_dir),
                                    '*.json'):
        json_data = load_json(
            pathname=os.path.join(preprocessed_data_dir, json_name))
        if json_data is None:
            continue
        for session_id in json_data:
            add_features(json_data[session_id]['rest']['ppg'])
            for block in json_data[session_id]['blocks']:
                add_features(block['ppg'])
        dump_json(data=json_data,
                  pathname=os.path.join(extracted_data_dir, json_name),
                  overwrite=True)
# --- Example #6 (示例#6) ---
def preprocess():
    """Turn segmented PPG signals into lists of single waveforms.

    For each segmented JSON file, every label's raw signal is smoothed
    and sliced into single waveforms; the raw 'signal' entry is dropped
    and the result is written to the preprocessed data directory.
    """
    segmented_data_dir = os.path.join(BASE_DIR, 'data', 'segmented')
    preprocessed_data_dir = os.path.join(BASE_DIR, 'data', 'preprocessed')

    if not exist(pathname=segmented_data_dir):
        return

    for json_name in fnmatch.filter(os.listdir(segmented_data_dir), '*.json'):
        json_data = load_json(
            pathname=os.path.join(segmented_data_dir, json_name))
        if json_data is None:
            continue
        for label in json_data:
            entry = json_data[label]
            smoothed = smooth_ppg_signal(signal=entry['signal'],
                                         sample_rate=entry['sample_rate'])
            entry['single_waveforms'] = extract_ppg_single_waveform(
                signal=smoothed)
            del entry['signal']
        dump_json(data=json_data,
                  pathname=os.path.join(preprocessed_data_dir, json_name),
                  overwrite=True)
def subject_independent():
    """Build leave-one-subject-out train/test splits.

    Collects per-level feature dicts for every subject, then for each
    subject writes a JSON file whose 'train' set merges all OTHER
    subjects' features and whose 'test' set is the held-out tail of the
    subject's own features.
    """
    extracted_data_dir = os.path.join(BASE_DIR, 'data', 'extracted')
    subject_independent_data_dir = os.path.join(BASE_DIR, 'data', 'subject_independent')

    # (output key, signal group, field) for each raw feature; the
    # matching '<key>_cr' change-ratio entry is derived alongside it.
    feature_specs = (
        ('ppg45', 'ppg', 'ppg45'),
        ('svri', 'ppg', 'svri'),
        ('average_skin_conductance_level', 'skin_conductance', 'average_level'),
        ('minimum_skin_conductance_level', 'skin_conductance', 'minimum_level'),
        ('average_rri', 'ecg', 'average_rri'),
        ('rmssd', 'ecg', 'rmssd'),
        ('lf_hrv_power', 'ecg', 'lf_hrv_power'),
        ('hf_hrv_power', 'ecg', 'hf_hrv_power'),
    )

    if not exist(pathname=extracted_data_dir):
        return

    all_subject_data = {}
    for json_name in fnmatch.filter(os.listdir(extracted_data_dir), '*.json'):
        subject = os.path.splitext(json_name)[0]
        feature_data = {'0': [], '1': [], '2': []}
        json_data = load_json(
            pathname=os.path.join(extracted_data_dir, json_name))
        if json_data is None:
            continue
        for session_id in json_data:
            # Change ratios are computed against the session's rest data.
            rest = json_data[session_id]['rest']
            for block in json_data[session_id]['blocks']:
                features = {}
                for key, group, field in feature_specs:
                    features[key] = block[group][field]
                    features[key + '_cr'] = get_change_ratio(
                        data=block[group][field],
                        baseline=rest[group][field])
                feature_data[str(block['level'])].append(features)
        all_subject_data[subject] = feature_data

    for subject in all_subject_data:
        # Every subject except the held-out one contributes training data.
        others = [all_subject_data[participant]
                  for participant in all_subject_data
                  if participant != subject]
        held_out = all_subject_data[subject]
        test_set = {}
        for level in ('0', '1', '2'):
            cut = int(len(held_out[level]) * TRAINING_DATA_RATIO)
            test_set[level] = held_out[level][cut:]
        output_data = {
            'train': reduce(lambda left, right: merge(left, right), others),
            'test': test_set,
        }
        dump_json(data=output_data,
                  pathname=os.path.join(subject_independent_data_dir,
                                        '%s.json' % subject),
                  overwrite=True)
# --- Example #8 (示例#8) ---
def segment():
    """Group raw PPG text files into per-participant session JSON files.

    Filenames are '<participant>-<session>-rest.txt' for the resting
    baseline or '<participant>-<session>-<block>-<level>.txt' for task
    blocks; both populate a nested participant/session structure that
    is dumped as one JSON file per participant.
    """
    raw_ppg_data_dir = os.path.join(BASE_DIR, 'data', 'raw', 'ppg')
    segmented_data_dir = os.path.join(BASE_DIR, 'data', 'segmented')

    if not exist(pathname=raw_ppg_data_dir):
        return

    output_data = {}

    def session_entry(participant, session_id):
        # Create the participant/session skeleton on first sight.
        if participant not in output_data:
            output_data[participant] = {}
        if session_id not in output_data[participant]:
            output_data[participant][session_id] = {
                'rest': {
                    'ppg': {},
                },
                'blocks': [],
            }
        return output_data[participant][session_id]

    for txt_name in fnmatch.filter(os.listdir(raw_ppg_data_dir), '*.txt'):
        stem = os.path.splitext(txt_name)[0]
        signal = map(float,
                     load_text(pathname=os.path.join(raw_ppg_data_dir,
                                                     txt_name)))
        if stem.endswith('-rest'):
            participant, session_id, _ = stem.split('-')
            entry = session_entry(participant, session_id)
            entry['rest']['ppg']['sample_rate'] = PPG_SAMPLE_RATE
            entry['rest']['ppg']['signal'] = signal
        else:
            participant, session_id, _, task_level = stem.split('-')
            entry = session_entry(participant, session_id)
            entry['blocks'].append({
                'level': task_level,
                'ppg': {
                    'sample_rate': PPG_SAMPLE_RATE,
                    'signal': signal,
                },
            })

    for participant in output_data:
        dump_json(data=output_data[participant],
                  pathname=os.path.join(segmented_data_dir,
                                        '%s.json' % participant),
                  overwrite=True)
# --- Example #9 (示例#9) ---
def preprocess():
    """Convert segmented session signals into single PPG waveforms.

    For each segmented JSON file, the resting PPG signal and every task
    block's signal are smoothed and sliced into single waveforms (None
    when the signal is missing); the raw 'signal' entries are removed.
    """
    segmented_data_dir = os.path.join(BASE_DIR, 'data', 'segmented')
    preprocessed_data_dir = os.path.join(BASE_DIR, 'data', 'preprocessed')

    def to_waveforms(ppg):
        # Replace the raw signal with its single-waveform list, in place.
        if ppg['signal'] is not None:
            ppg['single_waveforms'] = extract_ppg_single_waveform(
                signal=smooth_ppg_signal(signal=ppg['signal'],
                                         sample_rate=ppg['sample_rate']))
        else:
            ppg['single_waveforms'] = None
        del ppg['signal']

    if not exist(pathname=segmented_data_dir):
        return

    for json_name in fnmatch.filter(os.listdir(segmented_data_dir), '*.json'):
        json_data = load_json(
            pathname=os.path.join(segmented_data_dir, json_name))
        if json_data is None:
            continue
        for session_id in json_data:
            to_waveforms(json_data[session_id]['rest']['ppg'])
            for block in json_data[session_id]['blocks']:
                to_waveforms(block['ppg'])
        dump_json(data=json_data,
                  pathname=os.path.join(preprocessed_data_dir, json_name),
                  overwrite=True)
def classify():
    """Train and score every configured classifier per participant.

    For each split-data JSON file and every (label set, feature set)
    pair, a model per classifier is loaded from disk or trained and
    cached, then scored on the held-out test set.  Per-participant
    scores are exported as JSON plus an averaged-score CSV per label
    set.
    """
    splited_data_dir = os.path.join(BASE_DIR, 'data', 'splited')
    model_dir = os.path.join(BASE_DIR, 'models')
    result_dir = os.path.join(BASE_DIR, 'results')

    # Classification targets; here a single pre-task vs in-task split.
    label_sets = [
        ['pre', 'in'],
    ]
    # Feature combinations compared against each other in the results.
    feature_type_sets = [
        ['ppg45', 'svri'],
        ['ppg45'],
        ['svri'],
    ]
    # (name, constructor) pairs; each constructor takes features/labels
    # and returns a fitted estimator exposing .score().
    classifiers = [
        (
            'logistic_regression',
            logistic_regression_classifier,
        ),
        # ('support_vector', support_vector_classifier, ),
        (
            'gaussian_naive_bayes',
            gaussian_naive_bayes_classifier,
        ),
        (
            'decision_tree',
            decision_tree_classifier,
        ),
        (
            'random_forest',
            random_forest_classifier,
        ),
        (
            'adaboost',
            adaboost_classifier,
        ),
        (
            'gradient_boosting',
            gradient_boosting_classifier,
        ),
        (
            'voting',
            voting_classifier,
        ),  # voting classifier has to be the LAST item in the list
    ]

    if exist(pathname=splited_data_dir):
        # result_data[label_set][feature_set][classifier][participant] = score
        result_data = {}
        for filename_with_ext in fnmatch.filter(os.listdir(splited_data_dir),
                                                '*.json'):
            participant = os.path.splitext(filename_with_ext)[0]
            pathname = os.path.join(splited_data_dir, filename_with_ext)
            json_data = load_json(pathname=pathname)
            if json_data is not None:
                for label_set in label_sets:
                    label_set_name = '-'.join(label_set)
                    if label_set_name not in result_data:
                        result_data[label_set_name] = {}
                    for feature_type_set in feature_type_sets:
                        feature_type_set_name = '-'.join(feature_type_set)
                        if feature_type_set_name not in result_data[
                                label_set_name]:
                            result_data[label_set_name][
                                feature_type_set_name] = {}
                        train_features, train_labels, test_features, test_labels = get_feature_set(
                            data=json_data,
                            label_set=label_set,
                            feature_type_set=feature_type_set)
                        # Fitted base estimators collected for the final
                        # 'voting' classifier (which is last in the list).
                        estimators = []
                        for classifier_name, classifier_object in classifiers:
                            if classifier_name not in result_data[
                                    label_set_name][feature_type_set_name]:
                                result_data[label_set_name][
                                    feature_type_set_name][
                                        classifier_name] = {}
                            model_pathname = os.path.join(
                                model_dir, label_set_name,
                                feature_type_set_name, classifier_name,
                                '%s.model' % participant)
                            # Reuse a previously trained model from disk when
                            # one exists; otherwise train and cache it.
                            classifier = load_model(pathname=model_pathname)
                            if classifier is None:
                                if classifier_name == 'voting':
                                    classifier = classifier_object(
                                        estimators=estimators,
                                        features=train_features,
                                        labels=train_labels)
                                else:
                                    classifier = classifier_object(
                                        features=train_features,
                                        labels=train_labels)
                                dump_model(model=classifier,
                                           pathname=model_pathname)
                            score = classifier.score(test_features,
                                                     test_labels)
                            print participant, score, label_set_name, feature_type_set_name, classifier_name
                            result_data[label_set_name][feature_type_set_name][
                                classifier_name][participant] = score

                            # prepare estimators for the training of voting classifier
                            if classifier_name != 'voting':
                                # Prefer the tuned inner estimator when the
                                # wrapper exposes one (grid-search style).
                                if hasattr(classifier, 'best_estimator_'):
                                    estimators.append((
                                        classifier_name,
                                        classifier.best_estimator_,
                                    ))
                                else:
                                    estimators.append((
                                        classifier_name,
                                        classifier,
                                    ))

        for label_set_name in result_data:
            # Raw per-participant scores as JSON.
            dump_json(data=result_data[label_set_name],
                      pathname=os.path.join(result_dir,
                                            '%s.json' % label_set_name),
                      overwrite=True)
            # CSV summary: one row per feature set, with each classifier's
            # score averaged over participants.
            csv_data = []
            for feature_type_set in feature_type_sets:
                feature_type_set_name = '-'.join(feature_type_set)
                csv_row = {
                    'feature_set': feature_type_set_name,
                }
                for classifier_name in result_data[label_set_name][
                        feature_type_set_name]:
                    csv_row[classifier_name] = sum(
                        result_data[label_set_name][feature_type_set_name]
                        [classifier_name].values()) / len(
                            result_data[label_set_name][feature_type_set_name]
                            [classifier_name])
                csv_data.append(csv_row)
            fieldnames = ['feature_set'] + [val[0] for val in classifiers]
            export_csv(data=csv_data,
                       fieldnames=fieldnames,
                       pathname=os.path.join(result_dir,
                                             '%s.csv' % label_set_name),
                       overwrite=True)
# --- Example #11 (示例#11) ---
def merge():
    """Collect per-level feature dicts for every extracted JSON file.

    For each subject file, every task block contributes one feature
    dict (raw value plus change ratio against the session's resting
    baseline) to the list for its difficulty level; results are written
    to the merged data directory.
    """
    extracted_data_dir = os.path.join(BASE_DIR, 'data', 'extracted')
    merged_data_dir = os.path.join(BASE_DIR, 'data', 'merged')

    # (output key, signal group, field) for each raw feature; the
    # matching '<key>_cr' change-ratio entry is derived alongside it.
    feature_specs = (
        ('ppg45', 'ppg', 'ppg45'),
        ('svri', 'ppg', 'svri'),
        ('average_skin_conductance_level', 'skin_conductance', 'average_level'),
        ('minimum_skin_conductance_level', 'skin_conductance', 'minimum_level'),
        ('average_rri', 'ecg', 'average_rri'),
        ('rmssd', 'ecg', 'rmssd'),
        ('lf_hrv_power', 'ecg', 'lf_hrv_power'),
        ('hf_hrv_power', 'ecg', 'hf_hrv_power'),
    )

    if not exist(pathname=extracted_data_dir):
        return

    for json_name in fnmatch.filter(os.listdir(extracted_data_dir), '*.json'):
        output_data = {'0': [], '1': [], '2': []}
        json_data = load_json(
            pathname=os.path.join(extracted_data_dir, json_name))
        if json_data is None:
            continue
        for session_id in json_data:
            # Change ratios are computed against the session's rest data.
            rest = json_data[session_id]['rest']
            for block in json_data[session_id]['blocks']:
                features = {}
                for key, group, field in feature_specs:
                    features[key] = block[group][field]
                    features[key + '_cr'] = get_change_ratio(
                        data=block[group][field],
                        baseline=rest[group][field])
                output_data[str(block['level'])].append(features)
        dump_json(data=output_data,
                  pathname=os.path.join(merged_data_dir, json_name),
                  overwrite=True)
# --- Example #12 (示例#12) ---
def segment():
    """Segment raw meta/PPG/BIOPAC recordings into per-participant JSON files.

    Builds a per-session skeleton from the meta JSON (timestamps, block
    levels, RSME ratings, stimulus responses), then slices the PPG and
    BIOPAC (ECG + skin conductance) signals into the 'rest' window and each
    task block using those timestamps.  Participants with every session
    present and no skipped/short segment are written to
    data/segmented/<participant>.json; all others go to
    data/segmented/incomplete/.
    """
    raw_meta_data_dir = os.path.join(BASE_DIR, 'data', 'raw', 'meta')
    raw_ppg_data_dir = os.path.join(BASE_DIR, 'data', 'raw', 'ppg')
    raw_biopac_data_dir = os.path.join(BASE_DIR, 'data', 'raw', 'biopac')
    segmented_data_dir = os.path.join(BASE_DIR, 'data', 'segmented')

    # output_data[participant][session_id] -> {'rest': {...}, 'blocks': [...]}
    output_data = {}
    # completeness[participant] flips to False whenever any segment of that
    # participant's data has to be skipped.
    completeness = {}

    # Meta data
    # Meta files are named '<participant>-<session_id>.json' and define the
    # session structure that the two signal passes below fill in.
    if exist(pathname=raw_meta_data_dir):
        for filename_with_ext in fnmatch.filter(os.listdir(raw_meta_data_dir),
                                                '*.json'):
            filename, file_ext = os.path.splitext(filename_with_ext)
            participant, session_id = filename.split('-')
            if participant not in output_data:
                output_data[participant] = {}
            if participant not in completeness:
                completeness[participant] = True
            output_data[participant][session_id] = {'rest': {}, 'blocks': []}
            pathname = os.path.join(raw_meta_data_dir, filename_with_ext)
            raw_json_data = load_json(pathname=pathname)
            if raw_json_data is not None:
                # Signal fields start as None and are overwritten by the PPG
                # and BIOPAC passes when enough samples are available.
                output_data[participant][session_id]['rest'] = {
                    'start_time':
                    parse_iso_time_string(
                        raw_json_data['rest_start_timestamp']),
                    'ppg': {
                        'sample_rate': None,
                        'signal': None,
                    },
                    'ecg': {
                        'sample_rate': None,
                        'signal': None,
                    },
                    'skin_conductance': {
                        'sample_rate': None,
                        'signal': None,
                    },
                }
                for block in raw_json_data['blocks']:
                    # A block whose first stimulus has no load timestamp
                    # cannot be aligned to the signals; drop it.
                    if block['stimuli'][0]['timestamp']['load'] is None:
                        print 'Skip one invalid block in \'%s\'.' % filename_with_ext
                        completeness[participant] = False
                        continue
                    output_data[participant][session_id]['blocks'].append({
                        'level':
                        block['level'],
                        'rsme':
                        int(block['rsme']),
                        'start_time':
                        parse_iso_time_string(
                            block['stimuli'][0]['timestamp']['load']),
                        'stimuli': [{
                            'stimulus': item['stimulus'],
                            'is_target': item['is_target'],
                            'answer': item['answer'],
                            'correct': item['correct'],
                            'response_time': item['response_time'],
                        } for item in block['stimuli']],
                        'ppg': {
                            'sample_rate': None,
                            'signal': None,
                        },
                        'ecg': {
                            'sample_rate': None,
                            'signal': None,
                        },
                        'skin_conductance': {
                            'sample_rate': None,
                            'signal': None,
                        },
                    })

    # PPG data
    # PPG files are named '<participant>-<session_id>-<Y_M_D_h_m_s>.txt';
    # the timestamp in the name is the recording start time.
    if exist(pathname=raw_ppg_data_dir):
        for filename_with_ext in fnmatch.filter(os.listdir(raw_ppg_data_dir),
                                                '*.txt'):
            filename, file_ext = os.path.splitext(filename_with_ext)
            participant, session_id, time_str = filename.split('-')
            raw_ppg_data_start_time = datetime(
                *[int(item) for item in time_str.split('_')])
            if participant not in output_data or session_id not in output_data[
                    participant]:
                completeness[participant] = False
                continue
            # Warn (but do not yet skip) when the recording started after the
            # rest period began; the tdelta check below does the skipping.
            if raw_ppg_data_start_time > output_data[participant][session_id][
                    'rest']['start_time']:
                print 'Recoding data started too late in \'%s\': %s > %s' % (
                    filename_with_ext, raw_ppg_data_start_time,
                    output_data[participant][session_id]['rest']['start_time'])
            pathname = os.path.join(raw_ppg_data_dir, filename_with_ext)
            raw_ppg_data = load_text(pathname=pathname)
            if raw_ppg_data is not None:
                # One sample per text line (Python 2 map -> list of floats).
                raw_ppg_data = map(float, raw_ppg_data)
                tdelta = output_data[participant][session_id]['rest'][
                    'start_time'] - raw_ppg_data_start_time
                if tdelta.total_seconds() < 0:
                    print 'Skip \'rest\' PPG data.'
                    completeness[participant] = False
                    continue
                # Slice the rest window by elapsed seconds * sample rate.
                start_index = int(tdelta.total_seconds() * PPG_SAMPLE_RATE)
                length = REST_DURATION * PPG_SAMPLE_RATE
                end_index = start_index + length
                ppg_data = raw_ppg_data[start_index:end_index]
                if len(ppg_data) < length:
                    print 'Not enough \'rest\' PPG data (%s < %s). Skip.' % (
                        len(ppg_data), length)
                    completeness[participant] = False
                    continue
                output_data[participant][session_id]['rest']['ppg'][
                    'sample_rate'] = PPG_SAMPLE_RATE
                output_data[participant][session_id]['rest']['ppg'][
                    'signal'] = ppg_data
                # Same slicing per task block, anchored on each block's
                # start_time from the meta pass.
                for block in output_data[participant][session_id]['blocks']:
                    tdelta = block['start_time'] - raw_ppg_data_start_time
                    if tdelta.total_seconds() < 0:
                        print 'Skip one block PPG data.'
                        completeness[participant] = False
                        continue
                    start_index = int(tdelta.total_seconds() * PPG_SAMPLE_RATE)
                    length = BLOCK_DURATION * PPG_SAMPLE_RATE
                    end_index = start_index + length
                    ppg_data = raw_ppg_data[start_index:end_index]
                    if len(ppg_data) < length:
                        print 'Not enough one block PPG data (%s < %s). Skip.' % (
                            len(ppg_data), length)
                        completeness[participant] = False
                        continue
                    block['ppg']['sample_rate'] = PPG_SAMPLE_RATE
                    block['ppg']['signal'] = ppg_data

    # BIOPAC data
    # BIOPAC files are named '<participant>-<session_id>-<offset_seconds>.txt'
    # where offset_seconds is how long the recording ran before rest started.
    if exist(pathname=raw_biopac_data_dir):
        for filename_with_ext in fnmatch.filter(
                os.listdir(raw_biopac_data_dir), '*.txt'):
            filename, file_ext = os.path.splitext(filename_with_ext)
            participant, session_id, seconds_str = filename.split('-')
            pre_tdelta = timedelta(seconds=int(seconds_str))
            if participant not in output_data or session_id not in output_data[
                    participant]:
                completeness[participant] = False
                continue
            pathname = os.path.join(raw_biopac_data_dir, filename_with_ext)
            raw_biopac_data = load_text(pathname=pathname)
            if raw_biopac_data is not None:
                # The export header states the sampling interval in
                # milliseconds per sample; convert to Hz.
                sample_rate = 1000 / int(
                    raw_biopac_data[BIOPAC_MSEC_PER_SAMPLE_LINE_NUM -
                                    1].split(' ')[0].strip())
                # Data rows are tab-separated channel columns after the header.
                raw_ecg_data = [
                    float(line.split('\t')[BIOPAC_ECG_CHANNEL].strip())
                    for line in raw_biopac_data[BIOPAC_HEADER_LINES:]
                ]
                raw_skin_conductance_data = [
                    float(
                        line.split('\t')
                        [BIOPAC_SKIN_CONDUCTANCE_CHANNEL].strip())
                    for line in raw_biopac_data[BIOPAC_HEADER_LINES:]
                ]
                tdelta = pre_tdelta
                if tdelta.total_seconds() < 0:
                    print 'Skip \'rest\' ECG/skin conductance data.'
                    completeness[participant] = False
                    continue
                start_index = int(tdelta.total_seconds() * sample_rate)
                length = REST_DURATION * sample_rate
                end_index = start_index + length
                ecg_data = raw_ecg_data[start_index:end_index]
                skin_conductance_data = raw_skin_conductance_data[
                    start_index:end_index]
                if len(ecg_data) < length:
                    print 'Not enough \'rest\' ECG/skin conductance data (%s < %s). Skip.' % (
                        len(ecg_data), length)
                    completeness[participant] = False
                    continue
                output_data[participant][session_id]['rest']['ecg'][
                    'sample_rate'] = sample_rate
                output_data[participant][session_id]['rest']['ecg'][
                    'signal'] = ecg_data
                output_data[participant][session_id]['rest'][
                    'skin_conductance']['sample_rate'] = sample_rate
                output_data[participant][session_id]['rest'][
                    'skin_conductance']['signal'] = skin_conductance_data
                for block in output_data[participant][session_id]['blocks']:
                    # Block offset into the recording = (block start - rest
                    # start) + the pre-rest recording offset.
                    tdelta = block['start_time'] - output_data[participant][
                        session_id]['rest']['start_time'] + pre_tdelta
                    if tdelta.total_seconds() < 0:
                        print 'Skip one block ECG/skin conductance data.'
                        completeness[participant] = False
                        continue
                    start_index = int(tdelta.total_seconds() * sample_rate)
                    length = BLOCK_DURATION * sample_rate
                    end_index = start_index + length
                    ecg_data = raw_ecg_data[start_index:end_index]
                    skin_conductance_data = raw_skin_conductance_data[
                        start_index:end_index]
                    if len(ecg_data) < length:
                        print 'Not enough one block ECG/skin conductance data (%s < %s). Skip.' % (
                            len(ecg_data), length)
                        completeness[participant] = False
                        continue
                    block['ecg']['sample_rate'] = sample_rate
                    block['ecg']['signal'] = ecg_data
                    block['skin_conductance']['sample_rate'] = sample_rate
                    block['skin_conductance']['signal'] = skin_conductance_data

    # Clean up time data
    # datetime objects are not JSON-serializable and are no longer needed.
    for participant in output_data:
        for session_id in output_data[participant]:
            del output_data[participant][session_id]['rest']['start_time']
            for block in output_data[participant][session_id]['blocks']:
                del block['start_time']

    # Save segmented signal data
    # Complete participants (no skips, all sessions) go to the main output
    # directory; everything else is quarantined under 'incomplete'.
    for participant in output_data:
        output_filename = '%s.json' % participant
        if completeness[participant] and len(
                output_data[participant]) == TOTAL_SESSION_NUM:
            dump_json(data=output_data[participant],
                      pathname=os.path.join(segmented_data_dir,
                                            output_filename),
                      overwrite=True)
        else:
            dump_json(data=output_data[participant],
                      pathname=os.path.join(segmented_data_dir, 'incomplete',
                                            output_filename),
                      overwrite=True)
# ---- Example #13 ----
def extract():
    """Extract scalar features from preprocessed signal segments.

    For every JSON file in data/preprocessed, replaces intermediate signal
    representations with derived features, for the 'rest' segment and for
    each task block alike:

      * PPG: 'single_waveforms'        -> 'ppg45' and 'svri' feature lists
      * skin conductance: 'signal'     -> 'average_level', 'minimum_level'
      * ECG: 'rri'/'rri_interpolated'  -> 'average_rri', 'rmssd',
                                          'mf_hrv_power', 'hf_hrv_power'

    Any missing input (None) yields None features.  The consumed
    intermediate keys are deleted, and the result is written to
    data/extracted under the same filename.
    """
    preprocessed_data_dir = os.path.join(BASE_DIR, 'data', 'preprocessed')
    extracted_data_dir = os.path.join(BASE_DIR, 'data', 'extracted')

    if exist(pathname=preprocessed_data_dir):
        for filename_with_ext in fnmatch.filter(
                os.listdir(preprocessed_data_dir), '*.json'):
            pathname = os.path.join(preprocessed_data_dir, filename_with_ext)
            json_data = load_json(pathname=pathname)
            if json_data is not None:
                for session_id in json_data:
                    # --- rest segment: PPG features ---
                    if json_data[session_id]['rest']['ppg'][
                            'single_waveforms'] is not None:
                        json_data[session_id]['rest']['ppg']['ppg45'] = [
                            extract_ppg45(single_waveform=single_waveform,
                                          sample_rate=json_data[session_id]
                                          ['rest']['ppg']['sample_rate'])
                            for single_waveform in json_data[session_id]
                            ['rest']['ppg']['single_waveforms']
                        ]
                        json_data[session_id]['rest']['ppg']['svri'] = [
                            extract_svri(single_waveform=single_waveform)
                            for single_waveform in json_data[session_id]
                            ['rest']['ppg']['single_waveforms']
                        ]
                    else:
                        json_data[session_id]['rest']['ppg']['ppg45'] = None
                        json_data[session_id]['rest']['ppg']['svri'] = None
                    del json_data[session_id]['rest']['ppg'][
                        'single_waveforms']
                    # --- rest segment: skin conductance features ---
                    if json_data[session_id]['rest']['skin_conductance'][
                            'signal'] is not None:
                        json_data[session_id]['rest']['skin_conductance'][
                            'average_level'] = extract_average_skin_conductance_level(
                                signal=json_data[session_id]['rest']
                                ['skin_conductance']['signal'])
                        json_data[session_id]['rest']['skin_conductance'][
                            'minimum_level'] = extract_minimum_skin_conductance_level(
                                signal=json_data[session_id]['rest']
                                ['skin_conductance']['signal'])
                    else:
                        json_data[session_id]['rest']['skin_conductance'][
                            'average_level'] = None
                        json_data[session_id]['rest']['skin_conductance'][
                            'minimum_level'] = None
                    del json_data[session_id]['rest']['skin_conductance'][
                        'signal']
                    # --- rest segment: ECG / HRV features ---
                    if json_data[session_id]['rest']['ecg']['rri'] is not None:
                        json_data[session_id]['rest']['ecg'][
                            'average_rri'] = extract_average_rri(
                                rri=json_data[session_id]['rest']['ecg']
                                ['rri'])
                        json_data[session_id]['rest']['ecg'][
                            'rmssd'] = extract_rmssd(rri=json_data[session_id]
                                                     ['rest']['ecg']['rri'])
                        # HRV power needs the evenly resampled RRI series.
                        mf_hrv_power, hf_hrv_power = extract_hrv_power(
                            rri=json_data[session_id]['rest']['ecg']
                            ['rri_interpolated'],
                            sample_rate=json_data[session_id]['rest']['ecg']
                            ['sample_rate'])
                        json_data[session_id]['rest']['ecg'][
                            'mf_hrv_power'] = mf_hrv_power
                        json_data[session_id]['rest']['ecg'][
                            'hf_hrv_power'] = hf_hrv_power
                    else:
                        json_data[session_id]['rest']['ecg'][
                            'average_rri'] = None
                        json_data[session_id]['rest']['ecg']['rmssd'] = None
                        json_data[session_id]['rest']['ecg'][
                            'mf_hrv_power'] = None
                        json_data[session_id]['rest']['ecg'][
                            'hf_hrv_power'] = None
                    del json_data[session_id]['rest']['ecg']['rri']
                    del json_data[session_id]['rest']['ecg'][
                        'rri_interpolated']
                    # --- per-block features (same pipeline as 'rest') ---
                    for block in json_data[session_id]['blocks']:
                        if block['ppg']['single_waveforms'] is not None:
                            block['ppg']['ppg45'] = [
                                extract_ppg45(
                                    single_waveform=single_waveform,
                                    sample_rate=block['ppg']['sample_rate'])
                                for single_waveform in block['ppg']
                                ['single_waveforms']
                            ]
                            block['ppg']['svri'] = [
                                extract_svri(single_waveform=single_waveform)
                                for single_waveform in block['ppg']
                                ['single_waveforms']
                            ]
                        else:
                            block['ppg']['ppg45'] = None
                            block['ppg']['svri'] = None
                        del block['ppg']['single_waveforms']
                        if block['skin_conductance']['signal'] is not None:
                            block['skin_conductance'][
                                'average_level'] = extract_average_skin_conductance_level(
                                    signal=block['skin_conductance']['signal'])
                            block['skin_conductance'][
                                'minimum_level'] = extract_minimum_skin_conductance_level(
                                    signal=block['skin_conductance']['signal'])
                        else:
                            block['skin_conductance']['average_level'] = None
                            block['skin_conductance']['minimum_level'] = None
                        del block['skin_conductance']['signal']
                        if block['ecg']['rri'] is not None:
                            block['ecg']['average_rri'] = extract_average_rri(
                                rri=block['ecg']['rri'])
                            block['ecg']['rmssd'] = extract_rmssd(
                                rri=block['ecg']['rri'])
                            mf_hrv_power, hf_hrv_power = extract_hrv_power(
                                rri=block['ecg']['rri_interpolated'],
                                sample_rate=block['ecg']['sample_rate'])
                            block['ecg']['mf_hrv_power'] = mf_hrv_power
                            block['ecg']['hf_hrv_power'] = hf_hrv_power
                        else:
                            block['ecg']['average_rri'] = None
                            block['ecg']['rmssd'] = None
                            block['ecg']['mf_hrv_power'] = None
                            block['ecg']['hf_hrv_power'] = None
                        del block['ecg']['rri']
                        del block['ecg']['rri_interpolated']
                dump_json(data=json_data,
                          pathname=os.path.join(extracted_data_dir,
                                                filename_with_ext),
                          overwrite=True)
# ---- Example #14 ----
def select_feature():
    """Run recursive feature selection per participant and export the scores.

    For every participant JSON in data/merged, and for every (level pair,
    feature-type set) combination, trains (or loads a cached) feature
    selection classifier, records its per-subset-size grid scores, and
    exports them both as JSON (one file per level pair) and as CSV (one row
    per participant plus an 'average' row).
    """
    merged_data_dir = os.path.join(BASE_DIR, 'data', 'merged')
    model_dir = os.path.join(BASE_DIR, 'models', 'feature_selection')
    result_dir = os.path.join(BASE_DIR, 'results', 'feature_selection')

    # Pairs of workload levels to discriminate between.
    level_sets = [
        ['0', '2'],
        ['0', '1'],
        ['1', '2'],
    ]
    feature_type_sets = [
        ['ppg45_cr'],
    ]

    if exist(pathname=merged_data_dir):
        # result_data[level_set_name][feature_type_set_name]['grid_scores']
        #   [participant] -> list of grid scores.
        result_data = {}
        for filename_with_ext in fnmatch.filter(os.listdir(merged_data_dir),
                                                '*.json'):
            participant = os.path.splitext(filename_with_ext)[0]
            pathname = os.path.join(merged_data_dir, filename_with_ext)
            json_data = load_json(pathname=pathname)
            if json_data is not None:
                for level_set in level_sets:
                    level_set_name = '-'.join(level_set)
                    if level_set_name not in result_data:
                        result_data[level_set_name] = {}
                    for feature_type_set in feature_type_sets:
                        feature_type_set_name = '-'.join(feature_type_set)
                        if feature_type_set_name not in result_data[
                                level_set_name]:
                            result_data[level_set_name][
                                feature_type_set_name] = {
                                    'grid_scores': {},
                                }
                        features, labels = get_merged_feature_set(
                            data=json_data,
                            level_set=level_set,
                            feature_type_set=feature_type_set)
                        # Reuse a previously trained model when cached on
                        # disk; otherwise train and cache it.
                        model_pathname = os.path.join(model_dir,
                                                      level_set_name,
                                                      feature_type_set_name,
                                                      '%s.model' % participant)
                        classifier = load_model(pathname=model_pathname)
                        if classifier is None:
                            classifier = feature_selection_classifier(
                                features=features, labels=labels)
                            dump_model(model=classifier,
                                       pathname=model_pathname)
                        print(participant, level_set_name,
                              feature_type_set_name)
                        # NOTE(review): 'grid_scores_' looks like sklearn
                        # RFECV's attribute, which was removed in newer
                        # sklearn (replaced by 'cv_results_') — this relies
                        # on the pinned library version; confirm.
                        result_data[level_set_name][feature_type_set_name][
                            'grid_scores'][
                                participant] = classifier.grid_scores_.tolist(
                                )

        for level_set_name in result_data:
            dump_json(data=result_data[level_set_name],
                      pathname=os.path.join(result_dir,
                                            '%s.json' % level_set_name),
                      overwrite=True)
            for feature_type_set in feature_type_sets:
                feature_type_set_name = '-'.join(feature_type_set)
                csv_data = []
                all_grid_scores = []
                # One CSV row per participant: columns '1'..'N' hold the
                # grid score for each feature-subset size.
                for participant in result_data[level_set_name][
                        feature_type_set_name]['grid_scores']:
                    csv_row = {
                        'participant': participant,
                    }
                    grid_scores = result_data[level_set_name][
                        feature_type_set_name]['grid_scores'][participant]
                    all_grid_scores.append(grid_scores)
                    for score_index, score in list(enumerate(grid_scores)):
                        csv_row[str(score_index + 1)] = score
                    csv_data.append(csv_row)
                # Column-wise average across participants.
                csv_row = {
                    'participant': 'average',
                }
                for scores_index, scores in list(
                        enumerate([
                            list(x) for x in zip(*[
                                grid_scores for grid_scores in all_grid_scores
                            ])
                        ])):
                    csv_row[str(scores_index + 1)] = sum(scores) / len(scores)
                csv_data.append(csv_row)
                # NOTE(review): 'participant' here is the leftover loop
                # variable from the row loop above — this assumes every
                # participant has the same number of grid scores (and that
                # the dict is non-empty); confirm.
                fieldnames = ['participant'] + [
                    str(x) for x in list(
                        range(
                            1,
                            len(result_data[level_set_name]
                                [feature_type_set_name]['grid_scores']
                                [participant]) + 1))
                ]
                export_csv(data=csv_data,
                           fieldnames=fieldnames,
                           pathname=os.path.join(
                               result_dir, feature_type_set_name,
                               '%s-grid_scores.csv' % level_set_name),
                           overwrite=True)
# ---- Example #15 ----
def stats():
    """Aggregate per-block statistics and export them as JSON + one CSV.

    For every participant JSON in data/extracted, collects per-session,
    per-level statistics (RSME, correct rate, mean sVRI, skin conductance
    levels, and HRV features), writes one JSON per participant to
    data/stats, and exports a single results/stats.csv with one row per
    participant (columns named 's<session>_l<level>_<metric>').
    """
    extracted_data_dir = os.path.join(BASE_DIR, 'data', 'extracted')
    stats_data_dir = os.path.join(BASE_DIR, 'data', 'stats')
    result_dir = os.path.join(BASE_DIR, 'results')
    fieldnames = [
        'name',
        's1_l0_rsme',
        's1_l0_correct_rate',
        's1_l0_svri',
        's1_l0_minimum_skin_conductance_level',
        's1_l0_average_skin_conductance_level',
        's1_l0_average_rri',
        's1_l0_rmssd',
        's1_l0_lf_hrv_power',
        's1_l0_hf_hrv_power',
        's1_l1_rsme',
        's1_l1_correct_rate',
        's1_l1_svri',
        's1_l1_minimum_skin_conductance_level',
        's1_l1_average_skin_conductance_level',
        's1_l1_average_rri',
        's1_l1_rmssd',
        's1_l1_lf_hrv_power',
        's1_l1_hf_hrv_power',
        's1_l2_rsme',
        's1_l2_correct_rate',
        's1_l2_svri',
        's1_l2_minimum_skin_conductance_level',
        's1_l2_average_skin_conductance_level',
        's1_l2_average_rri',
        's1_l2_rmssd',
        's1_l2_lf_hrv_power',
        's1_l2_hf_hrv_power',
        's2_l0_rsme',
        's2_l0_correct_rate',
        's2_l0_svri',
        's2_l0_minimum_skin_conductance_level',
        's2_l0_average_skin_conductance_level',
        's2_l0_average_rri',
        's2_l0_rmssd',
        's2_l0_lf_hrv_power',
        's2_l0_hf_hrv_power',
        's2_l1_rsme',
        's2_l1_correct_rate',
        's2_l1_svri',
        's2_l1_minimum_skin_conductance_level',
        's2_l1_average_skin_conductance_level',
        's2_l1_average_rri',
        's2_l1_rmssd',
        's2_l1_lf_hrv_power',
        's2_l1_hf_hrv_power',
        's2_l2_rsme',
        's2_l2_correct_rate',
        's2_l2_svri',
        's2_l2_minimum_skin_conductance_level',
        's2_l2_average_skin_conductance_level',
        's2_l2_average_rri',
        's2_l2_rmssd',
        's2_l2_lf_hrv_power',
        's2_l2_hf_hrv_power',
    ]

    if exist(pathname=extracted_data_dir):
        csv_data = []
        for filename_with_ext in fnmatch.filter(os.listdir(extracted_data_dir), '*.json'):
            participant = os.path.splitext(filename_with_ext)[0]
            output_data = {}
            csv_row = {
                'name': participant,
            }
            pathname = os.path.join(extracted_data_dir, filename_with_ext)
            json_data = load_json(pathname=pathname)
            if json_data is not None:
                for session_id in json_data:
                    if session_id not in output_data:
                        output_data[session_id] = {}
                    for block in json_data[session_id]['blocks']:
                        # Unanswered stimuli (correct is None) are excluded
                        # from the correct count but still counted in the
                        # denominator.
                        correct_count = sum([item['correct'] for item in block['stimuli'] if item['correct'] is not None])
                        stimuli_count = len(block['stimuli'])
                        # BUG FIX: this used Python 2 integer division
                        # (correct_count / stimuli_count), which truncated
                        # every non-perfect block's rate to 0.  Use float
                        # division, and guard against an empty stimulus list.
                        if stimuli_count:
                            correct_rate = float(correct_count) / stimuli_count
                        else:
                            correct_rate = 0.0
                        svri = np.mean(block['ppg']['svri'])
                        output_data[session_id][block['level']] = {
                            'rsme': block['rsme'],
                            'correct_count': correct_count,
                            'stimuli_count': stimuli_count,
                            'correct_rate': correct_rate,
                            'svri': svri,
                            'minimum_skin_conductance_level': block['skin_conductance']['minimum_level'],
                            'average_skin_conductance_level': block['skin_conductance']['average_level'],
                            'average_rri': block['ecg']['average_rri'],
                            'rmssd': block['ecg']['rmssd'],
                            'lf_hrv_power': block['ecg']['lf_hrv_power'],
                            'hf_hrv_power': block['ecg']['hf_hrv_power'],
                        }
                        # CSV columns are keyed 's<session>_l<level>_<metric>'
                        # to match the fieldnames declared above.
                        csv_row['s%s_l%s_rsme' % (session_id, block['level'])] = block['rsme']
                        csv_row['s%s_l%s_correct_rate' % (session_id, block['level'])] = correct_rate
                        csv_row['s%s_l%s_svri' % (session_id, block['level'])] = svri
                        csv_row['s%s_l%s_minimum_skin_conductance_level' % (session_id, block['level'])] = block['skin_conductance']['minimum_level']
                        csv_row['s%s_l%s_average_skin_conductance_level' % (session_id, block['level'])] = block['skin_conductance']['average_level']
                        csv_row['s%s_l%s_average_rri' % (session_id, block['level'])] = block['ecg']['average_rri']
                        csv_row['s%s_l%s_rmssd' % (session_id, block['level'])] = block['ecg']['rmssd']
                        csv_row['s%s_l%s_lf_hrv_power' % (session_id, block['level'])] = block['ecg']['lf_hrv_power']
                        csv_row['s%s_l%s_hf_hrv_power' % (session_id, block['level'])] = block['ecg']['hf_hrv_power']
                dump_json(data=output_data, pathname=os.path.join(stats_data_dir, filename_with_ext), overwrite=True)
                csv_data.append(csv_row)
        export_csv(data=csv_data, fieldnames=fieldnames, pathname=os.path.join(result_dir, 'stats.csv'), overwrite=True)