Пример #1
0
def get_data_by_id(feature_table, gender=False):
    """Join one feature table onto the train, develop and test splits.

    Args:
        feature_table: Name of the feature table fetched via ``SqlHandler``.
        gender: When false, every split is merged as a whole. When true,
            every split is first divided on its ``Gender`` column
            (``1`` -> male, ``0`` -> female) and each part is merged
            separately.

    Returns:
        ``(train, dev, test)`` when ``gender`` is false, otherwise
        ``(train_male, dev_male, test_male,
        train_female, dev_female, test_female)``.
    """
    db = SqlHandler()
    feature_df = db.get_df(feature_table)
    # Convert the ID column to numeric values.
    feature_df['ID'] = feature_df['ID'].apply(pd.to_numeric)

    train_df = db.get_df(config.tbl_train_ros_set)
    dev_df = db.get_df(config.tbl_develop_set)
    test_df = db.get_df(config.tbl_test_set)
    splits = (train_df, dev_df, test_df)

    if gender:
        # Split every set by gender first, then merge each half with the
        # features (male before female within each split).
        halves = [(df[df['Gender'] == 1], df[df['Gender'] == 0])
                  for df in splits]
        merged = [(merge_df_by_id(male, feature_df),
                   merge_df_by_id(female, feature_df))
                  for male, female in halves]
        males, females = zip(*merged)
        return (*males, *females)

    # Gender is ignored: merge the feature table with each split's labels.
    return tuple(merge_df_by_id(df, feature_df) for df in splits)
Пример #2
0
def get_data_multi_modality(tables, gender=False):
    """Collect feature tables per modality and pair them with train/dev sets.

    Each name in ``tables`` is read through ``SqlHandler`` and filed under
    audio, video or text depending on which of ``AUDIO_TABLE``,
    ``VIDEO_TABLE`` or ``TEXT_TABLE`` contains it; names found in none of
    them are ignored. The frames of each modality are combined with
    ``merge_dfs_by_id`` and then merged with the train and develop sets.

    Args:
        tables: Iterable of feature table names.
        gender: When true, the train and develop sets are split on the
            ``Gender`` column (``1`` -> male, ``0`` -> female) first.

    Returns:
        A dict keyed ``'<modality>_train'`` / ``'<modality>_dev'`` when
        ``gender`` is false; otherwise ``{'male': {...}, 'female': {...}}``
        with that same inner layout.
    """
    db = SqlHandler()
    audio_frames, video_frames, text_frames = [], [], []
    for table in tables:
        # Pick the list this table belongs to; skip unknown tables.
        if table in AUDIO_TABLE:
            bucket = audio_frames
        elif table in VIDEO_TABLE:
            bucket = video_frames
        elif table in TEXT_TABLE:
            bucket = text_frames
        else:
            continue
        bucket.append(db.get_df(table))

    # NOTE: the 'vedio' spelling is part of the returned keys and is kept
    # as-is because callers look the entries up by these exact strings.
    modalities = (
        ('audio_train', 'audio_dev', merge_dfs_by_id(audio_frames)),
        ('vedio_train', 'vedio_dev', merge_dfs_by_id(video_frames)),
        ('text_train', 'text_dev', merge_dfs_by_id(text_frames)),
    )

    train_df = db.get_df(config.tbl_train_ros_set)
    dev_df = db.get_df(config.tbl_develop_set)

    def _bundle(train_part, dev_part):
        # One train/dev pair per modality, in audio, video, text order.
        bundle = {}
        for train_key, dev_key, features in modalities:
            bundle[train_key] = merge_df_by_id(train_part, features)
            bundle[dev_key] = merge_df_by_id(dev_part, features)
        return bundle

    if not gender:
        return _bundle(train_df, dev_df)

    male_train = train_df[train_df['Gender'] == 1]
    female_train = train_df[train_df['Gender'] == 0]
    male_dev = dev_df[dev_df['Gender'] == 1]
    female_dev = dev_df[dev_df['Gender'] == 0]

    return {
        'male': _bundle(male_train, male_dev),
        'female': _bundle(female_train, female_dev),
    }
Пример #3
0
def get_data_by_id(feature_table, gender=False):
    """Join one feature table onto the training and develop sets.

    Args:
        feature_table: Name of the feature table fetched via ``SqlHandler``.
        gender: When false, each set is merged as a whole. When true, each
            set is first split on its ``Gender`` column (``1`` -> male,
            ``0`` -> female) and every part is merged separately.

    Returns:
        ``(train_set, dev_set)`` when ``gender`` is false, otherwise
        ``(train_male, dev_male, train_female, dev_female)``.
    """
    sql_handler = SqlHandler()
    feature = sql_handler.get_df(feature_table)
    # Convert the ID column to numeric values.
    feature['ID'] = feature['ID'].apply(pd.to_numeric)
    train = sql_handler.get_df(config.tbl_training_set)
    dev = sql_handler.get_df(config.tbl_develop_set)

    if not gender:
        train_set = merge_df_by_id(train, feature)
        dev_set = merge_df_by_id(dev, feature)
        return train_set, dev_set

    train_male = train[train['Gender'] == 1]
    train_female = train[train['Gender'] == 0]
    # The develop subsets must be sliced from ``dev``; slicing them from
    # ``train`` would return training rows as the "dev" sets.
    dev_male = dev[dev['Gender'] == 1]
    dev_female = dev[dev['Gender'] == 0]

    train_male = merge_df_by_id(train_male, feature)
    train_female = merge_df_by_id(train_female, feature)
    dev_male = merge_df_by_id(dev_male, feature)
    dev_female = merge_df_by_id(dev_female, feature)
    return train_male, dev_male, train_female, dev_female
Пример #4
0
def createCodebook(feature_name, n_participants=20):
    """Build an openXBOW codebook from a random sample of participants.

    Frame-level features of randomly drawn train/dev participants are
    appended to one CSV in the codebook directory, and openXBOW is then run
    on that CSV to write ``<feature_name>_codebook``.

    Underlying command, for reference:
        java -jar openXBOW.jar -i examples/example2/llds.csv -o bow.csv
        -a 1 -log -c kmeans++ -size 100 -B codebook -writeName -writeTimeStamp

    Args:
        feature_name: ``'faus'``, ``'gaze_pose'`` or ``'covarep'``; any other
            value is processed as formant features.
        n_participants: Number of participants drawn without replacement
            from the combined train/dev IDs (original note: 50 for video,
            20 for audio). Defaults to 20.
    """
    sqlhandler = SqlHandler()
    train = sqlhandler.get_df(config.tbl_training_set)
    dev = sqlhandler.get_df(config.tbl_develop_set)
    trainID = train['Participant_ID'].values
    devID = dev['Participant_ID'].values
    trainDev = np.hstack([trainID, devID])
    folds = np.random.choice(trainDev, n_participants, replace=False)

    window_size = 4
    hop_size = 1
    openxbow = 'java -jar E:/openXBOW/openXBOW.jar '
    openxbow_options = '-writeName -writeTimeStamp -t ' + str(
        window_size) + ' ' + str(hop_size)
    codebook_out = 'E:/openXBOW/codebooks/'
    openxbow_options_codebook = f'-size 100 -a 1 -log -B {codebook_out}{feature_name}_codebook '

    # Each branch appends the sampled features to ``input_csv``; the same
    # path is handed to openXBOW below, so the file written and the file
    # read cannot drift apart.
    if feature_name == 'faus':
        input_csv = codebook_out + 'fausTrainDevRandom.csv'
        for fold in folds:
            path = config.data_dir + str(fold) + '_P/' + str(
                fold) + '_' + SUFFIX['au']
            feature = np.loadtxt(path, delimiter=',', skiprows=1)
            # Column 3 is presumably the tracker's success flag (TODO
            # confirm); keep only rows where it equals 1.
            success = feature[:, 3] == 1
            feature = feature[success, 1:18]
            feature = np.delete(feature, [1, 2], axis=1)
            save_features(input_csv,
                          feature,
                          append=True,
                          instname=str(fold))
    elif feature_name == 'gaze_pose':
        input_csv = codebook_out + 'gazePoseTrainDevRandom.csv'
        for fold in folds:
            path1 = config.data_dir + str(fold) + '_P/' + str(
                fold) + '_' + SUFFIX['gaze']
            path2 = config.data_dir + str(fold) + '_P/' + str(
                fold) + '_' + SUFFIX['pose']
            gaze_data = pd.read_csv(path1)
            pose_data = pd.read_csv(path2)
            if fold in [367, 396, 432]:
                # Participants 367, 396 and 432 have missing/abnormal pose
                # values (' -1.#IND'); drop the affected rows.
                temp = np.all(pose_data.values != ' -1.#IND', axis=1)
                # Merge keys: frame, timestamp, confidence, success.
                data = pd.merge(gaze_data, pose_data)
                data = data[temp]
                # The last six columns are strings with a leading character
                # in these files; strip it and convert to float.
                data.iloc[:, -6:] = data.iloc[:, -6:].applymap(
                    lambda x: float(x[1:]))
            else:
                data = pd.merge(gaze_data, pose_data)
            success = data[' success'] == 1
            data = data.values[:, 1:]
            data = np.delete(data, [1, 2], axis=1)
            data = data[success]
            save_features(input_csv,
                          data,
                          append=True,
                          instname=str(fold))
    elif feature_name == 'covarep':
        input_csv = codebook_out + 'covarepTrainDevRandom.csv'
        for fold in folds:
            path = config.data_dir + str(fold) + '_P/' + str(
                fold) + '_' + SUFFIX['covarep']
            data = np.loadtxt(path, delimiter=',')
            # Prepend a timestamp column: row index / 100 (presumably
            # 100 frames per second - TODO confirm).
            timestamp = np.arange(0, data.shape[0]).reshape(data.shape[0], 1)
            timestamp = timestamp / 100
            data = np.hstack([timestamp, data])
            # Keep rows whose flag column equals 1, then drop that column
            # (presumably the voiced/unvoiced flag - TODO confirm).
            data = data[data[:, 2] == 1]
            data = np.delete(data, 2, axis=1)
            data[np.isnan(data)] = 0
            data[np.isinf(data)] = 0
            save_features(input_csv,
                          data,
                          append=True,
                          instname=str(fold))
    else:
        input_csv = codebook_out + 'formantTrainDevRandom.csv'
        for fold in folds:
            path = config.data_dir + str(fold) + '_P/' + str(
                fold) + '_' + SUFFIX['formant']
            data = np.loadtxt(path, delimiter=',')
            # Prepend a timestamp column: row index / 100 (presumably
            # 100 frames per second - TODO confirm).
            timestamp = np.arange(0, data.shape[0]).reshape(data.shape[0], 1)
            timestamp = timestamp / 100
            data = np.hstack([timestamp, data])
            data[np.isnan(data)] = 0
            data[np.isinf(data)] = 0
            save_features(input_csv,
                          data,
                          append=True,
                          instname=str(fold))

    # Standardize the stacked features and learn the codebook with
    # kmeans++; the bag-of-words output itself goes to a scratch file.
    os.system(openxbow + f'-standardizeInput -i {input_csv} ' +
              openxbow_options_codebook + openxbow_options +
              ' -c kmeans++' + f' -o {codebook_out}temp.csv')