def prep_pipeline(dataset='RumEv', seq=True, feature_set=None, fs_name="text", timeline=False):
    """Extract per-conversation features from the full PHEME data and save
    one set of arrays (`train_array`, `labels`, `ids`) per fold under
    ``saved_data_<dataset><fs_name>/<fold>/``.

    Parameters
    ----------
    dataset : str
        Only used to name the output directory; the loader is always
        ``read_fullPHEME()`` regardless of this value.
    seq : bool
        If True, each branch of a thread becomes one training row (label/id
        replicated per branch); if False, one row per conversation.
    feature_set : list[str] | None
        Feature names passed to ``transform_feature_dict``; defaults to
        ``['avgw2v']``. (``None`` sentinel avoids a shared mutable default.)
    fs_name : str
        Suffix appended to the output directory name.
    timeline : bool
        Forwarded to ``transform_feature_dict``.
    """
    if feature_set is None:
        feature_set = ['avgw2v']
    path = 'saved_data_' + dataset + fs_name
    folds = read_fullPHEME()
    help_prep_functions.loadW2vModel()
    for fold in folds.keys():
        print(fold)
        feature_fold = []
        labels = []
        ids = []
        for conversation in folds[fold]:
            thread_feature_dict = extract_thread_features_incl_response(
                conversation)
            thread_features_array = transform_feature_dict(
                thread_feature_dict, conversation,
                sequential=seq, feature_set=feature_set,
                timeline=timeline)
            if seq:
                # One row per branch: replicate the thread-level veracity
                # label and conversation id for every branch produced.
                feature_fold.extend(thread_features_array)
                for _ in range(len(thread_features_array)):
                    labels.append(convert_label(conversation['veracity']))
                    ids.append(conversation['id'])
            else:
                feature_fold.append(thread_features_array)
                labels.append(convert_label(conversation['veracity']))
                ids.append(conversation['id'])
        feature_fold = pad_sequences(feature_fold, maxlen=None,
                                     dtype='float32', padding='post',
                                     truncating='post', value=0.)
        labels = np.asarray(labels)
        path_fold = os.path.join(path, fold)
        # exist_ok avoids the check-then-create race of the old
        # os.path.exists() guard.
        os.makedirs(path_fold, exist_ok=True)
        np.save(os.path.join(path_fold, 'train_array'), feature_fold)
        np.save(os.path.join(path_fold, 'labels'), labels)
        np.save(os.path.join(path_fold, 'ids'), ids)
# NOTE(review): fragment — the `else:` below has no matching `if`/`for`
# visible in this excerpt; its controlling statement lives outside this view.
# Code kept token-identical; comments only.
else:
    print(label)

# Script-level driver: load folds for the chosen dataset, expand them into
# partial (subtree) folds, then walk the nested fold -> tree -> subtree
# structure, building a mirror dict of (so far empty) result slots.
dataset = 'fullPHEME'
feature_set = ['avgw2v']
path = 'saved_data' + dataset
folds = {}
if dataset == 'RumEv':
    folds = read_RumEv()
else:
    folds = read_fullPHEME()
newfolds = add_partial_folds(folds)
help_prep_functions.loadW2vModel()
#%%
for fold in list(newfolds.keys()):
    print(fold)
    # NOTE(review): re-created per fold, so only the current fold's entries
    # survive each iteration — presumably intentional; verify against the
    # missing continuation below.
    preprocessed_expanded_folds = {}
    preprocessed_expanded_folds[fold] = {}
    for tree in list(newfolds[fold].keys()):
        preprocessed_expanded_folds[fold][tree] = {}
        subtree_dict = newfolds[fold][tree]
        for subtree_num in list(subtree_dict.keys()):
            preprocessed_expanded_folds[fold][tree][subtree_num] = {}
            subtree = subtree_dict[subtree_num]
            # NOTE(review): loop body appears truncated here — the
            # processing of `subtree` continues outside this excerpt.
def prep_pipeline(dataset='RumEval2019', feature_set=None):
    """Extract features for RumEval2019 (Twitter folds merged with the
    Reddit data) and save per-fold arrays under ``saved_data<dataset>/``.

    Saves, for each fold: ``train_array`` (padded branch features),
    ``labels`` (veracity labels, one per branch), ``fold_stance_labels``
    (padded per-tweet stance labels), ``ids`` (conversation ids, one per
    branch) and ``tweet_ids`` (branch tweet-id sequences).

    Parameters
    ----------
    dataset : str
        Suffix for the output directory name.
    feature_set : list[str] | None
        Feature names for ``transform_feature_dict``; defaults to
        ``['avgw2v']`` (``None`` sentinel avoids a mutable default).
    """
    if feature_set is None:
        feature_set = ['avgw2v']
    path = 'saved_data' + dataset
    folds = load_dataset()
    # Merge the Reddit conversations into every split.
    reddit = load_data()
    for split in ('train', 'dev', 'test'):
        folds[split].extend(reddit[split])
    help_prep_functions.loadW2vModel()
    #%%
    for fold in folds.keys():
        print(fold)
        feature_fold = []
        tweet_ids = []
        fold_stance_labels = []
        labels = []
        ids = []
        for conversation in folds[fold]:
            thread_feature_dict = extract_thread_features_incl_response(
                conversation)
            thread_features_array, thread_stance_labels, branches = transform_feature_dict(
                thread_feature_dict, conversation, feature_set=feature_set)
            fold_stance_labels.extend(thread_stance_labels)
            tweet_ids.extend(branches)
            feature_fold.extend(thread_features_array)
            # One row per branch: replicate the thread-level label and id.
            for _ in range(len(thread_features_array)):
                labels.append(convert_label(conversation['veracity']))
                ids.append(conversation['id'])
        # Skip empty folds — pad_sequences would fail on an empty list.
        if feature_fold:
            feature_fold = pad_sequences(feature_fold, maxlen=None,
                                         dtype='float32', padding='post',
                                         truncating='post', value=0.)
            fold_stance_labels = pad_sequences(fold_stance_labels,
                                               maxlen=None, dtype='float32',
                                               padding='post',
                                               truncating='post', value=0.)
            labels = np.asarray(labels)
            path_fold = os.path.join(path, fold)
            os.makedirs(path_fold, exist_ok=True)
            np.save(os.path.join(path_fold, 'train_array'), feature_fold)
            np.save(os.path.join(path_fold, 'labels'), labels)
            np.save(os.path.join(path_fold, 'fold_stance_labels'),
                    fold_stance_labels)
            np.save(os.path.join(path_fold, 'ids'), ids)
            np.save(os.path.join(path_fold, 'tweet_ids'), tweet_ids)
def prep_pipeline(dataset='15', seq=True, feature_set=None, timeline=False):
    """Extract features for Twitter15/Twitter16 and save per-fold arrays
    under ``saved_data_<dataset>/<fold>/{test,train}/``.

    Parameters
    ----------
    dataset : str
        '15' selects Twitter15; any other value selects Twitter16
        (matching the original if/else). Also names the output directory.
    seq : bool
        If True, one row per branch (label/id replicated per branch);
        if False, one row per conversation.
    feature_set : list[str] | None
        Feature names; defaults to ``['avgw2v']`` (``None`` sentinel avoids
        a mutable default).
    timeline : bool
        Forwarded to ``transform_feature_dict``.
    """
    if feature_set is None:
        feature_set = ['avgw2v']
    path = 'saved_data_' + dataset
    # Preserve the original dispatch: anything that isn't '15' loads '16'.
    ds = '15' if dataset == '15' else '16'
    folds_test = read_Twitter15_16(dataset=ds, set='test')
    folds_train = read_Twitter15_16(dataset=ds, set='train')
    help_prep_functions.loadW2vModel()
    # The test and train loops were identical; factored into one helper.
    _save_folds(folds_test, path, 'test', seq, feature_set, timeline)
    _save_folds(folds_train, path, 'train', seq, feature_set, timeline)


def _save_folds(folds, path, subdir, seq, feature_set, timeline):
    """Extract, pad and save features for every fold in *folds*,
    writing arrays to ``<path>/<fold>/<subdir>/``."""
    for fold in folds.keys():
        print(fold)
        feature_fold = []
        labels = []
        ids = []
        for conversation in folds[fold]:
            thread_feature_dict = extract_thread_features_incl_response(
                conversation)
            thread_features_array = transform_feature_dict(
                thread_feature_dict, conversation,
                sequential=seq, feature_set=feature_set,
                timeline=timeline)
            if seq:
                # One row per branch: replicate the thread label and id.
                feature_fold.extend(thread_features_array)
                for _ in range(len(thread_features_array)):
                    labels.append(convert_label(conversation['label']))
                    ids.append(conversation['id'])
            else:
                feature_fold.append(thread_features_array)
                labels.append(convert_label(conversation['label']))
                ids.append(conversation['id'])
        feature_fold = pad_sequences(feature_fold, maxlen=None,
                                     dtype='float32', padding='post',
                                     truncating='post', value=0.)
        labels = np.asarray(labels)
        path_fold = os.path.join(path, str(fold), subdir)
        os.makedirs(path_fold, exist_ok=True)
        np.save(os.path.join(path_fold, 'train_array'), feature_fold)
        np.save(os.path.join(path_fold, 'labels'), labels)
        np.save(os.path.join(path_fold, 'ids'), ids)
def prep_pipeline(dataset="RumEval2019", feature_set=None, use_reddit_data=True):
    """Extract features for RumEval2019 including the held-out test data,
    attach gold labels to test conversations from ``TEST_DATA_LABELS``, and
    save per-fold arrays under ``saved_data<dataset>/``.

    Parameters
    ----------
    dataset : str
        Suffix for the output directory name.
    feature_set : list[str] | None
        Feature names; defaults to ``["avgw2v"]`` (``None`` sentinel avoids
        a mutable default).
    use_reddit_data : bool
        Merge the Reddit train/dev/test conversations into the Twitter
        folds (was a hard-coded ``True`` local; now parameterized,
        defaulting to the original behavior).
    """
    if feature_set is None:
        feature_set = ["avgw2v"]
    path = "saved_data" + dataset
    folds = load_dataset()
    folds["test"] = load_test_data_twitter()["test"]
    if use_reddit_data:
        # load_data() is only called when actually needed (the old
        # unconditional call at the top fed nothing but dead code).
        reddit = load_data()
        folds["train"].extend(reddit["train"])
        folds["dev"].extend(reddit["dev"])
        folds["test"].extend(load_test_data_reddit()["test"])
    help_prep_functions.loadW2vModel()
    #%%
    # Reversed so the test fold is processed first, as in the original.
    for fold in reversed(list(folds.keys())):
        print(fold)
        feature_fold = []
        tweet_ids = []
        fold_stance_labels = []
        labels = []
        ids = []
        for conversation in folds[fold]:
            thread_feature_dict = extract_thread_features_incl_response(
                conversation)
            if fold == "test":
                # Test conversations ship without gold labels; attach
                # veracity and per-tweet stance from the answer key.
                conversation['veracity'] = TEST_DATA_LABELS["subtaskbenglish"][
                    conversation['id']]
                conversation['source']['label'] = TEST_DATA_LABELS[
                    "subtaskaenglish"][conversation['id']]
                for reply in conversation['replies']:
                    reply['label'] = TEST_DATA_LABELS["subtaskaenglish"][
                        reply['id_str']]
            (
                thread_features_array,
                thread_stance_labels,
                branches,
            ) = transform_feature_dict(thread_feature_dict, conversation,
                                       feature_set=feature_set)
            fold_stance_labels.extend(thread_stance_labels)
            tweet_ids.extend(branches)
            feature_fold.extend(thread_features_array)
            # One row per branch: replicate the thread label and id.
            for _ in range(len(thread_features_array)):
                labels.append(convert_label(conversation["veracity"]))
                ids.append(conversation["id"])
        # Skip empty folds — pad_sequences would fail on an empty list.
        if feature_fold:
            feature_fold = pad_sequences(
                feature_fold,
                maxlen=None,
                dtype="float32",
                padding="post",
                truncating="post",
                value=0.0,
            )
            fold_stance_labels = pad_sequences(
                fold_stance_labels,
                maxlen=None,
                dtype="float32",
                padding="post",
                truncating="post",
                value=0.0,
            )
            labels = np.asarray(labels)
            path_fold = os.path.join(path, fold)
            os.makedirs(path_fold, exist_ok=True)
            np.save(os.path.join(path_fold, "train_array"), feature_fold)
            np.save(os.path.join(path_fold, "labels"), labels)
            np.save(os.path.join(path_fold, "fold_stance_labels"),
                    fold_stance_labels)
            np.save(os.path.join(path_fold, "ids"), ids)
            np.save(os.path.join(path_fold, "tweet_ids"), tweet_ids)