def plot_dense_big(cfg, nrow, ncol, st_cycle=1, st_cross=1):
    '''Plot the separability figures on one large canvas: one subplot per cross, laid out on an nrow*ncol grid.'''
    for cycle in range(st_cycle, cfg.cycles+1):
        # Create one large canvas for this cycle
        plt.figure()

        for cross in range(st_cross, cfg.cross_num+1):
            cfg.set_dir(cycle, cross)

            # Load the data
            print('\n', '--'*20, 'Start loading data from dataset {} '.format(cfg.dt), '--'*20, '\n')
            __, __, val_x, val_y = load_data(cross, cfg, get_tr=False)  # if cfg_2 is not None, lists holding both kinds of data are returned

            # Load the model and show its structure (summary printed only once, on the first cycle/cross)
            mdl = load_mdl_file(cfg)
            if cycle == st_cycle and cross == st_cross:
                mdl.summary()

            # Plot the separability at the model's dense layer into this cross's subplot
            plot_dense_embed(mdl, cfg, nrow, ncol, val_x, val_y, cross)

        # Save the large canvas to file
        plt.suptitle(cfg.mdl_nm+'__'+'cycle'+str(cycle)+'__'+'dense_separability')
        plt.savefig(cfg.save_one_dense_file, bbox_inches='tight')
        plt.close()


def plot_dense_all_sigle(cfg, st_cycle=1, st_cross=1):
    '''Plot all the dense-separability figures directly; not called from main.py.
    Run this file on its own to fill in these figures when they are missing.
    '''
    for cycle in range(st_cycle, cfg.cycles+1):
        for cross in range(st_cross, cfg.cross_num+1):
            cfg.set_dir(cycle, cross)

            # Load the data
            print('\n', '--'*20, 'Start loading data from dataset {} '.format(cfg.dt), '--'*20, '\n')
            __, __, val_x, val_y = load_data(cross, cfg, get_tr=False)  # if cfg_2 is not None, lists holding both kinds of data are returned

            # Load the model and show its structure (summary printed only once, on the first cycle/cross)
            mdl = load_mdl_file(cfg)
            if cycle == st_cycle and cross == st_cross:
                mdl.summary()

            # Plot the separability at the model's dense layer, then save the image
            plot_dense_single(mdl, cfg, val_x, val_y, cross)
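

# A minimal sketch (not from the original file) of how these helpers might be invoked when
# running this module standalone to back-fill missing figures. `Config` and its import path
# are assumptions, not part of the original code.
if __name__ == '__main__':
    from config import Config               # hypothetical import; adjust to the real project layout
    cfg = Config()                           # hypothetical: build the project configuration object
    plot_dense_all_sigle(cfg)                # one figure per cycle/cross
    plot_dense_big(cfg, nrow=2, ncol=5)      # one large canvas per cycle; grid size is just an example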
import os
import pickle

import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from preprocess_data import load_data
from tools import load_config, save_params

ProcessedData = './processed_data/preprocess.p'
ProcessedParams = './processed_data/params.p'
ProcessedDataDir = './processed_data'

try:
    title_count, title_set, genres2int, features, targets_values, ratings, users, movies, data, movies_orig, users_orig = pickle.load(
        open(ProcessedDataDir + os.sep + 'preprocess.p', mode='rb'))
except OSError:
    title_count, title_set, genres2int, features, targets_values, ratings, users, movies, data, movies_orig, users_orig = load_data()


embed_dim, uid_max, gender_max, age_max, job_max, movie_id_max, movie_categories_max, \
movie_title_max, combiner, sentences_size, window_sizes, filter_num = load_config()

movieid2idx = {val[0]: i for i, val in enumerate(movies.values)}
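
# Illustrative only (not part of the original script): the dict above maps a raw MovieLens
# movie id to its row position in `movies`, which is what embedding lookups need. The id
# below is a made-up example value.
example_movie_id = 1193                          # hypothetical id
example_row = movieid2idx.get(example_movie_id)  # row index into movies.values, or None if absent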

'''
Hyper parameters

'''

# Number of Epochs
num_epochs = 1
# Batch Size
Example #4
    print(f"Num GPUs Available: {len(physical_devices)}")
    tf.config.experimental.set_memory_growth(physical_devices[0], True)

    # Load test data
    test_in = open("toktest.pkl", "rb")
    test_set = pickle.load(test_in)
    x_test = remove_chars(test_set[0])

    # Identify models to test, with their appropriate character conversion dictionaries and dictionary sizes

    text_name_1 = "Wb. Training Glosses"
    text_designation_1 = "Wb"
    one_text = [
        rem_dubspace(" ".join(pickle.load(open("toktrain.pkl", "rb"))))
    ]
    mapping_1 = map_chars(load_data(one_text, text_name_1))
    model_1 = "models\\models\\Wb-model, 2 layer(s) of 100 LSTM Nodes, 1 Dense, 250 Ep, No Bat, 10.0% Val"
    char_dict_1, rchardict_1, size_vocab_1 = mapping_1[0], mapping_1[
        1], mapping_1[2]

    text_name_2 = "Sg. Training Glosses"
    text_designation_2 = "Sg"
    two_text = [
        rem_dubspace(" ".join(
            load_conllu('sga_dipsgg-ud-test_combined_POS.conllu')))
    ]
    mapping_2 = map_chars(load_data(two_text, text_name_2))
    model_2 = "models\\models\\Sg-model, 2 layer(s) of 100 LSTM Nodes, 1 Dense, 250 Ep, No Bat, 10.0% Val"
    char_dict_2, rchardict_2, size_vocab_2 = mapping_2[0], mapping_2[
        1], mapping_2[2]
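
    # The unpacking above assumes map_chars(...) returns (char-to-index dict, index-to-char
    # dict, vocabulary size). A hedged sketch of how the forward dict would then be used to
    # encode text for the models (not original code):
    #   encoded = [char_dict_1[c] for c in x_test[0] if c in char_dict_1]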

        # If the program was interrupted before, these result files already exist, so load each of
        # them and resume training. The training time stored in them cannot be updated, so treat
        # the recorded time as a rough reference only.
        # This block also makes every run after the 0th load its variables from the earlier files.
        print("\n\n\n", '#' * 70, "Start processing cross {} ".format(cross), '#' * 70)
        if os.path.exists(cfg.cnf_mtr_cycle_file):
            cnf_mtr_cycle = np.load(cfg.cnf_mtr_cycle_file)
            acc_list = np.load(cfg.acc_cycle_file)
            best_acc_list = np.load(cfg.best_acc_cycle_file)
            pre_list = np.load(cfg.pre_cycle_file)
            re_list = np.load(cfg.re_cycle_file)
            f1_list = np.load(cfg.f1_cycle_file)
            print("\nReloading results from file...... \n")


        # Load the data
        print('\n', '--'*20, 'Start loading data from dataset {} '.format(cfg.dt), '--'*20, '\n')
        tr_x, tr_y, val_x, val_y = load_data(cross, cfg)

        # Normalize the data along the sample dimension (see the commented Z_Norm sketch below)
        if cfg.is_Z_Norm:
            tr_x = Z_Norm(tr_x)
            val_x = Z_Norm(val_x)
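        # Sketch of what Z_Norm is assumed to do here (per-sample z-score normalization);
        # the real implementation lives elsewhere in the project:
        #   def Z_Norm(x):
        #       mean = x.mean(axis=tuple(range(1, x.ndim)), keepdims=True)
        #       std = x.std(axis=tuple(range(1, x.ndim)), keepdims=True) + 1e-8
        #       return (x - mean) / std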


        # When debugging, reduce the amount of data
        if cfg.rdc_smp:
            tr_x = tr_x[:cfg.rdc_smp_num]
            tr_y = tr_y[:cfg.rdc_smp_num]
            val_x = val_x[:cfg.rdc_smp_num]
            val_y = val_y[:cfg.rdc_smp_num]

        # Build the model
Example #6
def handler():
    print('Analyse latest data...')
    t = pd.Timestamp(datetime.now())
    latest_bucket = t.floor('{}min'.format(Config.interval))  # assumed to be very close to (but still after) the desired timestamp

    convertJson.main()  # transform the JSON of the most recent tweets into csv format

    tweets = preprocess_data.load_data()

    filtered_tweets = preprocess_data.filter_spam(tweets)
    grid_tweets = preprocess_data.calc_grid(filtered_tweets)

    # create_time_series() can be used but here yields only one time slice
    # as the provided data is only the size of one interval
    timeslice, oldest = detect_crowded.create_time_series(grid_tweets)
    timeslice[0][1][1] = 200  # NOTE: hard-coded override, apparently left in from testing

    timeseries = get_file(Config.helper_files, 'timeseries.p')

    #uncomment if you want to preserve the timeseries from your static data. timeseries.p will be overwritten for real time mode
    #pickle.dump(timeseries, open(Config.helper_files + 'timeseries_from_static.p', 'wb'))

    sl_window = int(
        (24 / (Config.interval / 60)) * Config.sliding_window
    )  # how many times does interval fit into day * sliding window
    timeseries = np.append(timeseries, timeslice, axis=0)

    pickle.dump(timeseries, open(Config.helper_files + 'timeseries.p', 'wb'))

    if len(timeseries) < Config.sliding_window:
        print(
            'The timeseries is too small to detect crowded places. '
            'Please wait ', Config.sliding_window - len(timeseries),
            ' intervals more.')
        return

    crowded_places = detect_crowded.determine_crowded_per_cell_timeseries(
        timeseries, real_time_flag=True)

    if not crowded_places:
        print('No crowded places detected.')
    else:
        first_bucket = latest_bucket - pd.Timedelta(minutes=(len(timeseries) *
                                                             Config.interval))
        crowded_places = detect_crowded.check_amount_tweets(
            crowded_places, first_bucket)

        related_events_sample = analyse_crowded.get_details(
            grid_tweets, crowded_places)
        if not related_events_sample:
            print('No new events detected.')
        else:
            print('HURRAY! New events detected.')
            master_object = get_file(Config.results, 'master_object.p')  # stored under Config.results (see the dump below)
            for key, value in related_events_sample.items():
                master_object[key] = value
            pickle.dump(master_object,
                        open(Config.results + 'master_object.p', 'wb'))

    timeseries = timeseries[
        -sl_window:]  # trim timeseries to sliding window size
    del_raw_data()  # some deletion logic for raw data and outdated prep data
    del_prep_data()

    print('Processing for {0} finished. Next analysis is taking place at {1}'.
          format(latest_bucket,
                 latest_bucket + pd.Timedelta(minutes=Config.interval)))
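

# A minimal sketch (not part of the original file) of how handler() could be run once per
# interval; Config.interval is in minutes, everything else here is an assumption about the
# surrounding scheduling code.
if __name__ == '__main__':
    import time
    while True:
        handler()
        time.sleep(Config.interval * 60)  # wait one interval before the next analysis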
Example #7
import progressbar
from tqdm import tqdm
import numpy as np
import tensorflow as tf
import keras as k

# config = tf.ConfigProto( device_count = {'GPU': 1 , 'CPU': 4} )
# sess = tf.Session(config=config)
# k.backend.set_session(sess)

# import sys
# np.set_printoptions(threshold=sys.maxsize)
import preprocess_data as pre

pre.clean_dataset()
train_data, test_data = pre.load_data()
word_to_index = pre.get_word_to_index(train_data)
n_vocab = len(word_to_index)
print(word_to_index)
dialogs = pre.get_user_utterances(train_data, word_to_index)
test_dialogs = pre.get_user_utterances(test_data, word_to_index)
print(dialogs[0])
for turn in dialogs[0]:
    print(pre.index_to_word(turn, word_to_index))

food_dict, area_dict, pricerange_dict = pre.get_ontology(train_data)
bs, bs_index, bs_onehot = pre.get_belief_states(train_data, food_dict,
                                                area_dict, pricerange_dict)
test_bs, test_bs_index, test_bs_onehot = pre.get_belief_states(
    test_data, food_dict, area_dict, pricerange_dict)
food_nb_classes = len(food_dict)
Example #8
File: main.py  Project: TJWats0n/STACC
import preprocess_data, detect_crowded, convertJson, analyse_crowded
import pandas as pd
import ast, pickle, json
from config import Config

# ================================== Phase 1 - Load data ==========================================================

# #transform tweets from JSON to csv format
# #convertJson.main()

previous_map_size = pickle.load(open(Config.helper_files + 'previous_map_size.p', 'rb'))

if previous_map_size != Config.map_size:
    print('Map size was changed, so the data must be preprocessed again. This can take a minute or two.')

    tweets = preprocess_data.load_data()

    tweets = preprocess_data.filter_spam(tweets)

    tweets = preprocess_data.calc_grid(tweets)

    tweets.to_csv(Config.helper_files+'tweets.csv', sep='\t', encoding='utf-8')

    pickle.dump(Config.map_size, open(Config.helper_files+'previous_map_size.p', 'wb'))
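
# On a first run, previous_map_size.p will not exist yet and the pickle.load above would raise
# FileNotFoundError. A hedged variant (assumption, not the original project's handling):
#   try:
#       previous_map_size = pickle.load(open(Config.helper_files + 'previous_map_size.p', 'rb'))
#   except FileNotFoundError:
#       previous_map_size = None   # any value != Config.map_size triggers the preprocessing branch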

# ================================== Phase 2 - Detect Crowded =====================================================
print('Detecting crowd...')

tweets = pd.read_csv(Config.helper_files+'tweets.csv',
                         parse_dates={'datetime': ['date']},
                         converters={'grid': ast.literal_eval},  # without this, pandas would load the tuple as a string