Example #1
def bonus_clustering():
    df1_clean = rd.read_data()
    features = list(df1_clean.columns.values)
    df1_clean = rd.clean_df(df1_clean, False)
    labels = df1_clean.loc[:,['target']].values
    features = features[2:-1]
    data = df1_clean.loc[:,features].astype(float).astype(int)
    # run KMeans to obtain new labels
    points = data.values
    kmeans = KMeans(n_clusters=2)
    km = kmeans.fit(points)
    labels = pd.DataFrame(km.labels_)
    labels.columns = ['labels']
    # since we keep 11 of the 13 features, drawing an 11-D graph is impossible,
    # so PCA is used to reduce the dimensionality to 2 for plotting
    points_std = StandardScaler().fit_transform(points)
    pca = PCA(n_components=2)
    principalComponents = pca.fit_transform(points_std)
    principalDf = pd.DataFrame(data = principalComponents, columns = ['principal component 1', 'principal component 2'])
    finalDf = pd.concat([principalDf, labels], axis = 1)
    fig, ax = plt.subplots()
    colors = {1: 'red', 0: 'blue'}
    finalDf.plot.scatter(x='principal component 1', y='principal component 2',
                         c=finalDf.labels.map(colors), ax=ax)
    red_patch = mpatches.Patch(color='red', label='Class One')
    blue_patch = mpatches.Patch(color='blue', label='Class Two')
    plt.legend(handles=[red_patch,blue_patch])


    return get_graph_url(plt)
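Note that the labels read from the target column at the top of the function are immediately overwritten by the KMeans cluster labels. If one wanted to compare the two, a minimal sketch using scikit-learn's adjusted_rand_score (added here for illustration; it is not part of the original view):

from sklearn.metrics import adjusted_rand_score

# Sketch only: inside bonus_clustering(), after fitting KMeans, compare the
# cluster assignments against the original target column.
target = df1_clean.loc[:, ['target']].values.ravel()
print(adjusted_rand_score(target, km.labels_))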
Example #2
def produce_chart(chart_no):
    df = rd.read_data()
    chart_no = int(chart_no)
    chart_produce = {
        1: ['rd.draw_chest_pain_type(df)', 'Chest Pain Type'],
        2: ['rd.draw_resting_blood_pressure(df)', 'Resting Blood Pressure'],
        3: ['rd.draw_serum_cholestoral(df)', 'Serum Cholestoral'],
        4: ['rd.draw_fasting_blood_sugar(df)', 'Fasting Blood Sugar'],
        5: ['rd.draw_RER(df)', 'Resting Electrocardiographic Results'],
        6: ['rd.draw_Mhra(df)', 'Maximum Heart Rate Achieved'],
        7: ['rd.draw_exercise_induced_angina(df)', 'Exercise Induced Angina'],
        8: ['rd.draw_ST_Depression(df)', 'ST Depression'],
        9: [
            'rd.draw_slope_exercise_ST_segment(df)',
            'Slope of the Peak Exercise ST Segment'
        ],
        10: ['rd.draw_major_vessels(df)', 'Number of Major Vessels'],
        11: ['rd.draw_thal(df)', 'Thal(Thalassemia)']
    }

    title = chart_produce[chart_no][1]
    graph = eval(chart_produce[chart_no][0])
    return render_template('specific_graph.html',
                           graph=graph,
                           graph_no=chart_no,
                           title=title)
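For reference, the same dispatch can be written without eval by storing the rd.draw_* callables directly; a minimal sketch, assuming the same helpers and render_template used above:

def produce_chart_no_eval(chart_no):
    # Sketch only: map chart numbers to (callable, title) pairs instead of code strings.
    df = rd.read_data()
    chart_produce = {
        1: (rd.draw_chest_pain_type, 'Chest Pain Type'),
        2: (rd.draw_resting_blood_pressure, 'Resting Blood Pressure'),
        # ... remaining entries as in the original mapping ...
    }
    draw, title = chart_produce[int(chart_no)]
    return render_template('specific_graph.html',
                           graph=draw(df),
                           graph_no=int(chart_no),
                           title=title)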
Example #3
def clean_data (usePCA = False) :
    """
    """
    logging.info ('begin to clean the data')
    if os.path.exists (ROOT + '/data/cleandata.csv') :
        # we do not need to clean the data each time;
        # to re-clean it, delete the '../data/cleandata.csv' file
        logging.info ('the clean data already exists')
        data = pd.read_csv (ROOT + '/data/cleandata.csv')
        train_number, val_number, test_number, unlabel_number, label, uid = io.grab (ROOT + '/data/datadescribe')
    else :
        data, train_number, val_number, test_number, unlabel_number, label, uid = read.read_data ()
        data = feature_handler (data)
        # store the result
        data.to_csv (ROOT + '/data/cleandata.csv')
        io.store ([train_number, val_number, test_number, unlabel_number, label, uid], ROOT + '/data/datadescribe')

    logging.info ('finished cleaning the data')

    if usePCA :
        # dimensionality reduction
        if not os.path.exists (ROOT + '/data/datapca') :
            # we do not need to rerun this step;
            # if you change the parameters and want to relearn it, delete the '../data/datapca' file
            data_values = decomposition.pca_solver (data)
            io.store (data_values, ROOT + '/data/datapca')

        data_values = io.grab (ROOT + '/data/datapca')
    else :
        data_values = data.values[:,1:]
    return data_values, train_number, val_number, test_number, unlabel_number, label, uid
Example #4
def main():
    train = read_data("train")
    print(train.columns)
    count_missing(train)
    df_train_Y = train[["isFraud"]]
    df_train_X = train.drop(["isFraud"], axis=1)
    train_Y = df_train_Y.to_numpy()
    train_X = df_train_X.to_numpy()
    train_model_dnn(train_X, train_Y)
Example #5
def load_data(subset):
    df1_clean = rd.read_data()
    df1_clean = rd.clean_df(df1_clean, False)
    labels = df1_clean.loc[:, ['target']].values
    df1_clean = df1_clean.loc[:, subset].astype(float).astype(int)
    data = df1_clean.loc[:, subset].astype(float).astype(int)
    query = ''
    for s in range(len(subset) - 1):
        query = query + "data['" + subset[s] + "'].values,"
    query = query + "data['" + subset[-1] + "'].values"
    data = np.stack((eval(query)), axis=-1)
    return data, labels, df1_clean
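The string building and eval above amount to stacking the selected columns; a minimal eval-free equivalent, under the same assumptions about rd and the subset argument:

def load_data_no_eval(subset):
    # Sketch only: same result as load_data above, without building a query string.
    df1_clean = rd.read_data()
    df1_clean = rd.clean_df(df1_clean, False)
    labels = df1_clean.loc[:, ['target']].values
    df1_clean = df1_clean.loc[:, subset].astype(float).astype(int)
    data = np.stack([df1_clean[col].values for col in subset], axis=-1)
    return data, labels, df1_clean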
Example #6
def viewall():
    df = rd.read_data()
    q3 = rd.draw_chest_pain_type(df)
    q4 = rd.draw_resting_blood_pressure(df)
    q5 = rd.draw_serum_cholestoral(df)
    q6 = rd.draw_fasting_blood_sugar(df)
    q7 = rd.draw_RER(df)
    q8 = rd.draw_Mhra(df)
    q9 = rd.draw_exercise_induced_angina(df)
    q10 = rd.draw_ST_Depression(df)
    q11 = rd.draw_slope_exercise_ST_segment(df)
    q12 = rd.draw_major_vessels(df)
    q13 = rd.draw_thal(df)
    return render_template('part1_viewall.html', q3=q3, q4=q4, q5=q5, q6=q6,
                           q7=q7, q8=q8, q9=q9, q10=q10, q11=q11, q12=q12,
                           q13=q13)
Example #7
def main():
    start = time.perf_counter()  # time.clock() was removed in Python 3.8
    docs, stopwords = read_data()
    docs = [doc for i, doc in list(docs.items())]
    indexer = kmeans(docs, stopwords)
    print("Time to index: ", round(time.perf_counter() - start, 3), "Seconds")

    ### PART A
    print("PART A")

    print("\nEnter number of clusters: ")
    indexer.clustering(int(input()))
    print("\nTime to cluster: ", round(time.clock() - start, 3), "Seconds")

    ### PART B

    print("\n\nPART B")

    start = time.perf_counter()
    for i in range(2, 31):
        print("\nNumber of clusters:", i)
        indexer.clustering(i)
    print("\nTime to cluster: ", round(time.perf_counter() - start, 3), "Seconds")
Example #8
def relevance_feedback(isPseudoFeedback):
    queries, relevances, docs, stopwords = read_data()
    docs = [docs[i] for i in range(17, max(list(docs.keys())))]

    indexer = Index(docs, stopwords)

    rel = {
        1: {
            0: [374, 398, 304, 380],
            1: [374, 304, 380],
            2: [304, 268, 326],
            3: [304, 268, 326],
            4: [304, 268, 326]
        },
        6: {
            0: [304, 398, 402, 380, 374],
            1: [304, 398, 402],
            2: [304, 398, 402],
            3: [304, 398, 402],
            4: [304, 398, 402]
        },
        31: {
            0: [47, 72, 100],
            1: [47, 72, 100, 10],
            2: [47, 72, 100, 10],
            3: [47, 72, 100, 10],
            4: [47, 72, 100, 10]
        }
    }

    for key in [1, 6, 31]:
        query = queries[key]

        if len(relevances[key]) > 5:
            indexer.run_relevance_feedback(key, query, relevances[key],
                                           isPseudoFeedback, rel[key])
Example #9
# defaults
input_filename = 'real.csv'
output_filename = 'model.txt'
feature_list = []
n = 0
start = len(sys.argv)  # default: no feature indices; avoids a NameError when the script is run without arguments
if len(sys.argv) >= 2:
    if sys.argv[1][0] != '-':
        input_filename = sys.argv[1]
        n = -int(sys.argv[2])
        start = 3
    else:
        n = -int(sys.argv[1])
        start = 2
for i in range(start, len(sys.argv)):
    feature_list.append(int(sys.argv[i]))
output_filepath = '../data/' + output_filename
sample_num, feature_num, features, NULL = read_data(input_filename)
convs = analyze(sample_num, feature_num, features, n, feature_list)
# print out in a file
output = open(output_filepath, "w")
buf = ""
buf += str(sample_num) + ' ' + str(feature_num) + '\n'
buf += str(n) + '\n'
for xi in range(0, n):
    buf += str(feature_list[xi])
    if xi < n - 1:
        buf += ' '

buf += '\n'
output.write(buf)
for xi in range(0, n):
    buf = ""
Example #10
def train(config):
    """Trains the neural network with provided configurations.

    Parameters
    ----------
    config : dictionary
        Configurations of training procedure.

    Returns
    -------
    None

    """

    # Latent Space Dimension
    k = config['k']

    # Read training data
    user_ids, movie_ids, ratings = read_data(training=True)

    user_ids = map_ids(user_ids, users=True)
    movie_ids = map_ids(movie_ids, users=False)

    # Input Data
    users = torch.Tensor(user_ids).int()
    movies = torch.Tensor(movie_ids).int()
    ratings = torch.Tensor(ratings)

    config['n_users'] = np.unique(user_ids).size
    config['n_items'] = np.unique(movie_ids).size

    # The input size of the first fc layer differs if we one-hot encode the input
    if config['one_hot_encoding']:
        config['layers'][0] = (config['n_users'] +
                               config['n_items']) * config['k']
    else:  # 2 * k, because we concatenate the user and item embeddings and k is the embedding output size
        config['layers'][0] = 2 * config['k']

    print("Configurations")
    print(config)

    # Save the configs as a dictionary
    with open(configs.CONFIGS_PATH, "wb") as f:
        pickle.dump(config, f, pickle.HIGHEST_PROTOCOL)

    # Try to use GPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Model
    model = NCA(config).to(device)

    print("-" * 50)

    print("Our Model")
    print(model)

    learning_rate = config['lr']
    critertion = config['critertion']
    batch_size = config['batch_size']
    epochs = range(config['epochs'])

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    # optimizer = torch.optim.Adam(model.parameters())

    # Create a data loader from training data
    data_loader = DataLoader(TensorDataset(users, movies, ratings),
                             batch_size=batch_size)

    # Accumulates the loss across epochs
    losses = []

    print(sum(p.numel() for p in model.parameters() if p.requires_grad))

    print("-" * 50)

    # Iterate over epochs
    for epoch in epochs:
        epoch_loss = []

        # Iterate over batches
        for batch_users, batch_movies, batch_ratings in data_loader:

            # Do one-hot encoding
            if config['one_hot_encoding']:
                batch_users = torch.nn.functional.one_hot(
                    batch_users.long(), config['n_users'])
                batch_movies = torch.nn.functional.one_hot(
                    batch_movies.long(), config['n_items'])

            users = batch_users.int().to(device)
            movies = batch_movies.int().to(device)
            ratings = batch_ratings.to(device)

            optimizer.zero_grad()

            output = model(users, movies)[:, 0]

            loss = critertion(output, ratings)

            loss.backward()
            optimizer.step()

            epoch_loss.append(loss.item())

        avg_epoch_loss = np.mean(epoch_loss)
        losses.append(avg_epoch_loss)
        print(f"epoch {epoch}, loss = {avg_epoch_loss}")

    # Save the trained model
    # Save to different files depending on whether one-hot encoding of the features is used
    if config['one_hot_encoding']:
        torch.save(model.state_dict(), configs.NCF_MODEL_ONE_HOT_PATH)
    else:
        torch.save(model.state_dict(), configs.NCF_MODEL_PATH)
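For concreteness, the layer-size rule above works out as follows; the user and item counts here are made up for illustration, not taken from the data:

# Illustrative numbers only (hypothetical user/item counts):
n_users, n_items, k = 100, 50, 8
size_one_hot = (n_users + n_items) * k  # 1200 inputs when users and items are one-hot encoded
size_embedding = 2 * k                  # 16 inputs when two k-dimensional embeddings are concatenated
print(size_one_hot, size_embedding)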
Example #11
    ivar = str(args.var)

    main_path = '/srv/ccrc/data25/z5166746/CMIP5/'

    cmip1=main_path+str(ivar)+'/a10_cmip5_r01_'+str(ivar)+'_r.nc'
    cmip2=main_path+str(ivar)+'/a10_cmip5_r02_'+str(ivar)+'_r.nc'
    cmip3=main_path+str(ivar)+'/a10_cmip5_r03_'+str(ivar)+'_r.nc'

    cmip_all = [cmip1,cmip2,cmip3]

    #SST
    if ivar=='sst':
        ds_sst = []
        for i in range(len(cmip_all)):
            lon,lat,lev,sst,time,basin_mask = read_data(cmip_all[i],'thetao',imask=None)
            sst = sst - 273.15 #Convert SST to degrees celsius
            ds_sst.append(sst)

        # store once after the loop (matching the pot_temp branch below)
        klepto_atm_xr = klepto.archives.dir_archive('klepto_atm_xr', serialized=True, cached=False)
        klepto_atm_xr['cmip5_sst'] = ds_sst

    elif ivar=='pot_temp':
        ds_pot_temp = []
        for i in range(len(cmip_all)):
            lon,lat,lev,pot_temp,time,basin_mask = read_data(cmip_all[i],'thetao',imask=None)
            pot_temp = pot_temp.sel(lev=slice(0,600)) - 273.15 #Convert potential temperature to degrees Celsius
            lev = lev.sel(lev=slice(0,600)) #Select upper ocean
            ds_pot_temp.append(pot_temp)
        klepto_atm_xr = klepto.archives.dir_archive('klepto_atm_xr', serialized=True, cached=False)
        klepto_atm_xr['cmip5_pot_temp'] = ds_pot_temp
Example #12
import PIL.Image
import numpy as np
import os
import tensorflow as tf  # needed for the placeholder/Session code below (TF 1.x API)
from read import read_data
import scipy.misc


def split_pic(data):
    print(data.shape)
    res = np.split(data, 2, axis=2)
    print(res[0].shape)
    return res[0], res[1]


if __name__ == "__main__":
    data = read_data('DATA/', 200)
    data = data * 255
    data = data.astype(dtype='uint8')

    svg, pxl = split_pic(data)
    out = tf.placeholder('float32', [256, 256, 3])
    out1 = tf.image.resize_images(out, [64, 64], 0)
    out2 = tf.image.resize_images(out1, [256, 256], 0)
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        for i in range(200):
            #out_pxl = sess.run(out2, {out : pxl[i]})
            #out = scipy.misc.imresize(pxl[i], (64, 64), 'bilinear')
            #out = scipy.misc.imresize(out, (256, 256), 'bilinear')
            #scipy.misc.imsave('test_input/%d.png' %i, out_pxl)
Example #13
import os

import cv2

from read import read_data
from segment import segment_leukocytes
from visualize import compare_images, mark_cancerous_lymphocytes

if __name__ == "__main__":
    IMG_PATH = os.path.join("ALL_IDB1", "im")
    XYC_PATH = os.path.join("ALL_IDB1", "xyc")

    # This dataframe stores id, image path, bool for presence of blasts and
    # co-ordinates of blasts if any: id, img_path, has_blasts, blast_xy
    df = read_data(IMG_PATH, XYC_PATH)

    for i in range(108):
        img = cv2.imread(df.loc[i].img_path)

        segmented_image = segment_leukocytes(img)
        mark_cancerous_lymphocytes(img, df.loc[i].blast_xy)
        compare_images(img, segmented_image)
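Equivalently, the loop could iterate over the dataframe rows directly instead of a fixed range(108); a small sketch, assuming the columns described in the comment above:

# Sketch only: same processing as above, iterating the rows of df directly.
for row in df.itertuples():
    img = cv2.imread(row.img_path)
    segmented_image = segment_leukocytes(img)
    mark_cancerous_lymphocytes(img, row.blast_xy)
    compare_images(img, segmented_image)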
Example #14
import matplotlib.pyplot as plt  # plotting routine
import sys
sys.path.append(
    '../.'
)  # allows to search for modules in parent directory, if not found in current directory
from sub import fcc_points, kline, sort4  # user-defined function
from plot import plot_bands  # user-defined function for plotting
from read import read_data  # user-defined function for reading data
"""Code pseudo.py, written by Lucio Andreani, [email protected]
   It calculates the energy bands of tetrahedral semiconductors by the empirical pseudopotential method
   Reference: Yu-Cardona, Fundamentals of Semiconductors, Springer
   Atomic units are used (Bohr radius for length, Hartree for energy)"""

# reads data from file pseudo.dat
f = open('pseudo.dat', 'r')
aret, v3s, v8s, v11s, v3a, v4a, v11a, nk, gmax, nmax, upper, ymin, ymax, ny, jplot = read_data(
    f)
print(aret, v3s, v8s, v11s, v3a, v4a, v11a, nk, gmax, nmax, upper, ymin, ymax,
      ny, jplot)
f.close()

#aret=5.43     # lattice constant in Angstrom
#v3s,v8s,v11s= -0.211, 0.04, 0.08    #v3s,v8s,v11s=symmetric pseudopotential form factors in Rydberg
#v3a,v4a,v11a=0.   , 0.  , 0.  ,    #v3a,v4a,v11a=antisymmetric pseudopot. form factors in Rydberg
#nk=50        # number of k-points along each line in BZ
#gmax=5.      # maximum modulus of reciprocal lattice vector, in units of 2\pi/a
#nmax=5       # max index number for reciprocal lattice vectors, choose high enough to get all bands up to max energy of the plot
#upper=1e-12  # numerical tolerance parameter: upper limit for evaluating zero

# fundamental constants
abohr = 0.529177210903  # Bohr radius in Angstrom
hartree = 27.211386245988  # Hartree energy in eV
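Not part of the original script, but for reference, the Hartree constant above also fixes the Rydberg unit used for the form factors in the commented defaults (1 Ry = Hartree / 2); a small illustrative conversion:

abohr = 0.529177210903      # Bohr radius in Angstrom
hartree = 27.211386245988   # Hartree energy in eV
rydberg = hartree / 2.0     # 1 Rydberg = 13.605693122994 eV

# Illustrative only: convert the commented default v3s form factor from Ry to eV.
v3s_ry = -0.211
print(v3s_ry * rydberg, "eV")  # roughly -2.87 eV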
Example #15
def evaluate(p):
    cmd = ["./cycle.sh"]
    cmd.append("-" + str(p.num))
    for k in range(0, p.num):
        cmd.append(str(p.feature_list[k]))
    output = subprocess.check_output(cmd).decode("utf-8")
    spos = output.find("Testing Accuracy")
    output = output[spos:len(output)]
    for c in output:
        if c not in '.0123456789':
            output = output.replace(c, '')
    return float(output)


input_filename = 'real.csv'
NULL, feature_num, NULL, NULL = read_data(input_filename)
init(feature_num)
for i in range(0, MAX):
    # cross over
    for j in range(0, cross_over_num // 2):
        random.seed(a=None, version=2)
        num1, crossed_list1, num2, crossed_list2 = crossover( generation[random.randint(0, population-1)], \
                                                              generation[random.randint(0, population-1)])
        generation[population + j * 2].modify(num1, crossed_list1)
        generation[population + j * 2 + 1].modify(num2, crossed_list2)
    # mutate
    for j in range(0, population):
        num, mutation_list = mutate(generation[j])
        generation[j].modify(num, mutation_list)
    # evaluate
    for j in range(0, population + cross_over_num):
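As a side note, the character-stripping loop in evaluate() above could also be written with a regular expression; a minimal sketch, assuming the same "Testing Accuracy" marker appears in the subprocess output:

import re

def parse_accuracy(output):
    # Hypothetical helper: extract the first number that follows "Testing Accuracy".
    match = re.search(r"Testing Accuracy[^\d.]*([0-9]*\.?[0-9]+)", output)
    if match is None:
        raise ValueError("no accuracy value found in output")
    return float(match.group(1))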
Example #16
    clmt1 = main_path + 'a10_' + iexp + 'T_r01/a10_' + iexp + 'T_r01.pa_1951-2016_var.nc'
    clmt2 = main_path + 'a10_' + iexp + 'T_r02/a10_' + iexp + 'T_r02.pa_1951-2016_var.nc'
    clmt3 = main_path + 'a10_' + iexp + 'T_r03/a10_' + iexp + 'T_r03.pa_1951-2016_var.nc'
    clmt4 = main_path + 'a10_' + iexp + 'T_r04/a10_' + iexp + 'T_r04.pa_1951-2016_var.nc'
    clmt5 = main_path + 'a10_' + iexp + 'T_r05/a10_' + iexp + 'T_r05.pa_1951-2016_var.nc'
    clmt6 = main_path + 'a10_' + iexp + 'T_r06/a10_' + iexp + 'T_r06.pa_1951-2016_var.nc'

    atm_all = [clm1, clm2, clm3, clm4, clm5, clm6, \
    clmt1, clmt2, clmt3, clmt4, clmt5, clmt6]

    #U1000
    if ivar == 'u1000':
        ds = []
        for i in range(len(atm_all)):
            lon, lat, lev, ua, time, basin_mask = read_data(atm_all[i],
                                                            'ua_plev',
                                                            imask=None)
            ua = ua.sel(lev=1000, method='nearest')
            ds.append(ua)

        klepto_atm_xr = klepto.archives.dir_archive('klepto_atm_xr',
                                                    serialized=True,
                                                    cached=False)
        klepto_atm_xr[str(iexp) + '_u1000'] = ds

    #V1000
    elif ivar == 'v1000':
        ds = []
        for i in range(len(atm_all)):
            lon, lat, lev, va, time, basin_mask = read_data(atm_all[i],
                                                            'va_plev',
Example #17
def return_clean_df():
    df1_clean = rd.read_data()
    df1_clean = rd.clean_df(df1_clean, False)
    return df1_clean
Example #18
        clmt1 = main_path + 'a10_' + iexp + 'T_r01/a10_' + iexp + 'T_r01.mocn_1951-2016_w_r.nc'
        clmt2 = main_path + 'a10_' + iexp + 'T_r02/a10_' + iexp + 'T_r02.mocn_1951-2016_w_r.nc'
        clmt3 = main_path + 'a10_' + iexp + 'T_r03/a10_' + iexp + 'T_r03.mocn_1951-2016_w_r.nc'
        clmt4 = main_path + 'a10_' + iexp + 'T_r04/a10_' + iexp + 'T_r04.mocn_1951-2016_w_r.nc'
        clmt5 = main_path + 'a10_' + iexp + 'T_r05/a10_' + iexp + 'T_r05.mocn_1951-2016_w_r.nc'
        clmt6 = main_path + 'a10_' + iexp + 'T_r06/a10_' + iexp + 'T_r06.mocn_1951-2016_w_r.nc'

        w_all = [clm1, clm2, clm3, clm4, clm5, clm6, \
        clmt1, clmt2, clmt3, clmt4, clmt5, clmt6]

        #Vertical current
        ds = []
        for i in range(len(w_all)):
            lon, lat, lev, w, time, basin_mask = read_data(w_all[i],
                                                           ivar,
                                                           imask=None)
            w = w.sel(lev=slice(0, 600))
            lev = lev.sel(lev=slice(0, 600))  #Select upper ocean
            ds.append(w)
        # #Assign correct lat/lon to MclmT r02 and r03
        # ds[7]['latitude'] = ds[0].latitude
        # ds[8]['latitude'] = ds[0].latitude

    elif ivar == 'ssh':
        clm1 = main_path + 'a10_' + iexp + '_r01/a10_' + iexp + '_r01.mocn_1951-2016_sshr.nc'
        clm2 = main_path + 'a10_' + iexp + '_r02/a10_' + iexp + '_r02.mocn_1951-2016_sshr.nc'
        clm3 = main_path + 'a10_' + iexp + '_r03/a10_' + iexp + '_r03.mocn_1951-2016_sshr.nc'
        clm4 = main_path + 'a10_' + iexp + '_r04/a10_' + iexp + '_r04.mocn_1951-2016_sshr.nc'
        clm5 = main_path + 'a10_' + iexp + '_r05/a10_' + iexp + '_r05.mocn_1951-2016_sshr.nc'
        clm6 = main_path + 'a10_' + iexp + '_r06/a10_' + iexp + '_r06.mocn_1951-2016_sshr.nc'
Example #19

#===============================================================

### Execute script

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('ivar_obs')
    parser.add_argument('time_ifile')
    parser.add_argument('fname')
    args = parser.parse_args()

    #Extract lon,lat,sst,time from HadISST observation (1950-2017)
    ipath_had = '/srv/ccrc/data25/z5166746/Obs_data/sst/HadISST/HadISST_all_clean.nc'
    lon_had, lat_had, lev_had, sst_had, time_had, basin_mask = read_data(
        ipath_had, 'sst', imask=None)

    #Select time
    start_time = 1950
    end_time = 2017
    sst_t, time_t = seltime(sst_had, time_had, start_time, end_time)

    # #Calculate area weighed ssts
    # wgtfac = areawgtvar_3D(lon_had,lat_had)
    # sst_had_aw = np.multiply(sst_had,wgtfac[np.newaxis,...])
    #
    #Mask other oceans
    sst_io = mask_oceans('./../../grids/basinmask_01.msk', sst_had, lon_had,
                         lat_had)

    #Calculate trend in IO SST from 1950 to 2017
Example #20
def test_model():
    """Runs the persisted neural network model on test data and returns the test loss.

    Returns
    -------
    None

    """

    user_ids, movie_ids, ratings = read_data(training=False)

    # Resetting ids of users and movies
    user_ids = map_ids(user_ids, users=True)
    movie_ids = map_ids(movie_ids, users=False)

    # Creating the tensors
    users = torch.Tensor(user_ids).int()
    movies = torch.Tensor(movie_ids).int()
    ratings = torch.Tensor(ratings)

    # Reading the training settings to be fed into the model
    config = {}
    with open(configs.CONFIGS_PATH, "rb") as f:
        config = pickle.load(f)

    model = NCA(config)

    # Different models for different Preprocessing steps
    if config['one_hot_encoding']:
        model.load_state_dict(torch.load(configs.NCF_MODEL_ONE_HOT_PATH,
                                         'cpu'))
    else:
        model.load_state_dict(torch.load(configs.NCF_MODEL_PATH, 'cpu'))

    model.eval()

    # Batch size for test data
    batch_size = 200

    # The same loss criterion used in the training stage
    critertion = config['critertion']

    # Creating loader for test data
    data_loader = DataLoader(TensorDataset(users, movies, ratings),
                             batch_size=batch_size)

    losses = []

    # Iterate over batches
    # We calculate the test loss on batches due to the large dataset size
    for batch_users, batch_movies, batch_ratings in data_loader:

        # Whether we have to do one hot encoding or not
        if config['one_hot_encoding']:
            batch_users = torch.nn.functional.one_hot(batch_users.long(),
                                                      config['n_users'])
            batch_movies = torch.nn.functional.one_hot(batch_movies.long(),
                                                       config['n_items'])

        users = batch_users.int()
        movies = batch_movies.int()
        ratings = batch_ratings

        output = model(users, movies)[:, 0]

        loss = critertion(output, ratings)
        losses.append(loss.item())

    print(f"Loss for test data is {np.mean(losses)}")
Example #21
from read import read_data
#=================================================================
### Execute script

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('var')  #thetao,
    args = parser.parse_args()

    ivar = str(args.var)

    main_path = '/srv/ccrc/data25/z5166746/Obs_data/obs_' + str(ivar) + '_r.nc'

    if ivar == 'sst':
        lon, lat, lev, sst, time, basin_mask = read_data(main_path,
                                                         'sst',
                                                         imask=None)

        klepto_atm_xr = klepto.archives.dir_archive('klepto_atm_xr',
                                                    serialized=True,
                                                    cached=False)
        klepto_atm_xr['obs_sst'] = sst

    elif ivar == 'pot_temp':
        lon, lat, lev, pot_temp, time, basin_mask = read_data(
            main_path, 'temp', imask=None, decode_times=False)
        klepto_atm_xr = klepto.archives.dir_archive('klepto_atm_xr',
                                                    serialized=True,
                                                    cached=False)
        klepto_atm_xr['obs_pot_temp'] = pot_temp
Example #22
def get_df_val():
    df_new = read_data()
    df_val = df_new[20000:]
    df_val.reset_index(inplace=True)
    return df_val
Example #23
def train_gan(activity):
    logdir = '1'
    data_path = 'C:/Users/STUDENT/Desktop/Ibrahim/GAN_tot/o/' + activity + '/train/'  # directory of the training data used for visualization
    outputdata = 'C:/Users/STUDENT/Desktop/Ibrahim/GAN_tot/o/'  # directory for the visualized GAN output
    gan_input, _ = read_data(data_path, [[activity]])
    train(logdir, 64, gan_input, outputdata, activity)
Example #24
# default filenames
train_filename = "synthetic.csv"
test_filename = "real.csv"
if len(sys.argv) > 1:
    test_filename = sys.argv[1]
if len(sys.argv) > 2:
    train_filename = sys.argv[2]

# parameters
learning_rate = 0.001
training_iters = 100
batch_size = 10
display_step = 10

# Network Parameters
n_sample, n_input, features, labels = read_data(train_filename)
# features * 4 (img size: n_input * 4)
n_classes = 2  # 0 and 1
keep_rate = 0.75  # Dropout, probability to keep units
filter_width = 5 # filter size
p1_width = 10 # pooling rate for first pooling
p2_width = 10 # pooling rate for second pooling
remain = math.ceil(float(n_input) / float(p1_width)) # remaining items after pooling
remain = math.ceil(float(remain) / float(p2_width))
remain = remain * 4

# tf Graph input
x = tf.placeholder(tf.float32, [None, n_input, 4])
y = tf.placeholder(tf.float32, [None, n_classes])
keep_prob = tf.placeholder(tf.float32) # keep probability for dropout
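As a worked example of the pooling arithmetic above (the n_input value here is made up; the real one comes from read_data):

import math

# Illustrative only: suppose read_data reported n_input = 1000.
n_input, p1_width, p2_width = 1000, 10, 10
remain = math.ceil(float(n_input) / float(p1_width))  # 100 positions after the first pooling
remain = math.ceil(float(remain) / float(p2_width))   # 10 positions after the second pooling
remain = remain * 4                                   # 40, times the 4 per-position features of the [None, n_input, 4] input
print(remain)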
Example #25
        clmt5 = main_path + 'a10_' + iexp + 'T_r05/a10_' + iexp + 'T_r05.mocn_1951-2016_w_r_detrend_' + str(
            itype) + '.nc'
        clmt6 = main_path + 'a10_' + iexp + 'T_r06/a10_' + iexp + 'T_r06.mocn_1951-2016_w_r_detrend_' + str(
            itype) + '.nc'

        atm_all = [
            clm1, clm2, clm3, clm4, clm5, clm6, clmt1, clmt2, clmt3, clmt4,
            clmt5, clmt6
        ]

        #Vertical current
        iw = 'W'
        ds = []
        for i in range(len(atm_all)):
            lon, lat, lev, w, time, basin_mask = read_data(atm_all[i],
                                                           iw,
                                                           imask=None)
            ds.append(w)
        # #Assign correct lat/lon to MclmT r02 and r03
        # w_d[7]['latitude'] = w_d[0].latitude
        # w_d[8]['latitude'] = w_d[0].latitude

    else:
        #u
        clm1 = main_path + 'a10_' + iexp + '_r01/a10_' + iexp + '_r01.mocn_1951-2016_uvpot_temprho_detrend_' + str(
            itype) + '.nc'
        clm2 = main_path + 'a10_' + iexp + '_r02/a10_' + iexp + '_r02.mocn_1951-2016_uvpot_temprho_detrend_' + str(
            itype) + '.nc'
        clm3 = main_path + 'a10_' + iexp + '_r03/a10_' + iexp + '_r03.mocn_1951-2016_uvpot_temprho_detrend_' + str(
            itype) + '.nc'
        clm4 = main_path + 'a10_' + iexp + '_r04/a10_' + iexp + '_r04.mocn_1951-2016_uvpot_temprho_detrend_' + str(
Example #26
# Add a layer
def add_layer(inputs, in_size, out_size, activation_function=None):
    # add one more layer and return the output of this layer
    weights = tf.Variable(tf.truncated_normal([in_size, out_size], stddev=0.1))
    biases = tf.Variable(tf.zeros([1, out_size]) + 0.1)
    wx_plus_b = tf.matmul(inputs, weights) + biases
    if activation_function is None:
        outputs = wx_plus_b
    else:
        outputs = activation_function(wx_plus_b)
    return outputs


# 1. Training data
# Make up some real data
x_train, y_train, x_test, y_test = read_data("data_train_4.csv")
x_test_cnn = read_test("predict.csv")

# 2. Define placeholders to receive the data
# define placeholder for inputs to network
xs = tf.placeholder(tf.float32, [None, 10])
ys = tf.placeholder(tf.float32, [None, 3])

# 3. Define the network layers: hidden layers and prediction layer
# add hidden layer; the input is xs
l1 = add_layer(xs, 10, 90, activation_function=tf.nn.sigmoid)
l2 = add_layer(l1, 90, 90, activation_function=tf.nn.sigmoid)
# l3 = add_layer(l2, 20, 20, activation_function=tf.nn.tanh)
# l4 = add_layer(l3, 24, 22, activation_function=tf.nn.tanh)
# l5 = add_layer(l4, 22, 20, activation_function=tf.nn.tanh)
# l6 = add_layer(l5, 20, 18, activation_function=tf.nn.tanh)
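The snippet cuts off before the prediction layer. Purely as a hypothetical continuation in the same TF1 style (not the original script), an output layer matching the ys placeholder of shape [None, 3] could look like:

# Hypothetical continuation, assuming the definitions above (xs, ys, l2, add_layer):
prediction = add_layer(l2, 90, 3, activation_function=None)  # logits for 3 classes
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=ys, logits=prediction))
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)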
Example #27
    if (len(sys.argv) < 5):
        print("Go Away")
        exit(1)

    part = sys.argv[1]
    trainFile = sys.argv[2]
    testFile = sys.argv[3]
    valFile = sys.argv[4]

    # Preprocess
    preprocess_data(trainFile, trainFile + '.processed')
    preprocess_data(testFile, testFile + '.processed')
    preprocess_data(valFile, valFile + '.processed')

    if (part == '1' or part == '2'):
        data = read_data(trainFile + '.processed')
        valData = read_data(valFile + '.processed')
        testData = read_data(testFile + '.processed')

        features = set()
        for x in range(1, 24):
            features.add(x)

        trainAccuracies = []
        valAccuracies = []
        testAccuracies = []
        numNodes = []
        remainders = [r for r in range(23, -1, -1)]  # 23, 22, ..., 0
        pruning = False if part == '1' else True
        fileName = "Q1/plots/accuracies.png" if part == '1' else "Q1/plots/pruning.png"
Example #28
def hello_world():
    df = rd.read_data()
    return render_template('index.html')
Example #29
    recents = []
    while True:
        results = get_group(data)
        if results.tolist() in recents:
            break
        new_centroids = get_new_centroids(data, results)
        centroids = new_centroids.copy()
        recents.append(results.tolist())
    return centroids


def get_class_from_centroids(x, centroids):
    return np.argmin(np.abs(centroids - x))


not_tech, tech = read_data()


def get_delta(hitobjects):
    current = -1
    delta = []
    for hitobject in hitobjects:
        if current == -1:
            delta.append(0)
            current = hitobject.offset
            continue
        delta.append(hitobject.offset - current)
        current = hitobject.offset
    return delta
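A quick usage sketch of get_class_from_centroids above, with made-up centroid values (illustrative only):

import numpy as np

centroids = np.array([120.0, 300.0])               # hypothetical cluster centres for the offset deltas
print(get_class_from_centroids(150.0, centroids))  # 0: closer to 120.0
print(get_class_from_centroids(280.0, centroids))  # 1: closer to 300.0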