Example #1
        )
        print("%s model analysis completion time: %.3f s = %.3f min" %
              (dnn_model, (end - start), (end - start) / 60.0))
        print(
            "==================================================================\n"
        )


if __name__ == "__main__":
    path = os.getcwd()[:os.getcwd().rfind('/')]
    to_write_filename = path + '/stats/dnn_models_analysis.txt'
    utils.initialize_writer(to_write_filename)

    # Load the train and test sets for the selected dataset
    dataset = "ghosh"
    train_data, _, train_labels, test_data, _, test_labels = data_proc.get_dataset(
        dataset)

    # Alternatively, for other experiments on Ghosh's dataset, load a different
    # set of tokens (grammatical, strict, filtered, etc.) and train on those
    """
    train_filename = "train_sample.txt"
    test_filename = "test_sample.txt"
    
    train_data = utils.load_file(path + "/res/tokens/tokens_clean_original_" + train_filename)
    test_data = utils.load_file(path + "/res/tokens/tokens_clean_original_" + test_filename)

    train_labels = [int(l) for l in utils.load_file(path + "/res/datasets/ghosh/labels_" + train_filename)]
    test_labels = [int(l) for l in utils.load_file(path + "/res/datasets/ghosh/labels_" + test_filename)]
    """

    # Transform the output into categorical data
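    # Hypothetical sketch of the truncated step (not in the original snippet),
    # assuming Keras one-hot encoding of the binary sarcasm labels:
    from keras.utils import to_categorical
    train_labels = to_categorical(train_labels, num_classes=2)
    test_labels = to_categorical(test_labels, num_classes=2)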
Example #2
        loss=loss_op,
        train_op=train_op,
        training_hooks=train_hook_list,
        eval_metric_ops={'accuracy': accuracy})

    return estim_specs

# Show training progress by enabling INFO-level logging
tf.logging.set_verbosity(tf.logging.INFO)

# Download and extract the dataset if it is not already present locally
data_processing.try_download_and_extract()

# This example uses MNIST; for other datasets (CIFAR-10, CIFAR-100) the loading
# step has to be adapted
mnist = data_processing.get_dataset()

config = None
# Uncomment to log device placement:
#config=tf.estimator.RunConfig(session_config=tf.ConfigProto(log_device_placement=True))

# Create the Estimator with a model directory (TODO: make the path configurable)
model = tf.estimator.Estimator(model_fn, model_dir="/Users/jwl1993/model_dir", config=config)

# Training input function; with num_epochs=None the input repeats indefinitely,
# so training stops only when the requested number of steps is reached
input_fn = tf.estimator.inputs.numpy_input_fn(
    x=mnist.train.images, y=mnist.train.labels,
    batch_size=batch_size, num_epochs=None, shuffle=True)

_ = model.train(input_fn, steps=number_of_iteration)
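
# Hypothetical continuation (not in the original snippet): evaluate the trained
# Estimator with a single pass over the test split, assuming the dataset object
# also exposes mnist.test.images / mnist.test.labels.
eval_input_fn = tf.estimator.inputs.numpy_input_fn(
    x=mnist.test.images, y=mnist.test.labels,
    batch_size=batch_size, num_epochs=1, shuffle=False)
print(model.evaluate(eval_input_fn))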
Example #3
import pandas as pd
import data_processing as data
import HBA_analysis as hba
import math
from pathlib import Path

adult_exp = data.get_dataset(dataset='adult', probes_strategy='reannotator')
fetal_exp = data.get_dataset(dataset='fetal', probes_strategy='reannotator')

negraes = data.get_genelist('negraes')
duncan = data.get_genelist('duncan')
lutterAN = data.get_genelist('lutterAN')
lutterBN = data.get_genelist('lutterBN')

results_dir = Path('./results')
results_dir.mkdir(exist_ok=True)


def add_sig_marks(df):
    """adds markers to brain names column: ** for pFDR < 0.05 and * p<0.05"""
    # add ** if FDR < 0.05 and AUROC > 0.5
    mask = (df['pFDR'] < 0.05) & (df['AUROC'] > 0.5)
    df.loc[mask,
           'brain area'] = df.loc[mask,
                                  'brain area'].apply(lambda x: str(x) + ' **')
    # add * if FDR>0.05 but p<0.05 and AUROC > 0.5
    mask = (df['pFDR'] > 0.05) & (df['p'] < 0.05) & (df['AUROC'] > 0.5)
    df.loc[mask,
           'brain area'] = df.loc[mask,
                                  'brain area'].apply(lambda x: str(x) + ' *')
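

# Hypothetical usage sketch (not part of the original snippet): the assignments
# above modify the frame in place, so the marks can be checked on a toy example.
example = pd.DataFrame({'brain area': ['A', 'B', 'C'],
                        'AUROC': [0.7, 0.6, 0.4],
                        'p': [0.001, 0.03, 0.2],
                        'pFDR': [0.01, 0.2, 0.6]})
add_sig_marks(example)
print(example['brain area'].tolist())  # ['A **', 'B *', 'C']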
Example #4
decision_tree.
Compares sklearn scores and decision_tree on one of the UCI datasets.
Shows accuracy measurements and the number of rules used in each classifier
before and after post-pruning (decision_tree only; sklearn does not provide
this functionality [December 2018])

"""

from sklearn.tree import DecisionTreeClassifier

from data_processing import convert_to_numerical, get_dataset, split_data
from decision_tree import DecisionTree, accuracy_score

# READ DATA AND RENAME COLUMNS
df = get_dataset(
    './car.data',
    'http://archive.ics.uci.edu/ml/machine-learning-databases/car/car.data')
df.columns = [
    'buying', 'maintenance', 'doors', 'people', 'lug_boot', 'safety', 'class'
]

# CONVERT STRING VALUES TO THEIR NUMERICAL COUNTERPARTS (FASTER CALCULATION)
convert_to_numerical(df,
                     columns=[
                         'buying', 'maintenance', 'doors', 'people',
                         'lug_boot', 'safety', 'class'
                     ],
                     inplace=True)

# SPLIT DATASET INTO TRAINING, VALIDATION, TESTING
training, validation, test = split_data(df, inplace=True)
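
# Hypothetical continuation (not in the original snippet), assuming the class
# label sits in the last column and accuracy_score follows the usual
# (y_true, y_pred) signature:
X_train, y_train = training.iloc[:, :-1], training.iloc[:, -1]
X_test, y_test = test.iloc[:, :-1], test.iloc[:, -1]
sk_clf = DecisionTreeClassifier().fit(X_train, y_train)
print("sklearn accuracy:", accuracy_score(y_test, sk_clf.predict(X_test)))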
Example #5
# -*- coding: utf-8 -*-
"""example.

Example usage of the functionality provided by: data_processing,
decision_tree

"""

from sklearn.metrics import accuracy_score

from binary_decision_tree import DecisionTree
from data_processing import convert_to_numerical, get_dataset, split_data
from random_forests import RandomForest

df = get_dataset(
    './kr-vs-kp.data',
    'https://archive.ics.uci.edu/ml/machine-learning-databases/chess/king-rook-vs-king-pawn/kr-vs-kp.data'
)

df.columns = list(range(37))
convert_to_numerical(df, columns=list(range(37)), inplace=True)

training, validation, test = split_data(df, inplace=True)
training_X = training.iloc[:, :-1]
training_y = training.iloc[:, -1]

clf = RandomForest()
clf.fit(training_X, training_y)

validation_X = validation.iloc[:, :-1]
validation_y = validation.iloc[:, -1]
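
# Hypothetical continuation (not in the original snippet), assuming RandomForest
# exposes a scikit-learn-style predict():
predictions = clf.predict(validation_X)
print("validation accuracy:", accuracy_score(validation_y, predictions))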
Example #6
import sqlalchemy as db
from sqlalchemy.orm import sessionmaker
from tables import Base, State, Tornado, Industry, Gdp
from data_processing import get_dataset

dataset = get_dataset()
metadata = db.MetaData()
engine = db.create_engine('sqlite:///test.db', echo=True)
connection = engine.connect()
Base.metadata.create_all(engine)

Session = sessionmaker(bind=engine)

session = Session()
sts = [State(**i) for i in dataset.states.to_dict(orient="records")]
session.add_all(sts)

tds = [Tornado(**i) for i in dataset.tornados.to_dict(orient="records")]
session.add_all(tds)

inds = [Industry(**i) for i in dataset.industries.to_dict(orient="records")]
session.add_all(inds)

gdps = [Gdp(**i) for i in dataset.gdp.to_dict(orient="records")]
session.add_all(gdps)

session.commit()
session.close()

connection.close()
engine.dispose()
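
# Hypothetical check (not in the original snippet): open a fresh session and
# count the rows that were just inserted.
session = Session()
print(session.query(State).count(), "states loaded")
session.close()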
Example #7
        print("Shape of the x train set (%d, %d)" %
              (len(x_train_scaled), len(x_train_scaled[0])))
        print("Shape of the x test set (%d, %d)" %
              (len(x_test_scaled), len(x_test_scaled[0])))

        # Run the model on the selected features
        start = time.time()
        utils.run_supervised_learning_models(x_train_scaled, y_train,
                                             x_test_scaled, y_test)
        end = time.time()
        print("Completion time of the Linear SVM model: %.3f s = %.3f min" %
              ((end - start), (end - start) / 60.0))


if __name__ == "__main__":
    path = os.getcwd()[:os.getcwd().rfind('/')]
    to_write_filename = path + '/stats/ml_analysis.txt'
    utils.initialize_writer(to_write_filename)

    dataset = "ghosh"  # can be "ghosh", "riloff", "sarcasmdetection" and "ptacek"
    train_tokens, train_pos, train_labels, test_tokens, test_pos, test_labels = data_proc.get_dataset(
        dataset)

    run_baseline = False

    if run_baseline:
        baseline(train_tokens, train_labels, test_tokens, test_labels)
    else:
        ml_model(train_tokens, train_pos, train_labels, test_tokens, test_pos,
                 test_labels)
Example #8
fetal_svgs = ['0893_101892619.svg', '1097_101892615.svg', '1352_101892610.svg']
fetal_brainstem_svgs = ['0391_102182817.svg', '0639_102182810.svg']
human_diagram = 'human_diagram.svg'

# input directories
svg_dir = Path('./data/svg')
adult_dir = svg_dir / 'slices' / 'adult'
fetal_dir = svg_dir / 'slices' / 'fetal21'
fetal_brainstem_dir = svg_dir / 'slices' / 'fetal21_brainstem'

# define output directory
figures_dir = Path('./figures')
figures_dir.mkdir(exist_ok=True)

# get data
adult_exp = data.get_dataset('adult', 'reannotator')
fetal_exp = data.get_dataset('fetal', 'reannotator')
negraes = data.get_genelist('negraes')

# create tables to match AUC values to structures
adult_lookup = svg_utils.create_auc_lookup(exp_df=adult_exp,
                                           gene_list=negraes,
                                           ontology='adult')
fetal_lookup = svg_utils.create_auc_lookup(exp_df=fetal_exp,
                                           gene_list=negraes,
                                           ontology='fetal')

adult_lookup = adult_lookup.rename(index=str, columns={"AUROC": "AUC"})
fetal_lookup = fetal_lookup.rename(index=str, columns={"AUROC": "AUC"})

svg_utils.modify_svg(svg_dir / human_diagram,