import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

from dimensional_structure.prediction_utils import run_prediction
from selfregulation.utils.result_utils import load_results
from selfregulation.utils.utils import get_recent_dataset, get_behav_data

results = load_results(get_recent_dataset())
data = get_behav_data(file='variables_exhaustive.csv')

# get demographics
full_demog = results['task'].DA.get_scores()
full_demog.columns = ['full_' + c for c in full_demog.columns]
demog = pd.read_csv('/home/ian/Downloads/demog_fa_scores_t1.csv', index_col=0)

# get predictors
ddm_factors = pd.read_csv('/home/ian/Downloads/ez_t1_fa_3_scores.csv', index_col=0)
ontology_factors = results['task'].EFA.get_scores()
ontology_ddm_factors = ontology_factors[['Speeded IP', 'Caution', 'Perc / Resp']]

# compare demographics
diff = pd.DataFrame(demog.values - full_demog.loc[demog.index].values,
                    index=demog.index, columns=demog.columns)
corr = demog.join(full_demog).corr().iloc[:len(demog.columns), 
                                         len(demog.columns):]

# EZ vars 
EZ_vars = data.filter(regex='EZ_(non_decision|drift|thresh)$')
hddm_vars = data.filter(regex='hddm_(non_decision|drift|thresh)$')
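
# Not in the original snippet (which truncates here): a quick check that the
# EZ and hddm fits broadly agree, pairing columns by sorted name. This assumes
# every EZ parameter has a matching hddm counterpart.
ez_sorted = EZ_vars.reindex(sorted(EZ_vars.columns), axis=1)
hddm_sorted = hddm_vars.reindex(sorted(hddm_vars.columns), axis=1)
param_corrs = pd.Series([ez_sorted[e].corr(hddm_sorted[h])
                         for e, h in zip(ez_sorted.columns, hddm_sorted.columns)],
                        index=ez_sorted.columns)
print(param_corrs.describe())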
"""
@author: ian
"""
import numpy as np
from os import makedirs, path
import pandas as pd
import pickle
from sklearn.covariance import GraphLassoCV
from sklearn.preprocessing import scale

from dimensional_structure.graph_utils import Graph_Analysis
from selfregulation.utils.utils import get_behav_data, get_recent_dataset
from selfregulation.utils.result_utils import load_results
from selfregulation.utils.r_to_py_utils import qgraph_cor
dataset = get_recent_dataset()
data = get_behav_data(dataset=dataset, file='meaningful_variables_imputed.csv')
all_results = load_results(dataset)


def get_EFA_HCA(results, EFA):
    # return the HCA solution computed on the raw data, or on the EFA factor
    # scores (oblimin rotation) when EFA is True
    if not EFA:
        return results.HCA.results['data']
    else:
        c = results.EFA.results['num_factors']
        return results.HCA.results['EFA%s_oblimin' % c]


EFA = True
survey_HCA = get_EFA_HCA(all_results['survey'], EFA)
survey_order = survey_HCA['reorder_vec']
task_HCA = get_EFA_HCA(all_results['task'], EFA)
task_order = task_HCA['reorder_vec']
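
# Not in the original (which truncates here): a minimal sketch of where the
# imports above point, estimating a sparse covariance over the scaled,
# imputed data with GraphLassoCV (renamed GraphicalLassoCV in sklearn >= 0.20).
gl = GraphLassoCV()
gl.fit(scale(data))
sparse_cov = pd.DataFrame(gl.covariance_, index=data.columns,
                          columns=data.columns)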
        for rotate in ['oblimin', 'varimax']:
            results.HCA.name_clusters(cluster_names, inp='EFA%s_%s' % (c, rotate))
        ID = results.ID.split('_')[1]
        results.DA.name_factors(demographic_factor_names)
        if verbose: print('Saving Subset: %s' % name)
        id_file = results.save_results()
        # ***************************** saving ****************************************
        # copy latest results and prediction to higher directory
        copyfile(id_file, path.join(path.dirname(results.get_output_dir()), 
                                    '%s_results.pkl' % name))

    if run_prediction:
        if verbose:
            print('*'*79)
            print('Running prediction: %s' % name)
        if results is None or name not in results.ID:
            results = load_results(datafile, name=name)[name]
        # run behavioral prediction using the factor results determined by BIC
        for classifier in classifiers:
            for rotate in ['oblimin', 'varimax']:
                results.run_prediction(classifier=classifier,
                                       rotate=rotate,
                                       verbose=verbose)
                results.run_prediction(classifier=classifier,
                                       rotate=rotate,
                                       shuffle=shuffle_repeats,
                                       verbose=verbose)  # shuffled
                # predict demographic changes
                if run_change:
                    results.run_change_prediction(classifier=classifier,
                                                  rotate=rotate,
                                                  verbose=verbose)
                    results.run_change_prediction(classifier=classifier,
                                                  rotate=rotate,
                                                  shuffle=shuffle_repeats,
                                                  verbose=verbose)  # shuffled
        # ***************************** saving ****************************************
        prediction_dir = path.join(results.get_output_dir(), 'prediction_outputs')
        new_dir = path.join(path.dirname(results.get_output_dir()), 'prediction_outputs')
        for classifier in classifiers:
            for change_flag in [False, True]:
                for subset in ['varimax', 'oblimin', 'raw']:
                    # the loop body is truncated in this snippet; it presumably
                    # copied the matching prediction outputs from
                    # prediction_dir to new_dir
                    pass
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 29 13:28:14 2018

@author: ian
"""

from os import path
from dimensional_structure.utils import transfer_scores
from selfregulation.utils.utils import get_behav_data, get_info
from selfregulation.utils.result_utils import load_results

# get contextualizing results
results_dataset = 'Complete_03-29-2018'
results = load_results(datafile=results_dataset)

# get fmri data
fmri_dataset = 'Fmri_Followup_10-22-2018'
data = get_behav_data(dataset=fmri_dataset)
# remove data where participants are missing more than 20% of the tasks
tasks = data.copy()
tasks.columns = [i.split('.')[0] for i in tasks.columns]
successful_subjects = (tasks.isnull().reset_index().melt(id_vars=['index'])
                       .groupby(['index', 'variable']).mean()
                       .groupby('index').sum() < 12)
successful_subjects = successful_subjects[successful_subjects['value']]
data = data.loc[successful_subjects.index]

task_scores = transfer_scores(data, results['task'])
survey_scores = transfer_scores(data, results['survey'])
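
# Not in the original: one plausible way to persist the transferred scores
# (the output location and filenames are assumptions).
out_dir = path.join(get_info('base_directory'), 'Data', fmri_dataset)
task_scores.to_csv(path.join(out_dir, 'task_scores.csv'))
survey_scores.to_csv(path.join(out_dir, 'survey_scores.csv'))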
Example #7
    result_subset = args.result_subset
    rerun = args.rerun
    append = not args.no_append
    knn_metric = args.knn_metric
    EFA_rotation = args.EFA_rotation
    verbose = args.verbose
    dataset = get_recent_dataset()
    save = not args.no_save

# ## Additional Setup

# In[ ]:

# imports needed by this fragment (its original header cell was truncated)
import numpy as np
from os import makedirs, path
from sklearn.linear_model import LinearRegression, Ridge
from selfregulation.utils.result_utils import load_results
from selfregulation.utils.utils import get_info

# Load dataset
np.random.seed(12412)
results = load_results(dataset)[result_subset]
c = results.EFA.get_c()

# Define classifiers
classifiers = {
    'Ridge': Ridge(fit_intercept=False),
    'LR': LinearRegression(fit_intercept=False)
}
# get output dir to store results
output_dir = path.join(get_info('results_directory'),
                       'ontology_reconstruction', dataset, results.ID,
                       EFA_rotation)
makedirs(output_dir, exist_ok=True)
# get plot dir to store plots
plot_dir = path.join(output_dir, 'Plots')
makedirs(plot_dir, exist_ok=True)
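
# Illustration only (not the original pipeline): the idea the Ridge/LR
# classifiers above serve, reconstructing one variable's factor loadings by
# regressing its (scaled) data onto the EFA factor scores.
import pandas as pd
from sklearn.preprocessing import scale
scores = results.EFA.get_scores()
var = results.data.columns[0]  # arbitrary example variable
est = classifiers['Ridge'].fit(scores, scale(results.data[var]))
reconstructed = pd.Series(est.coef_, index=scores.columns, name=var)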
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.cluster.hierarchy import dendrogram
from sklearn.preprocessing import scale
from dimensional_structure.HCA_plots import get_dendrogram_color_fun
from dimensional_structure.utils import hierarchical_cluster
from selfregulation.utils.result_utils import load_results
from selfregulation.utils.utils import get_recent_dataset, get_demographics

dataset = get_recent_dataset()
results = load_results(dataset)

demographics = get_demographics()
demo_factors = results['survey'].DA.get_scores()


def plot_dendrogram(clustering, size=10):
    link = clustering['linkage']
    labels = clustering['labels']
    link_function, colors = get_dendrogram_color_fun(link, clustering['reorder_vec'],
                                                     labels)
    
    # set figure properties
    figsize = (size, size*.6)
    with sns.axes_style('white'):
        fig = plt.figure(figsize=figsize)
        # **********************************
        # plot dendrogram
        # **********************************
        with plt.rc_context({'lines.linewidth': size*.125}):
            # assumed completion (the snippet truncates here): draw the tree
            # with the cluster-derived link colors
            dendrogram(link, link_color_func=link_function, no_labels=True)
    return fig
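
# Example usage (not in the original, which truncates above). The argument
# format and return keys of hierarchical_cluster are assumed to match what
# plot_dendrogram consumes.
clustering = hierarchical_cluster(demo_factors.T)
fig = plot_dendrogram(clustering)
fig.savefig('demo_factor_dendrogram.png')  # filename is an assumption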
Example #9
from glob import glob
from os import path
import matplotlib.pyplot as plt
import seaborn as sns
from selfregulation.utils.result_utils import load_results
from selfregulation.utils.utils import get_info, get_recent_dataset
# load_files, summarize_k, and get_retest_data are project utilities whose
# import statements were truncated from this snippet

# In[ ]:

dataset = get_recent_dataset()
results_dir = get_info('results_directory')
ontology_results_dir = path.join(results_dir, 'ontology_reconstruction',
                                 dataset, '*', 'oblimin')
retest_data = get_retest_data(dataset.replace('Complete', 'Retest'))
plot_dir = glob(path.join(ontology_results_dir, 'Plots'))[0]
save = True

# In[ ]:

results = load_results(dataset)['task']
c = results.EFA.get_c()

# # Load Reconstructions

# In[ ]:

KNNR_files = glob(path.join(ontology_results_dir, 'KNNR_*'))
KNNR_loaded = load_files(KNNR_files)
KNNR_var_summary, KNNR_best_params, KNNR_reconstructions = summarize_k(
    KNNR_loaded)

# In[ ]:

KNNRind_files = glob(path.join(ontology_results_dir, 'KNNRind_*'))
KNNRind_loaded = load_files(KNNRind_files)
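
# The snippet truncates here; by symmetry with the KNNR block above, the next
# step was presumably:
KNNRind_var_summary, KNNRind_best_params, KNNRind_reconstructions = summarize_k(
    KNNRind_loaded)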
    args, _ = parser.parse_known_args()
    pop_sizes = args.pop_sizes
    n_reps = args.n_reps
    n_vars = args.n_vars
    if args.dataset is not None:
        dataset = args.dataset
    else:
        dataset = get_recent_dataset()


# In[ ]:


# additional setup
# imports needed by this fragment (its original header was truncated)
import numpy as np
from os import makedirs, path
from sklearn.linear_model import LinearRegression, Ridge
from selfregulation.utils.result_utils import load_results
from selfregulation.utils.utils import get_info
np.random.seed(12412)
results = load_results(dataset)['task']
c = results.EFA.results['num_factors']

classifiers = {'Ridge': Ridge(fit_intercept=False),
               'LR': LinearRegression(fit_intercept=False)}
# get output dir to store results
output_dir = path.join(get_info('results_directory'),
                       'ontology_reconstruction', results.ID)
makedirs(output_dir, exist_ok=True)


# In[ ]:


# get a random subset of variables to perform the calculation on if n_vars is set
if n_vars is not None:
    # assumed completion (the snippet truncates here); var_list is a
    # hypothetical name
    var_list = np.random.choice(results.data.columns, n_vars, replace=False)
else:
    var_list = results.data.columns
Example #11
import numpy as np
import pandas as pd
from selfregulation.utils.result_utils import load_results
from dimensional_structure.prediction_utils import run_prediction

results = load_results('Complete_02-03-2018')['task']
final_df = pd.DataFrame()
for classifier in ['ridge', 'lasso']:
    full = results.load_prediction_object(classifier=classifier, EFA=False)
    # get no-discount data (drop without inplace so results.data is untouched)
    no_discount_predictors = results.data.drop(
        results.data.filter(regex='holt|kirby|discount').columns, axis=1)
    wD = full['data']
    nD = run_prediction(no_discount_predictors,
                        results.DA.get_scores(),
                        classifier=classifier,
                        save=False)

    # insample
    wD_insample = sorted([(k, i['scores_insample'][0] * 100)
                          for k, i in wD.items()],
                         key=lambda x: x[0])
    nD_insample = sorted([(k, i[0] * 100)
                          for k, i in nD.scores_insample.items()],
                         key=lambda x: x[0])
    insample_df = pd.DataFrame(wD_insample, columns=['target', 'wD'])
    insample_df.insert(2, 'nD', [i[1] for i in nD_insample])
    insample_df = insample_df.assign(type='insample')
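
    # Not in the original (truncated): accumulate the comparison across
    # classifiers into the final_df initialized above.
    final_df = pd.concat([final_df, insample_df.assign(classifier=classifier)])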
Example #12
from os import makedirs, path
import pandas as pd
from scipy.spatial.distance import squareform
from sklearn.manifold import MDS
import seaborn as sns
from dimensional_structure.HCA_plots import abs_pdist
from selfregulation.utils.result_utils import load_results
from selfregulation.utils.utils import get_info, get_recent_dataset

# get dataset of interest
basedir = get_info('base_directory')
dataset = get_recent_dataset()
dataset = path.join(basedir, 'Data', dataset)
datafile = dataset.split(path.sep)[-1]

# load data
results = load_results(datafile)
data = results['task'].data
out = results['task'].EFA.get_loading()
nfactors = out.shape[1]
task_subset = pd.concat([
    out.filter(regex='choice_reaction_time', axis=0),
    out.filter(regex=r'^stop_signal\.(hddm|SSRT)', axis=0)[1:5]
])
task_subset_data = data.loc[:, task_subset.index]
task_variables = list(task_subset.index)
plot_dir = output_dir = path.join(get_info('results_directory'),
                                  'ontology_reconstruction',
                                  results['task'].ID, 'Plots')
makedirs(plot_dir, exist_ok=True)

# plot
size = 8
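
# A hedged sketch of the plot this sets up (the original truncates here):
# embed the absolute-correlation distances of the selected variables with MDS.
# Assumes abs_pdist returns condensed distances over rows, like scipy's pdist.
dists = abs_pdist(task_subset_data.T)
mds = MDS(n_components=2, dissimilarity='precomputed', random_state=0)
coords = mds.fit_transform(squareform(dists))
ax = sns.scatterplot(x=coords[:, 0], y=coords[:, 1])
ax.figure.set_size_inches(size, size)
ax.figure.savefig(path.join(plot_dir, 'task_subset_MDS.png'))  # name assumed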
Example #13
import fancyimpute
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LassoCV, RidgeCV, ElasticNetCV
from sklearn.metrics import r2_score, make_scorer

from selfregulation.utils.utils import get_behav_data, get_recent_dataset, get_demographics
from selfregulation.utils.result_utils import load_results
from selfregulation.utils.get_balanced_folds import BalancedKFold

# load data

dataset = get_recent_dataset()
items = get_behav_data(dataset=dataset, file='items.csv.gz')
subject_items = get_behav_data(dataset=dataset, file='subject_x_items.csv')
survey_ontology = load_results(dataset)['survey']
demographics = survey_ontology.DA.data
demo_factors = survey_ontology.DA.get_scores()

# set up prediction
imputer = fancyimpute.KNN()
predictors = imputer.fit_transform(subject_items)
targets = demo_factors.values

# set up cross-validation
for i, name in enumerate(demo_factors.columns):
    CV = BalancedKFold(nfolds=10)
    # stratify folds on the factor actually being predicted
    CV_iter = list(CV.split(predictors, targets[:, i]))
    clf = RidgeCV(cv=5)
    # assumed completion of the truncated call: score the survey EFA scores
    # against each demographic factor with the balanced folds
    score = cross_val_score(clf,
                            survey_ontology.EFA.get_scores(),
                            targets[:, i],
                            cv=CV_iter,
                            scoring=make_scorer(r2_score))
    print(name, score.mean())
Example #15
    """
    y = scale(data)
    clf.fit(scores, y)

    out = clf.coef_
    if len(out.shape) == 1:
        out = out.reshape(1, -1)
    out = pd.DataFrame(out, columns=scores.columns)
    out.index = data.columns
    return out


# do mapping
dataset = get_recent_dataset()
# load ontology
results = load_results(datafile=dataset)
task_loadings = results['task'].EFA.get_loading()
task_scores = results['task'].EFA.get_scores()
# load all DVs
all_DVs = get_behav_data(file='variables_exhaustive.csv', dataset=dataset)

contrast_loadings = {}
for contrast, relation in fmri_ontology_mapping.items():
    # if relation is in the direct mapping
    if relation.lstrip('-') in task_loadings.index:
        task_loading = task_loadings.loc[relation.lstrip('-')]
        if relation[0] == '-':
            task_loading = task_loading * -1
    # otherwise, reconstruct!
    else:
        unmapped_data = all_DVs.loc[:, relation.lstrip('-')]
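        # assumed completion (the original truncates here): estimate a loading
        # for the unmapped contrast by regressing its data onto the task EFA
        # scores with the helper defined above, then apply the sign flip
        shared = unmapped_data.dropna().index.intersection(task_scores.index)
        est = get_ontology_coefs(unmapped_data.loc[shared].to_frame(),
                                 task_scores.loc[shared],
                                 LinearRegression(fit_intercept=False))
        task_loading = est.iloc[0]
        if relation[0] == '-':
            task_loading = task_loading * -1
    contrast_loadings[contrast] = task_loading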