def repopulate_cognitive_atlas(CognitiveAtlasTask=None, CognitiveAtlasContrast=None):
    """Sync the CognitiveAtlasTask and CognitiveAtlasContrast tables with the
    Cognitive Atlas API, then ensure an "Other" contrast exists.

    :param CognitiveAtlasTask: optional model class; defaults to the
        neurovault statmaps model when not provided
    :param CognitiveAtlasContrast: optional model class; defaults to the
        neurovault statmaps model when not provided
    """
    # Late imports keep Django app loading out of module import time
    if CognitiveAtlasTask is None:
        from neurovault.apps.statmaps.models import CognitiveAtlasTask
    if CognitiveAtlasContrast is None:
        from neurovault.apps.statmaps.models import CognitiveAtlasContrast
    from cognitiveatlas.api import get_task

    tasks = get_task()

    # Update tasks
    for t in range(len(tasks.json)):
        task = tasks.json[t]
        print("%s of %s" % (t, len(tasks.json)))
        if tasks.json[t]["name"]:
            task, _ = CognitiveAtlasTask.objects.update_or_create(
                cog_atlas_id=task["id"], defaults={"name": task["name"]})
            task.save()
        if tasks.json[t]["id"]:
            # A second, per-id call returns the full record including contrasts
            task_details = get_task(id=tasks.json[t]["id"])
            if task_details.json[0]["contrasts"]:
                print("Found %s contrasts!" % (len(task_details.json[0]["contrasts"])))
                for contrast in task_details.json[0]["contrasts"]:
                    contrast, _ = CognitiveAtlasContrast.objects.update_or_create(
                        cog_atlas_id=contrast["id"],
                        defaults={"name": contrast["contrast_text"], "task": task})
                    contrast.save()

    # Add an "Other" contrast, attached to the "None / Other" task
    task = CognitiveAtlasTask.objects.filter(name="None / Other")[0]
    contrast, _ = CognitiveAtlasContrast.objects.update_or_create(
        cog_atlas_id="Other", defaults={"name": "Other", "task": task})
def repopulate_cognitive_atlas(CognitiveAtlasTask=None, CognitiveAtlasContrast=None):
    """Refresh CognitiveAtlasTask/CognitiveAtlasContrast rows from the
    Cognitive Atlas API and guarantee an "Other" contrast entry.

    :param CognitiveAtlasTask: optional model class override (defaults to
        the neurovault statmaps model)
    :param CognitiveAtlasContrast: optional model class override (defaults to
        the neurovault statmaps model)
    """
    if CognitiveAtlasTask is None:
        from neurovault.apps.statmaps.models import CognitiveAtlasTask
    if CognitiveAtlasContrast is None:
        from neurovault.apps.statmaps.models import CognitiveAtlasContrast
    from cognitiveatlas.api import get_task

    tasks = get_task()

    # Update tasks
    for t in range(len(tasks.json)):
        task = tasks.json[t]
        print("%s of %s" % (t, len(tasks.json)))
        if tasks.json[t]["name"]:
            task, _ = CognitiveAtlasTask.objects.update_or_create(
                cog_atlas_id=task["id"], defaults={"name": task["name"]})
            task.save()
        if tasks.json[t]["id"]:
            # Per-id lookup returns the complete record, including contrasts
            task_details = get_task(id=tasks.json[t]["id"])
            if task_details.json[0]["contrasts"]:
                print("Found %s contrasts!" % (len(task_details.json[0]["contrasts"])))
                for contrast in task_details.json[0]["contrasts"]:
                    contrast, _ = CognitiveAtlasContrast.objects.update_or_create(
                        cog_atlas_id=contrast["id"],
                        defaults={"name": contrast["contrast_text"], "task": task})
                    contrast.save()

    # Add an "Other" contrast
    task = CognitiveAtlasTask.objects.filter(name="None / Other")[0]
    contrast, _ = CognitiveAtlasContrast.objects.update_or_create(
        cog_atlas_id="Other", defaults={"name": "Other", "task": task})
def task_json_dump(output_file="all_tasks.json"):
    '''Use the cognitiveatlas library to dump full task records to a json file.

    :param output_file: path of the json file to write
        (default "all_tasks.json", matching the original behavior)
    '''
    tasks = get_task().json
    # One extra API call per task retrieves the complete record
    all_tasks = [get_task(id=task['id']).json for task in tasks]
    with open(output_file, 'w') as fp:
        json.dump(all_tasks, fp)
def get_cognitiveatlas_task(task_id):
    '''get_cognitiveatlas_task return the database entry for CognitiveAtlasTask
    if it exists, and update concepts for that task. If not, create it.

    :param task_id: the unique id for the cognitive atlas task
    :returns: the CognitiveAtlasTask instance, or None on any API/database error
    '''
    try:
        task = get_task(id=task_id).json[0]
        cogatlas_task, _ = CognitiveAtlasTask.objects.update_or_create(
            cog_atlas_id=task["id"], defaults={"name": task["name"]})
        concept_list = []
        if "concepts" in task:
            for concept in task["concepts"]:
                cogatlas_concept = get_concept(
                    id=concept["concept_id"]).json[0]
                # NOTE(review): "definition" is passed as a lookup kwarg rather
                # than inside defaults, exactly as the original did — confirm
                # this is the intended update_or_create semantics.
                cogatlas_concept, _ = CognitiveAtlasConcept.objects.update_or_create(
                    cog_atlas_id=cogatlas_concept["id"],
                    defaults={"name": cogatlas_concept["name"]},
                    definition=cogatlas_concept["definition_text"])
                cogatlas_concept.save()
                concept_list.append(cogatlas_concept)
        cogatlas_task.concepts = concept_list
        cogatlas_task.save()
        return cogatlas_task
    except Exception:
        # Exception (not BaseException) so KeyboardInterrupt/SystemExit still
        # propagate; any API or database error returns None.
        return None
def get_cognitiveatlas_task(task_id):
    """get_cognitiveatlas_task return the database entry for CognitiveAtlasTask
    if it exists, and update concepts for that task. If not, create it.

    :param task_id: the unique id for the cognitive atlas task
    :returns: the CognitiveAtlasTask instance, or None on any API/database error
    """
    try:
        task = get_task(id=task_id).json[0]
        cogatlas_task, _ = CognitiveAtlasTask.objects.update_or_create(
            cog_atlas_id=task["id"], defaults={"name": task["name"]}
        )
        concept_list = []
        if "concepts" in task:
            for concept in task["concepts"]:
                cogatlas_concept = get_concept(id=concept["concept_id"]).json[0]
                # NOTE(review): "definition" is passed as a lookup kwarg (not in
                # defaults), mirroring the original call — verify intended.
                cogatlas_concept, _ = CognitiveAtlasConcept.objects.update_or_create(
                    cog_atlas_id=cogatlas_concept["id"],
                    defaults={"name": cogatlas_concept["name"]},
                    definition=cogatlas_concept["definition_text"],
                )
                cogatlas_concept.save()
                concept_list.append(cogatlas_concept)
        cogatlas_task.concepts = concept_list
        cogatlas_task.save()
        return cogatlas_task
    except Exception:
        # Narrowed from a bare except: any API/database error returns None,
        # but KeyboardInterrupt/SystemExit are no longer swallowed.
        return None
def main():
    """Generate per-task JSON trees (task -> contrasts -> concepts) for the
    NeuroVault-tagged Cognitive Atlas tasks, written under ./data."""
    tasks = get_task()
    output_folder = os.path.abspath("data")
    print("Generating Cognitive Atlas Data...")

    # Cognitive Atlas tasks we are interested in from NeuroVault tags
    task_uids = ['trm_553e77e53497d', 'trm_553ebfc390256', 'trm_553e88a66b676',
                 'trm_553fd2fc7a648', 'trm_4ebd482eba5b1', 'trm_4ebc98cc77e7b',
                 'trm_4ebc728326a13', 'trm_4ebc6a6b75ebf', 'trm_4ebc9d2e397f2',
                 'trm_553fce5d21da7', 'trm_553fcbbe974ba', 'trm_4da890594742a',
                 'trm_4d559bcd67c18', 'trm_4cacee4a1d875', 'trm_4c898c0786246',
                 'trm_4ebd47b8bab6b', 'tsk_4a57abb949a4f', 'trm_4f2456027809f',
                 'trm_553e73e29cf7d', 'trm_4c8a834779883', 'trm_4cacf22a22d80',
                 'trm_4e8dd3831f0cc', 'trm_53c4465b0466f', 'trm_553fbbf79ebc5',
                 'trm_5542841f3dcd5', 'trm_5346938eed092', 'trm_534692ef3b5df',
                 'trm_534690b0e9dc5', 'trm_5346927710e88', 'trm_4f244ad7dcde7',
                 'trm_551b1460e89a3', 'trm_553e6b8e33da4', 'trm_553e85265f51e',
                 'tsk_4a57abb949bf6', 'trm_4f24179122380', 'tsk_4a57abb949e1a',
                 'trm_4cacf3fbc503b', 'trm_5181f83b77fa4', 'trm_5181f863d24f4',
                 'trm_553eb45e2b709', 'trm_550b5b066d37b', 'trm_550b50095d4a3',
                 'trm_550b53d7dd674', 'trm_550b5c1a7f4db', 'trm_550b54a8b30f4',
                 'trm_550b557e5f90e', 'trm_550b5a47aa23e', 'trm_553eb28436233',
                 'trm_50df0dd9d0b6f', 'trm_553fc858cacc5']

    # Functions for making nodes
    def make_node(nid, name, color):
        return {"nid": nid, "name": name, "color": color}

    for task in tasks.json:
        if task["name"] != "":
            print("Parsing task %s..." % task["name"])
            task_name = task["name"].replace(" ", "_").replace("/", "_").lower()
            if task["id"] in task_uids:
                task_node = make_node(task["id"], task["name"], "#63506d")
                single_task = get_task(id=task["id"]).json[0]
                # We only want to see contrasts with associated concepts
                task_contrasts = single_task["contrasts"]
                task_concepts = []
                for contrast in task_contrasts:
                    try:
                        contrast_node = make_node(contrast["id"],
                                                  contrast["contrast_text"],
                                                  "#d89013")
                        contrast_concepts = get_concept(contrast_id=contrast["id"])
                        children = []
                        current_names = []
                        for concept in contrast_concepts.json:
                            if concept["name"] not in current_names:
                                children.append(make_node(concept["id"],
                                                          concept["name"],
                                                          "#3c7263"))
                                current_names.append(concept["name"])
                        contrast_node["children"] = children
                        # Only append contrast if it has children
                        if len(children) > 0:
                            task_concepts.append(contrast_node)
                    except Exception:
                        # Best effort: skip contrasts the API cannot resolve
                        pass
                task_node["children"] = task_concepts
                # Save to file if we have children
                if len(task_concepts) > 0:
                    with open('%s/%s.json' % (output_folder, task_name), 'w') as filey:
                        filey.write(json.dumps(task_node, sort_keys=True, indent=4,
                                               separators=(',', ': ')))
def test_task():
    """Exercise get_task lookups by id, by name, and by both together."""
    print("### TESTING TASK QUERIES:")
    task_id = "trm_4cacee4a1d875"
    task_name = "mixed gambles task"

    # task_id and task_name
    result = get_task(id=task_id, name=task_name)
    assert_equal(result.json[0]["type"], "task")
    assert_equal(result.json[0]["event_stamp"], "2010-10-06 21:46:50")

    # task_id
    result = get_task(id=task_id)
    assert_equal(result.json[0]["name"], task_name)

    # task_name
    result = get_task(name=task_name)
    assert_equal(result.json[0]["id"], task_id)
def test_task():
    """Query one known task three ways (id+name, id only, name only) and check
    the returned record is consistent each time."""
    print("### TESTING TASK QUERIES:")
    expected_id = "trm_4cacee4a1d875"
    expected_name = "mixed gambles task"

    # Both identifiers at once
    record = get_task(id=expected_id, name=expected_name).json[0]
    assert_equal(record["type"], "task")
    assert_equal(record["event_stamp"], "2010-10-06 21:46:50")

    # By id alone
    assert_equal(get_task(id=expected_id).json[0]["name"], expected_name)

    # By name alone
    assert_equal(get_task(name=expected_name).json[0]["id"], expected_id)
def download_cognitive_atlas(data_dir=None, overwrite=False, verbose=1):
    """Download Cognitive Atlas ontology and extract IDs and relationships.

    .. versionadded:: 0.0.2

    Parameters
    ----------
    data_dir : :obj:`str`, optional
        Location in which to place Cognitive Atlas files.
        Default is None, which uses the package's default path for downloaded
        data.
    overwrite : :obj:`bool`, optional
        Whether to overwrite existing files or not. Default is False.
    verbose : :obj:`int`, optional
        Default is 1.

    Returns
    -------
    out_dict : :obj:`dict`
        Dictionary with two keys: 'ids' and 'relationships'. Each points to a
        csv file. The 'ids' file contains CogAt identifiers, canonical names,
        and aliases, sorted by alias length (number of characters).
        The 'relationships' file contains associations between CogAt items,
        with three columns: input, output, and rel_type (relationship type).
    """
    from cognitiveatlas.api import get_concept, get_disorder, get_task

    dataset_name = "cognitive_atlas"
    data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir, verbose=verbose)

    ids_file = op.join(data_dir, "cogat_aliases.csv")
    rels_file = op.join(data_dir, "cogat_relationships.csv")
    # Only hit the web API when the cached csvs are missing or overwrite=True
    if overwrite or not all([op.isfile(f) for f in [ids_file, rels_file]]):
        concepts = get_concept(silent=True).pandas
        tasks = get_task(silent=True).pandas
        disorders = get_disorder(silent=True).pandas

        # Identifiers and aliases
        long_concepts = _longify(concepts)
        long_tasks = _longify(tasks)

        # Disorders currently lack aliases
        disorders["name"] = disorders["name"].str.lower()
        disorders = disorders.assign(alias=disorders["name"])
        disorders = disorders[["id", "name", "alias"]]

        # Combine into aliases DataFrame
        aliases = pd.concat((long_concepts, long_tasks, disorders), axis=0)
        aliases = _expand_df(aliases)
        aliases = aliases.replace("", np.nan)
        aliases = aliases.dropna(axis=0)
        aliases = aliases.reset_index(drop=True)

        # Relationships: one API call per unique concept/task/disorder id
        relationship_list = []
        for i, id_ in enumerate(concepts["id"].unique()):
            if i % 100 == 0:
                # Periodic pause to avoid hammering the API
                time.sleep(5)
            # Every item relates to itself
            row = [id_, id_, "isSelf"]
            relationship_list.append(row)
            concept = get_concept(id=id_, silent=True).json
            for rel in concept["relationships"]:
                reltype = _get_concept_reltype(rel["relationship"],
                                               rel["direction"])
                if reltype is not None:
                    row = [id_, rel["id"], reltype]
                    relationship_list.append(row)

        for i, id_ in enumerate(tasks["id"].unique()):
            if i % 100 == 0:
                time.sleep(5)
            row = [id_, id_, "isSelf"]
            relationship_list.append(row)
            task = get_task(id=id_, silent=True).json
            for rel in task["concepts"]:
                # Record both directions: task measures concept, and vice versa
                row = [id_, rel["concept_id"], "measures"]
                relationship_list.append(row)
                row = [rel["concept_id"], id_, "measuredBy"]
                relationship_list.append(row)

        for i, id_ in enumerate(disorders["id"].unique()):
            if i % 100 == 0:
                time.sleep(5)
            row = [id_, id_, "isSelf"]
            relationship_list.append(row)
            disorder = get_disorder(id=id_, silent=True).json
            for rel in disorder["disorders"]:
                # Normalize the API's "ISA" spelling to "isA"
                if rel["relationship"] == "ISA":
                    rel_type = "isA"
                else:
                    rel_type = rel["relationship"]
                row = [id_, rel["id"], rel_type]
                relationship_list.append(row)

        relationships = pd.DataFrame(columns=["input", "output", "rel_type"],
                                     data=relationship_list)

        # Concept -> concept-class membership ("inCategory") relationships
        ctp_df = concepts[["id", "id_concept_class"]]
        ctp_df = ctp_df.assign(rel_type="inCategory")
        ctp_df.columns = ["input", "output", "rel_type"]
        ctp_df["output"].replace("", np.nan, inplace=True)
        ctp_df.dropna(axis=0, inplace=True)

        relationships = pd.concat((ctp_df, relationships))
        relationships = relationships.reset_index(drop=True)
        aliases.to_csv(ids_file, index=False)
        relationships.to_csv(rels_file, index=False)

    out_dict = {"ids": ids_file, "relationships": rels_file}

    return out_dict
tx = graph.cypher.begin() #class Task(models.NodeModel): # name = models.StringProperty() # uid = models.StringProperty(indexed=True) # derived_from = models.Relationship('self',rel_type='DERIVEDFROM') # definition = models.StringProperty() # has_condition = models.Relationship(Condition,rel_type='HASCONDITION') # has_implementation = models.Relationship(Experiment,rel_type='HASIMPLEMENTATION') # ExperimentFactory? # mentioned_in = models.Relationship('PMID',rel_type='MENTIONEDIN') for row in tasks.iterrows(): name = row[1].term uid = row[1].url.split("/")[-1] try: task = get_task(id=uid,silent=True).json definition = task[0]["definition_text"] except: definition = "" if not str(name) =="nan": properties = {"definition":definition} node = make_node("task",uid,name,properties) #class Condition(models.NodeModel): # name = models.StringProperty() # uid = models.StringProperty(indexed=True) # has_contrast = models.Relationship(Contrast,rel_type='HASCONTRAST') for row in conditions.iterrows(): name = row[1].condition_text user = row[1].id_user
import os
import json

import pandas

from cogpheno.apps.assessments.models import CognitiveAtlasTask, CognitiveAtlasConcept
from cognitiveatlas.api import get_task, get_concept

# Pull the full task and concept listings from the Cognitive Atlas API
tasks = get_task()
concepts = get_concept()

# Create or update a database row for every task
for t in range(len(tasks.json)):
    task = tasks.json[t]
    print("%s of %s" % (t, len(tasks.json)))
    task, _ = CognitiveAtlasTask.objects.update_or_create(
        cog_atlas_id=task["id"], defaults={"name": task["name"]})
    task.save()

# Or just update those not in
#termid_present = [ct.cog_atlas_id for ct in CognitiveAtlasTask.objects.all()]
#termid = [tasks.json[x]["id"] for x in range(0,len(tasks.json))]
#termid_missing = [x for x in range(0,len(termid)) if termid[x] not in termid_present]
#for m in termid_missing:
#    task = tasks.json[m]
#    task, _ = CognitiveAtlasTask.objects.update_or_create(cog_atlas_id=task["id"], defaults={"name":task["name"]})
#    task.save()

# Create or update a database row for every concept
for c in range(len(concepts.json)):
    concept = concepts.json[c]
    print("%s of %s" % (c, len(concepts.json)))
    # NOTE(review): "definition" is passed as a lookup kwarg rather than in
    # defaults, matching the original call — confirm intended semantics.
    concept, _ = CognitiveAtlasConcept.objects.update_or_create(
        cog_atlas_id=concept["id"],
        defaults={"name": concept["name"]},
        definition=concept["definition_text"])
    concept.save()
def concept_node_triples(image_dict=None, output_file="concept_node_triples.tsv",
                         delim="\t", save_to_file=True, lookup_key_type="contrast"):
    '''concept_node_triples
    Export a list of nodes, in triples.

    :param image_dict [OPTIONAL]: dict
        a dictionary of [term_id:image_file] pairs, eg

        ..note::

             {"cnt_4decfedb91973":["image1.nii.gz","image2.nii.gz"]}

        This will mean that the images in the list will be assigned to all
        concept nodes associated with the term specified. This allows for
        inference over the tree (for example, some relationship with concept
        nodes that are parents of assigned nodes). Specifying an image
        dictionary will append the images as the base nodes of the tree. No
        image dictionary means that the base nodes will be the lowest level
        concepts. You must specify the term type as "contrast" or "task"
        (see lookup_key_type)
    :param output_file: path of the output file
    :param delim: str delimiter for output file, default is tab.
    :param save_to_file: boolean, False will only return the pandas data frame
    :param lookup_key_type: the term type used as a key in the image_dict.
        Either "task" or "contrast" (default is contrast)

    ..note::
        Output looks like

        id    parent  name
        1 none BASE                    # there is always a base node
        trm_12345 1   MEMORY           # high level concept groups
        trm_23456 1   PERCEPTION
        trm_34567 trm_12345   WORKING MEMORY   # concepts
        trm_56789 trm_12345   LONG TERM MEMORY
        trm_67890 trm_34567   image1.nii.gz    # associated images (discovered by way of contrasts)
        trm_78901 trm_34567   image2.nii.gz

    :returns: pandas DataFrame with columns ["id", "parent", "name"]
    '''
    concepts = filter_concepts()

    if save_to_file:
        filey = init_output_file(output_file, delim=delim)
    df = pandas.DataFrame(columns=["id", "parent", "name"])
    df.loc[0] = ["1", "None", "BASE"]

    # Generate a unique id for each concept (ids 2..N+1; 1 is the base node)
    concept_lookup = {concept["id"]: c + 2 for c, concept in enumerate(concepts)}
    count = 1

    # Generate tree for main concepts
    for concept in concepts:
        parents = []
        if "relationships" in concept:
            for relation in concept["relationships"]:
                if relation["direction"] == "parent":
                    # We can only use "kind of" otherwise we get circular reference
                    if relation["relationship"] == "kind of":
                        if relation["id"] in concept_lookup:
                            parents.append(relation["id"])
        if not parents:
            # No parent: attach directly under the base node ("1")
            if save_to_file:
                make_node(concept["id"], concept["name"], "1", delim, filey)
            df.loc[count] = [concept["id"], "1", concept["name"]]
            count += 1
        else:
            # One triple per parent relationship
            for parent in parents:
                if save_to_file:
                    make_node(concept["id"], concept["name"], parent, delim, filey)
                df.loc[count] = [concept["id"], parent, concept["name"]]
                count += 1

    # Now add an entry for each image / contrast, may be multiple for each image
    if image_dict:
        node_id = max(concept_lookup.values()) + 1
        for conid, image_paths in image_dict.items():
            if lookup_key_type == "contrast":
                concepts_single = get_concept(contrast_id=conid).json
                key_id = "id"
            else:
                concepts_single = get_task(id=conid).json[0]
                if "concepts" in concepts_single:
                    concepts_single = concepts_single["concepts"]
                else:
                    concepts_single = None
                key_id = "concept_id"
            if concepts_single is not None:
                for con in concepts_single:
                    # The concept is the parent of the image
                    if con:
                        for image_path in image_paths:
                            if save_to_file:
                                make_node("node_%s" % node_id, image_path,
                                          con[key_id], delim, filey)
                            df.loc[count] = ["node_%s" % node_id,
                                             con[key_id], image_path]
                            node_id += 1
                            count += 1

    if save_to_file:
        filey.close()
        print("%s has been created." % output_file)

    return df
def main():
    """Generate per-task JSON trees (task -> contrasts -> concepts) for the
    NeuroVault-tagged Cognitive Atlas tasks, written under ./data."""
    tasks = get_task()
    output_folder = os.path.abspath("data")
    print("Generating Cognitive Atlas Data...")

    # Cognitive Atlas tasks we are interested in from NeuroVault tags
    task_uids = [
        'trm_553e77e53497d', 'trm_553ebfc390256', 'trm_553e88a66b676',
        'trm_553fd2fc7a648', 'trm_4ebd482eba5b1', 'trm_4ebc98cc77e7b',
        'trm_4ebc728326a13', 'trm_4ebc6a6b75ebf', 'trm_4ebc9d2e397f2',
        'trm_553fce5d21da7', 'trm_553fcbbe974ba', 'trm_4da890594742a',
        'trm_4d559bcd67c18', 'trm_4cacee4a1d875', 'trm_4c898c0786246',
        'trm_4ebd47b8bab6b', 'tsk_4a57abb949a4f', 'trm_4f2456027809f',
        'trm_553e73e29cf7d', 'trm_4c8a834779883', 'trm_4cacf22a22d80',
        'trm_4e8dd3831f0cc', 'trm_53c4465b0466f', 'trm_553fbbf79ebc5',
        'trm_5542841f3dcd5', 'trm_5346938eed092', 'trm_534692ef3b5df',
        'trm_534690b0e9dc5', 'trm_5346927710e88', 'trm_4f244ad7dcde7',
        'trm_551b1460e89a3', 'trm_553e6b8e33da4', 'trm_553e85265f51e',
        'tsk_4a57abb949bf6', 'trm_4f24179122380', 'tsk_4a57abb949e1a',
        'trm_4cacf3fbc503b', 'trm_5181f83b77fa4', 'trm_5181f863d24f4',
        'trm_553eb45e2b709', 'trm_550b5b066d37b', 'trm_550b50095d4a3',
        'trm_550b53d7dd674', 'trm_550b5c1a7f4db', 'trm_550b54a8b30f4',
        'trm_550b557e5f90e', 'trm_550b5a47aa23e', 'trm_553eb28436233',
        'trm_50df0dd9d0b6f', 'trm_553fc858cacc5'
    ]

    # Functions for making nodes
    def make_node(nid, name, color):
        return {"nid": nid, "name": name, "color": color}

    for task in tasks.json:
        if task["name"] != "":
            print("Parsing task %s..." % task["name"])
            task_name = task["name"].replace(" ", "_").replace("/", "_").lower()
            if task["id"] in task_uids:
                task_node = make_node(task["id"], task["name"], "#63506d")
                single_task = get_task(id=task["id"]).json[0]
                # We only want to see contrasts with associated concepts
                task_contrasts = single_task["contrasts"]
                task_concepts = []
                for contrast in task_contrasts:
                    try:
                        contrast_node = make_node(contrast["id"],
                                                  contrast["contrast_text"],
                                                  "#d89013")
                        contrast_concepts = get_concept(
                            contrast_id=contrast["id"])
                        children = []
                        current_names = []
                        for concept in contrast_concepts.json:
                            if concept["name"] not in current_names:
                                children.append(
                                    make_node(concept["id"], concept["name"],
                                              "#3c7263"))
                                current_names.append(concept["name"])
                        contrast_node["children"] = children
                        # Only append contrast if it has children
                        if len(children) > 0:
                            task_concepts.append(contrast_node)
                    except Exception:
                        # Best effort: skip contrasts the API cannot resolve
                        pass
                task_node["children"] = task_concepts
                # Save to file if we have children
                if len(task_concepts) > 0:
                    with open('%s/%s.json' % (output_folder, task_name), 'w') as filey:
                        filey.write(
                            json.dumps(task_node, sort_keys=True, indent=4,
                                       separators=(',', ': ')))
tx = graph.cypher.begin() #class Task(models.NodeModel): # name = models.StringProperty() # uid = models.StringProperty(indexed=True) # derived_from = models.Relationship('self',rel_type='DERIVEDFROM') # definition = models.StringProperty() # has_condition = models.Relationship(Condition,rel_type='HASCONDITION') # has_implementation = models.Relationship(Experiment,rel_type='HASIMPLEMENTATION') # ExperimentFactory? # mentioned_in = models.Relationship('PMID',rel_type='MENTIONEDIN') for row in tasks.iterrows(): name = row[1].term uid = row[1].url.split("/")[-1] try: task = get_task(id=uid, silent=True).json definition = task[0]["definition_text"] except: definition = "" if not str(name) == "nan": properties = {"definition": definition} node = make_node("task", uid, name, properties) #class Condition(models.NodeModel): # name = models.StringProperty() # uid = models.StringProperty(indexed=True) # has_contrast = models.Relationship(Contrast,rel_type='HASCONTRAST') for row in conditions.iterrows(): name = row[1].condition_text user = row[1].id_user
def concept_node_triples(image_dict=None,
                         output_file="concept_node_triples.tsv",
                         delim="\t",
                         save_to_file=True,
                         lookup_key_type="contrast"):
    '''concept_node_triples
    Export a list of nodes, in triples.

    :param image_dict [OPTIONAL]: dict
        a dictionary of [term_id:image_file] pairs, eg

        ..note::

             {"cnt_4decfedb91973":["image1.nii.gz","image2.nii.gz"]}

        This will mean that the images in the list will be assigned to all
        concept nodes associated with the term specified. This allows for
        inference over the tree (for example, some relationship with concept
        nodes that are parents of assigned nodes). Specifying an image
        dictionary will append the images as the base nodes of the tree. No
        image dictionary means that the base nodes will be the lowest level
        concepts. You must specify the term type as "contrast" or "task"
        (see lookup_key_type)
    :param output_file: path of the output file
    :param delim: str delimeter for output file, default is tab.
    :param save_to_file: boolean, False will only return pandas data frame
    :param lookup_key_type: the term type used as a key in the image_dict.
        Either "task" or "contrast" (default is contrast)

    ..note::
        Output looks like

        id    parent  name
        1 none BASE                    # there is always a base node
        trm_12345 1   MEMORY           # high level concept groups
        trm_23456 1   PERCEPTION
        trm_34567 trm_12345   WORKING MEMORY   # concepts
        trm_56789 trm_12345   LONG TERM MEMORY
        trm_67890 trm_34567   image1.nii.gz    # associated images (discovered by way of contrasts)
        trm_78901 trm_34567   image2.nii.gz
    '''
    concepts = filter_concepts()

    if save_to_file == True:
        filey = init_output_file(output_file, delim=delim)
    df = pandas.DataFrame(columns=["id", "parent", "name"])
    df.loc[0] = ["1", "None", "BASE"]

    # Generate a unique id for each concept (ids 2..N+1; 1 is the base node)
    concept_lookup = dict()
    for c in range(0, len(concepts)):
        concept_lookup[concepts[c]["id"]] = c + 2
    count = 1

    # Generate tree for main concepts
    for concept in concepts:
        parents = []
        if "relationships" in concept:
            for relation in concept["relationships"]:
                if relation["direction"] == "parent":
                    # We can only use "kind of" otherwise we get circular reference
                    if relation["relationship"] == "kind of":
                        if relation["id"] in concept_lookup:
                            parents.append(relation["id"])
        if not parents:
            # No parent: attach directly under the base node ("1")
            # make_node(node_id,name,parent,delim,file_obj):
            if save_to_file == True:
                make_node(concept["id"], concept["name"], "1", delim, filey)
            df.loc[count] = [concept["id"], "1", concept["name"]]
            count += 1
        else:
            # One triple per parent relationship
            for parent in parents:
                # make_node(node_id,name,parent,delim,file_obj):
                if save_to_file == True:
                    make_node(concept["id"], concept["name"], parent, delim, filey)
                df.loc[count] = [concept["id"], parent, concept["name"]]
                count += 1

    # Now add an entry for each image / contrast, may be multiple for each image
    if image_dict:
        node_id = max(concept_lookup.values()) + 1
        for conid, image_paths in image_dict.items():
            if lookup_key_type == "contrast":
                concepts_single = get_concept(contrast_id=conid).json
                key_id = "id"
            else:
                # Task lookup: concepts live under the task record's "concepts"
                concepts_single = get_task(id=conid).json[0]
                if "concepts" in list(concepts_single.keys()):
                    concepts_single = concepts_single["concepts"]
                else:
                    concepts_single = None
                key_id = "concept_id"
            if concepts_single != None:
                for con in concepts_single:
                    # The concept is the parent of the image
                    if con:
                        for image_path in image_paths:
                            # make_node(node_id,name,parent,delim,file_obj):
                            if save_to_file == True:
                                make_node("node_%s" % node_id, image_path,
                                          con[key_id], delim, filey)
                            df.loc[count] = [
                                "node_%s" % node_id, con[key_id], image_path
                            ]
                            node_id += 1
                            count += 1

    if save_to_file == True:
        filey.close()
        print("%s has been created." % output_file)

    return df
from cognitiveatlas.api import get_concept, get_task
from py2neo import Graph, Path, Node, Rel, authenticate
import os

# Get concepts, tasks
concepts = get_concept()
concept_ids = concepts.pandas.id.tolist()
concept_names = concepts.pandas.name.tolist()

tasks = get_task()
task_ids = tasks.pandas.id.tolist()
task_names = tasks.pandas.name.tolist()

# get contrasts from tasks (one full-record API call per task)
contrast_ids = []
contrast_names = []
contrast_tasks = []
for t in tasks.json:
    task = get_task(id=t["id"])
    contrasts = task.json[0]["contrasts"]
    for contrast in contrasts:
        contrast_tasks.append(t["id"])
        contrast_ids.append(contrast["id"])
        contrast_names.append(contrast["contrast_text"])

# set up authentication parameters; context manager closes the password file
with open('neo4j_pw') as pw_file:
    pw = pw_file.readline().strip()
authenticate("localhost:7474", "neo4j", pw)

# connect to authenticated graph database
graph = Graph()
def pull_ontology(out_dir='auto', overwrite=False):
    """
    Download Cognitive Atlas ontology and combine Concepts, Tasks, and
    Disorders to create ID and relationship DataFrames.

    Parameters
    ----------
    out_dir : str, optional
        Output directory for the cached csv files; 'auto' (default) uses the
        package resource path.
    overwrite : bool, optional
        When True, re-download even if cached csv files already exist.

    Returns
    -------
    (id_df, rel_df) : tuple of pandas DataFrames
        Identifiers/aliases and input/output/rel_type relationships.
    """
    if out_dir == 'auto':
        out_dir = op.join(get_resource_path(), 'ontology')
    else:
        out_dir = op.abspath(out_dir)

    ids_file = op.join(out_dir, 'cogat_ids.csv')
    rels_file = op.join(out_dir, 'cogat_relationships.csv')
    # Only hit the web API when cached csvs are missing or overwrite=True
    if overwrite or not all([op.isfile(f) for f in [ids_file, rels_file]]):
        concepts = get_concept(silent=True).pandas
        tasks = get_task(silent=True).pandas
        disorders = get_disorder(silent=True).pandas

        # Identifiers and aliases
        long_concepts = _longify(concepts)
        long_tasks = _longify(tasks)

        # Disorders currently lack aliases
        disorders['name'] = disorders['name'].str.lower()
        disorders = disorders.assign(alias=disorders['name'])
        disorders = disorders[['id', 'name', 'alias']]

        # Combine into id_df
        id_df = pd.concat((long_concepts, long_tasks, disorders), axis=0)
        id_df = _expand_df(id_df)
        id_df = id_df.replace('', np.nan)
        id_df = id_df.dropna(axis=0)
        id_df = id_df.reset_index(drop=True)

        # Relationships: one API call per unique concept/task/disorder id
        relationships = []
        for i, id_ in enumerate(concepts['id'].unique()):
            if i % 100 == 0:
                # Periodic pause to avoid hammering the API
                time.sleep(5)
            # Every item relates to itself
            row = [id_, id_, 'isSelf']
            relationships.append(row)
            concept = get_concept(id=id_, silent=True).json
            for rel in concept['relationships']:
                reltype = _get_concept_reltype(rel['relationship'],
                                               rel['direction'])
                if reltype is not None:
                    row = [id_, rel['id'], reltype]
                    relationships.append(row)

        for i, id_ in enumerate(tasks['id'].unique()):
            if i % 100 == 0:
                time.sleep(5)
            row = [id_, id_, 'isSelf']
            relationships.append(row)
            task = get_task(id=id_, silent=True).json
            for rel in task['concepts']:
                # Record both directions: task measures concept, and vice versa
                row = [id_, rel['concept_id'], 'measures']
                relationships.append(row)
                row = [rel['concept_id'], id_, 'measuredBy']
                relationships.append(row)

        for i, id_ in enumerate(disorders['id'].unique()):
            if i % 100 == 0:
                time.sleep(5)
            row = [id_, id_, 'isSelf']
            relationships.append(row)
            disorder = get_disorder(id=id_, silent=True).json
            for rel in disorder['disorders']:
                # Normalize the API's "ISA" spelling to "isA"
                if rel['relationship'] == 'ISA':
                    rel_type = 'isA'
                else:
                    rel_type = rel['relationship']
                row = [id_, rel['id'], rel_type]
                relationships.append(row)

        rel_df = pd.DataFrame(columns=['input', 'output', 'rel_type'],
                              data=relationships)

        # Concept -> concept-class membership ("inCategory") relationships
        ctp_df = concepts[['id', 'id_concept_class']]
        ctp_df = ctp_df.assign(rel_type='inCategory')
        ctp_df.columns = ['input', 'output', 'rel_type']
        ctp_df['output'].replace('', np.nan, inplace=True)
        ctp_df.dropna(axis=0, inplace=True)

        rel_df = pd.concat((ctp_df, rel_df))
        rel_df = rel_df.reset_index(drop=True)
        id_df.to_csv(ids_file, index=False)
        rel_df.to_csv(rels_file, index=False)
    else:
        # Load cached copies instead of re-downloading
        id_df = pd.read_csv(ids_file)
        rel_df = pd.read_csv(rels_file)

    return id_df, rel_df
#[u'conclass', # u'implementations', # u'disorders', # u'discussion', # u'indicators', # u'conditions', # u'contrasts', # u'external_datasets', # u'umarkdef', # u'umark', # u'history'] # Step 2: Find contrasts associated with each task # Note that this is an inefficient way to retrieve the full data, but it will work! task_uids = [task["id"] for task in get_task().json] contrasts = dict() # contrast lookup by task uid # Now we can retrieve the full data. We are interested in contrasts, so let's save those. for task in task_uids: if task not in contrasts: task_complete = get_task(task).json[0] # Only save if we have contrasts if len(task_complete["contrasts"]) > 0: contrasts[task] = task_complete["contrasts"] # How many tasks have contrasts? len(contrasts) # 437 # Step 3: Make a contrast --> concept lookup