示例#1
0
def submit(parent_name, parent_id, data_file,
        id_tracking_file=node_tracking_file):
    """Submit the subject records in ``data_file`` under a single parent.

    Each record yielded by ``load_data(data_file)`` is looked up with
    ``load(record['rand_subject_id'])``.  Records whose loaded node already
    has a truthy ``rand_subject_id`` are skipped.  The others are passed to
    ``validate_record``; when that returns a truthy saved node, one tracking
    row is written to ``id_tracking_file``.

    Args:
        parent_name: Parent name, recorded in the tracking row.
        parent_id: Parent ID, passed to ``validate_record`` and recorded in
            the tracking row.
        data_file: Source handed to ``load_data``.
        id_tracking_file: CSV that receives the tracking rows.

    Raises:
        Exception: Any error raised while handling a record is logged with
            its traceback and re-raised, aborting the remaining records.
    """
    log.info('Starting submission of subjects.')
    nodes = []
    for record in load_data(data_file):
        try:
            log.debug('...trying next record...')
            n = load(record['rand_subject_id'])
            if n.rand_subject_id:
                # The loaded node already carries an ID: nothing to submit.
                continue

            saved = validate_record(parent_id, n, record)
            if not saved:
                continue

            # NOTE(review): `header` is not passed to values_to_node_dict
            # here; confirm whether the helper needs it.
            header = settings.node_id_tracking.id_fields
            vals = values_to_node_dict(
                [[node_type, saved.rand_subject_id, saved.id,
                  parent_type, parent_name, parent_id,
                  get_cur_datetime()]]
            )
            write_out_csv(id_tracking_file, values=vals)
            nodes.append(vals)
        except Exception as e:
            # log.exception records the traceback; the bare raise keeps the
            # original traceback instead of restarting it at this line.
            log.exception(e)
            raise
示例#2
0
def submit(data_file, id_tracking_file=node_tracking_file):
    """Submit each record of ``data_file`` keyed on ``rand_subject_id``.

    Records with an empty ``rand_subject_id`` are not submitted; they are
    written to ``<data_file>_records_no_submit.csv`` instead.  For the other
    records the parent ID is read from the record's ``parent_osdf_id``
    column.  If it is empty an error is logged and the record is skipped.
    Otherwise the node is loaded, passed to ``validate_record``, and, when a
    truthy saved node comes back, a tracking row is built.  The row is
    written to ``id_tracking_file`` only when the loaded node had no
    ``rand_subject_id`` value, which this function treats as a new node.

    Args:
        data_file: Source handed to ``load_data`` and ``get_field_header``.
        id_tracking_file: CSV that receives rows for newly created nodes.

    Raises:
        Exception: Any error raised while handling a record is logged with
            its traceback and re-raised, aborting the remaining records.
    """
    log.info('Starting submission of %ss.', node_type)
    nodes = []
    # Result is unused; the call is kept in case get_field_header has side
    # effects or validates the file -- TODO confirm and drop if not.
    csv_fieldnames = get_field_header(data_file)
    for record in load_data(data_file):
        if not record['rand_subject_id']:
            # No subject ID (empty or missing): set the record aside
            # rather than submitting it.
            write_out_csv(data_file + '_records_no_submit.csv',
                          fieldnames=list(record), values=[record])
            continue

        log.debug('\n...next record...')
        try:
            log.debug('data record: %s', record)

            # Node-specific variables:
            load_search_field = 'rand_subject_id'
            internal_id = record['rand_subject_id']
            parent_internal_id = record['rand_subject_id']

            # The parent ID comes straight from the record here.
            parent_id = record['parent_osdf_id']
            log.debug('matched parent_id: %s', parent_id)

            if not parent_id:
                log.error('No parent_id found for %s', parent_internal_id)
                continue

            node = load(internal_id, load_search_field)
            # A loaded node without the search field set is treated as new.
            node_is_new = not getattr(node, load_search_field)
            if node_is_new:
                log.debug('loaded node newbie...')

            saved = validate_record(parent_id, node, record,
                                    data_file_name=data_file)
            if not saved:
                continue

            header = settings.node_id_tracking.id_fields
            saved_name = getattr(saved, load_search_field)
            vals = values_to_node_dict(
                [[node_type.lower(), saved_name, saved.id,
                  parent_type.lower(), parent_internal_id, parent_id,
                  get_cur_datetime()]],
                header
            )
            nodes.append(vals)
            if node_is_new:
                write_out_csv(id_tracking_file,
                              fieldnames=get_field_header(id_tracking_file),
                              values=vals)
        except Exception as e:
            # The bare raise preserves the original traceback.
            log.exception(e)
            raise
示例#3
0
def submit(data_file, id_tracking_file=node_tracking_file):
    """Submit each record of ``data_file`` as a node keyed on ``visit_id``.

    For every record the parent ID is looked up in ``id_tracking_file`` from
    the record's ``rand_patient_id``.  If no parent ID is found an error is
    logged and the record is skipped.  Otherwise the node is loaded by
    ``record['DCC_VISIT_IDS']``, passed to ``validate_record``, and, when a
    truthy saved node comes back, a tracking row is built.  The row is
    written to ``id_tracking_file`` only when the loaded node had no
    ``visit_id`` value, which this function treats as a new node.

    Args:
        data_file: Source handed to ``load_data``, ``get_field_header`` and
            ``write_csv_headers``.
        id_tracking_file: CSV used for the parent lookup and for recording
            newly created nodes.

    Raises:
        Exception: Any error raised while handling a record is logged with
            its traceback and re-raised, aborting the remaining records.
    """
    log.info('Starting submission of %ss.', node_type)
    nodes = []
    csv_fieldnames = get_field_header(data_file)
    write_csv_headers(data_file, fieldnames=csv_fieldnames)
    for record in load_data(data_file):
        log.info('\n...next record...')
        try:
            log.debug('data record: %s', record)

            # Node-specific variables:
            load_search_field = 'visit_id'
            internal_id = record['DCC_VISIT_IDS']
            # Text ID used to look up the parent's ID in the tracking file.
            parent_internal_id = record['rand_patient_id']

            parent_id = get_parent_node_id(
                id_tracking_file, parent_type, parent_internal_id)
            if not parent_id:
                log.error('No parent_id found for %s', parent_internal_id)
                continue

            node = load(internal_id, load_search_field)
            # A loaded node without the search field set is treated as new.
            node_is_new = not getattr(node, load_search_field)
            if node_is_new:
                log.debug('loaded node newbie...')

            saved = validate_record(parent_id, node, record,
                                    data_file_name=data_file)
            if not saved:
                continue

            header = settings.node_id_tracking.id_fields
            saved_name = getattr(saved, load_search_field)
            vals = values_to_node_dict(
                [[node_type.lower(), saved_name, saved.id,
                  parent_type.lower(), parent_internal_id, parent_id,
                  get_cur_datetime()]],
                header
            )
            nodes.append(vals)
            if node_is_new:
                write_out_csv(id_tracking_file,
                              fieldnames=get_field_header(id_tracking_file),
                              values=vals)
        except Exception as e:
            # The bare raise preserves the original traceback.
            log.exception(e)
            raise
def submit(data_file, id_tracking_file=node_tracking_file):
    """Submit each record of ``data_file`` as a node keyed on ``local_file``.

    For every record the parent ID is looked up in ``id_tracking_file`` from
    the record's ``prep_id``.  If no parent ID is found an error is logged
    and the record is skipped.  Otherwise the node is loaded by the basename
    of ``record['local_file']``, passed to ``validate_record``, and, when a
    truthy saved node comes back, a tracking row is built.  The name in that
    row is the basename of the saved node's ``local_file`` when the record's
    ``consented`` column is ``'YES'``; otherwise it is the saved node's
    ``comment`` joined with ``'private_file'`` by a hyphen.  The row is
    written to ``id_tracking_file`` only when the loaded node had no
    ``local_file`` value, which this function treats as a new node.

    Args:
        data_file: Source handed to ``load_data``, ``get_field_header`` and
            ``write_csv_headers``.
        id_tracking_file: CSV used for the parent lookup and for recording
            newly created nodes.

    Raises:
        Exception: Any error raised while handling a record is logged with
            its traceback and re-raised, aborting the remaining records.
    """
    log.info('Starting submission of %ss.', node_type)
    nodes = []
    csv_fieldnames = get_field_header(data_file)
    write_csv_headers(data_file, fieldnames=csv_fieldnames)
    for record in load_data(data_file):
        log.info('...next record...')
        try:
            log.debug('data record: %s', record)

            # Node-specific variables:
            load_search_field = 'local_file'
            internal_id = os.path.basename(record[load_search_field])
            parent_internal_id = record['prep_id']
            # Not used below; the lookup is kept so that a record lacking
            # this column still fails exactly as it did before.
            grand_parent_internal_id = record['visit_id']

            parent_id = get_parent_node_id(
                id_tracking_file, parent_type, parent_internal_id)
            log.debug('matched parent_id: %s', parent_id)
            if not parent_id:
                log.error('No parent_id found for %s', parent_internal_id)
                continue

            node = load(internal_id, load_search_field)
            # A loaded node without the search field set is treated as new.
            node_is_new = not getattr(node, load_search_field)
            if node_is_new:
                log.debug('loaded node newbie...')

            saved = validate_record(parent_id, node, record,
                                    data_file_name=data_file)
            if not saved:
                continue

            header = settings.node_id_tracking.id_fields
            if record['consented'] == 'YES':
                saved_name = os.path.basename(
                    getattr(saved, load_search_field))
            else:
                # Non-consented records are tracked under a name derived
                # from the node's comment instead of its file name.
                saved_name = '-'.join([saved.comment, 'private_file'])
            vals = values_to_node_dict(
                [[node_type.lower(), saved_name, saved.id,
                  parent_type.lower(), parent_internal_id, parent_id,
                  get_cur_datetime()]],
                header
            )
            nodes.append(vals)
            if node_is_new:
                write_out_csv(id_tracking_file,
                              fieldnames=get_field_header(id_tracking_file),
                              values=vals)
        except Exception as e:
            # The bare raise preserves the original traceback.
            log.exception(e)
            raise
def submit(data_file, id_tracking_file=node_tracking_file):
    """Submit each record of ``data_file`` as a node keyed on ``comment``.

    The node's internal ID is ``record['host_transcriptomics_id']`` with the
    suffix ``'.host_transcriptomics'``.  The parent ID is looked up in
    ``id_tracking_file`` from ``record['host_seq_prep_name_id']``.  If no
    parent ID is found an error is logged and the record is skipped.
    Otherwise the node is loaded, passed to ``validate_record``, and, when a
    truthy saved node comes back, a tracking row is built.  The row is
    written to ``id_tracking_file`` only when the loaded node had no
    ``comment`` value, which this function treats as a new node.

    Args:
        data_file: Source handed to ``load_data``, ``get_field_header`` and
            ``write_csv_headers``.
        id_tracking_file: CSV used for the parent lookup and for recording
            newly created nodes.

    Raises:
        Exception: Any error raised while handling a record is logged with
            its traceback and re-raised, aborting the remaining records.
    """
    log.info('Starting submission of %ss.', node_type)
    nodes = []
    csv_fieldnames = get_field_header(data_file)
    write_csv_headers(data_file, fieldnames=csv_fieldnames)
    for record in load_data(data_file):
        log.info('...next record...')
        try:
            log.debug('data record: %s', record)

            # Node-specific variables:
            load_search_field = 'comment'
            internal_id = (str(record['host_transcriptomics_id'])
                           + '.host_transcriptomics')
            # Links this node to its host_seq_prep parent.
            parent_internal_id = record['host_seq_prep_name_id']
            # Sample link; not used below.  The lookup is kept so that a
            # record lacking this column still fails as it did before.
            grand_parent_internal_id = record['sample_name_id']

            parent_id = get_parent_node_id(
                id_tracking_file, parent_type, parent_internal_id)
            log.debug('matched parent_id: %s', parent_id)
            if not parent_id:
                log.error('No parent_id found for %s', parent_internal_id)
                continue

            node = load(internal_id, load_search_field)
            # A loaded node without the search field set is treated as new.
            node_is_new = not getattr(node, load_search_field)
            if node_is_new:
                log.debug('loaded node newbie...')

            saved = validate_record(parent_id, node, record,
                                    data_file_name=data_file)
            if not saved:
                continue

            header = settings.node_id_tracking.id_fields
            saved_name = getattr(saved, load_search_field)
            # NOTE(review): this row has six values and no timestamp;
            # confirm that matches the columns in `header`.
            vals = values_to_node_dict(
                [[node_type.lower(), saved_name, saved.id,
                  parent_type.lower(), parent_internal_id, parent_id]],
                header
            )
            nodes.append(vals)
            if node_is_new:
                write_out_csv(id_tracking_file,
                              fieldnames=get_field_header(id_tracking_file),
                              values=vals)
        except Exception as e:
            # The bare raise preserves the original traceback.
            log.exception(e)
            raise
示例#6
0
def submit(data_file, id_tracking_file=node_tracking_file):
    """Submit each record of ``data_file`` as a node keyed on ``comment``.

    The node's internal ID is ``record['sample_name_id'] + '.proteome'`` and
    the parent's internal ID is
    ``record['sample_name_id'] + '.hostassayprep'``.  The parent ID is
    looked up in ``id_tracking_file`` and handed to ``validate_record``
    without a truthiness check.  When ``validate_record`` returns a truthy
    saved node, a tracking row is built.  The row is written to
    ``id_tracking_file`` only when the loaded node had no ``comment`` value,
    which this function treats as a new node.

    Args:
        data_file: Source handed to ``load_data``, ``get_field_header`` and
            ``write_csv_headers``.
        id_tracking_file: CSV used for the parent lookup and for recording
            newly created nodes.

    Raises:
        Exception: Any error raised while handling a record is logged with
            its traceback and re-raised, aborting the remaining records.
    """
    log.info('Starting submission of %ss.', node_type)
    nodes = []
    csv_fieldnames = get_field_header(data_file)
    write_csv_headers(data_file, fieldnames=csv_fieldnames)
    for record in load_data(data_file):
        log.info('\n...next record...')
        try:
            log.debug('data record: %s', record)

            # Node-specific variables:
            load_search_field = 'comment'
            internal_id = record['sample_name_id'] + '.proteome'
            parent_internal_id = record['sample_name_id'] + '.hostassayprep'
            # Not used below; the lookup is kept so that a record lacking
            # this column still fails exactly as it did before.
            grand_parent_internal_id = record['visit_id']

            # NOTE(review): parent_id is used even when the lookup finds
            # nothing; confirm validate_record copes with that.
            parent_id = get_parent_node_id(
                id_tracking_file, parent_type, parent_internal_id)

            node = load(internal_id, load_search_field)
            # A loaded node without the search field set is treated as new.
            node_is_new = not getattr(node, load_search_field)
            if node_is_new:
                log.debug('loaded node newbie...')

            saved = validate_record(parent_id, node, record,
                                    data_file_name=data_file)
            if not saved:
                continue

            header = settings.node_id_tracking.id_fields
            saved_name = getattr(saved, load_search_field)
            vals = values_to_node_dict(
                [[node_type.lower(), saved_name, saved.id,
                  parent_type.lower(), parent_internal_id, parent_id]],
                header
            )
            nodes.append(vals)
            if node_is_new:
                write_out_csv(id_tracking_file,
                              fieldnames=get_field_header(id_tracking_file),
                              values=vals)
        except Exception as e:
            # The bare raise preserves the original traceback.
            log.exception(e)
            raise
示例#7
0
def retrieve_query_all(session, query, data_file='node_retrievals_.csv'):
    """Run ``query_all`` and write every returned node to ``data_file``.

    A header row is requested first; a failure there is logged and
    otherwise ignored.  Each result from ``query_all(session, query)`` is
    then flattened into one row in the column order of ``fields``.  The
    ``linkage``, ``meta`` and ``acl`` entries are JSON-encoded, and the row
    ends with the value of ``get_cur_datetime()``.  The internal ID comes
    from ``result['meta']`` using the ``id_field`` registered for the node
    type in ``NodeDict``; for node types not in ``NodeDict`` the node type
    itself is used.

    Args:
        session: Passed through to ``query_all``.
        query: Passed through to ``query_all``.
        data_file: CSV file that receives the rows.

    Raises:
        Exception: Any error raised while handling a result is logged with
            its traceback and re-raised, aborting the remaining results.
    """
    log.info('Starting retrieval of "%s".', query)

    # Per the original note, write_out_csv writes the header only when the
    # file does not exist or is empty -- TODO confirm against the helper.
    fields = ['node_type', 'id', 'internal_id', 'linkage', 'meta', 'ns',
              'ver', 'acl', 'date_retrieved']
    log.warning('fields,%s', fields)
    try:
        write_out_csv(data_file, fieldnames=fields)
    except Exception as e:
        # Best effort: a header failure must not stop the retrieval.
        log.exception('Write headers, Except... %s', e)

    results = query_all(session, query)
    log.info("Number of Query Results: %s", len(results))
    for node_id, result in results.items():
        try:
            node_type = result['node_type']
            if node_type in NodeDict:
                id_field = NodeDict[node_type]['id_field']
                internal_id = result['meta'][id_field]
            else:
                # Unregistered node type: fall back to the type name.
                internal_id = node_type
            log.debug('Current data node: %s', internal_id)

            values = [
                node_type,
                node_id,
                internal_id,
                json.dumps(result['linkage']),
                json.dumps(result['meta']),
                result['ns'],
                result['ver'],
                json.dumps(result['acl']),
                get_cur_datetime(),
            ]
            log.warning('vals,%s', values)
            vals = values_to_node_dict([values], fields)
            # NOTE(review): the same node is written twice, once as the
            # converted dict form and once as the raw list.  This looks like
            # a leftover experiment; confirm which call write_out_csv
            # expects and drop the other.
            write_out_csv(data_file, fieldnames=fields, values=[vals])
            write_out_csv(data_file, fieldnames=fields, values=[values])
        except Exception as e:
            # The bare raise preserves the original traceback.
            log.exception(e)
            raise
def submit(data_file, id_tracking_file=node_tracking_file):
    """Submit each record of ``data_file`` as a node keyed on ``local_file``.

    Records whose ``local_file`` column is the empty string are skipped.
    For the others the node's internal ID is the basename of
    ``record['local_file']``.  The parent ID is looked up in
    ``id_tracking_file`` from ``record['raw_file_id']`` and handed to
    ``validate_record`` without a truthiness check.  When
    ``validate_record`` returns a truthy saved node, a tracking row is
    built.  The row is written to ``id_tracking_file`` only when the loaded
    node had no ``local_file`` value, which this function treats as a new
    node.

    Args:
        data_file: Source handed to ``load_data``, ``get_field_header`` and
            ``write_csv_headers``.
        id_tracking_file: CSV used for the parent lookup and for recording
            newly created nodes.

    Raises:
        Exception: Any error raised while handling a record is logged with
            its traceback and re-raised, aborting the remaining records.
    """
    log.info('Starting submission of %ss.', node_type)
    nodes = []
    csv_fieldnames = get_field_header(data_file)
    write_csv_headers(data_file, fieldnames=csv_fieldnames)
    for record in load_data(data_file):
        log.info('\n...next record...')
        try:
            log.debug('data record: %s', record)

            if record['local_file'] == '':
                # No file to submit for this record.
                continue

            # Node-specific variables:
            load_search_field = 'local_file'
            internal_id = os.path.basename(record['local_file'])
            parent_internal_id = record['raw_file_id']
            # Not used below; the lookup is kept so that a record lacking
            # this column still fails exactly as it did before.
            grand_parent_internal_id = record['prep_id']

            # NOTE(review): parent_id is used even when the lookup finds
            # nothing; confirm validate_record copes with that.
            parent_id = get_parent_node_id(
                id_tracking_file, parent_type, parent_internal_id)

            node = load(internal_id, load_search_field)
            # A loaded node without the search field set is treated as new.
            node_is_new = not getattr(node, load_search_field)
            if node_is_new:
                log.debug('loaded node newbie...')

            saved = validate_record(parent_id, node, record,
                                    data_file_name=data_file)
            if not saved:
                continue

            header = settings.node_id_tracking.id_fields
            # saved_name was never assigned before, so this path always
            # raised NameError.  The basename of the saved node's file is
            # used so the name has the same form as internal_id.
            # NOTE(review): confirm this is the intended tracking name.
            saved_name = os.path.basename(getattr(saved, load_search_field))
            vals = values_to_node_dict(
                [[node_type.lower(), saved_name, saved.id,
                  parent_type.lower(), parent_internal_id, parent_id]],
                header
            )
            nodes.append(vals)
            if node_is_new:
                write_out_csv(id_tracking_file,
                              fieldnames=get_field_header(id_tracking_file),
                              values=vals)
        except Exception as e:
            # The bare raise preserves the original traceback.
            log.exception(e)
            raise