示例#1
0
def check_if_node_exists(driver, node_property, value):
    """
    Queries the graph database and checks if a node with a specific property and property value already exists.

    The 'check_node' query template is loaded, its ``PROPERTY`` placeholder is
    replaced by ``node_property`` and every ';'-terminated statement is run in
    order. Statements containing ``$`` receive ``value`` as the ``value``
    query parameter.

    :param driver: neo4j driver, which provides the connection to the neo4j graph database.
    :type driver: neo4j driver
    :param str node_property: property of the node.
    :param value: property value.
    :type value: str, int, float or bool
    :return: Result of the last executed statement as returned by connector.getCursorData
            (a Pandas dataframe, expected to be empty when no node matches),
            or None if the query could not be read or executed.
    """
    query_name = 'check_node'
    # Bound before the try block so the final return cannot raise
    # UnboundLocalError when the query fails or holds no ';'-terminated statement.
    result = None
    try:
        user_creation_cypher = get_user_creation_queries()
        query = user_creation_cypher[query_name]['query'].replace(
            'PROPERTY', node_property)
        # The text after the last ';' is dropped (trailing whitespace/newline).
        for q in query.split(';')[0:-1]:
            if '$' in q:
                result = connector.getCursorData(driver,
                                                 q + ';',
                                                 parameters={'value': value})
            else:
                result = connector.getCursorData(driver, q + ';')
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error(
            "Reading query {}: {}, file: {},line: {}, error: {}".format(
                query_name, sys.exc_info(), fname, exc_tb.tb_lineno, err))
    return result
示例#2
0
def get_subjects_enrolled_in_project(driver, projectId):
    """
    Extracts the subjects enrolled in a given project.

    Runs every ';'-terminated statement of the 'extract_enrolled_subjects'
    query; statements containing ``$`` receive ``str(projectId)`` as the
    ``external_id`` query parameter.

    :param driver: neo4j driver, which provides the connection to the neo4j graph database.
    :type driver: neo4j driver
    :param str projectId: external project identifier (from the graph database).
    :return: ``.values`` of the last statement's result, or None if the query
            could not be read or executed.
    :rtype: Numpy ndarray or None
    """
    query_name = 'extract_enrolled_subjects'
    query = ''
    # Bound before the try block: previously a failed query led to an
    # UnboundLocalError on the return line after the error had been logged.
    result = None
    try:
        data_upload_cypher = get_data_upload_queries()
        query = data_upload_cypher[query_name]['query']
        for q in query.split(';')[0:-1]:
            if '$' in q:
                result = connector.getCursorData(driver, q+';', parameters={'external_id': str(projectId)})
            else:
                result = connector.getCursorData(driver, q+';')
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error("Error: {}. Getting new subjects enrolled in project: Query name ({}) - Query ({}), error info: {}, file: {},line: {}".format(err, query_name, query, sys.exc_info(), fname, exc_tb.tb_lineno))
    if result is None:
        return None
    return result.values
示例#3
0
def create_db_user(driver, data):
    """
    Creates and assigns role to new graph database user, if user not in list of local users.

    The existing database users are listed first; the 'create_db_user' and
    'add_role_to_db_user' statements are only executed when ``data['username']``
    is not among them. Errors are logged and not re-raised.

    :param driver: neo4j driver, which provides the connection to the neo4j graph database.
    :type driver: neo4j driver
    :param Series data: pandas Series with required user information (see set_arguments()).
    """
    query_name_add = 'create_db_user'
    query_name_role = 'add_role_to_db_user'
    query_list_db_users = 'list_db_users'
    # Bound up front so the info log below never hits an unbound name when the
    # combined query contains no ';'-terminated statement.
    result = None

    try:
        cypher = get_user_creation_queries()
        db_query = cypher[query_name_add]['query'] + cypher[query_name_role][
            'query']
        db_users = connector.getCursorData(
            driver, cypher[query_list_db_users]['query'], {})
        # Emptiness is tested first: an empty dataframe may carry no 'username'
        # column, in which case indexing it would raise KeyError and the user
        # would never be created.
        if db_users.empty or data['username'] not in db_users[
                'username'].to_list():
            for q in db_query.split(';')[0:-1]:
                result = connector.getCursorData(driver,
                                                 q + ';',
                                                 parameters=data.to_dict())
            logger.info("New user created: {}. Result: {}".format(
                data['username'], result))
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error(
            "Reading query {}: {}, file: {},line: {}, error: {}".format(
                query_name_add, sys.exc_info(), fname, exc_tb.tb_lineno, err))
示例#4
0
def get_db_stats_data():
    """
    Retrieves all the stats data from the graph database and returns them as a dictionary.

    Each stats query is run independently; a query that fails is logged and
    skipped, so the returned dictionary may hold fewer entries than requested.
    If no database connection is available an empty dictionary is returned.

    :return: Dictionary mapping each stats name ('projects', 'meta_stats',
            'store_size', 'transactions', 'kernel_monitor') to the JSON string
            (``orient='records'``) of the corresponding dataframe.
    """
    # Local import: only the standard library logger is needed here, to report
    # failures that were previously swallowed silently.
    import logging

    query_names = ['unique_projects', 'get_db_stats',
                   'get_db_store_size', 'get_db_transactions', 'get_db_kernel']
    df_names = ['projects', 'meta_stats',
                'store_size', 'transactions', 'kernel_monitor']

    dfs = {}
    cypher = get_query()
    driver = connector.getGraphDatabaseConnectionConfiguration()

    if driver is not None:
        for df_name, query_name in zip(df_names, query_names):
            query = cypher[query_name]['query']
            try:
                data = connector.getCursorData(driver, query)
                if df_name == 'store_size':
                    # Store sizes come back as one row; transpose it and add a
                    # converted 'size' column computed from column 0.
                    data = data.T
                    data['size'] = [size_converter(size) for size in data[0]]
                dfs[df_name] = data.to_json(orient='records')
            except Exception as err:
                # Best effort: a failing stats query must not prevent the
                # remaining ones from being collected, but it should be visible.
                logging.getLogger(__name__).warning(
                    "Could not retrieve database stats '%s' (query '%s'): %s",
                    df_name, query_name, err)
    return dfs
示例#5
0
def get_new_project_identifier(driver, projectId):
    """
    Builds the next sequential external project identifier from the database.

    The 'increment_project_id' query is expected to yield the last project
    identifier and the next numeric id in its first row. The new number is
    left-padded with zeros to the width of the numeric part of the last
    identifier and prefixed with 'P'. When both values are None the first
    identifier, 'P0000001', is returned.

    :param driver: neo4j driver, which provides the connection to the neo4j graph database.
    :type driver: neo4j driver
    :param str projectId: internal project identifier (CPxxxxxxxxxxxx); not used by the query.
    :return: Project external identifier, or None if the query failed.
    :rtype: str
    """
    query_name = 'increment_project_id'
    external_identifier = None
    try:
        queries = get_project_creation_queries()
        cypher_query = queries[query_name]['query']
        first_row = connector.getCursorData(driver, cypher_query).values[0]
        last_project, new_id = first_row
        if last_project is None and new_id is None:
            external_identifier = 'P0000001'
        else:
            # Width of the numeric part that follows the last 'P'.
            width = len(last_project.split('P')[-1])
            external_identifier = 'P' + str(new_id).rjust(width, '0')
    except Exception as err:
        tb = sys.exc_info()[2]
        fname = os.path.split(tb.tb_frame.f_code.co_filename)[1]
        message = "Reading query {}: {}, file: {},line: {}, err: {}".format(
            query_name, sys.exc_info(), fname, tb.tb_lineno, err)
        logger.error(message)

    return external_identifier
示例#6
0
def get_subject_number_in_project(driver, projectId):
    """
    Extracts the number of subjects included in a given project.

    Runs the 'subject_number' query with ``projectId`` as the ``external_id``
    parameter and returns the first cell of the first result row.

    :param driver: neo4j driver, which provides the connection to the neo4j graph database.
    :type driver: neo4j driver
    :param str projectId: external project identifier (from the graph database).
    :return: Integer with the number of subjects, or None if the query could
            not be read or executed, or returned no rows.
    """
    query_name = 'subject_number'
    # Bound before the try block: previously a failed query led to an
    # UnboundLocalError on the return line after the error had been logged.
    result = None
    try:
        cypher = get_project_creation_queries()
        query = cypher[query_name]['query']
        result = connector.getCursorData(driver,
                                         query,
                                         parameters={
                                             'external_id': projectId
                                         }).values[0][0]
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error(
            "Error: {}. Reading query {}: {}, file: {},line: {}".format(
                err, query_name, sys.exc_info(), fname, exc_tb.tb_lineno))
    return result
示例#7
0
文件: project.py 项目: MannLabs/CKG
    def query_data(self):
        """
        Runs every "pre" query from the project's queries file.

        The queries file (``self.queries_file``, resolved relative to ``cwd``)
        is read, the ``PROJECTID`` placeholder of each query is replaced by
        ``self.identifier`` and queries whose ``query_type`` is "pre" are
        executed. Errors are logged and not re-raised, so the result may be
        partial.

        :return: Dictionary mapping a title (query name lower-cased, with
                underscores replaced by spaces) to the query result.
        """
        data = {}
        # Bound before the try block so the error handler can always format it;
        # otherwise a failure while building the path would raise
        # UnboundLocalError in the handler and mask the original error.
        queries_path = None
        try:
            queries_path = os.path.join(cwd, self.queries_file)
            project_cypher = query_utils.read_queries(queries_path)

            driver = connector.getGraphDatabaseConnectionConfiguration()
            replace = [("PROJECTID", self.identifier)]
            for query_name in project_cypher:
                title = query_name.lower().replace('_', ' ')
                query = project_cypher[query_name]['query']
                query_type = project_cypher[query_name]['query_type']
                for r, by in replace:
                    query = query.replace(r, by)
                if query_type == "pre":
                    data[title] = connector.getCursorData(driver, query)
        except Exception as err:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            logger.error(
                "Reading queries from file {}: {}, file: {},line: {}, error: {}"
                .format(queries_path, sys.exc_info(), fname, exc_tb.tb_lineno,
                        err))

        return data
示例#8
0
文件: mapping.py 项目: MannLabs/CKG
def get_mapping_analytical_samples(project_id):
    """
    Maps the external identifiers of a project's analytical samples to their internal identifiers.

    :param project_id: identifier of the project (matched against ``p.id``
            as a string).
    :return: Dictionary ``{a.external_id: a.id}`` when analytical samples are
            found; otherwise the empty query result is returned unchanged.
    """
    from ckg.graphdb_connector import connector
    driver = connector.getGraphDatabaseConnectionConfiguration()

    # The project identifier is passed as a query parameter rather than being
    # interpolated into the Cypher text, so quotes in the value can neither
    # break the query nor inject Cypher.
    query = ("MATCH (p:Project)-[:HAS_ENROLLED]-(:Subject)-[:BELONGS_TO_SUBJECT]-()"
             "-[:SPLITTED_INTO]-(a:Analytical_sample) "
             "WHERE p.id=$project_id RETURN a.external_id, a.id")
    mapping = connector.getCursorData(
        driver, query, parameters={'project_id': str(project_id)})
    if not mapping.empty:
        mapping = mapping.set_index("a.external_id").to_dict(
            orient='dict')["a.id"]

    return mapping
示例#9
0
文件: mapping.py 项目: MannLabs/CKG
def getMappingFromDatabase(id_list,
                           node,
                           attribute_from='id',
                           attribute_to='name'):
    """
    Maps one attribute of the given nodes to another attribute of the same nodes.

    :param id_list: iterable of values to look up; each one is converted to a string.
    :param str node: label of the nodes to query.
    :param str attribute_from: node attribute matched against ``id_list`` (mapping keys).
    :param str attribute_to: node attribute used as the mapping values.
    :return: Dictionary ``{attribute_from value: attribute_to value}`` when
            matches are found; otherwise the empty query result is returned unchanged.
    """
    # Values go in as a list parameter instead of being quoted by hand into
    # the query text, so identifiers containing quotes cannot break the query.
    # The label and attribute names are still formatted in: Cypher does not
    # accept parameters in those positions.
    ids = [str(i) for i in id_list]
    driver = connector.getGraphDatabaseConnectionConfiguration()
    mapping_query = "MATCH (n:{}) WHERE n.{} IN $ids RETURN n.{} AS from, n.{} AS to"
    mapping = connector.getCursorData(
        driver,
        mapping_query.format(node, attribute_from, attribute_from,
                             attribute_to),
        parameters={'ids': ids})
    if not mapping.empty:
        mapping = dict(zip(mapping['from'], mapping['to']))

    return mapping
示例#10
0
def map_node_name_to_id(driver, node, value):
    """
    Looks up a node by name and returns the first cell of the first matching row.

    The 'map_node_name' query is read from ``queries.yml`` next to this
    module, its ``NODE`` placeholder is replaced by ``node`` and the
    lower-cased string form of ``value`` is passed as the ``name`` parameter.

    :param driver: neo4j driver, which provides the connection to the neo4j graph database.
    :param str node: text substituted for ``NODE`` in the query.
    :param value: name to search for.
    :return: First cell of the first result row, or None when nothing matches.
    """
    query_name = 'map_node_name'
    module_dir = os.path.dirname(os.path.abspath(__file__))
    queries = read_queries(os.path.join(module_dir, "queries.yml"))
    node_query = queries[query_name]['query'].replace('NODE', node)
    matches = connector.getCursorData(
        driver, node_query, parameters={'name': str(value).lower()})

    if matches is None or matches.empty:
        return None
    return matches.values[0][0]
示例#11
0
def check_external_ids_in_db(driver, projectId):
    """
    Runs the 'check_external_ids' query for a given project.

    :param driver: neo4j driver, which provides the connection to the neo4j graph database.
    :param projectId: project identifier, passed to the query as the string parameter ``external_id``.
    :return: Query result as returned by connector.getCursorData; an empty
            Pandas dataframe if the query could not be read or executed.
    """
    query_name = 'check_external_ids'
    query = ''
    matches = pd.DataFrame()
    try:
        upload_queries = get_data_upload_queries()
        query = upload_queries[query_name]['query']
        query_parameters = {'external_id': str(projectId)}
        matches = connector.getCursorData(driver, query, parameters=query_parameters)
    except Exception as err:
        tb = sys.exc_info()[2]
        fname = os.path.split(tb.tb_frame.f_code.co_filename)[1]
        message = "Error: {}. Checking if external identifiers exist in the database: Query name ({}) - Query ({}), error info: {}, file: {},line: {}".format(err, query_name, query, sys.exc_info(), fname, tb.tb_lineno)
        logger.error(message)

    return matches
示例#12
0
文件: project.py 项目: MannLabs/CKG
    def get_sdrf(self):
        """
        Builds the SDRF dataframe of the project.

        The 'project_sdrf' query is read from the project's queries file, its
        ``PROJECTID`` placeholder is replaced by ``self.identifier`` and the
        result is converted with ``builder_utils.convert_ckg_to_sdrf``. Errors
        are logged and not re-raised.

        :return: SDRF dataframe; an empty Pandas dataframe if anything failed.
        """
        sdrf_df = pd.DataFrame()
        # Bound before the try block so the error handler can always format it;
        # otherwise a failing database connection (which happens before the
        # path is built) would raise UnboundLocalError in the handler.
        query_path = None
        try:
            driver = connector.getGraphDatabaseConnectionConfiguration()
            query_path = os.path.join(cwd, self.queries_file)
            project_cypher = query_utils.read_queries(query_path)
            query = query_utils.get_query(project_cypher,
                                          query_id="project_sdrf")
            df = connector.getCursorData(
                driver, query.replace("PROJECTID", self.identifier))
            sdrf_df = builder_utils.convert_ckg_to_sdrf(df)
        except Exception as err:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            logger.error(
                "Error: {}. Reading queries from file {}: {}, file: {},line: {}"
                .format(err, query_path, sys.exc_info(), fname,
                        exc_tb.tb_lineno))

        return sdrf_df
示例#13
0
def get_new_analytical_sample_identifier(driver):
    """
    Asks the database for the next analytical sample identifier.

    Runs the 'increment_analytical_sample_id' query and returns the first
    cell of the first result row.

    :param driver: neo4j driver, which provides the connection to the neo4j graph database.
    :return: Analytical sample identifier, or None if the query failed.
    """
    query_name = 'increment_analytical_sample_id'
    query = ''
    identifier = None
    try:
        upload_queries = get_data_upload_queries()
        query = upload_queries[query_name]['query']
        rows = connector.getCursorData(driver, query).values
        identifier = rows[0][0]
    except Exception as err:
        tb = sys.exc_info()[2]
        fname = os.path.split(tb.tb_frame.f_code.co_filename)[1]
        message = "Error: {}. Getting new analytical sample identifiers: Query name ({}) - Query ({}), error info: {}, file: {},line: {}".format(err, query_name, query, sys.exc_info(), fname, tb.tb_lineno)
        logger.error(message)

    return identifier
示例#14
0
def get_new_user_identifier(driver):
    """
    Queries the database for the last user identifier and returns a new sequential identifier.

    Runs the 'increment_user_id' query and returns the first cell of the
    first result row.

    :param driver: neo4j driver, which provides the connection to the neo4j graph database.
    :type driver: neo4j driver
    :return: User identifier, or None if the query could not be read or executed.
    :rtype: str
    """
    query_name = 'increment_user_id'
    # Both names are bound before the try block: ``query`` is used by the
    # error handler and ``user_identifier`` by the return, and either could be
    # unbound if loading or running the query raised.
    query = ''
    user_identifier = None
    try:
        user_creation_cypher = get_user_creation_queries()
        query = user_creation_cypher[query_name]['query']
        user_identifier = connector.getCursorData(driver, query).values[0][0]
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error(
            "Reading query {}: {}, file: {},line: {}, error: {}".format(
                query, sys.exc_info(), fname, exc_tb.tb_lineno, err))
    return user_identifier
示例#15
0
def convert_ckg_clinical_to_sdrf(df):
    """
    Renames the columns of a CKG clinical dataframe to SDRF column names.

    Columns whose name contains a digit are looked up in the graph database
    as clinical variables ("name (id)") mapped to experimental factors; the
    fixed CKG-to-SDRF names in ``out_mapping`` are applied on top and take
    precedence on conflicts.

    :param df: Pandas dataframe with CKG clinical data.
    :return: Copy of ``df`` with renamed columns.
    """
    out_mapping = {'tissue':'characteristics[organism part]',
                   'disease': 'characteristics[disease]',
                   'grouping1': 'characteristics[phenotype]',
                   'analytical_sample': 'comment[data file]',
                   'subject': 'characteristics[individual]',
                   'biological_sample': 'source name'}
    # Non-string column labels are skipped instead of crashing re.search.
    cols = [c for c in df.columns
            if isinstance(c, str) and re.search(r'\d', c)]

    driver = connector.getGraphDatabaseConnectionConfiguration()
    # The column names are passed as a list parameter instead of formatting
    # the Python list repr into the Cypher text.
    # NOTE(review): database-derived names use "characteristic[" while the
    # fixed mapping above uses "characteristics[" - confirm which is intended.
    query = '''MATCH (ef:Experimental_factor)-[r:MAPS_TO]-(c:Clinical_variable)
                WHERE c.name+' ('+c.id+')' IN $cols RETURN c.name+' ('+c.id+')' AS from, "characteristic["+ef.name+"]" AS to, LABELS(c)'''

    result = connector.getCursorData(driver, query, parameters={'cols': cols})
    mapping = {}
    # An empty result may have no 'from'/'to' columns; indexing it would
    # raise KeyError, so only build the mapping when rows came back.
    if result is not None and not result.empty:
        mapping = dict(zip(result['from'], result['to']))
    mapping.update(out_mapping)
    df = df.rename(mapping, axis=1)

    return df
示例#16
0
def convert_sdrf_to_ckg(df):
    """
    Renames the columns of an SDRF dataframe to CKG column names.

    Columns of the form ``prefix[name]`` are first reduced to ``name``. The
    names are then looked up in the graph database as experimental factors
    mapped to clinical variables ("name (id)"); the fixed SDRF-to-CKG names
    in ``in_mapping`` are applied on top and take precedence on conflicts.

    :param df: Pandas dataframe in SDRF format.
    :return: Copy of ``df`` with renamed columns.
    """
    in_mapping = {'organism part': 'tissue',
                  'disease': 'disease',
                  'phenotype': 'grouping1',
                  'data file': 'analytical_sample external_id',
                  'individual':'subject external_id',
                  'source name':'biological_sample external_id'}
    cols = {}
    for c in df.columns:
        # Non-string column labels are skipped instead of crashing re.search.
        if not isinstance(c, str):
            continue
        matches = re.search(r'\[(.+)\]', c)
        if matches:
            cols[c] = matches.group(1)

    driver = connector.getGraphDatabaseConnectionConfiguration()
    # The factor names are passed as a list parameter instead of formatting
    # the Python list repr into the Cypher text.
    query = '''MATCH (ef:Experimental_factor)-[r:MAPS_TO]-(c:Clinical_variable)
                WHERE ef.name IN $names RETURN ef.name AS from, c.name+' ('+c.id+')' AS to, LABELS(c)'''

    result = connector.getCursorData(
        driver, query, parameters={'names': list(cols.values())})
    mapping = {}
    # An empty result may have no 'from'/'to' columns; indexing it would
    # raise KeyError, so only build the mapping when rows came back.
    if result is not None and not result.empty:
        mapping = dict(zip(result['from'], result['to']))
    mapping.update(in_mapping)
    df = df.rename(cols, axis=1).rename(mapping, axis=1)

    return df
示例#17
0
    def send_query(self, query):
        """
        Runs a query against the graph database over a freshly obtained connection.

        :param query: query passed unchanged to connector.getCursorData.
        :return: Whatever connector.getCursorData returns for the query.
        """
        graph_driver = connector.getGraphDatabaseConnectionConfiguration()
        return connector.getCursorData(graph_driver, query)