Exemplo n.º 1
0
def _check_housekeeping(id):
    '''
    For each study_id missing "Event" data, 
    check it's Housekeeping information. 
    '''
    try:
        ip, port, sid, username, password = gwas_data_sources.get_db_properties('DEV3')
        dsn_tns = cx_Oracle.makedsn(ip, port, sid)
        connection = cx_Oracle.connect(username, password, dsn_tns)

        cursor = connection.cursor()
        
        cursor.prepare("""
            SELECT P.ID, P.PUBMED_ID, S.ID, 
            TO_CHAR(H.LAST_UPDATE_DATE, 'yyyy-mm-dd hh24:mi:ss'),
            TO_CHAR(H.STUDY_ADDED_DATE, 'yyyy-mm-dd hh24:mi:ss'),
            TO_CHAR(H.CATALOG_PUBLISH_DATE, 'yyyy-mm-dd hh24:mi:ss'),
            P.PUBLICATION_DATE
            FROM PUBLICATION P, STUDY S, HOUSEKEEPING H
            WHERE S.PUBLICATION_ID=P.ID
            and S.HOUSEKEEPING_ID=H.ID
            and S.ID = :id
        """)
        
        r = cursor.execute(None, {'id': id})

        housekeeping_details = cursor.fetchall()
        
        cursor.close()
        connection.close()

    except cx_Oracle.DatabaseError, exception:
        print exception
Exemplo n.º 2
0
def _execute_statements(study_id, sql_statement):
    '''
    Insert a list of Insert statements into the EVENT and STUDY_EVENT tables.
    '''

    # Prepare output file to Append STUDY_EVENT Insert statements to existing file
    output_file = open('missing_event_data.txt', 'a')
    csvout = csv.writer(output_file)

    try:
        ip, port, sid, username, password = gwas_data_sources.get_db_properties('DEV3')
        dsn_tns = cx_Oracle.makedsn(ip, port, sid)
        connection = cx_Oracle.connect(username, password, dsn_tns)

        cursor = connection.cursor()
        
        # https://stackoverflow.com/questions/35327135/retrieving-identity-of-most-recent-insert-in-oracle-db-12c
        new_id = cursor.var(cx_Oracle.NUMBER)
        sql_event = sql_statement + " returning id into :new_id"

        # Write data to file to review 
        csvout.writerow(["StudyID: "+study_id])
        csvout.writerow([sql_event])

        # Get Event_ID of last row inserted
        cursor.execute(sql_event, {'new_id': new_id})
        event_id = int(new_id.getvalue())


        # Execute STUDY_EVENT Insert statement
        sql_study_event = "INSERT INTO STUDY_EVENT VALUES (" + str(study_id) + ", "+ str(event_id) +")"

        # Write data to file to review
        csvout.writerow([sql_study_event+"\n"])

        cursor.execute(sql_study_event)
        
        # commit or rollback changes
        if args.mode == 'production':
            cursor.execute('COMMIT')
        else:
            cursor.execute('ROLLBACK')

        cursor.close()
        connection.close()

    except cx_Oracle.DatabaseError, exception:
        print exception
Exemplo n.º 3
0
def get_all_studies_missing_event_data(event_type):
    '''
    Get all Study Ids that are Published (Housekeeping.Is_published=1)
    but do not have any data in the StudyEvent and Event tables.
    '''
    try:
        ip, port, sid, username, password = gwas_data_sources.get_db_properties('DEV3')
        dsn_tns = cx_Oracle.makedsn(ip, port, sid)
        connection = cx_Oracle.connect(username, password, dsn_tns)

        cursor = connection.cursor()
        
        cursor.prepare("""
            SELECT P.ID, P.PUBMED_ID, S.ID AS STUDY_ID 
            FROM PUBLICATION P, STUDY S, HOUSEKEEPING H 
            WHERE P.ID=S.PUBLICATION_ID 
                and S.HOUSEKEEPING_ID=H.ID 
                and H.IS_PUBLISHED=1
            MINUS
            SELECT P.ID, P.PUBMED_ID, S.ID AS STUDY_ID
            FROM PUBLICATION P, STUDY S, HOUSEKEEPING H, STUDY_EVENT SE, EVENT E
            WHERE S.PUBLICATION_ID=P.ID
                and S.HOUSEKEEPING_ID=H.ID
                and S.ID=SE.STUDY_ID and SE.EVENT_ID=E.ID 
                and E.EVENT_TYPE = :event_type
                and H.IS_PUBLISHED=1
        """)
        
        r = cursor.execute(None, {'event_type': event_type})

        studies = cursor.fetchall()
        
        cursor.close()
        connection.close()

    except cx_Oracle.DatabaseError, exception:
        print exception
Exemplo n.º 4
0
def get_studies_missing_first_publication_event():
    '''
    For studies that have both a 'STUDY_STATUS_CHANGE_PUBLISH_STUDY' Event 
    and a Housekeeping.Catalog_Publish_Date, check if the 
    Housekeeping.Catalog_Publish_Date is earlier than the 'STUDY_STATUS_CHANGE_PUBLISH_STUDY' Event
    and if so create a new Event for this "first" Publication event.
    '''
    try:
        ip, port, sid, username, password = gwas_data_sources.get_db_properties('DEV3')
        dsn_tns = cx_Oracle.makedsn(ip, port, sid)
        connection = cx_Oracle.connect(username, password, dsn_tns)

        cursor = connection.cursor()

        # Get all StudyIds from Housekeeping where Status is Published
        sql_housekeeping_published_status = """
            SELECT P.ID, P.PUBMED_ID, S.ID, H.CATALOG_PUBLISH_DATE 
            FROM PUBLICATION P, STUDY S, HOUSEKEEPING H, CURATION_STATUS CS 
            WHERE S.PUBLICATION_ID=P.ID and S.HOUSEKEEPING_ID=H.ID 
            and H.CURATION_STATUS_ID=CS.ID and H.CURATION_STATUS_ID=6
            and H.CATALOG_PUBLISH_DATE IS NOT NULL
        """

        cursor.execute(sql_housekeeping_published_status)

        housekeeping_published_studies = cursor.fetchall()

        # For Testing
        missing_initial_publication_event = []

        # Review Catalog_Publish_Date for each StudyId
        for hp_study in housekeeping_published_studies:
            hp_study_id = str(hp_study[2])
            hp_publish_date = hp_study[3]

            # Query Study-StudyEvent-Event with this study_id
            cursor.prepare("""
                SELECT P.ID, P.PUBMED_ID, S.ID, E.EVENT_DATE 
                FROM PUBLICATION P, STUDY S, STUDY_EVENT SE, EVENT E 
                WHERE S.PUBLICATION_ID=P.ID and S.ID=SE.STUDY_ID 
                and SE.EVENT_ID=E.ID 
                and E.EVENT_TYPE='STUDY_STATUS_CHANGE_PUBLISH_STUDY' 
                and S.ID = :id 
                and ROWNUM <= 1 
                ORDER BY E.EVENT_DATE DESC
                """)

            r = cursor.execute(None, {'id': hp_study_id})

            initial_publication_event = cursor.fetchone()


            event_type = 'STUDY_STATUS_CHANGE_PUBLISH_STUDY'

            if initial_publication_event is not None and hp_publish_date < initial_publication_event[3]:

                insert_initial_published = "INSERT INTO EVENT VALUES (NULL, TO_DATE("+ \
                        "'"+str(hp_publish_date)+"'" +", 'yyyy-mm-dd hh24:mi:ss'), "+ \
                        "'"+event_type+"'" + ", 14978333, null)"

                _execute_statements(hp_study_id, insert_initial_published)
            elif initial_publication_event is None:
                missing_initial_publication_event.append(hp_study_id)
            else:
                pass

        # print "Studies missing InitialPubEvent Date: ", len(missing_initial_publication_event)

        cursor.close()
        connection.close()

    except cx_Oracle.DatabaseError, exception:
        print exception
 def __init__(self, database):
     self.ip, self.port, self.sid, self.username, self.password = \
         gwas_data_sources.get_db_properties(database)
def get_curation_review_data(pmid, ancestry_mode, curator):
    '''
    Get data for Level 2 review.
    '''

    # List of queries
    curation_level2_data_sql = """
        SELECT S.ID, S.ACCESSION_ID, P.PUBMED_ID, A.FULLNAME,  
        TO_CHAR(P.PUBLICATION_DATE, 'dd-mm-yyyy'), P.PUBLICATION, P.TITLE, 
        TO_CHAR(H.STUDY_ADDED_DATE, 'dd-mm-yyyy'), CS.STATUS, TO_CHAR(H.CATALOG_PUBLISH_DATE, 'dd-mm-yyyy'), 
        S.INITIAL_SAMPLE_SIZE, S.REPLICATE_SAMPLE_SIZE
        FROM STUDY S, HOUSEKEEPING H, PUBLICATION P, AUTHOR A, CURATION_STATUS CS
        WHERE S.HOUSEKEEPING_ID=H.ID and S.PUBLICATION_ID=P.ID and P.FIRST_AUTHOR_ID=A.ID
            and H.CURATION_STATUS_ID=CS.ID
            and P.PUBMED_ID= :pmid
    """

    study_dup_tag_sql = """
        SELECT N.TEXT_NOTE
        FROM STUDY S, NOTE N
        WHERE S.ID=N.STUDY_ID
          and N.NOTE_SUBJECT_ID=9
          and S.ID= :study_id
    """

    study_platform_sql = """
        SELECT listagg(PLA.MANUFACTURER, ', ') WITHIN GROUP (ORDER BY PLA.MANUFACTURER), S.QUALIFIER, S.SNP_COUNT, S.IMPUTED 
        FROM PLATFORM PLA, STUDY_PLATFORM SP, STUDY S
        WHERE PLA.ID=SP.PLATFORM_ID and SP.STUDY_ID=S.ID
              and S.ID= :study_id
        GROUP BY S.QUALIFIER, S.SNP_COUNT, S.IMPUTED
    """

    study_genotyping_technology_sql = """
        SELECT listagg(GT.GENOTYPING_TECHNOLOGY) WITHIN GROUP (ORDER BY GT.GENOTYPING_TECHNOLOGY)
        FROM STUDY S, STUDY_GENOTYPING_TECHNOLOGY SGT, GENOTYPING_TECHNOLOGY GT
        WHERE S.ID=SGT.STUDY_ID and SGT.GENOTYPING_TECHNOLOGY_ID=GT.ID 
            and S.ID= :study_id
    """

    study_reported_trait_sql = """
        SELECT listagg(DT.TRAIT, ', ')  WITHIN GROUP (ORDER BY DT.TRAIT) 
        FROM STUDY S, STUDY_DISEASE_TRAIT SDT, DISEASE_TRAIT DT 
        WHERE S.ID=SDT.STUDY_ID and SDT.DISEASE_TRAIT_ID=DT.ID 
            and S.ID= :study_id
    """

    study_mapped_trait_sql = """
        SELECT listagg(ET.TRAIT, ', ')  WITHIN GROUP (ORDER BY ET.TRAIT), ET.URI
        FROM STUDY S, STUDY_EFO_TRAIT SETR, EFO_TRAIT ET
        WHERE S.ID=SETR.STUDY_ID and SETR.EFO_TRAIT_ID=ET.ID
          and S.ID= :study_id
        GROUP BY ET.URI
    """

    study_association_cnt_sql = """
        SELECT COUNT(A.ID)
        FROM STUDY S, ASSOCIATION A
        WHERE S.ID=A.STUDY_ID 
            and S.ID= :study_id
    """

    # Queries for Expanded Ancestry Option
    expanded_ancestry_sql = """
        SELECT A.ID, A.TYPE, A.NUMBER_OF_INDIVIDUALS, A.DESCRIPTION
        FROM STUDY S, ANCESTRY A
        WHERE A.STUDY_ID=S.ID
          and S.ID= :study_id 
    """

    ancestral_group_sql = """
        SELECT listagg(AG.ANCESTRAL_GROUP, ', ') WITHIN GROUP (ORDER BY AG.ANCESTRAL_GROUP) 
        FROM ANCESTRY A, ANCESTRY_ANCESTRAL_GROUP AAG, ANCESTRAL_GROUP AG 
        WHERE A.ID=AAG.ANCESTRY_ID and AAG.ANCESTRAL_GROUP_ID=AG.ID 
            and A.ID= :ancestry_id
    """

    ancestry_country_of_origin_sql = """
        SELECT listagg(C.COUNTRY_NAME, ', ') WITHIN GROUP (ORDER BY C.COUNTRY_NAME)
        FROM ANCESTRY A, ANCESTRY_COUNTRY_OF_ORIGIN ACOO, COUNTRY C 
        WHERE A.ID=ACOO.ANCESTRY_ID and ACOO.COUNTRY_ID=C.ID 
            and A.ID= :ancestry_id
    """

    ancestry_country_of_recruitment = """
        SELECT listagg(C.COUNTRY_NAME, ', ') WITHIN GROUP (ORDER BY C.COUNTRY_NAME) 
        FROM ANCESTRY A, ANCESTRY_COUNTRY_RECRUITMENT ACOR, COUNTRY C 
        WHERE A.ID=ACOR.ANCESTRY_ID and ACOR.COUNTRY_ID=C.ID 
            and A.ID= :ancestry_id
    """

    all_level2_data = []

    if ancestry_mode == 'collapsed':
        level2_attr_list = ['STUDY_ID', 'DUP_TAG', 'REPORTED_TRAIT', 'STUDY_CREATION_DATE', 'CURATION_STATUS', 'STUDY_ACCCESSION',\
        'CATALOG_PUBLISH_DATE', 'PUBMED_ID', 'FIRST_AUTHOR', 'PUBLICATION_DATE', 'JOURNAL', 'LINK', 'TITLE', 'PLATFORM [SNPS PASSING QC]',\
        'ASSOCIATION_COUNT', 'MAPPED_TRAIT', 'MAPPED_TRAIT_URI', 'GENOTYPING_TECHNOLOGY', 'INITIAL_SAMPLE_DESCRIPTION', \
        'REPLICATION_SAMPLE_DESCRIPTION']
    else:
        level2_attr_list = ['STUDY_ID', 'DUP_TAG', 'REPORTED_TRAIT', 'STUDY_CREATION_DATE', 'CURATION_STATUS', 'STUDY_ACCCESSION',\
        'CATALOG_PUBLISH_DATE', 'PUBMED_ID', 'FIRST_AUTHOR', 'PUBLICATION_DATE', 'JOURNAL', 'LINK', 'TITLE', 'PLATFORM [SNPS PASSING QC]',\
        'ASSOCIATION_COUNT', 'MAPPED_TRAIT', 'MAPPED_TRAIT_URI', 'GENOTYPING_TECHNOLOGY', 'INITIAL_SAMPLE_DESCRIPTION', \
        'REPLICATION_SAMPLE_DESCRIPTION', 'STAGE', 'NUMBER_OF_INDIVIDUALS', 'BROAD_ANCESTRAL_CATEGORY', 'COUNTRY_OF_ORIGIN',\
        'COUNTRY_OF_RECRUITMENT', 'ADDITONAL_ANCESTRY_DESCRIPTION']

    # Get First Author name to include in output filename
    first_author_sql = """
        SELECT REPLACE(A.FULLNAME_STANDARD, ' ', '')
        FROM PUBLICATION P, AUTHOR A 
        WHERE P.FIRST_AUTHOR_ID=A.ID 
            and P.PUBMED_ID= :pmid
    """

    first_author = ""
    try:
        ip, port, sid, username, password = gwas_data_sources.get_db_properties(
            DATABASE_NAME)
        dsn_tns = cx_Oracle.makedsn(ip, port, sid)
        connection = cx_Oracle.connect(username, password, dsn_tns)

        with contextlib.closing(connection.cursor()) as cursor:
            cursor.prepare(first_author_sql)
            cursor.execute(None, {'pmid': pmid})
            first_author_data = cursor.fetchone()
            first_author = first_author_data[0]

        connection.close()

    except cx_Oracle.DatabaseError, exception:
        print exception
        connection.close()

    except cx_Oracle.DatabaseError, exception:
        print exception

    TIMESTAMP = get_timestamp()

    outfile = open(
        first_author + "_" + pmid + "-" + ancestry_mode + "_" + curator + "_" +
        TIMESTAMP + ".csv", "w")
    csvout = csv.writer(outfile)
    csvout.writerow(level2_attr_list)

    # Get data for curation review file
    try:
        ip, port, sid, username, password = gwas_data_sources.get_db_properties(
            DATABASE_NAME)
        dsn_tns = cx_Oracle.makedsn(ip, port, sid)
        connection = cx_Oracle.connect(username, password, dsn_tns)

        with contextlib.closing(connection.cursor()) as cursor:
            cursor.prepare(curation_level2_data_sql)
            cursor.execute(None, {'pmid': pmid})
            curation_queue_data = cursor.fetchall()

            for data in curation_queue_data:

                data_results = {}

                data_results['STUDY_ID'] = data[0]

                # Account for studies that do not yet have an AccessionId