class DbTest(unittest.TestCase):

    def setUp(self):
        db.db_init('sqlite:///unit_test.db')
        db.metadata.drop_all(bind=db.connection)
        db.metadata.create_all(db.engine)
        self.r1 = RefSNP(1000, '1')
        self.r2 = RefSNP(1001, '2')
        a1 = Allele('A', 'A', 100190109)
        a1.allele_count = 5000
        a1.total_count = 11000
        self.r1.put_allele(a1)
        a2 = Allele('A', 'G', 100190109)
        a2.allele_count = 6000
        a2.total_count = 11000
        self.r1.put_allele(a2)

    def test_bulk_insert(self):
        result = db.bulk_insert([self.r1, self.r2], db.ref_snps)
        self.assertEqual(2, result.rowcount)
        result = db.bulk_insert(self.r1.alleles, db.alleles)
        self.assertEqual(2, result.rowcount)

    def test_delete_chromosome(self):
        result = db.bulk_insert([self.r1, self.r2], db.ref_snps)
        result = db.bulk_insert(self.r1.alleles, db.alleles)
        RefSNP.delete_chromosomes(["1", "2"], db.connection)
        select_query = db.ref_snps.select().where(db.ref_snps.c.chromosome == '1')
        one_row = db.connection.execute(select_query).fetchone()
        self.assertIsNone(one_row)

    def test_default_init(self):
        db.default_init()
        self.assertTrue(True)
def fetch_snp_file(json_file, queue, min_maf=0):
    """
    Fetch a NIH refSNP file, then open it and add RefSNP objects to the work queue.

    :param json_file: NIH file to download via FTP
    :param queue: work queue for RefSNP objects
    :param min_maf: minimum minor allele frequency. SNPs with a lower MAF will not be saved to the database.
    :return:
    """
    # Not sure if ftplib is thread-safe, so use an FTP login per call
    ftp = ftp_login()
    os.makedirs(DOWNLOAD_DIR, exist_ok=True)
    download_path = DOWNLOAD_DIR + json_file
    download_needed = True
    if os.path.exists(download_path):
        # Compare the remote MD5 with the local copy to avoid re-downloading
        md5 = []
        ftp.retrlines('RETR ' + json_file + ".md5", md5.append)
        if md5:
            md5 = md5[0].split(" ")[0]
            print("FTP MD5 of %s: %s" % (json_file, md5))
            block_size = 65536
            hasher = hashlib.md5()
            with open(download_path, 'rb') as afile:
                buf = afile.read(block_size)
                while len(buf) > 0:
                    hasher.update(buf)
                    buf = afile.read(block_size)
            local_md5 = hasher.hexdigest()
            print("Local File MD5: %s" % local_md5)
            if local_md5 == md5:
                print("MD5 matches local copy. Skipping download.")
                download_needed = False
    if download_needed:
        with open(download_path, 'wb') as f:
            ftp.retrbinary('RETR ' + json_file, f.write)
    with bz2.BZ2File(download_path, 'rb') as f_in:
        chromosome = chromosome_from_filename(json_file)
        for line in f_in:
            snp = RefSNP.from_nih_json(line, chromosome)
            if snp.total_count > 0 and snp.alleles:
                if 0 <= min_maf <= snp.maf:
                    queue.put(snp)
    return True
def load_snps_db(self, min_freq, max_snps):
    """
    Load SNPs from the DB and store them as SNPTuples. Also output a map file for plink.

    :param min_freq: minimum minor allele frequency
    :param max_snps: maximum number of SNPs to load
    :return:
    """
    invalid_count = 0
    snps_result = db.connection.execute(
        "Select r.id, chromosome, maf, total_count, deleted, inserted, position, allele_count "
        "from ref_snps r "
        "join alleles a on r.id = a.ref_snp_id "
        "and r.maf >= %f and r.total_count >= %i" % (min_freq, MIN_TOTAL_COUNT)
    )
    current_snp_id = -1
    snp = None
    for snp_row in snps_result:
        if snp_row["id"] != current_snp_id:
            if snp and snp.valid_for_plink():
                if self.snp_count >= max_snps - 1:
                    print("Hit max_snps size of %i. Stopping loading snps." % max_snps, flush=True)
                    break
                self.add_snp_tuple(snp)
                if self.snp_count % 100000 == 0:
                    print("Loaded %i snps. %s" %
                          (self.snp_count, datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
                          flush=True)
            else:
                invalid_count += 1
            # Otherwise this row starts a new snp
            snp = RefSNP.from_row_proxy(snp_row)
        # Add joined allele data every time
        snp.put_allele(Allele.from_row_proxy(snp_row))
        current_snp_id = snp_row["id"]
    self.add_snp_tuple(snp)
    print("Skipped Invalid: %i" % invalid_count, flush=True)
    print("Total Loaded: %i" % len(self.ordered_snps), flush=True)
def load_via_sql():
    start = datetime.now()
    db.default_init()
    print("%s SQL loading started" % start.strftime("%Y-%m-%d %H:%M:%S"))
    query = db.ref_snps.select().where(db.ref_snps.columns.maf >= MIN_FREQ)
    print(str(query.compile(db.engine, compile_kwargs={"literal_binds": True})))
    result = db.connection.execute(query)
    ref_snps = {}
    for row in result:
        snp = RefSNP.from_row_proxy(row)
        ref_snps[snp.id] = snp
    print("%s RefSNP Query complete" % datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
    allele_query = "Select ref_snp_id, deleted, inserted, position, allele_count from ref_snps r " \
                   "join alleles a on r.id = a.ref_snp_id and r.maf >= %f" % MIN_FREQ
    result = db.connection.execute(allele_query)
    for row in result:
        allele = Allele.from_row_proxy(row)
        ref_snps[row.ref_snp_id].put_allele(allele)
    end = datetime.now()
    print("%s DB loading finished. %s elapsed" % (end.strftime("%Y-%m-%d %H:%M:%S"), str(end - start)))
    print("%i snps loaded from the DB" % len(ref_snps))
    return ref_snps
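# Hedged usage sketch (illustrative, not part of the original source): load every RefSNP
# at or above MIN_FREQ into memory and report how many alleles the first one carries.
# The helper name _example_load_via_sql is hypothetical, and it assumes the database that
# db.default_init() configures inside load_via_sql() has already been populated.
def _example_load_via_sql():
    ref_snps = load_via_sql()
    if ref_snps:
        first_snp = next(iter(ref_snps.values()))
        print("RefSNP %s carries %i alleles" % (first_snp.id, len(first_snp.alleles)))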
def download_ref_snps(chromosome_list, num_workers=2, append_mode=False, min_maf=0):
    """
    Downloads all RefSNP data from NIH's FTP site. Requires ~250 GB of disk space.
    """
    ftp = ftp_login()
    file_list = []
    # Get a list of files available for download
    ftp.retrlines('NLST', file_list.append)
    search_pattern = DBSNP_JSON_PATTERN % ".*"
    if chromosome_list:
        chromosome_match = "(" + "|".join(chromosome_list).replace(" ", "") + ")"
        search_pattern = DBSNP_JSON_PATTERN % chromosome_match
    json_for_dl = [f for f in file_list if re.search(search_pattern, f)]
    if not append_mode:
        print("Removing old data from DB.")
        if not chromosome_list:
            try:
                print("No chromosome list specified. Clearing entire DB.")
                db.ref_snps.drop(db.engine)
                db.alleles.drop(db.engine)
            except Exception:
                print('INFO - Exception raised dropping tables. Possibly tables do not exist. Continuing on.')
        else:
            RefSNP.delete_chromosomes(chromosome_list, db.connection)
    # Create schema if missing
    db.metadata.create_all(db.engine)
    with concurrent.futures.ProcessPoolExecutor(max_workers=num_workers) as executor, \
            multiprocessing.Manager() as m:
        q = m.Queue(10000)
        # Start the load operations and save a map of Future to filename
        future_to_file = {executor.submit(fetch_snp_file, json_file, q, min_maf): json_file
                          for json_file in json_for_dl}
        # Sleep a bit to wait for download processes to load the queue
        time.sleep(10)
        count_inserted = 0
        while any(not f.done() for f in future_to_file.keys()):
            try:
                count_inserted += write_snps_to_db(q)
                if count_inserted > 0:
                    print("Inserted %i refSNPs." % count_inserted)
                time.sleep(2)  # Wait for more items in the queue
            except Exception as e:
                print("Exception writing snps to DB.")
                print(e)
                for f in future_to_file.keys():
                    f.cancel()
                raise e
        for future in future_to_file.keys():
            filename = future_to_file[future]
            try:
                data = future.result()
            except Exception as exc:
                print('%r generated an exception: %s' % (filename, exc))
            else:
                print('Successfully downloaded %s and loaded into db.' % filename)
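# Hedged usage sketch (illustrative, not from the original source): a minimal CLI entry
# point wiring download_ref_snps() to command-line arguments. The flag names
# (--chromosomes, --workers, --append, --min-maf) are assumptions for this example, and
# it assumes db.default_init() is the appropriate way to initialize the connection here,
# as it is in load_via_sql() and the unit tests.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(
        description="Download NIH dbSNP refSNP JSON files and load them into the local DB.")
    parser.add_argument('--chromosomes', nargs='*', default=None,
                        help="Chromosomes to download, e.g. 1 2 X. Default: all.")
    parser.add_argument('--workers', type=int, default=2,
                        help="Number of download processes.")
    parser.add_argument('--append', action='store_true',
                        help="Append to existing data instead of clearing it first.")
    parser.add_argument('--min-maf', type=float, default=0,
                        help="Skip SNPs below this minor allele frequency.")
    args = parser.parse_args()

    db.default_init()
    download_ref_snps(args.chromosomes, num_workers=args.workers,
                      append_mode=args.append, min_maf=args.min_maf)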