示例#1
0
def load(filepath, db, action=None):
    samfile = _load_sam(filepath, db)

    sam_data = Sam(db)
    chromosome_data = Chromosome(db)

    chromosomes = []
    for ref in samfile.references:
        name = trim_chromosome_name(ref)

        c = chromosome_data.get_by_name(name)
        c['ref'] = ref

        chromosomes.append(c)

    filename = os.path.basename(filepath)
    sam = sam_data.get_by_filename(filename)

    if sam is None:
        logging.error('Error : please load "%s" first' % filename)

    samId = sam['id']

    # cypileup
    cypileup.pileup(samfile, chromosomes, samId, db, action)

    samfile.close()
示例#2
0
def load(filepath, db):
    filename = os.path.basename(filepath)
    base, ext = os.path.splitext(filepath)
    if not re.match('^\.(sam|bam)', ext):
        raise UnsupportedFileError('ERROR: Not supported file format')

    sam_data = Sam(db)
    chr_data = Chromosome(db)

    if sam_data.get_by_filename(filename) is not None:
        raise AlreadyLoadedError('WARNING: Already loaded "%s"' % filename)

    logging.info("Begin to load '%s'" % filename)

    # Convert sam to bam
    if ext == '.sam':
        insam = pysam.Samfile(filepath, 'r')
        filepath = base + '.bam'
        outbam = pysam.Samfile(filepath, 'wb', template=insam)
        for s in insam:
            outbam.write(s)

    # Create index if not exist
    bai = filepath + '.bai'
    if not os.path.isfile(bai):
        logging.info("Create index '%s'" % os.path.basename(bai))
        pysam.index(filepath)

    # load sam
    samfile = pysam.Samfile(filepath)
    sam_data.append(filename,
                    samfile.header, samfile.lengths,
                    samfile.mapped,
                    samfile.nreferences, samfile.references)

    # load chromosome
    for ref in samfile.references:
        chr_data.append(trim_chromosome_name(ref))
示例#3
0
def load(filepath, db, action=None):

    filename = os.path.basename(filepath)

    file_ext = filename.split('.')[-1]
    if file_ext != 'bed':
        raise UnsupportedFileError('ERROR: Not supported file format')

    bed_data = Bed(db)
    chr_data = Chromosome(db)
    bed_fragment_data = BedFragment(db)

    if bed_data.get_by_filename(filename) is not None:
        raise AlreadyLoadedError('WARNING: Already loaded "%s"' % filename)

    logging.info("Begin to load '%s'" % filename)

    # load bed
    bedfile = pybed.BedReader(open(filepath, 'r'))

    bed_data.append(filename, "", "", 0, 0)

    bed = bed_data.get_by_filename(filename)

    # load bed fragments
    count = 0

    for line in bedfile.yield_lines():
        b = bedfile.get_line(line)

        c_name = b['chrom']
        c_name = c_name.replace('Chr', '')
        c_name = c_name.replace('chr', '')
        c_name = c_name.replace('.', '')

        c = chr_data.get_by_name(c_name)

        if c is None:
            chr_data.append(c_name)
            c = chr_data.get_by_name(c_name)

        if bed['rgb'] == 0:
            rgb = b['itemRgb'].split(',')
            if len(rgb) == 3:
                [ir, ig, ib] = map(long, b['itemRgb'].split(','))
            else:
                [ir, ig, ib] = [0, 0, 0]
        else:
            [ir, ig, ib] = [0, 0, 0]

        if b['strand'] == '+':
            strand = 0
        elif b['strand'] == '-':
            strand = 1
        else:
            strand = -1

        bed_fragment_data.append(bed['id'], c['id'],
                                 long(b['chromStart']),
                                 long(b['chromEnd']),
                                 b['name'],
                                 long(b['score']),
                                 strand,
                                 long(b['thickStart']),
                                 long(b['thickEnd']),
                                 ir, ig, ib,
                                 long(b['blockCount']),
                                 b['blockSizes'],
                                 b['blockStarts'])

        count += 1

        if bedfile.length >= 100 and count % (bedfile.length / 100) == 0:
            if action is not None:
                act = action()
                if isinstance(act, BedLoaderAction):
                    progress = (count + 1) * 100 / bedfile.length
                    act(progress)

    logging.debug('Loaded %d fragments' % count)