示例#1
0
def output(stream, peaks, chrom, w=73, strand='+'):
    """
    Writes peak predictions to a stream in BED format.

    Each peak is a (midpoint, value) pair; every line spans `w` bases
    on either side of the midpoint on chromosome `chrom`.
    """
    logger.debug('writing %s peaks on strand %s' % (commify(len(peaks)), strand))
    template = "%s\t%d\t%d\t.\t%f\t%s\n"
    for center, score in peaks:
        stream.write(template % (chrom, center - w, center + w, score, strand))
示例#2
0
def transform(inpname, size, outname=None):
    """
    Transforms reads stored in an eland export file to a GFF file.

    Parameters:
        inpname -- tab delimited input file; chromosome, start and
                   strand are read from columns 11, 13 and 14
        size    -- fixed read length used to compute each interval end
        outname -- output GFF file name (required in practice; the
                   default of None would fail on open)

    NOTE(review): the original docstring claimed bed input, but the
    column indices and the emitted header indicate eland export data.
    """
    logger.debug('input %s' % inpname)
    logger.debug('output %s' % outname)

    reader = csv.reader(open(inpname), delimiter='\t')

    output = open(outname, 'wt')
    output.write('##gff-version 3\n')
    output.write('# created with eland2gff on %s\n' % inpname)
    output.write('# fixed read length of %s\n' % size)
    for row in reader:
        # skip comment lines here; the takewhile() based unwinding used
        # before also consumed (and lost) the first non-comment row
        if row and row[0].startswith('#'):
            continue
        chrom, start, strand = row[10], int(row[12]), row[13]
        end = start + size
        # eland marks strands as F(orward) or R(everse)
        strand = '+' if strand == 'F' else '-'
        result = map(str, [chrom, '.', '.', start, end, '.', strand, '.', '.'])
        output.write("%s\n" % '\t'.join(result))

    output.close()
示例#3
0
def predict(inpname, outname, options):
    """
    Generate the peak predictions on a genome wide scale

    Parameters:
        inpname -- input data file name (or index name when
                   options.index is set)
        outname -- output file the predicted peaks are written to
        options -- parsed command line options; uses strand, index,
                   workdir, maxsize, exclude, sigma, level and mode
    """
    if options.strand == TWOSTRAND:
        logger.info('operating in twostrand mode')

    # use the index directly when passed in, otherwise build from data
    if options.index:
        index = hdflib.PositionalData(fname='', index=inpname, nobuild=True, workdir=options.workdir)
    else:
        index = hdflib.PositionalData(fname=inpname, nobuild=True, workdir=options.workdir)

    fp = file(outname, 'wt')

    for label in index.labels:
        table = index.table(label)
        size  = table.cols.idx[-1]
        info  = util.commify(size)
        logger.info('predicting on %s of total size %s' % (label, info))
        lo = 0
        hi = min( (size, options.maxsize) )

        # process the label in maxsize sized chunks
        while True:
            if lo >= size:
                break
            perc = '%.1f%%' % (100.0*lo/size)
            logger.info('processing %s %s:%s (%s)' % (label, lo, hi, perc))

            # get the data
            res = index.query(start=lo, end=hi, label=label)

            # half of the exclusion zone, used as the peak half-width
            w = options.exclude/2

            # renamed from `predict`, which shadowed this very function
            def fit_peaks(x, y):
                "Smooths the signal then detects (and optionally selects) peaks"
                fx, fy = fitlib.gaussian_smoothing(x=x, y=y, sigma=options.sigma, epsilon=options.level )
                peaks = fitlib.detect_peaks(x=fx, y=fy )
                if options.mode != 'all':
                    peaks = fitlib.select_peaks(peaks=peaks, exclusion=options.exclude, threshold=options.level)
                return peaks

            if options.strand == TWOSTRAND:
                # operates in two strand mode
                for yval, strand in [ (res.fwd, '+'), (res.rev, '-') ]:
                    logger.debug('processing strand %s' % strand)
                    peaks = fit_peaks(x=res.idx, y=yval)
                    output(stream=fp, peaks=peaks, chrom=label, w=w, strand=strand)
            else:
                # combine strands
                peaks = fit_peaks(x=res.idx, y=res.val)
                output(stream=fp, peaks=peaks, chrom=label, w=w, strand='+')

            # switching to a higher interval
            lo = hi
            hi += options.maxsize

    fp.close()
def fix_site_settings():
    """
    On first run the site information may not be correct.
    This function updates the site to the correct values
    """
    site, created = Site.objects.get_or_create(id=settings.SITE_ID)
    # nothing to do when the domain already matches the settings
    if site.domain == settings.SITE_DOMAIN:
        return
    site.domain = settings.SITE_DOMAIN
    site.name = settings.SITE_NAME
    logger.debug('modifying site domain:%s name: %s' % (site.domain, site.name) )
    site.save()
def get_project( user, pid, write=True ):
    "Returns a project for a given user"
    try:
        member  = models.Member.objects.get( user=user, project__id=pid )
        project = member.project
        project.role = member.role
        project.is_manager = (project.role == status.MANAGER)
    except ObjectDoesNotExist, exc:
        logger.debug( exc )
        raise AccessError("You may not access this project")
def get_result(user, rid):
    "Returns a result for a given user"

    try:
        # get the result
        result = models.Result.objects.get(id=rid)
        # verifies access rights
        data = get_data(user, did=result.data.id, write=False)
    except ObjectDoesNotExist, exc:
        logger.debug( exc )
        raise AccessError("You may not access this project")
def get_data( user, did, write=True ):
    "Returns a project for a given user"
    try:
        data = models.Data.objects.get(id=did)
        member  = models.Member.objects.get( user=user, project=data.project )
        project = member.project
        project.role = member.role
        project.is_manager = (project.role == status.MANAGER)
        data.write_access = project.is_manager or (data.owner==user)
    except ObjectDoesNotExist, exc:
        logger.debug( exc )
        raise AccessError("You may not access this project")
def output(
    stream,
    peaks,
    chrom,
    w=73,
    strand='+',
):
    """
    Outputs peaks to a stream.

    Each peak is a (midpoint, value) pair; it is written as one
    tab delimited BED line spanning `w` bases on either side of the
    midpoint, on chromosome `chrom` and strand `strand`.
    """

    logger.debug('writing %s peaks on strand %s' %
                 (commify(len(peaks)), strand))
    for mid, value in peaks:
        # expand the midpoint into a fixed width interval
        start, end = mid - w, mid + w
        stream.write("%s\t%d\t%d\t.\t%f\t%s\n" %
                     (chrom, start, end, value, strand))
def load_users(fname, options):
    """
    Loads users into the database.

    Reads `fname` as a CSV file with username, email, first_name,
    last_name and is_superuser columns, creating any missing users.
    Newly created users get passwords according to their role and the
    test_mode option; existing users are left untouched.
    """

    if not os.path.isfile(fname):
        logger.error('file not found %s' % fname)
        return

    if options.flush:
        # resets the database with fresh values
        flush_database()

    # alters site settings
    fix_site_settings()

    # this will be the admin password
    passwd = settings.SECRET_KEY

    # check the length of the secret key in non debug modes
    if not settings.DEBUG and len(passwd) < 5:
        msg = 'The value of the SECRET_KEY setting is too short. Please make it longer!'
        logger.error(msg)
        sys.exit()

    # shortcut to user creation
    user_get = User.objects.get_or_create

    # read the file and create the users based on the data in it
    stream = file(fname)
    for row in csv.DictReader( stream ):
        username, email, first_name, last_name = row['username'], row['email'], row['first_name'], row['last_name']
        is_superuser = (row['is_superuser'] == 'yes')
        user, flag = user_get(username=username, email=email, first_name=first_name, last_name=last_name, is_superuser=is_superuser, is_staff=is_superuser)
        if flag:
            logger.debug( 'created user: %s' % user.get_full_name() )
            if username in ('admin', 'demo', 'public'):
                # admin user will get the secret key as password
                user.set_password(passwd)
            else:
                if options.test_mode:
                    # in testmode we will set known passwords
                    # used during functional testing
                    user.set_password( passwd + 'X')
                else:
                    # all other users will need to reset their passwords
                    user.set_unusable_password()
            user.save()

    # the original leaked this file handle
    stream.close()
示例#10
0
def upload_processor(request, pid):
    """
    Handles the actual data upload.

    Accepts up to 50 uploaded files (form fields File0..File49) for
    authenticated, non-public users and registers each with the project.
    """
    user = request.user
    if user.is_authenticated() and user.username != 'public':
        if 'upload' in request.POST:
            count = 0
            # take at most 50 files
            for num in range(50):
                key = 'File%s' % num
                if key not in request.FILES:
                    continue
                count += 1
                stream = request.FILES[key]
                name = html.chop_dirname(stream.name)
                logger.debug('%s uploaded file %s' % (user.username, name))
                authorize.create_data(user=user, pid=pid, stream=stream, name=name, info='no information')

            user.message_set.create(message="Uploaded %s files" % count)
        if 'simple' in request.POST:
            return html.redirect("/project/view/%s/" % pid)

    # this is needed only because the JUPload applet makes a HEAD request
    return html.response('SUCCESS\n')
示例#11
0
    def __init__(self, fname, workdir=None, update=False, nobuild=False, index=None):
        """
        Create the PositionalData
        """
        self.fname = fname
        self.db = None

        # locate the directory and file components of the input name
        base_dir, base_name = os.path.split(self.fname)

        # an explicit working directory overrides the input's directory
        base_dir = workdir or base_dir

        # the HDF index this instance operates on
        self.index = index or conf.path_join(base_dir, "%s.hdf" % base_name)

        # debug messages
        logger.debug("file path %s" % self.fname)
        logger.debug("index path %s" % self.index)

        # refuse to proceed when building is disabled and no index exists
        if nobuild and missing(self.index):
            raise Exception("No autobuild allowed and no index found at %s" % self.index)

        # (re)build the index when forced to, or when it is absent
        if update or missing(self.index):
            self.build()

        # open the HDF store read-only
        self.db = openFile(self.index, mode="r")
        self.root = self.db.root

        # shows the internal labels
        logger.debug("index labels -> %s" % self.labels)
示例#12
0
    def __init__(self,
                 fname,
                 workdir=None,
                 update=False,
                 nobuild=False,
                 index=None):
        """
        Create the PositionalData.

        Parameters:
            fname   -- the data file the HDF index is derived from
            workdir -- optional directory the index is stored in
                       (defaults to the directory of `fname`)
            update  -- force a rebuild of the index even when it exists
            nobuild -- never build; raise when the index is missing
            index   -- explicit path to an existing HDF index
        """
        self.fname = fname
        self.db = None

        # split the incoming name to find the real name, and base directory
        basedir, basename = os.path.split(self.fname)

        # the index may be stored in the workdir if it was specified
        basedir = workdir or basedir

        # this is the HDF index name that the file operates on
        self.index = index or conf.path_join(basedir, '%s.hdf' % basename)

        # debug messages
        logger.debug('file path %s' % self.fname)
        logger.debug('index path %s' % self.index)

        # no building permitted
        if nobuild and missing(self.index):
            raise Exception('No autobuild allowed and no index found at %s' %
                            self.index)

        # creating indices if these are missing or an update is forced
        if update or missing(self.index):
            self.build()

        # operates on the HDF file (opened read-only)
        self.db = openFile(self.index, mode='r')
        self.root = self.db.root

        # shows the internal labels
        logger.debug('index labels -> %s' % self.labels)
示例#13
0
            fwd, rev, val = 1, 0, 1
        elif strand == '-':
            # on reverse strand, 5' is at end
            idx = int(end) - shift
            fwd, rev, val = 0, 1, 1
        else:
            # no strand specified, generate interval centers
            idx = (int(start)+int(end))/2
            fwd, rev, val = 0, 0, 1

        # it is essential be able to sort the index as a string! 
        fp.write('%s\t%012d\t%s\t%s\t%s\n' % (chrom, idx, fwd, rev, val))

    fp.close()
    linet = util.commify(linec)
    logger.debug("parsing %s lines finished in %s" % (linet, timer.report()))

    # if it is producing coverage then it will expand reads into full intervaals

    # now let the sorting commence
    cmd = "sort %s > %s" % (flat, sorted)
    logger.debug("sorting into '%s'" % sorted)
    os.system(cmd)
    logger.debug("sorting finished in %s" % timer.report() )

    logger.debug("consolidating into '%s'" % outname)
    consolidate( sorted, outname, format=format)
    logger.debug("consolidate finished in %s" % timer.report() )
    logger.debug("output saved to '%s'" % outname)
    logger.debug("full conversion finished in %s" % full.report() )
示例#14
0
def transform(inpname, outname, format, shift=0, index=False, options=None):
    """
    Transforms reads stored in bedfile to a genetrack input file.
    Requires at least 6 bed columns to access the strand.
    """

    # detect file formats
    if format == "BED":
        CHROM, START, END, STRAND = 0, 1, 2, 5
    elif format == "GFF":
        CHROM, START, END, STRAND = 0, 3, 4, 6
    else:
        raise Exception('Invalid file format' % format)

    # two sanity checks, one day someone will thank me
    if format == 'BED' and inpname.endswith('gff'):
        raise Exception('BED format on a gff file?')
    if format == 'GFF' and inpname.endswith('bed'):
        raise Exception('GFF format on a bed file?')

    # find the basename of the outputname
    basename = os.path.basename(outname)
   
    # two files store intermediate results
    flat = conf.tempdata( '%s.flat' % basename )
    sorted  = conf.tempdata( '%s.sorted' % basename )

    # check for track information on first line, 
    # much faster this way than conditional checking on each line
    fp = file(inpname, 'rU')
    first = fp.readline()
    fp.close()

    # create the reader
    reader = csv.reader(file(inpname, 'rU'), delimiter='\t')

    # skip if trackline exists
    if first.startswith == 'track':
        reader.next()

    # unwind the comments
    list(takewhile(lambda x: x[0].startswith('#'), reader))

    # copious timing info for those who enjoy these
    timer, full = util.Timer(), util.Timer()

    logger.debug("parsing '%s'" % inpname)
    logger.debug("output to '%s'" % outname)

    # create the unsorted output file and apply corrections
    logger.debug("unsorted flat file '%s'" % flat)

    fp = file(flat, 'wt')
    for linec, row in enumerate(reader):
        try:
            chrom, start, end, strand = row[CHROM], row[START], row[END], row[STRAND]
        except Exception, exc:
            first = row[0][0]
            # may be hitting the end of the file with other comments
            if  first == '>':
                break # hit the sequence content of the gff file
            elif first == '#':
                continue # hit upon some comments
            else:
                logger.error(row)
                raise Exception(exc) 

        if strand == '+':
            # on forward strand, 5' is at start
            idx = int(start) + shift
            fwd, rev, val = 1, 0, 1
        elif strand == '-':
            # on reverse strand, 5' is at end
            idx = int(end) - shift
            fwd, rev, val = 0, 1, 1
        else:
            # no strand specified, generate interval centers
            idx = (int(start)+int(end))/2
            fwd, rev, val = 0, 0, 1

        # it is essential be able to sort the index as a string! 
        fp.write('%s\t%012d\t%s\t%s\t%s\n' % (chrom, idx, fwd, rev, val))
示例#15
0
        help="how many jobs to run in parallel"
    )

    # flushes all content away, drops all database content!
    parser.add_option(
        '--server', action="store_true", 
        dest="server", default=False, 
        help="runs as a server and invokes the jobrunner at every delay seconds",
    )

    # parse the argument list
    options, args = parser.parse_args()

    logger.disable(options.verbosity)

    # missing file names
    if options.server and not options.delay:
        parser.print_help()
    else:
        if options.server:
            logger.info('server mode, delay=%ss' % options.delay)
        while 1:
            # this is used to start multiple jobs with cron (at every minute but
            # having them actually start up at smaller increments
            time.sleep(options.delay)
            execute(limit=options.limit)
            if not options.server:
                break
            else:
                logger.debug( 'jobserver waiting %ss' % options.delay)
示例#16
0
    return projects

def get_project( user, pid, write=True ):
    "Returns a project for a given user"
    try:
        member  = models.Member.objects.get( user=user, project__id=pid )
        project = member.project
        project.role = member.role
        project.is_manager = (project.role == status.MANAGER)
    except ObjectDoesNotExist, exc:
        logger.debug( exc )
        raise AccessError("You may not access this project")

    # write access check on by default
    if write and not project.is_manager:
        logger.debug( 'write access with invalid role' )
        raise AccessError('You may not change this project')
        
    return member.project

def get_data( user, did, write=True ):
    "Returns a project for a given user"
    try:
        data = models.Data.objects.get(id=did)
        member  = models.Member.objects.get( user=user, project=data.project )
        project = member.project
        project.role = member.role
        project.is_manager = (project.role == status.MANAGER)
        data.write_access = project.is_manager or (data.owner==user)
    except ObjectDoesNotExist, exc:
        logger.debug( exc )
def predict(inpname, outname, options):
    """
    Generate the peak predictions on a genome wide scale

    Parameters:
        inpname -- input data file name (or index name when
                   options.index is set)
        outname -- output file the predicted peaks are written to
        options -- parsed command line options; uses strand, index,
                   workdir, maxsize, exclude, sigma, level and mode
    """
    if options.strand == TWOSTRAND:
        logger.info('operating in twostrand mode')

    # use the index directly when passed in, otherwise build from data
    if options.index:
        index = hdflib.PositionalData(fname='',
                                      index=inpname,
                                      nobuild=True,
                                      workdir=options.workdir)
    else:
        index = hdflib.PositionalData(fname=inpname,
                                      nobuild=True,
                                      workdir=options.workdir)

    fp = file(outname, 'wt')

    for label in index.labels:
        table = index.table(label)
        size = table.cols.idx[-1]
        info = util.commify(size)
        logger.info('predicting on %s of total size %s' % (label, info))
        lo = 0
        hi = min((size, options.maxsize))

        # process the label in maxsize sized chunks
        while True:
            if lo >= size:
                break
            perc = '%.1f%%' % (100.0 * lo / size)
            logger.info('processing %s %s:%s (%s)' % (label, lo, hi, perc))

            # get the data
            res = index.query(start=lo, end=hi, label=label)

            # half of the exclusion zone, used as the peak half-width
            w = options.exclude / 2

            # renamed from `predict`, which shadowed this very function
            def fit_peaks(x, y):
                "Smooths the signal then detects (and optionally selects) peaks"
                fx, fy = fitlib.gaussian_smoothing(x=x,
                                                   y=y,
                                                   sigma=options.sigma,
                                                   epsilon=options.level)
                peaks = fitlib.detect_peaks(x=fx, y=fy)
                if options.mode != 'all':
                    peaks = fitlib.select_peaks(peaks=peaks,
                                                exclusion=options.exclude,
                                                threshold=options.level)
                return peaks

            if options.strand == TWOSTRAND:
                # operates in two strand mode
                for yval, strand in [(res.fwd, '+'), (res.rev, '-')]:
                    logger.debug('processing strand %s' % strand)
                    peaks = fit_peaks(x=res.idx, y=yval)
                    output(stream=fp,
                           peaks=peaks,
                           chrom=label,
                           w=w,
                           strand=strand)
            else:
                # combine strands
                peaks = fit_peaks(x=res.idx, y=res.val)
                output(stream=fp, peaks=peaks, chrom=label, w=w, strand='+')

            # switching to a higher interval
            lo = hi
            hi += options.maxsize

    fp.close()