Пример #1
0
def importRepeatsFromUCSC(infile, outfile, ucsc_database, repeattypes, genome):
    '''import repeats from a UCSC formatted file.

    The repeats are stored as a :term:`gff` formatted file.

    Arguments
    ---------
    infile : string
        Not used by this function.
    outfile : string
        Filename of the gzip-compressed output. A log file is written
        to ``<outfile>.log``.
    ucsc_database : string
        Name of the UCSC database to connect to.
    repeattypes : string
        Comma-separated list of repeat classes. Only rows whose
        ``repClass`` is in this list are downloaded.
    genome : string
        Passed as ``--genome-file`` to ``cgat gff2gff``.

    Raises
    ------
    ValueError
        If the database contains no table ending in ``rmsk`` or if no
        repeats of the requested classes were found.
    '''

    # Turn "a,b,c" into "a','b','c" so that it can be wrapped in quotes
    # inside the SQL ``IN (...)`` clause below.
    # NOTE(review): the value is interpolated into the SQL text; it is
    # expected to come from trusted configuration.
    repclasses = "','".join(repeattypes.split(","))

    # Repeats are either stored in a single ``rmsk`` table (hg19) or in
    # individual ``rmsk`` tables (mm9) like chr1_rmsk, chr2_rmsk, ....
    # In order to do a single statement, the ucsc mysql database is
    # queried for tables that end in rmsk.
    dbhandle = PipelineUCSC.connectToUCSC(
        host=PARAMS["ucsc_host"],
        user=PARAMS["ucsc_user"],
        database=ucsc_database)

    cc = dbhandle.execute("SHOW TABLES LIKE '%%rmsk'")
    tables = [x[0] for x in cc.fetchall()]
    if not tables:
        raise ValueError("could not find any `rmsk` tables")

    tmpfile = P.getTempFile(shared=True)
    tmpfilename = tmpfile.name

    total_repeats = 0
    try:
        for table in tables:
            E.info("%s: loading repeats from %s" % (ucsc_database, table))
            cc = dbhandle.execute(
                """SELECT genoName, 'repeat', 'exon', genoStart+1, genoEnd, '.',
            strand, '.',
            CONCAT('class \\"', repClass, '\\"; family \\"', repFamily, '\\";')
            FROM %(table)s
            WHERE repClass in ('%(repclasses)s') """ %
                {"table": table, "repclasses": repclasses})
            n = 0
            for data in cc.fetchall():
                n += 1
                tmpfile.write("\t".join(map(str, data)) + "\n")
            E.info("%s: %s=%i repeats downloaded" %
                   (ucsc_database, table, n))
            total_repeats += n
    finally:
        # always release the handle, also when a query fails
        tmpfile.close()

    if total_repeats == 0:
        # do not leave an empty temporary file behind
        os.unlink(tmpfilename)
        raise ValueError("did not find any repeats for %s" % ucsc_database)

    # ``statement`` is not passed explicitly: P.run() is called without
    # arguments, so the local names ``statement``, ``tmpfilename``,
    # ``genome`` and ``outfile`` must keep exactly these names.
    statement = '''cat %(tmpfilename)s
    | %(pipeline_scriptsdir)s/gff_sort pos
    | cgat gff2gff
    --method=sanitize
    --sanitize-method=genome
    --skip-missing
    --genome-file=%(genome)s
    --log=%(outfile)s.log
    | gzip
    > %(outfile)s
    '''
    try:
        P.run()
    finally:
        # remove the temporary file even if the statement failed
        os.unlink(tmpfilename)
Пример #2
0
def publish_tracks(export_files,
                   prefix="",
                   project_id=None,
                   project_name=None,
                   UCSC_ini=None):
    '''publish a UCSC Track Hub.

    This method takes a dictionary of file types associated
    with files. For each file, a link will be created in
    the upload directory. The track will be stored under
    a project name, which will be derived from the location
    of the working directory.

    Information about the genome, the upload directory, etc. will be
    taken from the global configuration dictionary.

    For example, calling the following code in a pipeline executed
    in .../proj013/mapping::

        export_files = {
            "bamfiles": glob.glob("*/*.bam") + glob.glob("*/*.bam.bai"),
            "bigwigfiles": glob.glob("*/*.bw"),
        }
        publish_tracks(export_files)

    will create a hub file at
    :file:`<uploaddir>/OBFUSID/mapping/ucsc.hub`, where
    OBFUSID is the obfuscated directory entry in the CGAT
    download directory for a particular project.

    If you want to create group tracks and get them to inherit from a
    parent, you can supply a filename for a UCSC ini file.  The ini
    file defines two types of parameters, parents and set_features.
    Parents define containers with a regex to identify the child
    tracks. Set_features add additional features to all tracks
    matching a regex. Parent and set_feature parameters are identified
    by their respective "parent" or "set_features" prefixes.

    For example, the following UCSC ini "test.ini" will create a
    parent multiWig track called "Test" with the UCSC options as
    defined in the values parameter. The values param must be a comma
    separated list of key:value pairs which are separated by a single
    space. The regex param for parent_test defines the child tracks
    which will be contained within "Test". The optional colour param
    defines the colours for the child tracks. Colours are defined
    using the brewer2mpl python module. Colour parameters must contain
    the name of the palette followed by the type of palette.

    The ini file below also defines a "set_features" parameter,
    "bigwigs". Set_feature require a value and regex parameter. In
    this case, the UCSC options in the values parameter will be added
    to all tracks matching the ".*bigwig$" regex. As above, the values
    param must be a comma separated list of key:value pairs which are
    separated by a single space. As above, an optional colours
    parameter can also be given.

    Note: colour palettes have a maximum number of allowable colours.
    To see the available palettes and their size, run:
    >import brewer2mpl
    >brewer2mpl.print_maps()

    >cat test.ini
    #######################
    #######################

    [parent_test]
    values=container multiWig,bigDataUrl Test,shortLabel Test,longLabel Test,type bigWig,viewLimits 0:160,visibility full,aggregate transparentOverlay,showSubtrackColorOnUi on,windowingFunction maximum,priority 1.2,configurable on,autoScale on,dragAndDrop subtracks

    regex=.*-Saline-.*bw$

    colour=Blues,Sequential

    #######################
    [set_features_bigwigs]

    values=configurable on,autoScale on,useScore on,visibility full

    regex=.*bigwig$

    colour=Oranges,Sequential
    #######################
    #######################

    Arguments
    ---------
    export_files : dict
        Dictionary mapping filetypes to files.
    prefix : string
        will be added to each track. If empty, the configuration
        value ``report_prefix`` is used.
    project_id : string
        The project identifier. If not given, it will be taken from
        the path of the project directory.
    project_name : string
        The project name, typically the project number. If not given,
        it will be taken from the current directory.
    UCSC_ini : string
        Filename of a UCSC ini file as described above. If not given,
        the configuration value ``ucsc_ini`` is used.

    '''

    # the import is located here to avoid cyclical dependencies
    # between Local.py, Pipeline.py and PipelineUCSC.py
    import CGATPipelines.PipelineUCSC as PipelineUCSC

    if not prefix:
        prefix = PARAMS.get("report_prefix", "")

    if not UCSC_ini:
        UCSC_ini = PARAMS.get("ucsc_ini", None)

    web_dir = PARAMS["web_dir"]
    if project_id is None:
        project_id = getProjectId()
    if project_name is None:
        project_name = getProjectName()

    # NOTE(review): the next three names are not referenced in the
    # visible part of this function; they are kept in case later code
    # formats strings with locals() - confirm before removing.
    src_export = os.path.abspath("export")
    dest_report = prefix + "report"
    dest_export = prefix + "export"

    hubdir = os.path.join(web_dir, "ucsc")

    if not os.path.exists(hubdir):
        E.info("creating %s" % hubdir)
        os.mkdir(hubdir)

    # write the UCSC hub file
    hubfile = os.path.join(hubdir, "hub.txt")
    genomesfile = os.path.join(hubdir, "genomes.txt")
    trackdir = os.path.join(hubdir, PARAMS["genome"])
    trackfile = os.path.join(trackdir, "trackDb.txt")
    # genomes.txt refers to the track file relative to the hub directory
    trackrelpath = os.path.join(PARAMS["genome"], "trackDb.txt")

    # an existing hub file is read and written back; otherwise a
    # default hub description is created.
    if os.path.exists(hubfile):
        with IOTools.openFile(hubfile) as infile:
            hubdata = PipelineUCSC.readUCSCFile(infile)
    else:
        hubdata = [('hub', "CGAT-" + project_name),
                   ('shortLabel', "CGAT-" + project_name),
                   ('longLabel', "Data for CGAT project %s" % project_name),
                   ('genomesFile', "genomes.txt"),
                   ('email', '*****@*****.**')]

    E.info("writing to %s" % hubfile)
    with IOTools.openFile(hubfile, "w") as outfile:
        PipelineUCSC.writeUCSCFile(outfile, hubdata)

    # create the genomes.txt file - append to it if necessary.
    if os.path.exists(genomesfile):
        with IOTools.openFile(genomesfile) as infile:
            genomes = PipelineUCSC.readUCSCFile(infile)
    else:
        genomes = []

    if ("genome", PARAMS["genome"]) not in genomes:
        genomes.append(("genome", PARAMS["genome"]))
        genomes.append(("trackDb", trackrelpath))

    E.info("writing to %s" % genomesfile)
    with IOTools.openFile(genomesfile, "w") as outfile:
        PipelineUCSC.writeUCSCFile(outfile, genomes)

    # create the track data
    if not os.path.exists(trackdir):
        os.mkdir(trackdir)

    if os.path.exists(trackfile):
        E.debug('reading existing tracks from %s' % trackfile)
        with IOTools.openFile(trackfile) as infile:
            tracks = PipelineUCSC.readTrackFile(infile)
    else:
        tracks = []

    # keyed by track name, so re-publishing a file replaces its entry
    tracks = collections.OrderedDict(tracks)

    def getName(name):
        '''return (UCSC track type, track name) for a filename.

        Returns ``(None, None)`` for unrecognised suffixes.
        '''
        if name.endswith(".bam"):
            return "bam", name
        elif name.endswith((".bw", ".bigwig")):
            return "bigWig", name
        elif name.endswith((".bb", ".bigbed")):
            return "bigBed", name
        else:
            return None, None

    for targetdir, filenames in export_files.items():
        for src in filenames:
            dest = os.path.join(trackdir, prefix + os.path.basename(src))
            dest = os.path.abspath(dest)
            # create a symlink
            if not os.path.exists(dest):
                try:
                    os.symlink(os.path.abspath(src), dest)
                except OSError as msg:
                    # best effort: report the failure and carry on
                    E.warn("could not create symlink from %s to %s: %s" %
                           (os.path.abspath(src), dest, msg))
            ucsctype, trackname = getName(os.path.basename(dest))
            # ignore invalid types and other files (.bai files, ...)
            if ucsctype is None:
                continue
            tracks[trackname] = (("bigDataUrl", os.path.basename(dest)),
                                 ("shortLabel", trackname),
                                 ("longLabel", trackname),
                                 ("type", ucsctype))
Пример #3
0
            else:
                colours = None

            for n, child in enumerate(children):
                if make_group:
                    # make a parent and a copy of the child so we have
                    # two tracks, one grouped, one by itself
                    values = UCSC_PARAMS[param]
                    tracks[name] = [x.split(" ") for x in values.split(",")]
                    group_trackname = child + "_grouped"
                    tracks[group_trackname] = tracks[child]
                    tracks[group_trackname] += (("parent", name),)

                else:
                    # just add the values to the child
                    values = UCSC_PARAMS[name + "_values"]
                    tracks[child] += tuple([x.split(" ") for x in values.split(",")])

                if colours:
                    rgb = ",".join(map(str, colours[n]))
                    tracks[child] += (("color", rgb),)
                    if make_group:
                        tracks[group_trackname] += (("color", rgb),)

    E.info("writing to %s" % trackfile)
    with IOTools.openFile(trackfile, "w") as outfile:
        PipelineUCSC.writeTrackFile(outfile, list(tracks.iteritems()))

    E.info(
        "data hub has been created at http://www.cgat.org/downloads/%(project_id)s/ucsc/hub.txt" % locals())
Пример #4
0
def publish_tracks(export_files,
                   prefix="",
                   project_id=None,
                   project_name=None):
    '''publish a UCSC Track Hub.

    The hub lives in the directory ``ucsc`` below the configured
    ``web_dir``. The files ``hub.txt`` and ``genomes.txt`` are
    (re-)written there, a directory named after the configured genome
    is created, and every exported file is symlinked into it. Files
    ending in ``.bam``, ``.bw`` or ``.bigwig`` are registered as
    tracks; other files (for example ``.bai`` indices) are linked but
    not registered.

    Arguments
    ---------
    export_files : dict
        Dictionary of filetypes and files.
    prefix : string
        will be added to each track. If empty, the configuration
        value ``report_prefix`` is used.
    project_id : string
        The project identifier. If not given, it is obtained from
        ``P.getProjectId()``.
    project_name : string
        The project name. If not given, it is obtained from
        ``P.getProjectName()``.

    '''

    if not prefix:
        prefix = PARAMS.get("report_prefix", "")

    web_dir = PARAMS["web_dir"]
    if project_id is None:
        project_id = P.getProjectId()
    if project_name is None:
        project_name = P.getProjectName()

    # NOTE(review): the next three names are not referenced in the
    # visible part of this function; they are kept in case later code
    # formats strings with locals() - confirm before removing.
    src_export = os.path.abspath("export")
    dest_report = prefix + "report"
    dest_export = prefix + "export"

    hubdir = os.path.join(web_dir, "ucsc")

    if not os.path.exists(hubdir):
        E.info("creating %s" % hubdir)
        os.mkdir(hubdir)

    # write the UCSC hub file
    hubfile = os.path.join(hubdir, "hub.txt")
    genomesfile = os.path.join(hubdir, "genomes.txt")
    trackdir = os.path.join(hubdir, PARAMS["genome"])
    trackfile = os.path.join(trackdir, "trackDb.txt")
    # genomes.txt refers to the track file relative to the hub directory
    trackrelpath = os.path.join(PARAMS["genome"], "trackDb.txt")

    # an existing hub file is read and written back; otherwise a
    # default hub description is created.
    if os.path.exists(hubfile):
        with IOTools.openFile(hubfile) as infile:
            hubdata = PipelineUCSC.readUCSCFile(infile)
    else:
        hubdata = [('hub', "CGAT-" + project_name),
                   ('shortLabel', "CGAT-" + project_name),
                   ('longLabel', "Data for CGAT project %s" % project_name),
                   ('genomesFile', "genomes.txt"),
                   ('email', '*****@*****.**')]

    E.info("writing to %s" % hubfile)
    with IOTools.openFile(hubfile, "w") as outfile:
        PipelineUCSC.writeUCSCFile(outfile, hubdata)

    # create the genomes.txt file - append to it if necessary.
    if os.path.exists(genomesfile):
        with IOTools.openFile(genomesfile) as infile:
            genomes = PipelineUCSC.readUCSCFile(infile)
    else:
        genomes = []

    if ("genome", PARAMS["genome"]) not in genomes:
        genomes.append(("genome", PARAMS["genome"]))
        genomes.append(("trackDb", trackrelpath))

    E.info("writing to %s" % genomesfile)
    with IOTools.openFile(genomesfile, "w") as outfile:
        PipelineUCSC.writeUCSCFile(outfile, genomes)

    # create the track data
    if not os.path.exists(trackdir):
        os.mkdir(trackdir)

    if os.path.exists(trackfile):
        E.debug('reading existing tracks from %s' % trackfile)
        with IOTools.openFile(trackfile) as infile:
            tracks = PipelineUCSC.readTrackFile(infile)
    else:
        tracks = []

    # keyed by track name, so re-publishing a file replaces its entry
    tracks = collections.OrderedDict(tracks)

    def getName(name):
        '''return (UCSC track type, track name) for a filename.

        Returns ``(None, None)`` for unrecognised suffixes.
        '''
        if name.endswith(".bam"):
            return "bam", name
        elif name.endswith((".bw", ".bigwig")):
            return "bigWig", name
        else:
            return None, None

    for targetdir, filenames in export_files.items():
        for src in filenames:
            dest = os.path.join(trackdir, prefix + os.path.basename(src))
            dest = os.path.abspath(dest)
            # create a symlink
            if not os.path.exists(dest):
                try:
                    os.symlink(os.path.abspath(src), dest)
                except OSError as msg:
                    # best effort: report the failure and carry on
                    E.warn("could not create symlink from %s to %s: %s" %
                           (os.path.abspath(src), dest, msg))
            ucsctype, trackname = getName(os.path.basename(dest))
            # ignore invalid types and other files (.bai files, ...)
            if ucsctype is None:
                continue
            tracks[trackname] = (("bigDataUrl", os.path.basename(dest)),
                                 ("shortLabel", trackname),
                                 ("longLabel", trackname),
                                 ("type", ucsctype))
Пример #5
0
def publish_tracks(export_files, prefix="", project_id=None, project_name=None):
    """publish a UCSC Track Hub.

    This method takes a dictionary of file types associated
    with files. For each file, a link will be created in
    the upload directory. The track will be stored under
    a project name, which will be derived from the location
    of the working directory.

    Information about the genome, the upload directory, etc. will be
    taken from the global configuration dictionary.

    For example, calling the following code in a pipeline executed
    in .../proj013/mapping::

        export_files = {
            "bamfiles": glob.glob("*/*.bam") + glob.glob("*/*.bam.bai"),
            "bigwigfiles": glob.glob("*/*.bw"),
        }
        publish_tracks(export_files)

    will create a hub file at
    :file:`<uploaddir>/OBFUSID/mapping/ucsc.hub`, where
    OBFUSID is the obfuscated directory entry in the CGAT
    download directory for a particular project.

    Arguments
    ---------
    export_files : dict
        Dictionary mapping filetypes to files.
    prefix : string
        will be added to each track. If empty, the configuration
        value ``report_prefix`` is used.
    project_id : string
        The project identifier. If not given, it will be taken from
        the path of the project directory.
    project_name : string
        The project name, typically the project number. If not given,
        it will be taken from the current directory.

    """

    # the import is located here to avoid cyclical dependencies
    # between Local.py, Pipeline.py and PipelineUCSC.py
    import CGATPipelines.PipelineUCSC as PipelineUCSC

    if not prefix:
        prefix = PARAMS.get("report_prefix", "")

    web_dir = PARAMS["web_dir"]
    if project_id is None:
        project_id = getProjectId()
    if project_name is None:
        project_name = getProjectName()

    # NOTE(review): the next three names are not referenced in the
    # visible part of this function; they are kept in case later code
    # formats strings with locals() - confirm before removing.
    src_export = os.path.abspath("export")
    dest_report = prefix + "report"
    dest_export = prefix + "export"

    hubdir = os.path.join(web_dir, "ucsc")

    if not os.path.exists(hubdir):
        E.info("creating %s" % hubdir)
        os.mkdir(hubdir)

    # write the UCSC hub file
    hubfile = os.path.join(hubdir, "hub.txt")
    genomesfile = os.path.join(hubdir, "genomes.txt")
    trackdir = os.path.join(hubdir, PARAMS["genome"])
    trackfile = os.path.join(trackdir, "trackDb.txt")
    # genomes.txt refers to the track file relative to the hub directory
    trackrelpath = os.path.join(PARAMS["genome"], "trackDb.txt")

    # an existing hub file is read and written back; otherwise a
    # default hub description is created.
    if os.path.exists(hubfile):
        with IOTools.openFile(hubfile) as infile:
            hubdata = PipelineUCSC.readUCSCFile(infile)
    else:
        hubdata = [
            ("hub", "CGAT-" + project_name),
            ("shortLabel", "CGAT-" + project_name),
            ("longLabel", "Data for CGAT project %s" % project_name),
            ("genomesFile", "genomes.txt"),
            ("email", "*****@*****.**"),
        ]

    E.info("writing to %s" % hubfile)
    with IOTools.openFile(hubfile, "w") as outfile:
        PipelineUCSC.writeUCSCFile(outfile, hubdata)

    # create the genomes.txt file - append to it if necessary.
    if os.path.exists(genomesfile):
        with IOTools.openFile(genomesfile) as infile:
            genomes = PipelineUCSC.readUCSCFile(infile)
    else:
        genomes = []

    if ("genome", PARAMS["genome"]) not in genomes:
        genomes.append(("genome", PARAMS["genome"]))
        genomes.append(("trackDb", trackrelpath))

    E.info("writing to %s" % genomesfile)
    with IOTools.openFile(genomesfile, "w") as outfile:
        PipelineUCSC.writeUCSCFile(outfile, genomes)

    # create the track data
    if not os.path.exists(trackdir):
        os.mkdir(trackdir)

    if os.path.exists(trackfile):
        E.debug("reading existing tracks from %s" % trackfile)
        with IOTools.openFile(trackfile) as infile:
            tracks = PipelineUCSC.readTrackFile(infile)
    else:
        tracks = []

    # keyed by track name, so re-publishing a file replaces its entry
    tracks = collections.OrderedDict(tracks)

    def getName(name):
        """return (UCSC track type, track name) for a filename.

        Returns ``(None, None)`` for unrecognised suffixes.
        """
        if name.endswith(".bam"):
            return "bam", name
        elif name.endswith((".bw", ".bigwig")):
            return "bigWig", name
        else:
            return None, None

    for targetdir, filenames in export_files.items():
        for src in filenames:
            dest = os.path.join(trackdir, prefix + os.path.basename(src))
            dest = os.path.abspath(dest)
            # create a symlink
            if not os.path.exists(dest):
                try:
                    os.symlink(os.path.abspath(src), dest)
                except OSError as msg:
                    # best effort: report the failure and carry on
                    E.warn("could not create symlink from %s to %s: %s" % (os.path.abspath(src), dest, msg))
            ucsctype, trackname = getName(os.path.basename(dest))
            # ignore invalid types and other files (.bai files, ...)
            if ucsctype is None:
                continue
            tracks[trackname] = (
                ("bigDataUrl", os.path.basename(dest)),
                ("shortLabel", trackname),
                ("longLabel", trackname),
                ("type", ucsctype),
            )
Пример #6
0
def publish_tracks(export_files,
                   prefix="",
                   project_id=None,
                   project_name=None):
    '''publish a UCSC Track Hub.

    The hub lives in the directory ``ucsc`` below the configured
    ``web_dir``. The files ``hub.txt`` and ``genomes.txt`` are
    (re-)written there, a directory named after the configured genome
    is created, and every exported file is symlinked into it. Files
    ending in ``.bam``, ``.bw`` or ``.bigwig`` are registered as
    tracks; other files (for example ``.bai`` indices) are linked but
    not registered.

    Arguments
    ---------
    export_files : dict
        Dictionary of filetypes and files.
    prefix : string
        will be added to each track. If empty, the configuration
        value ``report_prefix`` is used.
    project_id : string
        The project identifier. If not given, it is obtained from
        ``P.getProjectId()``.
    project_name : string
        The project name. If not given, it is obtained from
        ``P.getProjectName()``.

    '''

    if not prefix:
        prefix = PARAMS.get("report_prefix", "")

    web_dir = PARAMS["web_dir"]
    if project_id is None:
        project_id = P.getProjectId()
    if project_name is None:
        project_name = P.getProjectName()

    # NOTE(review): the next three names are not referenced in the
    # visible part of this function; they are kept in case later code
    # formats strings with locals() - confirm before removing.
    src_export = os.path.abspath("export")
    dest_report = prefix + "report"
    dest_export = prefix + "export"

    hubdir = os.path.join(web_dir, "ucsc")

    if not os.path.exists(hubdir):
        E.info("creating %s" % hubdir)
        os.mkdir(hubdir)

    # write the UCSC hub file
    hubfile = os.path.join(hubdir, "hub.txt")
    genomesfile = os.path.join(hubdir, "genomes.txt")
    trackdir = os.path.join(hubdir, PARAMS["genome"])
    trackfile = os.path.join(trackdir, "trackDb.txt")
    # genomes.txt refers to the track file relative to the hub directory
    trackrelpath = os.path.join(PARAMS["genome"], "trackDb.txt")

    # an existing hub file is read and written back; otherwise a
    # default hub description is created.
    if os.path.exists(hubfile):
        with IOTools.openFile(hubfile) as infile:
            hubdata = PipelineUCSC.readUCSCFile(infile)
    else:
        hubdata = [('hub', "CGAT-" + project_name),
                   ('shortLabel', "CGAT-" + project_name),
                   ('longLabel', "Data for CGAT project %s" % project_name),
                   ('genomesFile', "genomes.txt"),
                   ('email', '*****@*****.**')]

    E.info("writing to %s" % hubfile)
    with IOTools.openFile(hubfile, "w") as outfile:
        PipelineUCSC.writeUCSCFile(outfile, hubdata)

    # create the genomes.txt file - append to it if necessary.
    if os.path.exists(genomesfile):
        with IOTools.openFile(genomesfile) as infile:
            genomes = PipelineUCSC.readUCSCFile(infile)
    else:
        genomes = []

    if ("genome", PARAMS["genome"]) not in genomes:
        genomes.append(("genome", PARAMS["genome"]))
        genomes.append(("trackDb", trackrelpath))

    E.info("writing to %s" % genomesfile)
    with IOTools.openFile(genomesfile, "w") as outfile:
        PipelineUCSC.writeUCSCFile(outfile, genomes)

    # create the track data
    if not os.path.exists(trackdir):
        os.mkdir(trackdir)

    if os.path.exists(trackfile):
        E.debug('reading existing tracks from %s' % trackfile)
        with IOTools.openFile(trackfile) as infile:
            tracks = PipelineUCSC.readTrackFile(infile)
    else:
        tracks = []

    # keyed by track name, so re-publishing a file replaces its entry
    tracks = collections.OrderedDict(tracks)

    def getName(name):
        '''return (UCSC track type, track name) for a filename.

        Returns ``(None, None)`` for unrecognised suffixes.
        '''
        if name.endswith(".bam"):
            return "bam", name
        elif name.endswith((".bw", ".bigwig")):
            return "bigWig", name
        else:
            return None, None

    for targetdir, filenames in export_files.items():
        for src in filenames:
            dest = os.path.join(trackdir, prefix + os.path.basename(src))
            dest = os.path.abspath(dest)
            # create a symlink
            if not os.path.exists(dest):
                try:
                    os.symlink(os.path.abspath(src), dest)
                except OSError as msg:
                    # best effort: report the failure and carry on
                    E.warn("could not create symlink from %s to %s: %s" %
                           (os.path.abspath(src), dest, msg))
            ucsctype, trackname = getName(os.path.basename(dest))
            # ignore invalid types and other files (.bai files, ...)
            if ucsctype is None:
                continue
            tracks[trackname] = (("bigDataUrl", os.path.basename(dest)),
                                 ("shortLabel", trackname),
                                 ("longLabel", trackname),
                                 ("type", ucsctype))
Пример #7
0
            return "bigWig", name
        else:
            return None, None

    for targetdir, filenames in export_files.items():
        for src in filenames:
            dest = os.path.join(trackdir, prefix + os.path.basename(src))
            dest = os.path.abspath(dest)
            # create a symlink
            if not os.path.exists(dest):
                try:
                    os.symlink(os.path.abspath(src), dest)
                except OSError, msg:
                    E.warn("could not create symlink from %s to %s: %s" %
                           (os.path.abspath(src), dest, msg))
            ucsctype, trackname = getName(os.path.basename(dest))
            # ignore invalid types and other files (.bai files, ...)
            if ucsctype is None:
                continue
            tracks[trackname] = (("bigDataUrl", os.path.basename(dest)),
                                 ("shortLabel", trackname),
                                 ("longLabel", trackname),
                                 ("type", ucsctype))

    E.info("writing to %s" % trackfile)
    with IOTools.openFile(trackfile, "w") as outfile:
        PipelineUCSC.writeTrackFile(outfile, list(tracks.iteritems()))

    E.info(
        "data hub has been created at http://www.cgat.org/downloads/%(project_id)s/ucsc/hub.txt" % locals())
Пример #8
0
def publish_tracks(export_files,
                   prefix="",
                   project_id=None,
                   project_name=None,
                   UCSC_ini=None):
    '''publish a UCSC Track Hub.

    This method takes a dictionary of file types associated
    with files. For each file, a link will be created in
    the upload directory. The track will be stored under
    a project name, which will be derived from the location
    of the working directory.

    Information about the genome, the upload directory, etc. will be
    taken from the global configuration dictionary.

    For example, calling the following code in a pipeline executed
    in .../proj013/mapping::

        export_files = {
            "bamfiles": glob.glob("*/*.bam") + glob.glob("*/*.bam.bai"),
            "bigwigfiles": glob.glob("*/*.bw"),
        }
        publish_tracks(export_files)

    will create a hub file at
    :file:`<uploaddir>/OBFUSID/mapping/ucsc.hub`, where
    OBFUSID is the obfuscated directory entry in the CGAT
    download directory for a particular project.

    If you want to create group tracks and get them to inherit from a
    parent, you can supply a filename for a UCSC ini file.  The ini
    file defines two types of parameters, parents and set_features.
    Parents define containers with a regex to identify the child
    tracks. Set_features add additional features to all tracks
    matching a regex. Parent and set_feature parameters are identified
    by their respective "parent" or "set_features" prefixes.

    For example, the following UCSC ini "test.ini" will create a
    parent multiWig track called "Test" with the UCSC options as
    defined in the values parameter. The values param must be a comma
    separated list of key:value pairs which are separated by a single
    space. The regex param for parent_test defines the child tracks
    which will be contained within "Test". The optional colour param
    defines the colours for the child tracks. Colours are defined
    using the brewer2mpl python module. Colour parameters must contain
    the name of the palette followed by the type of palette.

    The ini file below also defines a "set_features" parameter,
    "bigwigs". Set_feature require a value and regex parameter. In
    this case, the UCSC options in the values parameter will be added
    to all tracks matching the ".*bigwig$" regex. As above, the values
    param must be a comma separated list of key:value pairs which are
    separated by a single space. As above, an optional colours
    parameter can also be given.

    Note: colour palettes have a maximum number of allowable colours.
    To see the available palettes and their size, run:
    >import brewer2mpl
    >brewer2mpl.print_maps()

    >cat test.ini
    #######################
    #######################

    [parent_test]
    values=container multiWig,bigDataUrl Test,shortLabel Test,longLabel Test,type bigWig,viewLimits 0:160,visibility full,aggregate transparentOverlay,showSubtrackColorOnUi on,windowingFunction maximum,priority 1.2,configurable on,autoScale on,dragAndDrop subtracks

    regex=.*-Saline-.*bw$

    colour=Blues,Sequential

    #######################
    [set_features_bigwigs]

    values=configurable on,autoScale on,useScore on,visibility full

    regex=.*bigwig$

    colour=Oranges,Sequential
    #######################
    #######################

    Arguments
    ---------
    export_files : dict
        Dictionary mapping filetypes to files.
    prefix : string
        will be added to each track. If empty, the configuration
        value ``report_prefix`` is used.
    project_id : string
        The project identifier. If not given, it will be taken from
        the path of the project directory.
    project_name : string
        The project name, typically the project number. If not given,
        it will be taken from the current directory.
    UCSC_ini : string
        Filename of a UCSC ini file as described above. If not given,
        the configuration value ``ucsc_ini`` is used.

    '''

    # the import is located here to avoid cyclical dependencies
    # between Local.py, Pipeline.py and PipelineUCSC.py
    import CGATPipelines.PipelineUCSC as PipelineUCSC

    if not prefix:
        prefix = PARAMS.get("report_prefix", "")

    if not UCSC_ini:
        UCSC_ini = PARAMS.get("ucsc_ini", None)

    web_dir = PARAMS["web_dir"]
    if project_id is None:
        project_id = getProjectId()
    if project_name is None:
        project_name = getProjectName()

    # NOTE(review): the next three names are not referenced in the
    # visible part of this function; they are kept in case later code
    # formats strings with locals() - confirm before removing.
    src_export = os.path.abspath("export")
    dest_report = prefix + "report"
    dest_export = prefix + "export"

    hubdir = os.path.join(web_dir, "ucsc")

    if not os.path.exists(hubdir):
        E.info("creating %s" % hubdir)
        os.mkdir(hubdir)

    # write the UCSC hub file
    hubfile = os.path.join(hubdir, "hub.txt")
    genomesfile = os.path.join(hubdir, "genomes.txt")
    trackdir = os.path.join(hubdir, PARAMS["genome"])
    trackfile = os.path.join(trackdir, "trackDb.txt")
    # genomes.txt refers to the track file relative to the hub directory
    trackrelpath = os.path.join(PARAMS["genome"], "trackDb.txt")

    # an existing hub file is read and written back; otherwise a
    # default hub description is created.
    if os.path.exists(hubfile):
        with IOTools.openFile(hubfile) as infile:
            hubdata = PipelineUCSC.readUCSCFile(infile)
    else:
        hubdata = [('hub', "CGAT-" + project_name),
                   ('shortLabel', "CGAT-" + project_name),
                   ('longLabel', "Data for CGAT project %s" % project_name),
                   ('genomesFile', "genomes.txt"),
                   ('email', '*****@*****.**')]

    E.info("writing to %s" % hubfile)
    with IOTools.openFile(hubfile, "w") as outfile:
        PipelineUCSC.writeUCSCFile(outfile, hubdata)

    # create the genomes.txt file - append to it if necessary.
    if os.path.exists(genomesfile):
        with IOTools.openFile(genomesfile) as infile:
            genomes = PipelineUCSC.readUCSCFile(infile)
    else:
        genomes = []

    if ("genome", PARAMS["genome"]) not in genomes:
        genomes.append(("genome", PARAMS["genome"]))
        genomes.append(("trackDb", trackrelpath))

    E.info("writing to %s" % genomesfile)
    with IOTools.openFile(genomesfile, "w") as outfile:
        PipelineUCSC.writeUCSCFile(outfile, genomes)

    # create the track data
    if not os.path.exists(trackdir):
        os.mkdir(trackdir)

    if os.path.exists(trackfile):
        E.debug('reading existing tracks from %s' % trackfile)
        with IOTools.openFile(trackfile) as infile:
            tracks = PipelineUCSC.readTrackFile(infile)
    else:
        tracks = []

    # keyed by track name, so re-publishing a file replaces its entry
    tracks = collections.OrderedDict(tracks)

    def getName(name):
        '''return (UCSC track type, track name) for a filename.

        Returns ``(None, None)`` for unrecognised suffixes.
        '''
        if name.endswith(".bam"):
            return "bam", name
        elif name.endswith((".bw", ".bigwig")):
            return "bigWig", name
        elif name.endswith((".bb", ".bigbed")):
            return "bigBed", name
        else:
            return None, None

    for targetdir, filenames in export_files.items():
        for src in filenames:
            dest = os.path.join(trackdir, prefix + os.path.basename(src))
            dest = os.path.abspath(dest)
            # create a symlink
            if not os.path.exists(dest):
                try:
                    os.symlink(os.path.abspath(src), dest)
                except OSError as msg:
                    # best effort: report the failure and carry on
                    E.warn("could not create symlink from %s to %s: %s" %
                           (os.path.abspath(src), dest, msg))
            ucsctype, trackname = getName(os.path.basename(dest))
            # ignore invalid types and other files (.bai files, ...)
            if ucsctype is None:
                continue
            tracks[trackname] = (("bigDataUrl", os.path.basename(dest)),
                                 ("shortLabel", trackname),
                                 ("longLabel", trackname), ("type", ucsctype))