Exemplo n.º 1
0
Arquivo: mq2.py Projeto: PBR/MQ2
def _append_count_to_matrix(qtl_matrixfile, lod_threshold):
    """ Append a '# QTLs' column to the matrix stored in the given file.

    The file is read as a comma separated matrix. The first row gets the
    extra header '# QTLs'; every following row gets, as a string, the
    number of its cells (from the fourth column onward) that are not
    empty and whose numeric value is strictly above the threshold.
    The result is written back to the same file.

    :arg qtl_matrixfile, path to the matrix file, which is read and then
        overwritten with the extended matrix.
    :arg lod_threshold, threshold a LOD value has to exceed to be counted
        as a QTL.
    :raises MQ2Exception, if the matrix file does not exist.

    """
    if not os.path.exists(qtl_matrixfile):  # pragma: no cover
        raise MQ2Exception('File not found: "%s"' % qtl_matrixfile)
    matrix = read_input_file(qtl_matrixfile, sep=',')
    # Header row: same columns plus the name of the new one.
    matrix[0] = list(matrix[0]) + ['# QTLs']
    for idx in range(1, len(matrix)):
        cells = list(matrix[idx])
        # The first three columns are skipped; only the remaining cells
        # are compared against the threshold.
        hits = sum(
            1 for lod in cells[3:]
            if lod and float(lod) > float(lod_threshold))
        cells.append(str(hits))
        matrix[idx] = cells
    write_matrix(qtl_matrixfile, matrix)
Exemplo n.º 2
0
def add_marker_to_qtls(qtlfile, mapfile, outputfile='qtls_with_mk.csv'):
    """Add to each QTL of a list the marker closest to the QTL peak.

    Both files are read as comma separated matrices. The header row of
    the QTL list gets an extra 'Closest marker' column and every other
    row gets the value returned by ``add_marker_to_qtl`` for that row.
    Nothing is written if either input is empty.

    :arg qtlfile: a CSV list of all the QTLs found.
        The file should be structured as follow::
            Trait, Linkage group, position, other columns

        The other columns will not matter as long as the first three
        columns are as such.
    :arg mapfile: a CSV representation of the map used for the QTL
        mapping analysis.
        The file should be structured as follow::
            Marker, Linkage group, position
    :kwarg outputfile: the name of the output file in which the list of
        QTLs with their closest marker will be written.

    """
    qtl_list = read_input_file(qtlfile, ',')
    map_list = read_input_file(mapfile, ',')
    if not qtl_list or not map_list:  # pragma: no cover
        return
    header = qtl_list[0]
    qtl_rows = qtl_list[1:]
    header.append('Closest marker')
    for qtl in qtl_rows:
        # The whole map (header included) is handed to the helper, which
        # returns the value stored in the new column.
        qtl.append(add_marker_to_qtl(qtl, map_list))
    # Report the data rows only: the header line is not a QTL.
    LOG.info('- %s QTLs processed in %s' % (len(qtl_rows), qtlfile))
    write_matrix(outputfile, [header] + qtl_rows)
Exemplo n.º 3
0
def add_marker_to_qtls(qtlfile, mapfile, outputfile='qtls_with_mk.csv'):
    """Add to each QTL of a list the marker closest to the QTL peak.

    Both files are read as comma separated matrices. The header row of
    the QTL list gets an extra 'Closest marker' column and every other
    row gets the value returned by ``add_marker_to_qtl`` for that row.
    Nothing is written if either input is empty.

    :arg qtlfile: a CSV list of all the QTLs found.
        The file should be structured as follow::
            Trait, Linkage group, position, other columns

        The other columns will not matter as long as the first three
        columns are as such.
    :arg mapfile: a CSV representation of the map used for the QTL
        mapping analysis.
        The file should be structured as follow::
            Marker, Linkage group, position
    :kwarg outputfile: the name of the output file in which the list of
        QTLs with their closest marker will be written.

    """
    qtl_list = read_input_file(qtlfile, ',')
    map_list = read_input_file(mapfile, ',')
    if not qtl_list or not map_list:  # pragma: no cover
        return
    header = qtl_list[0]
    qtl_rows = qtl_list[1:]
    header.append('Closest marker')
    for qtl in qtl_rows:
        # The whole map (header included) is handed to the helper, which
        # returns the value stored in the new column.
        qtl.append(add_marker_to_qtl(qtl, map_list))
    # Report the data rows only: the header line is not a QTL.
    LOG.info('- %s QTLs processed in %s' % (len(qtl_rows), qtlfile))
    write_matrix(outputfile, [header] + qtl_rows)
Exemplo n.º 4
0
def _append_count_to_matrix(qtl_matrixfile, lod_threshold):
    """ Append a '# QTLs' column to the matrix stored in the given file.

    The file is read as a comma separated matrix. The first row gets the
    extra header '# QTLs'; every following row gets, as a string, the
    number of its cells (from the fourth column onward) that are not
    empty and whose numeric value is strictly above the threshold.
    The result is written back to the same file.

    :arg qtl_matrixfile, path to the matrix file, which is read and then
        overwritten with the extended matrix.
    :arg lod_threshold, threshold a LOD value has to exceed to be counted
        as a QTL.
    :raises MQ2Exception, if the matrix file does not exist.

    """
    if not os.path.exists(qtl_matrixfile):  # pragma: no cover
        raise MQ2Exception('File not found: "%s"' % qtl_matrixfile)
    matrix = read_input_file(qtl_matrixfile, sep=',')
    # Header row: same columns plus the name of the new one.
    matrix[0] = list(matrix[0]) + ['# QTLs']
    for idx in range(1, len(matrix)):
        cells = list(matrix[idx])
        # The first three columns are skipped; only the remaining cells
        # are compared against the threshold.
        hits = sum(
            1 for lod in cells[3:]
            if lod and float(lod) > float(lod_threshold))
        cells.append(str(hits))
        matrix[idx] = cells
    write_matrix(qtl_matrixfile, matrix)
Exemplo n.º 5
0
def append_flanking_markers(qtls_mk_file, flanking_markers):
    """ Add the flanking markers of each QTL to the QTL list file.

    The file is read as a comma separated matrix and two columns are
    appended to every row: the header row receives the column names
    'LOD2 interval start' and 'LOD2 interval end'; any other row
    receives the entry of ``flanking_markers`` stored under the value of
    its fourth column, or 'NA', 'NA' when there is no such entry.
    The extended matrix is written back to the same file.

    :arg qtls_mk_file, path to the QTL list file, read and overwritten.
    :arg flanking_markers, mapping queried with the fourth column of
        each row; its values are appended to the row.
    """
    rows = list(read_input_file(qtls_mk_file, sep=','))
    for index, row in enumerate(rows):
        if index:
            key = row[3]
            if key in flanking_markers:
                extra = flanking_markers[key]
            else:
                extra = ['NA', 'NA']
        else:
            # First row is the header: it gets the new column names.
            extra = ['LOD2 interval start', 'LOD2 interval end']
        row.extend(extra)
    write_matrix(qtls_mk_file, rows)
Exemplo n.º 6
0
Arquivo: mapchart.py Projeto: PBR/MQ2
def append_flanking_markers(qtls_mk_file, flanking_markers):
    """ Add the flanking markers of each QTL to the QTL list file.

    The file is read as a comma separated matrix and two columns are
    appended to every row: the header row receives the column names
    'LOD2 interval start' and 'LOD2 interval end'; any other row
    receives the entry of ``flanking_markers`` stored under the value of
    its fourth column, or 'NA', 'NA' when there is no such entry.
    The extended matrix is written back to the same file.

    :arg qtls_mk_file, path to the QTL list file, read and overwritten.
    :arg flanking_markers, mapping queried with the fourth column of
        each row; its values are appended to the row.
    """
    rows = list(read_input_file(qtls_mk_file, sep=','))
    for index, row in enumerate(rows):
        if index:
            key = row[3]
            if key in flanking_markers:
                extra = flanking_markers[key]
            else:
                extra = ['NA', 'NA']
        else:
            # First row is the header: it gets the new column names.
            extra = ['LOD2 interval start', 'LOD2 interval end']
        row.extend(extra)
    write_matrix(qtls_mk_file, rows)
Exemplo n.º 7
0
def add_qtl_to_map(qtlfile, mapfile, outputfile='map_with_qtls.csv'):
    """ Add to each marker of a genetic map the number of significant
    QTLs found.

    Both files are read as comma separated matrices. The header row of
    the map gets an extra '# QTLs' column; every other row is replaced
    by the row returned by ``add_qtl_to_marker``, whose last cell is
    read as the number of QTLs for that marker.

    :arg qtlfile, the output from MapQTL transformed to a csv file via
        'parse_mapqtl_file' which contains the closest markers.
    :arg mapfile, the genetic map with all the markers.
    :kwarg outputfile, the name of the output file in which the map will
        be written.

    """
    qtl_list = read_input_file(qtlfile, ',')
    map_list = read_input_file(mapfile, ',')
    map_list[0].append('# QTLs')
    markers = [map_list[0]]
    qtl_cnt = 0
    for marker in map_list[1:]:
        # The QTL list is passed without its header row.
        annotated = add_qtl_to_marker(marker, qtl_list[1:])
        markers.append(annotated)
        qtl_cnt += int(annotated[-1])
    # Report the data rows only: the header line is not a marker.
    LOG.info('- %s markers processed in %s' % (len(markers) - 1, mapfile))
    LOG.info('- %s QTLs located in the map: %s' % (qtl_cnt, outputfile))
    write_matrix(outputfile, markers)
Exemplo n.º 8
0
def add_qtl_to_map(qtlfile, mapfile, outputfile='map_with_qtls.csv'):
    """ Add to each marker of a genetic map the number of significant
    QTLs found.

    Both files are read as comma separated matrices. The header row of
    the map gets an extra '# QTLs' column; every other row is replaced
    by the row returned by ``add_qtl_to_marker``, whose last cell is
    read as the number of QTLs for that marker.

    :arg qtlfile, the output from MapQTL transformed to a csv file via
        'parse_mapqtl_file' which contains the closest markers.
    :arg mapfile, the genetic map with all the markers.
    :kwarg outputfile, the name of the output file in which the map will
        be written.

    """
    qtl_list = read_input_file(qtlfile, ',')
    map_list = read_input_file(mapfile, ',')
    map_list[0].append('# QTLs')
    markers = [map_list[0]]
    qtl_cnt = 0
    for marker in map_list[1:]:
        # The QTL list is passed without its header row.
        annotated = add_qtl_to_marker(marker, qtl_list[1:])
        markers.append(annotated)
        qtl_cnt += int(annotated[-1])
    # Report the data rows only: the header line is not a marker.
    LOG.info('- %s markers processed in %s' % (len(markers) - 1, mapfile))
    LOG.info('- %s QTLs located in the map: %s' % (qtl_cnt, outputfile))
    write_matrix(outputfile, markers)
Exemplo n.º 9
0
    def convert_inputfiles(
        cls,
        folder=None,
        inputfile=None,
        session=None,
        lod_threshold=None,
        qtls_file="qtls.csv",
        matrix_file="qtls_matrix.csv",
        map_file="map.csv",
    ):
        """ Convert the input files present in the given folder or
        inputfile.
        This method creates the matrix representation of the QTLs
        results providing for each marker position the LOD value found
        for each trait as well as a representation of the genetic map
        used in the experiment.
        The genetic map should be cleared of any markers added by the
        QTL mapping software.

        :kwarg folder: the path to the folder containing the files to
            check. This folder may contain sub-folders.
        :kwarg inputfile: the path to the input file to use
        :kwarg session: the session identifier used to identify which
            session to process
        :kwarg lod_threshold: the LOD threshold to apply to determine if
            a QTL is significant or not
        :kwarg qtls_file: a csv file containing the list of all the
            significant QTLs found in the analysis.
            The matrix is of type:
               trait, linkage group, position, Marker, LOD other columns
        :kwarg matrix_file: a csv file containing a matrix representation
            of the QTL data. This matrix is of type:
               marker, linkage group, position, trait1 lod, trait2, lod
        :kwarg map_file: a csv file containing the genetic map used
            in this experiment. The map is of structure:
               marker, linkage group, position
        :raises MQ2Exception: if neither a folder nor an input file is
            given, if ``folder`` is not a folder, if ``inputfile`` is a
            folder, if the LOD threshold cannot be converted to a
            number, or if there is no input file to process.
        :raises MQ2NoSessionException: if no session is given.
        :raises MQ2NoSuchSessionException: if the given session is not
            one of the sessions found for ``folder``.

        """
        if folder is None and inputfile is None:
            raise MQ2Exception("You must specify either a folder or an input file")

        sessions = cls.get_session_identifiers(folder)
        if session is None:
            raise MQ2NoSessionException(
                "The MapQTL plugin requires a session identifier to "
                "identify the session to process."
                "Sessions are: %s" % ",".join(sessions)
            )
        elif str(session) not in sessions:
            raise MQ2NoSuchSessionException(
                "The MapQTL session provided (%s) could not be found in the "
                "dataset. "
                "Sessions are: %s" % (session, ",".join(sessions))
            )

        if folder is not None:
            if not os.path.isdir(folder):  # pragma: no cover
                raise MQ2Exception("The specified folder is actually not a folder")
            else:
                inputfiles = cls.get_files(folder, session_id=session)

        # An explicit input file takes precedence over the folder content.
        if inputfile is not None:  # pragma: no cover
            if os.path.isdir(inputfile):
                raise MQ2Exception("The specified input file is actually a folder")
            else:
                inputfiles = [inputfile]

        try:
            lod_threshold = float(lod_threshold)
        except (TypeError, ValueError):
            # float(None), i.e. the default value, raises TypeError
            # rather than ValueError; both mean "not a number" here.
            raise MQ2Exception("LOD threshold should be a number")

        # Without any file the loop below would never bind ``headers``
        # and the method would fail with an UnboundLocalError.
        if not inputfiles:
            raise MQ2Exception("No files correspond to this plugin")

        inputfiles.sort()

        # QTL matrix and QTL files
        qtl_matrix = []
        qtls = []
        for filename in inputfiles:
            matrix = read_input_file(filename)
            headers = matrix[0]
            qtl_matrix = get_qtls_matrix(qtl_matrix, matrix, filename)
            qtls.extend(get_qtls_from_mapqtl_data(matrix, lod_threshold, filename))
        # format QTLs and write down the selection; the header row used
        # is the one of the last file read.
        headers[0] = "Trait name"
        qtls.insert(0, headers)
        write_matrix(qtls_file, qtls)

        # Write down the QTL matrix: drop its first row, then move what
        # is now the third row to the front.
        del qtl_matrix[0]
        qtl_matrix.insert(0, qtl_matrix.pop(2))
        # write output, transposed
        qtl_matrix = list(zip(*qtl_matrix))
        write_matrix(matrix_file, qtl_matrix)

        # Map matrix, built from the first input file only
        map_matrix = get_map_matrix(inputfiles[0])
        write_matrix(map_file, map_matrix)
Exemplo n.º 10
0
    def convert_inputfiles(cls,
                           folder=None,
                           inputfile=None,
                           session=None,
                           lod_threshold=None,
                           qtls_file='qtls.csv',
                           matrix_file='qtls_matrix.csv',
                           map_file='map.csv'):
        """ Convert the input files present in the given folder or
        inputfile.
        This method creates the matrix representation of the QTLs
        results providing for each marker position the LOD value found
        for each trait as well as a representation of the genetic map
        used in the experiment.
        The genetic map should be cleared of any markers added by the
        QTL mapping software.

        :kwarg folder: the path to the folder containing the files to
            check. This folder may contain sub-folders.
        :kwarg inputfile: the path to the input file to use
        :kwarg session: the session identifier used to identify which
            session to process (not used by this implementation)
        :kwarg lod_threshold: the LOD threshold to apply to determine if
            a QTL is significant or not
        :kwarg qtls_file: a csv file containing the list of all the
            significant QTLs found in the analysis.
            The matrix is of type:
               trait, linkage group, position, Marker, LOD other columns
        :kwarg matrix_file: a csv file containing a matrix representation
            of the QTL data. This matrix is of type:
               marker, linkage group, position, trait1 lod, trait2, lod
        :kwarg map_file: a csv file containing the genetic map used
            in this experiment. The map is of structure:
               marker, linkage group, position
        :raises MQ2Exception: if neither a folder nor an input file is
            given, if ``folder`` is not a folder, if ``inputfile`` is a
            folder, if zero or more than one input file is found, or if
            the LOD threshold cannot be converted to a number.

        """
        if folder is None and inputfile is None:
            raise MQ2Exception('You must specify either a folder or an '
                               'input file')

        if folder is not None:  # pragma: no cover
            if not os.path.isdir(folder):
                raise MQ2Exception('The specified folder is actually '
                                   'not a folder')
            else:
                inputfiles = cls.get_files(folder)

        # An explicit input file takes precedence over the folder content.
        if inputfile is not None:  # pragma: no cover
            if os.path.isdir(inputfile):
                raise MQ2Exception('The specified input file is actually '
                                   'a folder')
            else:
                inputfiles = [inputfile]

        if not inputfiles:  # pragma: no cover
            raise MQ2Exception('No files correspond to this plugin')

        if len(inputfiles) > 1:  # pragma: no cover
            raise MQ2Exception(
                'This plugin can only process one file at a time')

        try:
            lod_threshold = float(lod_threshold)
        except (TypeError, ValueError):
            # float(None), i.e. the default value, raises TypeError
            # rather than ValueError; both mean "not a number" here.
            raise MQ2Exception('LOD threshold should be a number')

        inputfile = inputfiles[0]

        # QTL matrix and QTL files
        matrix = read_input_file(inputfile, sep=',', noquote=True)
        qtls = list(get_qtls_from_rqtl_data(matrix, lod_threshold))
        # format QTLs and write down the selection
        write_matrix(qtls_file, qtls)

        # Write down the QTL matrix, exactly as read from the input file
        write_matrix(matrix_file, matrix)

        # Map matrix
        map_matrix = get_map_matrix(inputfile)
        write_matrix(map_file, map_matrix)
Exemplo n.º 11
0
    def convert_inputfiles(cls,
                           folder=None,
                           inputfile=None,
                           session=None,
                           lod_threshold=None,
                           qtls_file='qtls.csv',
                           matrix_file='qtls_matrix.csv',
                           map_file='map.csv'):
        """ Convert the input files present in the given folder or
        inputfile.
        This method creates the matrix representation of the QTLs
        results providing for each marker position the LOD value found
        for each trait as well as a representation of the genetic map
        used in the experiment.
        The genetic map should be cleared of any markers added by the
        QTL mapping software.

        :kwarg folder: the path to the folder containing the files to
            check. This folder may contain sub-folders.
        :kwarg inputfile: the path to the input file to use
        :kwarg session: the session identifier used to identify which
            session to process
        :kwarg lod_threshold: the LOD threshold to apply to determine if
            a QTL is significant or not
        :kwarg qtls_file: a csv file containing the list of all the
            significant QTLs found in the analysis.
            The matrix is of type:
               trait, linkage group, position, Marker, LOD other columns
        :kwarg matrix_file: a csv file containing a matrix representation
            of the QTL data. This matrix is of type:
               marker, linkage group, position, trait1 lod, trait2, lod
        :kwarg map_file: a csv file containing the genetic map used
            in this experiment. The map is of structure:
               marker, linkage group, position
        :raises MQ2Exception: if neither a folder nor an input file is
            given, if ``folder`` is not a folder, if ``inputfile`` is a
            folder, if the LOD threshold cannot be converted to a
            number, or if there is no input file to process.
        :raises MQ2NoSessionException: if no session is given.
        :raises MQ2NoSuchSessionException: if the given session is not
            one of the sessions found for ``folder``.

        """
        if folder is None and inputfile is None:
            raise MQ2Exception('You must specify either a folder or an '
                               'input file')

        sessions = cls.get_session_identifiers(folder)
        if session is None:
            raise MQ2NoSessionException(
                'The MapQTL plugin requires a session identifier to '
                'identify the session to process.'
                'Sessions are: %s' % ','.join(sessions))
        elif str(session) not in sessions:
            raise MQ2NoSuchSessionException(
                'The MapQTL session provided (%s) could not be found in the '
                'dataset. '
                'Sessions are: %s' % (session, ','.join(sessions)))

        if folder is not None:
            if not os.path.isdir(folder):  # pragma: no cover
                raise MQ2Exception('The specified folder is actually '
                                   'not a folder')
            else:
                inputfiles = cls.get_files(folder, session_id=session)

        # An explicit input file takes precedence over the folder content.
        if inputfile is not None:  # pragma: no cover
            if os.path.isdir(inputfile):
                raise MQ2Exception('The specified input file is actually '
                                   'a folder')
            else:
                inputfiles = [inputfile]

        try:
            lod_threshold = float(lod_threshold)
        except (TypeError, ValueError):
            # float(None), i.e. the default value, raises TypeError
            # rather than ValueError; both mean "not a number" here.
            raise MQ2Exception('LOD threshold should be a number')

        # Without any file the loop below would never bind ``headers``
        # and the method would fail with an UnboundLocalError.
        if not inputfiles:
            raise MQ2Exception('No files correspond to this plugin')

        inputfiles.sort()

        # QTL matrix and QTL files
        qtl_matrix = []
        qtls = []
        for filename in inputfiles:
            matrix = read_input_file(filename)
            headers = matrix[0]
            qtl_matrix = get_qtls_matrix(qtl_matrix, matrix, filename)
            qtls.extend(
                get_qtls_from_mapqtl_data(matrix, lod_threshold, filename))
        # format QTLs and write down the selection; the header row used
        # is the one of the last file read.
        headers[0] = 'Trait name'
        qtls.insert(0, headers)
        write_matrix(qtls_file, qtls)

        # Write down the QTL matrix: drop its first row, then move what
        # is now the third row to the front.
        del qtl_matrix[0]
        qtl_matrix.insert(0, qtl_matrix.pop(2))
        # write output, transposed
        qtl_matrix = list(zip(*qtl_matrix))
        write_matrix(matrix_file, qtl_matrix)

        # Map matrix, built from the first input file only
        map_matrix = get_map_matrix(inputfiles[0])
        write_matrix(map_file, map_matrix)
Exemplo n.º 12
0
    def convert_inputfiles(cls,
                           folder=None,
                           inputfile=None,
                           session=None,
                           lod_threshold=None,
                           qtls_file='qtls.csv',
                           matrix_file='qtls_matrix.csv',
                           map_file='map.csv'):
        """ Convert the input files present in the given folder or
        inputfile.
        This method creates the matrix representation of the QTLs
        results providing for each marker position the LOD value found
        for each trait as well as a representation of the genetic map
        used in the experiment.
        The genetic map should be cleared of any markers added by the
        QTL mapping software.

        :kwarg folder: the path to the folder containing the files to
            check. This folder may contain sub-folders.
        :kwarg inputfile: the path to the input file to use
        :kwarg session: the session identifier used to identify which
            session to process (not used by this implementation)
        :kwarg lod_threshold: the LOD threshold to apply to determine if
            a QTL is significant or not
        :kwarg qtls_file: a csv file containing the list of all the
            significant QTLs found in the analysis.
            The matrix is of type:
               trait, linkage group, position, Marker, LOD other columns
        :kwarg matrix_file: a csv file containing a matrix representation
            of the QTL data. This matrix is of type:
               marker, linkage group, position, trait1 lod, trait2, lod
        :kwarg map_file: a csv file containing the genetic map used
            in this experiment. The map is of structure:
               marker, linkage group, position
        :raises MQ2Exception: if neither a folder nor an input file is
            given, if ``folder`` is not a folder, if ``inputfile`` is a
            folder, if zero or more than one input file is found, or if
            the LOD threshold cannot be converted to a number.

        """
        if folder is None and inputfile is None:
            raise MQ2Exception('You must specify either a folder or an '
                               'input file')

        if folder is not None:  # pragma: no cover
            if not os.path.isdir(folder):
                raise MQ2Exception('The specified folder is actually '
                                   'not a folder')
            else:
                inputfiles = cls.get_files(folder)

        # An explicit input file takes precedence over the folder content.
        if inputfile is not None:  # pragma: no cover
            if os.path.isdir(inputfile):
                raise MQ2Exception('The specified input file is actually '
                                   'a folder')
            else:
                inputfiles = [inputfile]

        if not inputfiles:  # pragma: no cover
            raise MQ2Exception('No files correspond to this plugin')

        if len(inputfiles) > 1:  # pragma: no cover
            raise MQ2Exception(
                'This plugin can only process one file at a time')

        try:
            lod_threshold = float(lod_threshold)
        except (TypeError, ValueError):
            # float(None), i.e. the default value, raises TypeError
            # rather than ValueError; both mean "not a number" here.
            raise MQ2Exception('LOD threshold should be a number')

        inputfile = inputfiles[0]

        # QTL matrix and QTL files
        matrix = read_input_file(inputfile, sep=',', noquote=True)
        qtls = list(get_qtls_from_rqtl_data(matrix, lod_threshold))
        # format QTLs and write down the selection
        write_matrix(qtls_file, qtls)

        # Write down the QTL matrix, exactly as read from the input file
        write_matrix(matrix_file, matrix)

        # Map matrix
        map_matrix = get_map_matrix(inputfile)
        write_matrix(map_file, map_matrix)