Пример #1
0
def _multiparse(filepath, metalist, win=None):
    """ Parses a single mzML/imzML file and appends its metadata to *metalist*.

    The text after the last ``os.path.extsep`` in *filepath* selects both
    the parser class and the ontology handed to it.

    :param str filepath: path of the file to parse
    :param metalist:     object with an ``append`` method that receives the
                         ``meta`` attribute of the parser instance
    :param win:          not used by this function; optional so that callers
                         may leave it out
    :raises KeyError:    if the extension is neither ``mzML`` nor ``imzML``
    """
    dirname = os.path.dirname(os.path.realpath(__file__))
    if any(flag in sys.argv for flag in ('-h', '--help', '--version')):
        # Help/version invocations do not load any ontology. The original
        # code went on to call ``.merge`` on the ``None`` placeholder, which
        # could only raise AttributeError, so that call is gone.
        _ms, _ims = None, None
    else:
        _ms = Ontology(os.path.join(dirname, "psi-ms.obo"), False)
        _ims = Ontology(os.path.join(dirname, "imagingMS.obo"), False)
        # the imaging ontology also gets every term of the MS ontology
        _ims.terms.update(_ms.terms)

    PARSERS = {'mzML': mzml.mzMLmeta, 'imzML': mzml.imzMLmeta}

    ONTOLOGIES = {'mzML': _ms, 'imzML': _ims}

    print('Parsing file: {}'.format(filepath))

    # computed once and shared by both lookups
    ext = filepath.split(os.path.extsep)[-1]
    parser = PARSERS[ext]
    ont = ONTOLOGIES[ext]

    metalist.append(parser(filepath, ont).meta)
Пример #2
0
def _file_extension(entry, compr):
    """ Returns the text after the last ``os.path.extsep`` of a file name.

    :param entry:      a path string or, when *compr* is true, an object
                       carrying the file name in its ``name`` attribute
    :param bool compr: whether *entry* was produced by ``compr_extract``
    """
    name = entry.name if compr else entry
    return name.split(os.path.extsep)[-1]


def full_parse(in_dir,
               out_dir,
               study_identifier,
               usermeta=None,
               split=True,
               merge=False,
               verbose=False,
               multip=False):
    """ Parses every study from *in_dir* and then creates ISA files.

    A new folder is created in the out directory bearing the name of
    the study identifier.

    A ``UserWarning`` is issued, and nothing else happens, when no input
    file is found.

    :param str in_dir:           path to directory containing studies, or to
                                 a tar/zip archive
    :param str out_dir:          path to out directory
    :param str study_identifier: name of the study (directory to create)
    :param usermeta:             passed to ``isa.ISA_Tab`` (``{}`` if falsy)
    :param split:                forwarded to ``ISA_Tab.write``
    :param bool merge:           run ``merge_spectra`` on the parsed metadata
                                 when the first file is an ``imzML`` file
    :param bool verbose:         print progress messages instead of showing
                                 a progress bar
    :param multip:               when truthy, parse every file in its own
                                 process through ``_multiparse``
    """
    # Locate the input files first: ontologies and worker processes are only
    # needed when there is something to parse.
    if os.path.isfile(in_dir) and tarfile.is_tarfile(in_dir):
        compr = True
        mzml_files = compr_extract(in_dir, "tar")
    elif os.path.isfile(in_dir) and zipfile.is_zipfile(in_dir):
        compr = True
        mzml_files = compr_extract(in_dir, "zip")
    else:
        compr = False
        mzml_path = os.path.join(in_dir, "*mzML")

        if verbose:
            print(mzml_path)

        mzml_files = glob.glob(mzml_path)

    if not mzml_files:
        warnings.warn("No files were found in {}.".format(in_dir), UserWarning)
        return

    # the extension of the first file decides merging and the output format
    ext1 = _file_extension(mzml_files[0], compr)

    if multip:
        # The manager owns a server process, so it is started only for this
        # branch and always shut down again.
        manager = Manager()
        try:
            shared = manager.list()
            jobs = []

            for i in mzml_files:
                # _multiparse is declared as (filepath, metalist, win) and
                # does not use ``win``, hence the explicit None.
                p = Process(target=_multiparse, args=(i, shared, None))
                jobs.append(p)
                p.start()

            for proc in jobs:
                proc.join()

            # copy out of the proxy before the manager goes away
            metalist = list(shared)
        finally:
            manager.shutdown()

    else:
        dirname = os.path.dirname(os.path.realpath(__file__))
        if any(flag in sys.argv for flag in ('-h', '--help', '--version')):
            # No ontology is loaded for help/version invocations. The
            # original ``ims.merge(ms)`` call on the ``None`` placeholder
            # could only raise AttributeError and has been dropped.
            ms, ims = None, None
        else:
            ms = Ontology(os.path.join(dirname, "psi-ms.obo"), False)
            ims = Ontology(os.path.join(dirname, "imagingMS.obo"), False)
            ims.terms.update(ms.terms)

        PARSERS = {'mzML': mzml.mzMLmeta, 'imzML': mzml.imzMLmeta}

        ONTOLOGIES = {'mzML': ms, 'imzML': ims}

        if verbose:
            entries = mzml_files
        else:
            pbar = pb.ProgressBar(widgets=[
                'Parsing {:8}: '.format(study_identifier),
                pb.FormatLabel('%(value)4d'), '/',
                '%4d' % len(mzml_files),
                pb.Bar(marker=MARKER, left=" |", right="| "),
                pb.ETA()
            ])
            entries = pbar(mzml_files)

        # get meta information for all files
        metalist = []
        for i in entries:
            if verbose:
                print("Parsing file: {}".format(i))

            ext = _file_extension(i, compr)
            parser = PARSERS[ext]
            ont = ONTOLOGIES[ext]

            metalist.append(parser(i, ont).meta)

    # update isa-tab file

    if merge and ext1 == 'imzML':
        if verbose:
            print('Attempting to merge profile and centroid scans')
        metalist = merge_spectra(metalist)

    if metalist:
        if verbose:
            print("Parsing mzML meta information into ISA-Tab structure")
        isa.ISA_Tab(out_dir, study_identifier, usermeta
                    or {}).write(metalist, ext1, split)
Пример #3
0
def full_parse(in_dir, out_dir, study_identifier, usermeta=None, split=True, merge=False, verbose=False, multip=False):
    """ Parses every study from *in_dir* and then creates ISA files.

    A new folder is created in the out directory bearing the name of
    the study identifier.

    A ``UserWarning`` is issued, and nothing else happens, when no input
    file is found.

    :param str in_dir:           path to directory containing studies, or to
                                 a tar/zip archive
    :param str out_dir:          path to out directory
    :param str study_identifier: name of the study (directory to create)
    :param usermeta:             passed to ``isa.ISA_Tab`` (``{}`` if falsy)
    :param split:                forwarded to ``ISA_Tab.write``
    :param bool merge:           run ``merge_spectra`` on the parsed metadata
                                 when the last file is an ``imzML`` file
    :param bool verbose:         print progress messages instead of showing
                                 a progress bar
    :param multip:               when truthy, parse every file in its own
                                 process through ``_multiparse``
    """
    # get mzML file in the example_files folder
    if os.path.isfile(in_dir) and tarfile.is_tarfile(in_dir):
        compr = True
        mzml_files = compr_extract(in_dir, "tar")
    elif os.path.isfile(in_dir) and zipfile.is_zipfile(in_dir):
        compr = True
        mzml_files = compr_extract(in_dir, "zip")
    else:
        compr = False
        mzml_path = os.path.join(in_dir, "*mzML")

        if verbose:
            print(mzml_path)

        mzml_files = glob.glob(mzml_path)

    # Nothing to parse: warn before loading ontologies or starting workers.
    if not mzml_files:
        warnings.warn("No files were found in {}.".format(in_dir), UserWarning)
        return

    def extension_of(entry):
        # extracted archive members carry their file name in ``.name``
        name = entry.name if compr else entry
        return name.split(os.path.extsep)[-1]

    # The sequential loops used to leave ``ext`` holding the extension of the
    # last file; computing it up front keeps that value and also defines it
    # for the multiprocessing branch, where it was previously unbound.
    ext = extension_of(mzml_files[-1])

    if multip:
        # Local import: the original block only referenced Process and Pool.
        from multiprocessing import Manager, Process

        # A manager-backed list lets the children hand their results back;
        # the manager's server process is always shut down afterwards.
        manager = Manager()
        try:
            shared = manager.list()
            jobs = []

            for i in mzml_files:
                # _multiparse is declared as (filepath, metalist, win) and
                # does not use ``win``, hence the explicit None.
                p = Process(target=_multiparse, args=(i, shared, None))
                jobs.append(p)
                p.start()

            for proc in jobs:
                proc.join()

            # copy out of the proxy before the manager goes away
            metalist = list(shared)
        finally:
            manager.shutdown()

    else:
        dirname = os.path.dirname(os.path.realpath(__file__))
        if any(flag in sys.argv for flag in ('-h', '--help', '--version')):
            # No ontology is loaded for help/version invocations. The
            # original ``_ims.merge(_ms)`` call on the ``None`` placeholder
            # could only raise AttributeError and has been dropped.
            _ms, _ims = None, None
        else:
            _ms = Ontology(os.path.join(dirname, "psi-ms.obo"), False)
            _ims = Ontology(os.path.join(dirname, "imagingMS.obo"), False)
            _ims.terms.update(_ms.terms)

        _PARSERS = {'mzML': mzml.mzMLmeta,
                    'imzML': mzml.imzMLmeta}

        _ONTOLOGIES = {'mzML': _ms,
                       'imzML': _ims}

        if verbose:
            entries = mzml_files
        else:
            pbar = pb.ProgressBar(widgets=['Parsing {:8}: '.format(study_identifier),
                                           pb.FormatLabel('%(value)4d'), '/',
                                           '%4d' % len(mzml_files),
                                           pb.Bar(marker=MARKER, left=" |", right="| "),
                                           pb.ETA()])
            entries = pbar(mzml_files)

        # get meta information for all files
        metalist = []
        for i in entries:
            if verbose:
                print("Parsing file: {}".format(i))

            file_ext = extension_of(i)
            parser = _PARSERS[file_ext]
            ont = _ONTOLOGIES[file_ext]

            metalist.append(parser(i, ont).meta)

    # update isa-tab file

    if merge and ext == 'imzML':
        if verbose:
            print('Attempting to merge profile and centroid scans')
        metalist = merge_spectra(metalist)

    if metalist:
        if verbose:
            print("Parsing mzML meta information into ISA-Tab structure")
        isa.ISA_Tab(out_dir, study_identifier, usermeta or {}).write(metalist, ext, split)