Exemplo n.º 1
0
def main():
    """Thin a peak list so that kept peaks do not share an RT/mz region.

    Reads comma-separated rows from ``options.inputFile``; the first four
    fields of each row are parsed as floats ``rt, mz, amp, score``. Every row
    gets a box of +/- ``rtGap/2`` by +/- ``mzGap/2`` around it. A row is
    written to ``options.outputFile`` (and its box stored in the index) only
    when ``idx.count(box)`` is zero, so earlier rows take precedence over
    later ones.

    NOTE(review): ``index.Index`` is presumably an rtree spatial index whose
    ``count`` reports intersecting entries -- confirm. ``RANK`` is not set in
    this function and is assumed to be a module-level name.

    Raises:
        ValueError: if a non-blank row has fewer than four fields or a field
            that is not a number.
    """
    options = parser.parse_args()

    if options.logFile:
        logging.basicConfig(filename=options.logFile + '-' + str(RANK),
                            level=logging.DEBUG,
                            filemode='w',
                            format='%(asctime)s %(message)s',
                            datefmt='%m/%d/%Y %H:%M:%S')
    logging.info('program started')
    logging.info('command line: {}'.format(' '.join(sys.argv)))

    Drt = options.rtGap/2.0
    Dmz = options.mzGap/2.0
    idx = index.Index()
    logging.info('region: +/- {} sec, +/- {} mz'.format(Drt,Dmz))
    count = 0
    # Both files are managed by the with-statement so the output is flushed
    # and closed even if a row fails to parse. The output is opened first,
    # as before, so it is truncated even when the input cannot be opened.
    with open(options.outputFile, "w") as data_out, \
            open(options.inputFile, "r") as data_in:
        for datum in data_in:
            if not datum.strip():
                # Blank lines (e.g. a trailing newline) carry no peak.
                continue
            rt, mz, amp, score = [float(v) for v in datum.split(",")][:4]
            coord = (rt-Drt, mz-Dmz, rt+Drt, mz+Dmz)
            if idx.count(coord) == 0:
                idx.insert(count, coord)
                # write() instead of the print statement: same output, and
                # the block parses on both Python 2 and Python 3.
                data_out.write("{}, {}, {}, {}\n".format(rt, mz, amp, score))
                count += 1
    sys.stdout.write("found {} regions\n".format(count))

    logging.debug('main mem: {}'.format(md_io.memory_usage_resource()))
    logging.info('program completed')
Exemplo n.º 2
0
def main(MPI=None):
    """Score mass-spec spectra sequentially or with an MPI master/worker pool.

    Args:
        MPI: a module exposing ``COMM_WORLD``, ``Status`` and ``ANY_SOURCE``
            (presumably ``mpi4py.MPI`` -- confirm against the caller), or
            ``None`` to run everything in this process.

    Rank 0 parses the command line, reads the input through the selected
    reader, and writes results to ``options.outputFile``. With more than one
    rank, rank 0 only reads and writes: it hands one chunk to whichever worker
    just reported in and sends ``None`` to tell a worker to stop. Workers
    score chunks and log send/recv/work timing percentiles when they finish.

    An unknown ``options.format`` terminates the program; in parallel mode the
    whole MPI job is aborted so workers are not left waiting on the master.
    """

    # try parallel
    COMM = None
    RANK = 0
    SIZE = 1
    if MPI:
        COMM = MPI.COMM_WORLD
        RANK = COMM.Get_rank()
        SIZE = COMM.Get_size()
        status = MPI.Status()

    # Only the master parses the command line; the other ranks hold a
    # placeholder that the broadcast below replaces.
    if RANK == 0:
        options = parser.parse_args()
    else:
        options = None

    if SIZE > 1:
        options = COMM.bcast(options)

    if options.logFile:
        logging.basicConfig(
            filename=options.logFile + "-" + str(RANK),
            level=logging.DEBUG,
            filemode="w",
            format="%(asctime)s %(message)s",
            datefmt="%m/%d/%Y %H:%M:%S",
        )
    logging.info("program started")
    logging.info("command line: {}".format(" ".join(sys.argv)))

    if RANK == 0:
        if options.format == "mzml":
            reader = md_io.MZMLtoSpectrum
        elif options.format == "mzdata":
            # TODO: convert to MPI
            reader = md_io.parseMZDATA
        else:
            message = "Unknown mass spec data format: {}".format(options.format)
            if SIZE > 1:
                # Workers block on the master right after start-up; exiting
                # only this rank would leave them hanging, so the whole job
                # is torn down instead.
                sys.stderr.write(message + "\n")
                COMM.Abort(1)
            sys.exit(message)

    # distribute to worker pool
    #           design
    # master            | workers
    # ---------  --------------------------------
    # while workers     | while !end
    #                   |   send result (or None)
    #   recv result     |
    #   store result    |
    #   get sender      |
    #   read data chunk |
    #   send to sender  |
    #                   |   recv data (or None)
    #                   |   do work (or end)
    #
    #
    # main split b/w master worker

    # read the input data file and extract useful contents
    if RANK == 0:
        # The with-statement guarantees the results file is flushed and
        # closed when the loop ends or an exception escapes.
        with open(options.outputFile, "w") as data_out:
            # NOTE(review): 2.355 looks like the FWHM-to-sigma factor -- confirm.
            half_window = int(math.ceil(options.rtSigma * options.rtWidth / 2.355))
            logging.debug("half RT window {}".format(half_window))
            # The master never receives a stop sentinel itself, so in parallel
            # mode it is counted as done up front and the loop ends once the
            # SIZE - 1 workers have each been sent None.
            if SIZE > 1:
                done = 1
            else:
                done = 0  # only needed for sequential
            raw_data = None  # only needed for sequential
            if not options.noScore:
                nextWindow = md_io.nextWindow(reader, options, half_window)
            else:
                nextWindow = reader(options)
            while done < SIZE:
                if SIZE > 1:
                    raw_data, scores = COMM.recv(source=MPI.ANY_SOURCE, status=status)
                    source = status.Get_source()
                # Sequential mode: this writes the result computed at the end
                # of the previous iteration.
                if raw_data is not None:
                    md_io.writeResults(data_out, raw_data, scores)

                ## Read data chunk
                try:
                    # Built-in next() works with both the Python 2 and the
                    # Python 3 iterator protocol.
                    spectra = next(nextWindow)
                except StopIteration:
                    spectra = None

                # try removing low values
                if options.removeLow > 0 and spectra is not None:
                    # use the specified low signal
                    spectra = md_filter.removeLowSignal(spectra, options.removeLow)

                if SIZE > 1:
                    COMM.send(spectra, dest=source)
                elif spectra is not None:  # do work sequentially
                    ## do work
                    if not options.noScore:
                        scores = md_filter.scoreSpectra(spectra, options)
                        raw_data = spectra[len(spectra) // 2]
                    else:
                        scores = None
                        raw_data = spectra
                if spectra is None:
                    done += 1  # can only ever close each worker once
    else:  # Worker
        scores = None
        raw = None
        # for stats
        send_time = []
        recv_time = []
        work_time = []
        in_mem = md_io.memory_usage_resource()
        while True:
            # The first send carries (None, None); it only tells the master
            # that this worker is ready for a chunk.
            t1 = time.time()
            COMM.send((raw, scores))
            t2 = time.time()
            send_time.append(t2 - t1)
            raw = None
            scores = None
            logging.debug("rank {}, mem start, end: {:.1f} {:.1f}".format(RANK, in_mem, md_io.memory_usage_resource()))
            in_mem = md_io.memory_usage_resource()
            t1 = time.time()
            spectra = COMM.recv()
            t2 = time.time()
            recv_time.append(t2 - t1)
            if spectra is not None:
                ## do work
                try:
                    t1 = time.time()
                    if not options.noScore:
                        scores = md_filter.scoreSpectra(spectra, options)
                        raw = spectra[len(spectra) // 2]
                    else:
                        scores = None
                        raw = spectra
                    t2 = time.time()
                    work_time.append(t2 - t1)
                    spectra = None
                except MemoryError:
                    # The chunk is dropped; the worker reports in again and
                    # carries on with the next one.
                    logging.debug("rank {} Memory Error".format(RANK))
            else:
                break
        logging.info("rank {}, count {}".format(RANK, len(work_time)))
        logging.info("rank {}, stats min 10% 25% 50% 75% 90% max".format(RANK))
        for label, samples in (("send", send_time), ("recv", recv_time), ("work", work_time)):
            if not samples:
                # A worker that never completed a chunk has no work timings,
                # and percentiles of an empty sample are undefined.
                logging.info("rank {}, {} n/a".format(RANK, label))
                continue
            limits = np.percentile(samples, [0, 10, 25, 50, 75, 90, 100])
            logging.info("rank {}, {} {}".format(RANK, label, ", ".join(["{:.2f}".format(i) for i in limits])))

    logging.debug("main mem: {}".format(md_io.memory_usage_resource()))
    logging.info("program completed")
Exemplo n.º 3
0
def main(MPI=None):
    """Score mass-spec spectra sequentially or with an MPI master/worker pool.

    Args:
        MPI: a module exposing ``COMM_WORLD``, ``Status`` and ``ANY_SOURCE``
            (presumably ``mpi4py.MPI`` -- confirm against the caller), or
            ``None`` to run everything in this process.

    Rank 0 parses the command line, reads the input through the selected
    reader, and writes results to ``options.outputFile``. With more than one
    rank, rank 0 only reads and writes: it hands one chunk to whichever worker
    just reported in and sends ``None`` to tell a worker to stop. Workers
    score chunks and log send/recv/work timing percentiles when they finish.

    An unknown ``options.format`` terminates the program; in parallel mode the
    whole MPI job is aborted so workers are not left waiting on the master.
    """

    # try parallel
    COMM = None
    RANK = 0
    SIZE = 1
    if MPI:
        COMM = MPI.COMM_WORLD
        RANK = COMM.Get_rank()
        SIZE = COMM.Get_size()
        status = MPI.Status()

    # Only the master parses the command line; the other ranks hold a
    # placeholder that the broadcast below replaces.
    if RANK == 0:
        options = parser.parse_args()
    else:
        options = None

    if SIZE > 1:
        options = COMM.bcast(options)

    if options.logFile:
        logging.basicConfig(filename=options.logFile + '-' + str(RANK),
                            level=logging.DEBUG,
                            filemode='w',
                            format='%(asctime)s %(message)s',
                            datefmt='%m/%d/%Y %H:%M:%S')
    logging.info('program started')
    logging.info('command line: {}'.format(' '.join(sys.argv)))

    if RANK == 0:
        if options.format == 'mzml':
            reader = md_io.MZMLtoSpectrum
        elif options.format == 'mzdata':
            # TODO: convert to MPI
            reader = md_io.parseMZDATA
        else:
            message = "Unknown mass spec data format: {}".format(
                options.format)
            if SIZE > 1:
                # Workers block on the master right after start-up; exiting
                # only this rank would leave them hanging, so the whole job
                # is torn down instead.
                sys.stderr.write(message + "\n")
                COMM.Abort(1)
            sys.exit(message)

    # distribute to worker pool
    #           design
    # master            | workers
    # ---------  --------------------------------
    # while workers     | while !end
    #                   |   send result (or None)
    #   recv result     |
    #   store result    |
    #   get sender      |
    #   read data chunk |
    #   send to sender  |
    #                   |   recv data (or None)
    #                   |   do work (or end)
    #
    #
    # main split b/w master worker

    # read the input data file and extract useful contents
    if RANK == 0:
        # The with-statement guarantees the results file is flushed and
        # closed when the loop ends or an exception escapes.
        with open(options.outputFile, "w") as data_out:
            # NOTE(review): 2.355 looks like the FWHM-to-sigma factor --
            # confirm.
            half_window = int(
                math.ceil(options.rtSigma * options.rtWidth / 2.355))
            logging.debug('half RT window {}'.format(half_window))
            # The master never receives a stop sentinel itself, so in parallel
            # mode it is counted as done up front and the loop ends once the
            # SIZE - 1 workers have each been sent None.
            if SIZE > 1:
                done = 1
            else:
                done = 0  # only needed for sequential
            raw_data = None  # only needed for sequential
            if not options.noScore:
                nextWindow = md_io.nextWindow(reader, options, half_window)
            else:
                nextWindow = reader(options)
            while done < SIZE:
                if SIZE > 1:
                    raw_data, scores = COMM.recv(source=MPI.ANY_SOURCE,
                                                 status=status)
                    source = status.Get_source()
                # Sequential mode: this writes the result computed at the end
                # of the previous iteration.
                if raw_data is not None:
                    md_io.writeResults(data_out, raw_data, scores)

                ## Read data chunk
                try:
                    # Built-in next() works with both the Python 2 and the
                    # Python 3 iterator protocol.
                    spectra = next(nextWindow)
                except StopIteration:
                    spectra = None

                # try removing low values
                if options.removeLow > 0 and spectra is not None:
                    # use the specified low signal
                    spectra = md_filter.removeLowSignal(spectra,
                                                        options.removeLow)

                if SIZE > 1:
                    COMM.send(spectra, dest=source)
                elif spectra is not None:  # do work sequentially
                    ## do work
                    if not options.noScore:
                        scores = md_filter.scoreSpectra(spectra, options)
                        raw_data = spectra[len(spectra) // 2]
                    else:
                        scores = None
                        raw_data = spectra
                if spectra is None:
                    done += 1  # can only ever close each worker once
    else:  # Worker
        scores = None
        raw = None
        # for stats
        send_time = []
        recv_time = []
        work_time = []
        in_mem = md_io.memory_usage_resource()
        while True:
            # The first send carries (None, None); it only tells the master
            # that this worker is ready for a chunk.
            t1 = time.time()
            COMM.send((raw, scores))
            t2 = time.time()
            send_time.append(t2 - t1)
            raw = None
            scores = None
            logging.debug('rank {}, mem start, end: {:.1f} {:.1f}'.format(
                RANK, in_mem, md_io.memory_usage_resource()))
            in_mem = md_io.memory_usage_resource()
            t1 = time.time()
            spectra = COMM.recv()
            t2 = time.time()
            recv_time.append(t2 - t1)
            if spectra is not None:
                ## do work
                try:
                    t1 = time.time()
                    if not options.noScore:
                        scores = md_filter.scoreSpectra(spectra, options)
                        raw = spectra[len(spectra) // 2]
                    else:
                        scores = None
                        raw = spectra
                    t2 = time.time()
                    work_time.append(t2 - t1)
                    spectra = None
                except MemoryError:
                    # The chunk is dropped; the worker reports in again and
                    # carries on with the next one.
                    logging.debug('rank {} Memory Error'.format(RANK))
            else:
                break
        logging.info('rank {}, count {}'.format(RANK, len(work_time)))
        logging.info('rank {}, stats min 10% 25% 50% 75% 90% max'.format(RANK))
        timings = (('send', send_time),
                   ('recv', recv_time),
                   ('work', work_time))
        for label, samples in timings:
            if not samples:
                # A worker that never completed a chunk has no work timings,
                # and percentiles of an empty sample are undefined.
                logging.info('rank {}, {} n/a'.format(RANK, label))
                continue
            limits = np.percentile(samples, [0, 10, 25, 50, 75, 90, 100])
            logging.info('rank {}, {} {}'.format(
                RANK, label, ', '.join(['{:.2f}'.format(i) for i in limits])))

    logging.debug('main mem: {}'.format(md_io.memory_usage_resource()))
    logging.info('program completed')