Example #1
import logging
import os
import zipfile

from mpi4py import MPI

# Corpus and Decomposer are provided by the surrounding project.


def repartition(in_path, out_path, split, downsample=1, filter=lambda x: True):
    perfLogger = logging.getLogger('performance')
    corpus = Corpus(in_path, communicator=MPI.COMM_WORLD)
    this_processor_out = Decomposer(range(split), communicator=MPI.COMM_WORLD)
    processor_paths = Decomposer(corpus.paths, communicator=MPI.COMM_WORLD)
    processor_corpus = Corpus(processor_paths)
    for chunk_index, chunk in enumerate(this_processor_out):
        perfLogger.info("Starting output zip " + str(chunk))
        books = Decomposer(processor_corpus,
                           rank=chunk_index,
                           size=len(this_processor_out),
                           subsample=downsample)
        perfLogger.debug("Will handle " + str(len(books)) + " books.")
        with zipfile.ZipFile(os.path.join(out_path,
                                          'chunk' + str(chunk) + '.zip'),
                             'w',
                             allowZip64=True) as outzip:
            for book in books:
                book.load()
                if not filter(book):
                    continue
                info = book.zip_info()
                # transfer from small zip to bigger zip
                outzip.writestr(info, book.archive.zip.read(info))
                for page_code in book.page_codes:
                    info = book.page_zip_info(page_code)
                    outzip.writestr(info, book.archive.zip.read(info))
        perfLogger.info("Completed output zip " + str(chunk))
    MPI.COMM_WORLD.Barrier()
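Since repartition splits both the output chunks and the input paths across MPI ranks, it is meant to be launched with one Python process per rank, e.g. via mpiexec -n 8. A minimal driver sketch; the module name repartition_tool and the paths are illustrative, not from the original source:

# run_repartition.py (hypothetical driver; launch with: mpiexec -n 8 python run_repartition.py)
from repartition_tool import repartition

# Repartition the corpus into 64 output zips, keeping every book.
repartition('/data/corpus_in', '/data/corpus_out', split=64)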
Example #2
import os
import shutil
import tempfile
import zipfile
from itertools import islice

from mpi4py import MPI

# Decomposer is provided by the surrounding project.


def repartition_from_metazip(in_zip, out_path, split):
    tmpdir = tempfile.mkdtemp()
    this_processor = Decomposer(range(split))
    with zipfile.ZipFile(in_zip) as metazip:
        inzips = metazip.infolist()
        for chunk in this_processor:
            # open a zip for writing
            with zipfile.ZipFile(os.path.join(out_path,
                                              'chunk' + str(chunk) + '.zip'),
                                 'w',
                                 allowZip64=True) as outzip:
                this_chunk = list(islice(inzips, chunk, None, split))
                for archive in this_chunk:
                    # open a smaller zip
                    metazip.extract(archive, tmpdir)
                    small = os.path.join(tmpdir, archive.filename)
                    # should be able to do this in memory, but
                    # zipfile doesn't like importing from file-like-object
                    try:
                        with zipfile.ZipFile(small) as inzip:
                            # transfer from small zip to bigger zip
                            for info in inzip.infolist():
                                outzip.writestr(info, inzip.read(info))
                    except zipfile.BadZipFile:
                        print("Bad file:", archive.filename)
                    os.remove(small)
    shutil.rmtree(tmpdir)
    MPI.COMM_WORLD.Barrier()
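The islice(inzips, chunk, None, split) pattern deals the inner archives out round-robin, so chunk k receives every split-th entry starting at index k. A standalone illustration using only the standard library:

from itertools import islice

items = list(range(10))
print(list(islice(items, 0, None, 3)))  # chunk 0 of 3 -> [0, 3, 6, 9]
print(list(islice(items, 1, None, 3)))  # chunk 1 of 3 -> [1, 4, 7]
print(list(islice(items, 2, None, 3)))  # chunk 2 of 3 -> [2, 5, 8]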
Example #3
def decomposer_pipeline(arg_dict):
    """
    Run the decomposer pipeline, including searching for the song and/or downloading the YouTube video.
    Args:
        arg_dict (dict): dictionary of parsed arguments
    Returns: None
    """
    # download the song from YouTube as video, convert to WAV, clean up
    song = arg_dict.get('song', None)
    youtube_url = arg_dict.get('youtube', None)
    max_time = arg_dict.get('max_time', None)

    setup_dirs()

    # handle downloading and setup based on media input type
    if youtube_url:
        input_song = _handle_youtube_option(youtube_url)
    elif song:
        input_song = _handle_local_song_option(song)
    else:
        msg = '[PIPELINE] >>>> Must choose one option: --song or --youtube'
        logger.error(msg)
        raise DecomposerError(msg)

    # Decompose the song if needed
    if input_song:
        try:
            Decomposer(input_song, stop_time=max_time,
                       scale=2).cvt_audio_to_piano()
            logger.info('[PIPELINE] >>>> Song successfully decomposed!')
        except Exception:
            logger.error(traceback.format_exc())
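A minimal call sketch for decomposer_pipeline, using only the keys the function itself reads ('song', 'youtube', 'max_time'); the file path is illustrative:

decomposer_pipeline({
    'song': '/tmp/input.wav',  # illustrative local file
    'youtube': None,
    'max_time': 30,            # stop decomposing after 30 seconds
})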
Example #4
    def parallel(self, data):
        perfLogger = logging.getLogger('performance')
        # local map
        if self.prepartitioned:
            partition = Decomposer(data, subsample=self.subsample)
        else:
            partition = Decomposer(data,
                                   self.communicator,
                                   subsample=self.subsample)
        perfLogger.info("Built iterator")
        quantities = map(self.mapper, partition)
        perfLogger.info("Mapped")
        # note: in Python 3, reduce must be imported from functools
        local_result = reduce(self.reducer, quantities)
        perfLogger.info("Local reduce")

        # reduce under mpi
        def reduce_arrays(x, y, dtype):
            # the signature for the user defined op takes a datatype, which we can ignore
            return self.reducer(x, y)

        reducer_mpi = MPI.Op.Create(reduce_arrays, True)
        perfLogger.debug("Local result: " + str(local_result)[0:60])
        if self.shuffler:
            perfLogger.info("Shuffling")
            shuffled = defaultdict(dict)
            if local_result:
                for key in local_result:
                    target = self.shuffler(key, self.communicator.size)
                    shuffled[target][key] = local_result[key]
            for root in range(self.communicator.size):
                perfLogger.info("Reducing to rank " + str(root))
                temp = self.communicator.reduce(shuffled[root],
                                                op=reducer_mpi,
                                                root=root)
                if self.communicator.rank == root:
                    result = temp
        else:
            result = self.communicator.reduce(local_result,
                                              op=reducer_mpi,
                                              root=0)
            result = self.communicator.bcast(result, root=0)
        perfLogger.info("Global reduce")

        reducer_mpi.Free()
        return result
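The MPI.Op.Create / communicator.reduce pattern used above can be exercised in isolation. A minimal sketch, assuming mpi4py and a few ranks launched via mpiexec; merge_counts mirrors the datatype-accepting callback shape of reduce_arrays:

from mpi4py import MPI

comm = MPI.COMM_WORLD

def merge_counts(x, y, datatype):
    # Merge two partial dictionaries; the datatype argument is ignored,
    # as in reduce_arrays above.
    merged = dict(x)
    for key, value in y.items():
        merged[key] = merged.get(key, 0) + value
    return merged

op = MPI.Op.Create(merge_counts, True)
local = {'rank_%d' % comm.rank: 1, 'total': 1}
result = comm.reduce(local, op=op, root=0)
if comm.rank == 0:
    print(result)  # with -n 4: four rank keys, plus 'total': 4
op.Free()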
Example #5
    def serial(self, data):
        try:
            count = len(data)
        except TypeError:
            # len() raises TypeError for generators and other unsized iterables
            count = None
        subsampled_data = Decomposer(data, subsample=self.subsample)
        quantities = map(self.mapper, subsampled_data)
        result = reduce(self.reducer, quantities)
        return result
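A sketch of the mapper/reducer shapes serial expects, with plain functions standing in for self.mapper and self.reducer (names and data are illustrative):

from functools import reduce

mapper = len                      # map each item to its length
reducer = lambda a, b: a + b      # sum the mapped values
print(reduce(reducer, map(mapper, ['ab', 'cde', 'f'])))  # prints 6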
Example #6
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    if args.train:
        model = CNN(args.model)
        trainer = Trainer(args, model)
        trainer.train()
        trainer.view()

    if args.decompose:
        path_i = f'models/{args.model}.pth'
        model = torch.load(path_i)
        decomposer = Decomposer(args, model)
        decomposer.run()
        if args.type == 'fc':
            params = defaultdict(dict)
            for k in args.key:
                params[k]['d'] = 6
                params[k]['tt_ranks'] = [1, 8, 8, 8, 8, 8, 1]

            decomposer.replace_layer(keys=args.key,
                                     type=args.type,
                                     params=params)
        else:
            decomposer.replace_layer(keys=args.key, type=args.type)

        path_o = f'models/{args.model}_{args.factorization}_{args.key}_{args.type}.pth'
        torch.save(decomposer.model, path_o)
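For context on the hard-coded params above: a tensor-train factorization over d modes carries d + 1 ranks, with the boundary ranks fixed at 1, which is why tt_ranks has seven entries for d = 6. A quick sanity check:

d = 6
tt_ranks = [1, 8, 8, 8, 8, 8, 1]
assert len(tt_ranks) == d + 1 and tt_ranks[0] == tt_ranks[-1] == 1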
Example #7
import argparse

# Formula, Graph, Decomposer, and Table are provided by the surrounding project.

parser = argparse.ArgumentParser(
    description="Dynamic programming on a TD of a MaxSAT instance")
parser.add_argument("file")
args = parser.parse_args()

with open(args.file) as f:
    print("Parsing...")
    formula = Formula(f)
    print(formula)
    print("Constructing primal graph...")
    g = formula.primal_graph()
    print(g)
    #td = Decomposer(g, Graph.min_degree_vertex, max_width=8).decompose()
    print("Decomposing...")
    tds = Decomposer(g, Graph.min_degree_vertex,
                     max_width=5).decompose_partially()
    #td.weakly_normalize()
    for td in tds:
        print(td)
        print()
        td.weakly_normalize()

    print("Solving...")
    for td in tds:
        if not td.children:
            continue  # maybe not so interesting...?
        print(td)
        table = Table(td, formula)
        table.compute()
        print()
        table.write_recursively()
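Since the parser declares a single positional file argument, the script is run as, e.g. (script name illustrative):

python td_maxsat.py instance.wcnf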
Example #8
                        choices=["min-degree", "min-fill"],
                        default="min-degree")
    parser.add_argument("--normalize", choices=["weak"])
    args = parser.parse_args()

    if args.heuristic == "min-degree":
        heuristic = Graph.min_degree_vertex
    elif args.heuristic == "min-fill":
        heuristic = Graph.min_fill_vertex

    normalize = None
    if args.normalize == "weak":
        normalize = TD.weakly_normalize

    with open(args.file) as f:
        f = Formula(f)
        print(f)
        g = f.primal_graph()
        print(g)
        decomposer = Decomposer(g,
                                heuristic,
                                max_width=args.max_width,
                                normalize=normalize)
        tds = decomposer.decompose()
        headline = "Partial TD" if len(tds) > 1 else "TD"
        for td in tds:
            print(f"{headline}:\n{td}")
        remainder = decomposer.remainder()
        if remainder:
            print(f"Remainder: {remainder}")
Example #9
    log_level_number = getattr(logging, args.log.upper(), None)
    if not isinstance(log_level_number, int):
        raise ValueError(f"Invalid log level: {loglevel}")
    logging.basicConfig(level=log_level_number)

    with open(args.file) as f:
        print("Parsing...")
        formula = Formula(f)
        log.debug(formula)
        print("Constructing primal graph...")
        g = formula.primal_graph()
        log.debug(g)
        print("Decomposing...")
        tds = Decomposer(g,
                         Graph.min_degree_vertex,
                         normalize=TD.weakly_normalize).decompose()
        assert len(tds) == 1
        td = tds[0]
        log.debug(td)
        root_table = Table(td, formula)
        print("Solving...")
        root_table.compute()

        print("Resulting tables:")
        root_table.write_recursively()

        # for row in root_table.rows.values():
        #     print(f"Extensions of root row {row}:")
        #     for extension in row:
        #         print(extension)
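Assuming the elided parser defines --log and a positional file argument (consistent with args.log and args.file above), a debug run might look like (script name illustrative):

python solve.py --log DEBUG instance.cnf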