示例#1
0
文件: process.py 项目: slee1009/liam2
    def run_guarded(self, context):
        """Run every sub-process in order, then do the end-of-run housekeeping.

        ``self.subprocesses`` is iterated as ``(name, process)`` pairs. Each
        process is run through its own ``run_guarded``; when the log level is
        "processes" the run is announced on stdout and wrapped in
        ``utils.timed``. The simulation console hook is called after every
        sub-process.

        The housekeeping (autodump, autodiff, purge of the entity locals) is in
        a ``finally`` clause, so it also runs when a sub-process raises.
        """
        # read the period up front: it is the value passed to _autodiff below
        period = context.period

        if config.log_level == "processes":
            print()

        try:
            for name, step in self.subprocesses:
                if config.log_level != "processes":
                    step.run_guarded(context)
                else:
                    print("    *", end=' ')
                    if name is not None:
                        print(name, end=' ')
                    utils.timed(step.run_guarded, context)
                context.simulation.start_console(context)
        finally:
            if config.autodump is not None:
                self._autodump(context)

            if config.autodiff is not None:
                self._autodiff(period)

            if self.purge:
                self.entity.purge_locals()
示例#2
0
        def simulate_period(period_idx, period, processes, entities,
                            init=False):
            """Simulate a single period: load data, run processes, store data.

            This is a nested function: eval_ctx, process_time, period_objects,
            main_start_time, self, config and the helpers timed, gettime and
            time2str are all taken from the enclosing scope, which is not part
            of this excerpt.

            Arguments:
                period_idx: index of the period, used for the periodicity
                    test (period_idx % periodicity) and for the remaining
                    time estimate.
                period: the period value; it is stored on the evaluation
                    context and written to every entity array.
                processes: iterable of (process, periodicity) pairs; may be
                    empty or None, in which case no process is run.
                entities: entities whose data is loaded and stored.
                init: True for the initialisation period. It changes what is
                    printed and skips the remaining time estimate.

            Side effects: prints progress on stdout, updates
            process_time[process.name] and period_objects[period].
            """
            period_start_time = time.time()

            # set current period
            eval_ctx.period = period

            if config.log_level in ("functions", "processes"):
                print()
            # at the "periods" log level everything for a period stays on one
            # line, hence the trailing space instead of a newline
            print("period", period,
                  end=" " if config.log_level == "periods" else "\n")
            if init and config.log_level in ("functions", "processes"):
                for entity in entities:
                    print("  * %s: %d individuals" % (entity.name,
                                                      len(entity.array)))
            else:
                # NOTE(review): this branch is also taken when init is True and
                # the log level is neither "functions" nor "processes", so
                # load_period_data is then called for the init period too.
                # Whether a period is loaded therefore depends on the log
                # level -- confirm this is intended.
                if config.log_level in ("functions", "processes"):
                    print("- loading input data")
                    for entity in entities:
                        print("  *", entity.name, "...", end=' ')
                        timed(entity.load_period_data, period)
                        print("    -> %d individuals" % len(entity.array))
                else:
                    for entity in entities:
                        entity.load_period_data(period)
            # stamp every entity (and every row of its array) with the period
            for entity in entities:
                entity.array_period = period
                entity.array['period'] = period

            if processes:
                num_processes = len(processes)
                for p_num, process_def in enumerate(processes, start=1):
                    process, periodicity = process_def

                    # set current entity
                    eval_ctx.entity_name = process.entity.name

                    if config.log_level in ("functions", "processes"):
                        print("- %d/%d" % (p_num, num_processes), process.name,
                              end=' ')
                        print("...", end=' ')
                    # a process only runs on periods whose index is a multiple
                    # of its periodicity
                    if period_idx % periodicity == 0:
                        elapsed, _ = gettime(process.run_guarded, eval_ctx)
                    else:
                        elapsed = 0
                        if config.log_level in ("functions", "processes"):
                            print("skipped (periodicity)")

                    # accumulate the run time per process name (skipped runs
                    # add 0)
                    process_time[process.name] += elapsed
                    # NOTE(review): "done." is printed for skipped processes
                    # too, right after "skipped (periodicity)".
                    if config.log_level in ("functions", "processes"):
                        if config.show_timings:
                            print("done (%s elapsed)." % time2str(elapsed))
                        else:
                            print("done.")
                    self.start_console(eval_ctx)

            if config.log_level in ("functions", "processes"):
                print("- storing period data")
                for entity in entities:
                    print("  *", entity.name, "...", end=' ')
                    timed(entity.store_period_data, period)
                    print("    -> %d individuals" % len(entity.array))
            else:
                for entity in entities:
                    entity.store_period_data(period)
#            print " - compressing period data"
#            for entity in entities:
#                print "  *", entity.name, "...",
#                for level in range(1, 10, 2):
#                    print "   %d:" % level,
#                    timed(entity.compress_period_data, level)
            # total number of rows over all entities for this period
            period_objects[period] = sum(len(entity.array)
                                         for entity in entities)
            period_elapsed_time = time.time() - period_start_time
            if config.log_level in ("functions", "processes"):
                print("period %d" % period, end=' ')
            print("done", end=' ')
            if config.show_timings:
                print("(%s elapsed)" % time2str(period_elapsed_time), end="")
                if init:
                    print(".")
                else:
                    # estimate the remaining time from the average duration of
                    # the periods done so far (main_start_time comes from the
                    # enclosing scope and is only read when init is False)
                    main_elapsed_time = time.time() - main_start_time
                    periods_done = period_idx + 1
                    remaining_periods = self.periods - periods_done
                    avg_time = main_elapsed_time / periods_done
                    # future_time = period_elapsed_time * 0.4 + avg_time * 0.6
                    remaining_time = avg_time * remaining_periods
                    print(" - estimated remaining time: %s."
                          % time2str(remaining_time))
            else:
                print()
示例#3
0
    def run_single(self, run_console=False, run_num=None):
        """Run one complete simulation and print a summary.

        Steps, in order:

        1. load the input dataset through self.data_source and prepare
           self.data_sink;
        2. build the array of every entity for period start_period - 1;
        3. open the autodump/autodiff HDF5 file if either option is set;
        4. run the init processes for period start_period - 1, then
           self.processes for self.periods consecutive periods;
        5. print timing statistics and the most time-consuming processes;
        6. optionally start an interactive console;
        7. in all cases (even on error) close the simulation and the
           autodump file and, when self.minimal_output is set, delete the
           output file and its directory.

        Arguments:
            run_console: when true, start a console.Console on a clone of the
                evaluation context once the simulation is done.
            run_num: not used in this method body.
        """
        start_time = time.time()

        input_dataset = timed(self.data_source.load,
                              self.globals_def,
                              self.entities_map)

        globals_data = input_dataset.get('globals')
        timed(self.data_sink.prepare, self.globals_def, self.entities_map,
              input_dataset, self.start_period - 1)

        print(" * building arrays for first simulated period")
        for ent_name, entity in self.entities_map.items():
            print("    -", ent_name, "...", end=' ')
            # TODO: this whole process of merging all periods is very
            # opinionated and does not allow individuals to die/disappear
            # before the simulation starts. We couldn't for example,
            # take the output of one of our simulation and
            # re-simulate only some years in the middle, because the dead
            # would be brought back to life. In conclusion, it should be
            # optional.
            timed(entity.build_period_array, self.start_period - 1)
        print("done.")

        # autodump writes the dump file, autodiff reads an existing one; when
        # both options are set, autodump wins and the file is opened for
        # writing
        if config.autodump or config.autodiff:
            if config.autodump:
                fname, _ = config.autodump
                mode = 'w'
            else:  # config.autodiff
                fname, _ = config.autodiff
                mode = 'r'
            fpath = os.path.join(config.output_directory, fname)
            h5_autodump = tables.open_file(fpath, mode=mode)
            # published on the config module; presumably so that the
            # autodump/autodiff code can reach the open file -- verify
            config.autodump_file = h5_autodump
        else:
            h5_autodump = None

        # tell numpy we do not want warnings for x/0 and 0/0
        np.seterr(divide='ignore', invalid='ignore')

        # cumulated run time per process name
        process_time = defaultdict(float)
        # total number of rows (over all entities) per period
        period_objects = {}
        eval_ctx = EvaluationContext(self, self.entities_map, globals_data)

        def simulate_period(period_idx, period, processes, entities,
                            init=False):
            """Simulate a single period: load data, run processes, store data.

            processes is an iterable of (process, periodicity) pairs; a
            process only runs when period_idx is a multiple of its
            periodicity. init is True for the initialisation period: it
            changes what is printed and skips the remaining time estimate.
            Updates process_time and period_objects from the enclosing scope.
            """
            period_start_time = time.time()

            # set current period
            eval_ctx.period = period

            if config.log_level in ("functions", "processes"):
                print()
            # at the "periods" log level everything for a period stays on one
            # line, hence the trailing space instead of a newline
            print("period", period,
                  end=" " if config.log_level == "periods" else "\n")
            if init and config.log_level in ("functions", "processes"):
                for entity in entities:
                    print("  * %s: %d individuals" % (entity.name,
                                                      len(entity.array)))
            else:
                # NOTE(review): this branch is also taken when init is True and
                # the log level is neither "functions" nor "processes", so
                # load_period_data is then called for the init period too --
                # confirm this is intended.
                if config.log_level in ("functions", "processes"):
                    print("- loading input data")
                    for entity in entities:
                        print("  *", entity.name, "...", end=' ')
                        timed(entity.load_period_data, period)
                        print("    -> %d individuals" % len(entity.array))
                else:
                    for entity in entities:
                        entity.load_period_data(period)
            # stamp every entity (and every row of its array) with the period
            for entity in entities:
                entity.array_period = period
                entity.array['period'] = period

            if processes:
                num_processes = len(processes)
                for p_num, process_def in enumerate(processes, start=1):
                    process, periodicity = process_def

                    # set current entity
                    eval_ctx.entity_name = process.entity.name

                    if config.log_level in ("functions", "processes"):
                        print("- %d/%d" % (p_num, num_processes), process.name,
                              end=' ')
                        print("...", end=' ')
                    if period_idx % periodicity == 0:
                        elapsed, _ = gettime(process.run_guarded, eval_ctx)
                    else:
                        elapsed = 0
                        if config.log_level in ("functions", "processes"):
                            print("skipped (periodicity)")

                    process_time[process.name] += elapsed
                    # NOTE(review): "done." is printed for skipped processes
                    # too, right after "skipped (periodicity)".
                    if config.log_level in ("functions", "processes"):
                        if config.show_timings:
                            print("done (%s elapsed)." % time2str(elapsed))
                        else:
                            print("done.")
                    self.start_console(eval_ctx)

            if config.log_level in ("functions", "processes"):
                print("- storing period data")
                for entity in entities:
                    print("  *", entity.name, "...", end=' ')
                    timed(entity.store_period_data, period)
                    print("    -> %d individuals" % len(entity.array))
            else:
                for entity in entities:
                    entity.store_period_data(period)
#            print " - compressing period data"
#            for entity in entities:
#                print "  *", entity.name, "...",
#                for level in range(1, 10, 2):
#                    print "   %d:" % level,
#                    timed(entity.compress_period_data, level)
            period_objects[period] = sum(len(entity.array)
                                         for entity in entities)
            period_elapsed_time = time.time() - period_start_time
            if config.log_level in ("functions", "processes"):
                print("period %d" % period, end=' ')
            print("done", end=' ')
            if config.show_timings:
                print("(%s elapsed)" % time2str(period_elapsed_time), end="")
                if init:
                    print(".")
                else:
                    # main_start_time is assigned by run_single after the init
                    # period, which is why it is only read when init is False
                    main_elapsed_time = time.time() - main_start_time
                    periods_done = period_idx + 1
                    remaining_periods = self.periods - periods_done
                    avg_time = main_elapsed_time / periods_done
                    # future_time = period_elapsed_time * 0.4 + avg_time * 0.6
                    remaining_time = avg_time * remaining_periods
                    print(" - estimated remaining time: %s."
                          % time2str(remaining_time))
            else:
                print()

        print("""
=====================
 starting simulation
=====================""")
        try:
            # the init period is the one just before the first simulated period
            simulate_period(0, self.start_period - 1, self.init_processes,
                            self.entities, init=True)
            # started after the init period, so the averages and the remaining
            # time estimates do not include it
            main_start_time = time.time()
            periods = range(self.start_period,
                            self.start_period + self.periods)
            for period_idx, period in enumerate(periods):
                simulate_period(period_idx, period,
                                self.processes, self.entities)

            # the init period is not part of `periods`, so it is not counted
            total_objects = sum(period_objects[period] for period in periods)
            # guard against a simulation of 0 periods
            avg_objects = str(total_objects // self.periods) \
                if self.periods else 'N/A'
            main_elapsed_time = time.time() - main_start_time
            # guard against a zero elapsed time
            ind_per_sec = str(int(total_objects / main_elapsed_time)) \
                if main_elapsed_time else 'inf'

            print("""
==========================================
 simulation done
==========================================
 * %s elapsed
 * %s individuals on average
 * %s individuals/s/period on average
==========================================
""" % (time2str(time.time() - start_time), avg_objects, ind_per_sec))

            show_top_processes(process_time, 10)
#            if config.debug:
#                show_top_expr()

            if run_console:
                # fall back to the only entity when no default entity is set
                ent_name = self.default_entity
                if ent_name is None and len(eval_ctx.entities) == 1:
                    ent_name = list(eval_ctx.entities.keys())[0]
                # FIXME: fresh_data prevents the old (cloned) EvaluationContext
                # to be referenced from each EntityContext, which lead to period
                # being fixed to the last period of the simulation. This should
                # be fixed in EvaluationContext.copy but the proper fix breaks
                # stuff (see the comments there)
                console_ctx = eval_ctx.clone(fresh_data=True,
                                             entity_name=ent_name)
                c = console.Console(console_ctx)
                c.run()

        finally:
            # cleanup runs whether the simulation succeeded or raised
            self.close()
            if h5_autodump is not None:
                h5_autodump.close()
            if self.minimal_output:
                # remove the output file, then the directory which contained it
                output_path = self.data_sink.output_path
                dirname = os.path.dirname(output_path)
                try:
                    os.remove(output_path)
                    os.rmdir(dirname)
                except OSError:
                    # raised by either call (os.rmdir fails on a non-empty
                    # directory); this is best effort, so only warn
                    print("WARNING: could not delete temporary directory: %r"
                          % dirname)
示例#4
0
 def load(self):
     """Load the input data through the data source, via ``timed``.

     Returns whatever ``timed`` returns for the call
     ``self.data_source.load(self.globals_def, self.entities_map)``.
     """
     loader = self.data_source.load
     return timed(loader, self.globals_def, self.entities_map)
示例#5
0
    # copy globals
    if copy_globals:
        # noinspection PyProtectedMember
        input_file.root.globals._f_copy(output_file.root, recursive=True)

    output_entities = output_file.create_group("/", "entities", "Entities")
    for table in input_file.iterNodes(input_file.root.entities):
        # noinspection PyProtectedMember
        print(table._v_name, "...")
        copy_table(table, output_entities, condition=condition)

    input_file.close()
    output_file.close()


if __name__ == '__main__':
    # Command-line entry point:
    #     inputpath outputpath condition [copy_globals]
    import ast
    import sys
    import platform

    print("LIAM HDF5 filter %s using Python %s (%s)\n" %
          (__version__, platform.python_version(), platform.architecture()[0]))

    # a dict indexed by position, so that the optional 5th argument can be
    # fetched with .get() and a default
    args = dict(enumerate(sys.argv))
    if len(args) < 4:
        print("""Usage: {} inputpath outputpath condition [copy_globals]
where condition is an expression
      copy_globals is True (default)|False""".format(args[0]))
        sys.exit()

    # SECURITY: copy_globals used to be parsed with eval(), which executes any
    # expression given on the command line. ast.literal_eval only accepts
    # Python literals (True, False, 0, 1, ...), which covers the documented
    # values; anything else now raises ValueError/SyntaxError.
    copy_globals = ast.literal_eval(args.get(4, 'True'))
    timed(filter_h5, args[1], args[2], args[3], copy_globals)
示例#6
0
文件: merge_h5.py 项目: liam2/liam2
def merge_h5(input1_path, input2_path, output_path):
    """Merge the 'globals' and 'entities' groups of two HDF5 files.

    Both input files are opened read-only, the output file is created (or
    overwritten) and merge_group is called once per group.

    The three files are opened in a single ``with`` statement so that all of
    them are closed even when opening a later file or merging a group raises
    (they were previously leaked in that case).

    Arguments:
        input1_path: path of the first input HDF5 file.
        input2_path: path of the second input HDF5 file.
        output_path: path of the HDF5 file to write (opened with mode "w").
    """
    with tables.open_file(input1_path) as input1_file, \
            tables.open_file(input2_path) as input2_file, \
            tables.open_file(output_path, mode="w") as output_file:
        input1root = input1_file.root
        input2root = input2_file.root

        # the last argument differs in case between the two groups; presumably
        # it is the name of the period field in each group -- verify against
        # merge_group
        merge_group(input1root, input2root, 'globals', output_file, 'PERIOD')
        merge_group(input1root, input2root, 'entities', output_file, 'period')


if __name__ == '__main__':
    # Command-line entry point: inputpath1 inputpath2 outputpath
    import platform
    import sys

    print("LIAM HDF5 merge %s using Python %s (%s)\n"
          % (__version__,
             platform.python_version(),
             platform.architecture()[0]))

    args = sys.argv
    # the script name plus three paths are required
    if len(args) < 4:
        print("Usage: %s inputpath1 inputpath2 outputpath" % args[0])
        sys.exit()

    timed(merge_h5, *args[1:4])
示例#7
0
文件: data.py 项目: liam2/liam2
def index_tables(globals_def, entities, fpath):
    """Open the HDF5 input file, load its globals and index its entity tables.

    Globals already provided by load_path_globals or handle_constant_globals
    are kept as is; every other declared global is read from the 'globals'
    group of the input file and checked with assert_valid_type. Entity tables
    are indexed with index_table and wrapped in IndexedTable objects.

    Arguments:
        globals_def: mapping of global name -> definition.
        entities: mapping of entity name -> entity.
        fpath: path of the HDF5 input file.

    Returns:
        (input_file, data) where input_file is the still open file and data is
        {'globals': ..., 'entities': ...}.

    Raises:
        Exception: when a global which must come from the input file cannot
            be found in it. The input file is closed before any exception
            leaves this function.
    """
    print("reading data from %s ..." % fpath)
    input_file = tables.open_file(fpath)
    try:
        input_root = input_file.root

        # a global defined as a dict without a 'path' key must be read from
        # the input file itself
        needs_input_file = False
        for gdef in globals_def.values():
            if isinstance(gdef, dict) and 'path' not in gdef:
                needs_input_file = True
                break
        if needs_input_file and 'globals' not in input_root:
            raise Exception('could not find any globals in the input data file '
                            '(but some are declared in the simulation file)')

        globals_data = load_path_globals(globals_def)
        globals_data.update(handle_constant_globals(globals_def))
        globals_node = getattr(input_root, 'globals', None)
        for name, global_def in globals_def.items():
            if name in globals_data:
                # already loaded from another source (path)
                continue

            if name not in globals_node:
                raise Exception("could not find 'globals/%s' in the input "
                                "data file" % name)

            node = getattr(globals_node, name)

            fields_type = global_def.get('fields')
            expected_type = global_def.get('type', fields_type)
            # TODO: move the checking (assertValidType) to a separate function
            assert_valid_type(node, expected_type, context=name)
            array = node.read()
            if isinstance(expected_type, list):
                # do not keep in memory the columns which are present in the
                # input file but were not asked for by the modeller: they are
                # not accessible anyway
                array = add_and_drop_fields(array, expected_type)

            node_attrs = node.attrs
            dim_names = getattr(node_attrs, 'dimensions', None)
            if dim_names is not None:
                # dimension names are serialised as a numpy array (to get a
                # native hdf type instead of a pickle) but plain lists are
                # easier to work with; files written by Python 2 also contain
                # "bytes" instead of "str"
                dim_names = [str(dim_name) for dim_name in dim_names]
                pvalues = []
                for i in range(len(dim_names)):
                    pvalues.append(getattr(node_attrs, 'dim%d_pvalues' % i))
                axes = []
                for axis_name, labels in zip(dim_names, pvalues):
                    axes.append(la.Axis(labels, axis_name))
                array = la.LArray(array, axes)
            globals_data[name] = array

        input_entities = input_root.entities

        entities_tables = {}
        print(" * indexing tables")
        for ent_name, entity in entities.items():
            print("    -", ent_name, "...", end=' ')

            table = getattr(input_entities, ent_name)
            assert_valid_type(table, list(entity.fields.in_input.name_types))

            rows_per_period, id_to_rownum_per_period = timed(index_table,
                                                             table)
            entities_tables[ent_name] = IndexedTable(table, rows_per_period,
                                                     id_to_rownum_per_period)
    except:
        # deliberately catch everything: the file must not stay open when
        # loading fails, whatever the reason; the error is re-raised as is
        input_file.close()
        raise

    data = {'globals': globals_data, 'entities': entities_tables}
    return input_file, data
示例#8
0
        if action != 'sort':
            if len(args) < 5 and action != 'sort':
                print("link_fields argument must be provided if using an .h5 "
                      "input file")

            entities = [entity.split(':') for entity in args[4].split(';')]
            to_change = {ent_name: fields.split(',')
                         for ent_name, fields in entities}
            # convert {ent_name: [target_ent1.fname1, target_ent2.fname2]}
            #      to {ent_name: [(target_ent1, fname1), (target_ent2, fname2)]}
            for ent_name, fields in to_change.items():
                for i, fname in enumerate(fields):
                    fields[i] = \
                        fname.split('.') if '.' in fname else (ent_name, fname)
    else:
        simulation = Simulation.from_yaml(inputpath)
        inputpath = simulation.data_source.input_path
        to_change = {entity.name: fields_from_entity(entity)
                     for entity in simulation.entities}

    assert action in {'shrink', 'shuffle', 'sort'}
    if action == 'shrink':
        timed(change_ids, inputpath, outputpath, to_change)
    elif action == 'shuffle':
        timed(change_ids, inputpath, '_shuffled_temp.h5', to_change,
              shuffle=True)
        timed(h5_sort, '_shuffled_temp.h5', outputpath, list(to_change.keys()))
    else:
        ent_names = args[4].split(',') if len(args) >= 5 else None
        timed(h5_sort, inputpath, outputpath, ent_names)
示例#9
0
    output_entities = output_file.create_group("/", "entities", "Entities")
    for table in input_file.iterNodes(input_root.entities):
        table_fields = get_fields(table)
        output_dtype = np.dtype([(fname, ftype)
                                 for fname, ftype in table_fields
                                 if fname not in todrop])
        size = (len(table) * table.dtype.itemsize) / 1024.0 / 1024.0
        # noinspection PyProtectedMember
        print(" * copying table %s (%.2f Mb) ..." % (table._v_name, size),
              end=' ')
        copy_table(table, output_entities, output_dtype)
        print("done.")

    input_file.close()
    output_file.close()


if __name__ == '__main__':
    # Command-line entry point: inputpath outputpath field1 [field2 ...]
    import platform
    import sys

    print("LIAM HDF5 drop fields %s using Python %s (%s)\n"
          % (__version__,
             platform.python_version(),
             platform.architecture()[0]))

    args = sys.argv
    # at least one field to drop is required after the two paths
    if len(args) < 4:
        print("Usage: %s inputpath outputpath field1 [field2 ...]" % args[0])
        sys.exit()

    timed(dropfields, args[1], args[2], args[3:])
示例#10
0
文件: data.py 项目: slee1009/liam2
def index_tables(globals_def, entities, fpath):
    """Read the globals and index the entity tables of an HDF5 input file.

    Arguments:
        globals_def: mapping of global name -> definition. Globals which are
            not provided by load_path_globals or handle_constant_globals are
            read from the 'globals' group of the file.
        entities: mapping of entity name -> entity; one table per entity is
            looked up in the 'entities' group of the file.
        fpath: path of the HDF5 file to open.

    Returns:
        A pair (input_file, data): the file, left open, and a dict with the
        keys 'globals' and 'entities'.

    Raises:
        Exception: if a global expected in the file is missing. Whatever the
            error, the file is closed before the error propagates.
    """
    print("reading data from %s ..." % fpath)
    input_file = tables.open_file(fpath)
    try:
        input_root = input_file.root

        # definitions which are dicts without a 'path' key can only be loaded
        # from the input file
        from_input_file = [gdef for gdef in globals_def.values()
                           if isinstance(gdef, dict) and 'path' not in gdef]
        if from_input_file and 'globals' not in input_root:
            raise Exception(
                'could not find any globals in the input data file '
                '(but some are declared in the simulation file)')

        globals_data = load_path_globals(globals_def)
        constants = handle_constant_globals(globals_def)
        globals_data.update(constants)
        globals_node = getattr(input_root, 'globals', None)
        for name, global_def in globals_def.items():
            # skip what was already loaded from another source (path)
            if name in globals_data:
                continue

            if name not in globals_node:
                raise Exception("could not find 'globals/%s' in the input "
                                "data file" % name)

            stored = getattr(globals_node, name)

            declared_type = global_def.get('type', global_def.get('fields'))
            # TODO: move the checking (assertValidType) to a separate function
            assert_valid_type(stored, declared_type, context=name)
            values = stored.read()
            if isinstance(declared_type, list):
                # columns present in the file but not requested by the
                # modeller are dropped: they would not be accessible anyway
                values = add_and_drop_fields(values, declared_type)

            stored_attrs = stored.attrs
            dim_names = getattr(stored_attrs, 'dimensions', None)
            if dim_names is not None:
                # stored as a numpy array (a native hdf type rather than a
                # pickle) and possibly as "bytes" when written by Python 2:
                # convert to a simple list of str
                dim_names = [str(dim_name) for dim_name in dim_names]
                num_dims = len(dim_names)
                pvalues = [getattr(stored_attrs, 'dim%d_pvalues' % i)
                           for i in range(num_dims)]
                axes = [la.Axis(labels, axis_name)
                        for axis_name, labels in zip(dim_names, pvalues)]
                values = la.LArray(values, axes)
            globals_data[name] = values

        input_entities = input_root.entities

        entities_tables = {}
        print(" * indexing tables")
        for ent_name, entity in entities.items():
            print("    -", ent_name, "...", end=' ')

            table = getattr(input_entities, ent_name)
            expected_fields = list(entity.fields.in_input.name_types)
            assert_valid_type(table, expected_fields)

            index = timed(index_table, table)
            rows_per_period, id_to_rownum_per_period = index
            entities_tables[ent_name] = IndexedTable(
                table, rows_per_period, id_to_rownum_per_period)
    except:
        # catch-all on purpose: never leave the file open on failure, then let
        # the original error propagate unchanged
        input_file.close()
        raise

    return input_file, {'globals': globals_data, 'entities': entities_tables}
示例#11
0
    output_entities = output_file.create_group("/", "entities", "Entities")
    for table in input_file.iterNodes(input_root.entities):
        table_fields = get_fields(table)
        output_dtype = np.dtype([(fname, ftype) for fname, ftype in table_fields
                                 if fname not in todrop])
        size = (len(table) * table.dtype.itemsize) / 1024.0 / 1024.0
        # noinspection PyProtectedMember
        print(" * copying table %s (%.2f Mb) ..." % (table._v_name, size),
              end=' ')
        copy_table(table, output_entities, output_dtype)
        print("done.")

    input_file.close()
    output_file.close()


if __name__ == '__main__':
    # Script usage: inputpath outputpath field1 [field2 ...]
    import sys
    import platform

    print(
        "LIAM HDF5 drop fields %s using Python %s (%s)\n"
        % (__version__, platform.python_version(), platform.architecture()[0])
    )

    args = sys.argv
    if not len(args) >= 4:
        # the two paths and at least one field name are mandatory
        print("Usage: %s inputpath outputpath field1 [field2 ...]" % args[0])
        sys.exit()

    timed(dropfields, args[1], args[2], args[3:])