Example #1
def make_int_to_action(file):
    # Each row after the header is (int_code, action_name); build the
    # code -> action lookup list.
    ita_data = CsvDataset(file)
    ita = [None] * len(ita_data.feature_vectors)
    for fv in ita_data.feature_vectors:
        idx = int(fv[0])
        ita[idx] = fv[1]
    return ita
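A quick usage sketch (the file name and contents are invented for illustration; CsvDataset is assumed to consume the header row and expose the remaining rows as lists of strings):

# actions.csv (hypothetical):
#   int,action
#   0,north
#   1,south
with open("actions.csv") as f:
    int_to_action = make_int_to_action(f)
# int_to_action == ["north", "south"]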
Example #2
import os

def summarize(dir):
    Nagents = 2
    summary = Summary()
    for w in os.listdir(dir):
        wpath = os.path.join(dir, w)
        if os.path.isdir(wpath):
            print(wpath)
            try:
                with open(os.path.join(wpath, "result.csv")) as wfile:
                    results = CsvDataset(wfile)
            except IOError:
                print("! No 'result.csv' in '" + wpath + "'")
                continue
            try:
                with open(os.path.join(wpath, "game-log.csv")) as wfile:
                    log = CsvDataset(wfile)
            except IOError:
                print("! No 'game-log.csv' in '" + wpath + "'")
                continue

            # Tally wins and scores from the per-world results file
            log_ai = [log.attribute_index("a0"), log.attribute_index("a1")]
            for fv in results.feature_vectors:
                if fv[0] == "winner":
                    summary.win_counts[int(fv[1])] += 1
                elif fv[0] == "score":
                    summary.score.add(float(fv[1]))
            # Count how many times each agent's policy changed between steps
            policies = [None] * Nagents
            switches = [0] * Nagents
            for fv in log.feature_vectors:
                for i in range(Nagents):
                    if policies[i] is None:
                        policies[i] = fv[log_ai[i]]
                    elif policies[i] != fv[log_ai[i]]:
                        switches[i] += 1
                        policies[i] = fv[log_ai[i]]
            for i in range(Nagents):
                summary.switches[i].add(switches[i])
    return summary
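Summary is not defined in this excerpt. A minimal stand-in consistent with how summarize() uses it (all names inferred from the calls above; the real class may differ):

class StatAccumulator:
    # Hypothetical accumulator; summarize() only needs an add() method.
    def __init__(self):
        self.values = []

    def add(self, x):
        self.values.append(x)

class Summary:
    def __init__(self, Nagents=2):
        self.win_counts = [0] * Nagents  # sizing is a guess; a draw code may need an extra slot
        self.score = StatAccumulator()
        self.switches = [StatAccumulator() for _ in range(Nagents)]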
Example #3
def process_data( data ):
	global acc, headers, master, Ngames_idx, options
	if master is None:
		master = CsvDataset( attributes=data.attributes[:], feature_vectors=[] )
		Ngames_idx = master.attribute_index( "Ngames" )
		mean_attributes = [master.attribute_index( a ) for a in [
							"mean", "state_branching_mean", "action_branching_mean", "tree_depth_mean", "steps_mean"]]
		var_attributes = [master.attribute_index( a ) for a in [
							"var", "state_branching_var", "action_branching_var", "tree_depth_var", "steps_var"]]
		min_attributes = [master.attribute_index( a ) for a in ["steps_min"]]
		max_attributes = [master.attribute_index( a ) for a in ["steps_max"]]
	else:
		if len(master.attributes) != len(data.attributes):
			on_error( "Unequal column count" )
		for i in range(0, len(master.attributes)):
			if master.attributes[i].name != data.attributes[i].name:
				on_error( "Different headers" )
	hidx = None
	if options.combine is not None:
		# NOTE: options.combine is eval'd as a Python list literal of column
		# names; ast.literal_eval would be a safer choice here.
		hidx = [data.attribute_index( name ) for name in eval( options.combine )]
	for fv in data.feature_vectors:
		key = len(acc)
		if hidx is not None:
			key = tuple(fv[i] for i in hidx)
		try:
			exemplar = acc[key]
			acc[key] = combine( exemplar, fv )
		except KeyError: 
			acc[key] = fv[:]
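The combine() helper is not shown in this excerpt. When --combine is given, rows sharing the same values in the named columns are merged through it; otherwise key = len(acc) gives every row a unique key and nothing merges. One plausible shape, inferred from the index lists built above (hypothetical; it assumes mean_attributes and Ngames_idx are visible at module scope, and the real version presumably also pools the var/min/max columns):

def combine(exemplar, fv):
	# Pool two result rows by an Ngames-weighted average of the mean columns
	out = exemplar[:]
	n0 = float(exemplar[Ngames_idx])
	n1 = float(fv[Ngames_idx])
	out[Ngames_idx] = str(int(n0 + n1))
	for i in mean_attributes:
		out[i] = str((n0 * float(exemplar[i]) + n1 * float(fv[i])) / (n0 + n1))
	return out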
Example #4
# ----------------------------------------------------------------------------
# Main
# ----------------------------------------------------------------------------

cl_parser = OptionParser(usage="%prog [options] file")
cl_parser.add_option("-o",
                     dest="output_file",
                     type="string",
                     default=None,
                     help="The file to write the output to (default: stdout)")
(options, args) = cl_parser.parse_args()

if len(args) == 0:
    cl_parser.error("No input file")
elif len(args) > 1:
    print("WARNING: Multiple input files; ignoring all but the first")

arff_file = open(args[0], "r")

if options.output_file is None:
    # Saying 'open( "sys.stdout" )' doesn't seem to accomplish this
    output_file = sys.stdout
else:
    output_file = open(options.output_file, "w")

arff_dataset = ArffDataset(arff_file)
csv_dataset = CsvDataset.from_arff_dataset(arff_dataset)
arff_file.close()

output_file.write(repr(csv_dataset))
# Don't close stdout if we defaulted to it
if output_file is not sys.stdout:
    output_file.close()
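This script converts an ARFF dataset to the CSV format used by the other examples. An illustrative invocation (the script name is hypothetical):

    python arff_to_csv.py -o output.csv input.arff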
Example #5
else:
    output_file = open(options.output_file, "w")

out_attributes = [
    "Domain", "Algorithm", "domain_params", "criterion", "ss.abstraction",
    "par.priority", "par.classifier", "random_abstraction.k", "ss.budget_type",
    "ss.budget", "V_mean", "V_var", "V_conf", "ss.width", "ss.depth",
    "seed.world", "seed.sim"
]

# out_attributes = [
# "domain", "criterion", "ss.abstraction", "par.subtree_refinement_order", "par.classifier", "random_partition.k", "ss.budget_type", "ss.budget",
# "V_mean", "V_var", "V_conf", "ss.width", "ss.depth", "seed"
# ]

out_data = CsvDataset(attributes=[CsvAttribute(a) for a in out_attributes],
                      feature_vectors=[])

with open(args[0], "r") as input_file:
    in_data = CsvDataset(input_file)

# [jhostetler:20160804] The Saving domain had an extra header in its output,
# causing everything to be shifted by 1.
Nepisodes_idx = len(in_data.attributes)
try:
    Nepisodes_idx = in_data.attribute_index("Nepisodes")
except KeyError:
    # KeyError means the spurious 'Nepisodes' header is absent; no fix needed
    pass
if Nepisodes_idx != len(in_data.attributes):
    print("WARNING: Found 'Nepisodes' header; correcting dataset")
    sailing_p = in_data.attribute_index("sailing.p")
Example #6
cl_parser.add_option("--attribute",
                     dest="attribute",
                     type="string",
                     default=None,
                     help="The attribute to match")
cl_parser.add_option("--value",
                     dest="value",
                     type="string",
                     default=None,
                     help="The value to match")
(options, args) = cl_parser.parse_args()


def open_output(options):
    if options.output_file == "-":
        # Saying 'open( "sys.stdout" )' doesn't seem to accomplish this
        return sys.stdout
    else:
        return open(options.output_file, "w")


with open(args[0], "r") as input_file:
    in_data = CsvDataset(input_file)
    out_data = CsvDataset(attributes=in_data.attributes, feature_vectors=[])
    idx = in_data.attribute_index(options.attribute)
    for fv in in_data.feature_vectors:
        if fv[idx] == options.value:
            out_data.feature_vectors.append(fv[:])
with open_output(options) as output_file:
    output_file.write(repr(out_data))
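Together with the options above, this excerpt is a simple row filter: it keeps only the rows whose attribute column equals value. An illustrative invocation (the script name is hypothetical, and it assumes the -o/output_file option seen in the other examples, with '-' meaning stdout):

    python csv_filter.py -o - --attribute Algorithm --value "PAR(bf; DT)" input.csv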
Example #7
cl_parser.add_option( "--loose", dest="loose", action="store_true", default=False,
					  help="If specified, files are not checked for header or column count equality." )
cl_parser.add_option( "--no-headers", dest="no_headers", action="store_true", default=False,
					  help="If specified, indicates that the files have no header rows." )
cl_parser.add_option( "--combine", dest="combine", type="string", default=None,
					  help="""A python list of header names. If specified, results will be combined for
							  all rows that have the same value in all specified columns.""" )
(options, args) = cl_parser.parse_args()

if options.output_file == "-":
	# Saying 'open( "sys.stdout" )' doesn't seem to accomplish this
	output_file = sys.stdout
else:
	output_file = open( options.output_file, "w" )

for file in args:
	print( file )
	input_file = open( file, "r" )
	in_data = CsvDataset( input_file )
	process_data( in_data )
	input_file.close()
	
conf_idx = master.attribute_index( "conf" )
var_idx = master.attribute_index( "var" )
Ngames_idx = master.attribute_index( "Ngames" )
for fv in acc.values():
	fv[conf_idx] = 1.96 * math.sqrt( float(fv[var_idx]) ) / math.sqrt( float(fv[Ngames_idx]) )
master.feature_vectors = [list(map(str, v)) for v in acc.values()]
output_file.write( repr(master) )
output_file.close()
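For reference, the conf value computed in the final loop is the half-width of a normal-approximation 95% confidence interval for the mean: conf = 1.96 * sqrt(var / Ngames).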
Example #8
    dest="complete",
    action="store_true",
    default=False,
    help=
    "If specified, only parameterizations for which results are available for all budgets are considered."
)
(options, args) = cl_parser.parse_args()

if options.output_file == "-":
    # Saying 'open( "sys.stdout" )' doesn't seem to accomplish this
    output_file = sys.stdout
else:
    output_file = open(options.output_file, "w")

with open(args[0], "r") as input_file:
    in_data = CsvDataset(input_file)

domain_kb = KeyBuilder(in_data, ["domain_params"])
# par_kb = KeyBuilder( in_data, ["ss.abstraction", "par.priority", "par.classifier"] )
par_kb = KeyBuilder(in_data, ["Algorithm"])
values = dict()
# The "blocks" are Domain x Budget
block_kb = KeyBuilder(in_data, ["domain_params", "ss.budget"])
column_kb = KeyBuilder(in_data, ["Algorithm"])
friedman = dict()
alg_set = set()
for fv in in_data.feature_vectors:
    if not fv[in_data.attribute_index("Algorithm")].startswith("PAR"):
        continue
    print("par: " + fv[in_data.attribute_index("Algorithm")])
Example #9
# Set output
if options.output_file is None:
    output_file = sys.stdout
else:
    output_file = open(options.output_file, "w")

out_fields = [
    "experiment", "algorithm", "faults", "N", "Nblackout", "V", "V_stdev",
    "V_median", "V_min", "V_max", "R_end", "R_end_stdev", "R_end_median",
    "R_end_min", "R_end_max", "t_blackout", "t_blackout_stdev",
    "t_blackout_median", "t_blackout_min", "t_blackout_max", "NLoadShed",
    "NLoadShed_stdev", "NLoadShed_median", "NLoadShed_min", "NLoadShed_max",
    "NIsland", "NIsland_stdev", "NIsland_median", "NIsland_min", "NIsland_max"
]
out_data = CsvDataset(attributes=list(map(CsvAttribute, out_fields)),
                      feature_vectors=[])

missing = []
for filename in args:
    # for filename in glob.iglob( args[0] ):
    # experiment = os.path.dirname( filename )
    experiment = filename
    print("'" + experiment + "'")
    with open(experiment + ".csv") as fparams:
        params = CsvDataset(fparams)
        try:
            f = open(os.path.join(filename, "rewards.csv"))
        except IOError as ex:
            print("WARNING: Skipping " + filename)
            print(str(ex))
            missing.append(
Example #10
# header_file.close()

for rewards_filename in args:
    missing = None
    with open(rewards_filename, "r+") as rewards_file:
        lines = sum(1 for line in rewards_file)

        if lines == 2:
            # Assumed good
            print(rewards_filename)

        if False:  # disabled repair path; the original condition was 'lines == 1'
            print("Missing header: " + rewards_filename)
            rewards_file.seek(0, 0)
            bad_dataset = CsvDataset(rewards_file, headers=False)
            bad_dataset.attributes = header_dataset.attributes[:]
            params_filename = os.path.dirname(rewards_filename) + ".csv"
            print(params_filename)
            with open(params_filename) as params_file:
                params_dataset = CsvDataset(params_file)
            params = params_dataset.feature_vectors[0]
            # Fix the feature vectors
            fv = bad_dataset.feature_vectors[0]
            fv[0] = str(0)  # Filled with NUL for some reason
            for i in range(0, 3):
                # Copy missing parameters
                attr_name = header_dataset.attributes[i].name
                attr_idx = params_dataset.attribute_index(attr_name)
                print("\tInsert " + attr_name + " = " + params[attr_idx])
                fv.insert(i, params[attr_idx])
Example #11
    dest="fix_fault_time",
    default=False,
    help=
    "Enable correction for early experiments that activated the fault one step later"
)
(options, args) = cl_parser.parse_args()

if options.output_file == "-":
    output_file = sys.stdout
else:
    output_file = open(options.output_file, "w")

out_fields = [
    "Tstable", "Tepisode", "Vu", "Ru_end", "Nfaults", "fault0", "fault1"
]
# list(...) is needed in Python 3, where map returns an iterator
out_data = CsvDataset(attributes=list(map(CsvAttribute, out_fields)),
                      feature_vectors=[])

for filename in args:
    with open(filename) as f:
        data = CsvDataset(f)
        [iTstable, iTepisode, iNfaults, ifault0,
         ifault1] = map(data.attribute_index,
                        ["Tstable", "Tepisode", "Nfaults", "fault0", "fault1"])
        for fv in data.feature_vectors:
            Tstable = int(fv[iTstable])
            Tepisode = int(fv[iTepisode])
            T = Tstable + Tepisode
            r = 0.0
            Rend = 0
            for t in range(0, T):
                i = data.attribute_index("t" + str(t))
Example #12
cl_parser.add_option( "--delim", dest="delim", type="string", default=",",
					  help="""The delimiter string (default: ","). """ )
cl_parser.add_option( "--loose", dest="loose", action="store_true", default=False,
					  help="If specified, files are not checked for header or column count equality." )
(options, args) = cl_parser.parse_args()

if options.output_file == "-":
	output_file = sys.stdout
else:
	output_file = open( options.output_file, "w" )

# Find the largest set of headers
headers = HeaderAccumulator()
for file in args:
	input_file = open( file, "r" )
	in_data = CsvDataset( input_file )
	print( file + ": " + str(len(in_data.attributes)) )
	headers.add( in_data.attributes )
	input_file.close()
print( headers.attributes )

# Treat the largest header set as canonical
master = CsvDataset( attributes=headers.attributes[:], feature_vectors=[] )
for file in args:
	input_file = open( file, "r" )
	in_data = CsvDataset( input_file )
	process_data( master, in_data )
	input_file.close()

output_file.write( repr(master) )
output_file.close()
Example #13
                     default="-",
                     help="The file to write the output to (default: stdout)")
# cl_parser.add_option( "--complete", dest="complete", action="store_true", default=False,
# help="If specified, only parameterizations for which results are available for all budgets are considered." )
(options, args) = cl_parser.parse_args()

if options.output_file == "-":
    # Saying 'open( "sys.stdout" )' doesn't seem to accomplish this
    output_file = sys.stdout
    rank_file = sys.stdout
else:
    output_file = open(options.output_file, "w")
    rank_file = open("rank-" + options.output_file, "w")

with open(args[0], "r") as input_file:
    in_data = CsvDataset(input_file)

# The "blocks" are Domain x Budget
block_kb = KeyBuilder(in_data, ["domain_params", "ss.budget"])
# The "treatments" are the different PARSS variants
column_kb = KeyBuilder(in_data, ["Algorithm"])
# This will be a two-level map Block -> Column -> V_mean
friedman = dict()
alg_set = set([
    "PAR(bf; DT)", "PAR(bf; random)", "PAR(uniform; DT)",
    "PAR(uniform; random)", "PAR(variance; DT)", "PAR(variance; random)"
])
# Gather results into 'friedman' table
for fv in in_data.feature_vectors:
    alg = fv[in_data.attribute_index("Algorithm")]
    if alg not in alg_set:
Example #14
cl_parser.add_argument("-N", type=int, default=0, help="Size of each subset")
cl_parser.add_argument("-k",
                       type=int,
                       default=1,
                       help="Number of subsets for cross-validation")
cl_parser.add_argument("--seed",
                       type=int,
                       default=0,
                       help="RNG seed (default: no particular seed)")
args = cl_parser.parse_args()

if args.N == 0 and args.k != 1:
    raise RuntimeError("If args.N == 0, then args.k must be 1")

with open(args.input_file[0]) as input_file:
    data = CsvDataset(input_file)
    t_blackout = data.attribute_index("t_blackout")
    faults = data.attribute_index("faults")
    recoverable_faults = []
    for fv in data.feature_vectors:
        t = int(fv[t_blackout])
        if t > 10 and t < 311:  # window treated as "recoverable" (bounds look experiment-specific)
            recoverable_faults.append(fv[faults])
    if args.N * args.k > len(recoverable_faults):
        raise AssertionError("Not enough faults for requested dataset size")
    rng = random.Random()
    if args.seed != 0:
        rng.seed(args.seed)
    rng.shuffle(recoverable_faults)
    next = 0  # index of the first unconsumed fault; note this shadows the builtin next()
    for i in range(0, args.k):
Example #15
def make_action_to_int(file):
    # Each row after the header is (int_code, action_name); build the
    # action -> code dictionary.
    ati_data = CsvDataset(file)
    ati = dict()
    for fv in ati_data.feature_vectors:
        ati[fv[1]] = int(fv[0])
    return ati
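This is the inverse of Example #1's make_int_to_action: on the same hypothetical actions.csv it would return {"north": 0, "south": 1}, mapping action names back to their integer codes.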
Example #16
cl_parser = OptionParser(usage="%prog [options] file")
cl_parser.add_option("--subdivide",
                     dest="subdivide",
                     type="string",
                     default=None,
                     help="Attribute to subdivide.")
cl_parser.add_option("--subdivisions",
                     dest="subdivisions",
                     type="int",
                     default=1,
                     help="Number of subdivisions.")
(options, args) = cl_parser.parse_args()

csv_file = open(args[0], "r")
csv_dataset = CsvDataset(csv_file)

count = 0
for fv in csv_dataset.feature_vectors:
    print(fv)
    exploded = CsvDataset(attributes=csv_dataset.attributes[:],
                          feature_vectors=[fv[:]])
    print(exploded.feature_vectors[0])
    if options.subdivide is not None:
        idx = exploded.attribute_index(options.subdivide)
        n = int(exploded.feature_vectors[0][idx])
        q = n // options.subdivisions  # floor division: subdivision sizes must stay integers
        r = n % options.subdivisions
        sub = [q] * options.subdivisions
        for i in range(0, r):
            sub[i] += 1
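The quotient/remainder arithmetic splits the attribute's value n into near-equal integer parts. For example, n = 10 with --subdivisions 3 gives q = 3 and r = 1, so sub becomes [4, 3, 3].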
Example #17
import argparse
import statistics

cl_parser = argparse.ArgumentParser(
    description="Creates random subsets of faults")
cl_parser.add_argument("input_file",
                       type=str,
                       nargs=1,
                       help="A 'rewards.csv' input file")
cl_parser.add_argument("-d",
                       type=int,
                       default=60,
                       help="Size of intervals for analysis")
args = cl_parser.parse_args()

with open(args.input_file[0]) as fin:
    data = CsvDataset(fin)
    intervals = []
    for fv in data.feature_vectors:
        i = data.attribute_index("r1")
        j = i + args.d
        while j < len(data.attributes):
            ri = float(fv[i])
            rj = float(fv[j])
            intervals.append(abs(rj - ri))
            i += 2
            j += 2

print("n:      " + str(len(intervals)))
print("mean:   " + str(statistics.mean(intervals)))
print("stddev: " + str(statistics.stdev(intervals)))
print("min:    " + str(min(intervals)))
Example #18
					  help="If specified, only parameterizations for which results are available for all budgets are considered." )
(options, args) = cl_parser.parse_args()

if options.output_file == "-":
	# Saying 'open( "sys.stdout" )' doesn't seem to accomplish this
	output_file = sys.stdout
else:
	output_file = open( options.output_file, "w" )

fault_str = "faults"
out_attributes = [
"Domain", "domain_parameters", fault_str, "Algorithm", "algorithm_parameters", "budget_type", "budget",
"Vc", "Vu", "dV", "Rc_end", "Ru_end", "dR", "NLoadShed", "NIsland"
]

out_data = CsvDataset( attributes=[CsvAttribute( a ) for a in out_attributes], feature_vectors=[] )
	
with open( args[0], "r" ) as input_file:
	c_data = CsvDataset( input_file )
with open( args[1], "r" ) as input_file:
	u_data = CsvDataset( input_file )
	
# Index map for fault sets in baseline data
u_idx = dict()
for i in range(0, len(u_data.feature_vectors)):
	u_fv = u_data.feature_vectors[i]
	u_idx[u_fv[u_data.attribute_index(fault_str)]] = i
	
for c_fv in c_data.feature_vectors:
	faults = c_fv[c_data.attribute_index(fault_str)]
	Vc = float( c_fv[c_data.attribute_index("V")] )
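u_idx acts as a join index: it maps each fault set to its row number in the uncontrolled baseline (u_data), so the loop over c_data can pair each controlled value (Vc) with the matching uncontrolled value by fault set rather than by row position.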