def load_ch_matrix():
    """Load the characteristic matrix of the current dataset and its companions.

    Four artifacts are read, in this order, from files whose names are built
    from the module-level ``path`` and ``dataset`` values: the NodeID map,
    the hypergraph, the characteristic matrix and the column-index-to-node
    map. Every read is announced with a timestamp (formatted with the
    module-level ``time_format``) and followed by its elapsed wall-clock time.

    Returns:
        The tuple ``(ch_matrix, hypergraph, index_node_map, node_id_map)``.
    """
    rule = "-----------------------------------------"

    def report(*items):
        # One pre-joined string per call: the items end up space-separated on
        # a single line, exactly as a multi-item print statement writes them.
        print(" ".join(str(item) for item in items))

    # NodeID map
    report("Reading NodeID map started at", time.strftime(time_format))
    began = time.time()
    node_ids = inout.load_from_file(path + "{0}_node_id_map".format(dataset))
    report("Reading NodeID map took", time.time() - began, "s")
    report(rule)

    # Hypergraph
    report("Reading hypergraph started at", time.strftime(time_format))
    began = time.time()
    graph = Hypergraph.load_from_file(path + "{0}_hgraph".format(dataset))
    report("Reading hypergraph took", time.time() - began, "s")
    report(rule)

    # Characteristic matrix
    report("Reading characteristic matrix started at",
           time.strftime(time_format))
    began = time.time()
    matrix = CharacteristicMatrix.load_from_file(
        path + "{0}_ch_matrix".format(dataset))
    report("Reading characteristic matrix took", time.time() - began, "s")
    report(rule)

    # Column index -> node map
    report("Reading Column index to Node map started at",
           time.strftime(time_format))
    began = time.time()
    columns_to_nodes = inout.load_from_file(
        path + "{0}_index_node_map".format(dataset))
    report("Reading Column index to Node map took", time.time() - began, "s")
    report(rule)

    return matrix, graph, columns_to_nodes, node_ids
def load_ch_matrix():
    """Read the characteristic matrix and the structures that accompany it.

    The NodeID map, the hypergraph, the characteristic matrix and the
    column-index-to-node map are loaded one after another. Their file names
    are the module-level ``path`` followed by the ``dataset`` name and a
    fixed suffix. A start timestamp and the elapsed time in seconds are
    printed around each load.

    Returns:
        ``(ch_matrix, hypergraph, index_node_map, node_id_map)``.
    """
    divider = "-----------------------------------------"

    def say(*parts):
        # Join first, print once: yields the same space-separated line that a
        # print statement produces for several items.
        print(" ".join(str(part) for part in parts))

    def dataset_file(template):
        # All artifacts live next to each other under ``path``.
        return path + template.format(dataset)

    say("Reading NodeID map started at", time.strftime(time_format))
    t0 = time.time()
    id_map = inout.load_from_file(dataset_file("{0}_node_id_map"))
    say("Reading NodeID map took", time.time() - t0, "s")
    say(divider)

    say("Reading hypergraph started at", time.strftime(time_format))
    t0 = time.time()
    hgraph = Hypergraph.load_from_file(dataset_file("{0}_hgraph"))
    say("Reading hypergraph took", time.time() - t0, "s")
    say(divider)

    say("Reading characteristic matrix started at", time.strftime(time_format))
    t0 = time.time()
    characteristic = CharacteristicMatrix.load_from_file(
        dataset_file("{0}_ch_matrix"))
    say("Reading characteristic matrix took", time.time() - t0, "s")
    say(divider)

    say("Reading Column index to Node map started at",
        time.strftime(time_format))
    t0 = time.time()
    column_map = inout.load_from_file(dataset_file("{0}_index_node_map"))
    say("Reading Column index to Node map took", time.time() - t0, "s")
    say(divider)

    return characteristic, hgraph, column_map, id_map
def build_svmlight_chemical_data(in_files, wl_iterations, output_dir,
                                 format_rdf=False,
                                 compounds_targets_file=None, uri_prefix=None,
                                 shingles_type="features", window_size=5,
                                 accumulate_wl_shingles=True,
                                 fingerprints=False,
                                 sort_rdf_nodes_before_processing=True,
                                 state_input_file=None, state_output_file=None,
                                 save_just_last_wl_it=False):
    """Process a chemical compound database into ``svm_light_data_wl_<i>`` files.

    One output file is opened for every ``i`` in ``0..wl_iterations`` and the
    open handles are handed, through the shared ``state`` dictionary, to
    ``process_record``, which is invoked once per compound record.

    Args:
        in_files: When ``format_rdf`` is true, a list of input files.
            Otherwise a single input; if a list is given only its first
            element is used.
        wl_iterations: Highest iteration index; ``wl_iterations + 1`` output
            files are created.
        output_dir: Prefix prepended verbatim to each output file name, so it
            has to end with a path separator.
        format_rdf: Read the input with ``prepare_rdf_chemical_data`` instead
            of ``read_chemical_compounts``.
        compounds_targets_file: Required (truthy) when ``format_rdf`` is true;
            forwarded to ``prepare_rdf_chemical_data``.
        uri_prefix: Required (truthy) when ``format_rdf`` is true; forwarded
            to ``prepare_rdf_chemical_data``.
        shingles_type, window_size, accumulate_wl_shingles, fingerprints,
        save_just_last_wl_it: Forwarded unchanged to ``process_record``.
            ``accumulate_wl_shingles`` and ``fingerprints`` additionally
            decide which shingle-id counters a fresh state starts with.
        sort_rdf_nodes_before_processing: Forwarded to
            ``prepare_rdf_chemical_data``.
        state_input_file: If given, the processing state is loaded from this
            file instead of being created from scratch.
        state_output_file: If given, the final state (without the file
            handles) is saved to this file.

    Raises:
        AssertionError: If ``format_rdf`` is true and ``in_files`` is not a
            list, or ``compounds_targets_file`` / ``uri_prefix`` is falsy.
    """
    if format_rdf:
        assert isinstance(in_files, list)
        assert bool(compounds_targets_file)
        assert bool(uri_prefix)
    elif isinstance(in_files, list):
        in_files = in_files[0]

    files = []
    try:
        # Opened inside the try block so that a failure part-way through
        # (or anywhere below) still closes every handle opened so far.
        for i in range(wl_iterations + 1):
            files.append(
                open(output_dir + "svm_light_data_wl_{0}".format(i), "w"))

        if state_input_file:
            state = inout.load_from_file(state_input_file)
            # File handles are never persisted; attach the fresh ones.
            state['files'] = files
        else:
            wl_state = {"wl_state": None}
            shingle_id_map = {}
            if not fingerprints:
                if accumulate_wl_shingles:
                    # A single shingle-id counter for all iterations.
                    wl_state["next_shingle_id"] = 1
                else:
                    # One shingle-id counter per iteration.
                    for i in range(wl_iterations + 1):
                        wl_state["wl_{0}_next_shingle_id".format(i)] = 1
            state = {
                "files": files,
                "wl_state": wl_state,
                "shingle_id_map": shingle_id_map,
                "rdf_colors": {'colors': None, 'next_color_id': None}
            }

        def process_compound(chem_record):
            # Callback given to the readers; closes over ``state``.
            process_record(chem_record, wl_iterations, state,
                           binary_target_labels=True,
                           shingles_type=shingles_type,
                           window_size=window_size,
                           accumulate_wl_shingles=accumulate_wl_shingles,
                           fingerprints=fingerprints,
                           save_just_last_wl_it=save_just_last_wl_it)

        if format_rdf:
            chem_database, state['rdf_colors'] = prepare_rdf_chemical_data(
                in_files, compounds_targets_file, uri_prefix,
                process_compound,
                sort_rdf_nodes_before_processing=sort_rdf_nodes_before_processing,
                rdf_colors_state=state['rdf_colors'])
        else:
            chem_database = read_chemical_compounts(in_files, process_compound)

        # Walk the whole database, printing the record index as progress.
        # NOTE(review): the readers presumably invoke ``process_compound``
        # lazily while being iterated -- confirm against their definitions.
        for i, _ in enumerate(chem_database):
            print(i)
    finally:
        for f in files:
            f.close()

    # The handles are closed and must not end up in the saved state.
    del state['files']
    if state_output_file:
        inout.save_to_file(state, state_output_file)

    print("Done.")
def load_sketch_matrix():
    """Load the sketch matrix of the current dataset and its lookup maps.

    Three artifacts are read, in this order, from files whose names are built
    from the module-level ``path`` and ``dataset`` values: the NodeID map,
    the sketch matrix and the column-index-to-node map. Each read is
    announced with a timestamp (formatted with the module-level
    ``time_format``) and followed by its elapsed wall-clock time.

    Returns:
        The tuple ``(sketch_matrix, index_node_map, node_id_map)``.
    """
    rule = "-----------------------------------------"

    def report(*items):
        # One pre-joined string per call: the items end up space-separated on
        # a single line, exactly as a multi-item print statement writes them.
        print(" ".join(str(item) for item in items))

    # NodeID map (no separator line is printed after this section)
    report("Reading NodeID map started at", time.strftime(time_format))
    began = time.time()
    node_ids = inout.load_from_file(path + "{0}_node_id_map".format(dataset))
    report("Reading NodeID map took", time.time() - began, "s")

    # Sketch matrix
    report("Reading sketch matrix started at", time.strftime(time_format))
    began = time.time()
    sketch = SketchMatrix.load_from_file(path + "{0}_sketch".format(dataset))
    report("Reading sketch matrix took", time.time() - began, "s")
    report(rule)

    # Column index -> node map
    report("Reading Column index to Node map started at",
           time.strftime(time_format))
    began = time.time()
    columns_to_nodes = inout.load_from_file(
        path + "{0}_index_node_map".format(dataset))
    report("Reading Column index to Node map took", time.time() - began, "s")
    report(rule)

    return sketch, columns_to_nodes, node_ids
def load_sketch_matrix():
    """Read the sketch matrix together with the NodeID and column maps.

    The NodeID map, the sketch matrix and the column-index-to-node map are
    loaded one after another. Their file names are the module-level ``path``
    followed by the ``dataset`` name and a fixed suffix. A start timestamp
    and the elapsed time in seconds are printed around each load.

    Returns:
        ``(sketch_matrix, index_node_map, node_id_map)``.
    """
    divider = "-----------------------------------------"

    def say(*parts):
        # Join first, print once: yields the same space-separated line that a
        # print statement produces for several items.
        print(" ".join(str(part) for part in parts))

    def dataset_file(template):
        # All artifacts live next to each other under ``path``.
        return path + template.format(dataset)

    say("Reading NodeID map started at", time.strftime(time_format))
    t0 = time.time()
    id_map = inout.load_from_file(dataset_file("{0}_node_id_map"))
    # The NodeID section is not followed by a divider line.
    say("Reading NodeID map took", time.time() - t0, "s")

    say("Reading sketch matrix started at", time.strftime(time_format))
    t0 = time.time()
    sketches = SketchMatrix.load_from_file(dataset_file("{0}_sketch"))
    say("Reading sketch matrix took", time.time() - t0, "s")
    say(divider)

    say("Reading Column index to Node map started at",
        time.strftime(time_format))
    t0 = time.time()
    column_map = inout.load_from_file(dataset_file("{0}_index_node_map"))
    say("Reading Column index to Node map took", time.time() - t0, "s")
    say(divider)

    return sketches, column_map, id_map
def read_r_balls_database(r_balls_directory, r_balls_count):
    """Lazily yield the stored r-ball records, one per file, in index order.

    Record ``i`` is loaded with ``inout.load_from_file`` from the file
    ``r_ball_<i>``; the file name is appended verbatim to
    ``r_balls_directory``, so the directory has to end with a path separator.

    Args:
        r_balls_directory: Prefix of the record files.
        r_balls_count: Number of records to read (indices ``0`` up to
            ``r_balls_count - 1``).

    Yields:
        Whatever ``inout.load_from_file`` returns for each record file.
    """
    file_names = ("r_ball_{0}".format(number)
                  for number in range(r_balls_count))
    for file_name in file_names:
        # Nothing is loaded until the consumer asks for the next record.
        yield inout.load_from_file(r_balls_directory + file_name)
def load_from_file(in_file, compressed=True):
    """Load an object from ``in_file`` by delegating to ``inout.load_from_file``.

    Args:
        in_file: Passed through unchanged as the first positional argument.
        compressed: Passed through unchanged as the second positional
            argument; defaults to ``True``.

    Returns:
        The value returned by ``inout.load_from_file``.
    """
    loaded = inout.load_from_file(in_file, compressed)
    return loaded