Exemplo n.º 1
0
    def __init__(self, **kwargs):
        """Store the configuration options and validate the chosen methods.

        Every keyword argument becomes an instance attribute, layered on top
        of the built-in defaults listed below. When no ``logger`` is given,
        one is created with ``helper.initialize_logger('log')``.

        The ``similarity`` and ``matching`` options are compared
        case-insensitively against the supported names; an unsupported value
        is logged as a warning and terminates the process with exit status 1.
        """

        # Defaults first, caller-supplied values win.
        settings = dict(
            max_levels=3,
            reduction_factor=0.5,
            similarity='weighted_common_neighbors',
            matching='gmb',
            global_min_vertices=None,
            upper_bound=0.2,
            tolerance=0.01,
            itr=10,
            logger=None,
        )
        settings.update(kwargs)
        self.__dict__.update(settings)

        if self.logger is None:
            self.logger = helper.initialize_logger('log')

        supported_similarity = (
            'common_neighbors', 'weighted_common_neighbors', 'salton',
            'preferential_attachment', 'jaccard', 'adamic_adar',
            'resource_allocation', 'sorensen', 'hub_promoted', 'hub_depressed',
            'leicht_holme_newman', 'weighted_jaccard',
        )
        supported_matching = ('mlpb', 'nmlpb', 'gmb', 'rgmb', 'hem', 'lem', 'rm')

        # Checked in order: similarity measure first, then matching method.
        checks = (
            (self.similarity, supported_similarity,
             'Similarity misure is unvalid.'),
            (self.matching, supported_matching,
             'Matching method is unvalid.'),
        )
        for chosen, allowed, message in checks:
            if chosen.lower() not in allowed:
                self.logger.warning(message)
                sys.exit(1)
Exemplo n.º 2
0
    def __init__(self):
        """ Initialize the bnoc app

        Parses the command line, broadcasts scalar options to one value per
        layer (x, y, z) or per schema entry (mu, dispersion, noise) and
        validates the result. Every failed validation is logged as a warning
        and terminates the process with exit status 1.

        For help use:
            > python bnoc.py --help
        """

        self.timing = Timing(['Snippet', 'Time [m]', 'Time [s]'])
        with self.timing.timeit_context_add('Pre-processing'):
            # Setup parse options command line
            current_path = os.path.dirname(
                os.path.abspath(inspect.getfile(inspect.currentframe())))
            parser = args.setup_parser(current_path + '/args/bnoc.json')
            self.options = parser.parse_args()
            args.update_json(self.options)
            args.check_output(self.options)
            self.log = helper.initialize_logger(dir='log', output='log')

            if self.options.save_arff and (self.options.x is not None):
                self.log.warning(
                    'Warning: Arff format does not allow overlap in the first layer (parameter x).\
                                Please use --save_arff=False or supress x parameter.'
                )
                sys.exit(1)

            self.layers = len(self.options.vertices)

            # Inclusive [first, last] vertex index of every layer, built with
            # a running offset instead of re-summing the prefix each time.
            self.start_end = []
            offset = 0
            for size in self.options.vertices:
                self.start_end.append([offset, offset + size - 1])
                offset += size

            if self.options.p is None or self.options.balanced is True:
                self.generate_balanced_probabilities()

            # Each probability vector must sum to 1 (to one decimal place).
            for p in self.options.p:
                if str(numpy.sum(round(sum(p), 1))) != str(1.0):
                    self.log.warning(
                        'Warning: The sum of probabilities p1 must be equal to 1.'
                    )
                    sys.exit(1)

            # Default to a single community per layer when none were given.
            # (The previous test, `len(...) is None`, could never be true and
            # raised TypeError for a missing value.)
            if self.options.communities is None:
                self.options.communities = [1] * len(self.options.vertices)

            # A single integer applies to every layer.
            for name in ('x', 'y', 'z'):
                value = getattr(self.options, name)
                if isinstance(value, int):
                    setattr(self.options, name, [value] * self.layers)

            # Normalize the schema to a sized sequence of pairs.
            if all(isinstance(item, tuple) for item in self.options.schema):
                self.options.schema = [
                    list(elem) for elem in self.options.schema
                ]
            if not all(isinstance(item, list) for item in self.options.schema):
                # Flat sequence: group consecutive items two by two. The
                # result is materialized because len() is taken just below.
                it = iter(self.options.schema)
                self.options.schema = list(zip(it, it))

            # A single number applies to every schema entry.
            for name in ('mu', 'dispersion', 'noise'):
                value = getattr(self.options, name)
                if isinstance(value, (int, float)):
                    setattr(self.options, name,
                            [value] * len(self.options.schema))

            for layer, comm in enumerate(self.options.communities):
                if comm == 0:
                    self.log.warning(
                        'The number of communities must be greater than zero.')
                    sys.exit(1)
                if comm > self.options.vertices[layer]:
                    self.log.warning(
                        'Warning: The number of communities must be less than the number of vertices.'
                    )
                    sys.exit(1)

            if self.options.x is not None and self.options.z is not None:
                # Check every layer; the previous code reused the loop
                # variable left over from the loop above and therefore only
                # looked at the last layer.
                for overlap, comm in zip(self.options.z,
                                         self.options.communities):
                    if overlap > comm:
                        self.log.warning(
                            'Warning: Number of vertices of overlapping communities must be less than \
                                    the number of communities in all layers.')
                        sys.exit(1)
                if sum(self.options.x) > 0 and sum(self.options.z) == 0:
                    self.options.z = [2] * len(self.options.x)
Exemplo n.º 3
0
        for color in colors:
            f.write(color + '\n')


if __name__ == '__main__':

    # Setup parse options command line; the option spec is read from
    # args/pynetviewer.json, resolved relative to this file's directory.
    current_path = os.path.dirname(
        os.path.abspath(inspect.getfile(inspect.currentframe())))
    parser = args.setup_parser(current_path + '/args/pynetviewer.json')
    options = parser.parse_args()
    args.update_json(options)
    args.check_output(options)

    # Log instantiation
    log = helper.initialize_logger(dir='log', output='log')

    # Check required fields (parser.error reports the message and exits)
    if options.input is None:
        parser.error('required -f [input] arg.')
    if options.vertices is None:
        parser.error('required -v [number of vertices for each layer] arg.')

    graph = helperigraph.load(options.input,
                              options.vertices,
                              type_filename=options.file_type)

    # Initialize the vertex and graph attributes to None. Each attribute is
    # set once (a duplicated 'overlapping' assignment was dropped).
    graph.vs['membership'] = None
    graph['overlapping'] = None
    graph['comms'] = None
Exemplo n.º 4
0
def main():
	"""
	Main entry point for the application when run from the command line.

	Parses and validates the command-line options, loads the bipartite graph,
	coarsens it one level at a time until ``max_levels`` is reached and then
	writes the output files selected by the ``save_*`` options. An invalid
	option is logged as a warning and ends the process with exit status 1.
	"""

	# Timing instantiation
	timing = Timing(['Snippet', 'Time [m]', 'Time [s]'])

	with timing.timeit_context_add('Pre-processing'):

		# Setup parse options command line
		current_path = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
		parser = args.setup_parser(current_path + '/args/mdr.json')
		options = parser.parse_args()
		args.update_json(options)
		args.check_output(options)

		# Log instantiation
		log = helper.initialize_logger(dir='log', output='log')

		if options.input and options.vertices is None:
			log.warning('Vertices are required when input is given.')
			sys.exit(1)

		# Create default values for optional parameters
		if options.reduction_factor is None:
			options.reduction_factor = 0.5
		if options.max_levels is None:
			options.max_levels = 3
		if options.matching is None:
			# The default has to be one of the names accepted below; the
			# former default ('greedy_seed_twohops') was rejected by the
			# validation, so a run without an explicit matching always exited.
			options.matching = 'gmb'
		if options.similarity is None:
			options.similarity = 'weighted_common_neighbors'

		# Validation is case-insensitive, but both names are later resolved
		# with getattr(), so keep the normalized spelling.
		options.matching = options.matching.lower()
		options.similarity = options.similarity.lower()

		# Validation of matching method
		valid_matching = ['gmb', 'rgmb', 'hem', 'lem', 'rm']
		if options.matching not in valid_matching:
			log.warning('Matching method is unvalid.')
			sys.exit(1)

		# Validation of input extension
		valid_input = ['.arff', '.dat']
		if options.extension not in valid_input:
			log.warning('Input is unvalid.')
			sys.exit(1)

		# Validation of similarity measure
		valid_similarity = ['common_neighbors', 'weighted_common_neighbors',
		'salton', 'preferential_attachment', 'jaccard', 'adamic_adar',
		'resource_allocation', 'sorensen', 'hub_promoted', 'hub_depressed',
		'leicht_holme_newman', 'weighted_jaccard']
		if options.similarity not in valid_similarity:
			log.warning('Similarity misure is unvalid.')
			sys.exit(1)

		# A real list is required: the values are indexed when the .conf
		# file is written.
		options.vertices = list(map(int, options.vertices))
		options.max_levels = int(options.max_levels)
		options.reduction_factor = float(options.reduction_factor)

	# Load bipartite graph
	with timing.timeit_context_add('Load'):
		if options.extension == '.arff':
			graph = helperigraph.load_csr(options.input)
		elif options.extension == '.dat':
			graph = helperigraph.load_dat(options.input, skip_last_column=options.skip_last_column, skip_rows=options.skip_rows)
		graph['level'] = 0

	# Coarsening
	with timing.timeit_context_add('Coarsening'):
		hierarchy_graphs = []
		hierarchy_levels = []
		# '<' instead of '!=' so that a negative max_levels cannot loop forever.
		while graph['level'] < options.max_levels:

			# One entry per vertex, initially mapping every vertex to itself.
			# NOTE(review): presumably updated in place by the matching
			# method, hence a mutable list - confirm.
			matching = list(range(graph.vcount()))
			levels = graph['level'] + 1

			graph['similarity'] = getattr(Similarity(graph, graph['adjlist']), options.similarity)
			# Index range that follows the first graph['vertices'][0] vertices.
			start = sum(graph['vertices'][0:1])
			end = sum(graph['vertices'][0:1 + 1])
			# NOTE(review): 'vertices' was undefined here (NameError for
			# hem/lem/rm); the start..end range passed by the other branch is
			# assumed to be what the projection expects - confirm.
			vertices = list(range(start, end))
			if options.matching in ['hem', 'lem', 'rm']:
				one_mode_graph = graph.weighted_one_mode_projection(vertices)
				matching_method = getattr(one_mode_graph, options.matching)
				matching_method(matching, reduction_factor=options.reduction_factor)
			else:
				matching_method = getattr(graph, options.matching)
				matching_method(vertices, matching, reduction_factor=options.reduction_factor)

			coarse = graph.contract(matching)
			coarse['level'] = levels
			graph = coarse
			# Keep every level when the hierarchy is requested, otherwise
			# only the final one.
			if options.save_hierarchy or (graph['level'] == options.max_levels):
				hierarchy_graphs.append(graph)
				hierarchy_levels.append(levels)

	# Save
	with timing.timeit_context_add('Save'):

		output = options.output
		# Last level first, so index 0 is the most recently built graph. zip()
		# is materialized because reversed() needs a sequence.
		hierarchy = list(zip(hierarchy_levels, hierarchy_graphs))
		for index, (levels, graph) in enumerate(reversed(hierarchy)):
			prefix = output + '-' + str(index)

			if options.save_conf:
				with open(prefix + '.conf', 'w+') as f:
					d = {}
					d['source_filename'] = options.input
					d['source_v0'] = options.vertices[0]
					d['source_v1'] = options.vertices[1]
					d['source_vertices'] = options.vertices[0] + options.vertices[1]
					d['edges'] = graph.ecount()
					d['vertices'] = graph.vcount()
					d['reduction_factor'] = options.reduction_factor
					d['max_levels'] = options.max_levels
					d['similarity'] = options.similarity
					d['matching'] = options.matching
					d['levels'] = levels
					for layer in range(graph['layers']):
						vcount = str(len(graph.vs.select(type=layer)))
						attr = 'v' + str(layer)
						d[attr] = vcount
					json.dump(d, f, indent=4)

			if options.save_ncol:
				graph.write(prefix + '.ncol', format='ncol')

			if options.save_source:
				with open(prefix + '.source', 'w+') as f:
					for v in graph.vs():
						f.write(' '.join(map(str, v['source'])) + '\n')

			if options.save_predecessor:
				with open(prefix + '.predecessor', 'w+') as f:
					for v in graph.vs():
						f.write(' '.join(map(str, v['predecessor'])) + '\n')

			if options.save_successor:
				numpy.savetxt(prefix + '.successor', graph.vs['successor'], fmt='%d')

			if options.save_weight:
				numpy.savetxt(prefix + '.weight', graph.vs['weight'], fmt='%d')

			if options.save_adjacency:
				numpy.savetxt(prefix + '.dat', helperigraph.biajcent_matrix(graph), fmt='%.2f')

			if options.save_gml:
				# Every attribute is dropped or turned into a string before
				# the GML file is written; lists are used so the values do
				# not depend on map() returning a list.
				del graph['adjlist']
				del graph['similarity']
				graph['layers'] = str(graph['layers'])
				graph['vertices'] = ','.join(map(str, graph['vertices']))
				graph['level'] = str(graph['level'])
				graph.vs['name'] = [str(i) for i in range(graph.vcount())]
				graph.vs['type'] = [str(t) for t in graph.vs['type']]
				graph.vs['weight'] = [str(w) for w in graph.vs['weight']]
				graph.vs['successor'] = [str(s) for s in graph.vs['successor']]
				for v in graph.vs():
					v['source'] = ','.join(map(str, v['source']))
					v['predecessor'] = ','.join(map(str, v['predecessor']))
				graph.write(prefix + '.gml', format='gml')

			if not options.save_hierarchy:
				break

	if options.show_timing:
		timing.print_tabular()
	if options.save_timing_csv:
		timing.save_csv(output + '-timing.csv')
	if options.save_timing_json:
		# Own file name: this used to write to '-timing.csv' and overwrote
		# the CSV report.
		timing.save_json(output + '-timing.json')