def __init__(self, graph, name=None):
    """Declare the walk options and create the wrapped global extractor.

    :param graph: graph handed to `ProxMarkovExtractionGlobal`
    :param name: component name forwarded to the parent constructor
    """
    super(ProxMarkovExtractionGlobalBigraph, self).__init__(name=name)
    walk_options = (
        ("half_length", Numeric(
            min=0, max=20, default=2,
            help="Two walks will be computed one of length t*2-1 one of lenght t*2")),
        ("odd_count", Numeric(
            min=0, default=15,
            help="Number of vertices to keep with the *odd* length walk")),
        ("even_count", Numeric(
            min=0, default=35,
            help="Number of vertices to keep with the *even* length walk")),
    )
    # options are registered in declaration order
    for opt_name, opt_type in walk_options:
        self.add_option(opt_name, opt_type)
    # basic extractor built on the given graph
    self.extrator = ProxMarkovExtractionGlobal(graph)
def _build_result_set(self, v_extract):
    """Build the list of `Doc` from the list of retrieved vertices.

    :param v_extract: iterable of `(vertex id, score)` pairs
    :returns: list with one `Doc` per pair, in iteration order
    """
    global_graph = self.graph
    schema = Schema(
        docnum=Numeric(),
        degree_out=Numeric(),
        degree_in=Numeric(),
        score=Numeric(vtype=float),
        label=Text(),
        neighbors=Numeric(multi=True, uniq=True),
    )
    kdocs = []
    for vid, score in v_extract:
        kdoc = Doc(schema, docnum=vid)
        kdoc.score = score
        vtx = global_graph.vs[vid]
        # other attributes
        kdoc.degree_out = vtx.degree(ig.OUT)
        kdoc.degree_in = vtx.degree(ig.IN)
        kdoc.label = vtx['label']
        # neighbours are stored in a term field
        for nei in vtx.neighbors():  # TODO: add IN/OUT ?
            kdoc["neighbors"].add(nei.index)  # TODO: add a weight!
        # register the document
        kdocs.append(kdoc)
    return kdocs
def __init__(self, name=None, **kwargs):
    """Declare the extraction options, optionally overriding their values.

    :param name: component name forwarded to the parent constructor
    :param kwargs: any key matching a declared option name is applied
        with `set_option_value` right after the option is added
    """
    super(ProxExtract, self).__init__(name=name)
    declared = [
        ("length", Numeric(vtype=int, default=3, min=1,
                           help="random walk length")),
        ("cut", Numeric(vtype=int, default=100, min=1, help="vcount cut")),
        ("pzeros", Numeric(multi=True, uniq=True, vtype=int, default=[], min=0,
                           help="pzero vertex index all if empty list or None")),
        ("add_loops", Boolean(default=True, help="add loops on vertices")),
        ("mode", Numeric(choices=[IN, OUT, ALL], default=ALL,
                         help="edge directions")),
        ("weighted", Boolean(default=True)),
    ]
    for opt_name, opt_type in declared:
        self.add_option(opt_name, opt_type)
        if opt_name not in kwargs:
            continue
        self.set_option_value(opt_name, kwargs[opt_name])
def __init__(self, graph, name="simple_graph_search", copy=None):
    """ Initialise searcher with the graph.

    @param graph: the graph to search in
    @param name: component name passed to `AbstractSearch`
    @param copy: stored as `attrs_to_copy`; a fresh empty list is used
        when omitted so instances never share one default list
    """
    AbstractSearch.__init__(self, name)
    self._logger = logging.getLogger(__name__)
    self.graph = graph
    # a `copy=[]` default would be one list object shared by every instance
    self.attrs_to_copy = [] if copy is None else copy
    self.add_option(
        "nb_results",
        Numeric(default=30, help="Max number of vertices to retrieve"))
    self.add_option(
        "l", Numeric(default=3, help="length of the random walk"))
def graph_engine(graphdb):
    """Build the "graph" engine with its `Graph` and `ResetGraph` components.

    :param graphdb: graph database; read through `db_graph` and written
        through `graphdb.graphs[gid]` by the reset component
    :returns: the configured `Engine`
    """
    engine = Engine("graph")
    engine.graph.setup(in_name="request", out_name="graph")

    def _global(query, reset=False, all_articles=False, cut=100, **kwargs):
        # gid is read from the incoming query, before db_graph rebinds it
        gid = query['graph']
        query, graph = db_graph(graphdb, query)
        if reset:
            nodes = []
        else:
            nodes = query['nodes']
        return graph_articles(gid, graph, all_articles=all_articles, cut=cut,
                              uuids=nodes, **kwargs)

    def _reset_global(query, **kwargs):
        gid = query['graph']
        headers = istex.get_schema()
        fresh = empty_graph(gid, headers, **kwargs)
        # the stored graph for this gid is replaced by the empty one
        graphdb.graphs[gid] = fresh
        return graph_articles(gid, fresh, all_articles=True, uuids=[],
                              **kwargs)

    weighting_choices = [
        u"0", u"1", u"weight", u"auteurs", u"refBibAuteurs", u"keywords",
        u"categories",
    ]

    graph_comp = Optionable("Graph")
    graph_comp._func = _global
    graph_comp.add_option("reset", Boolean(default=False, help="reset or add"))
    graph_comp.add_option(
        "all_articles", Boolean(default=False, help="includes all articles"))
    graph_comp.add_option(
        "weighting",
        Text(choices=weighting_choices, multi=True, default=u"1",
             help="ponderation"))
    graph_comp.add_option("length", Numeric(vtype=int, min=1, default=3))
    graph_comp.add_option("cut", Numeric(vtype=int, min=2, default=100))

    reset_comp = Optionable('ResetGraph')
    reset_comp._func = _reset_global
    reset_comp.add_option(
        "reset", Boolean(default=True, help=""), hidden=True)

    engine.graph.set(graph_comp, reset_comp)
    return engine
def starred_engine(graphdb):
    """Build the "graph" engine returning the starred subgraph.

    :param graphdb: graph database handed to `db_graph`
    :returns: the configured `Engine`
    """
    # setup
    engine = Engine("graph")
    engine.graph.setup(in_name="request", out_name="graph")

    ## Search
    def subgraph(query, limit=200, prune=False):
        """Return the starred subgraph of the graph selected by `query`.

        :param limit: forwarded to `starred`
        :param prune: forwarded to `starred`
        """
        graph = db_graph(graphdb, query)
        # honour the caller's values: they used to be overridden by the
        # constants limit=100 / prune=True, making both options inert
        return starred(graph, limit=limit, prune=prune)

    graph_search = Optionable("GraphSearch")
    graph_search._func = Composable(subgraph)
    graph_search.add_option("limit", Numeric(vtype=int, default=200))
    graph_search.add_option("prune", Boolean(default=True))

    from cello.graphs.transform import VtxAttr
    graph_search |= VtxAttr(color=[(45, 200, 34), ])
    graph_search |= VtxAttr(type=1)

    engine.graph.set(graph_search)
    return engine
def explore_engine(graphdb):
    """Build the "graph" engine expanding a subgraph around given units.

    :param graphdb: graph database handed to `expand_subgraph`
    :returns: the configured `Engine`
    """
    # setup
    engine = Engine("graph")
    engine.graph.setup(in_name="request", out_name="graph")

    ## Search
    def subgraph(query, size=50):
        """Expand around `query['units']`, sharing `size` between them."""
        gid = query['graph']
        uuids = list(query['units'])
        # floor division keeps the per-unit limit an integer under both
        # Python 2 and Python 3 (plain `/` yields a float with true division)
        limit = size // len(uuids) if uuids else size
        return expand_subgraph(graphdb, gid, uuids, limit=limit)

    graph_search = Optionable("GraphSearch")
    graph_search._func = Composable(subgraph)
    graph_search.add_option("size", Numeric(vtype=int, default=50))

    from cello.graphs.transform import VtxAttr
    graph_search |= VtxAttr(color=[(45, 200, 34), ])
    graph_search |= VtxAttr(type=1)

    engine.graph.set(graph_search)
    return engine
def __init__(self, name=None):
    """Declare the `min_doc` option.

    :param name: component name forwarded to the parent constructor
    """
    super(TooFewDoc, self).__init__(name=name)
    min_doc_option = Numeric(
        default=2,
        min=0,
        help=u"Minimum number of document per cluster",
    )
    self.add_option("min_doc", min_doc_option)
def __init__(self, name=None):
    """Bind the extractor to `prox.prox_markov_mtcl` and add `throws`.

    :param name: component name forwarded to the parent constructor
    """
    super(ProxMonteCarloExtraction, self).__init__(
        prox.prox_markov_mtcl, name=name)
    throws_option = Numeric(
        default=500,
        help="The number of throws in montecarlo process",
    )
    self.add_option("throws", throws_option)
def __init__(self, global_graph, name=None):
    """Bind the global extractor to `prox.prox_markov_mtcl`, add `throws`.

    :param global_graph: graph forwarded to the parent constructor
    :param name: component name forwarded to the parent constructor
    """
    super(ProxMtclExtractionGlobal, self).__init__(
        global_graph, prox.prox_markov_mtcl, name=name)
    throws_option = Numeric(
        default=500,
        help="The number of throws in montecarlo process",
    )
    self.add_option("throws", throws_option)
def __init__(self, index=None, field=None, size=3):
    """Store the index and declare the `field` and `size` options.

    :param index: stored as `self.index`
    :param field: default value of the `field` option
    :param size: default value of the `size` option
    """
    super(ESPhraseSuggest, self).__init__()
    # index used for the suggestions
    self.index = index
    field_option = Text(default=field, help="Suggestions field")
    size_option = Numeric(vtype=int, default=size, help="max suggestions")
    self.add_option("field", field_option)
    self.add_option("size", size_option)
def expand_prox_engine(graphdb):
    """
    prox with weights and filters on UNodes and UEdges types

    input:  {
                nodes : [ uuid, .. ],  //more complex p0 distribution
                weights: [float, ..], //list of weight
            }
    output: {
                graph : gid,
                scores : [ (uuid_node, score ), .. ]
            }

    :param graphdb: graph database handed to `db_graph`
    :returns: the configured `Engine`
    """
    engine = Engine("scores")
    engine.scores.setup(in_name="request", out_name="scores")

    ## Search
    def expand(query, length=3, cut=300, weighting=None):
        """Return a `{uuid: score}` dict for the expanded vertices.

        `length` is accepted because it is a declared option, but it is
        not forwarded to `expand_subgraph`.
        """
        graph = db_graph(graphdb, query)
        nodes = query.get("nodes", [])
        expand_uuids = query.get("expand", [])
        vs = expand_subgraph(graph, expand_uuids, nodes, cut=cut,
                             weightings=weighting)
        return dict((graph.vs[v[0]]['uuid'], v[1]) for v in vs)

    scores = Optionable("scores")
    scores._func = Composable(expand)
    scores.add_option("length", Numeric(vtype=int, default=3))
    scores.add_option("cut", Numeric(vtype=int, default=50, max=300))
    scores.add_option(
        "weighting",
        Text(choices=[u"0", u"1", u"weight"], multi=True, default=u"1",
             help="ponderation"))

    # register the configured component: it used to be built and then
    # dropped in favour of the bare `expand` function
    engine.scores.set(scores)
    return engine
def __init__(self, name=None, edge_wgt_attr=EDGE_WEIGHT_ATTR):
    """Store the edge weight attribute and declare the `kmax` option.

    :param name: component name forwarded to the parent constructor
    :param edge_wgt_attr: stored as `self.edge_wgt_attr`; defaults to
        `EDGE_WEIGHT_ATTR`
    """
    super(MaxDensity, self).__init__(name=name)
    # use the argument: the constant used to be assigned unconditionally,
    # silently ignoring any value given by the caller
    self.edge_wgt_attr = edge_wgt_attr
    self.add_option(
        "kmax",
        Numeric(vtype=float, default=10., min=0.1,
                help="Maximum mean degree"))
def __init__(self, prox_func, name=None):
    """Declare the `vcount` and `length` options and keep the prox function.

    :param prox_func: curried prox function. Only `graph`, `pzero` and
        `length` are passed to it; to change any other named argument,
        wrap the call in a function (or lambda) that fixes them.
    :param name: component name forwarded to the parent constructor

    Here is an example of a usable prox function:

    >>> def prox_func(graph, pzero, length):
    ...     return prox.prox_markov_dict(graph, pzero, length, mode=OUT,
    ...         add_loops=False, weight=None)
    """
    super(ProxExtract, self).__init__(name=name)
    self.prox_func = prox_func
    for opt_name, opt_type in (
            ("vcount", Numeric(default=10, help="max vertex count")),
            ("length", Numeric(default=3, help="random walk length")),
    ):
        self.add_option(opt_name, opt_type)
def starred_engine(graphdb):
    """Build the "graph" engine returning a subgraph of the starred nodes.

    :param graphdb: graph database queried for the starred node uuids
    :returns: the configured `Engine`
    """
    # setup
    engine = Engine("graph")
    engine.graph.setup(in_name="request", out_name="graph")

    ## Search
    def subgraph(query, limit=200, prune=False):
        """Pick the subgraph strategy from the number of starred nodes.

        * none: an empty directed graph
        * one: `expand_subgraph` with the whole `limit`
        * two to five: `expand_subgraph` with `limit` split between them
        * more: `nodes_subgraph` on the first `limit` uuids
        """
        gid = query['graph']
        uuids = graphdb.get_starred_node_uuids(gid)
        count = len(uuids)

        # a single chain: the empty case used to be a separate `if`, so
        # its graph was overwritten by the `<= 5` branch that followed
        if count == 0:
            graph = igraph.Graph(directed=True, graph_attrs={}, n=0,
                                 vertex_attrs={}, edges=[], edge_attrs={})
        elif count == 1:
            # FIXME: issue #78
            graph = expand_subgraph(graphdb, gid, uuids, limit=limit)
        elif count <= 5:
            # floor division keeps the limit an integer on Python 2 and 3
            graph = expand_subgraph(graphdb, gid, uuids,
                                    limit=limit // count)
        else:
            graph = nodes_subgraph(graphdb, gid, uuids[:limit])

        if prune:
            graph = _prune(graph)
        return graph

    graph_search = Optionable("GraphSearch")
    graph_search._func = Composable(subgraph)
    graph_search.add_option("limit", Numeric(vtype=int, default=200))
    graph_search.add_option("prune", Boolean(default=True))

    from cello.graphs.transform import VtxAttr
    graph_search |= VtxAttr(color=[(45, 200, 34), ])
    graph_search |= VtxAttr(type=1)

    engine.graph.set(graph_search)
    return engine
def __init__(self, name="grid_layout", dim=3):
    """ Build the layout component

    :param name: name of the component
    :param dim: the number of dimensions of the output layouts (2 or 3)
    """
    super(GridLayout, self).__init__(name=name)
    self.add_option(
        "width",
        Numeric(default=0,
                # the text used to say "height" here (copy/paste slip)
                help="Number of vertices in a single row of the layout. "
                     "Zero means that the width should be determined "
                     "automatically."))
    self.add_option(
        "height",
        Numeric(default=0,
                help="Number of vertices in a single column of the layout. "
                     "Zero means that the height should be determined "
                     "automatically."))
    assert dim == 2 or dim == 3
    self.dimensions = dim
def __init__(self, name=None, weighted=False):
    """Declare the `length` option and remember the weighting choice.

    :param name: component name forwarded to the parent constructor
    :param weighted: whether to use the weight of the graph; if True the
        edge attribute `cello.graphs.EDGE_WEIGHT_ATTR` is used.
    :type weighted: boolean
    """
    super(ProxBigraphLayout, self).__init__(name=name)
    self.weighted = weighted
    length_option = Numeric(
        default=3, min=1, max=50, help="Random walks length")
    self.add_option("length", length_option)
def __init__(self, name=None):
    """Declare the `top_min` and `top_max_ratio` options.

    :param name: component name passed to `Optionable`
    """
    Optionable.__init__(self, name=name)
    top_min_option = Numeric(
        default=0,
        min=0.,
        help="Removes type=False vertices connected to less than top_min (type=True) vertices",
    )
    top_max_ratio_option = Numeric(
        vtype=float,
        default=1.,
        min=0.,
        max=1.,
        help="Removes type=False vertices connected to more than top_max_ratio percents of the (type=True) vertices",
    )
    self.add_option("top_min", top_min_option)
    self.add_option("top_max_ratio", top_max_ratio_option)
def __init__(self, name=None):
    """Declare the `m` and `remove_single` options.

    :param name: component name passed to `Optionable`
    """
    Optionable.__init__(self, name=name)
    edge_count_option = Numeric(
        default=0,
        min=0.,
        help="Number of edges (with stronger weight) to keep",
    )
    remove_single_option = Boolean(
        default=True,
        help="Remove vertices with no links after filtering",
    )
    self.add_option("m", edge_count_option)
    self.add_option("remove_single", remove_single_option)
def expand_prox_engine(graphdb):
    """
    prox with weights and filters on UNodes and UEdges types

    input:  {
                nodes : [ uuid, .. ],  //more complex p0 distribution
                weights: [float, ..], //list of weight
            }
    output: {
                graph : gid,
                scores : [ (uuid_node, score ), .. ]
            }

    :param graphdb: graph database whose `proxemie` method is called
    :returns: the configured `Engine`
    """
    engine = Engine("scores")
    engine.scores.setup(in_name="request", out_name="scores")

    ## Search
    def expand(query, step=3, limit=100, filter_nodes=None,
               filter_edges=None):
        """Run `graphdb.proxemie` for the nodes and weights in `query`."""
        if filter_nodes is None:
            filter_nodes = []
        if filter_edges is None:
            filter_edges = []
        gid = query.get("graph")
        pzeros = query.get("nodes")
        weights = query.get("weights", [])
        return graphdb.proxemie(
            gid, pzeros, weights,
            filter_edges=filter_edges, filter_nodes=filter_nodes,
            limit=limit, n_step=step)

    scores = Optionable("scores")
    scores._func = Composable(expand)
    scores.add_option("step", Numeric(vtype=int, default=3))
    scores.add_option("limit", Numeric(vtype=int, default=50, max=100))
    scores.add_option(
        "filter_nodes", Text(default=set([]), multi=True, uniq=True))
    scores.add_option(
        "filter_edges", Text(default=set([]), multi=True, uniq=True))

    # register the configured component: it used to be built and then
    # dropped in favour of the bare `expand` function
    engine.scores.set(scores)
    return engine
def __init__(self, name="prox_layout", weighted=False):
    """Declare the `length` and `add_loops` options.

    :param name: component name forwarded to the parent constructor
    :param weighted: whether to use the weight of the graph; if True the
        edge attribute `cello.graphs.EDGE_WEIGHT_ATTR` is used.
    :type weighted: boolean
    """
    super(ProxLayout, self).__init__(name=name)
    self.weighted = weighted
    layout_options = (
        ("length", Numeric(default=3, min=1, max=50,
                           help="Random walks length")),
        ("add_loops", Boolean(default=True,
                              help="Wether to add self loop on all vertices")),
    )
    for opt_name, opt_type in layout_options:
        self.add_option(opt_name, opt_type)
def __init__(self, index, field='form_suggest', size=10):
    """Keep the index and field, and declare the `size` option.

    :param index: <EsIndex> to search candidates
    :param field: the field to use for autocompletion
    :param size: default value for options 'size'
    """
    super(TmuseEsComplete, self).__init__()
    self.es_idx, self.field = index, field
    size_option = Numeric(
        vtype=int,
        min=0,
        max=300,
        default=size,
        help="Max number of propositions",
    )
    self.add_option("size", size_option)
def __init__(self, index=None, doc_type=None, host="localhost:9200", name=None):
    """Connect to Elasticsearch and declare the search options.

    :param index: index name
    :param doc_type: document type to search. A single string is stored
        as `self.doc_type`; a non-empty collection becomes the choices
        and default of a `doc_type` option; None means the types are
        read from the mappings of `index`.
    :param host: ES hostname
    :param name: component name
    :raises RuntimeError: when the ES server does not answer the ping
    """
    super(ESSearch, self).__init__(name=name)
    size_option = Numeric(
        vtype=int, default=10, min=0, help="number of document to returns")
    self.add_option("size", size_option)

    # ES connection, checked right away
    self.host = host
    self._es_conn = elasticsearch.Elasticsearch(hosts=self.host)
    if not self._es_conn.ping():
        raise RuntimeError("Couldn't ping ES server at '%s'" % self.host)
    self.index = index

    # doctype handling: fixed value, or an option when several exist
    self.doc_type = None
    if isinstance(doc_type, basestring):
        # only one doctype: nothing to choose from
        self.doc_type = doc_type
        return

    if doc_type is None:
        # fetch all the existing doctypes
        mappings = self._es_conn.indices.get_mapping(index=self.index)
        doc_type = mappings[self.index]['mappings'].keys()

    if not len(doc_type):
        # empty list: no option and no doctype selection
        self.doc_type = None
        return

    self.add_option(
        "doc_type",
        Text(multi=True, choices=doc_type, default=doc_type,
             help="Documents type"))
def search_engine(graphdb):
    """Build the "search" engine with its `IstexSearch` component.

    :param graphdb: graph database handed to `db_graph`
    :returns: the configured `Engine`
    """
    engine = Engine("search")
    engine.search.setup(in_name="request", out_name="graph")

    def Search(query, results_count=10, **kwargs):
        """Query Istex, merge the hits into the graph, return the result."""
        query, graph = db_graph(graphdb, query)
        gid = query['graph']

        # `q` and `field` are removed from kwargs so they are not
        # forwarded to graph_articles below
        q = kwargs.pop("q", "*")
        field = kwargs.pop("field", None)

        found = query_istex(gid, q, field, results_count)
        # NOTE(review): `index` and `vid` are not defined in this block;
        # presumably module-level names -- confirm
        graph = merge(gid, graph, found, index=index, vid=vid)

        return graph_articles(gid, graph, weighting=["1"],
                              all_articles=True, cut=100,
                              uuids=query['nodes'], **kwargs)

    field_choices = [
        u"*", u"istex", u"auteurs", u"refBibAuteurs", u"keywords",
    ]

    search = Optionable("IstexSearch")
    search._func = Search
    search.add_option("q", Text(default=u"clle erss"))
    search.add_option("field", Text(choices=field_choices, default=u"*"))
    search.add_option(
        "results_count",
        Numeric(vtype=int, min=1, default=10, help="Istex results count"))

    engine.search.set(search)
    return engine
def clusters_labels_engine(graphdb):
    """Build the "labels" engine computing labels for each cluster.

    :param graphdb: graph database handed to `db_graph`
    :returns: the configured `Engine`
    """

    def _labels(query, weighting=None, count=2, **kwargs):
        """Return, for each cluster of `query`, at most `count` labels.

        Article vertices of the cluster seed `extract`; the extracted
        vertices that are not articles become the labels.
        """
        query, graph = db_graph(graphdb, query)
        gid = query['graph']
        article_type = "_%s_article" % gid

        clusters = []
        for clust in query['clusters']:
            members = graph.vs.select(uuid_in=clust)
            pz = [v.index for v in members if v['nodetype'] == article_type]

            labels = []
            if len(pz):
                vs = extract(graph, pz, cut=300, weighting=weighting,
                             length=3)
                candidates = [
                    {
                        'uuid': graph.vs[i]['uuid'],
                        'label': graph.vs[i]['properties']['label'],
                        'score': v,
                    }
                    for i, v in vs
                    if graph.vs[i]['nodetype'] != article_type
                ]
                labels = candidates[:count]
            clusters.append(labels)
        return clusters

    weighting_choices = [
        u"0", u"1", u"weight", u"auteurs", u"refBibAuteurs", u"keywords",
        u"categories",
    ]

    comp = Optionable("labels")
    comp._func = _labels
    comp.add_option(
        "weighting",
        Text(choices=weighting_choices, multi=True, default=u"1",
             help="ponderation"))
    comp.add_option("count", Numeric(vtype=int, min=1, default=2))

    engine = Engine("labels")
    engine.labels.setup(in_name="request", out_name="labels")
    engine.labels.set(comp)
    return engine
"term": { "graph": graph, } } } } } } } # /q res = index.search(body=q, size=len(ids)) return res TmuseDocSchema = Schema( docnum=Numeric(), # stored fields graph=Text(), lang=Text(), pos=Text(), pzero=Boolean(), form=Text(), neighbors=Numeric(), out_links=Numeric(multi=True, uniq=True), # computed fields rank=Numeric(), score=Numeric(vtype=float, default=0.)) def to_docs(es_res, pzeros): _pzeros = set(pzeros) or set([])
def __init__(self, global_graph, prox_func, default_mode=OUT, weight=None, loops_weight=None, name=None):
    """Declare the walk options and keep the graph, prox function and weights.

    :param global_graph: a subclass of :class:`.AbstractGraph`
    :param prox_func: curried prox function. Only `graph`, `pzero` and
        `length` are passed to it; to change any other named argument,
        wrap the call in a function (or lambda) that fixes them.
    :param default_mode: default mode for the random walk (useful only
        if the graph is directed)
    :param weight: if None the graph is not weighted, else it could be:
        a str corresponding to an edge attribute to use as weight,
        or a list of weight (`|weight| == graph.ecount()`),
        or a callable `lambda graph, source, target: wgt`
    :param loops_weight: only if `add_loops`, weight for added loops,
        it may be: a str corresponding to a vertex attribute,
        or a list of weight (`|loops_weight| == graph.vcount()`),
        or a callable `lambda graph, vid, mode, weight: wgt`
    :param name: component name forwarded to the parent constructor

    Here is an example of a usable prox function:

    >>> def prox_func(graph, pzero, length):
    ...     return prox.prox_markov_dict(graph, pzero, length, mode=OUT,
    ...         add_loops=False, weight=None)
    """
    super(ProxExtractGlobal, self).__init__(name=name)

    # plain state first
    self.global_graph = global_graph
    self.prox_func = prox_func
    self._wgt = weight
    self._loops_weight = loops_weight
    # two-way mapping between mode constants and their option labels
    self._modes = {
        "text_to_num": {"IN": IN, "OUT": OUT, "ALL": ALL},
        "num_to_text": {IN: u"IN", OUT: u"OUT", ALL: u"ALL"},
    }

    self.add_option("vcount", Numeric(default=10, help="max vertex count"))
    self.add_option("length", Numeric(default=3, help="random walk length"))
    self.add_option(
        "add_loops",
        Boolean(default=True, help="virtualy add loops on each vertex"))
    default_mode_label = self._modes["num_to_text"][default_mode]
    self.add_option(
        "mode",
        Text(default=default_mode_label, choices=[u"IN", u"OUT", u"ALL"],
             help="edges to walk on from a vertex"))
    # the weight switch only exists when a weight was given
    if weight is not None:
        self.add_option(
            "is_wgt", Boolean(default=True, help="consider graph weight?"))
def test_numeric(self):
    """Check `Numeric` validation, parsing, bounds and dict export."""
    # float field
    field = Numeric(vtype=float)
    self.assertNotEqual(repr(field), "")
    self.assertRaises(ReliureTypeError, lambda: Numeric(vtype=any))
    for value, expected in ((2., 2.), (-2.2, -2.2), (-5e0, -5.), (0., 0.)):
        self.assertEqual(field.validate(value), expected)
    for rejected in (1, "1", "blabla", int):
        self.assertRaises(ValidationError, field.validate, rejected)
    self.assertEqual(field.parse("45"), 45.)

    # unsigned int field
    field = Numeric(vtype=int, min=0)
    for accepted in (2, 0):
        self.assertEqual(field.validate(accepted), accepted)
    self.assertRaises(ValidationError, field.validate, -1)
    self.assertEqual(field.parse("45"), 45)

    # int field with min and max
    field = Numeric(vtype=int, min=-10, max=45)
    for accepted in (-10, 0, 2, 45):
        self.assertEqual(field.validate(accepted), accepted)
    for rejected in (-45, 4.5, -11, 55):
        self.assertRaises(ValidationError, field.validate, rejected)

    # int field with min, max and help
    field = Numeric(vtype=int, min=0, max=4, help="an int")
    for accepted in (0, 4):
        self.assertEqual(field.validate(accepted), accepted)
    for rejected in (-1, 8):
        self.assertRaises(ValidationError, field.validate, rejected)

    # dict export of the last field
    expected_dict = {
        'vtype': 'int',
        'default': None,
        'multi': False,
        'uniq': False,
        'choices': None,
        'help': 'an int',
        'max': 4,
        'min': 0,
        'type': 'Numeric',
    }
    self.assertDictEqual(field.as_dict(), expected_dict)
def explore_engine(graphdb):
    """Build the "graph" engine with a prox search and a global component.

    :param graphdb: graph database handed to `db_graph`
    :returns: the configured `Engine`
    """
    engine = Engine("graph")
    engine.graph.setup(in_name="request", out_name="graph")

    @Composable
    def get_graph(query, **kwargs):
        return db_graph(graphdb, query)

    @Composable
    def subgraph(query, cut=100, weighted=True, length=7, mode=ALL, add_loops=False, **kwargs):
        """Run `prox_subgraph` from the vertices named in `query['units']`."""
        graph = db_graph(graphdb, query)
        # uuid -> vertex index lookup for the whole graph
        idx = {v['uuid']: v.index for v in graph.vs}
        vids = [idx[uuid] for uuid in query.get('units', [])]
        return prox_subgraph(graph, vids, cut=cut, weighted=weighted,
                             length=length, mode=mode, add_loops=add_loops,
                             **kwargs)

    from cello.graphs.transform import VtxAttr

    # (name, weighted default, length default, mode default, cut default)
    presets = [
        (u"Search", True, 3, ALL, 100),
    ]

    searchs = []
    for label, weighted_default, length_default, mode_default, cut_default in presets:
        search = Optionable("GraphSearch")
        search._func = subgraph
        search.add_option("weighted", Boolean(default=weighted_default))
        search.add_option(
            "add_loops", Boolean(default=True, help="add loops on vertices"))
        search.add_option(
            "mode",
            Numeric(choices=[OUT, IN, ALL], default=mode_default,
                    help="edge directions"))
        search.add_option(
            "length", Numeric(vtype=int, min=1, default=length_default))
        search.add_option(
            "cut", Numeric(vtype=int, min=2, default=cut_default))

        search |= VtxAttr(color=[(45, 200, 34), ])
        search |= VtxAttr(type=1)
        # the name is set on the object resulting from the pipes above
        search.name = label
        searchs.append(search)

    sglobal = get_graph | ProxSubgraph()
    sglobal.name = "Global"
    sglobal.change_option_default("cut", 200)
    searchs.append(sglobal)

    engine.graph.set(*searchs)
    return engine
def __init__(self, name=None):
    """Declare the `wlength` option.

    :param name: component name forwarded to the parent constructor
    """
    super(WeightByConfluence, self).__init__(name=name)
    wlength_option = Numeric(
        default=3,
        min=1,
        max=10,
        help="length of the random walks",
    )
    self.add_option("wlength", wlength_option)
def __init__(self, name=None):
    """Declare the `min_vtx` option.

    :param name: component name forwarded to the parent constructor
    """
    super(TooSmall, self).__init__(name=name)
    min_vtx_option = Numeric(
        default=2,
        help=u"Minimum number of vertex per cluster",
    )
    self.add_option("min_vtx", min_vtx_option)