def export_calc_engine(graphdb):
    def _export_calc(query, calc_id=None, **kwargs):
        if calc_id is None:
            return {'message': "No calc_id", 'gid': calc_id, 'url': ""}

        query, graph = db_graph(graphdb, query)
        url = "http://calc.padagraph.io/_/cillex-%s" % calc_id
        print("_export_calc %s %s %s" % (query, calc_id, url))

        headers, rows = istex.graph_to_calc(graph)
        print("* PUT %s %s" % (url, len(rows)))
        r = requests.put(url, data=istex.to_csv(headers, rows))

        url = "http://calc.padagraph.io/cillex-%s" % calc_id
        return {'message': "Calc exported", 'gid': calc_id, 'url': url}

    export = Optionable("export_calc")
    export._func = _export_calc
    export.add_option(
        "calc_id",
        Text(default=None,
             help="calc identifier; the calc will be saved to http://calc.padagraph.io/cillex-{calc-id}"))

    engine = Engine("export")
    engine.export.setup(in_name="request", out_name="url")
    engine.export.set(export)

    return engine
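# --- Usage sketch (not in the source): assumes reliure's `Engine.play` API and
# a request payload shaped like the ones consumed by the other engines in this
# module ("graph", "nodes"); how the "calc_id" option is supplied is engine
# configuration and is not shown here.
engine = export_calc_engine(graphdb)
res = engine.play({"graph": "mygraph", "nodes": []})
# res["url"] -> {'message': ..., 'gid': ..., 'url': 'http://calc.padagraph.io/cillex-...'}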
def _build_result_set(self, v_extract):
    """ Build the Doc list from the list of retrieved vertices """
    vid_to_docnum = lambda vid: "%d" % vid
    global_graph = self.graph
    kdocs = []

    schema = Schema(docnum=Numeric(),
                    degree_out=Numeric(),
                    degree_in=Numeric(),
                    score=Numeric(vtype=float),
                    label=Text(),
                    neighbors=Numeric(multi=True, uniq=True))

    for vid, score in v_extract:
        kdoc = Doc(schema, docnum=vid)
        kdoc.score = score
        vtx = global_graph.vs[vid]
        # other attributes
        kdoc.degree_out = vtx.degree(ig.OUT)
        kdoc.degree_in = vtx.degree(ig.IN)
        kdoc.label = vtx['label']
        # neighbors are stored in a multi-valued field
        for nei in vtx.neighbors():  # TODO: distinguish IN/OUT?
            kdoc["neighbors"].add(nei.index)  # TODO: add a weight!
        # append the doc
        kdocs.append(kdoc)
    return kdocs
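# --- Standalone sketch of the Schema/Doc pattern used above (reliure types);
# the field values are placeholders.
from reliure.schema import Doc, Schema
from reliure.types import Numeric, Text

schema = Schema(docnum=Numeric(), label=Text(),
                neighbors=Numeric(multi=True, uniq=True))
doc = Doc(schema, docnum=42)
doc.label = "some vertex"
doc["neighbors"].add(7)   # multi-valued field: add(), as in the loop above
doc["neighbors"].add(13)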
def __init__(self, name=None):
    """ Projection of a bipartite graph to a unipartite graph """
    Optionable.__init__(self, name=name)
    self.add_option("proj_wgt",
                    Text(default='p',
                         help=u"projection weighting method",
                         choices=['no', 'count', 'p', 'pmin', 'pmax', 'pavg', 'confl']))
def import_calc_engine(graphdb):
    def _import_calc(query, calc_id=None, **kwargs):
        query, graph = db_graph(graphdb, query)
        if calc_id is None:
            return None
        url = "http://calc.padagraph.io/cillex-%s" % calc_id
        graph = istex.pad_to_graph(calc_id, url)
        graph['meta']['pedigree'] = pedigree.compute(graph)
        graph['properties']['description'] = url
        graphdb.graphs[calc_id] = graph
        return graph_articles(calc_id, graph, cut=100)

    comp = Optionable("import_calc")
    comp._func = _import_calc
    comp.add_option(
        "calc_id",
        Text(default=None,
             help="calc identifier; the calc will be imported from http://calc.padagraph.io/cillex-{calc-id}"))

    engine = Engine("import_calc")
    engine.import_calc.setup(in_name="request", out_name="graph")
    engine.import_calc.set(comp)

    return engine
def search_engine(graphdb):
    # setup
    engine = Engine("search")
    engine.search.setup(in_name="request", out_name="graph")

    ## Search
    def Search(query, **kwargs):
        query, graph = db_graph(graphdb, query)
        gid = query['graph']
        q = kwargs.pop("URI")
        # field = kwargs.pop("field", None)
        # g = query_istex(gid, q, field)
        g = query_rdf(gid, q)
        graph = merge(gid, graph, g)

        nodes = query['nodes']
        # g = graph_articles(gid, graph, weighting=["1"], all_articles=True, cut=100, uuids=nodes, **kwargs)
        return graph

    search = Optionable("RDFSearch")
    search._func = Search
    search.add_option("URI", Text(default=u"http://silene.magistry.fr/data/nan/sinogram/好"))
    # search.add_option("field", Text(choices=[u"*", u"istex", u"auteurs", u"refBibAuteurs", u"keywords"], default=u"*"))
    # search.add_option("results_count", Numeric(vtype=int, min=1, default=10, help="Istex results count"))
    engine.search.set(search)

    return engine
def __init__(self, index=None, field=None, size=3):
    super(ESPhraseSuggest, self).__init__()
    # configure ES connection
    self.index = index
    self.add_option("field", Text(default=field, help="Suggestions field"))
    self.add_option("size", Numeric(vtype=int, default=size, help="max suggestions"))
def __init__(self, name=None):
    super(ESQueryStringBuilder, self).__init__(name=name)
    self.add_option("operator",
                    Text(choices=[u"AND", u"OR"],
                         default=u"OR",
                         help=u"operator used for chaining terms"))
    self.add_option(
        "fields",
        Text(default=u"_all",
             help=u"""List of fields and the 'boosts' to associate with each of them.
             The format supported is "fieldOne^2.3 fieldTwo fieldThree^0.4", which
             indicates that fieldOne has a boost of 2.3, fieldTwo has the default
             boost, and fieldThree has a boost of 0.4 ..."""))
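# --- Sketch (an assumption, not from the source): ESQueryStringBuilder
# presumably renders an Elasticsearch "query_string" clause from its options,
# along these lines:
expected_clause = {
    "query_string": {
        "query": "lexical graph",    # the user query
        "default_operator": "OR",    # the "operator" option
        "fields": ["_all"],          # the "fields" option, split on whitespace
    }
}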
def __call__(self, docs):
    text_fields = self.text_fields
    out_field = self.out_field
    guess_language = self.guess_language
    for doc in docs:
        if out_field not in doc:
            doc[out_field] = Text()
        texte = "\n".join(doc[text_field] for text_field in text_fields)
        doc[out_field] = guess_language(texte)
    return docs
def search_engine(graphdb):
    # setup
    engine = Engine("search")
    engine.search.setup(in_name="request", out_name="graph")

    ## Search
    def Search(query, results_count=10, **kwargs):
        query, graph = db_graph(graphdb, query)
        gid = query['graph']
        q = kwargs.pop("q", "*")
        field = kwargs.pop("field", None)

        g = query_istex(gid, q, field, results_count)
        graph = merge(gid, graph, g, index=index, vid=vid)

        nodes = query['nodes']
        g = graph_articles(gid, graph, weighting=["1"], all_articles=True,
                           cut=100, uuids=nodes, **kwargs)
        return g

    search = Optionable("IstexSearch")
    search._func = Search
    search.add_option("q", Text(default=u"clle erss"))
    search.add_option("field",
                      Text(choices=[u"*", u"istex", u"auteurs",
                                    u"refBibAuteurs", u"keywords"],
                           default=u"*"))
    search.add_option("results_count",
                      Numeric(vtype=int, min=1, default=10,
                              help="Istex results count"))
    engine.search.set(search)

    return engine
def expand_prox_engine(graphdb):
    """ prox with weights and filters on UNodes and UEdges types

    input: {
        nodes   : [ uuid, .. ],   // more complex p0 distribution
        weights : [ float, .. ],  // list of weights
    }
    output: {
        graph  : gid,
        scores : [ (uuid_node, score), .. ]
    }
    """
    engine = Engine("scores")
    engine.scores.setup(in_name="request", out_name="scores")

    ## Search
    def expand(query, step=3, limit=100, filter_nodes=None, filter_edges=None):
        if filter_nodes is None:
            filter_nodes = []
        if filter_edges is None:
            filter_edges = []
        gid = query.get("graph")
        pzeros = query.get("nodes")
        weights = query.get("weights", [])
        return graphdb.proxemie(gid, pzeros, weights,
                                filter_edges=filter_edges,
                                filter_nodes=filter_nodes,
                                limit=limit, n_step=step)

    scores = Optionable("scores")
    scores._func = Composable(expand)
    scores.add_option("step", Numeric(vtype=int, default=3))
    scores.add_option("limit", Numeric(vtype=int, default=50, max=100))
    scores.add_option("filter_nodes", Text(default=set([]), multi=True, uniq=True))
    scores.add_option("filter_edges", Text(default=set([]), multi=True, uniq=True))
    engine.scores.set(scores)  # use the Optionable so the options above are exposed

    return engine
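# --- Request/response sketch, following the docstring above (uuids are
# placeholders; assumes reliure's `Engine.play` API).
request = {
    "graph": "mygraph",
    "nodes": ["uuid-a", "uuid-b"],   # p0: the random-walk starting nodes
    "weights": [0.7, 0.3],           # optional per-node weights
}
response = expand_prox_engine(graphdb).play(request)
# response["scores"] ~ [("uuid-c", 0.12), ("uuid-d", 0.08), ...]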
def __init__(self, directed=False, reflexive=True, label_attr='form',
             vtx_attr='docnum', links_attr="out_links"):
    # Optionable init
    OptionableGraphBuilder.__init__(self, "GraphBuilder", directed=directed)
    self.reflexive = reflexive
    self.add_option("label_attr", Text(default=label_attr))
    self.add_option("vtx_attr", Text(default=vtx_attr))
    self.add_option("links_attr", Text(default=links_attr))

    # Graph builder init
    vattrs = ("_doc", "rank", "pzero", "docnum", "graph", "lang",
              "pos", "form", "score", "neighbors")
    map(self.declare_vattr, vattrs)
    eattrs = ("weight", )
    map(self.declare_eattr, eattrs)
def graph_engine(graphdb):
    # setup
    engine = Engine("graph")
    engine.graph.setup(in_name="request", out_name="graph")

    def _global(query, reset=False, all_articles=False, cut=100, **kwargs):
        gid = query['graph']
        query, graph = db_graph(graphdb, query)
        nodes = [] if reset else query['nodes']
        g = graph_articles(gid, graph, all_articles=all_articles, cut=cut,
                           uuids=nodes, **kwargs)
        return g

    comp = Optionable("Graph")
    comp._func = _global
    comp.add_option("reset", Boolean(default=False, help="reset or add"))
    comp.add_option("all_articles", Boolean(default=False, help="includes all articles"))
    comp.add_option("weighting",
                    Text(choices=[u"0", u"1", u"weight", u"auteurs",
                                  u"refBibAuteurs", u"keywords", u"categories"],
                         multi=True, default=u"1", help="weighting"))
    comp.add_option("length", Numeric(vtype=int, min=1, default=3))
    comp.add_option("cut", Numeric(vtype=int, min=2, default=100))

    def _reset_global(query, **kwargs):
        gid = query['graph']
        headers = istex.get_schema()
        graph = empty_graph(gid, headers, **kwargs)
        graphdb.graphs[gid] = graph
        g = graph_articles(gid, graph, all_articles=True, uuids=[], **kwargs)
        return g

    reset = Optionable('ResetGraph')
    reset._func = _reset_global
    reset.add_option("reset", Boolean(default=True, help=""), hidden=True)

    engine.graph.set(comp, reset)

    return engine
def expand_prox_engine(graphdb):
    """ prox with weights and filters on UNodes and UEdges types

    input: {
        nodes   : [ uuid, .. ],   // more complex p0 distribution
        weights : [ float, .. ],  // list of weights
    }
    output: {
        graph  : gid,
        scores : [ (uuid_node, score), .. ]
    }
    """
    engine = Engine("scores")
    engine.scores.setup(in_name="request", out_name="scores")

    ## Search
    def expand(query, length=3, cut=300, weighting=None):
        query, graph = db_graph(graphdb, query)
        gid = query.get("graph")
        nodes = query.get("nodes", [])
        expand = query.get("expand", [])

        vs = expand_subgraph(graph, expand, nodes, cut=cut, weightings=weighting)
        vs = [(graph.vs[v[0]]['uuid'], v[1]) for v in vs]
        return dict(vs)

    scores = Optionable("scores")
    scores._func = Composable(expand)
    scores.add_option("length", Numeric(vtype=int, default=3))
    scores.add_option("cut", Numeric(vtype=int, default=50, max=300))
    scores.add_option("weighting",
                      Text(choices=[u"0", u"1", u"weight"],
                           multi=True, default=u"1", help="weighting"))
    engine.scores.set(scores)  # expose the options defined above

    return engine
def __init__(self, vtx_attr, role=None, name=None):
    """ Build the labelling component

    :param vtx_attr: the vertex attribute to use as label string
    :type vtx_attr: str
    :param role: the role of the created vertices
    :type role: str
    :param name: the name of the component
    :type name: str
    """
    super(TypeFalseLabel, self).__init__(name=name)
    self.vtx_attr = vtx_attr
    self.role = role
    self.add_option("score",
                    Text(default=u"recall",
                         choices=[u"recall", u"precision"],
                         help="Label scoring method"))
def __init__(self, index=None, doc_type=None, host="localhost:9200", name=None):
    """
    :param index: index name
    :param doc_type: document type to search; if a list of str an option is
        added, if None all doctypes existing on the index are used
    :param host: ES hostname
    :param name: component name
    """
    super(ESSearch, self).__init__(name=name)
    self.add_option("size",
                    Numeric(vtype=int, default=10, min=0,
                            help="number of documents to return"))

    # configure ES connection
    self.host = host
    self._es_conn = elasticsearch.Elasticsearch(hosts=self.host)
    if not self._es_conn.ping():
        raise RuntimeError("Couldn't ping ES server at '%s'" % self.host)
    self.index = index

    # manage doctype: add an option if needed
    self.doc_type = None
    if isinstance(doc_type, basestring):
        # only one doctype
        self.doc_type = doc_type
    else:
        if doc_type is None:
            # fetch all the existing doctypes
            mappings = self._es_conn.indices.get_mapping(index=self.index)
            doc_type = mappings[self.index]['mappings'].keys()
        if len(doc_type):
            self.add_option("doc_type",
                            Text(multi=True, choices=doc_type,
                                 default=doc_type, help="Documents type"))
        else:
            # empty list: no option, no doctype selection
            self.doc_type = None
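# --- Instantiation sketch (assumptions: an ES node reachable at `host`;
# index/doctype names are placeholders).
search = ESSearch(index="tmuse", doc_type="graph", host="localhost:9200")
# With doc_type=None the constructor instead fetches every doctype mapped on
# the index and exposes them through the multi-valued "doc_type" option.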
def clusters_labels_engine(graphdb):
    def _labels(query, weighting=None, count=2, **kwargs):
        query, graph = db_graph(graphdb, query)
        gid = query['graph']
        clusters = []
        for clust in query['clusters']:
            labels = []
            pz = graph.vs.select(uuid_in=clust)
            pz = [v.index for v in pz if v['nodetype'] == ("_%s_article" % gid)]
            if len(pz):
                vs = extract(graph, pz, cut=300, weighting=weighting, length=3)
                labels = [{'uuid': graph.vs[i]['uuid'],
                           'label': graph.vs[i]['properties']['label'],
                           'score': v}
                          for i, v in vs
                          if graph.vs[i]['nodetype'] != ("_%s_article" % gid)][:count]
            clusters.append(labels)
        return clusters

    comp = Optionable("labels")
    comp._func = _labels
    comp.add_option("weighting",
                    Text(choices=[u"0", u"1", u"weight", u"auteurs",
                                  u"refBibAuteurs", u"keywords", u"categories"],
                         multi=True, default=u"1", help="weighting"))
    comp.add_option("count", Numeric(vtype=int, min=1, default=2))

    engine = Engine("labels")
    engine.labels.setup(in_name="request", out_name="labels")
    engine.labels.set(comp)

    return engine
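# --- Request sketch, read off the code above (uuids are placeholders;
# assumes reliure's `Engine.play` API).
request = {
    "graph": "mygraph",
    "clusters": [                 # one uuid list per cluster
        ["uuid-1", "uuid-2"],
        ["uuid-3"],
    ],
}
labels = clusters_labels_engine(graphdb).play(request)["labels"]
# -> one list of {'uuid', 'label', 'score'} dicts per cluster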
def __init__(self, global_graph, attr_list, default_attr, case_sensitive=True, name=None):
    """
    :param global_graph: the graph to search vertices in
    :param attr_list: list of the vtx attributes used to identify vertices
    :param default_attr: the one used by default (should be in `attr_list`)
    :param case_sensitive: whether the search is case sensitive
    """
    super(VtxMatch, self).__init__(name=name)
    self.add_option("default_attr",
                    Text(default=default_attr, choices=attr_list,
                         help="default search attribute"))
    self.global_graph = global_graph
    self._vattr_list = attr_list
    self._index = {}
    self._case_sensitive = case_sensitive

    # build the indices, one per attribute
    for attr in attr_list:
        self._index[attr] = {}
        for vtx in global_graph.vs:
            # manage case sensitivity
            if self._case_sensitive:
                vtx_label = vtx[attr]
            else:
                vtx_label = vtx[attr].lower()
            if vtx_label in self._index[attr]:
                self._index[attr][vtx_label].append(vtx.index)
            else:
                self._index[attr][vtx_label] = [vtx.index]
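# --- Illustration of the index built above. The igraph usage is real, but
# VtxMatch's public lookup API is not shown in this excerpt, so we only peek
# at the internal structure.
import igraph as ig

g = ig.Graph(n=3)
g.vs["form"] = ["Cat", "cat", "dog"]
match = VtxMatch(g, attr_list=["form"], default_attr="form", case_sensitive=False)
# match._index["form"] == {"cat": [0, 1], "dog": [2]}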
from collections import namedtuple

from botapi import Botagraph, BotApiError
from reliure.types import Text

NodeType = namedtuple("NodeType", "name description properties")
EdgeType = namedtuple("EdgeType", "name description properties")

# Graph Definition
PDG_HOST = "http://g0v-tw.padagraph.io"
PDG_KEY = ""
GRAPHNAME = "G0V Who Writes What"
DESCRIPTION = "a graph of pads and authors"
TAGS = ["pads", "g0v-tw"]

NodePad = NodeType("pad", "", {"id": Text(), "label": Text()})
NodeAuthor = NodeType("author", "", {"id": Text(), "label": Text()})
EdgeLink = EdgeType("writes", "", {})

bot = Botagraph(PDG_HOST, PDG_KEY)
bot.create_graph(GRAPHNAME, {
    'description': DESCRIPTION,
    "tags": TAGS,
    "image": "https://avatars3.githubusercontent.com/u/2668086?v=3&s=200",
})

# Posting Nodes and Edges Types
nodetypes_uuids = {}
from collections import namedtuple

from reliure.types import Text

NodeType = namedtuple("NodeType", "name description properties")
EdgeType = namedtuple("EdgeType", "name description properties")

PDG_HOST = "http://g0v-tw.padagraph.io"
PDG_KEY = ""
GRAPHNAME = "vTaiwan x AirBnB"
DESCRIPTION = "Opinion Graph about AirBnb in Taiwan"
TAGS = ["vTaiwan", "airBnB", "pol.is"]
N_COMMENTS = 227

NodeAgreedComment = NodeType("AgreedComment",
                             "a comment participants agreed with",
                             {"id": Text(), "body": Text(),
                              "label": Text(), "shape": Text()})
NodeDisagreedComment = NodeType("DisagreedComment",
                                "a comment participants disagreed with",
                                {"id": Text(), "body": Text(),
                                 "label": Text(), "shape": Text()})
NodeUser = NodeType("Participant", "",
                    {"id": Text(),
def parse(self, bot):
    # 1 2 3 4 5 6 9 10 11 12 13 14 15 16
    self.completions = []
    gid = self.gid
    path = self.path

    KEYS = ["id", "num", "prefix", "subscript", "superscript", "vocable"]
    WEIGHT_LOC = 1
    WEIGHT_INC_DEF = 0
    WEIGHT_INC_FORM = 0

    idx = {}
    nodes = {}
    edges = []
    nodetypes = {}
    edgetypes = {}

    bot.create_graph(gid, {'name': gid, 'description': "", 'image': "", 'tags': [""]})

    lexie_props = {
        'rlfid': Text(),
        'entry_id': Text(),
        'id': Text(),
        'label': Text(),
        'num': Text(),
        'vocable': Text(),
        'prefix': Text(),
        'subscript': Text(),
        'superscript': Text(),
        'gc': Text(),        # json
        'df': Text(),        # json
        'examples': Text(),  # json
        'lfs': Text(),       # json
    }
    nodetypes["Lexie"] = bot.post_nodetype(gid, "Lexie", "", lexie_props)

    # lexical nodes of the graph
    nodes = {e['id']: e for e in readcsv(path, "01-lsnodes.csv")}
    entries = {e['id']: e for e in readcsv(path, "02-lsentries.csv")}
    for k, v in entries.items():
        i = v.pop('id')
        entries[k]['entry_id'] = i

    def as_token(nid, form, actants):
        dic = dict(zip(KEYS, ["" for e in KEYS]))
        if nid:
            node = nodes.get(nid, None)
            if node:
                values = [node[k] for k in KEYS]
                dic = dict(zip(KEYS, values))
        if form and len(form):
            # substitute actant variables
            _form = form
            if len(actants):
                for k, v, i in actants:
                    _form = _form.replace(k, v)
            dic['vocable'] = _form
        return dic

    for node in nodes.values():
        entry = entries[node['entry']]
        node.update(entry)
        node.update({
            'rlfid': node['id'],
            'id': node['id'],
            'label': node['name'],
            'vocable': node['name'],
            'prefix': node['addtoname'],
            'num': node['lexnum'],
            'subscript': node['subscript'],
            'superscript': node['superscript'],
            'label_form': None,
            'gc': {},
            'lfs': [],
            'df': {
                'form': node['name'],
                'actants': [],
                'left_pf_form': '',
                'right_pf_form': '',
                'html': '',
            },
            'examples': [],
            'definiens': None,
            'formatted_definiens': None,
        })
        to_delete = ('%', 'entry', 'lexnum', 'addtoname')
        for k in to_delete:
            del node[k]

    # DF
    # 09-lssemlabel-model.xml
    # 10-lssemlabel-rel.csv
    # 11-lspropform-rel.csv
    # 17-lsdef.csv
    handler = SemLabelHandler()
    semlabels = handler.parse("%s/09-lssemlabel-model.xml" % path)

    rels = readcsv(path, "10-lssemlabel-rel.csv", type=list)
    for sense, label, percent in rels:
        df = nodes[sense]['df']
        df['label_form'] = semlabels[label]
        df['percent'] = percent

    rels = readcsv(path, "11-lspropform-rel.csv", type=list)
    for id, propform, tildevalue, percent, actantslist in rels:
        df = nodes[id]['df']
        df['propform'] = propform
        df['tildevalue'] = tildevalue
        df['percent'] = percent
        actants = actantslist if actantslist else "()"
        # [ "$1", "X", 1 ]
        actants = ["%s=%s" % (e, i + 1)
                   for i, e in enumerate(actants[1:-1].split(','))
                   if len(e)]
        actants = list(map(lambda e: e.split('='), actants))
        df['actants'] = actants
        df['actantslist'] = actantslist

    # definitional inclusion links
    l_inc_def = {}
    for r in readcsv(path, "17-lsdef.csv", type=list):
        id, def_XML, def_HTML = r
        if id in nodes:
            df = nodes[id]['df']
            df['xml'] = def_XML
            soup = BeautifulSoup(def_XML, 'html.parser')
            rlfids = [(st.attrs['sense'], st.attrs.get('sem', WEIGHT_INC_DEF))
                      for st in soup("st") if st.attrs.get('sense')]
            soup = BeautifulSoup(def_HTML, 'html.parser')
            # ~ rlfids = [a.attrs['href'].split('/')[1] for a in soup("a")]
            l_inc_def[id] = rlfids
            df['html'] = soup.body.prettify()
        else:
            self.error(" # 17-lsdef # no def for %s" % id)

    # GC + PH LOCUTIONS
    """
    gc : {
        usagenote : [],
        othergc : [],
        pos : {},
        locution : {
            locution_tokens : [
                { id : "41142", num : "I.1b", prefix : "", subscript : "",
                  superscript : "", vocable : monter }
            ],
            name : locution verbale,
            type : 2
        }
    }
    """
    handler = GramCharacHandler()
    pos = handler.parse("%s/05-lsgramcharac-model.xml" % path)
    rels = readcsv(path, "06-lsgramcharac-rel.csv", type=list)
    l_inc_form = {}
    print("id, usagenote, usagenotevars, POS, phraseolstruc, embededlex, othercharac, othercharacvars")

    for r in rels:
        id, usagenote, usagenotevars, POS, phraseolstruc, embededlex, othercharac, othercharacvars = r
        if POS == "":
            self.error(" # 06-lsgramcharac-rel : missing POS %s for id %s : %s" % (POS, id, r))
            continue

        # GC: grammatical characteristics
        # gc variables
        split = lambda chaine: ([] if len(chaine) <= 2 else chaine[1:-1].split(','))

        def splitvars(chaine):
            for e in '()':
                chaine = chaine.replace(e, "")
            return [e for e in chaine.split(',') if len(e)]

        node = nodes[id]

        othercharac = [pos[e]['name'] for e in split(othercharac)]
        othercharacvars = splitvars(othercharacvars)
        for i, e in enumerate(othercharac):
            for j, v in enumerate(othercharacvars):
                e = e.replace('%%%s' % (j + 1), v)
            othercharac[i] = e

        usagenote = [pos[e]['name'] for e in split(usagenote)]

        gc = {}
        gc['usagenote'] = usagenote  # fem ..
        gc['othergc'] = othercharac
        gc['locution'] = None
        gc['pos'] = {'name': pos[POS]['name'], 'type': pos[POS]['type']}

        # LN: nominal and prepositional locutions, phrases
        if len(embededlex):
            # embededlex = re.findall("[0-9]+", embededlex)
            # embededlex = embededlex[1:-1].split(',')
            _embededlex = embededlex.replace('),(', ');(')
            _embededlex = [e for e in _embededlex[1:-1].split(';')]
            _embededlex = [e[1:-1].split(',') for e in _embededlex]
            actants = node['df']['actants']
            tokens = [as_token(_id, form, actants) for _id, form in _embededlex]
            gc['locution'] = {
                'tokens': tokens,
                'name': pos[POS]['name'],
                'type': pos[POS]['type'],
            }
            for t in tokens:
                tid = t['id']
                if tid and len(tid):
                    l_inc_form[tid] = l_inc_form.get(tid, []) + [id]
            if "$" in embededlex:
                actants = node['df']['actants']
                z = [e['vocable'] for e in
                     [as_token(id, form, actants) for id, form in _embededlex]]

        node['gc'] = gc

    self.todo(" 06-lsgramcharac-rel.csv : TODO POST LOCUTIONS")
    for r in rels:
        id, usagenote, usagenotevars, POS, phraseolstruc, embededlex, othercharac, othercharacvars = r
        """
        node = nodes[r['id']]
        TODO Locutions links !!
        """
        pass

    # Example nodes
    handler = ExempleSourceHandler()
    sources = handler.parse("%s/14-lsexsource-model.xml" % path)
    exemples = {e['id']: e for e in readcsv(path, "15-lsex.csv")}
    for e in exemples.values():
        d, m, y = ("///%s" % e['date']).split('/')[-3:]
        e['source'] = sources[e['source']]
        e['date_day'] = d
        e['date_month'] = m
        e['date_year'] = y
        e['text'] = e['content']
        e['authors'] = [{'first_name': v.split(',')[0],
                         'last_name': v.split(',')[1] if len(v.split(',')) > 1 else ''}
                        for v in ("%s" % e['authors']).split(';')[:2]
                        if len(e['authors'])]
        del e['content']

    rels = readcsv(path, "16-lsex-rel.csv")
    for e in rels:
        nid, exid, oc, po = (e['id'], e['example'], e['occurrence'], e['position'])
        node = nodes[nid]
        example = dict(exemples[exid])
        occurrences = [{'first': v.split(',')[0], 'last': v.split(',')[1]}
                       for v in oc.split(';') if len(v)]
        example.update({'occurrences': occurrences, 'position': po})
        node['examples'].append(example)

    # POST node vertices
    self.info("\n * POSTING Lexie nodes : %s" % (len(nodes.values())))

    def gen(nodes):
        jsons = ('df', 'gc', 'examples', 'locutions')
        for node in nodes:
            properties = {k: node[k] if k not in jsons else json.dumps(node[k])
                          for k in lexie_props}
            yield {'nodetype': nodetypes['Lexie']['uuid'],
                   'properties': properties}

    for node, uuid in bot.post_nodes(gid, gen(nodes.values()), key='rlfid'):
        idx[node['properties']['rlfid']] = uuid
        r = list(node['properties'][k] for k in
                 ['entry_id', 'rlfid', 'vocable', 'num', 'prefix', 'subscript', 'superscript'])
        self.completions.append([uuid] + r)
    self.info(" * POST Lexie nodes : %s" % (len(idx)))

    # Relations / edges

    # ## Definitional inclusion links
    # 17-lsdef.csv
    self.info("17-lsdef.csv [POST] definitional inclusion links")
    name = "DefinitionalInclusion"
    properties = {"weight": Text()}
    edgetypes[name] = bot.post_edgetype(gid, name, name, properties)
    self.info(" * POST edgetype : %s %s" % (name, edgetypes[name]['uuid']))

    edges = []
    skipped_weight = 0
    _nodes = {e['rlfid']: e for e in nodes.values()}
    for source, targets in l_inc_def.items():
        print(source, targets)
        for target, weight in targets:
            payload = {
                'edgetype': edgetypes[name]['uuid'],
                'source': idx[source],
                'target': idx[target],
                'properties': {'weight': weight},
            }
            edges.append(payload)

    self.info("17-lsdef.csv [POST] formal inclusion links")
    name = "FormalInclusion"
    properties = {"weight": Text()}
    edgetypes[name] = bot.post_edgetype(gid, name, name, properties)
    for source, targets in l_inc_form.items():
        weight = WEIGHT_INC_FORM
        for target in targets:
            payload = {
                'edgetype': edgetypes[name]['uuid'],
                'source': idx[source],
                'target': idx[target],
                'properties': {'weight': weight},
            }
            edges.append(payload)

    for e in bot.post_edges(gid, iter(edges), lambda e: e['edgetype']):
        pass

    # ## Co-polysemy links
    # 03-lscopolysemy-model.xml
    # 04-lscopolysemy-rel.csv
    handler = CopolysemyHandler()
    copo = handler.parse("%s/03-lscopolysemy-model.xml" % path)
    copo = {e['id']: e for e in copo}

    _name = lambda t, s: "Co-polysemy/%s%s%s" % (t['name'], "/" if s else "",
                                                 s['name'] if s else "")

    def weight_copo(typ):
        return int(copo[typ]['semantics'])

    # edgetypes
    self.info(" * POSTING Co-polysemy edgetypes : %s" % (len(copo.values())))
    for cop in copo.values():
        tp = cop['id']  # cop['name']
        name = _name(cop, None)
        desc = ""
        properties = {"weight": Text(), 'i': Text()}
        edgetypes[name] = bot.post_edgetype(gid, name, desc, properties)
        self.info(" * POST edgetype : %s %s" % (name, edgetypes[name]['uuid']))
        for k, v in cop['subtypes'].items():
            name = _name(cop, v)
            edgetypes[name] = bot.post_edgetype(gid, name, desc, properties)
            self.info(" * POST edgetype : %s %s" % (name, edgetypes[name]['uuid']))

    rels = readcsv(path, "04-lscopolysemy-rel.csv", type=list)
    edges = []
    count = 0
    for i, r in enumerate(rels):
        src, tgt, typ, subtype = r
        t = copo[typ]
        s = copo[typ]['subtypes'].get(subtype, None) if len(subtype) else None
        if len(subtype) and (copo[typ]['subtypes'].get(subtype, None) is None):
            self.error(" # 04-lscopolysemy-rel # no subtype %s in type %s (line %s) %s"
                       % (subtype, typ, i + 2, r))
        count += 1
        payload = {
            'edgetype': edgetypes[_name(t, s)]['uuid'],
            'source': idx[src],
            'target': idx[tgt],
            'properties': {'weight': weight_copo(typ), 'i': count},
        }
        edges.append(payload)

    self.info("\n * POSTING Co-polysemy edges : %s" % len(edges))
    for cop in copo.values():
        name = _name(cop, None)
        self.debug(" edges : %s %s" % (
            len([e for e in edges if e['edgetype'] == edgetypes[name]['uuid']]), name))
        for k, v in cop['subtypes'].items():
            name = _name(cop, v)
            self.debug(" edges : %s %s" % (
                len([e for e in edges if e['edgetype'] == edgetypes[name]['uuid']]), name))

    for e in bot.post_edges(gid, iter(edges), extra=lambda e: e['properties']['i']):
        pass

    # ## Lexical function (LF) links
    # 12-lslf-model.xml holds the hierarchical model of LFs: each LF belongs
    # to a "family", and each family is itself part of a "group" of families;
    # 13-lslf-rel.csv holds the set of LF links between individual lexies.
    handler = LexicalFunctionHandler()
    flex = handler.parse("%s/12-lslf-model.xml" % path)

    # POST edgetypes
    self.info(" * POSTING Lexical Function edgetypes : %s" % (len(flex.values())))
    _name = lambda x: "LexicalFunction/%s" % x['name']
    for fl in flex.values():
        tp = fl['id']
        name = _name(fl)
        desc = ""
        properties = {
            "weight": Text(), 'form': Text(), 'separator': Text(),
            'merged': Text(), 'syntacticframe': Text(),
            'constraint': Text(), 'position': Text(),
        }
        attributes = {
            "order": fl['order'],
            "cdata": fl['cdata'],
        }
        edgetypes[name] = bot.post_edgetype(gid, name, desc, properties, attributes)
        self.debug(" * POST edgetype : %s %s" % (name, edgetypes[name]['uuid']))

    # POST edges
    # LF
    rels = readcsv(path, "13-lslf-rel.csv", type=list)
    edges = []
    skipped_weight = 0
    for source, lf, target, form, separator, merged, syntacticframe, constraint, position in rels:
        weight = int(flex[lf]['semantics'])
        payload = {
            'edgetype': edgetypes[_name(flex[lf])]['uuid'],
            'source': idx[source],
            'target': idx[target],
            'properties': {
                'weight': weight,
                'form': form,
                'separator': separator,
                'merged': merged,
                'syntacticframe': syntacticframe,
                'constraint': constraint,
                'position': position,
            },
        }
        edges.append(payload)

    weights = list(len([e for e in edges if e['properties']['weight'] == i])
                   for i in [0, 1, 2])
    self.info(' !! weights [ 0 : %s, 1 : %s, 2 : %s ] ' % tuple(weights))

    self.info(" * POSTING Lexical Function edges : %s" % len(edges))
    for fl in flex.values():
        name = _name(fl)
        self.debug(" edges : %s %s" % (
            len([e for e in edges if e['edgetype'] == edgetypes[name]['uuid']]), name))

    count = 0
    uuids = []
    for e, uuid in bot.post_edges(gid, iter(edges), extra=lambda e: e['edgetype']):
        count += 1
        uuids.append(uuid)
    self.info(" * POST Lexical Function edges : %s " % (count))

    print("\n\n == DEBUG == \n\n")
    print(len(nodes))
def _parse_csvrows(self, csv, rows, **kwargs):
    # ( name, type, indexed, projection )
    def _w(e):
        isproj = "%" in e
        w = re.findall("\((-?[0-9]?\.?[0-9]+)\)", e)
        if isproj and len(w):
            w = float(w[0])
        elif isproj:
            w = 1.
        else:
            w = None
        return w

    def _v(e):
        isproj = "%" in e
        w = "".join(re.findall("\[(.*)\]", e))
        if not isproj:
            return w if len(w) else None
        elif isproj:
            return None

    for row in csv:
        cell = row[0]
        # ! comment
        if cell and cell[:1] == "!":
            continue

        # & IMPORT external resource
        if cell and cell[:1] == "&":
            url = cell[1:].strip()
            # circular references
            if url not in self.imports:
                self.log(" === Import === '%s'" % url)
                rows = self._parse(url, rows, **kwargs)
            else:
                raise BotapadParseError(self.path,
                                        "Same file is imported multiple times !", row)

        # @ Nodetypes, _ Edgetypes
        elif cell and cell[:1] in ("@", "_"):
            self.post(self.current, rows)
            rows = []

            # processing directive
            line = ";".join(row)
            cols = re.sub(' ', '', line[1:])  # no space
            # @Politic: %Chamber; #First Name; #Last Name; %Party; %State; %Stance; Statement;
            cols = [e for e in re.split("[:;,]", "%s" % cols, flags=re.UNICODE) if len(e)]
            label = cols[0]  # @Something

            start = 1
            if cell[:1] == "_" and cell[1] == "" and cell[1] == "":
                start = 3

            props = [Prop(name=norm_key(e),
                          type=Text(multi="+" in e, default=_v(e)),
                          isref="@" in e,
                          isindex="#" in e,
                          ismulti="+" in e,
                          isproj="%" in e,
                          iscliq="+" in e and "=" in e,
                          isignored="!" in e,
                          direction="OUT" if ">" in e else "IN" if "<" in e else "ALL",
                          weight=_w(e),
                          value=_v(e))
                     for e in cols[start:]]

            def get_prop(name):
                for e in props:
                    if e.name == name:
                        return e
                return None

            start = 0
            end = None
            props = props[0:end]

            self.log("\n * @%s : Props " % label)
            self.log("   (%s)" % ",".join(Prop()._fields))
            for e in props:
                self.log("   %s" % str([v for v in e]))

            names = [k.name for k in props]
            projs = [k.name for k in props if k.isproj]
            indexes = [k.name for k in props if k.isindex]
            typeprops = lambda px: {p.name: p.type for p in px}

            if cell[:1] == "@":  # nodetype def
                # raise an error if neither label nor index is set
                pl = get_prop('label')
                if len(indexes) == 0 and pl is None:
                    message = 'No `index` nor `label` set for @%s ' % (label)
                    raise BotapadParseError(self.path, message, row)
                if len(indexes) == 0:
                    indexes = ['label']

                for prop in props:
                    if len(prop.name) == 0:
                        message = "Property error %s " % prop
                        raise BotapadParseError(self.path,
                                                'Parse error : %s ' % message, row)

                if len(projs) > 0 and len(indexes) == 0:
                    message = "no `index` properties to create edge %s " % self.current
                    raise BotapadParseError(self.path,
                                            'Parse error : %s\n ' % (message), row)

                self.current = (VERTEX, label, props)
                if not label in self.nodetypes:
                    self.log("\n >> posting @ %s [%s] [%s] [%s]"
                             % (label, ", ".join(names), ", ".join(indexes), ", ".join(projs)))
                    self.nodetypes[label] = self.bot.post_nodetype(
                        self.gid, label, label, typeprops(props))
                    self.node_headers[label] = props

            elif cell[:1] == "_":  # edgetype def
                rows = []
                self.current = (EDGE2, label, props)
                if not label in self.edgetypes:
                    if "label" not in names:
                        props = [Prop(name="label", type=Text(), value="")] + props
                    if "weight" not in names:
                        props = [Prop(name="weight", type=Numeric(), value=1.)] + props
                    names = [k.name for k in props]
                    self.log(" >> posting _ %s [%s]" % (label, ", ".join(names)))
                    self.edgetypes[label] = self.bot.post_edgetype(
                        self.gid, label, "", typeprops(props))
                    self.edge_headers[label] = props

        else:  # table data
            if self.current and self.current[2]:
                props = self.current[2]
                if self.current[0] in (EDGE, EDGE2):
                    start = 1
                    # if self.current[0] == EDGE:
                    if self.current[0] == EDGE2:
                        start = 3
                    for i, v in enumerate(row[start:]):
                        if i >= len(props):
                            break
                        if props[i].ismulti:
                            row[i + start] = list(set([
                                e.strip() for e in re.split("[,;]", v.strip())
                                if e.strip() != ""
                            ]))
                elif self.current[0] == VERTEX:
                    for i, v in enumerate(row):
                        if i >= len(props):
                            break
                        if props[i].ismulti:
                            row[i] = [
                                e.strip() for e in re.split("[,;]", v.strip())
                                if e.strip() != ""
                            ]
            rows.append(row)

    return rows
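# --- Example pad understood by the parser above (the data rows are made up;
# the "@Politic" header line reuses the directive quoted in the comments).
# Markers handled above: "!" comment, "&" import, "@" nodetype, "_" edgetype,
# "#" index column, "%" projected column, "+" multi-valued column.
SAMPLE_PAD = """
! a comment line
@Politic: %Chamber; #First Name; #Last Name; %Party; %State; %Stance; Statement
Senate; Claire; Durand; Green; Vermont; pro; some statement
House; Jean; Martin; Blue; Ohio; con; another statement
"""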
def apply_projectors(self, rows, label):
    """ property projector """
    src = label  # @ Label
    props = self.node_headers[src]
    projs = [p for p in props if p.isproj]
    names = [k[0] for k in props]

    for iprop, prop in enumerate(props):
        if not (prop.isproj or prop.iscliq):
            continue

        # @ Label: %prop0 , ...
        tgt = prop.name

        # distinct column values
        values = []
        if prop.ismulti == False:
            values = [r[iprop] for r in rows]
        else:
            for r in rows:
                if iprop < len(r):
                    values.extend([k.strip() for k in r[iprop]])
        values = list(set(values))

        self.log("\n * [Projector] : %s(%s) -- %s(%s) (%s) "
                 % (src, len(rows), tgt, len(values), prop.name))

        if tgt in self.node_headers:
            nodeprops = {prop.name: Text(default=prop.value)
                         for prop in self.node_headers[tgt]}
        else:
            nodeprops = {"label": Text()}
            self.node_headers[tgt] = [
                Prop('label', Text(), False, False, False, False, False, False, 1., None)
            ]
            self.nodetypes[tgt] = self.bot.post_nodetype(self.gid, tgt, tgt, nodeprops)

        payload = []
        # is this a table ? @ prop0
        for v in values:
            # key = "%s_%s" % (tgt, v)
            key = "%s" % (v)
            if key not in self.idx:
                # default values
                _k = [p.name for p in self.node_headers[tgt] if p.value]
                _v = [p.value for p in self.node_headers[tgt] if p.value]
                properties = dict(zip(_k, _v))
                properties['label'] = v
                payload.append({
                    'nodetype': self.nodetypes[tgt]['uuid'],
                    'properties': properties,
                })

        if len(payload):
            self.log(" * [Projector] posting @ %s %s " % (len(payload), tgt))
            for node, uuid in self.bot.post_nodes(self.gid, iter(payload)):
                tgtid = '%s' % (node['properties']['label'])
                self.idx[tgtid] = uuid
                self.debug(node)

        etname = "%s/%s" % (src, tgt)
        edgeprops = {"label": Text(), 'weight': Numeric(vtype=float, default=1.)}
        if etname not in self.edgetypes:
            self.log(" * [Projector] POST edgetype %s %s " % (etname, edgeprops))
            self.edgetypes[etname] = self.bot.post_edgetype(
                self.gid, etname, etname, edgeprops)

        # label -- property edges
        edges = []
        indexes = [e for e, k in enumerate(props) if k.isindex]
        cliqset = set()
        cliqedges = []
        cliqname = ""

        for r in rows:
            if iprop < len(r):
                targets = r[iprop] if prop.ismulti else [r[iprop]]

                if prop.iscliq:
                    cliqname = "%s_clique" % (prop.name)
                    if cliqname not in self.edgetypes:
                        self.log(" * [Projector] POST edgetype %s %s "
                                 % (cliqname, edgeprops))
                        self.edgetypes[cliqname] = self.bot.post_edgetype(
                            self.gid, cliqname, cliqname, edgeprops)

                    for e, t in enumerate(targets):
                        for t2 in targets[e + 1:]:
                            # order-independent key for the pair
                            cliqe = '%s%s' % (t, t2) if t > t2 else '%s%s' % (t2, t)
                            if cliqe not in cliqset:
                                properties = {"label": cliqname, 'weight': prop.weight}
                                if cliqname in self.edge_headers:
                                    _k = [p.name for p in self.edge_headers[cliqname] if p.value]
                                    _v = [p.value for p in self.edge_headers[cliqname] if p.value]
                                    properties = dict(zip(_k, _v))
                                cliqedges.append({
                                    'edgetype': self.edgetypes[cliqname]['uuid'],
                                    'source': self.idx['%s' % (t)],
                                    'target': self.idx['%s' % (t2)],
                                    'properties': properties,
                                })
                                cliqset.add(cliqe)

                if prop.isproj:
                    for t in targets:
                        st = self.node_headers[label]
                        srcid = "".join([r[i] for i in indexes])
                        tgtid = '%s' % (t)

                        properties = dict()
                        if etname in self.edge_headers:
                            _k = [p.name for p in self.edge_headers[etname] if p.value]
                            _v = [p.value for p in self.edge_headers[etname] if p.value]
                            properties = dict(zip(_k, _v))
                        properties['label'] = etname
                        properties['weight'] = prop.weight

                        # edge direction
                        essrc = self.idx[srcid] if prop.direction in ("IN",) else self.idx[tgtid]
                        estgt = self.idx[srcid] if prop.direction in ("OUT", "ALL") else self.idx[tgtid]

                        edges.append({
                            'edgetype': self.edgetypes[etname]['uuid'],
                            'source': essrc,
                            'target': estgt,
                            'weight': prop.weight,
                            'properties': properties,
                        })

        direction = prop.direction
        self.log(" * [Projector] posting _ = %s %s %s "
                 % (len(cliqedges), direction, cliqname))
        for e in self.bot.post_edges(self.gid, iter(cliqedges), extra=lambda x: etname):
            self.debug(e)

        self.log(" * [Projector] posting _ %% %s %s %s "
                 % (len(edges), direction, etname))
        for e in self.bot.post_edges(self.gid, iter(edges), extra=lambda x: etname):
            self.debug(e)
"vertices_color": {'fort': (255,150,0), 'bon': (200,255,0), 'faible': (50,50,255), 'mauvais': (255,50,50)}, }, } for gname, config in graph_config.iteritems(): graph = igraph.read(config.pop("path")) graph['vertices_color'] = config.pop("vertices_color") graphs.add(gname) engine = lexical_graph_engine(graph) view = EngineView(engine) view.set_input_type(Text()) view.add_output("query", lambda x : x.encode('utf8')) view.add_output("graph", export_graph) view.add_output("layout", export_layout) view.add_output("clusters", export_clustering) api = ReliureAPI(name=gname ) api.register_view(view, url_prefix="api" ) app.register_blueprint(api, url_prefix="/graph/%s" % (gname) ) # === Routes ===
    # ... (end of the ES query body `q`, truncated in this excerpt)
    res = index.search(body=q, size=len(ids))
    return res


TmuseDocSchema = Schema(
    docnum=Numeric(),
    # stored fields
    graph=Text(),
    lang=Text(),
    pos=Text(),
    pzero=Boolean(),
    form=Text(),
    neighbors=Numeric(),
    out_links=Numeric(multi=True, uniq=True),
    # computed fields
    rank=Numeric(),
    score=Numeric(vtype=float, default=0.),
)


def to_docs(es_res, pzeros):
    _pzeros = set(pzeros) or set([])
    docs = []
    if 'hits' in es_res and 'hits' in es_res['hits']:
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--host", action='store', help="host",
                        default="http://localhost:5000")
    parser.add_argument("--key", action='store', help="authentication token", default=None)
    parser.add_argument("--gid", action='store', help="graph id", default=None)
    args = parser.parse_args()

    host, key, gid = (args.host, args.key, args.gid)
    if None in (host, key, gid):
        parser.print_help()
        return

    # Setup schema
    from reliure.schema import Doc, Schema
    from reliure.types import Text, Numeric, Boolean, GenericType

    desc = """ Game of thrones %s """.replace(" ", "")

    g_attrs = {
        'description': desc % gid,
        #'image': "https://commons.wikimedia.org/wiki/File:Game_of_Thrones_2011_logo.svg?uselang=fr",
        #'tags': ['social-network', 'game-of-thrones']
    }

    # used for houses, wildlings ...
    group_type = Schema(**{
        'label': Text(),
        'url': Text(),
        'tags': Text(multi=True, uniq=True),
        'image': Text(),
        'color': Text(),
        'shape': Text(default=u"square"),
        "name": Text(),
    })

    # human-like characters
    character_type = Schema(**{
        'label': Text(),
        'url': Text(multi=True, uniq=True),
        'tags': Text(multi=True, uniq=True),
        'image': Text(),
        'shape': Text(default=u"circle"),
        'description': Text(),
        "name": Text(),
        "actor": Text(multi=True, uniq=True),
        "dubbling_vf": Text(multi=True, uniq=True),
        "bio_fr": Text(),
        "bio_en": Text(),
        "seasons": Text(),
        "dead": Boolean(default=False),
    })

    # creatures: dragons, wolves, white walkers ?
    creatur_type = Schema(**{
        'label': Text(),
        'url': Text(),
        'tags': Text(multi=True, uniq=True),
        'image': Text(),
        'shape': Text(default=u"triangle"),
        'description': Text(),
        "name": Text(),
        "bio_fr": Text(),
        "bio_en": Text(),
        "seasons": Text(),
        "dead": Boolean(default=False),
    })

    edgetypes = [
        # Character or Creatur -- rel --> Group
        # (name, desc, properties),
        ("is_member_of", "Character is member of a Group", {"from_ep": Text()}),
        ("is_child_of", "character or creatur is child of another one", {}),
        ("works_for", "character or creatur works for a character or a group",
         {"from_episode": Text(), "to_episode": Text()}),
        ("is_friend_of", "character is friend of another one", {"from_ep": Text()}),
        ("married", "character meet another one", {"force": Numeric()}),
        ("belongs_to", "character or creatur belongs to another one",
         {"from_episode": Text(), "to_episode": Text()}),
        ("kill", "character or creatur kill another one",
         {"episode": Text(), "method": Text()}),
        #("have_sex", "character or creatur have sex another one", {"episode": Text()}),
        #("rape", "character or creatur rape another one", {"episode": Text()}),
        #("meet", "character meet another one", {"episode": Text()}),
        #("loves", "character meet another one", {}),
    ]

    # PARSING the WP page
    from pyquery import PyQuery as pq
    import codecs

    root = "."
    path = "%s/pages/Personnages_de_Game_of_Thrones" % root
    graphmlz = "%s/got.graphml" % root

    def opengot():
        html = codecs.open(path, mode='r', encoding="utf8").read()
        html = pq(html)
        html = html(".mw-content-ltr")
        html(".mw-content-ltr h2:first").prevAll().remove()
        html(".mw-content-ltr h2:first").remove()
        html(".mw-content-ltr h2:first").nextAll().remove()
        html('.mw-editsection').remove()
        html('sup').remove()
        html = pq(".mw-content-ltr", html)
        return html

    def as_doc(ctype, cdata):
        d = Doc(ctype)
        for k, v in cdata.iteritems():
            if type(ctype[k]) == Text:
                d.set_field(k, v, True)
            else:
                d[k] = v
        return d.export()

    def _parse_color(e):
        color = None
        if "style" in e.attrib:
            styles = dict(pair.strip().split(':')
                          for pair in pq(e).attr("style").strip().lower().split(';')
                          if len(pair))
            color = styles.get("background", None)
        if color and color in ("black", "#000"):
            color = "#000000"
        return color

    def parse_belongs_legend(html):
        houses_map = {}
        legende = pq("li", pq("table td ul", html)[:4])
        for e in legende:
            color = _parse_color(pq("span", e)[0])
            text = pq(e).text()  # .replace("Maison ", "")
            houses_map[color] = text
        # remove the legend
        html(".mw-content-ltr h3:first").prevAll().remove()
        return houses_map

    def parse_creaturs_and_characters(html, houses):
        rel_belongs = []
        rel_member_of = []
        characters = []
        creaturs = []

        while True:
            # reading from the end
            if len(html("h3:last")):
                ths = pq('th', html("h3:last").nextAll())
                tds = pq('td', html("h3:last").nextAll())
                title = html("h3:last").text()
                color = None
                flg = 0

                if len(ths) % 5 == 0:
                    c = {}
                    member_of = []
                    for td in tds:
                        colspan = td.attrib.get('colspan', 0)
                        if colspan == "6":
                            # table headers
                            color = _parse_color(td)
                            if color:
                                member_of.append(houses[color])
                            flg = 1
                        elif colspan == 0:
                            # table cells
                            if flg == 1:
                                actor_img = pq("img", td).attr("src")
                                if actor_img:
                                    c['image'] = "http:%s" % actor_img
                            elif flg == 2:
                                name = pq(td).text()
                                c['name'] = name
                                for e in member_of:
                                    rel_member_of.append((name, e))
                            elif flg == 3:
                                c['actor'] = [pq(e).text() for e in pq("a", td)]
                            elif flg == 4:
                                c['dubbling_vf'] = [pq(e).text() for e in pq("a", td)]
                            elif flg == 5:
                                c['seasons'] = pq(td).text()
                                c['dead'] = u"✝" in pq(td).text()
                            flg += 1
                        elif colspan == "5":
                            # table bio cell
                            c['bio_fr'] = pq(td).text()
                            characters.append(as_doc(character_type, c))
                            # reset
                            c = {}
                            member_of = []
                            flg = 1

                if len(ths) == 2:
                    c = {}
                    belongs = []
                    for td in tds:
                        colspan = td.attrib.get('colspan', 0)
                        if colspan == "6":
                            color = _parse_color(td)
                            if color:
                                belongs.append(houses[color])
                            flg = 1
                        elif colspan == 0:
                            if flg == 1:
                                name = pq(td).text().strip()
                                c['name'] = name
                                for e in belongs:
                                    rel_belongs.append((name, e))
                                flg = 2
                            if flg == 2:
                                c["seasons"] = pq(td).text()
                                c["dead"] = True  # u"✝" in pq(td).text()
                        elif colspan == "5":
                            c['bio_fr'] = pq(td).text()
                            creaturs.append(as_doc(creatur_type, c))
                            c = {}
                            belongs = []
                            flg = 0

                # remove each section once parsed
                html("h3:last").nextAll().remove()
                html("h3:last").remove()
            else:
                break

        return characters, rel_member_of, creaturs, rel_belongs

    # In[ ]:
    from reliure.schema import Doc

    locations = []  # TODO

    html = opengot()
    houses_map = parse_belongs_legend(html)
    characters, rel_member_of, creaturs, rel_belongs = parse_creaturs_and_characters(
        html, houses_map)

    print("Groups %s" % len(houses_map))
    print("Creaturs %s" % len(creaturs))
    print("Characters %s" % len(characters))
    print("member_of %s" % len(rel_member_of))
    print("belongs %s" % len(rel_belongs))

    from botapi import Botagraph, BotApiError

    bot = Botagraph(host, key)

    if not bot.has_graph(gid):
        print("\n * Creating graph %s" % gid)
        bot.create_graph(gid, g_attrs)

        print("\n * Creating node types")
        bot.post_nodetype(gid, "Character", "Character", character_type._fields)
        bot.post_nodetype(gid, "Creatur", "Creatur", creatur_type._fields)
        bot.post_nodetype(gid, "Group", "Group", group_type._fields)
        for name, desc, props in edgetypes:
            bot.post_edgetype(gid, name, desc, props)

    schema = bot.get_schema(gid)['schema']
    nodetypes = {n['name']: n for n in schema['nodetypes']}
    edgetypes = {e['name']: e for e in schema['edgetypes']}

    idx = {}  # (label, uuid)

    groups = []
    for k, v in houses_map.iteritems():
        g = as_doc(group_type, {'label': v, 'name': v, 'color': k})
        groups.append(g)

    for name, els in [("Character", characters), ("Creatur", creaturs), ("Group", groups)]:
        print("Posting %s nodes %s" % (len(els), name))
        for c in els:
            payload = {
                'nodetype': nodetypes[name]['uuid'],
                'properties': {k: v for k, v in c.iteritems()},
            }
            payload['properties']['label'] = payload['properties']['name']
            node = bot.post_node(gid, payload)
            idx[node['label']] = node['uuid']

    vids = set()
    for name, rels in [("is_member_of", rel_member_of), ("belongs_to", rel_belongs)]:
        print("Posting %s rels %s" % (len(rels), name))
        for src, tgt in rels:
            if src in idx and tgt in idx:
                edge = {
                    'edgetype': edgetypes[name]['uuid'],
                    'source': idx[src],
                    'label': name,
                    'target': idx[tgt],
                    'properties': {"from_ep": ""},
                }
                uuid = bot.post_edge(gid, edge)
                vids.add(src)
                vids.add(tgt)
            else:
                print("%s %s" % (src, tgt))

    print("Starring %s nodes" % len(list(vids)))
    bot.star_nodes(gid, list(vids))
def __init__(self, global_graph, prox_func, default_mode=OUT,
             weight=None, loops_weight=None, name=None):
    """
    :param global_graph: a subclass of :class:`.AbstractGraph`
    :param prox_func: curried function for prox. Only `graph`, `pzero`, and
        `length` will be passed as arguments to the function. If you want to
        modify the other named arguments, pass a lambda with all the named
        arguments set.
    :param default_mode: default mode for the random walk (useful only if the
        graph is directed)
    :param weight: if None the graph is not weighted, else it can be: a str
        corresponding to an edge attribute to use as weight, or a list of
        weights (`|weight| == graph.ecount()`), or a callable
        `lambda graph, source, target: wgt`
    :param loops_weight: only if `add_loops`, weight for added loops; it may
        be: a str corresponding to a vertex attribute, or a list of weights
        (`|loops_weight| == graph.vcount()`), or a callable
        `lambda graph, vid, mode, weight: wgt`

    Here is an example of a usable prox function:

    >>> def prox_func(graph, pzero, length):
    ...     return prox.prox_markov_dict(graph, pzero, length, mode=OUT,
    ...                                  add_loops=False, weight=None)
    """
    super(ProxExtractGlobal, self).__init__(name=name)
    self.add_option("vcount", Numeric(default=10, help="max vertex count"))
    self.add_option("length", Numeric(default=3, help="random walk length"))
    self.add_option("add_loops",
                    Boolean(default=True, help="virtually add loops on each vertex"))

    self._modes = {
        "text_to_num": {"IN": IN, "OUT": OUT, "ALL": ALL},
        "num_to_text": {IN: u"IN", OUT: u"OUT", ALL: u"ALL"},
    }
    self.add_option("mode",
                    Text(default=self._modes["num_to_text"][default_mode],
                         choices=[u"IN", u"OUT", u"ALL"],
                         help="edges to walk on from a vertex"))

    self._wgt = weight
    if weight is not None:
        self.add_option("is_wgt", Boolean(default=True, help="consider graph weight?"))

    self.prox_func = prox_func
    self.global_graph = global_graph
    self._loops_weight = loops_weight
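# --- Construction sketch based on the docstring above; the prox_func body is
# taken from the docstring, while `g` (an igraph-like graph) and the "weight"
# edge attribute are placeholders.
def prox_func(graph, pzero, length):
    return prox.prox_markov_dict(graph, pzero, length, mode=OUT,
                                 add_loops=False, weight=None)

extractor = ProxExtractGlobal(g, prox_func, default_mode=OUT, weight="weight")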
from collections import namedtuple

from botapi import Botagraph, BotApiError
from reliure.types import Text

NodeType = namedtuple("NodeType", "name description properties")
EdgeType = namedtuple("EdgeType", "name description properties")

# Graph Definition
PDG_HOST = "http://g0v-tw.padagraph.io"
PDG_KEY = ""
GRAPHNAME = "G0V Hackpads network"
DESCRIPTION = "a graph of inter-linked Hackpads"
TAGS = ["pads", "g0v-tw"]

NodePad = NodeType("pad", "", {"id": Text(), "label": Text(), "url": Text()})
EdgeLink = EdgeType("link to", "", {})

bot = Botagraph(PDG_HOST, PDG_KEY)
bot.create_graph(GRAPHNAME, {
    'description': DESCRIPTION,
    "tags": TAGS,
    "image": "https://avatars3.githubusercontent.com/u/2668086?v=3&s=200",
})

# Posting Nodes and Edges Types
nodetypes_uuids = {}
edgetypes_uuids = {}
def __init__(self, **kwargs):
    super(Url, self).__init__(attrs={'type': Text(), 'title': Text()})
    self.validators.append(TypeValidator(datetime.datetime))
    self._init_validation()
def parse(self, path):
    """
    :param path: txt file path

    Handles special lines starting with [! & @ _] for comments, imports,
    node types, and property names.
    """
    csv = self.read(path)

    rows = []
    current = ()  # (VERTEX | EDGE, label, names, index_prop)

    for row in csv:
        cell = row[0]
        # ! comment
        if cell[:1] == "!":
            continue

        # & IMPORT external resource
        if cell[:1] == "&":
            url = cell[1:].strip()
            self.parse(url)

        # @ Nodetypes, _ Edgetypes
        elif cell[:1] in ("@", "_"):
            if len(current) > 0:
                self.store(current, rows, path)

            # processing directive
            line = ";".join(row)
            cols = re.sub(' ', '', line[1:])  # no space
            # @Politic: %Chamber; #First Name; #Last Name; %Party; %State; %Stance; Statement;
            cols = [e for e in re.split("[:;,]", "%s" % cols, flags=re.UNICODE) if len(e)]
            label = cols[0]  # @Something

            # ( name, type, indexed, projection )
            props = [Prop(norm_key(e), Text(multi="+" in e), "@" in e, "#" in e,
                          "+" in e, "%" in e, "+" in e and "=" in e)
                     for e in cols[1:]]

            if cell[:1] == "@":  # nodetype def
                rows = []
                current = (VERTEX, label, props)
            elif cell[:1] == "_":  # edgetype def
                rows = []
                current = (EDGE, label, props)

        else:  # table data
            if current and current[2]:
                for i, v in enumerate(row):
                    if i >= len(props):
                        break
                    if props[i].ismulti:
                        row[i] = [e.strip() for e in re.split("[_,;]", v.strip())]
            rows.append(row)

    self.store(current, rows, path)
def TmuseApi(name, host='localhost:9200', index_name='tmuse', doc_type='graph', retry=5):
    """ API over tmuse elastic search """
    esindex = EsIndex(index_name, doc_type=doc_type, host=host)
    print("# TmuseApi %s %s %s" % (host, doc_type, index_name))

    # let ES start
    for i in range(retry):
        if not esindex._es.ping():
            print("waiting for es to start")
            time.sleep(i)
    assert esindex._es.ping(), "impossible to reach ES server"

    # build the API from this engine
    print("api name %s" % name)
    api = ReliureAPI(name)

    # Main api entry point: tmuse engine (subgraph)
    view = EngineView(engine(esindex))
    view.set_input_type(ComplexQuery())
    view.add_output("query", ComplexQuery())
    view.add_output("graph", export_graph)
    view.add_output("layout", export_layout)
    view.add_output("clusters", export_clustering)
    # add a simple play route
    view.play_route("<query>")
    api.register_view(view, url_prefix="subgraph")

    # Add auto-completion view
    completion = TmuseEsComplete(index=esindex, size=20)
    # TODO suggestion rerank
    # completion |= rerank
    completion_view = ComponentView(completion)
    completion_view.add_input("lang", Text(default=u"*"))
    completion_view.add_input("pos", Text(default=u"*"))
    completion_view.add_input("form")
    completion_view.add_output("response")
    completion_view.play_route("<lang>.<pos>.<form>")
    api.register_view(completion_view, url_prefix="complete")

    import random

    @api.route("/random")
    @api.route("/random/<string:pos>")
    def random_node(pos=None, retry=5, count=0):
        if pos not in ALL_POS:
            pos = random.sample(ALL_POS, 1)[0]
        graph = "jdm.%s.flat" % pos
        docs = tmuse.random_node(esindex, graph)
        doc = docs[0] if len(docs) else dict()
        return jsonify({'pos': pos, 'doc': doc})

    # Debug views
    @api.route("/_extract/<string:graph>/<string:text>")
    def _extract(graph, text):
        query = QueryUnit(graph=graph, form=text)
        es_res = tmuse.extract(esindex, query)
        return jsonify({'res': es_res})

    @api.route("/_prox/<string:graph>/<string:text>")
    def _prox(graph, text):
        es_res = proxlist(esindex, graph, text, 100)
        return jsonify({'res': es_res})

    return api
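# --- Mounting sketch: ReliureAPI is registered as a Flask blueprint elsewhere
# in this collection (see the `app.register_blueprint` pattern above), so a
# minimal wiring could look like this; the app, prefix, and example route
# values are placeholders.
from flask import Flask

app = Flask(__name__)
api = TmuseApi("tmuse")
app.register_blueprint(api, url_prefix="/tmuse")
# e.g. GET /tmuse/complete/<lang>.<pos>.<form> -> auto-completion response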