def reddit_reader(params):
    """Import a Reddit JSON dump into the hypergraph described by *params*.

    Reads 'infile' and 'comments' from the params dict.
    """
    reader = RedditReader(HyperGraph(params), comments=params['comments'])
    reader.read_file(params['infile'])
# NOTE(review): this chunk begins mid-method -- the enclosing `def` (and the
# class header, presumably RedditReader) are not visible here. Indentation
# below is reconstructed from the logical structure.
        # Ensure the text ends with sentence punctuation before parsing.
        if text[-1].isalnum():
            text += '.'
        self.process_text(text, author, reset_context=True, aux_text='')
        if self.comments:
            self.process_comments(post)

    def read_file(self, filename):
        """Read a file with one JSON-encoded post per line and process each
        post; prints edge-creation statistics when done."""
        # self.extractor.debug = True
        if self.comments:
            print('Including comments.')
        else:
            print('Not including comments.')
        with open(filename, 'r') as f:
            for line in f:
                post = json.loads(line)
                self.process_post(post)
        print('main edges created: %s' % self.main_edges)
        print('extra edges created: %s' % self.extra_edges)
        print('ignored edges: %s' % self.ignored)


if __name__ == '__main__':
    from gb.hypergraph.hypergraph import HyperGraph
    # NOTE(review): the store name 'wikidata.hg' looks copy-pasted from the
    # wikidata script, and 'wordlnews' in the input filename looks like a
    # typo for 'worldnews' -- confirm both against the actual data files.
    hgr = HyperGraph({'backend': 'leveldb', 'hg': 'wikidata.hg'})
    RedditReader(
        hgr, comments=False).read_file('reddit-wordlnews-27032017-28032017.json')
def wikidata(ctx):
    """CLI command: load a wikidata export file into the hypergraph."""
    click.echo('reading wikidata...')
    wd.read(HyperGraph(ctx.obj), ctx.obj['infile'])
    click.echo('done.')
def shell(params):
    """Open an interactive shell on the hypergraph described by *params*."""
    Shell(HyperGraph(params)).run()
def hypergraph(path):
    """Return a leveldb-backed HyperGraph stored at *path*."""
    config = {'backend': 'leveldb', 'hg': path}
    return HyperGraph(config)
def headlines_inference(params):
    """Run headline inference on 'infile' over the configured hypergraph."""
    graph = HyperGraph(params)
    hl.headlines_inference(graph, params['infile'])
def dbpedia_wordnet(params):
    """Import a DBPedia file into the hypergraph, printing progress."""
    print('reading DBPedia...')
    graph = HyperGraph(params)
    dbpwn.read(graph, params['infile'])
    print('done.')
def ui(ctx):
    """CLI command: launch the web UI on the configured hypergraph."""
    start_ui(HyperGraph(ctx.obj))
def reddit_reader(ctx):
    """CLI command: import a Reddit JSON dump into the hypergraph."""
    opts = ctx.obj
    reader = RedditReader(HyperGraph(opts), comments=opts['comments'])
    reader.read_file(opts['infile'])
def generate_parsed_sentences_file(ctx):
    """CLI command: parse sentences from 'infile' and write them to 'outfile'."""
    opts = ctx.obj
    tests = ReaderTests(HyperGraph(opts), opts['disamb'])
    tests.generate_parsed_sentences_file(opts['infile'], opts['outfile'])
    click.echo('done.')
def reader_debug(ctx):
    """CLI command: run the reader in debug mode over 'infile'."""
    opts = ctx.obj
    ReaderTests(HyperGraph(opts), opts['disamb']).reader_debug(opts['infile'])
    click.echo('done.')
def shell(ctx):
    """CLI command: open an interactive shell on the hypergraph."""
    interactive = Shell(HyperGraph(ctx.obj))
    interactive.run()
    click.echo('done.')
def info(ctx):
    """CLI command: print summary statistics for the hypergraph.

    Reports the symbol count, edge count and total degree.
    """
    hg = HyperGraph(ctx.obj)
    # Use click.echo for consistency with the other click-based commands in
    # this file (wikidata, wordnet, create, ...) -- it also handles output
    # encoding/redirection more robustly than bare print.
    click.echo('symbols: %s' % hg.symbol_count())
    click.echo('edges: %s' % hg.edge_count())
    click.echo('total degree: %s' % hg.total_degree())
def dbpedia_wordnet(ctx):
    """CLI command: import a DBPedia file into the hypergraph."""
    click.echo('reading DBPedia...')
    dbpwn.read(HyperGraph(ctx.obj), ctx.obj['infile'])
    click.echo('done.')
def all2json(params):
    """Export all edges of the hypergraph to the JSON file 'outfile'."""
    graph = HyperGraph(params)
    AllFilter(graph).write_edges(params['outfile'])
def create(ctx):
    """CLI command: create (initialize) the hypergraph store."""
    click.echo('creating hypergraph...')
    # Constructing the HyperGraph object creates the backing store.
    HyperGraph(ctx.obj)
    click.echo('done.')
def generate_synonyms(params):
    """Generate synonym relations in the hypergraph described by *params*."""
    synonyms.generate(HyperGraph(params))
def wordnet(ctx):
    """CLI command: import WordNet into the hypergraph."""
    click.echo('reading wordnet...')
    wn.read(HyperGraph(ctx.obj))
    click.echo('done.')
def create(params):
    """Create (initialize) the hypergraph store described by *params*."""
    print('creating hypergraph...')
    # Constructing the HyperGraph object creates the backing store.
    HyperGraph(params)
    print('done.')
# print(symbol) visited.add(sym.symbol2str(symbol)) synonyms = [synonym for synonym in syn.synonyms(hg, symbol)] edges = [s for s in hg.star(symbol)] edges = [edge for edge in edges if is_concept(edge)] return {'symbol': symbol, 'synonyms': [down(hg, synonym, visited) for synonym in synonyms], 'derived_symbols': [down(hg, edge, visited) for edge in edges]} def derived_symbols(hg, ont, symbols=None, depth=0): if not symbols: symbols = {} symbol = ont['symbol'] degree = syn.degree(hg, symbol) symbols[sym.symbol2str(symbol)] = {'degree': degree, 'depth': depth} for subont in ont['derived_symbols']: derived_symbols(hg, subont, symbols, depth + 1) return symbols if __name__ == '__main__': params = {'backend': 'leveldb', 'hg': 'reddit-worldnews-01012017-28032017.hg'} hyper = HyperGraph(params) onto = down(hyper, 'south_korea/lem.wdQ884') ds = derived_symbols(hyper, onto) # print(ds) for s in ds: print('%s %s' % (s, ds[s]['degree']))
def info(params):
    """Print symbol, edge and total-degree counts for the hypergraph."""
    graph = HyperGraph(params)
    stats = (('symbols', graph.symbol_count()),
             ('edges', graph.edge_count()),
             ('total degree', graph.total_degree()))
    for label, value in stats:
        print('%s: %s' % (label, value))
def wordnet(params):
    """Import WordNet into the hypergraph, printing progress."""
    print('reading wordnet...')
    wn.read(HyperGraph(params))
    print('done.')
def reader_tests(params):
    """Run reader tests over 'infile', optionally showing namespaces."""
    graph = HyperGraph(params)
    rtests.reader_tests(graph, params['infile'], params['show_namespaces'])
def wikidata(params):
    """Import a wikidata export file into the hypergraph, printing progress."""
    print('reading wikidata...')
    wd.read(HyperGraph(params), params['infile'])
    print('done.')
# NOTE(review): this chunk begins mid-method -- the enclosing `def` (and the
# SemBubbleReader class header) are not visible here. Indentation below is
# reconstructed from the logical structure.
        self.process_text(text, web_entity)

    def read_file(self, filename):
        """Read a CSV of crawled pages -- columns (id, url, web_entity_id,
        web_entity, text) -- skipping the header row and processing every
        other row as a post; prints edge-creation statistics when done."""
        # self.extractor.debug = True
        # Rows can carry very large text fields; lift the csv field limit.
        csv.field_size_limit(sys.maxsize)
        with open(filename, 'r') as csvfile:
            first = True
            for row in csv.reader(csvfile, delimiter=',', quotechar='"'):
                if first:
                    # skip the header row
                    first = False
                else:
                    post = {
                        'id': row[0],
                        'url': row[1],
                        'web_entity_id': row[2],
                        'web_entity': row[3],
                        'text': row[4]
                    }
                    self.process_post(post)
        print('main edges created: %s' % self.main_edges)
        print('extra edges created: %s' % self.extra_edges)
        print('ignored edges: %s' % self.ignored)


if __name__ == '__main__':
    from gb.hypergraph.hypergraph import HyperGraph
    hgr = HyperGraph({'backend': 'leveldb', 'hg': 'card_and_id_fraud.hg'})
    SemBubbleReader(hgr).read_file('Card_and_ID_fraud.csv')
def ui(params):
    """Launch the web UI on the hypergraph described by *params*."""
    start_ui(HyperGraph(params))
# NOTE(review): this chunk begins mid-method -- the start of the per-entity
# metrics loop (and the Headlines class header) are not visible here.
# Indentation below is reconstructed from the logical structure.
            self.entities[entity]['total_conflict_from'] = total
            # Concentration (named "herfindhal" here) and total over the
            # combined incoming + outgoing conflict lists.
            h, total = herfindhal_and_total(
                self.entities[entity]['conflict_to'] +
                self.entities[entity]['conflict_from'])
            self.entities[entity]['h_conflict'] = h
            self.entities[entity]['total_conflict'] = total
            h, total = herfindhal_and_total(self.entities[entity]['conflict_over'])
            self.entities[entity]['h_conflict_over'] = h
            self.entities[entity]['total_conflict_over'] = total
            h, total = herfindhal_and_total(self.entities[entity]['conflict_for'])
            self.entities[entity]['h_conflict_for'] = h
            self.entities[entity]['total_conflict_for'] = total
            self.write_metrics(entity)
            # advance the progress bar (presumably a progress widget --
            # its construction is not visible in this chunk)
            i += 1
            bar.update(i)

    def process(self):
        """Full pipeline: find actors, infer relations, compute metrics."""
        self.find_actors()
        self.infer()
        self.compute_metrics()


if __name__ == '__main__':
    hgr = HyperGraph({'backend': 'leveldb', 'hg': 'infer.hg'})
    parse = par.Parser()
    Headlines(hgr, parse, 'predicate_patterns.csv').process()
# NOTE(review): this chunk begins mid-method -- the start of process_comment
# (and the FacebookReader class header) are not visible here. Indentation
# below is reconstructed from the logical structure.
        print('author: %s' % author)
        text = message.strip()
        if len(text) == 0:
            return
        # Ensure the text ends with sentence punctuation before parsing.
        if text[-1].isalnum():
            text += '.'
        self.process_text(parent, author, text)

    def read_file(self, filename):
        """Read a file with one JSON-encoded status entry per line and
        process every comment under each entry; prints edge-creation
        statistics when done."""
        # self.extractor.debug = True
        with open(filename, 'r') as f:
            for entry in f:
                entry = json.loads(entry)
                parent = entry['begin']['from']
                for comment in entry['comments']:
                    author = comment['from']
                    message = comment['message']
                    self.process_comment(parent, author, message)
        print('main edges created: %s' % self.main_edges)
        print('extra edges created: %s' % self.extra_edges)
        print('ignored edges: %s' % self.ignored)


if __name__ == '__main__':
    from gb.hypergraph.hypergraph import HyperGraph
    hgr = HyperGraph({'backend': 'leveldb', 'hg': 'facebook.hg'})
    FacebookReader(hgr).read_file('statuses.json')