Exemplo n.º 1
0
def reddit_reader(params):
    """Parse the Reddit dump named in *params* into the hypergraph."""
    reader = RedditReader(HyperGraph(params), comments=params['comments'])
    reader.read_file(params['infile'])
Exemplo n.º 2
0
        if text[-1].isalnum():
            text += '.'
        self.process_text(text, author, reset_context=True, aux_text='')
        if self.comments:
            self.process_comments(post)

    def read_file(self, filename):
        """Read a JSON-lines Reddit dump and process each post.

        Announces whether comments are included, then prints edge-count
        totals accumulated during processing.
        """
        status = 'Including comments.' if self.comments else 'Not including comments.'
        print(status)

        with open(filename, 'r') as dump:
            for line in dump:
                self.process_post(json.loads(line))

        print('main edges created: %s' % self.main_edges)
        print('extra edges created: %s' % self.extra_edges)
        print('ignored edges: %s' % self.ignored)


if __name__ == '__main__':
    from gb.hypergraph.hypergraph import HyperGraph
    # Ingest the dump without comments into a leveldb-backed hypergraph.
    graph = HyperGraph({'backend': 'leveldb', 'hg': 'wikidata.hg'})
    reader = RedditReader(graph, comments=False)
    reader.read_file('reddit-wordlnews-27032017-28032017.json')
Exemplo n.º 3
0
def wikidata(ctx):
    """Import the wikidata dump named in the click context."""
    click.echo('reading wikidata...')
    graph = HyperGraph(ctx.obj)
    wd.read(graph, ctx.obj['infile'])
    click.echo('done.')
Exemplo n.º 4
0
def shell(params):
    """Open an interactive shell on the hypergraph described by *params*."""
    Shell(HyperGraph(params)).run()
Exemplo n.º 5
0
def hypergraph(path):
    """Return a leveldb-backed HyperGraph stored at *path*."""
    params = {'backend': 'leveldb', 'hg': path}
    return HyperGraph(params)
Exemplo n.º 6
0
def headlines_inference(params):
    """Run headline inference over the infile named in *params*."""
    graph = HyperGraph(params)
    hl.headlines_inference(graph, params['infile'])
Exemplo n.º 7
0
def dbpedia_wordnet(params):
    """Load a DBPedia/WordNet file into the hypergraph."""
    print('reading DBPedia...')
    graph = HyperGraph(params)
    dbpwn.read(graph, params['infile'])
    print('done.')
Exemplo n.º 8
0
def ui(ctx):
    """Launch the UI over the hypergraph configured in the click context."""
    start_ui(HyperGraph(ctx.obj))
Exemplo n.º 9
0
def reddit_reader(ctx):
    """Parse a Reddit dump into the hypergraph from click context options."""
    opts = ctx.obj
    reader = RedditReader(HyperGraph(opts), comments=opts['comments'])
    reader.read_file(opts['infile'])
Exemplo n.º 10
0
def generate_parsed_sentences_file(ctx):
    """Write parsed sentences from infile to outfile for reader tests."""
    opts = ctx.obj
    tests = ReaderTests(HyperGraph(opts), opts['disamb'])
    tests.generate_parsed_sentences_file(opts['infile'], opts['outfile'])
    click.echo('done.')
Exemplo n.º 11
0
def reader_debug(ctx):
    """Run the reader in debug mode over the context's infile."""
    opts = ctx.obj
    ReaderTests(HyperGraph(opts), opts['disamb']).reader_debug(opts['infile'])
    click.echo('done.')
Exemplo n.º 12
0
def shell(ctx):
    """Start an interactive hypergraph shell from the click context."""
    graph = HyperGraph(ctx.obj)
    Shell(graph).run()
    click.echo('done.')
Exemplo n.º 13
0
def info(ctx):
    """Print symbol, edge and total-degree counts for the hypergraph."""
    graph = HyperGraph(ctx.obj)
    print('symbols: %s' % graph.symbol_count())
    print('edges: %s' % graph.edge_count())
    print('total degree: %s' % graph.total_degree())
Exemplo n.º 14
0
def dbpedia_wordnet(ctx):
    """Load a DBPedia/WordNet file into the hypergraph (click variant)."""
    click.echo('reading DBPedia...')
    graph = HyperGraph(ctx.obj)
    dbpwn.read(graph, ctx.obj['infile'])
    click.echo('done.')
Exemplo n.º 15
0
def all2json(params):
    """Dump every edge of the hypergraph to the configured outfile."""
    graph = HyperGraph(params)
    AllFilter(graph).write_edges(params['outfile'])
Exemplo n.º 16
0
def create(ctx):
    """Create the hypergraph described by the click context."""
    click.echo('creating hypergraph...')
    HyperGraph(ctx.obj)  # instantiation creates the backing store
    click.echo('done.')
Exemplo n.º 17
0
def generate_synonyms(params):
    """Generate synonym relations in the hypergraph."""
    synonyms.generate(HyperGraph(params))
Exemplo n.º 18
0
def wordnet(ctx):
    """Import WordNet into the hypergraph (click variant)."""
    click.echo('reading wordnet...')
    graph = HyperGraph(ctx.obj)
    wn.read(graph)
    click.echo('done.')
Exemplo n.º 19
0
def create(params):
    """Create the hypergraph described by *params*."""
    print('creating hypergraph...')
    HyperGraph(params)  # instantiation creates the backing store
    print('done.')
Exemplo n.º 20
0
    # print(symbol)
    visited.add(sym.symbol2str(symbol))
    synonyms = [synonym for synonym in syn.synonyms(hg, symbol)]
    edges = [s for s in hg.star(symbol)]
    edges = [edge for edge in edges if is_concept(edge)]
    return {'symbol': symbol,
            'synonyms': [down(hg, synonym, visited) for synonym in synonyms],
            'derived_symbols': [down(hg, edge, visited) for edge in edges]}


def derived_symbols(hg, ont, symbols=None, depth=0):
    """Flatten an ontology tree into ``{symbol string: {'degree', 'depth'}}``.

    Recursively walks ``ont['derived_symbols']``, recording each symbol's
    degree and its depth in the tree.  A dict passed as *symbols* is
    updated in place and also returned.
    """
    # Explicit None check: with the previous `if not symbols`, a
    # caller-supplied empty dict was replaced by a fresh one, so the
    # caller's dict was never populated.
    if symbols is None:
        symbols = {}
    symbol = ont['symbol']
    degree = syn.degree(hg, symbol)
    symbols[sym.symbol2str(symbol)] = {'degree': degree, 'depth': depth}
    for subont in ont['derived_symbols']:
        derived_symbols(hg, subont, symbols, depth + 1)
    return symbols


if __name__ == '__main__':
    params = {'backend': 'leveldb',
              'hg': 'reddit-worldnews-01012017-28032017.hg'}
    hyper = HyperGraph(params)
    # Build the ontology below this symbol, then list symbol degrees.
    onto = down(hyper, 'south_korea/lem.wdQ884')
    ds = derived_symbols(hyper, onto)
    for symbol_str in ds:
        print('%s %s' % (symbol_str, ds[symbol_str]['degree']))
Exemplo n.º 21
0
def info(params):
    """Print basic statistics about the hypergraph."""
    graph = HyperGraph(params)
    print('symbols: %s' % graph.symbol_count())
    print('edges: %s' % graph.edge_count())
    print('total degree: %s' % graph.total_degree())
Exemplo n.º 22
0
def wordnet(params):
    """Import WordNet into the hypergraph."""
    print('reading wordnet...')
    graph = HyperGraph(params)
    wn.read(graph)
    print('done.')
Exemplo n.º 23
0
def reader_tests(params):
    """Run the reader test suite over the configured infile."""
    graph = HyperGraph(params)
    rtests.reader_tests(graph, params['infile'], params['show_namespaces'])
Exemplo n.º 24
0
def wikidata(params):
    """Import the wikidata dump named in *params*."""
    print('reading wikidata...')
    graph = HyperGraph(params)
    wd.read(graph, params['infile'])
    print('done.')
        self.process_text(text, web_entity)

    def read_file(self, filename):
        """Read a SemBubble CSV export and process every row as a post.

        The first row is treated as a header and skipped.  Prints
        edge-count totals when done.
        """
        csv.field_size_limit(sys.maxsize)
        with open(filename, 'r') as csvfile:
            rows = csv.reader(csvfile, delimiter=',', quotechar='"')
            for i, row in enumerate(rows):
                if i == 0:
                    continue  # header row
                self.process_post({
                    'id': row[0],
                    'url': row[1],
                    'web_entity_id': row[2],
                    'web_entity': row[3],
                    'text': row[4],
                })

        print('main edges created: %s' % self.main_edges)
        print('extra edges created: %s' % self.extra_edges)
        print('ignored edges: %s' % self.ignored)


if __name__ == '__main__':
    from gb.hypergraph.hypergraph import HyperGraph
    graph = HyperGraph({'backend': 'leveldb', 'hg': 'card_and_id_fraud.hg'})
    SemBubbleReader(graph).read_file('Card_and_ID_fraud.csv')
Exemplo n.º 26
0
def ui(params):
    """Launch the UI over the hypergraph described by *params*."""
    start_ui(HyperGraph(params))
Exemplo n.º 27
0
                self.entities[entity]['total_conflict_from'] = total

                h, total = herfindhal_and_total(self.entities[entity]['conflict_to']
                                                + self.entities[entity]['conflict_from'])
                self.entities[entity]['h_conflict'] = h
                self.entities[entity]['total_conflict'] = total

                h, total = herfindhal_and_total(self.entities[entity]['conflict_over'])
                self.entities[entity]['h_conflict_over'] = h
                self.entities[entity]['total_conflict_over'] = total

                h, total = herfindhal_and_total(self.entities[entity]['conflict_for'])
                self.entities[entity]['h_conflict_for'] = h
                self.entities[entity]['total_conflict_for'] = total

                self.write_metrics(entity)

                i += 1
                bar.update(i)

    def process(self):
        """Run the full pipeline: find actors, infer, compute metrics."""
        for step in (self.find_actors, self.infer, self.compute_metrics):
            step()


if __name__ == '__main__':
    graph = HyperGraph({'backend': 'leveldb', 'hg': 'infer.hg'})
    Headlines(graph, par.Parser(), 'predicate_patterns.csv').process()
Exemplo n.º 28
0
        print('author: %s' % author)

        text = message.strip()
        if len(text) == 0:
            return
        if text[-1].isalnum():
            text += '.'
        self.process_text(parent, author, text)

    def read_file(self, filename):
        """Read a JSON-lines Facebook dump and process every comment.

        Each line holds a post plus its comments; prints edge-count
        totals at the end.
        """
        with open(filename, 'r') as dump:
            for line in dump:
                entry = json.loads(line)
                parent = entry['begin']['from']
                for comment in entry['comments']:
                    self.process_comment(parent, comment['from'],
                                         comment['message'])

        print('main edges created: %s' % self.main_edges)
        print('extra edges created: %s' % self.extra_edges)
        print('ignored edges: %s' % self.ignored)


if __name__ == '__main__':
    from gb.hypergraph.hypergraph import HyperGraph
    graph = HyperGraph({'backend': 'leveldb', 'hg': 'facebook.hg'})
    FacebookReader(graph).read_file('statuses.json')