Exemplo n.º 1
0
def get_pattern_data(search_param):
    """Print tweets for *search_param* and export a graph of related phrases.

    First prints the ASCII-only plain text of every (cached) search result.
    Then fetches 10 result pages of 50 tweets each, looks for
    ``{NP} (VP) <search_param> {NP}`` matches in the parsed text, and links
    the left and right noun phrases in a graph that is exported to ``data``.

    Args:
        search_param: the query string; it is also embedded verbatim in the
            search pattern between the two noun-phrase groups.
    """
    twitter = Twitter(language='en')

    for tweet in twitter.search(search_param, cached=True):
        print(plaintext(tweet.text).encode('ascii', 'ignore').decode('utf-8'))

    # The pattern only depends on the query, so build it once.
    p = '{NP} (VP) ' + search_param + ' {NP}'

    g = Graph()
    for i in range(10):
        for result in twitter.search(search_param, start=i + 1, count=50):
            s = parsetree(plaintext(result.text.lower()))
            for m in search(p, s):
                x = m.group(1).string  # NP left
                y = m.group(2).string  # NP right
                # Register each endpoint independently, then always add the
                # edge. Nesting these under "x not in g" would drop every
                # relation whose left phrase had been seen before.
                if x not in g:
                    g.add_node(x)
                if y not in g:
                    g.add_node(y)
                g.add_edge(g[x], g[y], stroke=(0, 0, 0, 0.75))  # R,G,B,A

    # Highlight the first 40 nodes of the sorted graph; the fill alpha
    # scales with the node's weight.
    for n in g.sorted()[:40]:
        n.fill = (0, 0.5, 1, 0.75 * n.weight)

    g.export('data', directed=False, weighted=0.6)
Exemplo n.º 2
0
    def render_graph(self, domains):
        """Render *domains* as a graph and export it to ``graph``.

        Each key of *domains* becomes a node whose radius grows with the
        number of entries stored for that domain. The stroke colour is picked
        from the domain's categories in ``self.cat_dict`` (later checks
        override earlier ones: right, then left, then least_biased). The fill
        darkens with the number of dubious categories the domain belongs to.
        Two domains are connected when their entries overlap; the edge's
        stroke width is the size of the overlap.

        Args:
            domains: mapping of domain -> collection of hashable entries.
        """
        dub_cats = (
            'fake', 'questionable', 'clickbait', 'unreliable', 'conspiracy'
        )
        g = Graph()
        for domain in domains:
            # Reset per domain so that a domain missing from cat_dict gets
            # neutral defaults instead of a NameError (first iteration) or
            # the previous domain's categories/stroke (later iterations).
            categories = ()
            stroke = (0, 0, 0, 0.5)
            if domain in self.cat_dict:
                categories = self.cat_dict[domain]
                if 'right' in categories:
                    stroke = (255, 0, 0, 1)
                elif 'right_center' in categories:
                    stroke = (255, 0, 0, .5)
                if 'left' in categories:
                    stroke = (0, 0, 255, 1)
                elif 'left_center' in categories:
                    stroke = (0, 0, 255, .5)
                if 'least_biased' in categories:
                    stroke = (0, 255, 0, 1)

            fill = (128, 128, 0, 0.1)
            score = sum(1 for c in categories if c in dub_cats)
            if score:
                # Alpha is the share of the five dubious categories matched.
                fill = (0, 0, 0, float(score) / 5)
            g.add_node(domain,
                       radius=len(domains[domain]) * 6,
                       stroke=stroke,
                       strokewidth=6,
                       fill=fill,
                       font_size=30)

        pairs = self.pairwise(domains.keys())
        for x, y in pairs:
            x_queries = set(domains[x])
            y_queries = set(domains[y])
            intersection = len(x_queries.intersection(y_queries))
            if intersection > 0:
                max_rad = max(len(domains[x]), len(domains[y])) + 1000
                g.add_edge(x, y, length=max_rad, strokewidth=intersection)

        path = 'graph'
        g.export(path,
                 encoding='utf-8',
                 distance=6,
                 directed=False,
                 width=1400,
                 height=900)
Exemplo n.º 3
0
    def __init__(self, args, depth=1):
        """Initialise state from the parsed options in *args*.

        Wraps every entry of ``args.url`` in a ``WebPage``, prepares one
        ``Result`` per start domain, and opens the history database and the
        "websites" cloud index. When ``args.graph`` or ``args.rank`` is set,
        a web graph is created with one node per start domain.
        """
        self.links = [WebPage(address) for address in args.url]
        self.depth = depth
        self.historyDb = WebsiteDatabase()
        self.done = False
        self.options = args

        # One result holder per start domain.
        self.results = {}
        for page in self.links:
            self.results[page.url.domain] = Result()

        self.cloudIndexer = CloudSearchIndexer.forDomainIndex("websites")

        wants_graph = args.graph or args.rank
        if wants_graph:
            self.webGraph = Graph(distance=30.0)
            for page in self.links:
                self.webGraph.add_node(
                    page.url.domain, radius=15, fill=(1, 0, 0, 0.5))
Exemplo n.º 4
0
 def make_graph(cls, parse, enhanced=True):
     """Build a graph from ``(child, parent, arc)`` triples.

     Each distinct child/parent value gets exactly one ``Node``; an edge
     parent -> child typed with the arc label is added unless both ends
     have the same id. When *enhanced* is false, only the part of the arc
     label before the first ``:`` is kept.

     Returns:
         ``(graph, edge_map, node_map, root)`` where ``root`` is the child
         of the last triple whose arc is ``'root'`` (or None), and
         ``edge_map`` is returned empty.
     """
     graph = Graph()
     node_map = {}
     edge_map = {}
     root = None
     for child_key, parent_key, label in parse:
         if label == 'root':
             root = child_key
         if not enhanced:
             label = label.split(':')[0]
         # Resolve both endpoints to their shared Node, child first.
         endpoints = []
         for key in (child_key, parent_key):
             if key not in node_map:
                 node_map[key] = Node(key)
             endpoints.append(node_map[key])
         child_node, parent_node = endpoints
         if parent_node.id != child_node.id:
             graph.add_edge(parent_node, child_node, type=label)
     return graph, edge_map, node_map, root
Exemplo n.º 5
0
 def visualize_rel(self):
     """Export the subject/object relation graph and log node weights.

     ``self.subject_object_dict`` is read under the keys ``"S1"`` ..
     ``"Sn"`` (n = number of entries); each value supplies two iterables
     at index 0 and 1. Every pair from their Cartesian product becomes an
     ``is-related-to`` edge. The graph is exported to ``FeatureRelations``
     and one ``"<weight> <node>"`` line per node, in ascending weight
     order, is appended to ``GraphNodeWeights.txt``.
     """
     ordered_pairs = []
     for i in range(1, len(self.subject_object_dict) + 1):
         key = "S" + str(i)
         ordered_pairs.append(list(itertools.product(
             self.subject_object_dict[key][0],
             self.subject_object_dict[key][1])))

     g = Graph()
     for pairs in ordered_pairs:
         for n1, n2 in pairs:
             g.add_node(n1)
             g.add_node(n2)
             g.add_edge(n1, n2, weight=0.0, type='is-related-to')
     g.export('FeatureRelations', directed=True)

     # Write to the file directly instead of temporarily swapping
     # sys.stdout: the handle is always closed and stdout can never be left
     # redirected if formatting a node raises.
     with open('GraphNodeWeights.txt', 'a') as gn:
         for n in sorted(g.nodes, key=lambda n: n.weight):
             gn.write('%.2f %s\n' % (n.weight, n))
0
import os, sys
sys.path.insert(0, os.path.join("..", ".."))

from pattern.graph import Graph
from pattern.graph import export, WEIGHT, CENTRALITY
from random import choice, random

# This example demonstrates how a graph visualization can be exported to HTML,
# using the HTML5 <canvas> tag and Javascript.
# All properties (e.g. stroke color) of nodes and edges are ported.

g = Graph()
# Fifty nodes with the ids "1" .. "50".
for index in range(1, 51):
    g.add_node(id=str(index), radius=5, stroke=(0, 0, 0, 1), text=(0, 0, 0, 1))
# Seventy-five edges between randomly picked endpoints, with random weights.
for _ in range(75):
    g.add_edge(choice(g.nodes),
               choice(g.nodes),
               length=1.0,
               weight=random(),
               stroke=(0, 0, 0, 1))

# Tint the first 20 nodes of the sorted graph: more blue = more important.
for ranked in g.sorted()[:20]:
    ranked.fill = (0.6, 0.8, 1.0, 0.8 * ranked.weight)

# Give node "1" a label that differs from its id.
# It is also turned into a link, see the href attribute at the bottom.
g["1"].text.string = "home"

# The export() command generates a folder with an index.html,
# that displays the graph using an interactive, force-based spring layout.
Exemplo n.º 7
0
            }
            images = document.getElementsByTagName('img');
            for(var i = 0; i < images.length; i++) {
	            images[i].ondragstart = function() { return false; };
            }
    };
    </script>
""" % {
    "image": css_image,
    "user": css_user
}

# Module-level Instagram API client shared by the functions below.
# NOTE(review): access_token and client_secret are not defined in the visible
# part of the file; presumably they are set above.
api = InstagramAPI(access_token=access_token, client_secret=client_secret)
# Module-level graph; `distance` is likewise expected to be defined above.
graph = Graph(distance=distance)


# Finds the user-id of a user.
# Takes a single username (string) as its argument and
#    returns a User object and its user-id (as a string).
# !! The exact username must be given, otherwise this function will return the wrong user!
def find_user(username):
    """Search the Instagram API for *username* and pick the first hit.

    Prints a message and returns ``(None, None)`` when *username* is empty
    or when the search returns nothing.

    NOTE(review): the visible body ends right after selecting the first
    search result; the remainder of the function (including the success-path
    return) appears to be missing from this excerpt.
    """
    if not username:
        print "Name is empty!"
        return None, None
    # The query is prefixed with "@" and limited to a single result.
    res = api.user_search(q="@" + username, count=1)
    if not res:
        print "{user} cannot be found!".format(user=username)
        return None, None
    ret_user = res[0]
# Example of pattern: http://www.clips.ua.ac.be/pages/pattern

from pattern.web import Bing, plaintext
from pattern.en import Sentence, Chunk, parse
from pattern.search import Pattern
from pattern.graph import Graph, Node, Edge, export

# Build a graph of "A is more important than B" relations found in Bing
# search result descriptions: an edge runs from the right-hand noun phrase
# (B) to the left-hand one (A).
g = Graph()
# The search pattern never changes, so compile it once up front.
p = Pattern.fromstring('NP (VP) more important than NP')
for i in range(1):
    # Single-argument print works identically as a Python 2 statement and
    # as a Python 3 function call.
    print("%s %s" % ("--------------", i))
    for r in Bing().search('"more important than"', start=i + 1, count=50):
        s = plaintext(r.description.lower())
        print(s)
        s = Sentence(parse(s))
        print(s)
        for m in p.search(s):
            a = m.constituents(p[+0])[-1]  # Left NP.
            b = m.constituents(p[-1])[+0]  # Right NP.
            # For a chunk use its head word (falling back to the chunk
            # itself if the head is falsy); otherwise use the item as is.
            a = ((a.head or a) if isinstance(a, Chunk) else a).string
            b = ((b.head or b) if isinstance(b, Chunk) else b).string
            if a and b:
                if a not in g:
                    g.add_node(a, radius=5, stroke=(0, 0, 0, 0.8))
                if b not in g:
                    g.add_node(b, radius=5, stroke=(0, 0, 0, 0.8))
                g.add_edge(g[b], g[a], stroke=(0, 0, 0, 0.6))

# Keep only the largest subgraph. Skip this when nothing matched: an empty
# graph has no subgraphs, so indexing [0] would raise IndexError.
if len(g) > 0:
    g = g.split()[0]

for n in g.sorted()[:40]:  # Sorted by Node.weight.