示例#1
0
def loop_data(g,dataset):
    """Load every row of ``dataset`` into the graph.

    For each row, the 'sim' vertices named by ``a_number`` and ``b_number``
    are fetched, or created when missing (``tac`` is only written when the
    A-side vertex is created), and an 'info' edge carrying the call
    attributes is added from the A-side vertex to the B-side vertex.

    :param g: graph traversal source
    :param dataset: object exposing ``iterrows()`` whose rows are indexable
        by column name (e.g. a pandas DataFrame)
    """
    print("running ....")
    for _, record in dataset.iterrows():
        # Get-or-create the A-side vertex.
        caller = record["a_number"]
        new_caller = __.addV('sim').property('name', caller).property('tac', record["tac"])
        source = g.V().has('sim', 'name', caller).fold().coalesce(__.unfold(), new_caller).next()

        # Get-or-create the B-side vertex.
        callee = record["b_number"]
        new_callee = __.addV('sim').property('name', callee)
        target = g.V().has('sim', 'name', callee).fold().coalesce(__.unfold(), new_callee).next()

        # Create the edge and attach the per-call attributes.
        edge = g.V(source).addE('info').to(target)
        for field in ('service_type', 'b_prefix', 'start_time', 'duration'):
            edge = edge.property(field, record[field])
        edge.iterate()
 def __write_edges(self, g: traversal, edges: List[Dict], scan_id: str) -> None:
     """
     Writes the edges to the labeled property graph, flushing every 100
     edges and once more after the last edge.

     Both endpoints of an edge are looked up by ``<arn>_<scan_id>`` and
     created on the fly when they do not exist yet.

     :param g: The graph traversal source
     :param edges: A list of dictionaries for each edge; the keys
         "~id", "~label", "~from" and "~to" are read
     :param scan_id: Suffix appended to the endpoint vertex ids; also stored
         as the "scan_id" property on vertices created here
     :return: None
     :raises NeptuneLoadGraphException: when flushing a batch fails
     """

     def endpoint(arn, vertex_id):
         # Existing vertex when present, otherwise a newly created one.
         return (
             __.V(vertex_id)
             .fold()
             .coalesce(
                 __.unfold(),
                 __.addV(self.parse_arn(arn)["resource"])
                 .property(T.id, vertex_id)
                 .property("scan_id", scan_id)
                 .property("arn", arn),
             )
         )

     total = len(edges)
     pending = g
     for written, edge in enumerate(edges, start=1):
         to_id = f'{edge["~to"]}_{scan_id}'
         from_id = f'{edge["~from"]}_{scan_id}'
         pending = (
             pending.addE(edge["~label"])
             .property(T.id, str(edge["~id"]))
             .from_(endpoint(edge["~from"], from_id))
             .to(endpoint(edge["~to"], to_id))
         )
         if written % 100 != 0 and written != total:
             continue
         # Batch boundary reached: submit what has been chained so far.
         try:
             self.logger.info(
                 event=LogEvent.NeptunePeriodicWrite,
                 msg=f"Writing edges {written} of {total}",
             )
             pending.next()
             pending = g
         except Exception as err:
             self.logger.error(event=LogEvent.NeptuneLoadError, msg=str(err))
             raise NeptuneLoadGraphException(
                 f"Error loading edge {edge} " f"with {str(pending.bytecode)}"
             ) from err
        def upsert_vertices_for_label(rows):
            """Get-or-create one vertex per row, submitting queries in batches.

            Uses ``self``, ``label`` and ``batch_size`` from the enclosing
            scope. Each row is converted with ``asDict()``; the text before
            the first ':' of every key is used as the property name
            (presumably stripping a type suffix such as 'name:String' -
            TODO confirm against the caller). '~id' becomes the vertex id
            and '~label' is skipped.

            The connection is closed even when a query fails.
            """
            conn = self.gremlin_utils.remote_connection()
            try:
                g = self.gremlin_utils.traversal_source(connection=conn)

                t = g
                i = 0
                for row in rows:
                    entries = row.asDict()
                    create_traversal = __.addV(label)
                    for key, value in entries.items():
                        key = key.split(':')[0]
                        if key == '~id':
                            create_traversal = create_traversal.property(id, value)
                        elif key == '~label':
                            pass
                        else:
                            create_traversal = create_traversal.property(
                                key, value)
                    # Chain this row's upsert onto the traversal being batched.
                    t = t.V(entries['~id']).fold().coalesce(
                        __.unfold(), create_traversal)
                    i += 1
                    if i == batch_size:
                        self.retry_query(t)
                        t = g
                        i = 0
                # Flush the final, partially filled batch.
                if i > 0:
                    self.retry_query(t)
            finally:
                # Previously skipped whenever a query raised, leaking the connection.
                conn.close()
示例#4
0
 def get_or_create_vertex(self, label_value, id):
     """Fetch the vertex with the given id, creating it when it is missing.

     A created vertex gets ``label_value`` as its label, ``id`` as both its
     element id and its ``ID`` property, and the current date (from
     ``utils.get_date_now()``) as its ``DATE`` property.

     :return: whatever ``self._do_next`` returns for the traversal
     """
     lookup = self.g.V(id).fold()
     existing = __.unfold()
     created = (
         __.addV(label_value)
         .property(T.id, id)
         .property(ID, id)
         .property(DATE, utils.get_date_now())
     )
     return self._do_next(lookup.coalesce(existing, created))
示例#5
0
def upsert_vertex(record, vertex_mapping, g):
    """Get-or-create the vertex described by ``record`` and ``vertex_mapping``.

    The vertex is matched on its label plus every lookup property. When no
    match exists, a vertex is inserted with a 'type' property equal to the
    label, the lookup properties, and the mapped 'other_properties'.

    Failures are reported on stdout and the record is skipped (best effort).

    :param record: source record, indexable by source field name
    :param vertex_mapping: dict with 'vertex_label', 'lookup_properties' and
        'other_properties' (source field -> property key)
    :param g: graph traversal source
    :return: None
    """
    vertex_label = vertex_mapping['vertex_label']

    # Ensure all lookup values are present first
    lookup_values = get_lookup_values(record,
                                      vertex_mapping['lookup_properties'])
    if lookup_values is None:
        return

    # Setup traversals
    try:
        traversal = g.V().hasLabel(vertex_label)
        insertion_traversal = __.addV(vertex_label).property(
            'type', vertex_label)

        for prop_key, lookup_value in lookup_values.items():
            traversal = traversal.has(prop_key, lookup_value)
            insertion_traversal = insertion_traversal.property(
                prop_key, lookup_value)

        # Add Vertex insertion partial traversal
        for source_field, prop_key in vertex_mapping['other_properties'].items(
        ):
            insertion_traversal = insertion_traversal.property(
                prop_key, record[source_field])

        traversal.fold().coalesce(__.unfold(), insertion_traversal).next()
    except Exception as err:
        # Still best-effort, but no longer traps KeyboardInterrupt/SystemExit,
        # and the cause is reported instead of being discarded.
        print("Vertex error - skipping: {0}({1}): {2!r}".format(
            vertex_label, lookup_values, err))
示例#6
0
 def __add_author(self, t, author, post_url):
     """Extend ``t`` with an author upsert and a 'written_by' edge.

     The author vertex is identified by ``author['name']``. The edge is
     added from the vertex with id ``post_url`` to the author vertex. When
     the author dict carries a truthy ``img_src``, the image properties are
     set on the author vertex, but only if it has no ``img_src`` yet.

     :return: the extended traversal
     """
     img_src = None
     if "img_src" in author:
         img_src = author['img_src']
         img_height = author['img_height']
         img_width = author['img_width']

     name = author['name']
     t = t.V(name).fold()
     create_author = __.addV('author').property(T.id, name).property('name', name)
     t = t.coalesce(__.unfold(), create_author).as_('p')
     t = t.addE('written_by').from_(__.V(post_url))

     if not img_src:
         return t

     # Only fill in the image details when the vertex does not have them yet.
     backfill_image = (
         __.select('p').hasNot('img_src')
         .property('img_src', img_src)
         .property('img_height', img_height)
         .property('img_width', img_width)
     )
     return t.sideEffect(backfill_image)
示例#7
0
    def handle_youtube_video_added(self, video_id, user_id, name, description, location, preview_image_location,
                                   tags, added_date, timestamp):
        """Record a newly added video in the graph with a single traversal.

        Locates the 'user' vertex by ``userId``, adds a 'video' vertex, links
        user -> video with an 'uploaded' edge, and links the video to one
        'tag' vertex per distinct tag with a 'taggedWith' edge (missing tag
        vertices are created). ``location`` and ``timestamp`` are only
        logged here; they are not written to the graph.
        """
        # make sure tags are unique (no duplicates)
        unique_tags = set(tags)

        # Deferred %-formatting: no TypeError when a field is None or not a
        # str, and no string building when debug logging is disabled.
        logging.debug(
            'SuggestedVideosService:handle_youtube_video_added, video ID: %s, user ID: %s, name: %s, '
            'description: %s, location: %s, preview_image_location: %s, tags: %s, timestamp: %s',
            video_id, user_id, name, description, location, preview_image_location, unique_tags, timestamp)

        # Note: building a single traversal, but broken into several steps for readability

        # locate user vertex
        traversal = self.graph.V().has('user', 'userId', user_id).as_('^user')

        # add video vertex
        traversal = traversal.addV('video').property('videoId', video_id)\
            .property('added_date', added_date) \
            .property('description', description) \
            .property('name', name) \
            .property('preview_image_location', preview_image_location) \
            .as_('^video')

        # add edge from user to video vertex
        traversal = traversal.addE('uploaded').from_('^user').to('^video').property('added_date', added_date)

        # find vertices for tags and add edges from video vertex
        for tag in unique_tags:
            traversal = traversal.addE('taggedWith').from_('^video').to(__.coalesce(
                __.V().has('tag', 'name', tag),
                __.addV('tag').property('name', tag).property('tagged_date', added_date)))

        # execute the traversal
        traversal.iterate()
示例#8
0
def _build_gremlin_vertices(g: GraphTraversalSource,
                            row: Any) -> GraphTraversalSource:
    """Chain a get-or-create step for the vertex described by ``row`` onto ``g``.

    The vertex id is ``str(row["~id"])`` and a newly created vertex is
    labelled ``row["~label"]``. The result is then passed through
    ``_build_gremlin_properties`` together with the row.
    """
    vertex_id = str(row["~id"])
    lookup = g.V(vertex_id).fold()
    create_vertex = __.addV(row["~label"]).property(T.id, vertex_id)
    upsert = lookup.coalesce(__.unfold(), create_vertex)
    return _build_gremlin_properties(upsert, row)
    def upsert_node(self, node_id: str, node_label: str,
                    node_properties: Dict[str, Any]) -> None:
        """Get-or-create the node with ``node_id`` and apply ``node_properties``.

        A missing node is created with label ``node_label``. The properties
        are applied through
        ``NeptuneSessionClient.update_entity_properties_on_traversal`` before
        the traversal is executed with ``next()``.
        """
        new_node = __.addV(node_label).property(T.id, node_id)
        existing_or_new = (
            self.get_graph()
            .V()
            .has(T.id, node_id)
            .fold()
            .coalesce(__.unfold(), new_node)
        )
        with_properties = NeptuneSessionClient.update_entity_properties_on_traversal(
            existing_or_new, node_properties)
        with_properties.next()
示例#10
0
def _build_gremlin_insert_vertices(
        g: GraphTraversalSource,
        row: Any,
        use_header_cardinality: bool = False) -> GraphTraversalSource:
    """Chain a get-or-create step for the vertex described by ``row`` onto ``g``.

    The vertex id is ``str(row["~id"])`` and a newly created vertex is
    labelled ``row["~label"]``. Properties are then added by
    ``_set_properties``, which receives ``use_header_cardinality``.
    """
    vertex_id = str(row["~id"])
    lookup = g.V(vertex_id).fold()
    create_vertex = __.addV(row["~label"]).property(T.id, vertex_id)
    upsert = lookup.coalesce(__.unfold(), create_vertex)
    return _set_properties(upsert, use_header_cardinality, row)
 def test_explain(self) -> None:
     """Smoke test: ``proxy._explain`` accepts a translated upsert query."""
     proxy = self.get_proxy()
     user_label = VertexTypes.User.value.label
     key_name = proxy.key_property_name

     lookup = proxy.g.V().has(user_label, key_name, 'jack').fold()
     create_user = __.addV(user_label).property(Cardinality.single, key_name, 'jack')
     upsert = lookup.coalesce(__.unfold(), create_user)
     upsert = upsert.property(Cardinality.single, 'email', '*****@*****.**')

     # Translate before executing, then run the traversal itself.
     query = ScriptTranslator.translateT(upsert)
     upsert.iterate()
     # just enough to not explode
     proxy._explain(query)
示例#12
0
 def _write_vertex(self, vertex_id: str,
                   vertex_labels: List[str]) -> Traversal:
     """Build (without executing) a get-or-create traversal for a vertex.

     When ``self.supports_multiple_labels`` is truthy, all labels are joined
     with "::"; otherwise only the first label is used.

     :return: the traversal; the caller is responsible for running it
     """
     logger.debug("Writing vertex %s", vertex_id)
     label = "::".join(vertex_labels) if self.supports_multiple_labels else vertex_labels[0]
     lookup = self.g.V(vertex_id).fold()
     create_vertex = __.addV(label).property(T.id, vertex_id)
     return lookup.coalesce(__.unfold(), create_vertex)
示例#13
0
def get_last_checkpoint(client, tablename):
    """Return the stored checkpoint value for ``tablename``.

    Looks up the 'Checkpoint' vertex whose 'table' property equals
    ``tablename``. When none exists, one is created with the value
    ``datetime.datetime(2015, 1, 1, 0, 0)`` and that value is returned.

    :param client: object providing ``remote_connection()`` and
        ``traversal_source(conn)``
    :param tablename: name stored in the 'table' property
    :return: the vertex's 'value' property
    """
    conn = client.remote_connection()
    try:
        g = client.traversal_source(conn)
        checkpoint = (g.V().hasLabel('Checkpoint').has(
            'table', tablename).fold().coalesce(
                __.unfold(),
                __.addV('Checkpoint').property('table', tablename).property(
                    'value', datetime.datetime(2015, 1, 1, 0,
                                               0))).values('value').next())
    finally:
        # Close the connection even when the query raises.
        conn.close()
    return checkpoint
示例#14
0
    def __add_tag(self, t, tag, post_url):
        """Extend ``t`` with a tag upsert and a 'tagged' edge.

        The tag vertex uses ``tag`` as both its id and its 'tag' property.
        The edge is added from the vertex with id ``post_url`` to the tag
        vertex.

        :return: the extended traversal
        """
        lookup = t.V(tag).fold()
        create_tag = __.addV('tag').property(T.id, tag).property('tag', tag)
        tag_vertex = lookup.coalesce(__.unfold(), create_tag)
        return tag_vertex.addE('tagged').from_(__.V(post_url))
示例#15
0
    def __add_entities(self, t, entity, post_url):
        """Extend ``t`` with an entity upsert and a scored 'found_in' edge.

        The entity vertex id is ``"<Text>_<Type>"`` and its label is the
        lower-cased ``Type``. The edge is added from the vertex with id
        ``post_url`` to the entity vertex and carries ``entity['Score']`` as
        its 'score' property.

        :return: the extended traversal
        """
        entity_id = f'{entity["Text"]}_{entity["Type"]}'
        lookup = t.V(entity_id).fold()
        create_entity = (
            __.addV(entity["Type"].lower())
            .property(T.id, entity_id)
            .property("text", entity["Text"])
            .property("type", entity["Type"])
        )
        entity_vertex = lookup.coalesce(__.unfold(), create_entity)
        found_in = entity_vertex.addE('found_in').from_(__.V(post_url))
        return found_in.property('score', entity['Score'])
示例#16
0
def upsert_vertex(t, row, **kwargs):
    """Chain a get-or-create step for ``row`` onto traversal ``t``.

    Keyword arguments:
      mappings  -- required; supplies ``mapping_for``, ``get_id`` and
                   ``get_label``
      label     -- optional; defaults to ``mappings.get_label(row)``
      on_upsert -- optional; how non-token properties are treated:
          (not set)                           all go on the create branch only
          'updateSingleCardinalityProperties' single-cardinality properties are
                                              applied after the coalesce, the
                                              rest go on the create branch
          'updateAllProperties'               all are applied after the coalesce
          'replaceAllProperties'              no properties are added
                                              (NOTE(review): looks
                                              unimplemented - confirm)

    :return: the extended traversal
    """
    mappings = kwargs['mappings']
    label = kwargs['label'] if 'label' in kwargs else mappings.get_label(row)
    on_upsert = kwargs.get('on_upsert', None)

    on_create = __.addV(label)
    # (key, value) pairs applied whether the vertex was found or created.
    always_applied = []

    for key, value in row.items():
        mapping = mappings.mapping_for(key)

        if mapping.is_id_token():
            on_create = on_create.property(id, value)
            continue
        if mapping.is_token():
            continue

        if not on_upsert:
            on_create = on_create.property(mapping.name, mapping.convert(value))
        elif on_upsert == 'updateSingleCardinalityProperties':
            if mapping.cardinality == 'single':
                always_applied.append((key, value))
            else:
                on_create = on_create.property(
                    get_cardinality(mapping.cardinality), mapping.name,
                    mapping.convert(value))
        elif on_upsert == 'updateAllProperties':
            always_applied.append((key, value))

    t = t.V(mappings.get_id(row)).fold().coalesce(__.unfold(), on_create)

    for key, value in always_applied:
        mapping = mappings.mapping_for(key)
        t = t.property(get_cardinality(mapping.cardinality), mapping.name,
                       mapping.convert(value))

    return t
 def upsert_vertices_for_label(rows):
     """Get-or-create one vertex per row, one query per row.

     Uses ``self`` and ``label`` from the enclosing scope. Each row is
     converted with ``asDict()``; the text before the first ':' of every key
     is used as the property name, '~id' becomes the vertex id and '~label'
     is skipped.

     The connection is closed even when a query fails.
     """
     conn = self.remote_connection()
     try:
         g = self.traversal_source(conn)
         for row in rows:
             entries = row.asDict()
             create_traversal = __.addV(label)
             # items() exists on Python 2 and 3; iteritems() is Python 2 only.
             for key, value in entries.items():
                 key = key.split(':')[0]
                 if key == '~id':
                     create_traversal = create_traversal.property(id, value)
                 elif key == '~label':
                     pass
                 else:
                     create_traversal = create_traversal.property(key, value)
             g.V(entries['~id']).fold().coalesce(__.unfold(),
                                                 create_traversal).next()
     finally:
         # Previously skipped whenever a query raised, leaking the connection.
         conn.close()
示例#18
0
    def __add_post(self, t, post):
        """Extend ``t`` with a post upsert, leaving the vertex labelled 'post'.

        The post vertex id is ``post['url']``. A newly created vertex gets
        the title, post date and image attributes from ``post``. The step is
        tagged with ``as_('post')`` so later steps can refer back to it.

        :return: the extended traversal
        """
        url = post['url']
        lookup = t.V(url).fold()

        # (graph property, key in the post dict)
        fields = (
            ('title', 'title'),
            ('post_date', 'date'),
            ('img_src', 'img_src'),
            ('img_height', 'img_height'),
            ('img_width', 'img_width'),
        )
        existing = __.unfold()
        create_post = __.addV('post').property(T.id, url)
        for prop_name, source_key in fields:
            create_post = create_post.property(prop_name, post[source_key])

        return lookup.coalesce(existing, create_post).as_('post')
示例#19
0
def test_big_result_set_secure(secure_client):
    """Submit bytecode traversals of growing result size through ``secure_client``.

    First adds 20000 'person' vertices and checks that the count comes back
    as a single result, then reads 10, 100, 1000 and 10000 vertices and
    checks that exactly that many results are returned.
    """
    g = Graph().traversal()

    def fetch_all(traversal):
        # Submit the traversal as bytecode and flatten every result batch.
        message = RequestMessage('traversal', 'bytecode', {'gremlin': traversal.bytecode, 'aliases': {'g': 'g'}})
        collected = []
        for batch in secure_client.submit(message):
            collected += batch
        return collected

    populate = g.inject(1).repeat(__.addV('person').property('name', __.loops())).times(20000).count()
    assert len(fetch_all(populate)) == 1

    for expected in (10, 100, 1000, 10000):
        assert len(fetch_all(g.V().limit(expected))) == expected
 def __write_vertices(self, g: traversal, vertices: List[Dict], scan_id: str) -> None:
     """
     Writes the vertices to the labeled property graph, flushing every 100
     vertices and once more after the last vertex.

     Each vertex is looked up by ``<~id>_<scan_id>`` and created when
     missing; every key other than "~id" and "~label" is then set as a
     property. Integers outside the signed 64-bit bounds checked below are
     replaced by their string form (the input dict is modified in place).

     :param g: The graph traversal source
     :param vertices: A list of dictionaries for each vertex
     :param scan_id: Suffix appended to each vertex id
     :return: None
     :raises NeptuneLoadGraphException: when flushing a batch fails
     """
     cnt = 0
     t = g
     for r in vertices:
         vertex_id = f'{r["~id"]}_{scan_id}'
         t = (
             t.V(vertex_id)
             .fold()
             .coalesce(
                 __.unfold(),
                 __.addV(self.parse_arn(r["~label"])["resource"]).property(T.id, vertex_id),
             )
         )
         for k in r.keys():
             # Need to handle numbers that are bigger than a Long in Java, for now we stringify it
             if isinstance(r[k], int) and (
                 r[k] > 9223372036854775807 or r[k] < -9223372036854775807
             ):
                 r[k] = str(r[k])
             if k not in ["~id", "~label"]:
                 t = t.property(k, r[k])
         cnt += 1
         if cnt % 100 == 0 or cnt == len(vertices):
             try:
                 self.logger.info(
                     event=LogEvent.NeptunePeriodicWrite,
                     msg=f"Writing vertices {cnt} of {len(vertices)}",
                 )
                 t.next()
                 t = g
             except Exception as err:
                 # Report through the structured logger, like __write_edges,
                 # instead of printing to stdout.
                 self.logger.error(event=LogEvent.NeptuneLoadError, msg=str(err))
                 raise NeptuneLoadGraphException(
                     f"Error loading vertex {r} " f"with {str(t.bytecode)}"
                 ) from err
示例#21
0
def test_big_result_set_secure(authenticated_client):
    """Submit bytecode traversals of growing result size through ``authenticated_client``.

    First adds 20000 'person' vertices and checks that the count comes back
    as a single result, then reads 10, 100, 1000 and 10000 vertices and
    checks that exactly that many results are returned.
    """
    g = Graph().traversal()

    def fetch_all(traversal):
        # Submit the traversal as bytecode and flatten every result batch.
        message = RequestMessage('traversal', 'bytecode', {'gremlin': traversal.bytecode, 'aliases': {'g': 'g'}})
        collected = []
        for batch in authenticated_client.submit(message):
            collected += batch
        return collected

    populate = g.inject(1).repeat(__.addV('person').property('name', __.loops())).times(20000).count()
    assert len(fetch_all(populate)) == 1

    for expected in (10, 100, 1000, 10000):
        assert len(fetch_all(g.V().limit(expected))) == expected
 def upsert_vertices_for_label(rows):
     """Get-or-create one vertex per row, reporting failures on stdout.

     Uses ``self`` and ``label`` from the enclosing scope. Each row is
     converted with ``asDict()``; the text before the first ':' of every key
     is used as the property name, '~id' becomes the vertex id and '~label'
     is skipped.

     Errors are printed rather than raised (best effort), and the
     connection is closed whether or not the load succeeded.
     """
     conn = None
     try:
         conn = self.gremlin_utils.remote_connection()
         g = self.gremlin_utils.traversal_source(connection=conn)
         for row in rows:
             entries = row.asDict()
             create_traversal = __.addV(label)
             for key, value in entries.items():
                 key = key.split(':')[0]
                 if key == '~id':
                     create_traversal = create_traversal.property(id, value)
                 elif key == '~label':
                     pass
                 else:
                     create_traversal = create_traversal.property(key, value)
             g.V(entries['~id']).fold().coalesce(
                 __.unfold(), create_traversal).next()
     except GremlinServerError as err:
         print("Neptune error: {0}".format(err))
     except Exception:
         # Narrowed from a bare except so KeyboardInterrupt/SystemExit propagate.
         print("Unexpected error:", sys.exc_info()[0])
     finally:
         # Previously the connection stayed open whenever a query failed.
         if conn is not None:
             try:
                 conn.close()
             except Exception:
                 # A failing close is still only reported, as before.
                 print("Unexpected error:", sys.exc_info()[0])
示例#23
0
 def _get_or_create_node(self, label: str, uri: str):
     """Build a traversal yielding the node with this URI and label.

     When no vertex has both the ``URI`` property equal to ``uri`` and the
     given label, one is created. The traversal is returned unexecuted.
     """
     matches = self.g.V().has(URI, uri).hasLabel(label).fold()
     create_node = __.addV(label).property(URI, uri)
     return matches.coalesce(__.unfold(), create_node)
示例#24
0
from gremlin_python.process.graph_traversal import __
from gremlin_python.structure.graph import Vertex

# Example: create a DSE graph, define its schema, and load two 'person'
# vertices plus one 'knows' edge through a traversal batch.
graph_name = 'modern'
ep_schema = GraphExecutionProfile(
    graph_options=GraphOptions(graph_name=graph_name))
ep = DseGraph.create_execution_profile(graph_name)

cluster = Cluster(execution_profiles={
    'schema': ep_schema,
    EXEC_PROFILE_GRAPH_DEFAULT: ep,
})
session = cluster.connect()

# Create the graph itself using the system profile.
session.execute_graph(
    "system.graph(name).create()",
    {'name': graph_name},
    execution_profile=EXEC_PROFILE_GRAPH_SYSTEM_DEFAULT)

# Define schema: property keys first, then the labels that use them.
for _schema_statement in (
        "schema.propertyKey('neighborhood').Bigint().create()",
        "schema.propertyKey('name').Text().create()",
        "schema.propertyKey('age').Bigint().create()",
        "schema.propertyKey('weight').Float().create()",
        "schema.vertexLabel('person').partitionKey('neighborhood').clusteringKey('name').properties('age').create()",
        "schema.edgeLabel('knows').properties('weight').connection('person', 'person').create()",
):
    session.execute_graph(_schema_statement, execution_profile='schema')

# Execute batch: two vertices, then the edge that references them by key.
batch = DseGraph.batch()
batch.add(
    __.addV('person')
    .property('neighborhood', 0)
    .property('name', 'bob')
    .property('age', 23))
batch.add(
    __.addV('person')
    .property('neighborhood', 0)
    .property('name', 'alice')
    .property('age', 21))
batch.add(
    __.addE('knows')
    .from_(Vertex({'neighborhood': 0, 'name': 'bob', '~label': 'person'}))
    .to(Vertex({'neighborhood': 0, 'name': 'alice', '~label': 'person'}))
    .property('weight', 2.3))
session.execute_graph(batch.as_graph_statement())
cluster.close()

示例#25
0
 def get_or_create_vertice(self, label: str, name: str, value: str):
     """Return the vertex with this label whose ``name`` property equals ``value``.

     When no such vertex exists, one is created with that label and
     property. The traversal is executed immediately with ``next()``.
     """
     matches = self.g.V().has(label, name, value).fold()
     create_vertex = __.addV(label).property(name, value)
     return matches.coalesce(__.unfold(), create_vertex).next()