def test_set_shard_explodes(self) -> None:
    expected = get_shard()
    with self.assertRaisesRegex(
            AssertionError,
            'can only shard_set_explicitly if it has not been used yet.'):
        shard_set_explicitly('x')
    actual = get_shard()
    self.assertEqual(expected, actual)
def construct_type(cls: Type["VertexType"], **kwargs: Any) -> "VertexType":
    defaults: Dict[str, Hashable] = dict()
    properties: Set[Property] = set(kwargs.pop('properties', []))
    properties.update({
        MagicProperties.LABEL.value, MagicProperties.ID.value,
        WellKnownProperties.Key.value
    })

    # (magically) insinuate the shard identifier into the vertex id format
    shard = get_shard()
    id_format = kwargs.pop('id_format', VertexTypeIdFormats.DEFAULT.value)
    if shard:
        properties.update({WellKnownProperties.TestShard.value})
        defaults.update({WellKnownProperties.TestShard.value.name: shard})
        # prepend if it's not in the format already (which would be pretty
        # weird, but anyway)
        if '{shard}' not in id_format:
            id_format = '{shard}:' + id_format

    # every parameter referenced by the id_format must be backed by a property
    parameters = _discover_parameters(id_format)
    properties_names = {p.name for p in properties}
    assert all(p in properties_names for p in parameters), \
        f'id_format: {id_format} has parameters: {parameters} ' \
        f'not found in our properties {properties_names}'
    return cls(properties=tuple(properties), defaults=tuple(defaults.items()),
               id_format=id_format, **kwargs)
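# The assert above relies on _discover_parameters pulling the '{...}' field
# names out of id_format. A minimal sketch of how such discovery could work
# with the standard library; the real helper may well differ, and
# discover_parameters here is a hypothetical stand-in.
from string import Formatter
from typing import Set


def discover_parameters(id_format: str) -> Set[str]:
    # Formatter.parse yields (literal_text, field_name, format_spec,
    # conversion) tuples; field_name is None for trailing literal text
    return {field for _, field, _, _ in Formatter().parse(id_format) if field}


assert discover_parameters('{shard}:{label}:{key}') == {'shard', 'label', 'key'}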
def drop(self) -> None:
    test_shard = get_shard()
    g = self.g.V()
    if test_shard:
        g = g.has(WellKnownProperties.TestShard.value.name, test_shard)
    g = g.drop()
    LOGGER.warning('DROPPING ALL NODES')
    self.query_executor()(query=g, get=FromResultSet.iterate)
    # we seem to mess this up easily: sweep up any vertexes whose ids carry
    # the shard prefix but were missed by the property-scoped drop above
    if test_shard:
        leftover = self.query_executor()(
            query=self.g.V().hasId(TextP.startingWith(test_shard)).id(),
            get=FromResultSet.toList)
        self.query_executor()(
            query=self.g.V().hasId(TextP.startingWith(test_shard)).drop(),
            get=FromResultSet.iterate)
        assert not leftover, f'we have some leftover: {leftover}'
    LOGGER.warning('COMPLETED DROP OF ALL NODES')
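# Illustrative only: the two traversals drop() builds, shown as gremlinpython
# bytecode without a server connection. The property name 'shard' is an
# assumed stand-in for WellKnownProperties.TestShard.value.name.
from gremlin_python.process.traversal import TextP
from gremlin_python.structure.graph import Graph

_g = Graph().traversal()  # no remote attached: we only inspect the bytecode
_test_shard = 'shard_123'  # assumed; would come from get_shard()

# scoped drop: only vertexes carrying this shard's marker property
_scoped_drop = _g.V().has('shard', _test_shard).drop()
# leftover sweep: vertexes whose ids were prefixed with the shard but that
# (somehow) lost or never had the marker property
_leftover_sweep = _g.V().hasId(TextP.startingWith(_test_shard)).drop()

print(_scoped_drop.bytecode)
print(_leftover_sweep.bytecode)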
def bulk_load_entities(
        self, *,
        entities: Mapping[GraphEntityType, Mapping[str, GraphEntity]],
        object_prefix: Optional[str] = None, polling_period: int = 10,
        raise_if_failed: bool = False) -> Mapping[str, Mapping[str, Any]]:
    """
    :param entities: The entities being bulk loaded. They will be partitioned
        at least by vertex vs edge, but possibly by conflicting property type
        (though the latter is unusual), written to files in S3, then loaded
        by Neptune.
    :param object_prefix: (optional) Treated like a format string, with 'now'
        and 'shard' (if get_shard() is truthy) as the available parameters.
        Defaults to '{now}/{shard}' or '{now}'.
    :param polling_period: (optional) The period, in seconds, at which the
        load status will be polled. Defaults to 10.
    :param raise_if_failed: (optional) Defaults to False. If True, raises if
        any of the loads failed; otherwise logs a warning and returns the
        status. True is useful for testing or other situations where you
        would always expect the load to succeed.
    :return: a mapping from load id to that load's final status payload.
    """
    format_args = dict(now=datetime.datetime.now().isoformat(
        timespec='milliseconds').replace(':', '-').replace('.', '-'))
    shard = get_shard()
    if shard:
        format_args.update(shard=shard)
    if not object_prefix:
        object_prefix = '{now}/{shard}' if 'shard' in format_args else '{now}'
    object_prefix = object_prefix.format(**format_args)
    assert isinstance(object_prefix, str) and ':' not in object_prefix, \
        f'object_prefix is going to break S3 {object_prefix}'

    vertexes, edges = group_by_class(entities)

    # TODO: write these to tmp? stream them in?
    vertex_csvs: List[bytes] = []
    for types in partition_properties(vertexes.keys()):
        with StringIO() as w:
            write_entities_as_csv(w, dict((t, vertexes[t]) for t in types))
            vertex_csvs.append(w.getvalue().encode('utf-8'))
    edge_csvs: List[bytes] = []
    for types in partition_properties(edges.keys()):
        with StringIO() as w:
            write_entities_as_csv(w, dict((t, edges[t]) for t in types))
            edge_csvs.append(w.getvalue().encode('utf-8'))

    csvs: List[Tuple[str, bytes]] = [
        (f'{object_prefix}/vertex{i}.csv', v)
        for i, v in enumerate(vertex_csvs)
    ] + [
        (f'{object_prefix}/edge{i}.csv', v)
        for i, v in enumerate(edge_csvs)
    ]

    todo: List[str] = []
    for s3_object_key, v in csvs:
        # upload to s3
        with BytesIO(v) as r:
            self.upload(f=r, s3_object_key=s3_object_key)

        # now poke Neptune and tell it to load that file
        # TODO: dependencies? endpoint doesn't seem to like the way we pass these
        response = self.load(s3_object_key=s3_object_key)
        # TODO: retry?
        assert 'payload' in response and 'loadId' in response['payload'], \
            f'failed to submit load for {s3_object_key}: {response}'
        todo.append(response['payload']['loadId'])

    # poll until every load reaches a terminal state
    status_by_load_id: Dict[str, Mapping[str, Any]] = dict()
    while todo:
        status_by_load_id.update([
            (id, self.load_status(
                load_id=id, errors=True, errors_per_page=30)['payload'])
            for id in todo])
        todo = [
            load_id
            for load_id, overall_status in status_by_load_id.items()
            if overall_status['overallStatus']['status'] not in (
                'LOAD_COMPLETED', 'LOAD_FAILED')]
        # TODO: timeout and parse errors
        if todo:
            time.sleep(polling_period)
    assert not todo

    failed = dict([
        (load_id, overall_status)
        for load_id, overall_status in status_by_load_id.items()
        if overall_status['overallStatus']['status'] != 'LOAD_COMPLETED'])
    if failed:
        LOGGER.warning(
            f'some loads failed: {failed.keys()}: bulk_loader_details={failed}')
        if raise_if_failed:
            raise AssertionError(f'some loads failed: {failed.keys()}')
    return status_by_load_id
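# Illustrative only: how the default object_prefix is derived, mirroring the
# first few lines of bulk_load_entities. 'shard_123' is an assumed stand-in
# for whatever get_shard() returns; unsharded runs get just '{now}'.
import datetime

_now = datetime.datetime.now().isoformat(timespec='milliseconds') \
    .replace(':', '-').replace('.', '-')
_shard = 'shard_123'  # assumed; would come from get_shard()
_object_prefix = ('{now}/{shard}' if _shard else '{now}').format(
    now=_now, shard=_shard)
# e.g. '2021-03-01T12-34-56-789/shard_123': every ':' and '.' in the
# timestamp has been replaced, so the S3 object key is safe
assert ':' not in _object_prefix
print(_object_prefix)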
def test_set_shard_works(self) -> None:
    expected = Fixtures.next_string()
    shard_set_explicitly(expected)
    actual = get_shard()
    self.assertEqual(expected, actual)
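# Illustrative only: a minimal sketch of the use-then-freeze guard that
# test_set_shard_works and test_set_shard_explodes exercise. This is an
# assumed implementation, not the library's; the names carry a _sketch_
# prefix so they don't shadow the real get_shard/shard_set_explicitly.
from typing import Optional

_sketch_shard: Optional[str] = None
_sketch_shard_used = False


def _sketch_get_shard() -> Optional[str]:
    global _sketch_shard_used
    _sketch_shard_used = True  # any read freezes the value
    return _sketch_shard


def _sketch_shard_set_explicitly(shard: str) -> None:
    global _sketch_shard
    assert not _sketch_shard_used, \
        'can only shard_set_explicitly if it has not been used yet.'
    _sketch_shard = shard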