Exemplo n.º 1
0
class BulkLoad:
    """Submit a bulk-load request to a loader endpoint and track the job.

    The request is a JSON document (source, format, IAM role ARN, region,
    ``failOnError``) POSTed to the URL returned by
    ``endpoints.loader_endpoint()``. ``Endpoints`` and ``BulkLoadStatus``
    are defined elsewhere in the project.
    """

    def __init__(self, source, format='csv', role=None, region=None, endpoints=None):
        """Store the load parameters, filling gaps from the environment.

        Args:
            source: Location of the data to load. Any ``${AWS_REGION}``
                placeholder is replaced with ``region`` when the load is
                submitted.
            format: Value sent as ``format`` in the request payload.
            role: Value sent as ``iamRoleArn``. When ``None`` it is read
                from the ``NEPTUNE_LOAD_FROM_S3_ROLE_ARN`` environment
                variable.
            region: Value sent as ``region``. When ``None`` it is read from
                the ``AWS_REGION`` environment variable.
            endpoints: Object providing ``loader_endpoint()`` and
                ``load_status_endpoint(load_id)``. When ``None`` a default
                ``Endpoints()`` is created.

        Raises:
            AssertionError: If ``role`` or ``region`` is omitted and the
                matching environment variable is not set.
        """
        self.source = source
        self.format = format

        if role is None:
            role = self.__env_or_fail('NEPTUNE_LOAD_FROM_S3_ROLE_ARN', 'role is missing.')
        self.role = role

        if region is None:
            region = self.__env_or_fail('AWS_REGION', 'region is missing.')
        self.region = region

        self.endpoints = Endpoints() if endpoints is None else endpoints

    @staticmethod
    def __env_or_fail(name, message):
        """Return ``os.environ[name]`` or raise ``AssertionError(message)``.

        An explicit raise is used instead of an ``assert`` statement so the
        check still runs under ``python -O``; the exception type and message
        are kept for callers that already handle them.
        """
        try:
            return os.environ[name]
        except KeyError:
            raise AssertionError(message) from None

    def __load_from(self, source, format, role, region):
        """Build the JSON-serialisable payload for the load request."""
        return {
            'source': source,
            'format': format,
            'iamRoleArn': role,
            'region': region,
            'failOnError': 'FALSE',
        }

    def __load(self, loader_url, data):
        """POST ``data`` as JSON to ``loader_url`` and return the load id.

        The id is read from ``payload.loadId`` in the JSON response.
        """
        body = json.dumps(data).encode('utf8')
        req = urllib.request.Request(
            loader_url,
            data=body,
            headers={'Content-Type': 'application/json'})
        # Close the response (and its connection) as soon as it is read.
        with urllib.request.urlopen(req) as response:
            jsonresponse = json.loads(response.read().decode('utf8'))
        return jsonresponse['payload']['loadId']

    def load_async(self):
        """Submit the load and return a ``BulkLoadStatus`` without waiting.

        Prints an equivalent ``curl`` command before sending the request.
        """
        localised_source = self.source.replace('${AWS_REGION}', self.region)
        loader_url = self.endpoints.loader_endpoint()
        json_payload = self.__load_from(localised_source, self.format, self.role, self.region)
        print('''curl -X POST \\
    -H 'Content-Type: application/json' \\
    {} -d \'{}\''''.format(loader_url, json.dumps(json_payload, indent=4)))
        load_id = self.__load(loader_url, json_payload)
        return BulkLoadStatus(self.endpoints.load_status_endpoint(load_id))

    def load(self, interval=2):
        """Submit the load, print its status URI, then wait on the status.

        Args:
            interval: Passed to ``BulkLoadStatus.wait``.
        """
        status = self.load_async()
        print('status_uri: {}'.format(status.uri()))
        status.wait(interval)
Exemplo n.º 2
0
class BulkLoad:
    """Submit a configurable bulk-load request and track the resulting job.

    The request is a JSON document POSTed to the endpoint object returned
    by ``endpoints.loader_endpoint()``. ``Endpoints`` and ``BulkLoadStatus``
    are defined elsewhere in the project.
    """

    def __init__(
            self,
            source,
            format='csv',
            role=None,
            mode='AUTO',
            region=None,
            fail_on_error=False,
            parallelism='OVERSUBSCRIBE',
            base_uri='http://aws.amazon.com/neptune/default',
            named_graph_uri='http://aws.amazon.com/neptune/vocab/v01/DefaultNamedGraph',
            update_single_cardinality_properties=False,
            endpoints=None):
        """Store the load parameters, filling gaps from the environment.

        Args:
            source: Location of the data to load. Any ``${AWS_REGION}``
                placeholder is replaced with ``region`` on submission.
            format: Sent as ``format`` in the payload.
            role: Sent as ``iamRoleArn``. When ``None`` it is read from the
                ``NEPTUNE_LOAD_FROM_S3_ROLE_ARN`` environment variable.
            mode: Sent as ``mode``.
            region: Sent as ``region``. When ``None`` it is read from the
                ``AWS_REGION`` environment variable.
            fail_on_error: Truthy values are sent as ``'TRUE'``, anything
                else as ``'FALSE'`` (key ``failOnError``).
            parallelism: Sent as ``parallelism``.
            base_uri: Sent as ``parserConfiguration.baseUri``.
            named_graph_uri: Sent as ``parserConfiguration.namedGraphUri``.
            update_single_cardinality_properties: Truthy values are sent as
                ``'TRUE'``, anything else as ``'FALSE'``.
            endpoints: Object providing ``loader_endpoint()`` and
                ``load_status_endpoint(load_id)``. When ``None`` a default
                ``Endpoints()`` is created.

        Raises:
            AssertionError: If ``role`` or ``region`` is omitted and the
                matching environment variable is not set.
        """
        self.source = source
        self.format = format

        if role is None:
            role = self.__env_or_fail('NEPTUNE_LOAD_FROM_S3_ROLE_ARN', 'role is missing.')
        self.role = role

        self.mode = mode

        if region is None:
            region = self.__env_or_fail('AWS_REGION', 'region is missing.')
        self.region = region

        self.endpoints = Endpoints() if endpoints is None else endpoints

        # The payload carries these flags as upper-case strings.
        self.fail_on_error = 'TRUE' if fail_on_error else 'FALSE'
        self.parallelism = parallelism
        self.base_uri = base_uri
        self.named_graph_uri = named_graph_uri
        self.update_single_cardinality_properties = (
            'TRUE' if update_single_cardinality_properties else 'FALSE')

    @staticmethod
    def __env_or_fail(name, message):
        """Return ``os.environ[name]`` or raise ``AssertionError(message)``.

        An explicit raise is used instead of an ``assert`` statement so the
        check still runs under ``python -O``; the exception type and message
        are kept for callers that already handle them.
        """
        try:
            return os.environ[name]
        except KeyError:
            raise AssertionError(message) from None

    def __load_from(self, source):
        """Build the JSON-serialisable payload for a load of ``source``."""
        return {
            'source': source,
            'format': self.format,
            'iamRoleArn': self.role,
            'mode': self.mode,
            'region': self.region,
            'failOnError': self.fail_on_error,
            'parallelism': self.parallelism,
            'parserConfiguration': {
                'baseUri': self.base_uri,
                'namedGraphUri': self.named_graph_uri,
            },
            'updateSingleCardinalityProperties':
                self.update_single_cardinality_properties,
        }

    def __load(self, loader_endpoint, data):
        """POST ``data`` as JSON via ``loader_endpoint`` and return the load id.

        ``loader_endpoint.prepare_request('POST', body)`` supplies the target
        URI and headers; the ``Content-Type`` header is then set to JSON.

        Raises:
            Exception: On an HTTP 500 response, carrying the decoded error
                body (parsed JSON when possible, otherwise the raw text).
            HTTPError: For any other HTTP error status, re-raised unchanged.
        """
        json_string = json.dumps(data)
        json_bytes = json_string.encode('utf8')
        request_parameters = loader_endpoint.prepare_request('POST', json_string)
        request_parameters.headers['Content-Type'] = 'application/json'
        req = urllib.request.Request(request_parameters.uri,
                                     data=json_bytes,
                                     headers=request_parameters.headers)
        try:
            # Close the response (and its connection) as soon as it is read.
            with urllib.request.urlopen(req) as response:
                json_response = json.loads(response.read().decode('utf8'))
        except HTTPError as e:
            if e.code != 500:
                raise
            error_text = e.read().decode('utf8', errors='replace')
            try:
                detail = json.loads(error_text)
            except ValueError:
                # Keep the server's message even when it is not JSON.
                detail = error_text
            raise Exception(detail) from None
        return json_response['payload']['loadId']

    def load_async(self):
        """Submit the load and return a ``BulkLoadStatus`` without waiting.

        Prints an equivalent ``curl`` command before sending the request.
        """
        localised_source = self.source.replace('${AWS_REGION}', self.region)
        loader_endpoint = self.endpoints.loader_endpoint()
        json_payload = self.__load_from(localised_source)
        print('''curl -X POST \\
    -H 'Content-Type: application/json' \\
    {} -d \'{}\''''.format(loader_endpoint, json.dumps(json_payload, indent=4)))
        load_id = self.__load(loader_endpoint, json_payload)
        return BulkLoadStatus(self.endpoints.load_status_endpoint(load_id))

    def load(self, interval=2):
        """Submit the load, print its status endpoint, then wait on the status.

        Args:
            interval: Passed to ``BulkLoadStatus.wait``.
        """
        status = self.load_async()
        print('status_uri: {}'.format(status.load_status_endpoint))
        status.wait(interval)