def add_data(traffic_data):
    gpudb = GPUdb(encoding='BINARY', gpudb_ip='10.1.10.31', gpudb_port='9191')

    # Add more fields as needed for the analysis
    type_definition = """{
        "type":"record",
        "name":"gen_pt",
        "fields":[
            {"name":"x","type":"double"},
            {"name":"y","type":"double"},
            {"name":"src","type":"string"},
            {"name":"dst","type":"string"},
            {"name":"payload","type":"string"}
        ]
    }"""
    retobj = gpudb.do_register_type(type_definition, "", "point-type", "POINT")
    type_id = retobj['type_id']
    set_id = str(uuid.uuid1())
    retobj = gpudb.do_new_set(type_id, set_id)

    x = 1
    y = 1
    encoded_datums = []
    for e in traffic_data:
        datum = ordereddict.OrderedDict([('x', x), ('y', y),
                                         ('src', e[0]), ('dst', e[1]),
                                         ('payload', e[2])])
        encoded_datum = gpudb.encode_datum(type_definition, datum)
        encoded_datums.append(encoded_datum)
        x += 1
        y += 1

    gpudb.do_bulk_add(set_id, encoded_datums)
    return set_id, gpudb
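# A minimal usage sketch for add_data() above; the sample (src, dst, payload)
# tuples are illustrative assumptions, not part of the original capture flow.
if __name__ == '__main__':
    sample_traffic = [
        ('192.168.0.5', '10.1.10.31', 'GET /index.html'),
        ('10.1.10.31', '192.168.0.5', 'HTTP/1.1 200 OK'),
    ]
    set_id, db = add_data(sample_traffic)
    print('Added %d records to set %s' % (len(sample_traffic), set_id))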
def gpudb_cmd(argv):
    """A command line interface to send a specified request to a GPUDB server.
    Can be used to print the parameters for a request as well.
    """
    # Default values
    file_name = ""

    # Add arguments to the parser
    parser = argparse.ArgumentParser()
    parser.add_argument('-g', '--gpudb', nargs='?', default="127.0.0.1:9191",
                        help="IP address and port of GPUdb in the format: IP_ADDRESS:PORT (default 127.0.0.1:9191)")
    parser.add_argument('--username', nargs='?', default="",
                        help="Username used when connecting to GPUdb.")
    parser.add_argument('--password', nargs='?', default="",
                        help="Password used when connecting to GPUdb.")
    parser.add_argument('--ask-password', action="store_true",
                        help="Ask for the password to use when connecting to GPUdb (more secure than --password)")
    parser.add_argument('--json-encoding', action="store_true",
                        help="Use avro JSON encoding of request message to GPUdb (default is avro binary)")
    parser.add_argument('-f', '--format', action='store', dest="format", default="json",
                        choices=["json", "oneline", "ini", "raw"],
                        help="Format the returned GPUDB response in a few ways. (default 'json')")
    parser.add_argument("--print-query", action='store_true',
                        help="Print the request query before sending it using the specified format.")

    # User must provide one or the other
    query_group = parser.add_mutually_exclusive_group(required=True)
    query_group.add_argument("--list-queries", action='store_true',
                             help="Lists all available GPUDB request queries.")
    query_group.add_argument("--print-schemas", action='store',
                             help="Print the JSON schema of the specified request and response query.")
    query_group.add_argument('--query', nargs=argparse.REMAINDER,
                             help="Send a request query by specifying the name of the query and the parameters associated with the query. "
                                  "Help is provided if only the query name is specified. "
                                  "Note that unspecified parameters will take a default value. "
                                  "Example: '--query aggregate_min_max --column_name x --table_name DataTable'")

    # Print the help message and quit if no arguments are given
    if len(sys.argv) == 1:  # None provided
        parser.print_help()
        sys.exit(2)

    # Parse the command line arguments
    args = parser.parse_args()

    # --------------------------------------
    # Set up GPUdb
    GPUdb_IP, GPUdb_Port = args.gpudb.split(":")
    password = args.password
    if args.ask_password:
        password = getpass.getpass("GPUdb password: ")
    # NOTE: the connection setup below is a reconstruction based on the flags
    # defined above; the original lines here were redacted in the source.
    encoding = 'JSON' if args.json_encoding else 'BINARY'
    gpudb = GPUdb(encoding=encoding, host=GPUdb_IP, port=GPUdb_Port,
                  username=args.username, password=password)
    query_names = gpudb.gpudb_schemas.keys()

    # --------------------------------------
    # Print the request and response schemas of a query, if desired by user
    if args.print_schemas:
        query_name = args.print_schemas
        if query_name not in query_names:
            print("Unknown query name: '%s'" % query_name)
            sys.exit(2)
        req_schema_str = gpudb.gpudb_schemas[query_name]["REQ_SCHEMA_STR"]
        rsp_schema_str = gpudb.gpudb_schemas[query_name]["RSP_SCHEMA_STR"]
        req_odict = json.JSONDecoder(object_pairs_hook=collections.OrderedDict).decode(req_schema_str)
        rsp_odict = json.JSONDecoder(object_pairs_hook=collections.OrderedDict).decode(rsp_schema_str)
        # Use desired formatting
        print_dict(req_odict, args.format)
        print_dict(rsp_odict, args.format)
        sys.exit(0)

    # --------------------------------------
    # List all endpoint/query names, if desired by user
    if args.list_queries or (len(args.query) == 0):
        for q in sorted(query_names):
            print(q)
        sys.exit(0)  # Successful termination after printing the desired help message

    # --------------------------------------
    # Get the query JSON string from GPUdb
    query_name = args.query[0]
    if query_name not in query_names:
        print("Unknown query name: '%s'" % query_name)
        sys.exit(2)
    request_json = gpudb.gpudb_schemas[query_name]["REQ_SCHEMA_STR"]

    # Parse the request JSON to get the parameters
    request_schema = gpudb.gpudb_schemas[query_name]["REQ_SCHEMA"]
    request_json = request_schema.to_json()["fields"]

    # Create a dictionary of (param name, param type) pairs based on the JSON
    param_name_type = {}
    param_vals = {}
    for param in request_json:
        param_name_type[param['name']] = param['type']
        # Binary/bytes parameters will be skipped
        if param['type'] == "string" or param['type'] == "bytes":
            param_vals[param['name']] = ""  # Default is empty string
        if param['type'] == "map":
            param_vals[param['name']] = {}  # Default is empty map
        if param['type'] == "list":
            param_vals[param['name']] = []  # Default is empty list
        # Note that numeric attributes do not get a default;
        # the user MUST provide such values, or we output an error

    # Create a parser for query-specific parameters
    query_parser = argparse.ArgumentParser()

    # Add parameters to be parsed
    for pname, ptype in param_name_type.items():
        if ptype == "string":
            # Make string arguments optional
            query_parser.add_argument("--" + pname, nargs='?', default="",
                                      help="Defaults to empty string")
        elif ptype == "double" or ptype == "float":
            query_parser.add_argument("--" + pname, type=float, required=True,
                                      help="Required parameter, type %s" % ptype)
        elif ptype == "long":
            query_parser.add_argument("--" + pname, type=long, required=True,
                                      help="Required parameter, type %s" % ptype)
        elif ptype == "int":
            query_parser.add_argument("--" + pname, type=int, required=True,
                                      help="Required parameter, type %s" % ptype)
        elif ptype == "bytes":
            continue  # ignore bytes
        elif ptype == "boolean":
            # Boolean flag; user must provide one or the other
            bool_group = query_parser.add_mutually_exclusive_group(required=True)
            bool_group.add_argument("--" + pname, action='store_true', dest=pname,
                                    help="Boolean parameter, include to set %s to TRUE" % pname)
            bool_group.add_argument("--no-" + pname, action='store_false', dest=pname,
                                    help="Boolean parameter, include to set %s to FALSE" % pname)
        else:
            # Maps and lists get empty ones by default; handling is delicate; ignore 'bytes'
            if ptype['type'] == "map":
                query_parser.add_argument("--" + pname, nargs='?', type=json.loads, default={},
                                          help="Expected map value of type: %s; surround the whole map with single quotes (') and any string (key or value) within with double quotes (\"). E.g. for random, --param_map '{\"x\":{\"min\":2}}'. When omitted, defaults to empty map" % ptype['values'])
            else:  # Arrays
                query_parser.add_argument("--" + pname, type=json.loads, default=[],
                                          help="Comma separated list (escape spaces with \) enclosed in []. For example, for filter_by_nai, --x_vector [1,2,3,4] or --x_vector [1,\ 2,\ 3,\ 4]. If it contains strings, then enclose the whole thing within single quotes and the individual strings in double quotes. E.g., for filter_by_string, --attributes '[\"x\",\"y\"]'. When omitted, defaults to an empty list.")

    # Print the help message and quit if no arguments are given (and none is expected)
    if len(args.query[1:]) == 0 and len(param_name_type) > 0:
        print("No parameters provided for query: ", query_name)
        query_parser.print_help()
        sys.exit(2)

    # Parse the parameters and store in a dictionary
    query_args = vars(query_parser.parse_args(args.query[1:]))

    # Copy the parsed values to the ordered dictionary to pass to GPUdb
    for key, val in query_args.items():
        param_vals[key] = val

    # --------------------------------------
    # Call the GPUDB query:
    # Obtain the request and response schemas and the endpoint for the given query
    (req_schema, resp_schema) = gpudb._GPUdb__get_schemas(query_name)
    endpoint = gpudb._GPUdb__get_endpoint(query_name)

    # --------------------------------------
    if args.print_query:
        encoded_datum = gpudb.encode_datum(req_schema, param_vals)
        request_odict = gpudb._GPUdb__read_orig_datum(req_schema, encoded_datum)
        print(endpoint)
        print_dict(request_odict, args.format)

    # --------------------------------------
    # Perform the GPUDB query
    response = gpudb._GPUdb__post_then_get(req_schema, resp_schema, param_vals, endpoint)
    print_dict(response, args.format)
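# Hypothetical entry point for the CLI above, assuming this file is executed
# as a script; the example flags come from the --query help text:
#   python gpudb_cmd.py --query aggregate_min_max --column_name x --table_name DataTable
if __name__ == '__main__':
    gpudb_cmd(sys.argv)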
def run_gpudb( argv ):
    """An interface to GPUDB. Run the specified query on GPUDB on the local
    machine or at the specified address. Also provide usage information.
    """
    # Default values
    file_name = ""

    # Add arguments to the parser
    parser = argparse.ArgumentParser()
    parser.add_argument( '-g', nargs = '?', default = "127.0.0.1:9191",
                         help = "IP address and port of GPUdb in the format: xxx.xx.xx.xx:xxxx (defaults to 127.0.0.1:9191)" )
    parser.add_argument( '--request-path', nargs = '?', default = gpudb_obj_defs_path,
                         help = "Path of the JSON definitions (defaults to %s)" % gpudb_obj_defs_path )

    # User must provide one or the other
    query_group = parser.add_mutually_exclusive_group( required = True )
    query_group.add_argument( "--list-queries", action = 'store_true',
                              help = "Lists all available GPUDB queries." )
    query_group.add_argument( '--query', nargs = argparse.REMAINDER,
                              help = "Name of the query to be executed and any parameters associated with the query. For example, '--query max_min --attribute x --set_id set1'. Not providing any parameter after the query name will print query specific help information." )

    # Print the help message and quit if no arguments are given
    if ( len(sys.argv) == 1 ):  # None provided
        parser.print_help()
        sys.exit( 2 )

    # Parse the command line arguments
    args = parser.parse_args()

    # Parse and check the request JSON path
    request_path = args.request_path
    if not os.path.exists( request_path ):  # Check that the path exists
        print "Path for JSONs does not exist: ", request_path
        sys.exit( 2 )
    if request_path[-1] != "/":  # simplify logic below by enforcing trailing '/'
        request_path += "/"

    # Create a list of all request JSON filenames
    filenames = [request_path + f for f in os.listdir( request_path ) if "_request.json" in f]

    # --------------------------------------
    # List all endpoint/query names, if desired by user
    if (args.list_queries == True) or (len(args.query) == 0):
        # Strip filename of the path and suffix if it's a request JSON file
        query_names = [ f.replace( request_path, "" ).replace( "_request.json", "" )
                        for f in filenames if "_request.json" in f ]
        for q in sorted( query_names ):
            print q
        sys.exit( 0 )  # Successful termination after printing the desired help message

    # --------------------------------------
    # Set up GPUdb
    GPUdb_IP, GPUdb_Port = args.g.split( ":" )
    gpudbdb = GPUdb( encoding = 'BINARY', host = GPUdb_IP, port = GPUdb_Port )

    # Find and read the desired query JSON file
    query_name = args.query[ 0 ]
    for f in filenames:
        file_name = f if ("/" + query_name + "_request.json") in f else file_name
    if file_name == "":
        print "Query not found: ", query_name
        sys.exit( 2 )
    json_file = open( file_name, "r" )
    request_json = json_file.read()
    json_file.close()

    # Parse the request JSON to get the parameters
    request_schema = schema.parse( request_json )
    request_json = request_schema.to_json()["fields"]

    # Create a dictionary of (param name, param type) pairs based on the JSON
    param_name_type = {}
    param_vals = {}  # param_vals = collections.OrderedDict()
    for param in request_json:
        param_name_type[ param['name'] ] = param['type']
        # Binary/bytes parameters will be skipped
        if param['type'] == "string" or param['type'] == "bytes":
            param_vals[ param['name'] ] = ""  # Default is empty string
        if param['type'] == "map":
            param_vals[ param['name'] ] = {}  # Default is empty map
        if param['type'] == "list":
            param_vals[ param['name'] ] = []  # Default is empty list
        # Note that numeric attributes do not get a default;
        # the user MUST provide such values, or we output an error

    # Create a parser for query-specific parameters
    query_parser = argparse.ArgumentParser()

    # Add parameters to be parsed
    query_parser.add_argument( "--format-response", action = 'store_true', dest = "format_response",
                               help = "Boolean parameter, include to print formatted GPUDB response. Omitting it prints the raw GPUDB response." )
    for pname, ptype in param_name_type.iteritems():
        if ptype == "string":
            # Make string arguments optional
            query_parser.add_argument( "--" + pname, nargs = '?', default = "",
                                       help = "Defaults to empty string" )
        elif ptype == "double" or ptype == "float":
            query_parser.add_argument( "--" + pname, type = float, required = True,
                                       help = "Required parameter, type %s" % ptype )
        elif ptype == "long":
            query_parser.add_argument( "--" + pname, type = long, required = True,
                                       help = "Required parameter, type %s" % ptype )
        elif ptype == "int":
            query_parser.add_argument( "--" + pname, type = int, required = True,
                                       help = "Required parameter, type %s" % ptype )
        elif ptype == "bytes":
            continue  # ignore bytes
        elif ptype == "boolean":
            # Boolean flag; user must provide one or the other
            bool_group = query_parser.add_mutually_exclusive_group( required = True )
            bool_group.add_argument( "--" + pname, action = 'store_true', dest = pname,
                                     help = "Boolean parameter, include to set %s to TRUE" % pname )
            bool_group.add_argument( "--no-" + pname, action = 'store_false', dest = pname,
                                     help = "Boolean parameter, include to set %s to FALSE" % pname )
        else:
            # Maps and lists get empty ones by default; handling is delicate; ignore 'bytes'
            if ptype[ 'type' ] == "map":
                query_parser.add_argument( "--" + pname, nargs = '?', type = json.loads, default = {},
                                           help = "Expected map value of type: %s; surround the whole map with single quotes (') and any string (key or value) within with double quotes (\"). E.g. for random, --param_map '{\"x\":{\"min\":2}}'. When omitted, defaults to empty map" % ptype['values'] )
            else:  # Arrays
                query_parser.add_argument( "--" + pname, type = json.loads, default = [],
                                           help = "Comma separated list (escape spaces with \) enclosed in []. For example, for filter_by_nai, --x_vector [1,2,3,4] or --x_vector [1,\ 2,\ 3,\ 4]. If it contains strings, then enclose the whole thing within single quotes and the individual strings in double quotes. E.g., for filter_by_string, --attributes '[\"x\",\"y\"]'. When omitted, defaults to an empty list." )

    # Print the help message and quit if no arguments are given (and none is expected)
    if ( len( args.query[1:] ) == 0 and len( param_name_type ) > 0 ):
        print "No parameters provided for query: ", query_name
        query_parser.print_help()
        sys.exit( 2 )

    # Parse the parameters and store in a dictionary
    query_args = vars( query_parser.parse_args( args.query[1:] ) )

    # Copy the parsed values to the ordered dictionary to pass to GPUdb
    for key, val in query_args.iteritems():
        param_vals[ key ] = val

    # --------------------------------------
    # Call the GPUDB query:
    # Derive the endpoint name from the query name
    endpoint_name = "/" + query_name.replace( "_", "" )
    # One exception is /add
    if endpoint_name == "/addobject":
        endpoint_name = "/add"

    # Parse request and response schemas for GPUDB
    (req_schema, resp_schema) = gpudbdb.get_schemas( query_name )

    # Perform the GPUDB query
    response = gpudbdb.post_then_get( req_schema, resp_schema, param_vals, endpoint_name )

    print
    print "GPUDB Response:"
    if query_args[ "format_response" ] == True:
        print format_response( response )
    else:
        print response
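# Hypothetical entry point for run_gpudb(), assuming this file is executed as
# a script; the example query comes from the --query help text above:
#   python run_gpudb.py --query max_min --attribute x --set_id set1
if __name__ == '__main__':
    run_gpudb( sys.argv )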
def test_gpudb_ingestor():
    """Tries to stress out Kinetica's multi-head ingestion mode.
    Tests all possible sharding under the sun.
    """
    global gpudb_ingestor

    gpudb = GPUdb( encoding='BINARY', host = '127.0.0.1', port = '9191' )
    table_name = "test_ingest_table2"

    # Clear the table if it exists
    gpudb.clear_table( table_name, options = {"no_error_if_not_exists": "true"} )

    # The table type/schema -- want all possible types/properties to be
    # sharded and nullable
    _type = [ ["i1",        "int"                                          ],
              ["i2",        "int",    "shard_key", "nullable"              ],
              ["i8",        "int",    "shard_key", "nullable", "int8"      ],
              ["i16",       "int",    "shard_key", "nullable", "int16"     ],
              ["d1",        "double", "shard_key", "nullable"              ],
              ["f1",        "float",  "shard_key", "nullable"              ],
              ["l1",        "long",   "shard_key", "nullable"              ],
              ["timestamp", "long",   "shard_key", "nullable", "timestamp" ],
              ["s1",        "string", "shard_key", "nullable"              ],
              ["date",      "string", "shard_key", "nullable", "date"      ],
              ["datetime",  "string", "shard_key", "nullable", "datetime"  ],
              ["decimal",   "string", "shard_key", "nullable", "decimal"   ],
              ["ipv4",      "string", "shard_key", "nullable", "ipv4"      ],
              ["time",      "string", "shard_key", "nullable", "time"      ],
              ["c1",        "string", "shard_key", "nullable", "char1"     ],
              ["c2",        "string", "shard_key", "nullable", "char2"     ],
              ["c4",        "string", "shard_key", "nullable", "char4"     ],
              ["c8",        "string", "shard_key", "nullable", "char8"     ],
              ["c16",       "string", "shard_key", "nullable", "char16"    ],
              ["c32",       "string", "shard_key", "nullable", "char32"    ],
              ["c64",       "string", "shard_key", "nullable", "char64"    ],
              ["c128",      "string", "shard_key", "nullable", "char128"   ],
              ["c256",      "string", "shard_key", "nullable", "char256"   ] ]
    table = GPUdbTable( _type, table_name, db = gpudb )
    print ("Table Name:", table_name)
    record_type = table.get_table_type()

    # Instantiate a gpudb ingestor object; pay attention to the batch size.
    # Realistic cases would have higher batch sizes.
    ingestor_batch_size = 200
    options = {}
    workers = GPUdbWorkerList( gpudb )
    print ("Workers: ", workers.worker_urls, "\n")
    gpudb_ingestor = GPUdbIngestor( gpudb, table_name, record_type,
                                    ingestor_batch_size, options, workers )

    # Generate records to insert
    num_batches = 5        # Passed to generate_and_insert_data()
    batch_size = 1000      # Passed to generate_and_insert_data()
    num_pools = 5          # Number of processes spawned in a single Pool call
    num_pool_batches = 10  # Number of times Pool is invoked

    # # In case someone wants to call the function directly
    # generate_and_insert_data( [batch_size, num_batches] )  # debug~~~~~~~~~~~~

    # Generate and insert data in parallel; total number of processes
    # spawned: (num_pools * num_pool_batches)
    for i in range(0, num_pool_batches):
        pool = Pool( processes = num_pools )
        results = pool.map_async( generate_and_insert_data,
                                  [[batch_size, num_batches]] * num_pools )
        results.get()
        pool.close()
        pool.join()
    # end multiprocess data generation and insertion

    # # Flush the ingestor
    # # NOTE: Was not seeing any record in the queues due to python's
    # # multithreading issues... need to flush from the function below
    # gpudb_ingestor.flush()

    num_records = num_batches * batch_size * num_pools * num_pool_batches
    print ()
    print ("Table name:", table_name)
    print ("Total # objects inserted:", num_records)
    print ()
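# The ingestion examples in this file call generate_and_insert_data(), which
# is defined elsewhere. A minimal sketch of such a worker, assuming the shared
# global `gpudb_ingestor` and the simple (d1, d2, l, s) layout used by
# gpudb_ingestor_example() below; insert_record()/flush() follow the
# GPUdbIngestor API, though the exact record encoding expected by
# insert_record() varies across API versions.
import collections
import random
import string

def generate_and_insert_data(args):
    batch_size, num_batches = args
    for _ in range(num_batches):
        for _ in range(batch_size):
            record = collections.OrderedDict([
                ('d1', random.uniform(0, 100)),
                ('d2', random.uniform(0, 100)),
                ('l',  random.randint(0, 10 ** 9)),
                ('s',  ''.join(random.choice(string.ascii_letters) for _ in range(8)))
            ])
            gpudb_ingestor.insert_record(record)
    gpudb_ingestor.flush()  # push any partially filled batch from this worker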
import ordereddict
import sys
reload(sys)
#sys.setdefaultencoding('utf-8')
sys.setdefaultencoding('iso-8859-1')

# Traffic capture packages
import dpkt
#from scapy.all import sr1,IP,ICMP,rdpcap
from scapy.all import *

# GPUdb packages
from gpudb import GPUdb
import uuid  # for generating uuids

gpudb = GPUdb(encoding='BINARY', gpudb_ip='10.1.10.31', gpudb_port='9191')

# Add more fields as needed for the analysis
type_definition = """{
    "type":"record",
    "name":"gen_pt",
    "fields":[
        {"name":"x","type":"double"},
        {"name":"y","type":"double"},
        {"name":"src","type":"string"},
        {"name":"dst","type":"string"},
        {"name":"payload","type":"string"}
    ]
}"""

# TODO : Pass pcap file as input
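# A minimal sketch of how the TODO above could be addressed with dpkt,
# assuming a pcap file path and IPv4-over-Ethernet frames; it builds the
# (src, dst, payload) tuples that add_data() expects. The function name and
# the IP-only filtering are illustrative assumptions.
import socket

def read_traffic_data(pcap_path):
    traffic_data = []
    with open(pcap_path, 'rb') as f:
        for ts, buf in dpkt.pcap.Reader(f):
            eth = dpkt.ethernet.Ethernet(buf)
            if not isinstance(eth.data, dpkt.ip.IP):
                continue  # keep only IPv4 packets
            ip = eth.data
            traffic_data.append((socket.inet_ntoa(ip.src),
                                 socket.inet_ntoa(ip.dst),
                                 repr(ip.data)))  # stringify the payload
    return traffic_data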
def diagnose_gpudb( argv ):
    """Run a diagnostic test on GPUdb

    Argument:
       argv -- Command line arguments
    """
    # Parse the command line arguments
    if ( len(sys.argv) == 1 ):  # None provided
        # Print help message and quit
        print helpMessage
        sys.exit( 2 )
    try:  # Parse the command line arguments
        opts, args = getopt.getopt( sys.argv[1:], "hlg:p:" )
    except getopt.GetoptError:
        print helpMessage
        sys.exit( 2 )

    # Some default values
    GPUdb_IP = '127.0.0.1'  # Run locally by default
    GPUdb_Port = '9191'     # Default port

    # Parse the arguments
    for opt, arg in opts:
        if opt == '-h':  # print usage and exit
            print helpMessage
            sys.exit()
        if opt == '-l':  # run gpudb on local machine
            isServer = False
        if opt == '-g':  # run gpudb on a server gpudb at the specified IP address
            GPUdb_IP = arg
            set_id = "TwitterPointText"  # Default set ID for server gpudb
        if opt == '-p':  # run gpudb on a server gpudb at the specified port
            GPUdb_Port = arg

    # Set up GPUdb with binary encoding
    gpudb = GPUdb( encoding='BINARY', host = GPUdb_IP, port = GPUdb_Port )

    # Create a data type
    point_schema_str = """{
        "type":"record",
        "name":"point",
        "fields":[
            {"name":"x","type":"double"},
            {"name":"y","type":"double"},
            {"name":"OBJECT_ID","type":"string"}
        ]
    }""".replace(' ','').replace('\n','')

    # Register the data type and ensure that it worked
    # Endpoint: /registertype
    register_resp = gpudb.register_type( point_schema_str, "", "point_type", "POINT" )
    assert register_resp['status_info']['status'] == 'OK', \
        "GPUdb failed to register point data type; error message: %s" \
        % register_resp['status_info']['message']

    # Using the registered type's ID, create a new set (and check that worked)
    # Endpoint: /newset
    type_id = register_resp[ 'type_id' ]
    set_id = "diagnostics_point_set_" + datetime.datetime.now().isoformat()
    new_set_resp = gpudb.new_set( type_id, set_id, "" )  # no parent set ID
    assert new_set_resp['status_info']['status'] == 'OK', \
        "GPUdb failed to create point set; error message: %s" \
        % new_set_resp['status_info']['message']

    # Add some data to the set in batches
    # Endpoint: /random
    count_1 = 2000
    param_map_1 = { "x": {"min": 0, "max": 42}, "y": {"min": 0, "max": 42} }
    random_resp = gpudb.random( set_id, count_1, param_map_1 )
    # Check that the first set of objects were generated successfully
    assert random_resp['status_info']['status'] == 'OK', \
        "GPUdb failed to generate random points; error message: %s" \
        % random_resp['status_info']['message']

    # Add another batch of data points to the same set, but at a different location
    # Endpoint: /random
    count_2 = 2000
    param_map_2 = { "x": {"min": -50, "max": -20}, "y": {"min": -50, "max": -20} }
    random_resp = gpudb.random( set_id, count_2, param_map_2 )
    # Check that the second set of objects were generated successfully
    assert random_resp['status_info']['status'] == 'OK', \
        "GPUdb failed to generate random points; error message: %s" \
        % random_resp['status_info']['message']

    # Check the total size of the set is as intended
    # Endpoint: /status
    total_size = count_1 + count_2
    status_resp = gpudb.status( set_id )
    assert status_resp['status_info']['status'] == 'OK', \
        "GPUdb failed to check status of set; error message: %s" \
        % status_resp['status_info']['message']
    assert status_resp[ 'total_size' ] == total_size, \
        "Error: Total size of set is not as expected. Set size = %s, expected size = %s" \
        % ( status_resp[ 'total_size' ], total_size )

    # Query chaining: do two filters one after another, get final count
    # Do a similar query with select, check count against the chained queries
    # Bounding box: x within [10, 20] and y within [10, 20]
    # Endpoint: /boundingbox
    bbox_set_id = "diagnostics_bbox_result_" + datetime.datetime.now().isoformat()
    bbox_resp = gpudb.bounding_box( 10, 20, 10, 20, "x", "y", set_id, bbox_set_id )
    assert bbox_resp['status_info']['status'] == 'OK', \
        "GPUdb failed to perform bounding box query; error message: %s" \
        % bbox_resp['status_info']['message']

    # Filter by radius: 100km radius around (lon, lat) = (15, 15)
    # Endpoint: /filterbyradius
    fradius_set_id = "diagnostics_filter_by_radius_result_" + datetime.datetime.now().isoformat()
    fradius_resp = gpudb.filter_by_radius( bbox_set_id, "x", "y", 15, 15, 100000, fradius_set_id )
    assert fradius_resp['status_info']['status'] == 'OK', \
        "GPUdb failed to perform filter by radius query; error message: %s" \
        % fradius_resp['status_info']['message']

    # Do a select query with a predicate that should yield the same result
    # as the above chained queries
    # Select: ( (10 <= x) and (x <= 20) and (10 <= y) and (y <= 20) and (geodist(x, y, 15, 15) < 100000) )
    # Endpoint: /select
    select_set_id = "diagnostics_select_result_" + datetime.datetime.now().isoformat()
    predicate = "( (10 <= x) and (x <= 20) and (10 <= y) and (y <= 20) and (geodist(x, y, 15, 15) < 100000) )"
    select_resp = gpudb.select( set_id, select_set_id, predicate )
    assert select_resp['status_info']['status'] == 'OK', \
        "GPUdb failed to perform select query; error message: %s" \
        % select_resp['status_info']['message']
    assert select_resp[ 'count' ] == fradius_resp[ 'count' ], \
        "Error: Mismatch in counts of select (%s) and chained queries (bounding box then filter by radius) (%s)" \
        % ( select_resp[ 'count' ], fradius_resp[ 'count' ] )

    # Delete a few objects and check the set size of the original set
    #
    # Delete objects: Delete a few objects given a predicate
    # Endpoint: /selectdelete
    delete_predicate = "((15 <= x) and (x <= 18.5))"
    delete_resp = gpudb.select_delete( set_id, delete_predicate )
    assert delete_resp['status_info']['status'] == 'OK', \
        "GPUdb failed to perform select delete operation; error message: %s" \
        % delete_resp['status_info']['message']

    # Check that the size of the set has gone down
    # Statistics returns the count by default
    # Endpoint: /statistics
    new_size = total_size - delete_resp[ 'count' ]
    statistics_resp = gpudb.statistics( set_id, "x", "sum" )
    assert statistics_resp['status_info']['status'] == 'OK', \
        "GPUdb failed to perform statistics operation; error message: %s" \
        % statistics_resp['status_info']['message']
    assert statistics_resp[ 'stats' ][ 'count' ] == new_size, \
        "Error: Mismatch in counts of set size (%s) and expected size (%s)" \
        % ( statistics_resp[ 'stats' ][ 'count' ], new_size )

    # Update a few objects and check the update was successful by doing a select
    #
    # Update objects based on x, change the y value
    # Endpoint: /selectupdate
    update_predicate = "((-35 <= x) and (x <= -33.5))"
    update_resp = gpudb.select_update( set_id, update_predicate, {'y': "71"} )
    assert update_resp['status_info']['status'] == 'OK', \
        "GPUdb failed to perform select update operation; error message: %s" \
        % update_resp['status_info']['message']

    # Check that the selected objects' y values have been changed
    #
    # Obtain the selected objects by performing a select query
    # Endpoint: /select
    select_set_id2 = "diagnostics_select_result_2_" + datetime.datetime.now().isoformat()
    select_resp1 = gpudb.select( set_id, select_set_id2, update_predicate )
    assert select_resp1['status_info']['status'] == 'OK', \
        "GPUdb failed to perform select operation; error message: %s" \
        % select_resp1['status_info']['message']

    # Get all the objects in the resultant set that have the updated y value
    # and check that the count matches the one above
    # Endpoint: /select
    select_predicate = "(y == 71)"
    select_set_id3 = "diagnostics_select_result_3_" + datetime.datetime.now().isoformat()
    select_resp2 = gpudb.select( set_id, select_set_id3, select_predicate )
    assert select_resp2['status_info']['status'] == 'OK', \
        "GPUdb failed to perform select operation; error message: %s" \
        % select_resp2['status_info']['message']

    # Now check that the counts match
    assert select_resp1[ 'count' ] == select_resp2[ 'count' ], \
        "GPUdb failed in performing select update correctly; expected count is %s, but given count is %s" \
        % ( select_resp1[ 'count' ], select_resp2[ 'count' ] )

    # Clear all the sets
    for sid in [ set_id, bbox_set_id, fradius_set_id,
                 select_set_id, select_set_id2, select_set_id3 ]:
        clear_resp = gpudb.clear( sid )
        assert clear_resp['status_info']['status'] == 'OK', \
            "GPUdb failed in clearing set %s" % sid
def gpudb_ingestor_example():
    global gpudb_ingestor

    gpudb = GPUdb( encoding='BINARY', host = '127.0.0.1', port = '9191' )
    table_name = "test_ingest_table"

    # Clear the table if it exists
    gpudb.clear_table( table_name )

    # Create the table schema and the table
    table_type_schema_json = {
        "type": "record",
        "name": "ingest_test_type",
        "fields": [
            { "name": "d1", "type": "double" },
            { "name": "d2", "type": "double" },
            { "name": "l",  "type": "long"   },
            { "name": "s",  "type": "string" }
        ]
    }
    table_type_schema_str = json.dumps( table_type_schema_json )
    table_type_schema = schema.parse( table_type_schema_str )

    # Column names
    d1 = "d1"
    d2 = "d2"
    l = "l"
    s = "s"

    table_column_properties = {}
    type_id = gpudb.create_type( type_definition = table_type_schema_str,
                                 label = "",
                                 properties = table_column_properties )[ "type_id" ]
    gpudb.create_table( table_name = table_name, type_id = type_id )
    print "Table Name:", table_name

    # Instantiate a gpudb ingestor object
    batch_size = 7000
    options = {}
    # workers = None
    workers = GPUdbIngestor.WorkerList( gpudb )
    print "Workers: ", workers.worker_urls, "\n"
    gpudb_ingestor = GPUdbIngestor( gpudb, table_name, batch_size, options, workers )

    # Generate records to insert
    num_batches = 10
    batch_size = 10000
    num_pools = 5
    num_pool_batches = 3

    # Generate and insert data in parallel in a pool of 5
    for i in range(0, num_pool_batches):
        pool = Pool( processes = num_pools )
        results = pool.map_async( generate_and_insert_data,
                                  [[batch_size, num_batches]] * num_pools )
        results.get()
        pool.close()
        pool.join()
    # end multiprocess data generation and insertion

    # Flush the ingestor (must do this to actually insert the data)
    gpudb_ingestor.flush()

    num_records = num_batches * batch_size * num_pools * num_pool_batches
    print
    print "Total # objects inserted:", num_records
    print
def diagnose_gpudb( argv ):
    """Run a diagnostic test on GPUdb

    Argument:
       argv -- Command line arguments
    """
    # Parse the command line arguments
    if ( len(sys.argv) == 1 ):  # None provided
        # Print help message and quit
        print ( helpMessage )
        sys.exit( 2 )
    try:  # Parse the command line arguments
        opts, args = getopt.getopt( sys.argv[1:], "hlg:p:v" )
    except getopt.GetoptError:
        print ( helpMessage )
        sys.exit( 2 )

    # Some default values
    GPUdb_IP = '127.0.0.1'  # Run locally by default
    GPUdb_Port = '9191'     # Default port
    isVerbose = False

    # Parse the arguments
    for opt, arg in opts:
        if opt == '-h':  # print usage and exit
            print ( helpMessage )
            sys.exit()
        if opt == '-l':  # run gpudb on local machine
            isServer = False
        if opt == '-g':  # run gpudb on a server gpudb at the specified IP address
            GPUdb_IP = arg
            set_id = "TwitterPointText"  # Default set ID for server gpudb
        if opt == '-p':  # run gpudb on a server gpudb at the specified port
            GPUdb_Port = arg
        if opt == '-v':  # prints verbose messages (only the success message, really)
            isVerbose = True

    # Set up GPUdb with binary encoding
    gpudb = GPUdb( encoding='BINARY', host = GPUdb_IP, port = GPUdb_Port )

    # Create a data type
    point_schema_str = """{
        "type":"record",
        "name":"point",
        "fields":[
            {"name":"x","type":"double"},
            {"name":"y","type":"double"},
            {"name":"OBJECT_ID","type":"string"}
        ]
    }""".replace(' ','').replace('\n','')

    # Register the data type and ensure that it worked
    # Endpoint: /create/type
    create_resp = gpudb.create_type( point_schema_str, "point_type" )
    assert create_resp['status_info']['status'] == 'OK', \
        "GPUdb failed to create point data type; error message: %s" \
        % create_resp['status_info']['message']

    # Using the registered type's ID, create a new table (and check that worked)
    # Endpoint: /create/table
    type_id = create_resp[ 'type_id' ]
    table_name = "diagnostics_point_set_" + datetime.datetime.now().isoformat()
    create_table_resp = gpudb.create_table( table_name, type_id )  # not a part of a collection
    assert create_table_resp['status_info']['status'] == 'OK', \
        "GPUdb failed to create point table; error message: %s" \
        % create_table_resp['status_info']['message']

    # Add some data to the set in batches
    # Endpoint: /insert/records/random
    count_1 = 2000
    param_map_1 = { "x": {"min": 0, "max": 42}, "y": {"min": 0, "max": 42} }
    random_resp = gpudb.insert_records_random( table_name, count_1, param_map_1 )
    # Check that the first set of objects were generated successfully
    assert random_resp['status_info']['status'] == 'OK', \
        "GPUdb failed to generate random points; error message: %s" \
        % random_resp['status_info']['message']

    # Add another batch of data points to the same set, but at a different location
    # Endpoint: /insert/records/random
    count_2 = 2000
    param_map_2 = { "x": {"min": -50, "max": -20}, "y": {"min": -50, "max": -20} }
    random_resp = gpudb.insert_records_random( table_name, count_2, param_map_2 )
    # Check that the second set of objects were generated successfully
    assert random_resp['status_info']['status'] == 'OK', \
        "GPUdb failed to generate random points; error message: %s" \
        % random_resp['status_info']['message']

    # Check the total size of the set is as intended
    # Endpoint: /show/table
    total_size = count_1 + count_2
    show_table_resp = gpudb.show_table( table_name, options = {"get_sizes": "true"} )
    assert show_table_resp['status_info']['status'] == 'OK', \
        "GPUdb failed to check status of set; error message: %s" \
        % show_table_resp['status_info']['message']
    assert show_table_resp[ 'total_size' ] == total_size, \
        "Error: Total size of set is not as expected. Set size = %s, expected size = %s" \
        % ( show_table_resp[ 'total_size' ], total_size )

    # Query chaining: do two filters one after another, get final count
    # Do a similar query with a filter, check count against the chained queries
    # Bounding box: x within [10, 20] and y within [10, 20]
    # Endpoint: /filter/bybox
    bbox_view_name = "diagnostics_bbox_result_" + datetime.datetime.now().isoformat()
    bbox_resp = gpudb.filter_by_box( table_name, bbox_view_name, "x", 10, 20, "y", 10, 20 )
    assert bbox_resp['status_info']['status'] == 'OK', \
        "GPUdb failed to perform bounding box query; error message: %s" \
        % bbox_resp['status_info']['message']

    # Filter by radius: 100km radius around (lon, lat) = (15, 15)
    # Endpoint: /filter/byradius
    fradius_view_name = "diagnostics_filter_by_radius_result_" + datetime.datetime.now().isoformat()
    fradius_resp = gpudb.filter_by_radius( bbox_view_name, fradius_view_name, "x", 15, "y", 15, 100000 )
    assert fradius_resp['status_info']['status'] == 'OK', \
        "GPUdb failed to perform filter by radius query; error message: %s" \
        % fradius_resp['status_info']['message']

    # Do a filter query with a predicate that should yield the same result
    # as the above chained queries
    # Filter: ( (10 <= x) and (x <= 20) and (10 <= y) and (y <= 20) and (geodist(x, y, 15, 15) < 100000) )
    # Endpoint: /filter
    filter_view_name = "diagnostics_filter_result_" + datetime.datetime.now().isoformat()
    predicate = "( (10 <= x) and (x <= 20) and (10 <= y) and (y <= 20) and (geodist(x, y, 15, 15) < 100000) )"
    filter_resp = gpudb.filter( table_name, filter_view_name, predicate )
    assert filter_resp['status_info']['status'] == 'OK', \
        "GPUdb failed to perform filter query; error message: %s" \
        % filter_resp['status_info']['message']
    assert filter_resp[ 'count' ] == fradius_resp[ 'count' ], \
        "Error: Mismatch in counts of filter (%s) and chained queries (bounding box then filter by radius) (%s)" \
        % ( filter_resp[ 'count' ], fradius_resp[ 'count' ] )

    # Delete a few objects and check the set size of the original set
    #
    # Delete objects: Delete a few objects given a predicate
    # Endpoint: /delete/records
    delete_expression = ["((15 <= x) and (x <= 18.5))"]
    delete_resp = gpudb.delete_records( table_name, delete_expression )
    assert delete_resp['status_info']['status'] == 'OK', \
        "GPUdb failed to perform delete operation; error message: %s" \
        % delete_resp['status_info']['message']

    # Check that the size of the set has gone down
    # Statistics returns the count by default
    # Endpoint: /aggregate/statistics
    new_size = total_size - delete_resp[ 'count_deleted' ]
    statistics_resp = gpudb.aggregate_statistics( table_name, "x", "count" )
    assert statistics_resp['status_info']['status'] == 'OK', \
        "GPUdb failed to perform the statistics operation; error message: %s" \
        % statistics_resp['status_info']['message']
    assert statistics_resp[ 'stats' ][ 'count' ] == new_size, \
        "Error: Mismatch in counts of set size (%s) and expected size (%s)" \
        % ( statistics_resp[ 'stats' ][ 'count' ], new_size )

    # Update a few objects and check the update was successful by doing a filter
    #
    # Update objects based on x, change the y value
    # Endpoint: /update/records
    update_predicate = "((-35 <= x) and (x <= -33.5))"
    update_resp = gpudb.update_records( table_name, [ update_predicate ], [{'y': "71"}] )
    assert update_resp['status_info']['status'] == 'OK', \
        "GPUdb failed to perform the update operation; error message: %s" \
        % update_resp['status_info']['message']

    # Check that the selected objects' y values have been changed
    #
    # Obtain the selected objects by performing a filter query
    # Endpoint: /filter
    filter_view_name2 = "diagnostics_filter_result_2_" + datetime.datetime.now().isoformat()
    filter_resp1 = gpudb.filter( table_name, filter_view_name2, update_predicate )
    assert filter_resp1['status_info']['status'] == 'OK', \
        "GPUdb failed to perform filter operation; error message: %s" \
        % filter_resp1['status_info']['message']

    # Get all the objects in the resultant set that have the updated y value
    # and check that the count matches the one above
    # Endpoint: /filter
    filter_expression = "(y == 71)"
    filter_view_name3 = "diagnostics_filter_result_3_" + datetime.datetime.now().isoformat()
    filter_resp2 = gpudb.filter( table_name, filter_view_name3, filter_expression )
    assert filter_resp2['status_info']['status'] == 'OK', \
        "GPUdb failed to perform filter operation; error message: %s" \
        % filter_resp2['status_info']['message']

    # Now check that the counts match
    assert filter_resp1[ 'count' ] == filter_resp2[ 'count' ], \
        "GPUdb failed in performing update correctly; expected count is %s, but given count is %s" \
        % ( filter_resp1[ 'count' ], filter_resp2[ 'count' ] )

    # Clear all the tables (dropping the original table also drops views)
    clear_resp = gpudb.clear_table( table_name )
    assert clear_resp['status_info']['status'] == 'OK', \
        "GPUdb failed in clearing set %s" % table_name

    if isVerbose:
        print ( "The diagnostics tests succeeded!" )
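# Hypothetical entry point for the diagnostic above, assuming this file is
# executed directly; -g/-p select the server and -v prints the success message:
#   python diagnose_gpudb.py -g 10.1.10.31 -p 9191 -v
if __name__ == '__main__':
    diagnose_gpudb( sys.argv )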
import sys
reload(sys)
#sys.setdefaultencoding('utf-8')
sys.setdefaultencoding('iso-8859-1')

# Traffic capture packages
import dpkt
#from scapy.all import sr1,IP,ICMP,rdpcap
from scapy.all import *

# GPUdb packages
from gpudb import GPUdb
import uuid  # for generating uuids

gpudb = GPUdb(encoding='BINARY', gpudb_ip='10.1.10.31', gpudb_port='9191')

# Add more fields as needed for the analysis
type_definition = """{
    "type":"record",
    "name":"gen_pt",
    "fields":[
        {"name":"x","type":"double"},
        {"name":"y","type":"double"},
        {"name":"src","type":"string"},
        {"name":"dst","type":"string"},
        {"name":"payload","type":"string"}
    ]
}"""

# TODO : Pass pcap file as input

def print_packet_stats():