def generate_data(send_batch, make_data_args, samples_count,
                  batch_size, resources_count, topic):
    """Generate test samples, spread them over random resources and send them.

    ``make_data_args`` is modified in place before its attributes are
    expanded into keyword arguments for ``make_test_data.make_test_data``:
    ``interval`` is set to 1, ``start``/``end`` are set to cover the last
    ``samples_count`` minutes (UTC) and ``resource_id`` is cleared, because
    every sample gets its resource id assigned here instead.

    :param send_batch: callable invoked as ``send_batch(topic, batch)`` for
        every non-empty batch of samples.
    :param make_data_args: object whose attributes are passed as keyword
        arguments to ``make_test_data.make_test_data`` (mutated, see above).
    :param samples_count: number of samples after which generation stops;
        also the length, in minutes, of the requested time window.
    :param batch_size: number of accumulated samples that triggers a send.
    :param resources_count: number of random resource ids (UUID4 strings)
        the samples are distributed over.
    :param topic: passed through unchanged to ``send_batch``.
    :returns: dict mapping each generated resource id to the number of
        samples that were assigned to it.
    """
    make_data_args.interval = 1
    make_data_args.start = (datetime.datetime.utcnow() -
                            datetime.timedelta(minutes=samples_count))
    make_data_args.end = datetime.datetime.utcnow()
    make_data_args.resource_id = None

    resources_list = [str(uuid.uuid4()) for _ in range(resources_count)]
    resource_samples = {resource: 0 for resource in resources_list}
    last_index = len(resources_list) - 1

    batch = []
    samples = make_test_data.make_test_data(**make_data_args.__dict__)
    for count, sample in enumerate(samples, start=1):
        resource = resources_list[random.randint(0, last_index)]
        resource_samples[resource] += 1
        sample['resource_id'] = resource
        # need to change the timestamp from datetime.datetime type to iso
        # format (unicode type), because collector will change iso format
        # timestamp to datetime.datetime type before recording to db.
        sample['timestamp'] = sample['timestamp'].isoformat()
        # need to recalculate signature because of the resource_id change
        sig = utils.compute_signature(sample,
                                      cfg.CONF.publisher.telemetry_secret)
        sample['message_signature'] = sig
        batch.append(sample)
        if len(batch) == batch_size:
            send_batch(topic, batch)
            batch = []
        if count == samples_count:
            # Enough samples produced; stop consuming the generator.
            break

    # Flush whatever is left. When the sample count is an exact multiple of
    # batch_size the last full batch has already been sent, so skip the
    # flush instead of sending an empty batch.
    if batch:
        send_batch(topic, batch)
    return resource_samples
def generate_data(conf, send_batch, make_data_args, samples_count,
                  batch_size, resources_count, topic):
    """Generate test samples, spread them over random resources and send them.

    ``make_data_args`` is modified in place before its attributes are
    expanded into keyword arguments for ``make_test_data.make_test_data``:
    ``interval`` is set to 1, ``start``/``end`` are set to cover the last
    ``samples_count`` minutes (UTC) and ``resource_id`` is cleared, because
    every sample gets its resource id assigned here instead.

    :param conf: configuration object; passed to
        ``make_test_data.make_test_data`` and read for
        ``conf.publisher.telemetry_secret`` when signing samples.
    :param send_batch: callable invoked as ``send_batch(topic, batch)`` for
        every non-empty batch of samples.
    :param make_data_args: object whose attributes are passed as keyword
        arguments to ``make_test_data.make_test_data`` (mutated, see above).
    :param samples_count: number of samples after which generation stops;
        also the length, in minutes, of the requested time window.
    :param batch_size: number of accumulated samples that triggers a send.
    :param resources_count: number of random resource ids (UUID4 strings)
        the samples are distributed over.
    :param topic: passed through unchanged to ``send_batch``.
    :returns: dict mapping each generated resource id to the number of
        samples that were assigned to it.
    """
    make_data_args.interval = 1
    make_data_args.start = (datetime.datetime.utcnow() -
                            datetime.timedelta(minutes=samples_count))
    make_data_args.end = datetime.datetime.utcnow()
    make_data_args.resource_id = None

    resources_list = [str(uuid.uuid4()) for _ in range(resources_count)]
    resource_samples = {resource: 0 for resource in resources_list}
    last_index = len(resources_list) - 1

    batch = []
    samples = make_test_data.make_test_data(conf, **make_data_args.__dict__)
    for count, sample in enumerate(samples, start=1):
        resource = resources_list[random.randint(0, last_index)]
        resource_samples[resource] += 1
        sample['resource_id'] = resource
        # need to change the timestamp from datetime.datetime type to iso
        # format (unicode type), because collector will change iso format
        # timestamp to datetime.datetime type before recording to db.
        sample['timestamp'] = sample['timestamp'].isoformat()
        # need to recalculate signature because of the resource_id change
        sig = utils.compute_signature(sample,
                                      conf.publisher.telemetry_secret)
        sample['message_signature'] = sig
        batch.append(sample)
        if len(batch) == batch_size:
            send_batch(topic, batch)
            batch = []
        if count == samples_count:
            # Enough samples produced; stop consuming the generator.
            break

    # Flush whatever is left. When the sample count is an exact multiple of
    # batch_size the last full batch has already been sent, so skip the
    # flush instead of sending an empty batch.
    if batch:
        send_batch(topic, batch)
    return resource_samples
def generate_data(send_batch, make_data_args, samples_count,
                  batch_size, resources_count, topic):
    """Generate test samples, spread them over random resources and send them.

    ``make_data_args`` is modified in place before its attributes are
    expanded into keyword arguments for ``make_test_data.make_test_data``:
    ``interval`` is set to 1, ``start``/``end`` are set to cover the last
    ``samples_count`` minutes (UTC) and ``resource_id`` is cleared, because
    every sample gets its resource id assigned here instead.

    :param send_batch: callable invoked as ``send_batch(topic, batch)`` for
        every non-empty batch of samples.
    :param make_data_args: object whose attributes are passed as keyword
        arguments to ``make_test_data.make_test_data`` (mutated, see above).
    :param samples_count: number of samples after which generation stops;
        also the length, in minutes, of the requested time window.
    :param batch_size: number of accumulated samples that triggers a send.
    :param resources_count: number of random resource ids (UUID4 strings)
        the samples are distributed over.
    :param topic: passed through unchanged to ``send_batch``.
    :returns: dict mapping each generated resource id to the number of
        samples that were assigned to it.
    """
    make_data_args.interval = 1
    make_data_args.start = (datetime.datetime.utcnow() -
                            datetime.timedelta(minutes=samples_count))
    make_data_args.end = datetime.datetime.utcnow()
    make_data_args.resource_id = None

    resources_list = [str(uuid.uuid4()) for _ in range(resources_count)]
    resource_samples = {resource: 0 for resource in resources_list}
    last_index = len(resources_list) - 1

    batch = []
    samples = make_test_data.make_test_data(**make_data_args.__dict__)
    for count, sample in enumerate(samples, start=1):
        resource = resources_list[random.randint(0, last_index)]
        resource_samples[resource] += 1
        sample['resource_id'] = resource
        # need to recalculate signature because of the resource_id change
        sig = utils.compute_signature(sample,
                                      cfg.CONF.publisher.telemetry_secret)
        sample['message_signature'] = sig
        batch.append(sample)
        if len(batch) == batch_size:
            send_batch(topic, batch)
            batch = []
        if count == samples_count:
            # Enough samples produced; stop consuming the generator.
            break

    # Flush whatever is left. When the sample count is an exact multiple of
    # batch_size the last full batch has already been sent, so skip the
    # flush instead of sending an empty batch.
    if batch:
        send_batch(topic, batch)
    return resource_samples
def generate_data(rpc_client, make_data_args, samples_count,
                  batch_size, resources_count, topic):
    """Generate test samples, spread them over random resources and send them.

    ``make_data_args`` is modified in place before its attributes are
    expanded into keyword arguments for ``make_test_data.make_test_data``:
    ``interval`` is set to 1, ``start``/``end`` are set to cover the last
    ``samples_count`` minutes (UTC) and ``resource_id`` is cleared, because
    every sample gets its resource id assigned here instead.

    :param rpc_client: passed through unchanged as the first argument of
        the module-level ``send_batch(rpc_client, topic, batch)``.
    :param make_data_args: object whose attributes are passed as keyword
        arguments to ``make_test_data.make_test_data`` (mutated, see above).
    :param samples_count: number of samples after which generation stops;
        also the length, in minutes, of the requested time window.
    :param batch_size: number of accumulated samples that triggers a send.
    :param resources_count: number of random resource ids (UUID4 strings)
        the samples are distributed over.
    :param topic: passed through unchanged to ``send_batch``.
    :returns: dict mapping each generated resource id to the number of
        samples that were assigned to it.
    """
    make_data_args.interval = 1
    make_data_args.start = (datetime.datetime.utcnow() -
                            datetime.timedelta(minutes=samples_count))
    make_data_args.end = datetime.datetime.utcnow()
    make_data_args.resource_id = None

    # ``range`` instead of ``xrange``: the latter does not exist on
    # Python 3, while ``range`` works on both major versions.
    resources_list = [str(uuid.uuid4()) for _ in range(resources_count)]
    resource_samples = {resource: 0 for resource in resources_list}
    last_index = len(resources_list) - 1

    batch = []
    samples = make_test_data.make_test_data(**make_data_args.__dict__)
    for count, sample in enumerate(samples, start=1):
        resource = resources_list[random.randint(0, last_index)]
        resource_samples[resource] += 1
        sample['resource_id'] = resource
        batch.append(sample)
        if len(batch) == batch_size:
            send_batch(rpc_client, topic, batch)
            batch = []
        if count == samples_count:
            # Enough samples produced; stop consuming the generator.
            break

    # Flush whatever is left. When the sample count is an exact multiple of
    # batch_size the last full batch has already been sent, so skip the
    # flush instead of sending an empty batch.
    if batch:
        send_batch(rpc_client, topic, batch)
    return resource_samples
def make_train_test():
    """Build the train features, test features and train target.

    Calls ``make_train_data()`` and ``make_test_data()`` (in that order)
    and strips the non-feature labels from their results.
    NOTE(review): the results are presumably pandas DataFrames, in which
    case ``axis=1`` drops columns -- confirm against those helpers.

    :returns: tuple ``(X_train, X_test, y)`` where ``X_train`` is the train
        data without ``report_id`` and ``y``, ``X_test`` is the test data
        without ``report_id``, and ``y`` is the ``y`` attribute of the
        train data.
    """
    train_frame = make_train_data()
    test_frame = make_test_data()

    # Evaluated in the same order as the original tuple expression.
    train_features = train_frame.drop(['report_id', 'y'], axis=1)
    test_features = test_frame.drop(['report_id'], axis=1)
    target = train_frame.y

    return train_features, test_features, target