def run(self, cloudburst, lookup_key, dynamic: bool, input_object, inp: Table):
    """Append a looked-up object as a new column on every row of ``inp``.

    Args:
        cloudburst: KVS client used to fetch the object; may be None.
        lookup_key: Name of the new column. When ``dynamic`` is set (or no
            client is available) it is re-pointed at the value stored under
            that column in the first row of ``inp``.
        dynamic: If True, resolve the object locally instead of via the KVS.
        input_object: Fallback object used when no KVS lookup is performed.
        inp: Input table, possibly still serialized as bytes.

    Returns:
        A new Table carrying the extra column — serialized iff the input was.
    """
    from flow.types.basic import get_type

    # Accept both live Table objects and serialized payloads, and remember
    # which form we received so the output matches it.
    serialized = isinstance(inp, bytes)
    if serialized:
        inp = deserialize(inp)

    if cloudburst is None or dynamic:
        obj = input_object
        # Rebind lookup_key to the first row's value for that column; it
        # becomes the name of the appended column below.
        lookup_key = next(inp.get())[lookup_key]
    else:
        obj = cloudburst.get(lookup_key)

    # Extend the schema with the new column, typed from the fetched object.
    schema = list(inp.schema)
    schema.append((lookup_key, get_type(type(obj))))

    new_table = Table(schema)
    for row in inp.get():
        vals = [row[key] for key, _ in inp.schema]
        vals.append(obj)
        new_table.insert(vals)

    if serialized:
        new_table = serialize(new_table)
    return new_table
def classify_language(self, table: Table) -> (str, str):
    """Detect the language of each row's 'classify' text.

    Returns:
        A list of ``[language_code, translate_text]`` pairs, one per row.
    """
    inputs = [row['classify'] for row in table.get()]
    # predict() returns labels first — presumably fastText-style output;
    # verify against the model wrapper.
    predicts = self.model.predict(inputs)[0]
    # Labels look like '__label__en'; keep only the code after the last '_'.
    predicts = [label[0].split('_')[-1] for label in predicts]
    # Pair each prediction with its row's text to translate (replaces the
    # original hand-maintained index counter).
    return [[pred, row['translate']]
            for pred, row in zip(predicts, table.get())]
def resnet_model_gpu(self, table: Table) -> (np.ndarray, int, float):
    """ResNet101 image classification on GPU.

    Returns:
        A list of ``[original_image, top_class_index, top_class_percent]``
        per input row.
    """
    import torch

    # Guard: torch.stack raises on an empty sequence (matches the empty
    # shortcut in inceptionv3_model_gpu).
    if table.size() == 0:
        return []

    originals = [row['img'] for row in table.get()]
    inputs = torch.stack(
        [torch.from_numpy(img) for img in originals], dim=0).cuda()

    out = self.resnet(inputs)
    # Class indices sorted by descending score; column 0 is the top-1.
    _, indices = torch.sort(out, descending=True)
    # Per-row softmax percentages. The previous code took only row 0's
    # distribution ([0]) and indexed it per image, mis-scoring every image
    # after the first; it also left a misspelled, unused `indicies` array.
    percentages = (
        torch.nn.functional.softmax(out, dim=1) * 100
    ).cpu().detach().numpy()

    result = []
    for i in range(len(originals)):
        top = indices[i][0].item()
        result.append([originals[i], top, percentages[i][top].item()])
    return result
def english_to_german_gpu(self, table: Table) -> str:
    """Translate each row's 'translate' text from English to German."""
    sentences = [row['translate'] for row in table.get()]
    # Nothing to translate — mirror the model's list-in/list-out contract.
    if not sentences:
        return []
    return self.model.translate(sentences)
def english_to_french(self, table: Table) -> str:
    """Translate English text to French.

    Accepts either a Table with a 'translate' column or a bare string,
    which is treated as a single sentence.
    """
    # isinstance instead of `type(...) == Table` — the idiomatic check,
    # and it also tolerates Table subclasses.
    if isinstance(table, Table):
        inputs = [row['translate'] for row in table.get()]
    else:
        inputs = [table]
    if not inputs:
        return []
    return self.model.translate(inputs)
def run(self, _, col: str, inp: Table):
    """Group the rows of ``inp`` by the values of column ``col``.

    The input may arrive serialized as bytes; the grouped result is
    returned in the same form it was received.
    """
    # isinstance instead of `type(...) == bytes` — the idiomatic check.
    serialized = isinstance(inp, bytes)
    if serialized:
        inp = deserialize(inp)

    gb_table = GroupbyTable(inp.schema, col)
    for row in inp.get():
        gb_table.add_row(row)

    return serialize(gb_table) if serialized else gb_table
def cascade_predict_batch(self, table: Table) -> str:
    """Resolve the final class label for each row of cascade predictions.

    A row that never reached Inception (its incept_max_prob is None) keeps
    the ResNet prediction; otherwise the more confident model wins.
    """
    labels = []
    for row in table.get():
        r_idx, r_prob = row['resnet_index'], row['resnet_max_prob']
        i_idx, i_prob = row['incept_index'], row['incept_max_prob']
        # Short-circuit: either ResNet was confident enough that Inception
        # was skipped, or ResNet simply out-scored Inception.
        if i_prob is None or r_prob > i_prob:
            winner = r_idx
        else:
            winner = i_idx
        labels.append(self.classes[winner])
    return labels
def resnet_model_cpu(self, table: Table) -> str:
    """Image classification on CPU via ``self.resnet``.

    NOTE(review): the original docstring said "AlexNet ... on ImageNet"
    but the method name and attribute say ResNet — confirm which model is
    actually loaded.

    Returns:
        The top-1 class name for each input row.
    """
    import torch

    batch = [self.transforms(row['img']) for row in table.get()]
    # Guard: torch.stack raises on an empty sequence of tensors.
    if not batch:
        return []

    output = self.resnet(torch.stack(batch, dim=0))
    # Class indices sorted by descending score; column 0 is the top-1.
    _, indices = torch.sort(output, descending=True)
    indices = indices.detach().numpy()
    return [self.classes[idx_set[0]] for idx_set in indices]
def inceptionv3_model_gpu(self, table: Table) -> (int, float):
    """InceptionV3 image classification on GPU.

    Returns:
        A list of ``[top_class_index, top_class_percent]`` per input row.
    """
    import torch

    # Shortcut for empty input — torch.stack would raise on an empty list.
    if table.size() == 0:
        return []

    originals = [row['img'] for row in table.get()]
    inputs = torch.stack(
        [torch.from_numpy(img) for img in originals], dim=0).cuda()

    out = self.incept(inputs)
    # Class indices sorted by descending score; column 0 is the top-1.
    _, indices = torch.sort(out, descending=True)
    # Per-row softmax percentages. The previous code took only row 0's
    # distribution ([0]) and indexed it per image, mis-scoring every image
    # after the first.
    percentages = (
        torch.nn.functional.softmax(out, dim=1) * 100
    ).cpu().detach().numpy()

    result = []
    for i in range(len(originals)):
        top = indices[i][0].item()
        result.append([top, percentages[i][top].item()])
    return result
def transform_batch(self, table: Table) -> np.ndarray:
    """Apply the preprocessing transform to each row's image.

    Returns a list of numpy arrays, one per input row.
    """
    tensors = (self.transform(row['img']) for row in table.get())
    return [t.detach().numpy() for t in tensors]