def loss_fn(output, target, alpha):
    # NCHW -> NHWC
    output = output.permute(0, 2, 3, 1)
    # NHWC -> N, H, W, 3, (5 + cls): split the channels into 3 anchors per cell
    output = output.reshape(output.size(0), output.size(1), output.size(2), 3, -1)
    target = target.to(utils.getDevice())

    # The loss has two parts: grid cells that contain an object and cells that
    # do not. Negative samples only contribute a confidence loss.
    mask_noobj = target[..., 4] <= 0.1
    output_noobj, target_noobj = output[mask_noobj], target[mask_noobj]
    loss_noobj = conf_loss_fn(output_noobj[:, 4], target_noobj[:, 4])

    mask_obj = target[..., 4] > 0.1
    output_obj, target_obj = output[mask_obj], target[mask_obj]
    if output_obj.size(0) > 0:
        loss_obj_conf = conf_loss_fn(output_obj[:, 4], target_obj[:, 4])          # confidence loss
        loss_obj_center = center_loss_fn(output_obj[:, 0:2], target_obj[:, 0:2])  # center-offset loss
        loss_obj_wh = wh_loss_fn(output_obj[:, 2:4], target_obj[:, 2:4])          # width/height loss
        # Classification loss: one-hot encode the targets and use MSE instead
        # of cross entropy (the old variant was
        # cls_loss_fn(output_obj[:, 5:], target_obj[:, 5].long())).
        target_obj_cls = target_obj[:, 5].reshape(-1, 1)
        target_obj_cls_one_hot = torch.zeros(
            target_obj_cls.size(0), cfg.class_num, device=utils.getDevice()
        ).scatter_(1, target_obj_cls.long(), 1)
        loss_obj_cls = cls_loss_fn(output_obj[:, 5:], target_obj_cls_one_hot)
        loss_obj = loss_obj_conf + loss_obj_center + loss_obj_wh + loss_obj_cls
        return alpha * loss_obj + (1 - alpha) * loss_noobj
    else:
        return loss_noobj
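# conf_loss_fn, center_loss_fn, wh_loss_fn, and cls_loss_fn are defined
# elsewhere in this file. A minimal sketch of one plausible set of choices,
# assuming raw-logit confidence/center outputs and the MSE classification
# loss mentioned in the comment above (these exact choices are my assumption):
conf_loss_fn = torch.nn.BCEWithLogitsLoss()    # objectness is a binary target
center_loss_fn = torch.nn.BCEWithLogitsLoss()  # center offsets lie in (0, 1)
wh_loss_fn = torch.nn.MSELoss()                # width/height regression
cls_loss_fn = torch.nn.MSELoss()               # MSE against one-hot classes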
def main():
    device = getDevice()
    device.enable_device()

    # Let the camera stream stabilise before reading the intrinsics.
    for i in range(world.stablisation):
        frame = device.poll_frames()
    color_intrinsics = device.get_device_intrinsics(frame)[rs.stream.color]
    depth_intrinsics = device.get_device_intrinsics(frame)[rs.stream.depth]
    print(">RGB:", color_intrinsics.width, color_intrinsics.height)
    print(">DEPTH:", depth_intrinsics.width, depth_intrinsics.height)

    calibrator = CALIBRATION()
    calibrator.run_calibration()
    qrcode = QRCODE(depth_intrinsics.ppx, depth_intrinsics.ppy,
                    depth_intrinsics.fx, depth_intrinsics.fy)
    seg = Seg("ycb", color_intrinsics)
    # seg = Seg("linemod", color_intrinsics)

    flag = False      # True while the prediction frame is frozen
    already = False   # True once the frozen frame has been segmented
    while True:
        frame = device.poll_frames()
        color = frame[rs.stream.color]
        depth = frame[rs.stream.depth]
        img = np.asanyarray(color.get_data())

        img_axis = calibrator.draw_axis(np.copy(img))
        img_qr = np.copy(img)

        if not flag:
            # Live view: outline the region that will be segmented.
            img_pred = np.copy(img)
            img_pred = cv.rectangle(img_pred, (320, 120), (960, 600), (0, 0, 0), 2)
        elif not already:
            # Run segmentation once on the frozen frame's region of interest.
            pred, img_test = seg.predict(img_pred[120:600, 320:960], draw=True)
            img_pred[120:600, 320:960] = img_test
            already = True
            if len(pred) == 0:
                print("No Object")
                continue
            # print(pred)
            # a = input(">WHICH: %s \n" % ([seg.names[a[0]] for a in pred]))
            # pos = pred[0][1][:, 3].reshape((-1, 1))
            # out = np.matmul(seg.intrinsics, pos) / pos[2, 0]
            # x, y = int(out[0, 0]), int(out[1, 0])
            # img_pred[y-5:y+5, x-5:x+5] = 0

        if qrcode.update(img, depth, frame[rs.stream.depth]):
            img_qr = qrcode.draw(img_qr)

        # 2x2 mosaic: axis view, QR view, prediction view, raw view.
        all_img = np.vstack((np.hstack((img_axis, img_qr)),
                             np.hstack((img_pred, img))))
        all_img = cv.resize(all_img, (1280, 720))
        cv.imshow("world", all_img)

        key = cv.waitKey(1)
        if key == ord('q'):
            cv.destroyAllWindows()
            return
        elif key == ord('p'):
            # Toggle the prediction freeze and re-arm segmentation.
            flag = not flag
            already = False
        elif key == ord('t'):
            cv.imwrite("./photos/main_%s.png" % (world.now), all_img)
        del img_axis, img, img_qr
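# getDevice() is defined elsewhere; the calls above suggest it returns a small
# device-manager wrapper around pyrealsense2. A hypothetical sketch, assuming a
# 1280x720 color/depth configuration (the crop above implies frames wider than
# 960 px) and depth aligned to the color stream:
import pyrealsense2 as rs

class DeviceManager:
    def __init__(self):
        self.pipeline = rs.pipeline()
        self.config = rs.config()
        self.config.enable_stream(rs.stream.color, 1280, 720, rs.format.bgr8, 30)
        self.config.enable_stream(rs.stream.depth, 1280, 720, rs.format.z16, 30)
        self.align = rs.align(rs.stream.color)

    def enable_device(self):
        self.pipeline.start(self.config)

    def poll_frames(self):
        # Align depth to the color stream and key the frames by stream type.
        frames = self.align.process(self.pipeline.wait_for_frames())
        return {rs.stream.color: frames.get_color_frame(),
                rs.stream.depth: frames.get_depth_frame()}

    def get_device_intrinsics(self, frame):
        return {stream: f.get_profile().as_video_stream_profile().get_intrinsics()
                for stream, f in frame.items()}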
def __getitem__(self, idx):
    path = self.X_filepaths[idx]
    # Features are stored as .npy; swap the last two axes into the expected layout.
    with open(path, 'rb') as f:
        feature = np.swapaxes(np.load(f), 1, 2)
    scene = self.class_map[self.y_classnames[idx]]
    # The recording device is encoded in the file name.
    label_str = pathlib.Path(path).name.replace('.npy', '')
    device = utils.get_indexOfDevice(utils.getDevice(label_str))
    return feature, scene, device
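# For reference, the swapaxes call above turns a stored (channels, frames,
# bins) array into (channels, bins, frames); a self-contained illustration
# with invented shapes:
import numpy as np

feature = np.zeros((2, 431, 40))         # hypothetical (channels, frames, bins)
print(np.swapaxes(feature, 1, 2).shape)  # -> (2, 40, 431)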
if __name__ == '__main__':
    myDataset = dataset.CocoDataset()
    # drop_last: drop the final batch when it is smaller than batch_size
    train_loader = DataLoader(myDataset, batch_size=8, shuffle=True,
                              num_workers=4, drop_last=True)

    # Build the network
    net = MainNet(cfg.class_num).to(utils.getDevice())
    # Load pretrained weights when resuming
    # net.load_state_dict(torch.load('data/params/ckpt-185.pt'))

    # Start training
    net.train()
    # Logging object for watching parameters
    # summaryWriter = SummaryWriter()
    # Define the optimizer
    opt = torch.optim.Adam(net.parameters())

    for epoch in range(10000):
        for i, (target_13, target_26, target_52, img_data) in enumerate(train_loader):
            output_13, output_26, output_52 = net(img_data.to(utils.getDevice()))
            loss_13 = loss_fn(output_13, target_13, 0.9)
            loss_26 = loss_fn(output_26, target_26, 0.9)
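            # The loop is cut off here; presumably it continues with the
            # 52-stride loss and an optimizer step (a sketch, my assumption):
            loss_52 = loss_fn(output_52, target_52, 0.9)
            loss = loss_13 + loss_26 + loss_52

            opt.zero_grad()
            loss.backward()
            opt.step()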
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision
from torchvision import transforms, models
import matplotlib.pyplot as plt
from sklearn.metrics import cohen_kappa_score
import time
import copy

from data_aug import *
from config import *
import utils

device = utils.getDevice()
accs = []
kappas = []
losses = []


def train_model(model, dataloaders, dataset_sizes, criterion, optimizer,
                scheduler, num_epochs=25):
    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.

    for epoch in range(num_epochs):
        print("Epoch {}/{}".format(epoch, num_epochs - 1))
        print("-" * 10)

        for phase in ["train", "val"]:
            if phase == "train":
                # Note: stepping the scheduler before the optimizer is the
                # pre-1.1 PyTorch convention; on newer versions the scheduler
                # should step after optimizer.step().
                scheduler.step()
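            # The snippet ends here; a sketch of how such a phase loop usually
            # continues, assuming it follows the standard torchvision
            # fine-tuning pattern (and that `import torch` appears at the top):
            if phase == "train":
                model.train()
            else:
                model.eval()

            running_loss, running_corrects = 0.0, 0
            for inputs, labels in dataloaders[phase]:
                inputs, labels = inputs.to(device), labels.to(device)
                optimizer.zero_grad()
                with torch.set_grad_enabled(phase == "train"):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)
                    if phase == "train":
                        loss.backward()
                        optimizer.step()
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]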
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm
import torch
from torch.utils.data import DataLoader

from data_aug import *
from utils import getDevice

DATA_DIR = "../input/aptos2019-blindness-detection/"
TEST_DIR = DATA_DIR + "test_images/"
TEST_CSV = DATA_DIR + "test.csv"

device = getDevice()
print("Using ", device)

# Note: RandomResizedCrop makes test-time predictions nondeterministic;
# a deterministic Resize + CenterCrop is the usual choice at inference.
data_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    # transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Test set
dataset = BD_Dataset(TEST_CSV, TEST_DIR, data_transform, True)
dataloader = DataLoader(dataset, batch_size=10, shuffle=False, num_workers=4)

# Recreate the exact same model, including weights and optimizer state.
model = torch.load("finetuned_resnet50.pt")
model.eval()
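# The snippet ends before the inference loop. A minimal sketch of how
# predictions are typically collected for a Kaggle submission; the column
# name "diagnosis" follows the APTOS 2019 format, and the assumption that
# the dataloader yields plain image batches is mine:
preds = []
with torch.no_grad():
    for inputs in tqdm(dataloader):
        inputs = inputs.to(device)
        outputs = model(inputs)
        preds.extend(torch.argmax(outputs, dim=1).cpu().numpy())

submission = pd.read_csv(TEST_CSV)
submission["diagnosis"] = preds
submission.to_csv("submission.csv", index=False)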
            if np.isnan(z):
                # Depth lookup failed: keep the pixel coords with a zero depth
                # and remember the index so it can be masked later.
                xyz.extend([point[0], point[1], 0.0])
                missing.append(i + 2)
                if world.qrcode_verbose:
                    print("z is nan, replaced by 0.0")
                continue
            xyz.extend(self.cal_point(point.x, point.y, z))
        # Masking only takes effect on a masked array, so build one explicitly.
        xyz = ma.array(xyz)
        if len(missing):
            xyz[missing] = ma.masked
        return xyz


if __name__ == "__main__":
    device_manager = getDevice()
    device_manager.enable_device()
    frames = device_manager.poll_frames()
    intrinsics = device_manager.get_device_intrinsics(frames)
    # pip = rs.pipeline()
    # config = rs.config()
    # config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)
    # config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)
    # profile = pip.start(config)
    # align_to = rs.stream.color
    # align = rs.align(align_to)
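# cal_point is not shown above. Given that QRCODE is constructed from
# ppx/ppy/fx/fy intrinsics, it presumably back-projects a pixel through the
# pinhole camera model; a standalone sketch (the exact body is my assumption):
def pinhole_backproject(u, v, z, ppx, ppy, fx, fy):
    """Back-project pixel (u, v) with depth z into 3D camera coordinates."""
    x = (u - ppx) * z / fx
    y = (v - ppy) * z / fy
    return [x, y, z]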
# Detection network
from darknet53 import *
import utils
import cfg
import torch

device = torch.device(utils.getDevice())


class Detector(nn.Module):
    def __init__(self):
        super(Detector, self).__init__()
        self.net = MainNet(cfg.class_num).to(device)
        state_dict = torch.load('data/ckpt-225.pt')
        self.net.load_state_dict(state_dict)
        self.net.eval()

    def forward(self, input, thresh, anchors):
        # thresh: confidence threshold a cell must reach to be kept
        # Run the network to get the three NCHW feature maps.
        output_13, output_26, output_52 = self.net(input.to(device))

        # Filter out the cells whose confidence exceeds the threshold.
        # idxs_13: indices of the kept cells, e.g. [[0, 6, 4, 2], [0, 6, 5, 2]],
        #          shape [12, 4] (image n, row h, col w, anchor a)
        # vecs_13: the values at those cells, 5 + cls each, shape [12, 85]
        idxs_13, vecs_13 = self._filter(output_13, thresh)
        # Parse into x1, y1, x2, y2, confidence c, class cls, image index n.
        boxes_13 = self._parse(idxs_13, vecs_13, 32, anchors[13])
        idxs_26, vecs_26 = self._filter(output_26, thresh)
        boxes_26 = self._parse(idxs_26, vecs_26, 16, anchors[26])
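        # forward() is cut off here; presumably it continues with the
        # 52-stride map and concatenates all boxes (a sketch, my assumption):
        idxs_52, vecs_52 = self._filter(output_52, thresh)
        boxes_52 = self._parse(idxs_52, vecs_52, 8, anchors[52])
        return torch.cat([boxes_13, boxes_26, boxes_52], dim=0)

    # _filter is not shown above; a minimal sketch consistent with the shapes
    # described in the comments (the sigmoid on confidence is my assumption):
    def _filter(self, output, thresh):
        # NCHW -> N, H, W, 3, (5 + cls)
        output = output.permute(0, 2, 3, 1)
        output = output.reshape(output.size(0), output.size(1),
                                output.size(2), 3, -1)
        mask = torch.sigmoid(output[..., 4]) > thresh
        idxs = mask.nonzero()  # [K, 4]: image n, row h, col w, anchor a
        vecs = output[mask]    # [K, 5 + cls]
        return idxs, vecs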