# Assumed imports -- the standard-library and third-party modules follow
# directly from the code below; Network, Sound and Webcam are local
# modules whose import paths are a guess.
import copy
import glob
import time

import cv2
import numpy as np
import torch
import torch.nn.functional as F
from torchvision.transforms import ToTensor

from network import Network  # local module (path assumed)
from sound import Sound      # local module (path assumed)
from webcam import Webcam    # local module (path assumed)


class CPPN:
    def __init__(self, height, width, n_inputs, n_hidden, n_outputs,
                 non_linearity, device, webcam=False):
        self.height = height
        self.width = width
        self.n_inputs = n_inputs
        self.n_hidden = n_hidden
        self.n_outputs = n_outputs
        self.non_linearity = non_linearity
        self.device = device

        # Two networks: the one currently rendered, and the one we
        # interpolate towards.
        self.network = Network(n_inputs, n_hidden, n_outputs, non_linearity)
        self.network.to(device)
        self.network_new = Network(n_inputs, n_hidden, n_outputs,
                                   non_linearity)
        self.network_new.to(device)

        self.sound = Sound()
        self.visualisation_input = self._create_visualisation_tensor()

        if webcam:
            self.webcam = Webcam()
            self.webcam.start()

    def _create_visualisation_tensor(self):
        # One row per pixel: normalised (y, x) coordinates followed by
        # zero-padded extra inputs (later overwritten with audio bands).
        visualisation_input = np.zeros((self.height, self.width,
                                        self.n_inputs))
        for i in range(self.height):
            for j in range(self.width):
                visualisation_input[i, j] = [
                    i / float(self.height), j / float(self.width)
                ] + [0] * (self.n_inputs - 2)
        visualisation_input = torch.tensor(
            visualisation_input.reshape(-1, self.n_inputs),
            dtype=torch.float, device=self.device)
        return visualisation_input

    # TODO: _create_kaleidoscope_visualisation_tensor -- a planned
    # variant; the commented-out draft here was identical to
    # _create_visualisation_tensor above.

    def _visualise_np(self, bands=None):
        with torch.no_grad():
            if bands is not None:
                # Broadcast the current audio bands into every pixel's
                # extra input channels.
                o = torch.tensor(bands, dtype=torch.float).repeat(
                    self.visualisation_input.size(0), 1)
                self.visualisation_input[:, 2:] = o
            # Note: the (width, height) reshape only matches the input
            # grid when the canvas is square.
            im = self.network(self.visualisation_input).reshape(
                self.width, self.height, self.n_outputs)
            return im.detach().cpu().numpy()

    def _visualise(self, name, save_im=False):
        with torch.no_grad():
            im = self.network(self.visualisation_input).reshape(
                self.width, self.height, 3).detach().cpu().numpy() * 255
            if save_im:
                cv2.imwrite(name + ".png", im)
            return im

    def start(self):
        cv2.namedWindow("CPPN", cv2.WINDOW_NORMAL)
        cv2.setWindowProperty("CPPN", cv2.WND_PROP_FULLSCREEN,
                              cv2.WINDOW_FULLSCREEN)
        audio_generator = self.sound.read_audio()
        t = time.time()
        interpolating = 0
        training = 0
        generator = None

        while True:
            # Every 20 seconds, pick a new behaviour. Only choice 1
            # (interpolate to a random network) is currently enabled;
            # 0 (train on a target image) and 2 (train on a webcam
            # frame) are kept for reference.
            if (time.time() - t > 20.
                    and not training and not interpolating):
                choice = np.random.choice([1])
                if choice == 0:
                    training = 1
                    target_images = glob.glob("target_images/*")
                    target = np.random.choice(target_images)
                    image = cv2.imread(target)
                    image = ToTensor()(image).float().to(self.device)
                    generator = self.train(image, self.height, self.width,
                                           0.001, save_im=False,
                                           image_folder="formation1",
                                           network_name="Santa",
                                           epochs=100, live=True,
                                           verbose=True)
                elif choice == 1:
                    self._random_network()
                    generator = self.interpolate(60)
                    interpolating = 1
                elif choice == 2:
                    print("webcam")
                    training = 1
                    image = self.webcam.read()
                    image = ToTensor()(image).float().to(self.device)
                    generator = self.train(image, self.height, self.width,
                                           0.001, save_im=False,
                                           image_folder="formation1",
                                           network_name="Santa",
                                           epochs=100, live=True,
                                           verbose=True)

                # Toggle the sound mode alongside the visual change.
                if choice == 0:
                    self.sound.sin = not self.sound.sin
                    self.sound.amplitudes = np.abs(np.random.rand(8))
                    self.sound.frequencies = np.abs(np.random.rand(8))
                elif choice == 1:
                    self.sound.calculate_bands = not self.sound.calculate_bands
                    if not self.sound.calculate_bands and not self.sound.sin:
                        if np.random.rand() < 0.5:
                            self.sound.calculate_bands = True
                        else:
                            self.sound.sin = True
                t = time.time()

            if interpolating:
                try:
                    self.network = next(generator)
                except StopIteration:
                    interpolating = 0
            if training:
                try:
                    self.network = next(generator)
                except StopIteration:
                    training = 0
                    torch.cuda.empty_cache()

            frame = self._visualise_np(next(audio_generator))
            cv2.imshow("CPPN",
                       cv2.copyMakeBorder(frame, 0, 0, 0, 0,
                                          cv2.BORDER_CONSTANT, 0))
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        cv2.destroyAllWindows()
        self.sound.stream.stop_stream()
        self.sound.stream.close()
        self.sound.pa.terminate()

    def _resize_image(self, image, width, height):
        image = F.interpolate(image.unsqueeze(0), size=(width, height))
        image = torch.squeeze(image, 0)
        image = image.permute(1, 2, 0)
        return image

    def _random_network(self):
        self.network_new = Network(self.n_inputs, self.n_hidden,
                                   self.n_outputs, self.non_linearity)
        self.network_new.to(self.device)

    def train(self, image, width, height, loss_threshold, save_im=False,
              image_folder="", save_network=True, network_name="",
              epochs=1, live=False, verbose=False):
        image = self._resize_image(image, width, height)
        optimiser = torch.optim.Adam(self.network.parameters())
        criterion = torch.nn.MSELoss()
        previous_loss = None
        # Shallow copy: `network` shares its parameters with
        # self.network, so the optimiser's updates are visible through
        # both references.
        network = copy.copy(self.network)
        for i in range(epochs):
            loss = criterion(
                network(self.visualisation_input).reshape(width, height, 3),
                image)
            optimiser.zero_grad()
            loss.backward()
            optimiser.step()
            if verbose:
                print(i, loss)
            if save_im:
                # Only save a frame once the loss has dropped by at
                # least loss_threshold since the last saved frame.
                if previous_loss is None or previous_loss - loss_threshold > loss:
                    previous_loss = loss
                    self._visualise(
                        "{}/image{:06d}.png".format(image_folder, i),
                        save_im)
            if live:
                yield network
        if save_network:
            torch.save(self.network.state_dict(),
                       'trained_networks/network{}.pt'.format(network_name))
        torch.cuda.empty_cache()

    def load(self, network_name, new=False):
        if new:
            self.network_new.load_state_dict(
                torch.load("trained_networks/network" + network_name + ".pt"))
            self.network_new.to(self.device)
        else:
            self.network.load_state_dict(
                torch.load("trained_networks/network" + network_name + ".pt"))
            self.network.to(self.device)

    def interpolate(self, num_interpolation_frames, space="beta", beta=2.0):
        """Yield networks whose weights blend from self.network towards
        self.network_new.

        The beta parameter alters the steepness of the interpolation
        curve; space="lin" uses a linear schedule instead.
        """
        weights1 = []
        biases1 = []
        weights2 = []
        biases2 = []
        for idx, layer in enumerate(self.network.module_list):
            weights1.append(layer.weight.data)
            biases1.append(layer.bias.data)
            weights2.append(self.network_new.module_list[idx].weight.data)
            biases2.append(self.network_new.module_list[idx].bias.data)

        frame_distribution = np.linspace(0, 1, num=num_interpolation_frames)
        if space != "lin":
            # Smooth S-curve through (0, 0) and (1, 1); the epsilon
            # avoids a division by zero at x = 1.
            f = lambda x: 1 / (1 + np.power(
                x / (1 + np.finfo(float).eps - x), -beta))
            frame_distribution = f(frame_distribution)

        weights = []
        for weight_pair in zip(weights1, weights2):
            difference = weight_pair[0] - weight_pair[1]
            new_weights = torch.zeros(
                num_interpolation_frames,
                weight_pair[0].size(0),
                weight_pair[0].size(1)).float().to(self.device)
            for idx, i in enumerate(frame_distribution):
                new_weights[idx] = weight_pair[0] - i * difference
            weights.append(new_weights)

        biases = []
        for bias_pair in zip(biases1, biases2):
            difference = bias_pair[0] - bias_pair[1]
            new_biases = torch.zeros(
                num_interpolation_frames,
                bias_pair[0].size(0)).float().to(self.device)
            for idx, i in enumerate(frame_distribution):
                new_biases[idx] = bias_pair[0] - i * difference
            biases.append(new_biases)

        network = copy.deepcopy(self.network)
        for i in range(num_interpolation_frames):
            for idx, layer in enumerate(network.module_list):
                network.module_list[idx].weight.data = weights[idx][i]
                network.module_list[idx].bias.data = biases[idx][i]
            yield network

    def random_walk(self):
        pass

    def animate(self):
        pass

    def alzheimer(self):
        pass
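# A minimal usage sketch (an assumption, not part of the original file):
# drive the interpolation generator by hand instead of through start().
# The canvas and layer sizes below are arbitrary example values, and
# torch.tanh stands in for whatever non-linearity Network expects.
if __name__ == "__main__":
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    cppn = CPPN(height=256, width=256, n_inputs=10, n_hidden=32,
                n_outputs=3, non_linearity=torch.tanh, device=device)

    # Blend from the current network to a freshly initialised one over
    # 60 frames; each yielded network renders one frame.
    cppn._random_network()
    for network in cppn.interpolate(60):
        cppn.network = network
        cv2.imshow("CPPN", cppn._visualise_np())
        if cv2.waitKey(30) & 0xFF == ord('q'):
            break
    cv2.destroyAllWindows()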
# Assumed imports -- Webcam, the model/helper functions and the
# MODEL_PATH / SOURCE / NUMBER_OF_CLASSES constants live in local
# modules whose import paths are a guess.
import os
import sys

import cv2
import torch
from torchvision import transforms

from webcam import Webcam  # local module (path assumed)
from model import get_model_instance_segmentation  # local (path assumed)
from utils import (get_prediction,  # local helpers (paths assumed)
                   download_file_from_google_drive)
from config import MODEL_PATH, SOURCE, NUMBER_OF_CLASSES  # local (assumed)


def main():
    collector = None
    movie = None
    recording = False
    try:
        camera_type = sys.argv[1]
        if len(sys.argv) == 3:
            recording = sys.argv[2] == "record"

        if camera_type == "webcam":
            collector = Webcam(video_width=640, video_height=480)
            collector.start()
        else:
            print(f"No such camera {camera_type}")
            sys.exit(-1)

        if not os.path.isfile(MODEL_PATH):
            print("Downloading model, please wait...")
            download_file_from_google_drive(SOURCE, MODEL_PATH)
            print("Done downloading the model.")

        # Get device.
        device = torch.device(
            'cuda') if torch.cuda.is_available() else torch.device('cpu')

        # Initialise the hand detector and load the downloaded weights.
        model = get_model_instance_segmentation(NUMBER_OF_CLASSES)
        model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
        model.to(device)
        model.eval()

        if recording:
            movie = cv2.VideoWriter(
                f'./recordings/hand_frcnn_{camera_type}.avi',
                cv2.VideoWriter_fourcc(*'DIVX'), 8, (640, 480))

        with torch.no_grad():
            while collector.started:
                image, _ = collector.read()
                if image is not None:
                    orig = image.copy()
                    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                    image = transforms.ToTensor()(image).to(device)
                    out = model([image])
                    boxes = get_prediction(pred=out, threshold=.7)
                    try:
                        # Draw one rectangle per detected hand.
                        for box in boxes:
                            cv2.rectangle(img=orig,
                                          pt1=(box[0], box[1]),
                                          pt2=(box[2], box[3]),
                                          color=(0, 255, 255),
                                          thickness=2)
                        if recording:
                            movie.write(orig)
                        cv2.imshow("mask", orig)
                        k = cv2.waitKey(1)
                        if k == ord('q'):
                            collector.stop()
                    except Exception as e:
                        print(e)
    finally:
        print("Stopping stream.")
        if collector is not None:
            collector.stop()
        if movie is not None:
            movie.release()
        cv2.destroyAllWindows()
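# The boxes drawn above come from get_prediction, which is imported from
# a local helper module, so its body is not shown in this file. As a
# rough sketch of the assumed contract (not the project's actual
# implementation): torchvision's Faster R-CNN returns a list of dicts
# with 'boxes' and 'scores', and main() expects plain (x1, y1, x2, y2)
# corners above the score threshold, roughly:
#
#     def get_prediction(pred, threshold):
#         boxes = pred[0]['boxes'].detach().cpu().numpy().astype(int)
#         scores = pred[0]['scores'].detach().cpu().numpy()
#         return [tuple(box) for box, score in zip(boxes, scores)
#                 if score > threshold]


if __name__ == "__main__":
    main()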
# Assumed imports for the capture UI -- Ui_MainWindow, Webcam,
# to_str_digits and make_square are local modules whose import paths are
# a guess, and the Qt binding is assumed to be PyQt5.
import os

from PIL import Image
from PyQt5.QtCore import QTimer
from PyQt5.QtGui import QImage, QPixmap
from PyQt5.QtWidgets import QMainWindow

from ui_mainwindow import Ui_MainWindow  # local module (path assumed)
from webcam import Webcam                # local module (path assumed)
from utils import to_str_digits, make_square  # local helpers (assumed)


class Studio(QMainWindow, Ui_MainWindow):
    def __init__(self, *args, **kwargs):
        super(Studio, self).__init__(*args, **kwargs)
        self.setupUi(self)

        # Device
        self.device_default = 0
        self.device = self.device_default

        # Webcam
        self.webcam = Webcam()

        # Image
        self.image_dir = 'outputs'
        self.image_ext = 'jpg'
        self.num_images_max_default = 10
        self.num_images_max = self.num_images_max_default
        self.num_images = 0
        self.saved_width_default = 416  # In pixels
        self.saved_height_default = 416
        self.saved_width = self.saved_width_default
        self.saved_height = self.saved_height_default
        self.flip_image = False
        self.cb_flip_image.stateChanged.connect(self.change_flip_image)

        # Filename prefix
        self.filename_prefix = 'class_memo'

        # Recording flag
        self.is_recording = False

        # Timer
        self.timer_is_on = False
        self.timer_duration = 500  # msec
        self.timer = QTimer(self)
        self.timer.timeout.connect(self.process_image)

        # Plot min/max
        self.plot_min = 0.0
        self.plot_max = -1.0

        # Initialize
        self.initialize()

    def open_webcam(self):
        # Release the resource which had been used.
        if self.webcam.is_open():
            self.webcam.release()
        self.webcam.open(self.device)
        self.process_image()

        # Show message
        self.show_message('webcam is opened.')

        # Start the timer
        if not self.timer_is_on:
            self.start_timer()

    def start_timer(self):
        self.timer_is_on = True
        self.timer.start(self.timer_duration)

    def stop_timer(self):
        self.timer_is_on = False
        self.timer.stop()

    def change_flip_image(self):
        self.flip_image = self.cb_flip_image.isChecked()

    def start_recording(self):
        self.is_recording = True
        self.num_images = 0
        self.show_message('recording frames.')

    def finish_recording(self):
        self.is_recording = False
        self.show_message('recording is finished.')

    def show_message(self, msg):
        text = 'Status: ' + msg
        self.lb_status.setText(text)

    def show_num_images(self):
        text = '{}/{}'.format(self.num_images, self.num_images_max)
        self.lb_num_images.setText(text)

    def get_image_path(self, n):
        str_num = to_str_digits(n, num_digits=5)
        filename = self.filename_prefix + '_' + str_num + '.' + self.image_ext
        path = os.path.join(self.image_dir, filename)
        return path

    def save_image(self):
        # Save the image.
        self.num_images += 1
        if self.num_images <= self.num_images_max:
            image_path = self.get_image_path(self.num_images)
            frame = self.webcam.get_frame()
            image = Image.fromarray(frame)
            size = (self.saved_width, self.saved_height)
            image = make_square(image)
            image = image.resize(size)
            image.save(image_path)
        else:
            self.num_images = self.num_images_max
            self.finish_recording()

        # Show the number of images
        self.show_num_images()

    def process_image(self):
        if self.webcam.is_open():
            # Show frame
            frame = self.webcam.read()
            image = QImage(frame.data, frame.shape[1], frame.shape[0],
                           QImage.Format_RGB888)

            # Flip the image horizontally
            image_flipped = image.mirrored(True, False)
            if self.flip_image:
                pixmap = QPixmap.fromImage(image_flipped)
            else:
                pixmap = QPixmap.fromImage(image)
            self.lb_image.setPixmap(pixmap)

            # Record frame
            if self.is_recording:
                self.save_image()

    def initialize(self):
        # Connect the signals and slots
        self.cb_device.activated[str].connect(self.set_device)
        self.edit_num_images_max.textChanged.connect(self.set_num_images_max)
        self.edit_saved_width.textChanged.connect(self.set_saved_width)
        self.edit_saved_height.textChanged.connect(self.set_saved_height)
        self.edit_filename_prefix.textChanged.connect(self.set_filename_prefix)
        self.btn_open.clicked.connect(self.open_webcam)
        self.btn_record.clicked.connect(self.start_recording)

        # UI
        self.edit_num_images_max.setText(str(self.num_images_max))
        self.edit_saved_width.setText(str(self.saved_width))
        self.edit_saved_height.setText(str(self.saved_height))
        self.edit_filename_prefix.setText(str(self.filename_prefix))

    def set_device(self):
        value = self.cb_device.currentIndex()
        try:
            value = int(value)
        except ValueError:
            value = self.device_default
        self.device = value

    def set_num_images_max(self):
        value = self.edit_num_images_max.text()
        try:
            value = int(value)
        except ValueError:
            value = self.num_images_max_default
        self.num_images_max = value

    def set_saved_width(self):
        value = self.edit_saved_width.text()
        try:
            value = int(value)
        except ValueError:
            value = self.saved_width_default
        self.saved_width = value

    def set_saved_height(self):
        value = self.edit_saved_height.text()
        try:
            value = int(value)
        except ValueError:
            value = self.saved_height_default
        self.saved_height = value

    def set_filename_prefix(self):
        self.filename_prefix = self.edit_filename_prefix.text()

    def add_widget(self, widget):
        widget.setParent(self.central_widget)
        self.view_layout.addWidget(widget)

    def remove_widget(self, widget):
        self.view_layout.removeWidget(widget)
        widget.setParent(None)

    def refresh_view(self):
        text = 'Remaining time: {} (sec)'.format(self.num_images)
        self.lb_num_images.setText(text)

    def closeEvent(self, event):
        self.webcam.release()
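# A minimal launcher sketch (assumed; the original application may wire
# this up elsewhere): create the Qt application and show the Studio
# window.
if __name__ == "__main__":
    import sys

    from PyQt5.QtWidgets import QApplication

    app = QApplication(sys.argv)
    studio = Studio()
    studio.show()
    sys.exit(app.exec_())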