Example #1
def wait_available_gpu(free_memory=4000):
    """Block until a GPU with at least `free_memory` MB free is available."""
    all_gpus = gpu_info()
    if not all_gpus:
        log.warning("No GPU available")
        raise Exception("No GPU available")

    # GLOBAL_GPU_LOCK.acquire()
    while True:
        all_gpus = gpu_info()
        gpu_list = sorted(
            [(gpu['index'], gpu['mem_total'] - gpu['mem_used'])
             for gpu in all_gpus
             if (gpu['mem_total'] - gpu['mem_used']) >= free_memory],
            key=lambda x: x[1])
        if not gpu_list:
            log.info("No GPU with enough free memory, waiting 10 seconds")
            time.sleep(10)
            continue

        best_gpu = int(gpu_list[-1][0])
        with nvml_context():
            gpu_data = device_status(best_gpu)
            for gpu in all_gpus:
                if int(gpu["index"]) == best_gpu:
                    gpu_data.update(gpu)
                    break

        # GLOBAL_GPU_LOCK.release()
        log.info(f"Best GPU: {gpu_data}")
        return gpu_data
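
A minimal usage sketch (the 8000 MB threshold and the CUDA_VISIBLE_DEVICES wiring are assumptions, not from the source): pin the process to the selected GPU before starting work.

import os

gpu = wait_available_gpu(free_memory=8000)
os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu["index"])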
Example #2
    def on_exchange_declareok(self, unused_frame):
        log.info('Exchange declared')
        log.info('Declaring queue')
        self._channel.queue_declare(queue='',
                                    callback=self.on_queue_declareok,
                                    exclusive=True,
                                    auto_delete=True)
Example #3
    def import_dataset(self, filename=None):
        """
        Import dataset from folder with base and diffs
        """

        if not filename:
            if self.id:
                filename = self.id + ".dset"
            elif self.dataset_name:
                filename = self.dataset_name + ".dset"
            else:
                raise Exception('Must define import filename')

        export_data, self.parms, self.examples, self.images = Dataset.import_from_hdf5(
            filename, self.id)

        self.dataset_name = str(self.parms["name"])
        self.id = str(self.parms["_id"])

        if len(self.examples) != export_data["num_examples"]:
            raise Exception(
                f"Invalid number of examples. Imported: {len(self.examples)} - Exported: {export_data['num_examples']}"
            )

        if len(self.images) != len(export_data["images_list"]):
            raise Exception(
                f"Invalid number of images. Imported: {len(self.images)} - Exported: {len(export_data['images_list'])}"
            )

        log.info("Dataset imported from file %d examples" % len(self.examples))
Example #4
    def export_dataset(self, filename=None):
        """
        Export dataset to folder adding only differences if a base export exists
        """

        if not filename:
            filename = self.id + ".dset"

        self.load_all_images()

        export_data = {
            "export_version": "2",
            "export_date": pytz.utc.localize(datetime.datetime.now()),
            "dataset": self.dataset_name,
            "dataset_id": self.id,
            "num_examples": len(self.examples),
            "images_list": list(self.images.keys())
        }

        Dataset.export_to_hdf5(export_data, self.parms, self.examples,
                               self.images, filename, self.id)

        log.info(f"Dataset exported to file {len(self.examples)} examples")
Example #5
def get_dataset(app_token, dataset_id):
    try:
        log.info(f"Get dataset {dataset_id}")

        endpoint = jwt.decode(app_token, options={"verify_signature":
                                                  False})['endpoint']
        msg_headers = {'Authorization': f'Bearer {app_token}'}
        response = requests.get(f"{endpoint}/dataset/{dataset_id}",
                                headers=msg_headers)

        if response.status_code != 200:
            log.error(f"Failed to get dataset: {response.json()}")
            return None

        dataset = response.json()
        if dataset["dataset_parms"]:
            return dataset
        else:
            log.warning(f"Failed to get dataset: {dataset}")
            return None

    except requests.ConnectionError as error:
        log.error(
            f'Failed to get dataset_id: {dataset_id}. Connection error: {error}')
        return None
    except requests.Timeout as error:
        log.error(f'Failed to get dataset_id: {dataset_id}. Timeout: {error}')
        return None
    except Exception as excp:
        log.error(f'Failed to get dataset_id: {dataset_id} - {excp}')
        return None
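
A caller sketch (the token variable and the ID are placeholders): every failure path returns None, so the result must be checked before use.

dataset = get_dataset(app_token, "64b0f1c2e4b0a1d2c3e4f5a6")  # placeholder ID
if dataset is None:
    raise RuntimeError("dataset unavailable")
parms = dataset["dataset_parms"]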
Example #6
    def on_message(self, unused_channel, basic_deliver, properties, body):
        log.info('Received message # %s from %s: %s',
                 basic_deliver.delivery_tag, properties.app_id, body)
        parms = json.loads(body.decode())
        self._message_call(parms)

        # Acknowledge the message delivery from RabbitMQ by sending a Basic.Ack RPC method for the delivery tag.
        self._channel.basic_ack(basic_deliver.delivery_tag)
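
A sketch (not from the source) of publishing a message this consumer would receive, using pika's BlockingConnection; the URL and queue name are assumptions.

import json
import pika

connection = pika.BlockingConnection(pika.URLParameters('amqp://localhost'))
channel = connection.channel()
channel.basic_publish(exchange='',
                      routing_key='work_queue',  # assumed queue name
                      body=json.dumps({"task": "train"}))
connection.close()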
Example #7
    def stop(self):
        log.info('Stopping')
        self._closing = True
        for channel in self._channels:
            channel.stop_consuming()

        self._connection.ioloop.stop()
        log.info('Stopped')
Example #8
    def on_queue_declareok(self, method_frame):
        log.info('Queue declare ok: consumer_count=%s, message_count=%s, queue=%s',
                 method_frame.method.consumer_count,
                 method_frame.method.message_count,
                 method_frame.method.queue)
        self._queue_name = method_frame.method.queue

        log.info('Binding %s to %s with %s' %
                 (self._topic, self._queue_name, self._routing_key))
        self._channel.queue_bind(exchange=self._topic,
                                 routing_key=self._routing_key,
                                 queue=self._queue_name,
                                 callback=self.on_bindok)
Example #9
def get_flow(app_token, flow_id):
    try:
        log.info(f"Get flow {flow_id}")

        # exist_ok=True already tolerates an existing folder
        Path(CONFIG["flow_folder"]).mkdir(parents=True, exist_ok=True)

        local_cache = os.path.join(CONFIG["flow_folder"], flow_id + '.json')

        endpoint = jwt.decode(app_token, options={"verify_signature":
                                                  False})['endpoint']
        msg_headers = {'Authorization': f'Bearer {app_token}'}
        response = requests.get(f"{endpoint}/flow/{flow_id}",
                                headers=msg_headers)

        if response.status_code != 200:
            log.error(f"Failing get flow from edge: {response.json()}")

            # Fall back to the locally cached copy when the edge rejects the request
            if os.path.isfile(local_cache):
                with open(local_cache) as fp:
                    flow = json.load(fp)

                return flow

            return None

        flow = response.json()["flow"]
        if "_id" in flow:

            if os.path.isfile(local_cache):
                os.remove(local_cache)

            with open(local_cache, 'w') as fp:
                json.dump(flow, fp, default=str)

            return flow
        else:
            log.warning(f"Failing get flow: {response.json()}")
            return None

    except requests.ConnectionError as error:
        log.error(f'Failed to get flow_id: {flow_id}. Connection error: {error}')
        return None
    except requests.Timeout as error:
        log.error(f'Failed to get flow_id: {flow_id}. Timeout: {error}')
        return None
    except Exception as excp:
        log.error(f'Failed to get flow_id: {flow_id} - {excp}')
        return None
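
The read-through-cache pattern above repeats in several of these functions; a hypothetical helper (the name is an assumption) that factors out the cache read:

def read_cached_json(path):
    """Return the parsed JSON at `path`, or None if missing or invalid."""
    if not os.path.isfile(path):
        return None
    try:
        with open(path) as fp:
            return json.load(fp)
    except (OSError, json.JSONDecodeError):
        return None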
Example #10
    def load_data(self):
        """
        Load dataset data from ws
        """
        if not self._app_token:
            raise Exception('AppToken not set')

        dataset = edge_client.get_dataset(self._app_token, self.id)
        if not dataset:
            raise Exception(f'Fail loading dataset_id {self.id}')

        self.dataset_name = dataset["dataset_parms"].get("name")
        self.parms = dataset["dataset_parms"]
        self.examples = dataset["annotations"]
        self.update_default_parms()
        log.info(f"Load dataset from database {len(self.examples)} examples")
Example #11
    def on_channel_open(self, channel):
        log.info('Channel opened')
        self._channel = channel

        log.info('Adding channel close callback')
        self._channel.add_on_close_callback(self.on_channel_closed)

        log.info('Declaring queue %s' % self._queue)
        self._channel.queue_declare(queue=self._queue,
                                    callback=self.on_queue_declareok,
                                    durable=True)

        log.info('Adding process consumer cancellation callback')
        self._channel.add_on_cancel_callback(self.on_consumer_cancelled)
Example #12
def get_train(app_token, dataset_id, train_id, train_folder):
    try:
        log.info(f"Get train {dataset_id}-{train_id}")
        folder_path = Path(train_folder) / dataset_id / train_id
        # exist_ok=True already tolerates an existing folder
        folder_path.mkdir(parents=True, exist_ok=True)

        endpoint = jwt.decode(app_token, options={"verify_signature":
                                                  False})['endpoint']
        url = f"{endpoint}/model-hist/{dataset_id}/{train_id}"
        msg_headers = {'Authorization': f'Bearer {app_token}'}
        payload = {"download_url": True}
        # timeout added so the Timeout handler below can fire (value assumed)
        response = requests.get(url, headers=msg_headers, params=payload,
                                timeout=30)

        if response.status_code != 200:
            log.error(f"Failed to get train: {response.json()}")
            return None

        train_doc = response.json()

        dest_filename = os.path.join(str(folder_path), train_id + ".tar.gz")
        download_file(train_doc["download_url"], dest_filename)

        # expand_file
        call(["tar", "-xf", dest_filename, "--directory", str(folder_path)])

        os.remove(dest_filename)
        return train_id

    except requests.ConnectionError as error:
        log.error(
            f'Failed to get train_id: {train_id}. Connection error: {error}')
        return None
    except requests.Timeout as error:
        log.error(f'Failed to get train_id: {train_id}. Timeout: {error}')
        return None
    except Exception as excp:
        log.error(f'Failed to get train_id: {train_id} - {excp}')
        return None
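
Extraction shells out to tar above; an equivalent pure-Python sketch using the standard tarfile module:

import tarfile

with tarfile.open(dest_filename, "r:gz") as tar:
    tar.extractall(path=str(folder_path))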
Example #13
def get_edge_data(app_token):
    try:
        log.info(f"Get edge_data")
        endpoint = jwt.decode(app_token, options={"verify_signature":
                                                  False})['endpoint']
        msg_headers = {'Authorization': f'Bearer {app_token}'}
        response = requests.get(f"{endpoint}", headers=msg_headers)

        if response.status_code != 200:
            log.error(f"Failed to get edge_data: {response.json()}")
            return None

        return response.json()["edge_data"]

    except requests.ConnectionError as error:
        log.error(f'Failed to get edge_data. Connection error: {error}')
        return None
    except requests.Timeout as error:
        log.error(f'Failed to get edge_data. Timeout: {error}')
        return None
    except Exception as excp:
        log.error(f'Failed to get edge_data - {excp}')
        return None
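
Each of these client functions re-derives the endpoint from the token; a hypothetical helper (the name is an assumption) that centralizes the unverified decode used above:

def endpoint_from_token(app_token):
    # The signature is deliberately not verified here; the token is only
    # parsed for its 'endpoint' claim, exactly as in the functions above
    claims = jwt.decode(app_token, options={"verify_signature": False})
    return claims['endpoint']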
Example #14
    def on_connection_open(self, unused_connection):
        log.info('Connection opened')
        log.info('Adding connection close callback')
        self._connection.add_on_close_callback(self.on_connection_closed)

        if self._queues is not None:
            for queue in self._queues:
                log.info('Creating queue channel: ' + queue[0])
                new_channel = QueueChannel(queue[0], queue[1], self)
                self._channels.append(new_channel)
                self._connection.channel(
                    on_open_callback=new_channel.on_channel_open)
        elif self._topic is not None:
            log.info('Creating topic channel: ' + self._topic[0])
            new_channel = TopicChannel(self._topic[0], self._topic[1],
                                       self._topic[2], self)
            self._channels.append(new_channel)
            self._connection.channel(
                on_open_callback=new_channel.on_channel_open)
Example #15
    def purge_files(self, max_files=800, max_days=None):
        """
        Purge local and cloud files to max_files from de older to newer
        """
        local_files = self._get_list_files_info(self._local_folder)
        if len(local_files) > max_files:
            date_list = [(l_file["filename"], l_file["modified_date"])
                         for l_file in local_files]
            exclude_list = sorted(
                date_list, key=lambda x: x[1])[:len(local_files) - max_files]
            log.info(f"Purge local files: {len(exclude_list)}")
            for filename, _ in exclude_list:
                try:
                    os.remove(os.path.join(self._local_folder, filename))
                except OSError:
                    # A file may already be gone; ignore and keep purging
                    pass

        if self._cloud_obj is None or self._cloud_folder is None:
            return

        cloud_files = self._cloud_obj.list_files_info(
            folder=self._cloud_folder, resource_id=self.resource_id)
        if len(cloud_files) > max_files:
            date_list = [(l_file["filename"], l_file["modified_date"])
                         for l_file in cloud_files]
            exclude_list = sorted(
                date_list, key=lambda x: x[1])[:len(cloud_files) - max_files]
            log.info(f"Purge cloud files: {len(exclude_list)}")
            for filename, _ in exclude_list:
                try:
                    self._cloud_obj.delete_file(
                        folder=self._cloud_folder,
                        resource_id=self.resource_id,
                        filename=filename,
                    )
                except Exception:
                    # Best-effort purge: a failed cloud delete shouldn't abort the rest
                    pass
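
A usage sketch (the instance construction is hypothetical, since the class name is not shown): cap a cache directory at 500 files on both sides.

storage = FileStorage(local_folder="/tmp/cache")  # hypothetical constructor
storage.purge_files(max_files=500)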
Example #16
    def on_channel_open(self, channel):
        log.info('Channel opened')
        self._channel = channel

        log.info('Adding channel close callback')
        self._channel.add_on_close_callback(self.on_channel_closed)

        log.info('Declaring exchange %s' % self._topic)
        self._channel.exchange_declare(exchange=self._topic,
                                       callback=self.on_exchange_declareok,
                                       durable=True,
                                       exchange_type='topic')
Example #17
    def close_connection(self):
        log.info('Closing connection')
        self._connection.close()
Example #18
    def on_bindok(self, unused_frame):
        log.info('Queue bound')
        log.info('Adding process consumer cancellation callback')
        self._channel.add_on_cancel_callback(self.on_consumer_cancelled)
        self._consumer_tag = self._channel.basic_consume(
            queue=self._queue_name, on_message_callback=self.on_message)
Example #19
def upload_extract(app_token,
                   dataset_id,
                   extract_folder,
                   max_files=MAX_EXTRACT_FILES,
                   thumb_size=THUMB_SIZE):
    try:
        log.info(f"Upload extract {dataset_id}")
        folder_path = os.path.join(extract_folder, dataset_id)
        if not os.path.isdir(folder_path):
            raise Exception(f"Extract folder doesn't exists: {folder_path}")

        clear_log(folder_path, max_files)
        generate_extract_thumbs(folder_path, thumb_size)

        files_data = []
        file_list = os.listdir(folder_path)
        for filename in file_list:
            exp_id = filename[:24]
            if filename.endswith('_data.json') and (
                    exp_id + ".jpg") in file_list and (
                        exp_id + "_thumb.jpg") in file_list:
                try:
                    filepath = os.path.join(folder_path, filename)
                    with open(filepath, 'r') as json_file:
                        data = json.load(json_file)

                        if "date" in data:
                            data["date"] = {"$date": data["date"]}

                        if "_id" in data:
                            data["_id"] = {"$oid": data["_id"]}

                        files_data.append(data)
                except (OSError, json.JSONDecodeError):
                    # Skip unreadable or malformed data files
                    pass

        if not files_data:
            log.warning(
                f'Cannot post upload_extract: {dataset_id}. No files.')
            return dataset_id

        extract_files = {"files_data": files_data}

        with open(os.path.join(folder_path, 'extract_files.json'),
                  'w',
                  newline='',
                  encoding='utf8') as file_p:
            json.dump(extract_files, file_p, ensure_ascii=False, default=str)

        dest_filename = os.path.join(extract_folder, dataset_id + ".tar.gz")
        if os.path.isfile(dest_filename):
            os.remove(dest_filename)

        wd = os.getcwd()
        os.chdir(folder_path)
        try:
            with tarfile.open(dest_filename, "w:gz") as tar:
                for filename in os.listdir(folder_path):
                    if filename.endswith(
                            '.jpg') or filename == 'extract_files.json':
                        tar.add(filename)
        finally:
            # Restore the working directory even if archiving fails
            os.chdir(wd)

        endpoint = jwt.decode(app_token, options={"verify_signature":
                                                  False})['endpoint']
        msg_headers = {'Authorization': f'Bearer {app_token}'}
        url = f"{endpoint}/dataset/{dataset_id}/extract"

        # Open inside a with-block so the upload file handle is always closed;
        # the timeout value is an assumption, not from the source
        with open(dest_filename, 'rb') as extract_fp:
            files = {'extract': extract_fp}
            values = {'dataset_id': dataset_id, 'extract_files': extract_files}

            response = requests.post(url,
                                     files=files,
                                     data=values,
                                     headers=msg_headers,
                                     timeout=300)

        if response.status_code != 201:
            raise Exception(f"Failed to upload extract files: {response.json()}")

        os.remove(dest_filename)
        return dataset_id

    except requests.ConnectionError as error:
        log.error(
            f'Failed to post upload_extract: {dataset_id}. Connection error: {error}'
        )
        return None
    except requests.Timeout as error:
        log.error(
            f'Failed to post upload_extract: {dataset_id}. Timeout: {error}')
        return None
    except Exception as excp:
        log.error(f'Failed to post upload_extract: {dataset_id} - {excp}')
        return None
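
An equivalent way to build the archive without changing the working directory (a sketch): passing arcname keeps the entries relative inside the tarball.

with tarfile.open(dest_filename, "w:gz") as tar:
    for filename in os.listdir(folder_path):
        if filename.endswith('.jpg') or filename == 'extract_files.json':
            tar.add(os.path.join(folder_path, filename), arcname=filename)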
Example #20
    def on_consumer_cancelled(self, method_frame):
        log.info('Consumer was cancelled remotely, shutting down: %r',
                 method_frame)
        if self._channel:
            self._channel.close()
Example #21
    def stop_consuming(self):
        if self._channel:
            log.info('Sending a Basic.Cancel RPC command to RabbitMQ')
            # pika 1.x signature (matching basic_consume above):
            # basic_cancel(consumer_tag, callback=...)
            self._channel.basic_cancel(self._consumer_tag,
                                       callback=self.on_cancelok)
Example #22
def get_model_component(app_token, model_component_id, model_component_folder):
    local_doc = None
    try:
        log.info(f"Get model_component {model_component_id}")
        folder_path = Path(model_component_folder) / model_component_id
        # exist_ok=True already tolerates an existing folder
        folder_path.mkdir(parents=True, exist_ok=True)

        local_cache = os.path.join(model_component_folder,
                                   model_component_id + '.json')
        if os.path.isfile(local_cache):
            with open(local_cache) as fp:
                local_doc = json.load(fp)

        endpoint = jwt.decode(app_token, options={"verify_signature":
                                                  False})['endpoint']
        url = f"{endpoint}/model-component/{model_component_id}"
        msg_headers = {'Authorization': f'Bearer {app_token}'}
        payload = {"download_url": False}
        # timeout added so the Timeout handler below can fire (value assumed)
        response = requests.get(url, headers=msg_headers, params=payload,
                                timeout=30)

        if response.status_code != 200:
            if local_doc:
                # Fall back to the cached document when the service is unavailable
                return local_doc

            log.error(f"Failed to get model_component: {response.json()}")
            return None

        model_component_doc = response.json()
        if local_doc and model_component_doc["version"] == local_doc["version"]:
            return local_doc

        payload = {"download_url": True}
        response = requests.get(url, headers=msg_headers, params=payload)

        if response.status_code != 200:
            if local_doc:
                return local_doc

            log.error(f"Failing get model: {response.json()}")
            return None

        model_component_doc = response.json()

        dest_filename = os.path.join(folder_path,
                                     model_component_id + ".tar.gz")
        download_file(model_component_doc["download_url"], dest_filename)

        # expand_file
        call(["tar", "-xf", dest_filename, "--directory", folder_path])

        os.remove(dest_filename)

        # Opening with 'w' truncates any existing cache file
        with open(local_cache, 'w') as fp:
            json.dump(model_component_doc, fp, default=str)

        return model_component_id

    except requests.ConnectionError as error:
        if local_doc:
            return local_doc

        log.error(
            f'Failed to get model_component: {model_component_id}. Connection error: {error}'
        )
        return None
    except requests.Timeout as error:
        if local_doc:
            return local_doc

        log.error(
            f'Failed to get model_component: {model_component_id}. Timeout: {error}'
        )
        return None
    except Exception as excp:
        if local_doc:
            return local_doc

        log.error(
            f'Failed to get model_component: {model_component_id} - {excp}')
        return None
Example #23
    def on_cancelok(self, unused_frame):
        log.info('RabbitMQ acknowledged the cancellation of the consumer')
        log.info('Closing the channel')
        self._channel.close()
Example #24
    def sync_files(self, origin, file_list=None):
        """
        Synchronizes files between local and cloud folder
        If file_list then synchronize only files in this list
        """
        if self._cloud_obj is None:
            raise Exception("Cloud provider not defined")

        if self._cloud_folder is None:
            raise Exception("Cloud folder not defined")

        if origin not in ["cloud", "local", "both"]:
            raise Exception(
                f"'origin'={origin} must be one of: ('cloud', 'local', 'both')"
            )

        try:
            local_files = self._get_list_files_info(self._local_folder)
            cloud_files = self._cloud_obj.list_files_info(
                folder=self._cloud_folder, resource_id=self.resource_id)

            if origin in ["cloud", "both"]:
                download_files = []
                for c_file in cloud_files:
                    if file_list is not None and \
                            c_file["filename"] not in file_list:
                        continue

                    # for/else: the else branch runs only when no up-to-date
                    # local copy exists, so the file must be downloaded
                    for l_file in local_files:
                        if l_file["filename"] == c_file["filename"] and \
                                l_file["file_size"] == c_file["file_size"] and \
                                l_file["modified_date"] >= c_file["modified_date"]:
                            break
                    else:
                        download_files.append(c_file)

                if download_files:
                    log.info(
                        f"Storage: {self.storage} - Resource-ID: {self.resource_id} - Download {len(download_files)} files"
                    )
                    for c_file in download_files:
                        file_data = self._cloud_obj.download_file(
                            folder=self._cloud_folder,
                            resource_id=self.resource_id,
                            filename=c_file["filename"])

                        file_path = os.path.join(self._local_folder,
                                                 c_file["filename"])
                        dirname = os.path.dirname(file_path)
                        if not os.path.isdir(dirname):
                            os.makedirs(dirname, exist_ok=True)

                        with open(file_path, "wb") as fp:
                            fp.write(file_data)

            if origin in ["local", "both"]:
                upload_files = []
                for l_file in local_files:
                    if file_list is not None and \
                            l_file["filename"] not in file_list:
                        continue

                    # Upload only files with no equally fresh cloud copy
                    for c_file in cloud_files:
                        if l_file["filename"] == c_file["filename"] and \
                                l_file["file_size"] == c_file["file_size"] and \
                                l_file["modified_date"] <= c_file["modified_date"]:
                            break
                    else:
                        upload_files.append(l_file)

                if upload_files:
                    log.info(
                        f"Storage: {self.storage} - Resource-ID: {self.resource_id} - Upload {len(upload_files)} files"
                    )
                    for l_file in upload_files:
                        file_path = os.path.join(self._local_folder,
                                                 l_file["filename"])
                        with open(file_path, "rb") as fp:
                            file_data = fp.read()

                        if self._cloud_obj.is_file(
                                folder=self._cloud_folder,
                                resource_id=self.resource_id,
                                filename=l_file["filename"],
                        ):
                            self._cloud_obj.delete_file(
                                folder=self._cloud_folder,
                                resource_id=self.resource_id,
                                filename=l_file["filename"],
                            )

                        self._cloud_obj.upload_file(
                            folder=self._cloud_folder,
                            resource_id=self.resource_id,
                            filename=l_file["filename"],
                            data=file_data)

                        # Set the local mtime to now so the file isn't
                        # re-downloaded on the next sync pass
                        date = datetime.datetime.now()
                        mod_time = time.mktime(date.timetuple())
                        try:
                            os.utime(file_path, (mod_time, mod_time))
                        except OSError:
                            pass

        except Exception as excp:
            log.error(traceback.format_exc())
            log.error(f"sync_files error: {excp}")
            raise  # re-raise with the original traceback intact
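
A usage sketch (the construction is hypothetical, since the class name is not shown): push local changes up, then pull a specific newer file back down.

storage = FileStorage(local_folder="/data/models")  # hypothetical constructor
storage.sync_files(origin="local")
storage.sync_files(origin="cloud", file_list=["weights.h5"])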
Example #25
    def connect(self):
        log.info('Connecting to %s', self._url)
        self._connection = pika.SelectConnection(
            pika.URLParameters(self._url),
            on_open_callback=self.on_connection_open,
            on_open_error_callback=self.on_connection_error)
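
With pika's SelectConnection, no callbacks fire until the IOLoop runs; a typical run method in this asynchronous-consumer pattern (a sketch, not from the source):

    def run(self):
        self.connect()
        self._connection.ioloop.start()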