def __init__(self, stop_event: Event, parent: ServerRequestHandler,
             account_db_addr: str = "", seed_source_addr=""):
     """
     Build the account pool, the locks and the task bookkeeping for this handler.

     :param stop_event: event object stored on the instance as ``_stop_event``.
     :param parent: request handler stored on the instance as ``_parent_server``.
     :param account_db_addr: address handed to ``AccountManager``.
     :param seed_source_addr: ip addr of the seed source db
     :return:
     """
     self._stop_event = stop_event

     # Account pool: every Moz account, plus the first Majestic and the first
     # Amazon EC2 account that the manager returns.
     manager = AccountManager(account_db_addr)
     self._account_manager = manager
     moz_accounts = []
     for account in manager.AccountList:
         if account.siteType == AccountType.Moz:
             moz_accounts.append(account)
     self._moz_account_list = moz_accounts
     majestic_accounts = manager.get_accounts(AccountType.Majestic)
     self._majestic_account = majestic_accounts[0]
     ec2_accounts = manager.get_accounts(AccountType.AmazonEC2)
     self._amazon_ec2_account = ec2_accounts[0]

     # One re-entrant lock per shared structure (accounts, tasks).
     self._account_lock, self._task_lock = RLock(), RLock()
     self._task_list = list()

     self._parent_server = parent
     self._seed_source_addr = seed_source_addr

     # The base-class initialiser runs last, after all attributes are set.
     ServerRequestHandler.__init__(self)
 def __init__(self,
              stop_event: Event,
              parent: ServerRequestHandler,
              account_db_addr: str = "",
              seed_source_addr=""):
     """
     Build the account pool, the locks and the task bookkeeping for this handler.

     :param stop_event: event object stored on the instance as ``_stop_event``.
     :param parent: request handler stored on the instance as ``_parent_server``.
     :param account_db_addr: address handed to ``AccountManager``.
     :param seed_source_addr: ip addr of the seed source db
     :return:
     """
     self._stop_event = stop_event

     # Account pool: every Moz account, plus the first Majestic and the first
     # Amazon EC2 account that the manager returns.
     manager = AccountManager(account_db_addr)
     self._account_manager = manager
     moz_accounts = []
     for account in manager.AccountList:
         if account.siteType == AccountType.Moz:
             moz_accounts.append(account)
     self._moz_account_list = moz_accounts
     majestic_accounts = manager.get_accounts(AccountType.Majestic)
     self._majestic_account = majestic_accounts[0]
     ec2_accounts = manager.get_accounts(AccountType.AmazonEC2)
     self._amazon_ec2_account = ec2_accounts[0]

     # One re-entrant lock per shared structure (accounts, tasks).
     self._account_lock, self._task_lock = RLock(), RLock()
     self._task_list = list()

     self._parent_server = parent
     self._seed_source_addr = seed_source_addr

     # The base-class initialiser runs last, after all attributes are set.
     ServerRequestHandler.__init__(self)
class CrawlTaskController(ServerRequestHandler):
    """
    Request handler that owns the account pool and launches crawl tasks on
    EC2 spot instances.

    TODO: complete the following functions (add_task, remove_task,
    update_running_task_status, handle_request and the tail of _run_task
    are still stubs).
    """
    def __init__(self,
                 stop_event: Event,
                 parent: ServerRequestHandler,
                 account_db_addr: str = "",
                 seed_source_addr=""):
        """
        Load the account pool and set up locks and task bookkeeping.

        The first account returned by ``get_accounts`` is used for Majestic
        and for Amazon EC2, so construction fails if either lookup comes
        back empty.

        :param stop_event: event polled by run() and passed to the
            spot-instance requests started from _run_task.
        :param parent: request handler kept as the parent server; _run_task
            hands it to upload_seeds as the seed server.
        :param account_db_addr: address handed to AccountManager.
        :param seed_source_addr: ip addr of the seed source db
        :return:
        """
        self._stop_event = stop_event
        self._account_manager = AccountManager(account_db_addr)
        self._moz_account_list = [
            x for x in self._account_manager.AccountList
            if x.siteType == AccountType.Moz
        ]
        self._majestic_account = self._account_manager.get_accounts(
            AccountType.Majestic)[0]
        self._amazon_ec2_account = self._account_manager.get_accounts(
            AccountType.AmazonEC2)[0]
        self._account_lock = RLock()
        self._task_lock = RLock()
        self._task_list = []
        self._parent_server = parent
        self._seed_source_addr = seed_source_addr
        ServerRequestHandler.__init__(self)

    def add_task(self, task: CrawlTask):
        """Placeholder: registering a task is not implemented yet."""
        pass

    def remove_task(self, task: CrawlTask):
        """Placeholder: removing a task is not implemented yet."""
        pass

    def _get_moz_account(self, count: int):
        """
        Reserve ``count`` available Moz accounts.

        Either all ``count`` accounts are reserved or none are. Each reserved
        account is flagged ``Available = False`` in the pool, and a deep copy
        taken before the flag is flipped is handed to the caller.

        :param count: number of accounts wanted.
        :return: list of account copies; empty when fewer than ``count``
            accounts are currently available.
        """
        accounts = []
        with self._account_lock:
            available_accs = [x for x in self._moz_account_list if x.Available]
            if len(available_accs) >= count:
                # Iterate over the selected slice, not over the list being
                # filled: appending to the list under iteration never ends.
                for item in available_accs[:count]:
                    if isinstance(item, SiteAccount):
                        accounts.append(deepcopy(item))
                        item.Available = False
        return accounts

    @staticmethod
    def request_spot_instances(amazon_ec2_account: SiteAccount, image_id: str,
                               instance_type: str, zone: str,
                               instance_count: int, max_price: float,
                               tag_ref: str, stop_event: Event,
                               return_results: list):
        """
        Request spot instances, wait for them to launch and collect their
        private IPs.

        The request is valid for 30 minutes and is polled once a minute for
        that long. Launched instances are tagged ``LaunchGroupCrawl=tag_ref``.
        Instances carrying that tag are then polled until the first one
        reports the running state.

        :param amazon_ec2_account: account used for every EC2 call.
        :param image_id: machine image to launch.
        :param instance_type: EC2 instance type to request.
        :param zone: availability zone passed to every EC2 call.
        :param instance_count: number of instances to request.
        :param max_price: maximum spot price passed to the request.
        :param tag_ref: launch group name, also used as the tag value.
        :param stop_event: setting it cancels the request / stops the polling.
        :param return_results: out-parameter; each private IP is appended to
            it so that a caller running this in a Thread can read the result.
        :return: list of private IPs, empty if nothing was launched or the
            stop event was set.
        """
        tag_name = "LaunchGroupCrawl"
        tag_value = tag_ref
        tag_dict = {tag_name: tag_value}
        mins_to_wait = 30
        poll_interval_sec = 60
        request = EC2Resource.EC2Resource.request_spot_instances(
            amazon_ec2_account,
            image_id=image_id,
            key_name=Const.SshSecureKeyName.Default,
            security_group=Const.SecureGroupId.CrawlOperation,
            instance_type=instance_type,
            zone=zone,
            instance_count=instance_count,
            price=max_price,
            launch_group=tag_value,
            request_valid_duration_min=mins_to_wait,
            dry_run=False)
        request_ids = request.ids
        # Remember the ids so that a cancellation has something to cancel.
        # NOTE(review): assumes cancel_spot_instances expects the same ids
        # that the request reports - confirm against EC2Resource.
        request_id_list = list(request_ids) if request_ids is not None else []
        print("request is send, request ids are:")
        for item in request_id_list:
            print(item)
        time.sleep(1)

        instance_id_list = []
        for _ in range(mins_to_wait):
            if stop_event.is_set():
                EC2Resource.EC2Resource.cancel_spot_instances(
                    amazon_ec2_account, zone=zone, request_ids=request_id_list)
                break

            results = EC2Resource.EC2Resource.get_spot_instances_info(
                amazon_ec2_account, zone=zone, launch_group=tag_value)

            if results is not None:
                for item in results:
                    if item.ins_id:
                        instance_id_list.append(item.ins_id)
                        print(item)
                if instance_id_list:
                    print("instance finally launched!")
                    result = EC2Resource.EC2Resource.adding_tag_to_instances(
                        amazon_ec2_account,
                        zone=zone,
                        ids=instance_id_list,
                        tags_dict=tag_dict)
                    print("adding launch group tag:", result)
                    break
            print("nothing happens yet, please wait")
            # Wait on the event instead of sleeping so that a stop request
            # is noticed immediately rather than after a full minute.
            stop_event.wait(poll_interval_sec)

        private_ip_list = []
        if instance_id_list:
            while not stop_event.is_set():
                results = EC2Resource.EC2Resource.get_instances_by_tag(
                    amazon_ec2_account,
                    zone=zone,
                    tag_key=tag_name,
                    tag_value=tag_value)
                # Truthiness check: an empty list must not reach results[0].
                if results:
                    print("here is a list of ip we can use:")
                    # Only the first instance's state is inspected.
                    if results[0].state == Const.InstanceState.Running:
                        for item in results:
                            return_results.append(item.private_ip)
                            print(item.private_ip)
                            private_ip_list.append(item.private_ip)
                        break
                stop_event.wait(poll_interval_sec)
        return private_ip_list

    @staticmethod
    def upload_seeds(ref: str,
                     target_host: Server,
                     seed_server: ServerRequestHandler,
                     niches=None,
                     init_seeds=None,
                     seed_source_addr="",
                     seeds_per_niche=5000):
        """
        Send a de-duplicated seed list to ``target_host``.

        When ``init_seeds`` is empty, seeds are read from ``seed_server``,
        one request per niche. The caller's ``init_seeds`` list is never
        modified.

        :param ref: reference stored in the MiningList that is sent.
        :param target_host: server that receives the Com_Add_Seed command.
        :param seed_server: local handler answering Com_Get_DB_DATA requests.
        :param niches: niches to read seeds for; None means no niches.
        :param init_seeds: seeds to send as-is; None or empty means "read
            them from the seed server".
        :param seed_source_addr: address of a remote seed db; not supported
            yet when seeds have to be fetched.
        :param seeds_per_niche: number of seeds requested per niche.
        :return: number of unique seeds sent.
        :raises NotImplementedError: if seeds must be fetched and either a
            remote ``seed_source_addr`` is given or ``seed_server`` is not a
            ServerRequestHandler.
        """
        # Work on a private copy: extending the caller's list (or a shared
        # default) would leak seeds from one call into the next.
        seeds = list(init_seeds) if init_seeds else []
        if not seeds:
            if len(seed_source_addr) > 0:
                # TODO: case when it is a remote seed db
                raise NotImplementedError
            for niche in niches or ():
                if not isinstance(seed_server, ServerRequestHandler):
                    raise NotImplementedError
                request = SeedDBRequest(niche=niche,
                                        random_read=True,
                                        reverse_read=True,
                                        data_len=seeds_per_niche)
                cmd = CommandStruct(cmd=ServerCommand.Com_Get_DB_DATA,
                                    target=ServerType.ty_Seed_Database,
                                    data=request)
                # case when it is a local seed db
                temp = seed_server.handle_request(cmd)
                if isinstance(temp, MiningList):
                    seeds.extend(temp.data)

        # Drop duplicates while keeping first-seen order.
        seeds = list(dict.fromkeys(seeds))
        in_data = MiningList(ref=ref, data=seeds)
        host_controller = HostController(target_host,
                                         cmd=ServerCommand.Com_Add_Seed,
                                         in_data=in_data)
        host_controller.start()
        host_controller.join()
        return len(seeds)

    def _run_task(self, task: CrawlTask):
        """
        Launch the instances for ``task`` and upload its seeds to the host.

        Nothing happens when the Moz accounts cannot be reserved. Otherwise
        the slave instances are requested, and a host instance as well when
        the task does not name one, and the seeds are uploaded to the host.
        The remaining steps are still to be written.

        :param task: crawl task describing instance type, host, niches and
            seeds.
        :raises ValueError: if ``task.instance_id`` is not a supported
            instance type.
        :raises NotImplementedError: if a host had to be launched and the
            spot request returned no address.
        """
        moz_account_count = 750
        accounts = self._get_moz_account(moz_account_count)
        zone = Const.Zone.US_West_2A
        image_id = Const.ImageId.Crawler_v1050

        if len(accounts) > 0:
            host_type = Const.Ec2InstanceType.T2_Micro
            host_price = 0.2
            if task.instance_id == Const.Ec2InstanceType.M4_Large:
                slave_count = 160
                max_price = 1.1
            elif task.instance_id == Const.Ec2InstanceType.M4_4X:
                slave_count = 20
                max_price = 0.15
            else:
                raise ValueError(
                    "CrawlTaskController._run_task: invalid CrawlTask.instance_id"
                )

            host_ref = task.unique_ref + "Host"
            slave_results = list()
            slave_request_t = Thread(
                target=CrawlTaskController.request_spot_instances,
                args=(self._amazon_ec2_account, image_id, task.instance_id,
                      zone, slave_count, max_price, task.unique_ref,
                      self._stop_event, slave_results))
            slave_request_t.start()

            host_results = list()
            host_ip = task.host_addr
            host_request_t = None
            if len(host_ip) == 0:
                # No host supplied with the task: launch one alongside the
                # slaves.
                host_request_t = Thread(
                    target=CrawlTaskController.request_spot_instances,
                    args=(self._amazon_ec2_account, image_id, host_type, zone,
                          1, host_price, host_ref, self._stop_event,
                          host_results))
                host_request_t.start()

            slave_request_t.join()
            if host_request_t is not None:
                host_request_t.join()
                if len(host_results) == 0:
                    # todo: should start the instance normally, and add the tag
                    raise NotImplementedError
                # NOTE(review): the launched host's address is used as the
                # upload target; previously the empty task.host_addr was
                # used here - confirm this is the intended host.
                host_ip = host_results[0]

            # upload seeds
            # seeds_count is not consumed yet; kept for the unfinished
            # follow-up steps below.
            seeds_count = self.upload_seeds(
                task.unique_ref,
                target_host=Server(address=ServerAddress(
                    host_ip, MiningTCPServer.DefaultListenPort)),
                seed_server=self._parent_server,
                niches=task.niches,
                init_seeds=task.init_seeds,
                seed_source_addr=self._seed_source_addr,
                seeds_per_niche=task.seed_per_niche)
            # todo...

    def update_running_task_status(self):
        """Placeholder: refreshing task status is not implemented yet."""
        pass

    def handle_request(self, cmd: CommandStruct):
        """
        Handle a command sent to this controller; not implemented yet.

        :raises NotImplementedError: always.
        """
        raise NotImplementedError

    def run(self):
        """Refresh the task status about once a second until the stop event is set."""
        while not self._stop_event.is_set():
            self.update_running_task_status()
            # Returns early when the stop event is set during the pause.
            self._stop_event.wait(1)
class CrawlTaskController(ServerRequestHandler):
    """
    Request handler that owns the account pool and launches crawl tasks on
    EC2 spot instances.

    TODO: complete the following functions (add_task, remove_task,
    update_running_task_status, handle_request and the tail of _run_task
    are still stubs).
    """
    def __init__(self, stop_event: Event, parent: ServerRequestHandler,  account_db_addr: str="", seed_source_addr=""):
        """
        Load the account pool and set up locks and task bookkeeping.

        The first account returned by ``get_accounts`` is used for Majestic
        and for Amazon EC2, so construction fails if either lookup comes
        back empty.

        :param stop_event: event polled by run() and passed to the
            spot-instance requests started from _run_task.
        :param parent: request handler kept as the parent server; _run_task
            hands it to upload_seeds as the seed server.
        :param account_db_addr: address handed to AccountManager.
        :param seed_source_addr: ip addr of the seed source db
        :return:
        """
        self._stop_event = stop_event
        self._account_manager = AccountManager(account_db_addr)
        self._moz_account_list = [x for x in self._account_manager.AccountList if x.siteType == AccountType.Moz]
        self._majestic_account = self._account_manager.get_accounts(AccountType.Majestic)[0]
        self._amazon_ec2_account = self._account_manager.get_accounts(AccountType.AmazonEC2)[0]
        self._account_lock = RLock()
        self._task_lock = RLock()
        self._task_list = []
        self._parent_server = parent
        self._seed_source_addr = seed_source_addr
        ServerRequestHandler.__init__(self)

    def add_task(self, task: CrawlTask):
        """Placeholder: registering a task is not implemented yet."""
        pass

    def remove_task(self, task: CrawlTask):
        """Placeholder: removing a task is not implemented yet."""
        pass

    def _get_moz_account(self, count: int):
        """
        Reserve ``count`` available Moz accounts.

        Either all ``count`` accounts are reserved or none are. Each reserved
        account is flagged ``Available = False`` in the pool, and a deep copy
        taken before the flag is flipped is handed to the caller.

        :param count: number of accounts wanted.
        :return: list of account copies; empty when fewer than ``count``
            accounts are currently available.
        """
        accounts = []
        with self._account_lock:
            available_accs = [x for x in self._moz_account_list if x.Available]
            if len(available_accs) >= count:
                # Iterate over the selected slice, not over the list being
                # filled: appending to the list under iteration never ends.
                for item in available_accs[:count]:
                    if isinstance(item, SiteAccount):
                        accounts.append(deepcopy(item))
                        item.Available = False
        return accounts

    @staticmethod
    def request_spot_instances(amazon_ec2_account: SiteAccount, image_id: str,
                                   instance_type: str, zone: str, instance_count: int,
                                   max_price: float, tag_ref: str, stop_event: Event, return_results: list):
        """
        Request spot instances, wait for them to launch and collect their
        private IPs.

        The request is valid for 30 minutes and is polled once a minute for
        that long. Launched instances are tagged ``LaunchGroupCrawl=tag_ref``.
        Instances carrying that tag are then polled until the first one
        reports the running state.

        :param amazon_ec2_account: account used for every EC2 call.
        :param image_id: machine image to launch.
        :param instance_type: EC2 instance type to request.
        :param zone: availability zone passed to every EC2 call.
        :param instance_count: number of instances to request.
        :param max_price: maximum spot price passed to the request.
        :param tag_ref: launch group name, also used as the tag value.
        :param stop_event: setting it cancels the request / stops the polling.
        :param return_results: out-parameter; each private IP is appended to
            it so that a caller running this in a Thread can read the result.
        :return: list of private IPs, empty if nothing was launched or the
            stop event was set.
        """
        tag_name = "LaunchGroupCrawl"
        tag_value = tag_ref
        tag_dict = {tag_name: tag_value}
        mins_to_wait = 30
        poll_interval_sec = 60
        request = EC2Resource.EC2Resource.request_spot_instances(amazon_ec2_account,
                                                                 image_id=image_id,
                                                                 key_name=Const.SshSecureKeyName.Default,
                                                                 security_group=Const.SecureGroupId.CrawlOperation,
                                                                 instance_type=instance_type,
                                                                 zone=zone,
                                                                 instance_count=instance_count,
                                                                 price=max_price,
                                                                 launch_group=tag_value,
                                                                 request_valid_duration_min=mins_to_wait,
                                                                 dry_run=False)
        request_ids = request.ids
        # Remember the ids so that a cancellation has something to cancel.
        # NOTE(review): assumes cancel_spot_instances expects the same ids
        # that the request reports - confirm against EC2Resource.
        request_id_list = list(request_ids) if request_ids is not None else []
        print("request is send, request ids are:")
        for item in request_id_list:
            print(item)
        time.sleep(1)

        instance_id_list = []
        for _ in range(mins_to_wait):
            if stop_event.is_set():
                EC2Resource.EC2Resource.cancel_spot_instances(amazon_ec2_account, zone=zone, request_ids=request_id_list)
                break

            results = EC2Resource.EC2Resource.get_spot_instances_info(amazon_ec2_account, zone=zone,
                                                                      launch_group=tag_value)

            if results is not None:
                for item in results:
                    if item.ins_id:
                        instance_id_list.append(item.ins_id)
                        print(item)
                if instance_id_list:
                    print("instance finally launched!")
                    result = EC2Resource.EC2Resource.adding_tag_to_instances(amazon_ec2_account,
                                                                              zone=zone,
                                                                              ids=instance_id_list,
                                                                              tags_dict=tag_dict)
                    print("adding launch group tag:", result)
                    break
            print("nothing happens yet, please wait")
            # Wait on the event instead of sleeping so that a stop request
            # is noticed immediately rather than after a full minute.
            stop_event.wait(poll_interval_sec)

        private_ip_list = []
        if instance_id_list:
            while not stop_event.is_set():
                results = EC2Resource.EC2Resource.get_instances_by_tag(amazon_ec2_account, zone=zone,
                                                                        tag_key=tag_name, tag_value=tag_value)
                # Truthiness check: an empty list must not reach results[0].
                if results:
                    print("here is a list of ip we can use:")
                    # Only the first instance's state is inspected.
                    if results[0].state == Const.InstanceState.Running:
                        for item in results:
                            return_results.append(item.private_ip)
                            print(item.private_ip)
                            private_ip_list.append(item.private_ip)
                        break
                stop_event.wait(poll_interval_sec)
        return private_ip_list

    @staticmethod
    def upload_seeds(ref: str, target_host: Server, seed_server: ServerRequestHandler, niches=None, init_seeds=None,
                     seed_source_addr="", seeds_per_niche=5000):
        """
        Send a de-duplicated seed list to ``target_host``.

        When ``init_seeds`` is empty, seeds are read from ``seed_server``,
        one request per niche. The caller's ``init_seeds`` list is never
        modified.

        :param ref: reference stored in the MiningList that is sent.
        :param target_host: server that receives the Com_Add_Seed command.
        :param seed_server: local handler answering Com_Get_DB_DATA requests.
        :param niches: niches to read seeds for; None means no niches.
        :param init_seeds: seeds to send as-is; None or empty means "read
            them from the seed server".
        :param seed_source_addr: address of a remote seed db; not supported
            yet when seeds have to be fetched.
        :param seeds_per_niche: number of seeds requested per niche.
        :return: number of unique seeds sent.
        :raises NotImplementedError: if seeds must be fetched and either a
            remote ``seed_source_addr`` is given or ``seed_server`` is not a
            ServerRequestHandler.
        """
        # Work on a private copy: extending the caller's list (or a shared
        # default) would leak seeds from one call into the next.
        seeds = list(init_seeds) if init_seeds else []
        if not seeds:
            if len(seed_source_addr) > 0:
                # TODO: case when it is a remote seed db
                raise NotImplementedError
            for niche in niches or ():
                if not isinstance(seed_server, ServerRequestHandler):
                    raise NotImplementedError
                request = SeedDBRequest(niche=niche, random_read=True, reverse_read=True,
                                        data_len=seeds_per_niche)
                cmd = CommandStruct(cmd=ServerCommand.Com_Get_DB_DATA,
                                    target=ServerType.ty_Seed_Database, data=request)
                # case when it is a local seed db
                temp = seed_server.handle_request(cmd)
                if isinstance(temp, MiningList):
                    seeds.extend(temp.data)

        # Drop duplicates while keeping first-seen order.
        seeds = list(dict.fromkeys(seeds))
        in_data = MiningList(ref=ref, data=seeds)
        host_controller = HostController(target_host, cmd=ServerCommand.Com_Add_Seed, in_data=in_data)
        host_controller.start()
        host_controller.join()
        return len(seeds)

    def _run_task(self, task: CrawlTask):
        """
        Launch the instances for ``task`` and upload its seeds to the host.

        Nothing happens when the Moz accounts cannot be reserved. Otherwise
        the slave instances are requested, and a host instance as well when
        the task does not name one, and the seeds are uploaded to the host.
        The remaining steps are still to be written.

        :param task: crawl task describing instance type, host, niches and
            seeds.
        :raises ValueError: if ``task.instance_id`` is not a supported
            instance type.
        :raises NotImplementedError: if a host had to be launched and the
            spot request returned no address.
        """
        moz_account_count = 750
        accounts = self._get_moz_account(moz_account_count)
        zone = Const.Zone.US_West_2A
        image_id = Const.ImageId.Crawler_v1050

        if len(accounts) > 0:
            host_type = Const.Ec2InstanceType.T2_Micro
            host_price = 0.2
            if task.instance_id == Const.Ec2InstanceType.M4_Large:
                slave_count = 160
                max_price = 1.1
            elif task.instance_id == Const.Ec2InstanceType.M4_4X:
                slave_count = 20
                max_price = 0.15
            else:
                raise ValueError("CrawlTaskController._run_task: invalid CrawlTask.instance_id")

            host_ref = task.unique_ref+"Host"
            slave_results = list()
            slave_request_t = Thread(target=CrawlTaskController.request_spot_instances,
                                     args=(self._amazon_ec2_account, image_id, task.instance_id,
                                           zone, slave_count, max_price, task.unique_ref,
                                           self._stop_event, slave_results))
            slave_request_t.start()

            host_results = list()
            host_ip = task.host_addr
            host_request_t = None
            if len(host_ip) == 0:
                # No host supplied with the task: launch one alongside the
                # slaves.
                host_request_t = Thread(target=CrawlTaskController.request_spot_instances,
                                        args=(self._amazon_ec2_account, image_id, host_type,
                                              zone, 1, host_price, host_ref,
                                              self._stop_event, host_results))
                host_request_t.start()

            slave_request_t.join()
            if host_request_t is not None:
                host_request_t.join()
                if len(host_results) == 0:
                    # todo: should start the instance normally, and add the tag
                    raise NotImplementedError
                # NOTE(review): the launched host's address is used as the
                # upload target; previously the empty task.host_addr was
                # used here - confirm this is the intended host.
                host_ip = host_results[0]

            # upload seeds
            # seeds_count is not consumed yet; kept for the unfinished
            # follow-up steps below.
            seeds_count = self.upload_seeds(task.unique_ref,
                              target_host=Server(address=ServerAddress(host_ip, MiningTCPServer.DefaultListenPort)),
                              seed_server=self._parent_server, niches=task.niches, init_seeds=task.init_seeds,
                              seed_source_addr=self._seed_source_addr, seeds_per_niche=task.seed_per_niche)
            # todo...

    def update_running_task_status(self):
        """Placeholder: refreshing task status is not implemented yet."""
        pass

    def handle_request(self, cmd: CommandStruct):
        """
        Handle a command sent to this controller; not implemented yet.

        :raises NotImplementedError: always.
        """
        raise NotImplementedError

    def run(self):
        """Refresh the task status about once a second until the stop event is set."""
        while not self._stop_event.is_set():
            self.update_running_task_status()
            # Returns early when the stop event is set during the pause.
            self._stop_event.wait(1)