def create_limb(limb_class, config_data, broker_ip, broker_port, limb_port):
    """
    Build a LimbInvoker for limb_class and send its enrolled signal.

    :param limb_class: the limb class to wrap; its __name__ names the logger
    :param config_data: pickled config dict; it must contain "log_level"
    :param broker_ip: forwarded unchanged to LimbInvoker
    :param broker_port: forwarded unchanged to LimbInvoker
    :param limb_port: forwarded unchanged to LimbInvoker
    :return: None
    """
    # NOTE(review): pickle.loads can run arbitrary code while unpickling;
    # this presumably relies on config_data coming from a trusted sender.
    limb_config = pickle.loads(config_data)

    # The logger is created here, after unpickling, and stored in the config.
    limb_name = str(limb_class.__name__)
    limb_config["logger"] = centipede_logger.create_logger(
        limb_name, limb_config["log_level"])

    LimbInvoker(limb_class, limb_config, broker_ip, broker_port,
                limb_port).send_enrolled_signal()
Пример #2
0
    def __init__(self, config=None):
        """
        Set up the limb bookkeeping lists, the URL job generator, the logger
        and the broker, then start update_linked_resources on a timer thread.

        :param config: object exposing a GENERAL mapping (with a "log_level"
                       key) and a UrlGenerator mapping
        """
        self.limb_classes, self.limbs = [], []
        self.config = config

        # Generator-specific settings win over the shared GENERAL ones
        # whenever both define the same key.
        general_settings = self.config.GENERAL
        generator_settings = self.config.UrlGenerator
        self.job_generator = resource_generator.UrlGenerator(
            config={**general_settings, **generator_settings})

        self.log_level = self.config.GENERAL["log_level"]
        logger_name = self.__class__.__name__
        self.logger = centipede_logger.create_logger(logger_name, self.log_level)

        self.broker = centipede_broker.CentipedeBroker()

        # A zero-delay timer runs update_linked_resources on its own thread
        # instead of inside the constructor.
        refresh_timer = threading.Timer(0, self.update_linked_resources)
        refresh_timer.start()
Пример #3
0
        super(SleepLimb, self).__init__(config_dict)

        self.logger = config_dict["logger"]

        wildcard_re = re.compile("^")
        self.associate_regex_with_method(wildcard_re, self.ingest)

    def ingest(self, url, data_package):
        """
        Log that the URL is being handled, pause for five seconds, then log completion.

        Nothing is read from or written to data_package in this method.
        :param url: the URL of the page being processed (a string; it is concatenated into the log line)
        :param data_package: the package object handed in by the caller; unused here
        :return: None
        """
        start_message = ("Currently processing " + url +
                         " with the sleep limb. No action has been taken.")
        self.logger.info(start_message)

        # The pause is the only "work" this limb performs.
        time.sleep(5)

        self.logger.info("\t Done.")


if __name__ == "__main__":
    # Manual smoke run: push one empty Package through a SleepLimb using an
    # empty URL and a DEBUG-level logger.
    demo_logger = centipede_logger.create_logger("empty_limb", logging.DEBUG)
    sleep_limb = SleepLimb({"logger": demo_logger})

    empty_package = Package()
    sleep_limb.scrape_from_url("", empty_package)
Пример #4
0
            send_text_flag = False
            try:
                send_text_flag = get_send_flag_func(data_package)
            except:
                pass

            if send_text_flag:

                message_body = self.config_dict["message_template"].format(url)
                client.publish(PhoneNumber=aws_sns_constants.DEST_NUMBER,
                               Message=message_body)
                self.logger.debug("We are sending a text for url " + url)

        else:
            raise AttributeError("The config dict for " + self.__class__ +
                                 " must contain an attribute 'get_text_flag'.")


if __name__ == "__main__":
    # Manual smoke run: a package flagged as malicious is pushed through
    # SendText with an empty URL.
    text_logger = centipede_logger.create_logger("send_text", logging.DEBUG)
    text_config = {
        "get_text_flag": lambda package: package.is_malicious,
        "message_template": "The thread found at {} was found to be malicious!",
        "logger": text_logger,
    }
    text_limb = SendText(text_config)

    flagged_package = Package()
    flagged_package.is_malicious = True
    text_limb.scrape_from_url("", flagged_package)
Пример #5
0
            comment_data = RedditComment(
                input_dict={
                    "comment_id": id,
                    "content": body,
                    "comment_datetime": time_string,
                    "comment_author": username,
                    "points": points,
                    "source": page_url,
                    "rank": i
                })

            data_package.reddit_info.append(comment_data)


if __name__ == "__main__":
    # Manual smoke run: scrape one Reddit thread into a fresh Package and
    # dump the attributes the scraper set on it.
    scraper_logger = centipede_logger.create_logger("reddit_scraper",
                                                    logging.DEBUG)
    scraper_config = {
        "logger": scraper_logger,
        "ff_binary_location": "C:\\Program Files\\Mozilla Firefox",
        "SPOOF_USER_AGENT": True,
        "USE_PROXY_SERVER": False,
    }
    reddit_scraper = RedditScraper(scraper_config)

    thread_url = "https://www.reddit.com/r/SelfAwarewolves/comments/ga057f/a_trans_exclusionist_has_a_brush_with_self/"
    scraped = Package()
    reddit_scraper.scrape_from_url(thread_url, scraped)
    print(vars(scraped))
Пример #6
0
                    self.logger.debug(data_package.threads[i].link +
                                      " is not malicious.")

                data_package.threads[i].is_malicious = is_malicious

        else:
            raise AttributeError("The config dict for " + str(self.__class__) +
                                 " must contain an attribute 'get_text_flag'.")


if __name__ == "__main__":
    # Manual smoke run: one hand-built FourChanThread is pushed through
    # DetectMaliceInText with an empty URL.
    malice_config = {
        # Collects the opening-post text of every thread whose
        # body_cut_off attribute is falsy.
        "get_text_method": lambda package: [
            thread.op_content
            for thread in package.threads
            if not thread.body_cut_off
        ],
        "logger": centipede_logger.create_logger("DetectMaliceInText",
                                                 logging.DEBUG),
    }
    malice_limb = DetectMaliceInText(malice_config)

    sample_package = Package()
    sample_thread = FourChanThread(
        {"op_content": "If you live in wichita ks dont come to school tomorrow"})
    sample_package.threads = [sample_thread]

    malice_limb.scrape_from_url("", sample_package)
Пример #7
0
            fp.close()

        if should_copy_page:
            self.logger.debug("Just finished copying " + page + " in " +
                              str(time.time() - start_time) + " seconds.")
        else:
            self.logger.debug(page +
                              " was not malicious, so we did not copy it.")

        return data_package


if __name__ == "__main__":
    # Manual smoke run: time a single DeepCopyPage pass over an empty URL
    # with selenium enabled.
    copy_config = {
        "SPOOF_USER_AGENT": True,
        "USE_PROXY_SERVER": False,
        "logger": centipede_logger.create_logger("DeepCopyPage",
                                                 logging.DEBUG),
        "ff_binary_location": "C:\\Program Files\\Mozilla Firefox",
        "use_selenium": True,
    }
    copy_limb = DeepCopyPage(copy_config)
    pack = Package()

    # The clock starts only after the limb and package exist, so setup
    # cost is not counted.
    start_time = time.time()
    copy_limb.scrape_from_url("", pack)
    print("Time taken: " + str(time.time() - start_time))
Пример #8
0
    def __init__(self, config=None):
        """
        Create the ingestion queue manager and a DEBUG-level logger.

        :param config: passed unchanged to IngestionQueueManager; defaults to None
        """
        # NOTE(review): the expected shape of config is not visible here;
        # it is simply handed to the queue manager.
        self.resource_queue = ingestion_queue_manager.IngestionQueueManager(
            config)

        # The logger is named after the concrete class and fixed at DEBUG
        # rather than reading a level from config.
        self.logger = centipede_logger.create_logger(self.__class__.__name__,
                                                     logging.DEBUG)