class DataCollector(object):
    """Extract URLs from chat messages and append them to the links log.

    Every logged link is written as ``<chan> - <user>: <link> > <title>``.
    """

    # Matches URLs that start with a scheme or "www." (so not e.g. a bare
    # "google.com").  Taken from regexlib.com.
    # The pattern contains no whitespace on purpose: re.VERBOSE does NOT
    # strip whitespace inside a character class, so wrapping the pattern in
    # the middle of "[a-zA-Z0-9...]" made hostnames match across spaces.
    pattern = (
        r"(((http|ftp|https|ftps|sftp)://)|(www\.))+"
        r"(([a-zA-Z0-9\._-]+\.[a-zA-Z]{2,6})"
        r"|([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}))"
        r"(/[a-zA-Z0-9\&%_\./-~-]*)?"
    )

    # Pattern used to filter links: links containing any of these keywords
    # will NOT be included in the links log.
    filter_pat = "(codepad|pastebin|pocoo)"

    def __init__(self):
        """Open the links log in append mode and write today's date to it.

        If the log cannot be opened, a message is printed and ``link_log``
        stays ``None``; the other methods then do nothing instead of failing.
        """
        self.config = BotConfig()
        self.link_log = None
        try:
            # Opening the links file with utf encoding is required because
            # unicode data has to be written to the file at times --
            # specifically the em character (\u2014) which represents an
            # underscore in the username.
            self.link_log = codecs.open(
                self.config.get_link_path(), "a", encoding="utf-8")
            self.link_log.write(self.get_date() + "\n")
        except IOError:
            # The original handler was a bare string expression (a no-op);
            # report the failure instead of silently discarding it.
            print("links log can't be opened.")

    def extract_links(self, user, chan, msg):
        """Log every URL found in ``msg`` that is not excluded by the filter.

        Args:
            user: nick of the sender; messages from "ChanServ" are ignored.
            chan: channel name written at the start of each log line.
            msg: raw message text to scan for URLs.
        """
        if self.link_log is None or user == "ChanServ":
            return

        flags = re.IGNORECASE | re.VERBOSE
        for match in re.finditer(self.pattern, msg.strip(), flags):
            # match.group() is the full URL matched by the pattern
            link = match.group()
            if re.search(self.filter_pat, link, re.IGNORECASE):
                continue
            title = self.get_title(link)
            self.link_log.write(
                "%s - %s: %s > %s\n" % (chan, user, link, title))
            self.link_log.flush()

    def close(self):
        """Close the links log if it was opened."""
        if self.link_log is not None:
            self.link_log.close()

    def get_date(self):
        """Return today's date in the format: Sunday, August 01, 2010."""
        return datetime.now().strftime("%A, %B %d, %Y")

    def get_title(self, url):
        """Return the text of the <title> tag at ``url``.

        Returns "No Title" when the page cannot be fetched or parsed, or
        when it has no <title> tag.
        """
        try:
            response = urllib.urlopen(url)
            try:
                source = response.read()
            finally:
                # release the connection even if reading fails
                response.close()
            return BeautifulSoup(source).title.text
        except Exception:
            # Best effort: a missing title must never break link logging.
            return "No Title"
def __init__(self):
    """Load the bot config, open the links log and set up the URL patterns.

    If the links log cannot be opened, a message is printed and
    ``self.link_log`` is left as ``None``.
    """
    self.config = BotConfig()

    self.link_log = None
    try:
        # Opening the links file with utf encoding is required because
        # unicode data has to be written to the file at times --
        # specifically the em character (\u2014) which represents an
        # underscore in the username.
        self.link_log = codecs.open(
            self.config.get_link_path(), "a", encoding="utf-8")
        self.link_log.write(self.get_date() + "\n")
    except IOError:
        # The original handler was a bare string expression (a no-op);
        # report the failure instead of silently discarding it.
        print("links log can't be opened.")

    # Matches all valid urls but not ie. google.com; taken from regexlib.com.
    # The pattern contains no whitespace on purpose: re.VERBOSE does NOT
    # strip whitespace inside a character class, so wrapping the pattern in
    # the middle of "[a-zA-Z0-9...]" made hostnames match across spaces.
    self.pattern = (
        r"(((http|ftp|https|ftps|sftp)://)|(www\.))+"
        r"(([a-zA-Z0-9\._-]+\.[a-zA-Z]{2,6})"
        r"|([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}))"
        r"(/[a-zA-Z0-9\&%_\./-~-]*)?"
    )

    # Pattern used to filter links: keywords included in this pattern
    # will NOT be included in the links log.
    self.filter_pat = "(codepad|pastebin|pocoo)"
""" ircbot.py Handles all the protocols and factories. This is where the behavior of the bot is defined and it is where the bot is connected. """ from sys import stdout from twisted.python.log import startLogging from twisted.internet import reactor, protocol from twisted.words.protocols import irc import datacollect from botconfig import BotConfig config = BotConfig() class IRCProtocol(irc.IRCClient): nickname = config.get_nick().encode("ascii") def signedOn(self): # Identify myself to NickServ so I can join # +r (must be registered) channels self.msg("NickServ", "identify " + config.get_pass().encode("ascii")) def privmsg(self, user, channel, message): # This method logs ALL messages by users in channel username = self.extract_nick(user) # have the data collector instance parse the message
# Scripts running location. Only set if called via python.exe
__location__ = os.path.realpath(
    # From https://docs.python.org/3/library/os.path.html
    # If a component is an absolute path, all previous components
    # are thrown away and joining continues from the absolute path component.
    os.path.join(os.getcwd(), os.path.dirname(__file__)))

# Load Configuration File
config_file_path = Path(os.path.join(__location__, config_file_name))

# Read in configuration file.
if config_file_path.is_file():
    print("Configuration found in: {}".format(config_file_path))

    # Initiate the bot config object from file
    bot_config = BotConfig.from_json_config(config_file_path)
    print(str(bot_config))
else:
    # "{}" is a positional field, so the path has to be passed positionally;
    # passing it as ``path=...`` raised IndexError instead of printing.
    print("The configuration file {} does not exist".format(config_file_path))
    # NOTE(review): bot_config is not assigned on this branch in the code
    # visible here, so the Bot(...) call below would fail with NameError --
    # confirm whether start-up should stop here instead.

# Initialize the bot
bot = Bot(command_prefix=bot_config.command_prefix)

# Prep SQLAlchemy
engine = create_engine(bot_config.db_url, pool_recycle=3600)
session = Session(bind=engine)
Base.metadata.create_all(engine)


@bot.event
async def on_ready():
    '''Event for when the bot is ready to start working'''