示例#1
0
class TwitterBot:
    """Bot that signs in to twitter.com through a scripted browser session.

    The class-level patterns below are regular expressions (plus the literal
    prefix/suffix that surrounds the interesting part of each match) for
    pulling profile links and display names out of Twitter HTML.  Judging by
    their names they target the "favorites" and "following" pages.
    """

    # Raw strings keep the ``\-`` escape intact for the regex engine without
    # relying on Python passing unknown string escapes through unchanged.

    # Whole anchor: profile link carrying a ``title`` attribute.
    __favorites_regex = r'<a href="http://twitter.com/[a-zA-Z0-9_]+" title="[a-zA-Z0-9\-_ .]+">[a-zA-Z0-9_]+</a>'

    # Whole anchor: ``rel="contact"`` profile link wrapping an avatar image.
    __following_regex = r'<a href="http://twitter.com/[a-zA-Z0-9_]+" rel="contact"><img alt="[a-zA-Z0-9\-_ .]+" class'

    # Complete name inside a favorites anchor (the ``title`` attribute).
    __fav_complete_name_regex = r'title="[a-zA-Z0-9\-_ .]+">'
    __fav_complete_name_prefix = 'title="'
    __fav_complete_name_sufix = '">'

    # Complete name inside a following anchor (the image ``alt`` attribute).
    __foll_complete_name_regex = r'img alt="[a-zA-Z0-9\-_ .]+" class'
    __foll_complete_name_prefix = 'img alt="'
    __foll_complete_name_sufix = '" class'

    # Profile URL inside either kind of anchor.
    __url_regex = r'href="http://twitter.com/[a-zA-Z0-9_]+" '
    __url_prefix = 'href="'
    __url_sufix = '" '

    def __init__(self):
        """Create an unconfigured bot; call ``initialize`` before using it."""
        pass

    def initialize(self,
                   proxies_per_proto=None,
                   user=None,
                   passw=None,
                   debug=False):
        """Build the helper objects and try to sign in to Twitter.

        Args:
            proxies_per_proto: mapping handed to ``set_proxies`` on both the
                browser and the NGD helper.  ``None`` (the default) is
                treated as an empty mapping.
            user: value written to the ``session[username_or_email]`` field.
            passw: value written to the ``session[password]`` field.
            debug: forwarded to the browser's HTTP debugging switch; when
                true, a failed sign-in also prints the traceback.

        Sign-in failures are reported on stdout and are not re-raised.
        """
        # A ``{}`` default would be one dict shared by every call.
        if proxies_per_proto is None:
            proxies_per_proto = {}

        print('INIT: TwitterBot')

        # NOTE(review): Browser looks like mechanize.Browser - confirm.
        self.__br = Browser()
        self.__br.set_proxies(proxies_per_proto)
        self.__br.set_debug_http(debug)
        self.__debug = debug
        self.__sandman = SandMan('TwitterBot')

        self.__ngd = NGD()
        self.__ngd.set_proxies(proxies_per_proto)

        self.__lock = Lock()
        try:
            # sign in: the credentials go into the second form on the page
            self.__br.open("http://twitter.com/")
            self.__br.select_form(nr=1)
            self.__br['session[username_or_email]'] = user
            self.__br['session[password]'] = passw
            self.__br.submit()
            time.sleep(0.2)

        except Exception as e:
            # Best effort by design: report the failure and carry on.
            if self.__debug:
                traceback.print_exc(file=sys.stdout)
                print(str(e))
            print('EXCEPTION on TwitterBot, possibly bad user/password or https login don\' work behind a proxy.')
示例#2
0
class SearchEngineBot:
    """Bot that harvests e-mail addresses through search engines.

    Unlike a site-specific bot it performs no sign-in; ``initialize`` only
    builds the helper objects that the other methods rely on.
    """

    def __init__(self):
        """Create an unconfigured bot; call ``initialize`` before using it."""
        pass

    def initialize(self,
                   proxies_per_proto=None,
                   user=None,
                   passw=None,
                   debug=False):
        """Build the browser, NGD helper, harvest command and sleep helper.

        Args:
            proxies_per_proto: mapping handed to the browser's
                ``set_proxies`` and to the ``NGD`` constructor.  ``None``
                (the default) is treated as an empty mapping.
            user: accepted but not used here (there is no sign-in).
            passw: accepted but not used here (there is no sign-in).
            debug: forwarded to the browser's HTTP debugging switch.
        """
        # A ``{}`` default would be one dict shared by every call.
        if proxies_per_proto is None:
            proxies_per_proto = {}

        print('INIT: SearchEngineBot')

        # NOTE(review): Browser looks like mechanize.Browser - confirm.
        self.__br = Browser()
        self.__br.set_proxies(proxies_per_proto)
        self.__br.set_debug_http(debug)
        self.__ngd = NGD(proxies_per_proto)
        self.__harvest_command = EmailHarvestingCommand()
        self.__harvest_command.set_only_complete_names(False)

        self.__sandman = SandMan('SearchEngineBot')

        # no sign in

    def set_proxies_per_proto(self, proxies):
        """Store ``proxies`` and derive a ``(host, port)`` pair for HTTP.

        Args:
            proxies: mapping of protocol name to a ``"host:port"`` string.

        The mapping is kept as given and forwarded to the NGD helper; a
        failure there is reported on stdout and otherwise ignored.  The
        derived pair is ``None`` when the mapping is empty (or ``None``) or
        has no ``'http'`` entry.

        Raises:
            IndexError: the ``'http'`` entry contains no ``':'``.
            ValueError: the port part is not an integer.
        """
        self.__proxies = proxies
        try:
            self.__ngd.set_proxies(proxies)
        except Exception:
            # Narrowed from a bare ``except`` so KeyboardInterrupt and
            # SystemExit still propagate.
            # NOTE(review): the text below looks copy-pasted from a login
            # handler; kept verbatim so existing log output does not change.
            print('EXCEPTION on SeachEngineBot, possibly bad user/password or https login don\' work behind a proxy.')

        # A mapping without an 'http' entry (e.g. https only) means "no HTTP
        # proxy" rather than an error.
        http_proxy = proxies.get('http') if proxies else None
        if http_proxy is None:
            self.__proxy = None
        else:
            fields = http_proxy.split(':')
            self.__proxy = (fields[0], int(fields[1]))

    def set_sleep_secs(self, secs):
        """Forward ``secs`` to the sleep helper's ``set_sleep_secs``."""
        self.__sandman.set_sleep_secs(secs)

    def set_sleep_module(self, iterations):
        """Forward ``iterations`` to the sleep helper's ``set_sleep_module``."""
        self.__sandman.set_sleep_module(iterations)

    def set_sleep_failure(self, secs):
        """Forward ``secs`` to the sleep helper's ``set_sleep_failure``."""
        self.__sandman.set_sleep_failure(secs)

    def set_sleep_random_flag(self, bool):
        """Forward the flag to the sleep helper's ``set_sleep_random_flag``.

        The parameter name shadows the ``bool`` builtin; it is kept because
        callers may pass it by keyword.
        """
        self.__sandman.set_sleep_random_flag(bool)

    def self_email(self, email, name):
        """Tell whether ``email`` looks like it belongs to ``name``.

        The part of ``email`` before the first ``'@'`` is compared, ignoring
        case, with forms derived from ``name``:

        * any prefix of the whole name (``john`` / ``John Smith``);
        * for a two-word name: ``first.last``, ``first_last`` and the first
          initial followed by the last word (``jsmith``).

        Returns:
            True when one of the forms matches, False otherwise.
        """
        local = email.split('@')[0].lower()

        # Also covers "single word equal to the local part" and "first word
        # equal to the local part": both are prefixes of the whole name.
        if name.lower().startswith(local):
            return True

        words = name.split(' ')
        if len(words) != 2:
            return False

        first, last = words
        # first[:1] rather than first[0]: a name with a leading space splits
        # into an empty first word, which must not raise IndexError.
        candidates = ('.'.join(words), '_'.join(words), first[:1] + last)
        return any(candidate.lower() == local for candidate in candidates)

    def name_to_emails(self, aliases_and_graph):
        """Look up e-mail addresses for a set of aliases.

        Args:
            aliases_and_graph: a two-item sequence ``(aliases, graph)``,
                passed as a single positional argument.  Only ``aliases``
                is used; ``graph`` is unpacked and ignored.

        Returns:
            Whatever the private ``__name_to_emails`` helper returns for
            ``aliases`` with the ``'all_mails'`` selector.
        """
        # Unpacked in the body instead of the signature: tuple parameters
        # are Python-2-only syntax (removed by PEP 3113).
        aliases, _graph = aliases_and_graph
        self.__harvest_command.set_only_complete_names(False)
        return self.__name_to_emails(aliases, 'all_mails')