Пример #1
0
 def obtainURLsFromDB(cls, table=Business):
     """
     Collect URLs of US rows from a table, plus the domain of each URL.

     Opens the 'bizsearch' database, asks ``table.fetch_by`` for the
     'url' and 'country' columns filtered by ``country='US'``, and keeps
     only the URL of every returned pair.  Both resulting lists are
     printed before being returned.

     NOTE(review): the first parameter is named ``cls``, so this is
     presumably a classmethod -- the decorator is not visible here.
     NOTE(review): the meaning of the positional ``0`` handed to
     ``fetch_by`` is not visible here -- confirm against its definition.

     :param table: object exposing ``fetch_by``; defaults to ``Business``.
     :return: tuple ``(url_list, domain_list)``; ``domain_list[i]`` is
         ``domain_from_url(url_list[i])``.
     """
     url_list = []
     domain_list = []
     with open_db(dbname='bizsearch') as bizdb:
         rows = table.fetch_by(bizdb, ['url', 'country'], 0, "country='US'")
         # Each row is a (url, country) pair; only the URL is kept.
         for address, _country in rows:
             url_list.append(address)
         # Second pass so all rows are consumed before any domain is derived.
         for address in url_list:
             domain_list.append(domain_from_url(address))
     print(url_list)
     print(domain_list)
     return url_list, domain_list
Пример #2
0
    def __init__(self, input='db', output='db', dbname='bizsearch', dbpwd=None, *args, **kwargs):
        """Initialise the spider and set up where results are written.

        Args:
            input: Accepted for interface compatibility; not read by this
                constructor.
            output: ``'db'`` to write through a database cursor; any other
                value writes tab-delimited rows to ``results.list``.
            dbname: Database name handed to ``open_db`` when
                ``output == 'db'``.
            dbpwd: Accepted for interface compatibility; not read by this
                constructor.
            *args: Forwarded to the parent class constructor.
            **kwargs: Forwarded to the parent class constructor.
        """
        super(MySpider, self).__init__(*args, **kwargs)
        self.emailList = []

        self.output = output

        if output == 'db':
            # Hold on to the connection as well as the cursor so it can be
            # committed/closed later and stays referenced for as long as
            # the cursor is in use.
            self.conn = open_db(dbname)
            self.dbcursor = self.conn.cursor()
        else:
            # Binary mode is what the Python 2 csv module expects for the
            # underlying file object.
            self.f = open('results.list', 'wb')
            self.csvWriter = csv.writer(self.f, delimiter='\t')
Пример #3
0
def run():
    """Insert sample business and email rows into 'bizsearch' and print a lookup.

    For each of ``Business`` and ``Email`` one row is inserted with
    ``insert_one_by_value`` and then four more with ``insert_a_batch``.
    Finally every ``Email`` row returned by ``fetch_by`` for
    ``address="a@b"`` is printed.
    """
    # Seven-field business records for the batch insert.
    business_batch = [
        ["biz2", "biz21.com", "4044898763", "AGY", "US", "P", "Jack0"],
        ["biz3", "biz31.com", "4044898764", "TUR", "US", "P", "Jack2"],
        ["biz4", "biz41.com", "4044898765", "RLS", "US", "P", "Jack3"],
        ["biz5", "biz51.com", "4044898766", "EDU", "US", "P", "Jack4"],
    ]
    # Four-field email records for the batch insert.
    email_batch = [
        ["a@b", 1, "2011-11-11", 1],
        ["a@c", 22, "2011-11-11", 1],
        ["a@d", 23, "2011-11-11", 2],
        ["a@e", 24, "2011-11-11", 3],
    ]

    with open_db("bizsearch") as conn:
        Business.insert_one_by_value(
            conn, "biz1", "biz1.com", "4044898763", "AGY", "US", "P", "Jack"
        )
        Business.insert_a_batch(conn, business_batch)

        Email.insert_one_by_value(conn, "a@b", 1, "2011-11-11", 0)
        Email.insert_a_batch(conn, email_batch)

        for row in Email.fetch_by(conn, ["*"], address="a@b"):
            print(row)
0
 def __init__(self):
     """Open the 'bizsearch_work' database and keep a connection and cursor.

     The connection returned by ``open_db`` is stored as ``self.conn`` and
     a cursor created from it as ``self.dbcursor``.
     """
     connection = open_db("bizsearch_work")
     self.conn = connection
     self.dbcursor = connection.cursor()