def main():
    """Run the crawler against the configured URL using the harvester database.

    Reads the configuration via ``setup_config()``, calls ``setup_logging()``,
    runs ``db.check_database`` with the harvester credentials, opens a
    connection with ``db.database_connection``, calls ``db.create_table`` and
    then starts ``crawl`` from ``config['main']['url']``.

    The connection is handed to ``db.close_connection`` even when table
    creation or the crawl raises, so a failed run does not leak it.
    """
    config = setup_config()
    links = []
    setup_logging()

    # Database configs
    harvester = config['harvester']
    hostname = harvester['hostname']
    username = harvester['username']
    password = harvester['password']

    db.check_database(hostname, username, password)
    connection = db.database_connection(
        hostname, username, password,
        harvester['port'], harvester['database'])
    try:
        db.create_table(connection)
        # The same URL is passed twice, exactly as the original call did.
        base_url = config['main']['url']
        crawl(base_url, base_url, links, connection)
    finally:
        db.close_connection(connection)
def upload_csv_to_db():
    """Upload every row of the csv file to the database table.

    First it checks if the database file (``DB_FILENAME``) exists; if it
    doesn't, ``create_db()`` is called to create it along with the table that
    will store the companies' data. Then the first row of ``CSV_FILENAME`` is
    skipped and every remaining row is passed to ``insert_row``.

    The connection is closed even if an insert raises. The number of inserted
    rows is logged once the upload has finished successfully.
    """
    if not os.path.exists(DB_FILENAME):
        create_db()
    logging.info("Now uploading the csv file to the database")

    # newline='' is what the csv module asks for on file objects, so that
    # newlines embedded in quoted fields are parsed correctly.
    with open(CSV_FILENAME, "r", encoding='utf-8', newline='') as results:
        lines = csv.reader(results)
        next(lines, None)  # skip the first row (presumably the header)
        connection = connect_to_db()
        n_records = 0
        try:
            for line in lines:
                insert_row(line, connection)
                n_records += 1
        finally:
            close_connection(connection)
    logging.info("%s rows inserted in the table", n_records)
def run(self):
    """Send queued mails over one SMTP session until ``done`` is non-zero.

    Opens a MySQL connection and an SMTP session for this thread, then
    repeatedly takes a row from ``mails_queue`` (guarded by
    ``mailing_q_lock``), sends a mail to ``row[2]`` and records the result
    with ``db_manager.check_row``. A row whose send or update fails is put
    back on the queue. Both connections are closed when the loop ends.

    If the SMTP connection cannot be established the method returns early,
    after closing the database connection it had already opened.
    """
    print("Starts thread: %s" % self.threadName)

    # Server Connection to MySQL for Thread.
    db_server = MySQLdb.connect(
        host=DB_HOST, user=DB_USER, passwd=DB_PASS, db=DB_DABA)
    db_conn = db_server.cursor()

    try:
        smtp_session = smtplib.SMTP(SMTP_SERVER, SMTP_PORT)
    except Exception:
        # Nothing can be sent without an SMTP session; release the database
        # connection opened above instead of leaking it.
        db_manager.close_connection(db_server)
        return
    smtp_session.ehlo()
    smtp_session.starttls()
    smtp_session.login(SMTP_USER, SMTP_PASS)

    while done == 0:
        # The lock must be released on every path. Previously it stayed held
        # whenever the queue was empty, blocking every later acquire().
        with mailing_q_lock:
            row = None if mails_queue.empty() else mails_queue.get()
        if row is None:
            # NOTE(review): this spins while the queue is empty and ``done``
            # is still 0 -- consider a short sleep or a blocking get().
            continue

        mail_recipient = row[2]
        mail_headers = mail_manager.build_headers(mail_recipient)
        try:
            mail_manager.send_mail(smtp_session, mail_recipient, mail_headers)
            db_manager.check_row(db_conn, db_server, row[0], self.threadName)
        except Exception:
            # Best effort: requeue the row so it can be picked up again.
            mails_queue.put(row)

    db_manager.close_connection(db_server)
    try:
        mail_manager.close_connection(smtp_session)
    except Exception:
        print("GG")
    print("Ends thread: %s" % self.threadName)
def run(self):
    """Send one mail per available row, then close both connections.

    Opens a single SMTP session and a MySQL connection, then loops:
    ``db_manager.retrieve_single_row`` fetches one row, a mail is sent to
    ``row[2]``, and ``db_manager.check_row`` updates the table for ``row[0]``
    under this thread's name. The loop ends when no row is returned.

    The SMTP session is opened only once (it used to be opened twice, which
    leaked the first session), and both connections are closed even when
    sending or updating raises.
    """
    smtp_session = smtplib.SMTP(SMTP_SERVER, SMTP_PORT)
    db_server = None
    try:
        smtp_session.ehlo()
        smtp_session.starttls()
        smtp_session.login(SMTP_USER, SMTP_PASS)

        db_server = MySQLdb.connect(
            host=DB_HOST, user=DB_USER, passwd=DB_PASS, db=DB_DABA)
        db_conn = db_server.cursor()

        # Looping till we have rows available.
        while True:
            # Fetching one row for processing and locking the row with the
            # MySQL InnoDB FOR UPDATE clause.
            row = db_manager.retrieve_single_row(db_conn)
            if not row:
                break

            # Constructing mail headers.
            mail_recipient = row[2]
            mail_headers = mail_manager.build_headers(mail_recipient)

            # Sending mail through the already existing server connection.
            mail_manager.send_mail(smtp_session, mail_recipient, mail_headers)

            # Updating the table, and releasing the row lock.
            db_manager.check_row(db_conn, db_server, row[0], self.name)
    finally:
        # Closing connections once the thread is done (SMTP first, then the
        # database, as before); the database is closed even if SMTP close fails.
        try:
            mail_manager.close_connection(smtp_session)
        finally:
            if db_server is not None:
                db_manager.close_connection(db_server)
def get_all_gendas():
    """Return the result of ``db_manager.select_all_tasks`` on a fresh connection.

    A connection is created for the call and always handed back to
    ``db_manager.close_connection``, including when the query raises.
    """
    connection = db_manager.create_connection()
    try:
        return db_manager.select_all_tasks(connection)
    finally:
        db_manager.close_connection(connection)
smtp_session.starttls()
smtp_session.login(SMTP_USER, SMTP_PASS)

# Process rows one at a time until retrieve_single_row returns nothing.
while True:
    # Fetch a single row for processing; the row is locked with the
    # MySQL InnoDB FOR UPDATE clause.
    row = db_manager.retrieve_single_row(db_conn)
    if not row:
        break

    # Build the headers for this recipient and send the mail.
    recipient = row[2]
    headers = mail_manager.build_headers(recipient)
    mail_manager.send_mail(smtp_session, recipient, headers)

    # Update the table and release the row lock.
    db_manager.check_row(db_conn, db_server, row[0], 'main_thread')

# Closing connections once the work is done.
mail_manager.close_connection(smtp_session)
db_manager.close_connection(db_server)

# NOTE(review): timeit.timeit() with no arguments benchmarks ``pass`` and
# returns that duration, not a timestamp -- confirm how start_time is taken;
# timeit.default_timer() is probably what was intended on both ends.
end_time = timeit.timeit()

# NOTE(review): db_conn / db_server are used here after close_connection was
# called on db_server -- verify the performance helpers still work then.
db_manager.print_time_performance(db_conn, db_server)
db_manager.print_thread_performance(db_conn, db_server)

# NOTE(review): the difference is multiplied by 1000 but labelled "s".
print("Execution time: %d s" % ((end_time - start_time) * 1000))