def scrape_all_disease_contents(disease_db):
    """Scrape disease contents in repeated passes until none is needed.

    Calls ``scrape_all_disease_contents_once(disease_db)`` again and again
    for as long as it returns a truthy value.

    Args:
        disease_db: The disease database passed through unchanged to
            ``scrape_all_disease_contents_once`` on every pass.
    """
    while scrape_all_disease_contents_once(disease_db):
        pass


if __name__ == "__main__":
    common.setup_logger_warning()
    logger = common.get_logger()
    logger.warning("Start scraping!")

    # @todo: use INFO log level, disable logs from the 'request' package
    #logging.getLogger("requests").propagate = False
    #logging.getLogger("requests").setLevel(logging.WARNING)

    # time.clock() was removed in Python 3.8 and returned CPU time on Unix;
    # time.time() gives the wall-clock duration this log line is meant to
    # report and exists on every Python version.
    start_time = time.time()

    disease_db = dict()
    common.init_disease_db(disease_db)

    scrape_all_disease_metadata(disease_db)
    logger.warning("No. of diseases to scrape: %s",
                   len(disease_db[common.ALL_DISEASES_VIEW]))

    scrape_all_disease_contents(disease_db)

    end_time = time.time()
    logger.warning("Total Scraping Time:%ss", end_time - start_time)

    common.dump_disease_db(disease_db, common.DISEASE_DB_FILE_EN)
    logger.warning("Finished scraping!")
for i in range(0, len(all_diseases)): original_disease = all_diseases[i] common.get_logger().info(''.join(['Translating the ', str(i), 'th/', str(len(all_diseases)), ' disease "', original_disease.name, '"...'])) translated_disease = translate_a_disease(original_disease, language_code) translated_disease_db[common.ALL_DISEASES_VIEW][\ translated_disease.name] = translated_disease return disease_db_translated def load_en_db(): with open(common.DISEASE_DB_FILE_EN, 'rb') as f: disease_db_en = pickle.load(f) return disease_db_en if __name__ == '__main__': common.setup_logger_info() start_time = time.clock() disease_db_en = load_en_db() disease_db_vi = translate_db(disease_db_en, language_code='vi') common.dump_disease_db(disease_db_vi, common.DISEASE_DB_FILE_VI) end_time = time.clock() common.get_logger().info(''.join(["Total Translation Time:", str(end_time - start_time), "s"]))