示例#1
0
def scrape_all_disease_contents(disease_db):
    """Run content-scraping passes over ``disease_db`` until none remain.

    Every pass is delegated to ``scrape_all_disease_contents_once``; the
    loop keeps going for as long as that call returns a truthy value and
    stops on the first falsy result.
    """
    while scrape_all_disease_contents_once(disease_db):
        # All the work happens in the call above; nothing to do per pass.
        pass


if __name__ == "__main__":
    # Script entry point: initialise the disease DB, scrape metadata and then
    # contents for every disease, log the elapsed time and dump the result.
    common.setup_logger_warning()
    common.get_logger().warning("Start scraping!")

    # @todo: use INFO log level, disable logs from the 'request' package
    #logging.getLogger("requests").propagate = False
    #logging.getLogger("requests").setLevel(logging.WARNING)

    # time.clock() was removed in Python 3.8 (and measured CPU time on Unix,
    # which under-reports a network-bound scrape); time.time() gives
    # wall-clock seconds on every Python version.
    start_time = time.time()
    disease_db = dict()
    common.init_disease_db(disease_db)
    scrape_all_disease_metadata(disease_db)
    common.get_logger().warning(''.join(["No. of diseases to scrape: ",
        str(len(disease_db[common.ALL_DISEASES_VIEW]))]))
    scrape_all_disease_contents(disease_db)
    end_time = time.time()
    common.get_logger().warning(''.join(["Total Scraping Time:",
                                str(end_time - start_time), "s"]))

    common.dump_disease_db(disease_db, common.DISEASE_DB_FILE_EN)

    common.get_logger().warning("Finished scraping!")


示例#2
0
    for i in range(0, len(all_diseases)):
        original_disease = all_diseases[i]
        common.get_logger().info(''.join(['Translating the ', str(i),
            'th/', str(len(all_diseases)), ' disease "',
            original_disease.name, '"...']))
        translated_disease = translate_a_disease(original_disease,
                                                    language_code)
        translated_disease_db[common.ALL_DISEASES_VIEW][\
                                translated_disease.name] = translated_disease
    
    return disease_db_translated


def load_en_db():
    """Load the pickled disease database stored at ``common.DISEASE_DB_FILE_EN``.

    The file is opened in binary mode and the object produced by
    ``pickle.load`` is returned unchanged.
    """
    with open(common.DISEASE_DB_FILE_EN, 'rb') as db_file:
        return pickle.load(db_file)


if __name__ == '__main__':
    # Script entry point: load the pickled EN database, translate it with
    # language code 'vi', dump the translated database and log the duration.
    common.setup_logger_info()

    # time.clock() was removed in Python 3.8 (and measured CPU time on Unix);
    # time.time() gives wall-clock seconds on every Python version.
    start_time = time.time()
    disease_db_en = load_en_db()
    disease_db_vi = translate_db(disease_db_en, language_code='vi')
    common.dump_disease_db(disease_db_vi, common.DISEASE_DB_FILE_VI)
    end_time = time.time()
    common.get_logger().info(''.join(["Total Translation Time:",
                                str(end_time - start_time), "s"]))