def crawler_and_save(serial_number, save=False):
    """Fetch one football-news page, parse it into a FootballNews record, and persist it.

    Steps: download the page (serialized via the module-level ``lock``), optionally
    save the raw HTML to ``local_file_path``, scrape title/type/time/content/tags
    from the ``.new-area`` element, then write the record to MySQL.

    :param serial_number: id substituted into ``football_news_url`` and ``local_file_path``
    :param save: when True, also write the raw page text to disk
    :returns: None (results are reported via print; record is written to MySQL)
    """
    # Send — lock serializes concurrent requests (presumably a shared rate-limit
    # guard; TODO confirm it is still needed). try/finally guarantees the lock
    # and HTTP session are released even if the request raises.
    lock.acquire()
    try:
        session = HTMLSession()
        try:
            response = session.get(url=football_news_url.format(serial_number))
        finally:
            session.close()
    finally:
        lock.release()

    # Fail fast on a missing page. (The original assigned a dead `result = None`
    # here and kept parsing the error page anyway.)
    if response.status_code != 200:
        print('serial_number={},Error: 当前页面不存在!'.format(serial_number))
        return

    # Success
    page_text = response.text

    # Save raw HTML locally when requested.
    if save:
        with open(local_file_path.format(serial_number), 'w', encoding='UTF-8') as file:
            file.write(page_text)

    news = FootballNews(id=str(uuid.uuid4()).upper(),
                        serial_number=serial_number,
                        news_type=0)

    # Analyse — any structural surprise in the page marks the record invalid.
    try:
        area = response.html.find('.new-area')[0]
        news.news_type = news_type_dictionary.get(area.find('span')[0].text.strip())
        try:
            # Second <span> carries the creation time after a fixed 6-char prefix.
            news.create_time = datetime.datetime.strptime(
                area.find('span')[1].text[6:].strip(), datetime_format)
        except ValueError:
            # Unparseable timestamp: fall back to "now" rather than dropping the record.
            news.create_time = datetime.datetime.now()
        news.title = area.find('.new-title')[0].text.strip()
        # join() builds the strings in one pass instead of quadratic `+=`.
        news.content = ''.join(
            p.text.strip() for p in area.find('.new-content')[0].find('p'))
        news.tags = ','.join(
            a.text.strip() for a in area.find('.new-tags')[0].find('a'))
    except Exception:
        news = None

    def get_session():
        # Open a fresh SQLAlchemy session.
        # WARNING(review): credentials and host are hard-coded here —
        # move to configuration / environment variables.
        engine = create_engine('mysql://*****:*****@47.94.84.81:3306/test_mysql')
        # Session factory bound to this engine.
        DBSession = sessionmaker(bind=engine)
        return DBSession()

    def close_session(session):
        session.close()

    if news is not None:
        try:
            db_session = get_session()
            db_session.add(news)
            db_session.commit()
            close_session(db_session)
            print('serial_number={},写入完毕!'.format(serial_number))
        except Exception:
            print('serial_number={},Error: 向MySQL写入数据失败!'.format(serial_number))
    else:
        print('serial_number={},Error: 当前页面不存在!'.format(serial_number))