Exemplo n.º 1
0
def crawl_save_upload():
    '''Crawl all order sites, save the results to an xlsx file and upload it.

    Runs in three phases, each bracketed by progress banners on stdout:

    1. Crawl: create a workbook and a database session, then run every
       site crawler's ``main`` against them, in a fixed order.
    2. Save: call ``delete_data()``, then write the workbook to
       ``data/<timestamp>.xlsx``.
    3. Upload: obtain a media id for the saved file and send it. Any
       failure is printed and also forwarded through ``send_message``.

    Takes no arguments and returns None.
    '''
    print('-----数据抓取开始-----')
    wb = Workbook()
    engine, Base, session = get_mysql_connection()
    Order, Website = create_table(engine, Base)
    add_default_data(session, Website)
    # Every crawler module exposes the same main(wb, session, Order, Website)
    # entry point, so they can be driven from a single loop.
    crawlers = (
        codemart_crawler,
        oschina_crawler,
        rrkf_crawler,
        shixian_crawler,
        wywaibao_crawler,
        yuanjisong_crawler,
    )
    for crawler in crawlers:
        crawler.main(wb, session, Order, Website)
    print('-----数据抓取结束-----')

    print('-----文件保存开始-----')
    delete_data()
    saved_at = datetime.now()
    xlsx_path = r'data/%s.xlsx' % saved_at.strftime("%Y-%m-%d %H-%M-%S")
    wb.save(xlsx_path)
    # NOTE(review): the reason for this pause is not visible here; presumably
    # it lets the file settle on disk before the upload starts -- confirm.
    time.sleep(3)
    print('-----文件保存结束-----')

    print('-----文件上传开始-----')
    failure = None
    media_id = get_media_id(xlsx_path)
    if not isinstance(media_id, str):
        # A non-str result is treated as an error whose detail is at index 1.
        failure = '获取media_id失败:%s' % media_id[1]
    else:
        upload_result = send_file(media_id)
        # Equality (not identity) is kept on purpose: send_file's exact
        # return type is not visible from this block.
        if upload_result == True:
            print('文件上传成功:%s' % xlsx_path)
        else:
            failure = '文件上传失败:%s' % upload_result[1]

    # Both failure paths are reported the same way: stdout plus send_message.
    if failure is not None:
        print(failure)
        send_message(failure)

    print('-----文件上传结束-----')
                            category='',
                            pub_time=pub_time,
                            is_valid=is_valid,
                            is_delete=False if is_valid else True)
                        order.website = website
                        session.add(order)
                        if is_valid == True:
                            sheet.append(
                                [count, desc, link, pub_time, contact, ''])
                            count += 1
                else:
                    message = '人人开发详情爬取第%d行出错:%s' % (details[0], details[1])
                    print(message)
                    send_message(message)
            session.commit()
        elif isinstance(info_list, tuple):
            message = '人人开发爬取第%d行出错:%s' % (info_list[0], info_list[1])
            print(message)
            send_message(message)
    print('结束爬取人人开发订单')


if __name__ == '__main__':
    # Script entry point: build a workbook and a database session, run this
    # module's main() against them, then save the workbook under data/ using
    # the current time as the file name.
    wb = Workbook()
    engine, Base, session = get_mysql_connection()
    Order, Website = create_table(engine, Base)
    add_default_data(session, Website)
    main(wb, session, Order, Website)

    # The timestamp is taken after main() returns, as in the original flow.
    now = datetime.now()
    timestamp = now.strftime("%Y-%m-%d %H-%M-%S")
    wb.save(r'data/%s.xlsx' % timestamp)