Пример #1
0
class TemplateMange(Base):
    """Crawl-template record stored in the ``template`` table."""
    __tablename__ = 'template'

    # Template name (news/weixin/weibo).
    template_name = db.Column(db.String(100))
    # Template type (generic news web page / Sina Weibo / WeChat official
    # account).
    template_type = db.Column(db.String(100))
    # What to crawl: a URL, a Weibo uuid, or a WeChat account id. Unique.
    crawl_url = db.Column(db.String(100), unique=True)
    # Whether the crawl URL can be added as a project.
    # The default is the string '1' so that it matches the String column
    # type: an integer default would leave a freshly inserted object holding
    # int 1 while the same row reloaded from the database yields '1'.
    status = db.Column(db.String(100), default='1')
Пример #2
0
class DataStorage(Base):
    """Per-project storage settings kept in the ``datastorage`` table."""
    __tablename__ = 'datastorage'

    project_name = db.Column(db.String(100))
    project_alias = db.Column(db.String(100))

    num = db.Column(db.Integer, default=50)  # storage batch size
    file_size = db.Column(db.Integer)  # size of stored attachments, in Mb
Пример #3
0
class ServerMachine(Base):
    """A crawler server node and its master/slave role."""
    # The attribute was previously misspelled (``__tabelename__``), so the
    # explicit table name was never applied. NOTE(review): databases created
    # with the misspelling may hold this data under an auto-generated table
    # name and need a rename/migration - confirm before deploying.
    __tablename__ = 'sk_server_machine'

    url = db.Column(db.String(50), unique=True)
    # 1 = enabled, 0 = disabled.
    status = db.Column(db.SmallInteger, default=1)
    # 1 = master, 2 = slave (per the original column comment).
    is_master = db.Column(db.SmallInteger)

    @classmethod
    def master_url(cls):
        """Return the URL of the first enabled master, or None if there is none."""
        master = cls.query.filter(
            and_(cls.status == 1, cls.is_master == 1)
        ).first()
        if master is None:
            return None
        return master.url

    @classmethod
    def slave_urls(cls):
        """Return the URLs of all enabled rows whose ``is_master`` is 0."""
        # NOTE(review): the column comment documents slaves as is_master == 2,
        # but this query selects is_master == 0. Left unchanged because the
        # stored values cannot be verified from here - confirm which is right.
        slaves = cls.query.filter(
            and_(cls.status == 1, cls.is_master == 0)
        ).all()
        return [slave.url for slave in slaves]
Пример #4
0
class Project(Base):
    """Crawler project record stored in the ``project`` table."""
    __tablename__ = 'project'

    project_name = db.Column(db.String(100), unique=True)
    # Applicant.
    applicant = db.Column(db.String(100))
    # Developers of the project.
    developers = db.Column(db.String(100))
    # The project that raised the requirement.
    for_project = db.Column(db.String(100))
    # Remarks about the project.
    project_alias = db.Column(db.String(100))
    # Category.
    category = db.Column(db.String(255))
    # Master/slave distributed crawler flag: 0 = standalone, 1 = distributed.
    is_msd = db.Column(db.SmallInteger)
    # Run state: running or sleeping.
    status = db.Column(db.String(50))
Пример #5
0
class Spider(Base):
    """Spider record stored in the ``spider`` table."""
    __tablename__ = 'spider'

    name = db.Column(db.String(100))

    # Required and indexed; presumably the id of the owning project row
    # (no foreign key is declared here - verify against callers).
    project_id = db.Column(db.INTEGER, nullable=False, index=True)
    project_name = db.Column(db.String(100))

    type = db.Column(db.String(50))
    address = db.Column(db.String(100))
    job_id = db.Column(db.String(255))
Пример #6
0
class Scheduler(Base):
    """Scheduling settings for a crawler project (``scheduler`` table)."""
    __tablename__ = 'scheduler'

    project_id = db.Column(db.INTEGER, nullable=False, index=True)  # crawler project id

    # Cron-style fields for periodic scheduling.
    cron_month = db.Column(db.String(255), default="*")  # month
    cron_day_of_month = db.Column(db.String(255), default="*")  # day, defaults to *
    cron_hour = db.Column(db.String(255), default="*")  # hour, defaults to *
    cron_minutes = db.Column(db.String(255), default="0")  # minute, defaults to 0

    # Whether periodic scheduling is allowed: 0 = yes, -1 = no.
    enabled = db.Column(db.INTEGER, default=0)
    # Scheduling mode: "periodic" or "onetime".
    run_type = db.Column(db.String(20), default="periodic")
    # Task description.
    desc = db.Column(db.Text)
Пример #7
0
class DataCount(Base):
    """Per-project data statistics stored in the ``sk_data_count`` table.

    Every column, including the counters and sizes, is declared as a
    255-character string.
    """
    __tablename__ = 'sk_data_count'

    # Project name. **Must be identical to the English project name uploaded
    # to the crawler platform.
    project_name = db.Column(db.String(255))
    developers = db.Column(db.String(255))

    address = db.Column(db.String(255))
    db_name = db.Column(db.String(255))
    table_name = db.Column(db.String(255))

    number = db.Column(db.String(255))

    image_number = db.Column(db.String(255))
    video_number = db.Column(db.String(255))
    audio_number = db.Column(db.String(255))
    file_number = db.Column(db.String(255))

    image_size = db.Column(db.String(255))
    video_size = db.Column(db.String(255))
    audio_size = db.Column(db.String(255))
    file_size = db.Column(db.String(255))
Пример #8
0
class SendEmail(Base):
    """E-mail notification record stored in the ``sendemail`` table."""
    __tablename__ = 'sendemail'

    # Project name.
    project_name = db.Column(db.String(100))
    # Project id (kept as a string in this table).
    project_id = db.Column(db.String(100))
    # Scheduled-job id; unique per row.
    job_id = db.Column(db.String(100), unique=True)
    # Recipient e-mail address.
    email = db.Column(db.String(100))