def run():
    df = pd.read_csv('./cleanfile.csv', encoding='utf-8', sep=',')
    df["sentiments"] = df["content"].map(
        lambda c: snownlp.SnowNLP(c).sentiments)
    df["keywords"] = df["content"].map(getKeyWord)

    #engine = create_engine('mysql+pymysql://root:@127.0.0.1:3306/sina')
    engine = create_engine(
        'mysql+mysqlconnector://root:[email protected]:3306/sina?charset=utf8&connect_timeout=10'
    )

    dtypedict = {
        'id': Integer(),
        'uid': VARCHAR(length=15),
        'area': VARCHAR(length=15),
        'ipadd': VARCHAR(length=15),
        'usertype': VARCHAR(length=10),
        'agree': VARCHAR(length=10),
        'cmttime': DATETIME(),
        'content': TEXT,
        'sentiments': DECIMAL('10,10'),
        'keywords': VARCHAR(length=100),
    }
    df.to_sql(name='news',
              con=engine,
              chunksize=100000,
              if_exists='replace',
              index=True,
              index_label='id',
              dtype=dtypedict)
示例#2
0
class Block(BaseModel):
    __tablename__ = 'block_behavior'

    TIME = Column(DATETIME(20), primary_key=True)
    IP = Column(CHAR(60), primary_key=True)
    PORT = Column(Integer, primary_key=True)
    TYPE = Column(String(20))
    METHOD = Column(String(20))
    URI = Column(Text)
    INFO = Column(Text)
示例#3
0
class ReagentInventoryTable(BaseModel):
    __tablename__ = 'reagent_inventory'
    node_id = Column(VARCHAR(45), primary_key=True, nullable=False)
    node_type = Column(VARCHAR(45), primary_key=True, nullable=False)
    reagent_name = Column(VARCHAR(45), primary_key=True, nullable=False)
    reagent_type = Column(
        VARCHAR(45), primary_key=True,
        nullable=False)  #reagent name can be All|Primary|Auxiliary
    reagent_count = Column(Integer())
    reagent_warn_threshold = Column(Integer())
    reagent_err_threshold = Column(Integer())
    expire_date_time = Column(DATETIME())
    reagent_pack_lot = Column(VARCHAR(45))
    updated_timestamp = Column(DATETIME())

    def __init__(self,\
                 node_id='NodeID',\
                 node_type='NodeType',\
                 reagent_name='ReagentName',\
                 reagent_type='All',\
                 reagent_count=None,\
                 reagent_warn_threshold=None,\
                 reagent_err_threshold=None,\
                 expire_date_time=None,\
                 reagent_pack_lot=None,\
                 updated_timestamp=None):
        self.id = id
        self.node_id = node_id
        self.node_type = node_type
        self.reagent_name = reagent_name
        self.reagent_type = reagent_type
        self.reagent_count = reagent_count
        self.reagent_warn_threshold = reagent_warn_threshold
        self.reagent_err_threshold = reagent_err_threshold
        self.expire_date_time = expire_date_time
        self.reagent_pack_lot = reagent_pack_lot
        self.updated_timestamp = updated_timestamp
示例#4
0
文件: models.py 项目: iam888/ant
class Asset(BaseModel):
    '''
    资产表,sn资产唯一标识,可含有业务含义
    '''
    __tablename__ = 'asset'
    __table_args__ = (UniqueConstraint('idc_id', 'sn', name='uniq_idc_sn'), {
        'mysql_engine': 'InnoDB'
    })

    id = Column(Integer, primary_key=True)
    sn = Column(String(50), nullable=False, unique=True)
    idc_id = Column(Integer(), ForeignKey('idc.id'), nullable=False)
    is_virtual = Column(Integer(), nullable=False, default=0)
    purchase_date = Column(DATETIME(), nullable=False)
    is_delete = Column(Integer(), nullable=False, default=0)
示例#5
0
class SampleLocationTable(BaseModel):
    __tablename__ = 'sample_location'
    sid = Column(VARCHAR(24), primary_key=True, nullable=False, unique=True)
    location = Column(VARCHAR(24))
    timestamp = Column(DATETIME())

    def __init__(self, sid, location=None, timestamp=None):
        self.sid = sid
        self.location = location
        self.timestamp = timestamp

    def __repr__(self):
        return 'sample location information: \n'+\
               'sid:'+HORIZONTAL_TABLE+self.sid+\
               'location:'+HORIZONTAL_TABLE+self.location+\
               'update timestamp:'+HORIZONTAL_TABLE+self.timestamp
示例#6
0
class Hack(BaseModel):
    __tablename__ = 'jm_hacker'

    id = Column(Integer, primary_key=True)
    ip = Column(CHAR(60))
    hack_city = Column(CHAR(40))
    hack_addr = Column(CHAR(40))
    url = Column(TEXT())
    host = Column(CHAR(80))
    acl = Column(TEXT(80))
    src_time = Column(DATETIME(20))
    method = Column(CHAR(10))
    headers = Column(TEXT(200))
    user_agent = Column(TEXT(80))
    cookie = Column(TEXT(80))
    post_data = Column(TEXT())
示例#7
0
class ResultTable(BaseModel):
    __tablename__ = 'result'
    rid = Column(Integer(), primary_key=True, nullable=False, unique=True)
    test_name = Column(VARCHAR(45))
    dilution_profile = Column(VARCHAR(45))
    dilution_factor = Column(VARCHAR(45))
    result = Column(VARCHAR(45))
    datetime = Column(DATETIME())
    instrument_id = Column(VARCHAR(45))
    sid = Column(VARCHAR(24))
    aspect = Column(VARCHAR(45))
    flagged = Column(Integer())

    def __init__(self,\
                 test_name = None,\
                 dilution_profile = None,\
                 dilution_factor = None,\
                 result = None,\
                 datetime = None,\
                 instrument_id = None,\
                 sid = None,\
                 aspect = None,\
                 flagged = None):
        self.test_name = test_name
        self.dilution_factor = dilution_factor
        self.dilution_profile = dilution_profile
        self.result = result
        self.datetime = datetime
        self.instrument_id = instrument_id
        self.sid = sid
        self.aspect = aspect
        self.flagged = flagged

    def __repr__(self):
        return 'result information: \n'+\
               'test name:'+HORIZONTAL_TABLE+self.test_name+\
               'dilution profile:'+HORIZONTAL_TABLE+self.dilution_profile+\
               'dilution factor:'+HORIZONTAL_TABLE+self.dilution_factor+\
               'result:'+HORIZONTAL_TABLE+self.result+\
               'datetime:'+HORIZONTAL_TABLE+str(self.datetime)+\
               'instrument_id:'+HORIZONTAL_TABLE+str(self.instrument_id)+\
               'sid:'+HORIZONTAL_TABLE+self.sid,\
               'aspect:'+HORIZONTAL_TABLE+self.aspect,\
               'flagged:'+HORIZONTAL_TABLE+self.flagged
示例#8
0
    def process_item(self, item, spider):
        """
        将数据写入数据库中
        """
        data = dict(item)
        # 指定建表时的数据类型
        type_dict = {key: VARCHAR(length=255) for key in data}
        df = pd.DataFrame([data])

        df['inserttime'] = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        # 指定插入时间的格式为时间格式
        type_dict['inserttime'] = DATETIME()
        # print(df)
        df.to_sql(self.table_name,
                  self.engine,
                  index=False,
                  if_exists='append',
                  dtype=type_dict)
        return item
示例#9
0
    def test_defaults(self):
        assert self._comp(DATE()) == 'DATE'
        assert self._comp(DATETIME()) == 'TIMESTAMP(6)'
        assert self._comp(TIMESTAMP()) == 'TIMESTAMP(6)'
        assert self._comp(TIME()) == 'TIME(6)'

        assert self._comp(CHAR()) == 'CHAR(1)'
        assert self._comp(VARCHAR()) == 'LONG VARCHAR'
        assert self._comp(NCHAR()) == 'CHAR CHAR SET UNICODE'
        assert self._comp(NVARCHAR()) == 'LONG VARCHAR CHAR SET UNICODE'
        assert self._comp(CLOB()) == 'CLOB'
        assert self._comp(TEXT()) == 'CLOB'

        assert self._comp(DECIMAL()) == 'DECIMAL(5, 0)'
        assert self._comp(NUMERIC()) == 'NUMERIC(5, 0)'
        assert self._comp(INTEGER()) == 'INTEGER'
        assert self._comp(FLOAT()) == 'FLOAT'
        assert self._comp(REAL()) == 'REAL'
        assert self._comp(SMALLINT()) == 'SMALLINT'
        assert self._comp(BIGINT()) == 'BIGINT'

        assert self._comp(BOOLEAN()) == 'BYTEINT'
示例#10
0
    def test_numeric_coerced_to_text(self):
        """Numeric columns are coerced to text columns."""
        columns = self.coercer._coerce([
            Column('column', NUMERIC()),
            Column('column', BOOLEAN()),
            Column('column', INTEGER()),
            Column('column', BIGINT()),
            Column('column', SMALLINT()),
            Column('column', DATETIME()),
            Column('column', TIMESTAMP()),
        ])

        column_types = [type(column.type) for column in columns]

        self.assertListEqual(column_types, [
            TEXT,
            IntegerDecorator,
            IntegerDecorator,
            IntegerDecorator,
            IntegerDecorator,
            DatetimeDecorator,
            DatetimeDecorator,
        ])
示例#11
0
           default=text(u"'0'"),
           primary_key=True),
    Column(u'event_id', INTEGER(), primary_key=True),
    Column(u'record', VARCHAR(length=100), primary_key=True),
    Column(u'field_name', VARCHAR(length=100), primary_key=True),
    Column(u'value', TEXT()),
)

# this is mostly for testing
redcap_surveys_response = Table(
    'redcap_surveys_response',
    Base.metadata,
    Column(u'response_id', INTEGER(), primary_key=True, nullable=False),
    Column(u'participant_id', INTEGER()),
    Column(u'record', VARCHAR(length=100)),
    Column(u'first_submit_time', DATETIME()),
    Column(u'completion_time', DATETIME()),
    Column(u'return_code', VARCHAR(length=8)),
)

redcap_surveys_participants = Table(
    'redcap_surveys_participants',
    Base.metadata,
    Column(u'participant_id', INTEGER(), primary_key=True, nullable=False),
    Column(u'survey_id', INTEGER()),
    Column(u'event_id', INTEGER()),
    Column(u'hash', VARCHAR(length=6)),
    Column(u'legacy_hash', VARCHAR(length=32)),
    Column(u'participant_email', VARCHAR(length=255)),
    Column(u'participant_identifier', VARCHAR(length=255)),
)
示例#12
0
 def _type_descriptor_mock(desc):
     if type(desc) == DECIMAL:
         return NUMERIC(precision=desc.precision, scale=desc.scale)
     if type(desc) == DATETIME:
         return DATETIME()
示例#13
0
 def load_dialect_impl(self, dialect):
     if dialect.name == 'mysql':
         return dialect.type_descriptor(DECIMAL(precision=20,
                                                scale=6,
                                                asdecimal=True))
     return dialect.type_descriptor(DATETIME())