def run():
    """Score sentiment and extract keywords for the cleaned comments,
    then write the enriched frame to the MySQL table ``news``.

    Reads ./cleanfile.csv, adds ``sentiments`` and ``keywords`` columns,
    and replaces the target table (``if_exists='replace'``).
    """
    df = pd.read_csv('./cleanfile.csv', encoding='utf-8', sep=',')
    # SnowNLP yields a sentiment probability in [0, 1] per comment.
    df["sentiments"] = df["content"].map(
        lambda c: snownlp.SnowNLP(c).sentiments)
    df["keywords"] = df["content"].map(getKeyWord)
    # NOTE(review): credentials are hard-coded in the URL; move them to
    # configuration or environment variables before deploying.
    engine = create_engine(
        'mysql+mysqlconnector://root:[email protected]:3306/sina?charset=utf8&connect_timeout=10'
    )
    # Explicit column types for the table created by to_sql.
    dtypedict = {
        'id': Integer(),
        'uid': VARCHAR(length=15),
        'area': VARCHAR(length=15),
        'ipadd': VARCHAR(length=15),
        'usertype': VARCHAR(length=10),
        'agree': VARCHAR(length=10),
        'cmttime': DATETIME(),
        'content': TEXT,
        # Fix: DECIMAL takes precision and scale as separate integers; the
        # original passed the single string '10,10' as the precision, which
        # only rendered valid DDL by accident of string interpolation.
        'sentiments': DECIMAL(10, 10),
        'keywords': VARCHAR(length=100),
    }
    df.to_sql(name='news', con=engine, chunksize=100000,
              if_exists='replace', index=True, index_label='id',
              dtype=dtypedict)
class Block(BaseModel):
    """ORM model for one blocked request (table ``block_behavior``).

    Rows are identified by the composite primary key (TIME, IP, PORT).
    """
    __tablename__ = 'block_behavior'
    # NOTE(review): DATETIME(20) — the generic SQLAlchemy DATETIME's first
    # positional argument is a `timezone` flag, not a length, and MySQL's
    # fractional-seconds precision maxes out at 6; confirm the intended value.
    TIME = Column(DATETIME(20), primary_key=True)
    IP = Column(CHAR(60), primary_key=True)
    PORT = Column(Integer, primary_key=True)
    TYPE = Column(String(20))
    # METHOD/URI presumably describe the blocked HTTP request — TODO confirm
    # against whatever writes this table.
    METHOD = Column(String(20))
    URI = Column(Text)
    INFO = Column(Text)
class ReagentInventoryTable(BaseModel):
    """Per-node reagent inventory (table ``reagent_inventory``).

    The composite primary key is (node_id, node_type, reagent_name,
    reagent_type); counts, thresholds, and expiry metadata are payload.
    """
    __tablename__ = 'reagent_inventory'
    node_id = Column(VARCHAR(45), primary_key=True, nullable=False)
    node_type = Column(VARCHAR(45), primary_key=True, nullable=False)
    reagent_name = Column(VARCHAR(45), primary_key=True, nullable=False)
    # reagent_type can be All|Primary|Auxiliary
    reagent_type = Column(VARCHAR(45), primary_key=True, nullable=False)
    reagent_count = Column(Integer())
    reagent_warn_threshold = Column(Integer())
    reagent_err_threshold = Column(Integer())
    expire_date_time = Column(DATETIME())
    reagent_pack_lot = Column(VARCHAR(45))
    updated_timestamp = Column(DATETIME())

    def __init__(self,
                 node_id='NodeID',
                 node_type='NodeType',
                 reagent_name='ReagentName',
                 reagent_type='All',
                 reagent_count=None,
                 reagent_warn_threshold=None,
                 reagent_err_threshold=None,
                 expire_date_time=None,
                 reagent_pack_lot=None,
                 updated_timestamp=None):
        # Fix: removed the original `self.id = id`, which bound the `id`
        # *builtin function* to a non-existent attribute — there is no `id`
        # column on this model, so the assignment was a leftover bug.
        self.node_id = node_id
        self.node_type = node_type
        self.reagent_name = reagent_name
        self.reagent_type = reagent_type
        self.reagent_count = reagent_count
        self.reagent_warn_threshold = reagent_warn_threshold
        self.reagent_err_threshold = reagent_err_threshold
        self.expire_date_time = expire_date_time
        self.reagent_pack_lot = reagent_pack_lot
        self.updated_timestamp = updated_timestamp
class Asset(BaseModel):
    '''
    Asset table; ``sn`` is the unique asset identifier and may carry
    business meaning.
    '''
    __tablename__ = 'asset'
    # (idc_id, sn) must be unique per data center, in addition to the
    # table-wide uniqueness of sn itself.
    __table_args__ = (UniqueConstraint('idc_id', 'sn', name='uniq_idc_sn'), {
        'mysql_engine': 'InnoDB'
    })
    id = Column(Integer, primary_key=True)
    sn = Column(String(50), nullable=False, unique=True)
    idc_id = Column(Integer(), ForeignKey('idc.id'), nullable=False)
    # Integer flags defaulting to 0 — presumably booleans (0/1); confirm.
    is_virtual = Column(Integer(), nullable=False, default=0)
    purchase_date = Column(DATETIME(), nullable=False)
    is_delete = Column(Integer(), nullable=False, default=0)
class SampleLocationTable(BaseModel):
    """Last known location of a sample (table ``sample_location``)."""
    __tablename__ = 'sample_location'
    sid = Column(VARCHAR(24), primary_key=True, nullable=False, unique=True)
    location = Column(VARCHAR(24))
    timestamp = Column(DATETIME())

    def __init__(self, sid, location=None, timestamp=None):
        self.sid = sid
        self.location = location
        self.timestamp = timestamp

    def __repr__(self):
        # Fix: wrap fields in str() — the original concatenated
        # `self.timestamp` (a datetime) and a possibly-None `location`
        # directly into a str, raising TypeError.  The sibling ResultTable
        # model already uses str() for its datetime field.
        return 'sample location information: \n'+\
            'sid:'+HORIZONTAL_TABLE+str(self.sid)+\
            'location:'+HORIZONTAL_TABLE+str(self.location)+\
            'update timestamp:'+HORIZONTAL_TABLE+str(self.timestamp)
class Hack(BaseModel):
    """Logged attack attempt (table ``jm_hacker``)."""
    __tablename__ = 'jm_hacker'
    id = Column(Integer, primary_key=True)
    ip = Column(CHAR(60))
    # hack_city / hack_addr — presumably geolocation derived from `ip`;
    # TODO confirm against whatever populates this table.
    hack_city = Column(CHAR(40))
    hack_addr = Column(CHAR(40))
    url = Column(TEXT())
    host = Column(CHAR(80))
    # NOTE(review): TEXT(80)/TEXT(200) length args and DATETIME(20) are
    # dialect-dependent (MySQL picks a TEXT variant from the length; the
    # generic DATETIME takes a timezone flag) — confirm intended types.
    acl = Column(TEXT(80))
    src_time = Column(DATETIME(20))
    method = Column(CHAR(10))
    headers = Column(TEXT(200))
    user_agent = Column(TEXT(80))
    cookie = Column(TEXT(80))
    post_data = Column(TEXT())
class ResultTable(BaseModel):
    """One analyzer test result (table ``result``)."""
    __tablename__ = 'result'
    rid = Column(Integer(), primary_key=True, nullable=False, unique=True)
    test_name = Column(VARCHAR(45))
    dilution_profile = Column(VARCHAR(45))
    dilution_factor = Column(VARCHAR(45))
    result = Column(VARCHAR(45))
    datetime = Column(DATETIME())
    instrument_id = Column(VARCHAR(45))
    sid = Column(VARCHAR(24))
    aspect = Column(VARCHAR(45))
    flagged = Column(Integer())

    def __init__(self,
                 test_name=None,
                 dilution_profile=None,
                 dilution_factor=None,
                 result=None,
                 datetime=None,
                 instrument_id=None,
                 sid=None,
                 aspect=None,
                 flagged=None):
        self.test_name = test_name
        self.dilution_factor = dilution_factor
        self.dilution_profile = dilution_profile
        self.result = result
        self.datetime = datetime
        self.instrument_id = instrument_id
        self.sid = sid
        self.aspect = aspect
        self.flagged = flagged

    def __repr__(self):
        # Fix: the original ended the expression with commas before the
        # 'aspect:' and 'flagged:' segments, so __repr__ returned a *tuple*
        # — repr() on an instance raised "TypeError: __repr__ returned
        # non-string".  It also concatenated the integer `flagged` without
        # str().  All fields are now str()-wrapped and joined with '+'.
        return 'result information: \n'+\
            'test name:'+HORIZONTAL_TABLE+str(self.test_name)+\
            'dilution profile:'+HORIZONTAL_TABLE+str(self.dilution_profile)+\
            'dilution factor:'+HORIZONTAL_TABLE+str(self.dilution_factor)+\
            'result:'+HORIZONTAL_TABLE+str(self.result)+\
            'datetime:'+HORIZONTAL_TABLE+str(self.datetime)+\
            'instrument_id:'+HORIZONTAL_TABLE+str(self.instrument_id)+\
            'sid:'+HORIZONTAL_TABLE+str(self.sid)+\
            'aspect:'+HORIZONTAL_TABLE+str(self.aspect)+\
            'flagged:'+HORIZONTAL_TABLE+str(self.flagged)
def process_item(self, item, spider):
    """Persist one scraped item into ``self.table_name`` via pandas.

    Every scraped field is stored as VARCHAR(255) when the table is first
    created; an ``inserttime`` timestamp column is appended as DATETIME.
    Returns the item unchanged so the pipeline chain continues.
    """
    record = dict(item)
    # Column types used by to_sql when it has to create the table.
    column_types = {}
    for field in record:
        column_types[field] = VARCHAR(length=255)
    frame = pd.DataFrame([record])
    # Stamp the row with the local wall-clock insertion time.
    frame['inserttime'] = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    # 指定插入时间的格式为时间格式 -> store the insert time as a datetime column.
    column_types['inserttime'] = DATETIME()
    frame.to_sql(self.table_name, self.engine, index=False,
                 if_exists='append', dtype=column_types)
    return item
def test_defaults(self):
    """Each argument-less type must compile to its dialect default DDL."""
    cases = [
        (DATE(), 'DATE'),
        (DATETIME(), 'TIMESTAMP(6)'),
        (TIMESTAMP(), 'TIMESTAMP(6)'),
        (TIME(), 'TIME(6)'),
        (CHAR(), 'CHAR(1)'),
        (VARCHAR(), 'LONG VARCHAR'),
        (NCHAR(), 'CHAR CHAR SET UNICODE'),
        (NVARCHAR(), 'LONG VARCHAR CHAR SET UNICODE'),
        (CLOB(), 'CLOB'),
        (TEXT(), 'CLOB'),
        (DECIMAL(), 'DECIMAL(5, 0)'),
        (NUMERIC(), 'NUMERIC(5, 0)'),
        (INTEGER(), 'INTEGER'),
        (FLOAT(), 'FLOAT'),
        (REAL(), 'REAL'),
        (SMALLINT(), 'SMALLINT'),
        (BIGINT(), 'BIGINT'),
        (BOOLEAN(), 'BYTEINT'),
    ]
    for sa_type, rendered in cases:
        assert self._comp(sa_type) == rendered
def test_numeric_coerced_to_text(self):
    """Numeric columns are coerced to text columns."""
    source_types = [
        NUMERIC(),
        BOOLEAN(),
        INTEGER(),
        BIGINT(),
        SMALLINT(),
        DATETIME(),
        TIMESTAMP(),
    ]
    coerced = self.coercer._coerce(
        [Column('column', sa_type) for sa_type in source_types])
    # NUMERIC -> TEXT; the four integer-ish types share IntegerDecorator;
    # both temporal types share DatetimeDecorator.
    expected = [TEXT] + [IntegerDecorator] * 4 + [DatetimeDecorator] * 2
    self.assertListEqual([type(col.type) for col in coerced], expected)
default=text(u"'0'"), primary_key=True),
    # Tail of a Table definition whose opening lies before this chunk.
    Column(u'event_id', INTEGER(), primary_key=True),
    Column(u'record', VARCHAR(length=100), primary_key=True),
    Column(u'field_name', VARCHAR(length=100), primary_key=True),
    Column(u'value', TEXT()),
)

# this is mostly for testing
# One row per survey response; first_submit_time/completion_time bracket
# the respondent's session — presumably set by REDCap itself; confirm.
redcap_surveys_response = Table(
    'redcap_surveys_response', Base.metadata,
    Column(u'response_id', INTEGER(), primary_key=True, nullable=False),
    Column(u'participant_id', INTEGER()),
    Column(u'record', VARCHAR(length=100)),
    Column(u'first_submit_time', DATETIME()),
    Column(u'completion_time', DATETIME()),
    Column(u'return_code', VARCHAR(length=8)),
)

# Survey participant roster, keyed by participant_id.
redcap_surveys_participants = Table(
    'redcap_surveys_participants', Base.metadata,
    Column(u'participant_id', INTEGER(), primary_key=True, nullable=False),
    Column(u'survey_id', INTEGER()),
    Column(u'event_id', INTEGER()),
    Column(u'hash', VARCHAR(length=6)),
    Column(u'legacy_hash', VARCHAR(length=32)),
    Column(u'participant_email', VARCHAR(length=255)),
    Column(u'participant_identifier', VARCHAR(length=255)),
)
def _type_descriptor_mock(desc):
    """Stand-in for ``Dialect.type_descriptor`` used in tests.

    Maps a DECIMAL to a NUMERIC with the same precision/scale and a
    DATETIME to a fresh DATETIME; any other type yields None (the original
    fell off the end implicitly — made explicit here).
    """
    # isinstance instead of `type(x) == T`: idiomatic, and also accepts
    # dialect-specific subclasses of the generic types.
    if isinstance(desc, DECIMAL):
        return NUMERIC(precision=desc.precision, scale=desc.scale)
    if isinstance(desc, DATETIME):
        return DATETIME()
    return None
def load_dialect_impl(self, dialect):
    """Pick the dialect-specific column implementation.

    MySQL backends get a high-precision DECIMAL(20, 6); every other
    backend falls back to a plain DATETIME.

    NOTE(review): DECIMAL-on-MySQL vs DATETIME-elsewhere is an unusual
    pairing — confirm this is intentional for this TypeDecorator.
    """
    if dialect.name == 'mysql':
        impl = DECIMAL(precision=20, scale=6, asdecimal=True)
    else:
        impl = DATETIME()
    return dialect.type_descriptor(impl)