Пример #1
0
def getNewRequests(requestURL, Host):
    """Build a ``requests.Session`` that reuses a real browser's cookies.

    ``requestURL`` is opened in a Chrome WebDriver obtained from
    ``PublicFun.getWebDriver``. The cookies the driver reports and the
    browser's ``navigator.userAgent`` string are copied onto a new session,
    together with a fixed set of browser-like request headers.

    Args:
        requestURL: Page loaded in the browser to collect cookies from.
        Host: Value sent as the ``Host`` header by the returned session.

    Returns:
        A ``requests.Session`` carrying the copied cookies and headers.
    """
    import requests
    import Public.PublicFun as PublicFun

    JobID = PublicFun.createID()
    Chromedriver = PublicFun.getWebDriver("chrome", DataFolderName=JobID)
    try:
        Chromedriver.get(requestURL)
        cookies = Chromedriver.get_cookies()
        userAgent = Chromedriver.execute_script("return navigator.userAgent")
    finally:
        # Always release the browser, even when the page load or the script
        # call fails; otherwise the driver is left behind.
        PublicFun.closeWebDriver(JobID, Chromedriver)

    request = requests.Session()
    request.headers.update({
        'Accept':
        'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7',
        'Connection': 'keep-alive',
        'DNT': '1',
        'Host': Host,
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': userAgent,
    })

    for cookie in cookies:
        request.cookies.set(cookie['name'], cookie['value'])

    return request
Пример #2
0
def addQueue(SystemName, QueueType, Path, Files, Param):
    """Insert a job row into ``[dbo].[JobQueue]`` when ``checkQueue`` allows it.

    ``Param`` is stored unchanged when it is already a string; any other
    value is serialised to compact JSON first. The insert is attempted once
    and then retried up to 10 more times on failure; when every attempt
    fails the function gives up without raising.

    Args:
        SystemName: Stored in the ``SystemName`` column.
        QueueType: Stored in the ``QueueType`` column.
        Path: Stored in the ``Path`` column.
        Files: Stored in the ``Files`` column.
        Param: String or JSON-serialisable value stored in ``Param``.
    """
    import Public.PublicFun as PublicFun
    import json

    GUID = PublicFun.createID()
    if isinstance(Param, str):
        jsonData = Param
    else:
        jsonData = json.dumps(Param, separators=(',', ':'))

    sql = (
        "INSERT INTO [dbo].[JobQueue]([GUID],[SystemName],[QueueType],[Path],[Files],[Param],[D_INSERTTIME])"
        + "VALUES(?,?,?,?,?,?,?)")

    dbcon = getQueueDBConnect()
    try:
        if checkQueue(SystemName, QueueType, jsonData, dbcon):
            retry = 0
            while retry <= 10:
                try:
                    # The timestamp is refreshed on every attempt.
                    D_INSERTTIME = PublicFun.getNowDateTime(
                        "YYYY/MM/DD HH:MM:SS")
                    dbcon.Execute(sql, (GUID, SystemName, QueueType, Path,
                                        Files, jsonData, D_INSERTTIME))
                    break
                except Exception:
                    # The write failed: count the failure and try again.
                    retry += 1
                    print("Retry:" + str(retry))
    finally:
        # Close the connection even if checkQueue itself raises.
        dbcon.close()
Пример #3
0
def insertMappingList(dbcon, MapType, Value, RelValue):
    """Insert one row into ``[dbo].[MAPAAA]`` and return the new row's GUID.

    Args:
        dbcon: Connection object exposing ``Execute(sql, params)``.
        MapType: Stored in ``MAPAAA001``.
        Value: Stored in ``MAPAAA002``.
        RelValue: Stored in ``MAPAAA003``.

    Returns:
        The GUID generated by ``PublicFun.createID`` for the inserted row.
    """
    import Public.PublicFun as PublicFun

    new_guid = PublicFun.createID()
    statement = (
        "INSERT INTO [dbo].[MAPAAA]([GUID],[MAPAAA001],[MAPAAA002],[MAPAAA003],[D_INSERTUSER],[D_INSERTTIME],[D_MODIFYUSER],[D_MODIFYTIME])"
        "VALUES(?,?,?,?,'System',?,'','')")
    inserted_at = PublicFun.getNowDateTime("YYYY/MM/DD HH:MM:SS")
    row = (new_guid, MapType, Value, RelValue, inserted_at)
    dbcon.Execute(statement, row)
    return new_guid
Пример #4
0
def getPageSize(requestURL):
    """Return how many ``<option>`` entries the page's paging selector has.

    The page is loaded in a Chrome WebDriver and its source is parsed with
    BeautifulSoup. Two selector class combinations are tried in turn; the
    second is only used when the first matches nothing.

    Args:
        requestURL: URL of the listing page to inspect.

    Returns:
        The number of matching ``<option>`` elements (0 when neither
        selector matches).
    """
    JobID = PublicFun.createID()
    Chromedriver = PublicFun.getWebDriver("chrome", DataFolderName=JobID)
    try:
        Chromedriver.get(requestURL)
        # Fixed pause before reading the source; presumably gives the page's
        # scripts time to render the selector - TODO confirm.
        time.sleep(2)
        pageSource = Chromedriver.page_source
    finally:
        # Release the browser even if loading the page fails.
        PublicFun.closeWebDriver(JobID, Chromedriver)

    Soup = bs(pageSource, "html.parser")
    pageSize = len(Soup.select(
        "select.b-clear-border.js-paging-select.gtm-paging-top option"))
    if pageSize == 0:
        pageSize = len(Soup.select(
            "select.page-select.js-paging-select.gtm-paging-top option"))
    return pageSize
Пример #5
0
def writeDBMsg(msg):
    """Store ``msg`` in ``[dbo].[LogMsg]`` and echo it through ``LogHandler``.

    When the database insert fails, the message is prefixed with
    "writeDBMsg失敗:" before being passed to ``LogHandler.writeMsg``, so the
    failure is still visible in the log. The queue connection is always
    closed.

    Args:
        msg: Text to record.
    """
    import Public.PublicFun as PublicFun
    import Public.LogHandler as LogHandler

    dbcon = getQueueDBConnect()
    sql = ("INSERT INTO [dbo].[LogMsg]([GUID],[Message],[D_INSERTTIME])" +
           "VALUES(?,?,?)")
    try:
        try:
            dbcon.Execute(sql,
                          (PublicFun.createID(), PublicFun.SQLFilter(msg),
                           PublicFun.getNowDateTime("YYYY/MM/DD HH:MM:SS")))
        except Exception:
            # Best effort: a failed DB write must not stop the log output.
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            msg = "writeDBMsg失敗:" + str(msg)
        LogHandler.writeMsg(msg)
    finally:
        dbcon.close()
Пример #6
0
def insertOption(dbcon, OptionName, OptionCode, OptionType, RelGUID=None):
    """Insert an option into ``[dbo].[OPTAAA]`` and optionally link it.

    Args:
        dbcon: Connection object exposing ``Execute(sql, params)``.
        OptionName: Stored in ``OPTAAA001``.
        OptionCode: Stored in ``OPTAAA002``.
        OptionType: Stored in ``OPTAAA003``.
        RelGUID: When not ``None``, a row is also added to ``[dbo].[OPTAAB]``
            with ``RelGUID`` in ``OPTAAB001`` and the new option's GUID in
            ``OPTAAB002``.

    Returns:
        The GUID generated for the new ``OPTAAA`` row.
    """
    import Public.PublicFun as PublicFun

    time_format = "YYYY/MM/DD HH:MM:SS"
    option_guid = PublicFun.createID()

    option_sql = (
        "INSERT INTO [dbo].[OPTAAA]([GUID],[OPTAAA001],[OPTAAA002],[OPTAAA003],[D_INSERTUSER],[D_INSERTTIME],[D_MODIFYUSER],[D_MODIFYTIME])"
        "VALUES(?,?,?,?,'System',?,'','')")
    option_row = (option_guid, OptionName, OptionCode, OptionType,
                  PublicFun.getNowDateTime(time_format))
    dbcon.Execute(option_sql, option_row)

    # Nothing to link: the option row alone is the whole job.
    if RelGUID is None:
        return option_guid

    link_sql = (
        "INSERT INTO [dbo].[OPTAAB]([GUID],[OPTAAB001],[OPTAAB002],[D_INSERTUSER],[D_INSERTTIME],[D_MODIFYUSER],[D_MODIFYTIME])"
        "VALUES(?,?,?,'System',?,'','')")
    link_row = (PublicFun.createID(), RelGUID, option_guid,
                PublicFun.getNowDateTime(time_format))
    dbcon.Execute(link_sql, link_row)
    return option_guid
Пример #7
0
def save_company_info(db_connection, company_info):
    """Create or update the company record for a crawled product.

    The company GUID comes from ``BIASDataIO.CheckCompanyMappingList``; when
    that returns an empty string a fresh ID is generated. The matching
    ``Companys`` record is loaded, its name and address are overwritten from
    ``company_info``, and the phone number is filled in only when the stored
    one is empty or contains the text "暫不提供".

    Args:
        db_connection: Connection handed to ``Engine`` and ``BIASDataIO``.
        company_info: Mapping with a required ``factory_name`` key and
            optional ``factory_address`` / ``factory_phone`` keys.

    Returns:
        The company GUID that was looked up or generated.

    Raises:
        Exception: Whatever ``Engine.Query`` raised, after it is logged.
    """
    print(company_info)
    company_guid = BIASDataIO.CheckCompanyMappingList(
        db_connection, company_info["factory_name"], NewCompanyGUID=False)
    if company_guid == "":
        company_guid = PublicFun.createID()
    try:
        companys = Engine.Query(db_connection, Companys.Companys(), "GUID=?",
                                company_guid)
    except Exception as message:
        logger.logger.error(message)
        # Without a loaded record there is nothing to update. Re-raise the
        # real error instead of failing below with an UnboundLocalError on
        # ``companys``.
        raise

    if companys.GUID == "":
        # No stored row was found, so this becomes an insert.
        companys.GUID = company_guid
        companys.D_INSERTUSER = "******"
    else:
        companys.D_MODIFYUSER = "******"
    companys.Companys003 = company_info.get("factory_name", '')
    companys.Companys005 = company_info.get("factory_address", '')
    # Keep an existing phone number unless it is empty or the placeholder.
    if not companys.Companys010 or "暫不提供" in companys.Companys010:
        companys.Companys010 = company_info.get("factory_phone", '')
    try:
        Engine.UpdateData(db_connection, companys)
    except Exception as message:
        # Saving is best effort: log the failure and still return the GUID.
        logger.logger.error(message)
    return company_guid
Пример #8
0
def findOption(dbcon, OptionName):
    """Look up the GUID of the ``OPTAAA`` row whose ``OPTAAA001`` is the name.

    Args:
        dbcon: Connection object exposing ``GetDataTable(sql)``.
        OptionName: Option name to search for; it is passed through
            ``PublicFun.SQLFilter`` before being embedded in the query.

    Returns:
        The ``GUID`` of the first matching row, or ``""`` when none matches.
    """
    import Public.PublicFun as PublicFun

    escaped_name = PublicFun.SQLFilter(OptionName)
    query = "SELECT GUID FROM OPTAAA WHERE OPTAAA001=N'" + escaped_name + "'"
    rows = dbcon.GetDataTable(query)
    if rows is None or len(rows) == 0:
        return ""
    return rows[0].GUID
Пример #9
0
def insertOption(dbcon, OptionName, RelGUID=None):
    """Insert an option into ``[dbo].[OPTAAA]`` and optionally link it.

    Values are bound as ``?`` parameters instead of being concatenated into
    the SQL text, so quotes in ``OptionName`` or ``RelGUID`` cannot break or
    alter the statement.

    Args:
        dbcon: Connection object exposing ``Execute(sql, params)``.
        OptionName: Stored in ``OPTAAA001``.
        RelGUID: When not ``None``, a row is also added to ``[dbo].[OPTAAB]``
            with ``RelGUID`` in ``OPTAAB001`` and the new option's GUID in
            ``OPTAAB002``.

    Returns:
        The GUID generated for the new ``OPTAAA`` row.
    """
    import Public.PublicFun as PublicFun

    GUID = PublicFun.createID()
    sql = (
        "INSERT INTO [dbo].[OPTAAA]([GUID],[OPTAAA001],[D_INSERTUSER],[D_INSERTTIME],[D_MODIFYUSER],[D_MODIFYTIME])"
        + "VALUES(?,?,'System',?,'','')")
    dbcon.Execute(sql, (GUID, OptionName,
                        PublicFun.getNowDateTime("YYYY/MM/DD HH:MM:SS")))
    if RelGUID is not None:
        sql = (
            "INSERT INTO [dbo].[OPTAAB]([GUID],[OPTAAB001],[OPTAAB002],[D_INSERTUSER],[D_INSERTTIME],[D_MODIFYUSER],[D_MODIFYTIME])"
            + "VALUES(?,?,?,'System',?,'','')")
        dbcon.Execute(sql, (PublicFun.createID(), RelGUID, GUID,
                            PublicFun.getNowDateTime("YYYY/MM/DD HH:MM:SS")))
    return GUID
Пример #10
0
def findCompany(dbcon, CompanysName):
    """Look up the GUID of the ``Companys`` row whose ``Companys003`` matches.

    Args:
        dbcon: Connection object exposing ``GetDataTable(sql)``.
        CompanysName: Company name to search for; it is passed through
            ``PublicFun.SQLFilter`` before being embedded in the query.

    Returns:
        The ``GUID`` of the first matching row, or ``""`` when none matches.
    """
    import Public.PublicFun as PublicFun

    escaped_name = PublicFun.SQLFilter(CompanysName)
    query = ("SELECT GUID FROM Companys WHERE Companys003=N'" + escaped_name
             + "'")
    rows = dbcon.GetDataTable(query)
    if rows is None or len(rows) == 0:
        return ""
    return rows[0].GUID
Пример #11
0
def CheckMappingList(dbcon, MapType, Value):
    """Fetch the first ``MAPAAA003`` mapped to ``Value`` for a mapping type.

    Args:
        dbcon: Connection object exposing ``GetDataTable(sql)``.
        MapType: Compared against ``MAPAAA001``.
        Value: Compared against ``MAPAAA002``.

    Returns:
        Whatever ``dbcon.GetDataTable`` returns for the ``TOP 1`` query.
    """
    import Public.PublicFun as PublicFun

    # Only "CompanyName" lookups pass the value through SQLFilter.
    # NOTE(review): for every other MapType the value is embedded as-is;
    # presumably callers escape it themselves - confirm before relying on it.
    if MapType == "CompanyName":
        needle = PublicFun.SQLFilter(Value)
    else:
        needle = Value
    query = ("SELECT TOP 1 MAPAAA003 FROM MAPAAA WHERE MAPAAA001='" + MapType
             + "' AND MAPAAA002=N'" + needle + "'")
    return dbcon.GetDataTable(query)
Пример #12
0
def CheckCompanyMappingList(dbcon, CompanyName, CompanyGUID=None):
    """Resolve a company name to a GUID, recording the mapping when missing.

    When a "CompanyName" mapping already exists, its ``MAPAAA003`` value is
    returned. Otherwise the GUID is taken from the ``CompanyGUID`` argument,
    or from ``findCompany``, or freshly generated, in that order, and a new
    mapping row is inserted for it.

    Args:
        dbcon: Connection object passed to the lookup and insert helpers.
        CompanyName: Company name to resolve.
        CompanyGUID: GUID to register when no mapping exists yet.

    Returns:
        The GUID mapped to ``CompanyName``.
    """
    import Public.PublicFun as PublicFun

    mapped_rows = CheckMappingList(dbcon, "CompanyName", CompanyName)
    if mapped_rows is not None and len(mapped_rows) != 0:
        return mapped_rows[0].MAPAAA003

    if CompanyGUID is None:
        CompanyGUID = findCompany(dbcon, CompanyName)
        if CompanyGUID is None or CompanyGUID == "":
            CompanyGUID = PublicFun.createID()
    insertMappingList(dbcon, "CompanyName", CompanyName, CompanyGUID)
    return CompanyGUID
Пример #13
0
def save_product_info(db_connection, company_guid, product_info, file_name):
    """Insert or update a ``CompanyProduct`` row for a crawled product.

    ``check_product_info`` decides which path is taken: a falsy result
    inserts a new row, anything else updates the row with that GUID.
    Database errors are logged through ``logger.logger.error`` and not
    re-raised.

    Args:
        db_connection: Connection object exposing ``Execute(sql, params)``.
        company_guid: Stored in ``CompanyProduct001`` on insert.
        product_info: Mapping with a required ``product_name`` key and
            optional ``product_format``, ``other_info``, ``product_place``
            and ``brand_name`` keys.
        file_name: Stored in ``CompanyProduct008``.
    """
    product_guid = check_product_info(db_connection, company_guid,
                                      product_info["product_name"])
    print(product_info)

    insert_sql = (
        "INSERT INTO CompanyProduct(GUID, CompanyProduct001,CompanyProduct002,CompanyProduct003,CompanyProduct004,"
        "CompanyProduct005,CompanyProduct006, CompanyProduct007, CompanyProduct008, D_INSERTUSER, D_INSERTTIME)"
        " VALUES (?,?,?,?,?,?,?,?,?,?,?)")
    update_sql = "UPDATE CompanyProduct set CompanyProduct003=?,CompanyProduct004=?,CompanyProduct005=?,CompanyProduct006=?,CompanyProduct007=?,CompanyProduct008=?, D_MODIFYUSER=?, D_MODIFYTIME=? WHERE GUID = ?"

    def shared_values():
        # Columns 003-008 plus the acting user, common to both statements.
        return (product_info.get("product_name", ''),
                product_info.get("product_format", ''),
                product_info.get("other_info", ''),
                product_info.get("product_place", ''),
                product_info.get("brand_name", ''),
                file_name,
                "MomoCrawler")

    is_new_product = not product_guid
    try:
        if is_new_product:
            row = ((PublicFun.createID(), company_guid, "Momo")
                   + shared_values()
                   + (PublicFun.getNowDateTime("YYYY/MM/DD HH:MM:SS"), ))
            db_connection.Execute(insert_sql, row)
            logger.logger.info("Insert")
            print(file_name)
        else:
            row = (shared_values()
                   + (PublicFun.getNowDateTime("YYYY/MM/DD HH:MM:SS"),
                      product_guid))
            db_connection.Execute(update_sql, row)
    except Exception as message:
        logger.logger.error(message)
Пример #14
0
def writeDBMsg(JobName, Param, msg, dbcon=None):
    """Record a job message in ``[dbo].[LogMsg]`` and echo it via ``writeMsg``.

    ``Param`` is stored unchanged when it is a string and as compact JSON
    otherwise. Without ``dbcon`` a temporary "QueueConnect" connection is
    opened for the write and closed afterwards. A failed insert is reported
    through ``writeMsg`` instead of being raised.

    Args:
        JobName: Stored in the ``JOB`` and ``D_INSERTUSER`` columns.
        Param: String or JSON-serialisable value stored in ``Param``.
        msg: Message to store; ``str(msg)`` is passed to ``writeMsg``.
        dbcon: Optional open connection exposing ``Execute(sql, params)``.
    """
    if isinstance(Param, str):
        jsonData = Param
    else:
        jsonData = json.dumps(Param, separators=(',', ':'))

    if dbcon is None:
        dbcon = SQLConnect.DBConnect(secName="QueueConnect",
                                     publicSetting=True)
        dbcon.ConnectDB()
        try:
            writeDBMsg(JobName, jsonData, msg, dbcon)
        finally:
            # The temporary connection is ours to close, whatever happens.
            dbcon.close()
        return

    try:
        sql = (
            "INSERT INTO [dbo].[LogMsg]([GUID],[JOB],[Param],[Message],[D_INSERTUSER],[D_INSERTTIME],[D_MODIFYUSER],[D_MODIFYTIME])"
            + "VALUES(?, ?, ?, ?, ?, ?, ?, ?)")
        dbcon.Execute(
            sql, (PublicFun.createID(), JobName, jsonData, msg, JobName,
                  PublicFun.getNowDateTime("YYYY/MM/DD HH:MM:SS"), "", ""))
        writeMsg(str(msg))
    except Exception as ex:
        # str() is required: concatenating the exception object itself
        # raises TypeError and hides the original failure.
        writeMsg("寫入資料庫失敗:" + str(ex))
Пример #15
0
def Query(dbcon, DataObject, WhereClause="", parameter=None):
    """Load the first row matching a query into ``DataObject``.

    The statement is ``DataObject.QueryStr`` with an optional
    ``WHERE <WhereClause>`` appended. ``DataObject.DataRow`` becomes the
    first row returned, or a list of empty strings (one per entry in
    ``DataObject.Fields``) when nothing matches. ``DataObject.TimeStamp`` is
    set to the current time.

    Args:
        dbcon: Connection object exposing ``GetDataTable(sql, parameter)``.
        DataObject: Object providing ``QueryStr`` and ``Fields``.
        WhereClause: Condition text without the ``WHERE`` keyword; ``""`` or
            ``None`` means no filter.
        parameter: Passed through to ``GetDataTable`` unchanged.

    Returns:
        The same ``DataObject``, populated as described above.
    """
    import Public.PublicFun as PublicFun

    excuteSQL = DataObject.QueryStr
    # None is treated like "": previously the None case skipped the WHERE
    # prefix but then failed on ``sql + None``.
    if WhereClause:
        excuteSQL = excuteSQL + " WHERE " + WhereClause

    FindRow = dbcon.GetDataTable(excuteSQL, parameter)
    if FindRow is not None and len(FindRow) > 0:
        DataObject.DataRow = FindRow[0]
    else:
        DataObject.DataRow = [""] * len(DataObject.Fields)
    DataObject.TimeStamp = PublicFun.getNowDateTime("YYYY/MM/DD HH:MM:SS")
    return DataObject
Пример #16
0
def UpdateData(dbcon, DataObject):
    """Write ``DataObject`` to its table, updating or inserting as needed.

    A row is looked up by the object's key fields. When it exists, every
    field in ``DataObject.Fields`` is updated; otherwise a new row is
    inserted. If ``DataObject.CheckTimeStamp`` is truthy and the stored
    ``D_MODIFYTIME`` (or ``D_INSERTTIME`` when that is empty) is greater
    than ``DataObject.TimeStamp``, the update is refused.

    Args:
        dbcon: Connection exposing ``GetDataTable(sql)`` and ``Execute(sql)``.
        DataObject: Object providing ``QueryStr``, ``TableName``, ``Fields``,
            ``KeyFields``, ``getData(name)``, ``CheckTimeStamp`` and
            ``TimeStamp``.

    Returns:
        ``True`` once the statement has been executed.

    Raises:
        Exception: When the timestamp check described above fails.
    """
    import Public.PublicFun as PublicFun

    def quoted(field):
        # All values are embedded as escaped N'...' literals.
        return "N'" + PublicFun.SQLFilter(DataObject.getData(field)) + "'"

    # Key values are escaped like every other value, so a quote inside a
    # key can no longer break or alter the WHERE clause.
    WhereClause = " WHERE 1=1 "
    for key in DataObject.KeyFields:
        WhereClause = (WhereClause + " AND " + key + "='" +
                       PublicFun.SQLFilter(DataObject.getData(key)) + "'")

    FindRow = dbcon.GetDataTable(DataObject.QueryStr + WhereClause)

    NowTime = PublicFun.getNowDateTime("YYYY/MM/DD HH:MM:SS")
    if FindRow is not None and len(FindRow) > 0:
        FindRow = FindRow[0]
        # Set before building the statement so the new time is written too.
        DataObject.D_MODIFYTIME = NowTime
        if DataObject.CheckTimeStamp:
            DBTime = FindRow.D_MODIFYTIME
            if DBTime is None or DBTime == "":
                DBTime = FindRow.D_INSERTTIME
            if DBTime > DataObject.TimeStamp:
                raise Exception("The data has been updated by others!")

        assignments = ",".join(
            Field + "=" + quoted(Field) for Field in DataObject.Fields)
        excuteSQL = ("UPDATE " + DataObject.TableName + " SET " +
                     assignments + WhereClause)
    else:
        DataObject.D_INSERTTIME = NowTime
        columns = ",".join(DataObject.Fields)
        values = ",".join(quoted(Field) for Field in DataObject.Fields)
        excuteSQL = ("INSERT INTO " + DataObject.TableName + "(" + columns +
                     ")VALUES(" + values + ")")
    dbcon.Execute(excuteSQL)
    return True
Пример #17
0
def save_product_info(db_connection, company_guid, product_info):
    """Add a product row, or stamp the existing one as modified.

    ``check_product_info`` is asked for the product's GUID. An empty result
    inserts a new ``CompanyProduct`` row; otherwise only ``D_MODIFYUSER`` and
    ``D_MODIFYTIME`` of the existing row are updated.

    Args:
        db_connection: Connection object exposing ``Execute(sql, params)``.
        company_guid: Stored in ``CompanyProduct001`` on insert.
        product_info: Mapping with ``NAME``, ``SPEC``, ``INGREDIENT`` and
            ``ORIGIN`` keys.
    """
    existing_guid = check_product_info(db_connection, company_guid,
                                       product_info["NAME"])

    if len(existing_guid) != 0:
        logger.info("Update")
        touch_sql = "UPDATE CompanyProduct set D_MODIFYUSER = ?, D_MODIFYTIME = ? WHERE GUID = ?"
        touch_row = ("CostcoCrawler",
                     PublicFun.getNowDateTime("YYYY/MM/DD HH:MM:SS"),
                     existing_guid)
        db_connection.Execute(touch_sql, touch_row)
        return

    logger.info("Insert")
    create_sql = (
        "INSERT INTO CompanyProduct(GUID, CompanyProduct001,CompanyProduct002,CompanyProduct003,CompanyProduct004,CompanyProduct005,CompanyProduct006, D_INSERTUSER, D_INSERTTIME)"
        " VALUES (?,?,?,?,?,?,?,?,?)")
    create_row = (PublicFun.createID(), company_guid, "Costco",
                  product_info["NAME"], product_info["SPEC"],
                  product_info["INGREDIENT"], product_info["ORIGIN"],
                  "CostcoCrawler",
                  PublicFun.getNowDateTime("YYYY/MM/DD HH:MM:SS"))
    db_connection.Execute(create_sql, create_row)
Пример #18
0
def addQueue(SystemName, QueueType, Path, Files, Param):
    """Insert a job row into ``[dbo].[JobQueue]``.

    ``Files`` and ``Param`` are converted with ``str()``; every text value is
    passed through ``PublicFun.SQLFilter`` before being embedded in the
    statement. The queue connection is closed even when the insert fails.

    Args:
        SystemName: Stored in the ``SystemName`` column.
        QueueType: Stored in the ``QueueType`` column.
        Path: Stored in the ``Path`` column.
        Files: Stored in the ``Files`` column as ``str(Files)``.
        Param: Stored in the ``Param`` column as ``str(Param)``.
    """
    import Public.PublicFun as PublicFun

    GUID = PublicFun.createID()
    jsonData = str(Param)
    D_INSERTTIME = PublicFun.getNowDateTime("YYYY/MM/DD HH:MM:SS")
    # Build the statement before opening the connection so a failure here
    # cannot leave a connection behind.
    sql = (
        "INSERT INTO [dbo].[JobQueue]([GUID],[SystemName],[QueueType],[Path],[Files],[Param],[D_INSERTTIME])"
        + "VALUES('" + GUID + "',N'" + PublicFun.SQLFilter(SystemName) +
        "',N'" + PublicFun.SQLFilter(QueueType) + "',N'" +
        PublicFun.SQLFilter(Path) + "',N'" + PublicFun.SQLFilter(str(Files)) +
        "',N'" + PublicFun.SQLFilter(jsonData) + "','" +
        str(D_INSERTTIME) + "')")

    dbcon = getQueueDBConnect()
    try:
        dbcon.Execute(sql)
    finally:
        dbcon.close()
Пример #19
0
def save_company_info(db_connection, company_info):
    """Insert or update a company record and return its GUID.

    The GUID comes from ``BIASDataIO.CheckCompanyMappingList``; an empty
    result is replaced by a freshly generated ID. Name and address are always
    overwritten, while the phone number is only filled in when the stored one
    is missing, empty, or contains the text "暫不提供".

    Args:
        db_connection: Connection handed to ``Engine`` and ``BIASDataIO``.
        company_info: Mapping with ``NAME``, ``ADDRESS`` and ``TEL`` keys.

    Returns:
        The company GUID that was looked up or generated.
    """
    guid = BIASDataIO.CheckCompanyMappingList(db_connection,
                                              company_info["NAME"],
                                              NewCompanyGUID=False)
    if guid == "":
        guid = PublicFun.createID()

    record = Engine.Query(db_connection, Companys.Companys(), "GUID=?",
                          (guid, ))

    if record.GUID != "":
        record.D_MODIFYUSER = "******"
    else:
        # Nothing stored yet: mark the record as a new insert.
        record.GUID = guid
        record.D_INSERTUSER = "******"

    record.Companys003 = company_info["NAME"]
    record.Companys005 = company_info["ADDRESS"]

    stored_phone = record.Companys010
    phone_missing = stored_phone == None or len(stored_phone) == 0  # noqa: E711
    if phone_missing or "暫不提供" in stored_phone:
        record.Companys010 = company_info["TEL"]

    Engine.UpdateData(db_connection, record)
    return guid
Пример #20
0
def writeDBMsg(JobName, Param, msg, dbcon=None):
    """Record a job message in ``[dbo].[LogMsg]`` and echo it via ``writeMsg``.

    Without ``dbcon`` a temporary connection is opened for the write and
    closed afterwards. A failed insert is reported through ``writeMsg``
    instead of being raised.

    Args:
        JobName: Stored in the ``JOB`` and ``D_INSERTUSER`` columns.
        Param: Stored in the ``Param`` column as ``str(Param)``.
        msg: Stored in the ``Message`` column as ``str(msg)``.
        dbcon: Optional open connection exposing ``Execute(sql)``.
    """
    import Public.PublicFun as PublicFun

    if dbcon is None:
        dbcon = SQLConnect.DBConnect(publicSetting=True)
        dbcon.ConnectDB()
        try:
            writeDBMsg(JobName, Param, msg, dbcon)
        finally:
            # The temporary connection is ours to close, whatever happens.
            dbcon.close()
        return

    try:
        sql = (
            "INSERT INTO [dbo].[LogMsg]([GUID],[JOB],[Param],[Message],[D_INSERTUSER],[D_INSERTTIME],[D_MODIFYUSER],[D_MODIFYTIME])"
            + "VALUES('" + PublicFun.createID() + "',N'" +
            PublicFun.SQLFilter(JobName) + "',N'" +
            PublicFun.SQLFilter(str(Param)) + "',N'" +
            PublicFun.SQLFilter(str(msg)) + "',N'" +
            PublicFun.SQLFilter(JobName) + "','" +
            PublicFun.getNowDateTime("YYYY/MM/DD HH:MM:SS") + "','','')")
        dbcon.Execute(sql)
        writeMsg(str(msg))
    except Exception as ex:
        # str() is required: concatenating the exception object itself
        # raises TypeError and hides the original failure.
        writeMsg("寫入資料庫失敗:" + str(ex))
Пример #21
0
def getPhoneNumber(PhoneNumberurl):
    """Return the raw value of the ``Text=`` parameter in a URL.

    The URL is split on ``&`` and the first piece containing ``Text=`` wins.
    Everything after that marker is returned as-is (no URL decoding). This
    also works when ``Text`` is the first query parameter, where the piece
    still carries the ``...?`` prefix.

    Args:
        PhoneNumberurl: URL or query string to search.

    Returns:
        The text following ``Text=``, or ``None`` when the marker is absent.
    """
    marker = "Text="
    for param in PhoneNumberurl.split("&"):
        _, found, value = param.partition(marker)
        if found:
            return value
    return None


if __name__ == '__main__':
    jsondata = ""
    try:
        JobID = sys.argv[1]
        data = sys.argv[2]
        jsondata = PublicFun.StringToJson(data)
        companyURL = str(jsondata["companyURL"])
        companyName = str(jsondata["companyName"])

        DBConnect = SQLConnect.DBConnect(publicSetting=True)
        DBConnect.ConnectDB()

        CompanyGUID = BIASDataIO.CheckCompanyMappingList(
            DBConnect, companyName)
        companyInFo = Engine.Query(DBConnect, Companys.Companys(),
                                   "GUID='" + CompanyGUID + "'")

        if companyInFo.GUID == "":
            companyInFo.GUID = CompanyGUID
            companyInFo.D_INSERTUSER = "******"
        else:
Пример #22
0
from selenium.webdriver.common.action_chains import ActionChains

import Public.PublicFun as PublicFun
import Public.SQLConnect as SQLConnect
import Public.BIASDataIO as BIASDataIO

DBConnect=None
JobID=""
try:
    JobID=sys.argv[1]
    DBConnect=SQLConnect.DBConnect(publicSetting=True)
    DBConnect.ConnectDB()
    DBConnect.StartTransaction()
    
    Chromedriver=PublicFun.getWebDriver("chrome",DataFolderName=JobID)
    Chromedriver.get("https://www.104.com.tw/jobs/search/")

    #找到職務類別的按鈕並點擊
    CategoryListButton=Chromedriver.find_element_by_id("job-cat")
    ActionChains(Chromedriver).click(CategoryListButton).perform()
    time.sleep(5)

    e104menu=Chromedriver.find_element_by_id("e104menu2011_main")
    e104menuCount = len(bs(e104menu.get_attribute('innerHTML'), "html.parser").select("ul li"))

    #逐項取得內容
    for ClassACount in range(e104menuCount):
        #移開選取項目
        tempElement=Chromedriver.find_element_by_id("globalbar")
        ActionChains(Chromedriver).move_to_element(tempElement).perform()
Пример #23
0
import Public.PublicFun as PublicFun
import Public.RequestsHandler as RequestsHandler
import Public.SettingReader as SettingReader
import Public.QueueIO as QueueIO
import Public.LogHandler as LogHandler
import Public.SQLConnect as SQLConnect
import Public.Engine as Engine

import Model.JOB as JOB

if __name__ == '__main__':
    jsondata=""
    try:
        JobID=sys.argv[1]
        data=sys.argv[2]
        jsondata = PublicFun.StringToJson(data)
        CompanyGUID=str(jsondata["CompanyGUID"])
        JOBAAA009=str(jsondata["JOBAAA009"])

        requestHost=SettingReader.getSetting("global","requestHost")
        
        DBConnect=SQLConnect.DBConnect(publicSetting=True)
        DBConnect.ConnectDB()
        
#        sql = "select GUID,JOBAAA009 from JOBAAA with(nolock) where JOBAAA029 != 'Y'"
#        dt = DBConnect.GetDataTable(sql)
        if(len(JOBAAA009) >0):
            req=RequestsHandler.getNewRequests(str(JOBAAA009),requestHost)
#            for rows in dt:
            if (len(str(JOBAAA009)) > 0 ):
                res = req.get(str(JOBAAA009))
Пример #24
0
    #Regex格式驗證
    import re
    text_re = re.compile('Text=')
    queryString = PhoneNumberurl.split("&")
    for param in queryString:
        if len(text_re.findall(param)) > 0:
            return param[5:]


if __name__ == '__main__':
    jsondata = ""
    try:
        JobID = sys.argv[1]
        data = sys.argv[2]

        jsondata = PublicFun.StringToJson(data)
        jobdate = str(jsondata["date"])
        jobName = PublicFun.SQLFilter(str(jsondata["jobName"]))
        jobURL = str(jsondata["jobURL"])
        jobAREA = str(jsondata["jobAREA"])
        companyInfo = jsondata["companyInfo"]
        try:
            companyName = PublicFun.SQLFilter(str(companyInfo["companyName"]))
        except Exception as ex:
            companyInfo = PublicFun.StringToJson(companyInfo, Default=False)
            companyName = PublicFun.SQLFilter(str(companyInfo["companyName"]))
        ScanDate = str(jsondata["ScanDate"])

        DBConnect = SQLConnect.DBConnect(publicSetting=True)
        DBConnect.ConnectDB()
Пример #25
0
def updateOption(dbcon, jobCategoryGuid, OptionCode):
    """Set ``OPTAAA002`` and ``D_MODIFYTIME`` on one ``OPTAAA`` row.

    Args:
        dbcon: Connection object exposing ``Execute(sql, params)``.
        jobCategoryGuid: GUID of the row to update.
        OptionCode: New value for ``OPTAAA002``.
    """
    import Public.PublicFun as PublicFun

    statement = "UPDATE OPTAAA SET OPTAAA002=?, D_MODIFYTIME=? WHERE GUID = ?"
    modified_at = PublicFun.getNowDateTime("YYYY/MM/DD HH:MM:SS")
    dbcon.Execute(statement, (OptionCode, modified_at, jobCategoryGuid))