Example #1
def GetTopVolume():
    cssSelector = '#divStockList'

    url = f'https://goodinfo.tw/tw/StockList.asp?RPT_TIME=&MARKET_CAT=熱門排行&INDUSTRY_CAT=日成交張數創近期新高日數@@成交張數@@日成交張數創近期新高日數'
    print(url)

    try:
        df = Utils.GetDataFrameByCssSelector(url, cssSelector)
        #return df
    except:
        time.sleep(random.randint(20, 30))
        df = Utils.GetDataFrameByCssSelector(url, cssSelector)
        print(df)
        #df.columns = df.columns.get_level_values(1)

    df.columns = df.columns.get_level_values(0)
    df = df.drop_duplicates(keep=False, inplace=False)
    #gain = pd.to_numeric(df['漲跌  價'], errors='coerce') > 0
    #market = df['市  場'] == '市'
    length = df['代號'].astype(str).map(len) == 4
    #df = df[gain & length]
    df = df[length]
    df.to_csv(f'{Utils.GetRootPath()}\Data\Daily\日成交張數創近期新高日數.csv',
              encoding='utf_8_sig')
    return df['代號'].values
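
Every scraper in this listing relies on a Utils.GetDataFrameByCssSelector helper whose source is not shown. Below is a minimal sketch of what such a helper might look like, assuming requests, BeautifulSoup and pandas.read_html are acceptable; the function name, signature and User-Agent string are assumptions, not the original implementation.

import requests
import pandas as pd
from io import StringIO
from bs4 import BeautifulSoup

def get_dataframe_by_css_selector(url, css_selector, timeout=30):
    # Hypothetical stand-in for Utils.GetDataFrameByCssSelector:
    # fetch the page, pick the element matching the CSS selector,
    # and let pandas parse the first HTML table inside it.
    response = requests.get(url,
                            headers={'User-Agent': 'Mozilla/5.0'},
                            timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    element = soup.select_one(css_selector)
    if element is None:
        raise ValueError(f'no element matches {css_selector!r}')
    return pd.read_html(StringIO(str(element)))[0]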
Example #2
def update_with_project_id(project_id, start_nodes, relationship,
                           config_order):
    """
    通过项目ID 更新 项目对应的model
    :return:
    """

    try:
        start_nodes = utils.list_str_to_list(start_nodes)
        relationship = utils.list_str_to_list(relationship)
        relationship_item_str = []
        for item in relationship:
            relationship_item_str.append(str(item))
        config = json.dumps(
            {
                'config_order': config_order,
                'relationship': '*,'.join(relationship_item_str)
            },
            ensure_ascii=False)
        query = db.session.query(Model)
        query.filter(Model.project_id == project_id).update({
            Model.start_nodes:
            ','.join(start_nodes),
            Model.config:
            config
        })
        db.session.commit()
        print('Update complete')
        return True

    except Exception:
        traceback.print_exc()
        return False
Example #3
def GetFinData(stockId):
    url = f'https://goodinfo.tw/StockInfo/StockFinDetail.asp?RPT_CAT=XX_M_QUAR_ACC&STOCK_ID={stockId}'
    css_selector = '#txtFinBody'
    try:
        df = Utils.GetDataFrameByCssSelector(url, css_selector)
    except:
        time.sleep(random.randint(20, 30))
        df = Utils.GetDataFrameByCssSelector(url, css_selector)
    #print(df)
    return df
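
GetFinData, like most functions in this listing, retries the fetch exactly once after a random 20–30 second pause when the first attempt raises. A small decorator could factor that pattern out; this is only a sketch under the assumption that a single retry is sufficient, and retry_once is not part of the original codebase.

import time
import random
import functools

def retry_once(min_wait=20, max_wait=30):
    # Retry the wrapped call a single time after a random pause,
    # mirroring the inline try/except blocks used in these examples.
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            try:
                return func(*args, **kwargs)
            except Exception:
                time.sleep(random.randint(min_wait, max_wait))
                return func(*args, **kwargs)
        return wrapper
    return decorator

# Usage sketch:
# @retry_once()
# def GetFinData(stockId):
#     ...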
Example #4
def GetFinDetail(stockId):
    df = GetFinData(stockId)
    result = {}

    headers = GetFinHeaders()
    for header in headers:
        if header == '本業收益':
            try:
                # core-business ratio = operating margin / pre-tax margin, ideally > 60%
                ratio = round(
                    Decimal(result['營業利益率']) / Decimal(result['稅前淨利率']) * 100, 2)
                result.update({'本業收益': str(ratio)})
            except Exception:
                result.update({'本業收益': '0'})
        else:
            try:
                #print(header)
                value = Utils.GetDataFrameValueByLabel(df, '獲利能力', header)
                result.update({
                    header.replace('股東權益報酬率  (年預估)', 'ROE'):
                    str(Decimal(value[0]))
                })
            except Exception:
                result.update({header.replace('股東權益報酬率  (年預估)', 'ROE'): '0'})

    df = pd.DataFrame([result])

    return df
Example #5
def getDirContent(req):
    path = req.POST.get('path', None)
    if path is not None:
        Folder = Utils.Folder(path)
        dataJson = Folder.getFolderJson()
        return HttpResponse(dataJson, content_type="application/json")
    return HttpResponse(json.dumps({}), content_type="application/json")
Example #6
def decide_include_article(publishTime):
    if (publishTime == ''):
        return True  # could not find publishedTime in HTML, so include it in results
    # minimalAllowedDateAsStr was already calculated in main()
    publishedDateAsStr = Utils.date_to_str(publishTime)
    logger.debug("publishedDateAsStr=%s minimalAllowedDateAsStr=%s",
                 publishedDateAsStr, Utils.minimalAllowedDateAsStr)
    return (publishedDateAsStr >= Utils.minimalAllowedDateAsStr)
Example #7
def mkdir(req):
    path = req.POST.get("path")
    fileOperator = Utils.fileOperator()
    fileOperator.mkdir(path)
    response = {
        "ok": True,
    }
    return HttpResponse(json.dumps(response), content_type="application/json")
Example #8
def find_times(bs):
    publishedAt = ""
    updatedAt = ""
    try:
        logger.debug("1")
        elements = bs.find_all('time')
        if (elements is not None) and (len(elements) > 0):
            publishedAt = elements[0]['datetime']
            if len(elements) > 1:
                updatedAt = elements[1]['datetime']
        else:
            published = bs.head.find(name='meta',
                                     attrs={"property": "article:published"})
            if (published is not None):
                logger.debug("1.1")
                publishedAt = bs.head.find(name='meta',
                                           attrs={
                                               "property": "article:published"
                                           }).attrs['content']
            if bs.head.find(name='meta',
                            attrs={"property":
                                   "article:modified"}) is not None:
                logger.debug("1.2")
                updatedAt = bs.head.find(name='meta',
                                         attrs={
                                             "property": "article:modified"
                                         }).attrs['content']
    except:
        pass
    if (publishedAt == '' and updatedAt == ''):
        try:
            publishedAt = bs.head.find(name='meta',
                                       attrs={
                                           "property": "og:pubdate"
                                       }).attrs['content']
            logger.debug("1.3")
            updatedAt = bs.html.find(
                lambda tag: tag.name == "time" and "datetime" in tag.attrs.
                keys()).attrs['datetime']
            #publishedAt = updatedAt
        except:
            pass
    if (publishedAt == ''):
        publishedAt = Utils.today()  #'2020-03-25T00:01:00+0200'
    if (updatedAt == ''):
        updatedAt = Utils.today()  # '2020-03-25T00:01:00+0200'
    return publishedAt, updatedAt
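
find_times expects an already-parsed BeautifulSoup document. A minimal usage sketch follows, assuming find_times (with its logger and Utils dependencies) is importable from the surrounding module; the sample HTML is invented purely for illustration.

from bs4 import BeautifulSoup

sample_html = """
<html><head>
  <meta property="article:published" content="2020-03-25T00:01:00+0200">
  <meta property="article:modified" content="2020-03-26T10:00:00+0200">
</head><body></body></html>
"""
bs = BeautifulSoup(sample_html, 'html.parser')
publishedAt, updatedAt = find_times(bs)
print(publishedAt, updatedAt)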
Example #9
def deleteFiles(req):
    deleteList = req.POST.get('deleteList', None).split(",")
    fileOperator = Utils.fileOperator()
    for file in deleteList:
        fileOperator.forceRemove(file)
    response = {
        "ok": True,
    }
    return HttpResponse(json.dumps(response), content_type="application/json")
Example #10
def GetDividend(stockId):
    url = f'https://goodinfo.tw/tw/StockDividendPolicy.asp?STOCK_ID={stockId}'
    cssSelector = '#divDetail'
    try:
        df = Utils.GetDataFrameByCssSelector(url, cssSelector)
        df.columns = df.columns.get_level_values(3)
    except:
        time.sleep(random.randint(20, 30))
        df = Utils.GetDataFrameByCssSelector(url, cssSelector)
        df.columns = df.columns.get_level_values(3)

    # strip spaces from column names
    df.columns = df.columns.str.replace(' ', '')

    # drop the sub-rows marked with '∟'
    df = df[df['股利發放年度'] != '∟']
    #print(df)

    # if the first row's payout year is later than the current year, drop it
    firstRow = df.iloc[0, :]
    if int(firstRow['股利發放年度']) > datetime.now().year:
        df = df.iloc[1:, :]

    rowsCount = 5
    # payout year (first 5 rows, index reset)
    year = pd.to_numeric(df.iloc[:, 0], errors='coerce').dropna(how='any', axis=0).head(rowsCount).astype(int).reset_index(drop=True)
    #print(year)

    # cash dividend (first 5 rows, index reset)
    cash = pd.to_numeric(df.iloc[:, 3], errors='coerce').dropna(how='any', axis=0).head(rowsCount).reset_index(drop=True)
    #print(cash)

    # stock dividend (first 5 rows, index reset)
    stock = pd.to_numeric(df.iloc[:, 6], errors='coerce').dropna(how='any', axis=0).head(rowsCount).reset_index(drop=True)
    #print(stock)

    data = []
    for index in range(rowsCount):
        data.append(str(cash[index]).rjust(6) + ' / ' + str(stock[index]).rjust(6))

    print(data)
    df = pd.DataFrame([data], columns=year)
    
    return df
Example #11
def copyFiles(req):
    needCopyFileList = req.POST.get('needCopyFileList', None).split(",")
    targetPath = req.POST.get('targetPath', None)
    isMove = req.POST.get('isMove', False)
    fileOperator = Utils.fileOperator()

    fileOperator.copyFiles(needCopyFileList, targetPath, False if isMove != "true" else True)
    response = {
        "ok": True,
    }
    return HttpResponse(json.dumps(response), content_type="application/json")
Example #12
def browseUrl(url):
    ts = time()
    logger.debug("[%s] loading %s...", 'id', url)
    request = Request(url, headers={'User-Agent': Constants.user_agent})
    html = Utils.load_html(request)
    logger.debug('[%s] loading completed in %s seconds', 'id', time() - ts)
    ts = time()
    return html
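
browseUrl builds a urllib Request and hands it to Utils.load_html, which is not shown in the listing. A plausible sketch is given below, assuming the helper simply opens the request and returns the decoded body; the name load_html and this behaviour are assumptions, not the original implementation.

from urllib.request import urlopen

def load_html(request, timeout=30):
    # Hypothetical stand-in for Utils.load_html: open the prepared
    # urllib Request and return the decoded document body.
    with urlopen(request, timeout=timeout) as response:
        charset = response.headers.get_content_charset() or 'utf-8'
        return response.read().decode(charset, errors='replace')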
Example #13
def main(req):
    #global rootpath
    #json_f=open('./app/rootpath.conf','r')
    #jsonObj=json.load(json_f)
    #rootpath=jsonObj["rootpath"]
    Folder = Utils.Folder(rootpath)
    dataJson = Folder.getFolderJson()
    language = req.COOKIES.get('language')

    if language == "en":
        return render(req, "index_en-US.html", {"dataJson": dataJson})
    if language == "cn":
        return render(req, "index_zh-CN.html", {"dataJson": dataJson})
Example #14
def GetAllDividend():    
    cssSelector = '#divStockList'
    
    for rankIndex in range(0, 6):
        
        url = f'https://goodinfo.tw/tw/StockList.asp?SHEET=股利政策&MARKET_CAT=熱門排行&INDUSTRY_CAT=合計股利&RANK={str(rankIndex)}'
        print(url)
        
        # pause 10–20 seconds between requests
        time.sleep(random.randint(10, 20))

        try:
            df = Utils.GetDataFrameByCssSelector(url, cssSelector)
            #return df
        except:
            time.sleep(random.randint(20, 30))
            df = Utils.GetDataFrameByCssSelector(url, cssSelector)
            print(df)
            #df.columns = df.columns.get_level_values(1)

        df.columns = df.columns.get_level_values(0)
        df = df.drop_duplicates(keep=False, inplace=False)  # drop duplicated header rows
        #gain = pd.to_numeric(df['漲跌  價'], errors='coerce') > 0
        #market = df['市  場'] == '市'
        print(df)
        length = df['代號'].astype(str).map(len) == 4
        #df = df[gain & length]
        df = df[length]

        filePath = f'{Utils.GetRootPath()}\Data\Yearly\合計股利.csv'
        if rankIndex == 0:
            df.to_csv(filePath, encoding='utf_8_sig')
        else:
            df.to_csv(filePath, mode='a', header=False, encoding='utf_8_sig')
        # drop duplicated header rows
        #sum_df[sum_df.ne(sum_df.columns).any(1)].to_csv(f'{Utils.GetRootPath()}\Data\Monthly\董監持股比例.csv',encoding='utf_8_sig')

    print('Done')
Example #15
def WriteData():
    cssSelector = '#divStockList'
    sum_df = pd.DataFrame()

    for rankIndex in range(0, 5):
        url = f'https://goodinfo.tw/tw/StockList.asp?SHEET=董監持股&MARKET_CAT=熱門排行&INDUSTRY_CAT=全體董監持股比例&RANK={str(rankIndex)}'
        print(url)

        try:
            time.sleep(random.randint(5, 10))
            df = Utils.GetDataFrameByCssSelector(url, cssSelector)
            print(df)
            sum_df = pd.concat([sum_df, df], axis=0)
            #df.columns = df.columns.get_level_values(1)
        except:
            time.sleep(random.randint(20, 30))
            df = Utils.GetDataFrameByCssSelector(url, cssSelector)
            print(df)
            #df.columns = df.columns.get_level_values(1)

    # drop duplicated header rows before writing out
    sum_df[sum_df.ne(sum_df.columns).any(axis=1)].to_csv(
        f'{Utils.GetRootPath()}\Data\Monthly\董監持股比例.csv', encoding='utf_8_sig')
Example #16
def GetPE(stockId):
    url = f'https://goodinfo.tw/StockInfo/ShowK_ChartFlow.asp?RPT_CAT=PER&STOCK_ID={stockId}&CHT_CAT=WEEK'
    css_selector = '#divK_ChartFlowDetail'
    try:
        df = Utils.GetDataFrameByCssSelector(url, css_selector)
        # take the last 6 columns of the first row
        firstRowDf = df.iloc[0, -6:]
        #print(firstRowDf)
    except Exception:
        time.sleep(random.randint(20, 30))
        df = Utils.GetDataFrameByCssSelector(url, css_selector)

        # take the last 6 columns of the first row
        firstRowDf = df.iloc[0, -6:]
        #print(firstRowDf)

    # convert the Series into a list of {key, value} dicts,
    # see https://stackoverflow.com/questions/45452935/pandas-how-to-get-series-to-dict
    dictionaries = [dict(key=re.findall(r'[0-9]+[.]?[0-9]*', str(k))[0], value=v)
                    for k, v in firstRowDf.items()]
    #print(dictionaries)
    
    # reshape into a single-row DataFrame
    data = []
    headers = ['本益比-級距1倍數', '本益比-級距1價格', 
               '本益比-級距2倍數', '本益比-級距2價格',
               '本益比-級距3倍數', '本益比-級距3價格',
               '本益比-級距4倍數', '本益比-級距4價格',
               '本益比-級距5倍數', '本益比-級距5價格', 
               '本益比-級距6倍數', '本益比-級距6價格']
    for entry in dictionaries:
        #print(entry)
        data.append(entry['key'])
        data.append(entry['value'])

    ##print(headers)
    #print(data)
    df = pd.DataFrame([data], columns=headers)
    return df
Example #17
def main(req):
    global rootpath
    try:
        with open("./app/rootpath.conf") as root:
            rootpath = root.read()
    except Exception:
        pass
    Folder = Utils.Folder(rootpath)
    dataJson = Folder.getFolderJson()
    language = req.COOKIES.get('language')

    if language == "en":
        return render(req, "index_en-US.html", {"dataJson": dataJson})
    if language == "cn":
        return render(req, "index_zh-CN.html", {"dataJson": dataJson})
Example #18
def main(req):
    global rootpath
    # try:
    #     with open("./app/rootpath.conf") as root:
    #         rootpath=root.read()
    # except Exception:
    #     pass
    if platform.system() == "Windows":
        rootpath = r"D:\web_file_root\SC-TestCase"
    elif platform.system() == "Linux":
        rootpath = "/opt/web_file_root"
    logUtil.logger.info("view.rootpath: " + rootpath)
    Folder = Utils.Folder(rootpath)
    dataJson = Folder.getFolderJson()
    language = req.COOKIES.get('language')

    if language == "en":
        return render(req, "index_en-US.html", {"dataJson": dataJson})
    if language == "cn":
        return render(req, "index_zh-CN.html", {"dataJson": dataJson})
Example #19
def test_different_ids(self):
    self.assertNotEqual(Utils.generate_id("Sanidad"), Utils.generate_id("Educación"))
Example #20
def test_equals_ids(self):
    self.assertEqual(Utils.generate_id("Sanidad"), Utils.generate_id("Sanidad"))
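
These two tests only pin down that Utils.generate_id is deterministic and that different labels map to different IDs. One implementation that would satisfy both assertions is sketched below, assuming a short hash-based identifier is acceptable; the real Utils.generate_id may well do something else (for example slugification).

import hashlib

def generate_id(label):
    # Deterministic: the same label always yields the same ID;
    # different labels yield different digests with overwhelming probability.
    return hashlib.sha1(label.encode('utf-8')).hexdigest()[:12]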
Example #21
def GetTransaction(stockId):
    url = f'https://goodinfo.tw/tw/ShowK_Chart.asp?STOCK_ID={stockId}&CHT_CAT2=DATE'
    cssSelector = '#divPriceDetail'
    try:
        df = Utils.GetDataFrameByCssSelector(url, cssSelector)
        df.columns = df.columns.get_level_values(1)
    except:
        time.sleep(random.randint(20, 30))
        df = Utils.GetDataFrameByCssSelector(url, cssSelector)
        df.columns = df.columns.get_level_values(1)
    # print every row when debugging
    #pd.set_option('display.max_rows', df.shape[0]+1)
    #print(df)

    headers = ['收盤', '張數', '外資  持股  (%)', '券資  比  (%)']
    smaPeriods = [1, 5, 20, 60]

    summary = {}
    for header in headers:
        try:
            entry = ''
            for period in smaPeriods:
                data = pd.to_numeric(df[header], errors='coerce').dropna(
                    how='any', axis=0).head(period)
                sma = round(data.mean(), 2)
                entry += ('' if entry == '' else ' / ') + str(sma).rjust(8)

            # closing price above both the 5-day and 20-day averages gets 👍,
            # below the 60-day average gets 👎
            if header == '收盤':
                data = [x.strip() for x in entry.split('/')]
                prefixIcon = ''
                if float(data[0]) > float(data[1]) and float(data[0]) > float(
                        data[2]):
                    prefixIcon = '👍'
                elif float(data[0]) < float(data[3]):
                    prefixIcon = '👎'
                entry = prefixIcon + entry

            # volume more than 3x the 5-day average gets 🏆
            if header == '張數':
                data = [x.strip() for x in entry.split('/')]
                if (float(data[0]) / float(data[1]) > 3.0):
                    entry = '🏆' + entry

            summary.update({
                header.replace(' ', '') + '(' + 'ma / '.join(
                    map(str, smaPeriods)) + 'ma)':
                str(entry)
            })
        except Exception:
            summary.update({
                header.replace(' ', '') + '(' + 'ma / '.join(
                    map(str, smaPeriods)) + 'ma)':
                ''
            })
    #print(summary)
    result = pd.DataFrame([summary])
    return result
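
The moving averages in GetTransaction are simply the mean of the first N rows, which assumes the scraped table lists the newest trading day first. A tiny self-contained illustration of the same calculation on synthetic data:

import pandas as pd

# synthetic closing prices, newest trading day first (as on the scraped page)
close = pd.Series([101.5, 100.0, 99.5, 98.0, 97.5, 96.0])
for period in (1, 5):
    sma = round(close.head(period).mean(), 2)
    print(f'{period}-day SMA: {sma}')  # 101.5 and 99.3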
Example #22
def downloadFiles(req):
    downloadFileList = req.POST.get("downloadFileList").split(",")
    print(downloadFileList)
    fileOperator = Utils.fileOperator()
    return fileOperator.zipFilesInResponse(downloadFileList)
Example #23
def GetChampionStock(op):
    # screening list
    if op == 0:
        competitors = GetBasicStockInfo(True)
        print(competitors)
        competitors.to_csv(f'{Utils.GetRootPath()}\Data\Temp\過濾清單.csv',
                           encoding='utf_8_sig')

    if op == 1:
        basicStockInfo_df = GetBasicStockInfo(True)

        for stockId in [
                '2477', '2915', '1608', '2809', '5469', '1313', '2357', '1304',
                '2855', '5533', '2891', '3036', '2505', '2816', '2905', '2461',
                '2885', '1513', '3033', '9945', '3702', '1904', '3022', '1776',
                '3028', '2535', '2353', '1308', '3048', '3312', '2387', '1305',
                '1604'
        ]:
            PE_df = GetPE(stockId)
            print(PE_df)

            Sleep()
            stockInfo_df = basicStockInfo_df[basicStockInfo_df['證券代號'] ==
                                             stockId]
            stockInfo_df.reset_index(drop=True, inplace=True)
            print(stockInfo_df)

            temp_df = pd.concat([stockInfo_df, PE_df], axis=1)
            temp_df.to_csv(f'{Utils.GetRootPath()}\Data\Temp\過濾清單(含本益比).csv',
                           mode='a',
                           header=False,
                           encoding='utf_8_sig')

    # detailed data per stock
    if op == 2:
        basicStockInfo_df = GetBasicStockInfo()
        #sum_df = pd.DataFrame()

        for stockId in ['9930']:
            print(stockId)

            stockInfo_df = basicStockInfo_df[basicStockInfo_df['證券代號'] ==
                                             stockId]
            stockInfo_df.reset_index(drop=True, inplace=True)
            print(stockInfo_df)

            if not stockInfo_df.empty:
                Sleep()
                finDetail_df = GetFinDetail(stockId)
                print(finDetail_df)

                PE_df = GetPE(stockId)
                print(PE_df)

                Sleep()
                transaction_df = GetTransaction(stockId)
                print(transaction_df)

                volume_df = GetVolume(stockId)
                print(volume_df)

                Sleep()
                dividend_df = GetDividend(stockId)
                print(dividend_df)

                Sleep()
                distribution_df = shareholderDistribution.GetDistribution(
                    stockId)
                print(distribution_df)

                # merge all columns into a single row
                temp_df = pd.concat([
                    stockInfo_df, transaction_df, volume_df, PE_df,
                    distribution_df, finDetail_df, dividend_df
                ],
                                    axis=1)
                print(temp_df)

                # append the row to the summary dataframe
                #sum_df = pd.concat([sum_df, temp_df], axis=0)

                # append each row to the CSV without a header
                temp_df.to_csv(f'{Utils.GetRootPath()}\Data\Temp\彙整清單.csv',
                               mode='a',
                               header=False,
                               encoding='utf_8_sig')

        # write the summary to CSV
        #sum_df.to_csv('彙整清單.csv', encoding='utf_8_sig')

    # daily chip-level (ownership/volume) data
    if op == 3:
        basicStockInfo_df = GetBasicStockInfo()
        #sum_df = pd.DataFrame()
        for stockId in stocks:
            print(stockId)

            stockInfo_df = basicStockInfo_df[basicStockInfo_df['證券代號'] ==
                                             stockId]
            stockInfo_df.reset_index(drop=True, inplace=True)
            print(stockInfo_df)

            if not stockInfo_df.empty:
                Sleep()
                transaction_df = GetTransaction(stockId)
                print(transaction_df)

                volume_df = GetVolume(stockId)
                print(volume_df)

                temp_df = pd.concat([stockInfo_df, transaction_df, volume_df],
                                    axis=1)
                print(temp_df)

                temp_df.to_csv(f'{Utils.GetRootPath()}\Data\Daily\籌碼面資料.csv',
                               mode='a',
                               header=False,
                               encoding='utf_8_sig')
                # merge all columns into a single row
                #sum_df = pd.concat([sum_df, temp_df], axis=0)

        # append rows to the summary dataframe
        # sum_df.to_csv('籌碼面資料.csv',encoding='utf_8_sig')

    # major shareholders and P/E ratio
    if op == 4:
        shareholderDistribution.WriteData()

        for stockId in stocks:
            print(stockId)

            Sleep()
            distribution_df = shareholderDistribution.GetDistribution(stockId)
            print(distribution_df)

            Sleep()
            PE_df = GetPE(stockId)
            print(PE_df)

            temp_df = pd.concat([PE_df, distribution_df], axis=1)
            print(temp_df)

            temp_df.to_csv(
                f'{Utils.GetRootPath()}\Data\\Weekly\股東分布_本益比_{date.today().strftime("%Y%m%d")}.csv',
                mode='a',
                header=False,
                encoding='utf_8_sig')

    if op == 5:
        directorSharehold.WriteData()

    if op == 7:
        basicStockInfo_df = GetBasicStockInfo()
        topVolumeStocks = dailyTopVolume.GetTopVolume()[:100]

        for stockId in topVolumeStocks:
            print(stockId)

            stockInfo_df = basicStockInfo_df[basicStockInfo_df['證券代號'] ==
                                             stockId]
            stockInfo_df.reset_index(drop=True, inplace=True)
            print(stockInfo_df)

            if not stockInfo_df.empty:
                volume_df = GetVolume(stockId)
                print(volume_df)

                temp_df = pd.concat([stockInfo_df, volume_df], axis=1)
                print(temp_df)

                temp_df.to_csv(
                    f'{Utils.GetRootPath()}\Data\Daily\異常籌碼資料_{date.today().strftime("%Y%m%d")}.csv',
                    mode='a',
                    header=False,
                    encoding='utf_8_sig')

        # delete temporary files
        try:
            folderPath = pathlib.Path(
                f'{Utils.GetRootPath()}\Data\Daily\Chip\{(date.today() - timedelta(days=1)).strftime("%Y%m%d")}'
            )
            Utils.delete_folder(folderPath)
        except Exception as ex:
            print(ex)
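
Utils.delete_folder is only referenced in this last cleanup step. A plausible sketch is shown below, assuming the helper simply removes the whole directory tree and silently skips a path that no longer exists; the real helper may differ.

import shutil
import pathlib

def delete_folder(folder_path):
    # Hypothetical stand-in for Utils.delete_folder: remove the whole
    # directory tree, ignoring the call if the folder no longer exists.
    path = pathlib.Path(folder_path)
    if path.exists():
        shutil.rmtree(path)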