Exemplo n.º 1
0
def start_requests(conn, beginDate, endDate):
    """Download shareholding tables from TDCC and store them in ``share_ratio``.

    For every Friday between ``beginDate`` and ``endDate`` (newest first), the
    codes in ``own`` that have no ``share_ratio`` row for that date are looked
    up, one POST per code is sent to the TDCC endpoint, and every table row
    whose share count passes ``isNumer`` is inserted and committed.

    Args:
        conn: DB-API connection. The driver must use the ``%s`` paramstyle,
            which the INSERT statement already relied on.
        beginDate: Start of the range, in any form ``pd.date_range`` accepts.
        endDate: End of the range, in any form ``pd.date_range`` accepts.

    Returns:
        None. Results are written to the database and echoed to stdout.
    """
    cursor = conn.cursor()
    print(beginDate)
    print(endDate)

    url = 'https://www.tdcc.com.tw/smWeb/QryStockAjax.do'
    headers = {"User-Agent" : "User-Agent:Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0;"}

    # The date is bound as a parameter instead of being spliced into the SQL
    # text, matching how the INSERT below passes its values.
    missing_sql = ("SELECT code "
                   "FROM own "
                   "where code not in ( "
                   "    select code "
                   "    from share_ratio "
                   "    where 1=1 "
                   "    and date = %s"
                   ") "
                   "group by code ")
    insert_sql = ("INSERT INTO share_ratio (`date`,`code`,`rank`,`number`,`person`,`rate`) "
                  "VALUES (%s,%s,%s,%s,%s,%s)")

    for day in pd.date_range(beginDate, endDate, freq='W-FRI')[::-1]:
        scaDate = day.strftime('%Y%m%d')

        cursor.execute(missing_sql, (scaDate,))
        code_list = [row[0] for row in cursor]

        # Best effort per date: any failure is printed and the remaining codes
        # of this date are skipped, then the next date is processed.
        try:
            for code in code_list:
                payload = {
                    'scaDates': scaDate,
                    'scaDate': scaDate,
                    'SqlMethod': 'StockNo',
                    'StockNo': code,
                    'radioStockNo': code,
                    'StockName': '',
                    'REQ_OPR': 'SELECT',
                    'clkStockNo': code,
                    'clkStockName': '',
                }
                html = requests.post(url, data=payload, headers=headers).content.decode('big5')
                data = Extractor(html, 'table.mt:eq(1)').df(1)
                for index, row in data.iterrows():
                    # NOTE(review): row 15 is always skipped; presumably it is
                    # a summary row -- confirm against the live table.
                    if index == 15:
                        continue
                    if isNumer(row['股  數/單位數']):
                        val = (scaDate, code, row['持股/單位數分級'], row['股  數/單位數'],
                               row['人  數'], row['占集保庫存數比例 (%)'])
                        cursor.execute(insert_sql, val)
                        conn.commit()
                        print(code, scaDate, row['持股/單位數分級'], row['股  數/單位數'],
                              row['人  數'], row['占集保庫存數比例 (%)'])
                # Pause between requests to the remote service.
                time.sleep(3)
        except Exception as e:
            print(e)
Exemplo n.º 2
0
 def setUp(self):
     """Build the extractor under test from a plain two-by-two table."""
     table_markup = """
     <table>
         <tr>
           <td>1</td>
           <td>2</td>
         </tr>
         <tr>
           <td>3</td>
           <td>4</td>
         </tr>
     </table>
     """
     self.extractor = Extractor(table_markup)
Exemplo n.º 3
0
 def setUp(self):
     """Build the extractor under test from a table using rowspan/colspan cells."""
     table_markup = """
     <table>
         <tr>
             <td rowspan=2>1</td>
             <td>2</td>
             <td rowspan=3>3</td>
         </tr>
         <tr>
             <td colspan=2>4</td>
         </tr>
         <tr>
             <td colspan=2>5</td>
         </tr>
     </table>
     """
     self.extractor = Extractor(table_markup)
Exemplo n.º 4
0
 def test_init_with_id(self):
     """Selecting ``#wanted`` must yield only the rows of that table."""
     markup = """
     <table id='wanted'>
         <tr>
           <td>1</td>
           <td>2</td>
         </tr>
         <tr>
           <td>3</td>
           <td>4</td>
         </tr>
     </table>
     <table id='unwanted'>
         <tr>
           <td>unwanted</td>
         </tr>
     </table>
     """
     # The extractor is handed an already-parsed PyQuery document plus a selector.
     document = pq(markup)
     parsed = Extractor(document, jquery='#wanted').parse()
     expected_rows = [[u'1', u'2'], [u'3', u'4']]
     self.assertEqual(parsed.return_list(), expected_rows)
Exemplo n.º 5
0
    def parse(self, response):
        """Yield one dict per row of the second ``table.mt`` table in ``response``.

        The table is read into a DataFrame, the '持股/單位數分級' column is
        dropped, the remaining four columns are renamed, and the ``code`` and
        ``date`` values from ``response.meta`` are prepended to every row.

        Args:
            response: Object exposing ``meta`` (mapping with 'code' and 'date')
                and ``dom`` (the parsed page handed to ``Extractor``).

        Yields:
            dict: One record per table row, keyed by column name.
        """
        meta = response.meta
        data = Extractor(response.dom, 'table.mt:eq(1)').df(1)
        del data['持股/單位數分級']
        # NOTE(review): row 15 gets a fixed sequence number of 17; presumably
        # it is the summary row -- confirm against the live table.
        data.loc[15, '序'] = 17
        data.columns = ['持股分級', '人數', '股數', '佔集保庫存數比例%']

        data.insert(0, 'code', meta['code'])
        data.insert(0, 'date', meta['date'])

        # Spell out 'records': the former 'row' only worked as a short-form
        # abbreviation of it, and pandas 2.0 rejects such abbreviations.
        for item in data.to_dict('records'):
            yield item
0
class TestExtractorTransformer(unittest.TestCase):
    """Checks that a ``transformer`` callable is applied to every cell."""

    def setUp(self):
        """Create an extractor that converts each cell with ``int``."""
        table_markup = """
        <table>
            <tr>
              <td>1</td>
              <td>2</td>
            </tr>
            <tr>
              <td>3</td>
              <td>4</td>
            </tr>
        </table>
        """
        self.extractor = Extractor(table_markup, transformer=int)

    def test_config_transformer(self):
        """Cells come back as integers rather than strings."""
        rows = self.extractor.return_list()
        expected_rows = [[1, 2], [3, 4]]
        self.assertEqual(rows, expected_rows)
Exemplo n.º 7
0
class TestSimpleExtractor(unittest.TestCase):
    """Checks extraction of a table without spanning cells."""

    def setUp(self):
        """Create an extractor over a plain two-by-two table."""
        table_markup = """
        <table>
            <tr>
              <td>1</td>
              <td>2</td>
            </tr>
            <tr>
              <td>3</td>
              <td>4</td>
            </tr>
        </table>
        """
        self.extractor = Extractor(table_markup)

    def test_return_list(self):
        """Rows are returned as lists of cell texts in document order."""
        rows = self.extractor.return_list()
        expected_rows = [[u'1', u'2'], [u'3', u'4']]
        self.assertEqual(rows, expected_rows)
Exemplo n.º 8
0
class TestConflictedExtractor(unittest.TestCase):
    """Checks extraction when rowspan and colspan cells overlap."""

    def setUp(self):
        """Create an extractor over a table with overlapping spans."""
        table_markup = """
        <table>
            <tr>
                <td rowspan=2>1</td>
                <td>2</td>
                <td rowspan=3>3</td>
            </tr>
            <tr>
                <td colspan=2>4</td>
            </tr>
            <tr>
                <td colspan=2>5</td>
            </tr>
        </table>
        """
        self.extractor = Extractor(table_markup)

    def test_return_list(self):
        """Spanning cells are repeated into every grid position they cover."""
        rows = self.extractor.return_list()
        expected_rows = [
            [u'1', u'2', u'3'],
            [u'1', u'4', u'3'],
            [u'5', u'5', u'3'],
        ]
        self.assertEqual(rows, expected_rows)
Exemplo n.º 9
0
 def setUp(self):
     """Build the extractor under test from a revenue table fixture.

     The fixture has two header rows (with colspan/rowspan cells), seven
     company rows, and a final total row whose label cell spans two columns.
     """
     revenue_markup = """
         <table width="100%" border="5" bordercolor="#FF6600" bgcolor="#FFFFFF">
             <tbody>
             <tr>
                 <th class="tt" colspan="2">&nbsp;</th>
                 <th class="tt" colspan="5">營業收入</th>
                 <th class="tt" colspan="3">累計營業收入</th>
                 <th rowspan="2" class="tt">備註</th>
             </tr>
             <tr>
                 <th class="tt">公司<br>代號</th>
                 <th class="tt">公司名稱</th>
                 <th class="tt">當月營收</th>
                 <th class="tt">上月營收</th>
                 <th class="tt">去年當月營收</th>
                 <th class="tt">上月比較<br>增減(%)</th>
                 <th class="tt">去年同月<br>增減(%)</th>
                 <th class="tt">當月累計營收</th>
                 <th class="tt">去年累計營收</th>
                 <th class="tt">前期比較<br>增減(%)</th>
             </tr>
             <tr align="right">
                 <td align="center">1101</td>
                 <td align="left">台泥</td>
                 <td nowrap=""> 10,757,628</td>
                 <td nowrap=""> 11,539,982</td>
                 <td nowrap=""> 7,858,569</td>
                 <td nowrap=""> -6.77</td>
                 <td nowrap=""> 36.89</td>
                 <td nowrap=""> 57,500,244</td>
                 <td nowrap=""> 45,893,851</td>
                 <td nowrap=""> 25.28</td>
                 <td align="center">-</td>
             </tr>
             <tr align="right">
                 <td align="center">1102</td>
                 <td align="left">亞泥</td>
                 <td nowrap=""> 7,549,925</td>
                 <td nowrap=""> 7,698,165</td>
                 <td nowrap=""> 5,331,442</td>
                 <td nowrap=""> -1.92</td>
                 <td nowrap=""> 41.61</td>
                 <td nowrap=""> 39,010,235</td>
                 <td nowrap=""> 28,812,149</td>
                 <td nowrap=""> 35.39</td>
                 <td align="center">-</td>
             </tr>
             <tr align="right">
                 <td align="center">1103</td>
                 <td align="left">嘉泥</td>
                 <td nowrap=""> 172,927</td>
                 <td nowrap=""> 185,856</td>
                 <td nowrap=""> 143,629</td>
                 <td nowrap=""> -6.95</td>
                 <td nowrap=""> 20.39</td>
                 <td nowrap=""> 1,000,927</td>
                 <td nowrap=""> 1,058,885</td>
                 <td nowrap=""> -5.47</td>
                 <td align="center">-</td>
             </tr>
             <tr align="right">
                 <td align="center">1104</td>
                 <td align="left">環球水泥</td>
                 <td nowrap=""> 337,575</td>
                 <td nowrap=""> 426,170</td>
                 <td nowrap=""> 318,948</td>
                 <td nowrap=""> -20.78</td>
                 <td nowrap=""> 5.84</td>
                 <td nowrap=""> 2,314,855</td>
                 <td nowrap=""> 2,159,764</td>
                 <td nowrap=""> 7.18</td>
                 <td align="center">-</td>
             </tr>
             <tr align="right">
                 <td align="center">1108</td>
                 <td align="left">幸福水泥</td>
                 <td nowrap=""> 276,298</td>
                 <td nowrap=""> 294,581</td>
                 <td nowrap=""> 243,699</td>
                 <td nowrap=""> -6.20</td>
                 <td nowrap=""> 13.37</td>
                 <td nowrap=""> 1,684,245</td>
                 <td nowrap=""> 1,761,992</td>
                 <td nowrap=""> -4.41</td>
                 <td align="center">-</td>
             </tr>
             <tr align="right">
                 <td align="center">1109</td>
                 <td align="left">信大水泥</td>
                 <td nowrap=""> 577,408</td>
                 <td nowrap=""> 625,561</td>
                 <td nowrap=""> 418,868</td>
                 <td nowrap=""> -7.69</td>
                 <td nowrap=""> 37.84</td>
                 <td nowrap=""> 2,809,558</td>
                 <td nowrap=""> 2,317,812</td>
                 <td nowrap=""> 21.21</td>
                 <td align="center">-</td>
             </tr>
             <tr align="right">
                 <td align="center">1110</td>
                 <td align="left">東泥</td>
                 <td nowrap=""> 119,405</td>
                 <td nowrap=""> 142,543</td>
                 <td nowrap=""> 107,913</td>
                 <td nowrap=""> -16.23</td>
                 <td nowrap=""> 10.64</td>
                 <td nowrap=""> 792,195</td>
                 <td nowrap=""> 684,515</td>
                 <td nowrap=""> 15.73</td>
                 <td align="center">-</td>
             </tr>
             <tr align="right">
                 <th class="tt" nowrap="" colspan="2" align="center">合計</th>
                 <td nowrap=""> 19,791,166</td>
                 <td nowrap=""> 20,912,858</td>
                 <td nowrap=""> 14,423,068</td>
                 <td nowrap=""> -5.36</td>
                 <td nowrap=""> 37.21</td>
                 <td> 105,112,259</td>
                 <td> 82,688,968</td>
                 <td nowrap=""> 27.11</td>
                 <td>&nbsp;</td>
             </tr>
             </tbody>
         </table>
     """
     self.extractor = Extractor(revenue_markup)
Exemplo n.º 10
0
class TestConflictedExtractor(unittest.TestCase):
    """Checks ``Extractor.df()`` against a revenue table with spanning header cells.

    The expected values show the contract under test: only the data rows are
    returned, numbers lose their thousands separators and surrounding
    whitespace, and the colspan label of the total row is repeated in both
    columns it covers.
    """

    def setUp(self):
        """Create an extractor over the revenue table fixture."""
        html = """
            <table width="100%" border="5" bordercolor="#FF6600" bgcolor="#FFFFFF">
                <tbody>
                <tr>
                    <th class="tt" colspan="2">&nbsp;</th>
                    <th class="tt" colspan="5">營業收入</th>
                    <th class="tt" colspan="3">累計營業收入</th>
                    <th rowspan="2" class="tt">備註</th>
                </tr>
                <tr>
                    <th class="tt">公司<br>代號</th>
                    <th class="tt">公司名稱</th>
                    <th class="tt">當月營收</th>
                    <th class="tt">上月營收</th>
                    <th class="tt">去年當月營收</th>
                    <th class="tt">上月比較<br>增減(%)</th>
                    <th class="tt">去年同月<br>增減(%)</th>
                    <th class="tt">當月累計營收</th>
                    <th class="tt">去年累計營收</th>
                    <th class="tt">前期比較<br>增減(%)</th>
                </tr>
                <tr align="right">
                    <td align="center">1101</td>
                    <td align="left">台泥</td>
                    <td nowrap=""> 10,757,628</td>
                    <td nowrap=""> 11,539,982</td>
                    <td nowrap=""> 7,858,569</td>
                    <td nowrap=""> -6.77</td>
                    <td nowrap=""> 36.89</td>
                    <td nowrap=""> 57,500,244</td>
                    <td nowrap=""> 45,893,851</td>
                    <td nowrap=""> 25.28</td>
                    <td align="center">-</td>
                </tr>
                <tr align="right">
                    <td align="center">1102</td>
                    <td align="left">亞泥</td>
                    <td nowrap=""> 7,549,925</td>
                    <td nowrap=""> 7,698,165</td>
                    <td nowrap=""> 5,331,442</td>
                    <td nowrap=""> -1.92</td>
                    <td nowrap=""> 41.61</td>
                    <td nowrap=""> 39,010,235</td>
                    <td nowrap=""> 28,812,149</td>
                    <td nowrap=""> 35.39</td>
                    <td align="center">-</td>
                </tr>
                <tr align="right">
                    <td align="center">1103</td>
                    <td align="left">嘉泥</td>
                    <td nowrap=""> 172,927</td>
                    <td nowrap=""> 185,856</td>
                    <td nowrap=""> 143,629</td>
                    <td nowrap=""> -6.95</td>
                    <td nowrap=""> 20.39</td>
                    <td nowrap=""> 1,000,927</td>
                    <td nowrap=""> 1,058,885</td>
                    <td nowrap=""> -5.47</td>
                    <td align="center">-</td>
                </tr>
                <tr align="right">
                    <td align="center">1104</td>
                    <td align="left">環球水泥</td>
                    <td nowrap=""> 337,575</td>
                    <td nowrap=""> 426,170</td>
                    <td nowrap=""> 318,948</td>
                    <td nowrap=""> -20.78</td>
                    <td nowrap=""> 5.84</td>
                    <td nowrap=""> 2,314,855</td>
                    <td nowrap=""> 2,159,764</td>
                    <td nowrap=""> 7.18</td>
                    <td align="center">-</td>
                </tr>
                <tr align="right">
                    <td align="center">1108</td>
                    <td align="left">幸福水泥</td>
                    <td nowrap=""> 276,298</td>
                    <td nowrap=""> 294,581</td>
                    <td nowrap=""> 243,699</td>
                    <td nowrap=""> -6.20</td>
                    <td nowrap=""> 13.37</td>
                    <td nowrap=""> 1,684,245</td>
                    <td nowrap=""> 1,761,992</td>
                    <td nowrap=""> -4.41</td>
                    <td align="center">-</td>
                </tr>
                <tr align="right">
                    <td align="center">1109</td>
                    <td align="left">信大水泥</td>
                    <td nowrap=""> 577,408</td>
                    <td nowrap=""> 625,561</td>
                    <td nowrap=""> 418,868</td>
                    <td nowrap=""> -7.69</td>
                    <td nowrap=""> 37.84</td>
                    <td nowrap=""> 2,809,558</td>
                    <td nowrap=""> 2,317,812</td>
                    <td nowrap=""> 21.21</td>
                    <td align="center">-</td>
                </tr>
                <tr align="right">
                    <td align="center">1110</td>
                    <td align="left">東泥</td>
                    <td nowrap=""> 119,405</td>
                    <td nowrap=""> 142,543</td>
                    <td nowrap=""> 107,913</td>
                    <td nowrap=""> -16.23</td>
                    <td nowrap=""> 10.64</td>
                    <td nowrap=""> 792,195</td>
                    <td nowrap=""> 684,515</td>
                    <td nowrap=""> 15.73</td>
                    <td align="center">-</td>
                </tr>
                <tr align="right">
                    <th class="tt" nowrap="" colspan="2" align="center">合計</th>
                    <td nowrap=""> 19,791,166</td>
                    <td nowrap=""> 20,912,858</td>
                    <td nowrap=""> 14,423,068</td>
                    <td nowrap=""> -5.36</td>
                    <td nowrap=""> 37.21</td>
                    <td> 105,112,259</td>
                    <td> 82,688,968</td>
                    <td nowrap=""> 27.11</td>
                    <td>&nbsp;</td>
                </tr>
                </tbody>
            </table>
        """
        self.extractor = Extractor(html)

    def test_return_df(self):
        """The DataFrame rows match the cleaned body of the fixture table."""
        expected = [
            [
                '1101', '台泥', '10757628', '11539982', '7858569', '-6.77',
                '36.89', '57500244', '45893851', '25.28', '-'
            ],
            [
                '1102', '亞泥', '7549925', '7698165', '5331442', '-1.92',
                '41.61', '39010235', '28812149', '35.39', '-'
            ],
            [
                '1103', '嘉泥', '172927', '185856', '143629', '-6.95', '20.39',
                '1000927', '1058885', '-5.47', '-'
            ],
            [
                '1104', '環球水泥', '337575', '426170', '318948', '-20.78',
                '5.84', '2314855', '2159764', '7.18', '-'
            ],
            [
                '1108', '幸福水泥', '276298', '294581', '243699', '-6.20',
                '13.37', '1684245', '1761992', '-4.41', '-'
            ],
            [
                '1109', '信大水泥', '577408', '625561', '418868', '-7.69',
                '37.84', '2809558', '2317812', '21.21', '-'
            ],
            [
                '1110', '東泥', '119405', '142543', '107913', '-16.23', '10.64',
                '792195', '684515', '15.73', '-'
            ],
            [
                '合計', '合計', '19791166', '20912858', '14423068', '-5.36',
                '37.21', '105112259', '82688968', '27.11', ''
            ],
        ]
        # DataFrame.as_matrix() was removed in pandas 1.0; .values exists on
        # both old and current releases and gives the same nested lists.
        # Assumes df() returns a pandas DataFrame, as the original call implied.
        self.assertEqual(self.extractor.df().values.tolist(), expected)