# Example 1
# 0
def get_news_item(start_date, end_date):
    """Crawl wallstreetcn live-news items between two dates and save them to CSV.

    Pages backwards through the API from the end of ``end_date`` (23:59:59)
    towards the start of ``start_date`` (00:00:00), appending "time,text"
    lines to ../files/wallstreetcn_<start>_<end>.csv after every page so
    partial progress survives a crash.

    Args:
        start_date: 'YYYYMMDD' string; crawling stops once the cursor passes
            the beginning of this day.
        end_date: 'YYYYMMDD' string; crawling begins at the end of this day.
    """
    # Crawl from the newest items back towards the oldest.
    reverse_pattern = (int(end_date[0:4]), int(end_date[4:6]),
                       int(end_date[6:8]), 23, 59, 59, 99, 99, 99)
    reverse_cursor = CommonUtil.convert_date_to_long(reverse_pattern)
    logger.info("reverseCursor is %s" % reverse_cursor)
    finished_pattern = (int(start_date[0:4]), int(start_date[4:6]),
                        int(start_date[6:8]), 0, 0, 0, 0, 0, 0)
    finished_cursor = CommonUtil.convert_date_to_long(finished_pattern)
    logger.info("finishedCursor is %s" % finished_cursor)
    # Endpoint serving the live-news feed we want to crawl.
    url_pattern = 'https://api-prod.wallstreetcn.com/apiv1/content/lives?' \
                  'channel=weex-channel,gold-channel,gold-forex-channel,' \
                  'forex-channel,goldc-channel,oil-channel&client=pc'
    news_limit = 100
    cursor = reverse_cursor
    # Page counter.
    page_num = 0
    # Reuse one session across pages instead of creating one per request.
    session = requests.Session()
    while int(cursor) > int(finished_cursor):
        page_num += 1
        # BUG FIX: the page size used to be appended as a bare "&100" with no
        # parameter name, so the API could not see it; name it "limit"
        # explicitly (NOTE(review): confirm the exact parameter name against
        # the API).
        url = "%s&cursor=%s&limit=%s" % (url_pattern, cursor, news_limit)
        logger.info(url)
        page = session.get(url)
        page.encoding = 'utf-8'
        if page.status_code == 200:
            res_data = json.loads(page.text)['data']
            cursor = res_data['next_cursor']
            lines = []
            for item in res_data['items']:
                # Collapse embedded newlines so each news item occupies
                # exactly one CSV line.
                context = item['content_text'].strip() \
                    .replace('\n', '').replace('\r', '')
                item_time = CommonUtil.convert_long_to_date(item['display_time'])
                lines.append(item_time + "," + context + "\n")
            # Persist each page as it is fetched (save_to_file is assumed to
            # append — TODO confirm).
            CommonUtil.save_to_file(
                '../files/wallstreetcn_%s_%s.csv' % (start_date, end_date),
                ''.join(lines))
        else:
            # BUG FIX: without a fresh next_cursor the loop condition would
            # never change, spinning forever on a failing endpoint; warn and
            # abort instead (consistent with get_market_data).
            logger.warning("Response Code is %s, Please Check!" %
                           page.status_code)
            break
        # Stop when the API signals there is no further page.
        if cursor == '':
            break
    logger.info("Finished With %s Pages Crawled." % page_num)
# Example 2
# 0
def get_market_data(i_type, i_count, s_data_type):
    """Fetch candle/indicator data for one product and dump it to CSV.

    Writes MARKET_DATA_PATH/<s_data_type>.csv containing a header row of
    field names followed by one row per candle; every cell is suffixed with
    a comma and the first column is converted from a long timestamp to a
    date string.

    Args:
        i_type: candle period code passed to the API.
        i_count: number of data points to request.
        s_data_type: product code to query (also used as the file name).
    """
    # Target URL for the kline endpoint.
    url = ('https://forexdata.wallstreetcn.com/kline?prod_code=' +
           s_data_type + '&candle_period=' + str(i_type) +
           '&fields=time_stamp,open_px,close_px,high_px,low_px,ma5,ma10,'
           'ma20,ma60,upper,mid,lower,diff,dea,'
           'macd,k,d,j,rsi6,rsi12,rsi24&data_count=' + str(i_count))
    logger.info(url)
    response = requests.Session().get(url)
    response.encoding = 'utf-8'
    if response.status_code != 200:
        logger.warning("Response Code is %s, Please Check!" %
                       response.status_code)
        return
    candle = json.loads(response.text)['data']['candle']
    # Header row: each field name followed by a comma (trailing comma kept
    # to match the data rows below).
    rows = [''.join(name + ',' for name in candle['fields']) + '\n']
    records = candle[s_data_type]
    for record in records:
        # Column 0 is a long timestamp; render it as a date string.
        cells = [CommonUtil.convert_long_to_date(value) if index == 0
                 else str(value)
                 for index, value in enumerate(record)]
        rows.append(''.join(cell + ',' for cell in cells) + '\n')
    logger.info("Finished With %s Items Crawled." % (len(records)))
    CommonUtil.save_to_file(MARKET_DATA_PATH + '/%s.csv' % s_data_type,
                            ''.join(rows))