Exemplo n.º 1
0
def _download_sheet(stock_code, sheet_type, term_type, sheet_part):
    """Download one finance sheet and store it as a UTF-8 CSV file.

    The sheet is requested from the URL built by ``_download_url``, decoded
    from GBK and normalised before being written to the path returned by
    ``finance_sheet_file_path``:

    * ``\\r\\n`` line endings become ``\\n``;
    * every line is stripped and empty lines are dropped;
    * a single trailing comma is removed from each line;
    * the file always ends with a newline.

    Args:
        stock_code: Code of the stock whose sheet is downloaded.
        sheet_type: Sheet type, forwarded to the URL and path builders.
        term_type: Term type, forwarded to the URL and path builders.
        sheet_part: Sheet part, forwarded to the URL and path builders.

    Raises:
        error.ServerException: If the server does not answer with a
            successful status, or if any other error occurs while
            downloading, decoding or writing (wrapped with code
            ``SERVER_ERR_DOWNLOAD_FAILED``).
    """
    detail = 'download finance_sheet(sheet_type[{}], term_type[{}], sheet_part[{}]) failed.'.format(
        sheet_type, term_type, sheet_part)
    try:
        resp = requests.get(
            _download_url(stock_code, sheet_type, term_type, sheet_part),
            stream=True)
        if resp is None:
            raise error.ServerException(error.SERVER_ERR_DOWNLOAD_FAILED,
                                        detail)
        try:
            if not resp.ok:
                raise error.ServerException(error.SERVER_ERR_DOWNLOAD_FAILED,
                                            detail)
            text = resp.content.decode('gbk')
        finally:
            # With stream=True the connection is only handed back to the pool
            # once the body is fully read or the response is closed, so close
            # it explicitly -- the failure path never reads the body.
            resp.close()

        stripped = [
            line.strip()
            for line in text.replace('\r\n', '\n').strip().split('\n')
        ]
        lines = [
            line[:-1] if line.endswith(',') else line
            for line in stripped
            if line
        ]
        # The empty last element makes the joined text end with '\n'.
        lines.append('')
        with codecs.open(
                finance_sheet_file_path(stock_code, sheet_type, term_type,
                                        sheet_part), 'w', 'utf-8') as f:
            f.write('\n'.join(lines))
    except error.ServerException:
        logger.error('{}.'.format(detail))
        # Bare raise keeps the original traceback untouched.
        raise
    except Exception as e:
        raise error.ServerException(
            error.SERVER_ERR_DOWNLOAD_FAILED,
            '{}.{}'.format(detail, error.exception_string(e)))
Exemplo n.º 2
0
def create_directory(directory_path):
    """Ensure that ``directory_path`` exists as a directory.

    Missing parent directories are created as well. Nothing happens when the
    directory is already there.

    Args:
        directory_path: Path of the directory to create.

    Raises:
        error.ServerException: If the path exists but is not a directory
            (code ``SERVER_ERR_INTERNAL``).
        OSError: If the directory cannot be created.
    """
    if os.path.isdir(directory_path):
        return
    if os.path.exists(directory_path):
        # Pass an explicit error code so the message is not taken for the code.
        raise error.ServerException(
            error.SERVER_ERR_INTERNAL,
            'path[{}] exists and is not directory'.format(directory_path))
    try:
        os.makedirs(directory_path)
    except OSError:
        # Something else may have created the directory between the checks
        # above and makedirs(); that outcome is exactly what was asked for.
        if not os.path.isdir(directory_path):
            raise
Exemplo n.º 3
0
def clean_directory(directory_path):
    """Make ``directory_path`` an existing, empty directory.

    An existing directory has every entry removed through ``remove_path``;
    a missing one is created, including missing parents.

    Args:
        directory_path: Path of the directory to empty or create.

    Raises:
        error.ServerException: If the path exists but is not a directory
            (code ``SERVER_ERR_INTERNAL``).
        OSError: If the directory cannot be created.
    """
    if os.path.isdir(directory_path):
        for entry in os.listdir(directory_path):
            remove_path(os.path.join(directory_path, entry))
        return
    if os.path.exists(directory_path):
        # Pass an explicit error code so the message is not taken for the code.
        raise error.ServerException(
            error.SERVER_ERR_INTERNAL,
            'path[{}] is not directory'.format(directory_path))
    try:
        os.makedirs(directory_path)
    except OSError:
        # Tolerate the directory being created concurrently between the
        # checks above and makedirs(); a fresh directory is what we want.
        if not os.path.isdir(directory_path):
            raise
Exemplo n.º 4
0
def remove_path(path):
    """Delete ``path`` if it exists, whatever kind of entry it is.

    * Symbolic links (including dangling ones) and regular files are
      unlinked; the target of a link is left alone.
    * Directories are removed recursively.
    * A path that does not exist is silently ignored.

    Args:
        path: File system path to remove.

    Raises:
        error.ServerException: If the path exists but is neither a link, a
            regular file nor a directory (code ``SERVER_ERR_INTERNAL``).
    """
    # lexists() also reports dangling symlinks, which exists() would hide.
    if not os.path.lexists(path):
        return

    # Links must be checked first: isdir() follows symlinks, and
    # shutil.rmtree() refuses to operate on a symbolic link.
    if os.path.islink(path) or os.path.isfile(path):
        os.remove(path)
    elif os.path.isdir(path):
        shutil.rmtree(path)
    else:
        # Pass an explicit error code so the message is not taken for the code.
        raise error.ServerException(
            error.SERVER_ERR_INTERNAL,
            'nonsupport type for remove path[{}]'.format(path))
Exemplo n.º 5
0
def download_stock_basics():
    """Download the stock basics table and store it as CSV.

    ``BASIC_DIR`` is emptied first. The table returned by
    ``ts.get_stock_basics()`` is then written with ``utils.df2csv``. On any
    failure the directory is emptied again so no partial output is left.

    Raises:
        error.ServerException: With code ``SERVER_ERR_DOWNLOAD_FAILED`` if
            fetching or writing the table fails.
    """
    utils.clean_directory(BASIC_DIR)
    try:
        df = ts.get_stock_basics()
        # NOTE(review): _stock_basics_file_path is passed as-is, not called;
        # presumably it is a path value rather than a function -- confirm.
        utils.df2csv(df, _stock_basics_file_path)
    except Exception as e:
        reason = error.exception_string(e)
        # The message used to name download_categories (copy-paste slip).
        logger.error('download_stock_basics failed. {}'.format(reason))
        utils.clean_directory(BASIC_DIR)
        raise error.ServerException(error.SERVER_ERR_DOWNLOAD_FAILED, reason)
Exemplo n.º 6
0
def download_forecast(year, quarter):
    """Download the forecast table of one quarter and store it as CSV.

    The table returned by ``ts.forecast_data(year, quarter)`` is indexed by
    its ``code`` column and written with ``utils.df2csv`` to the path given
    by ``_forecast_file_path(year, quarter)``.

    Args:
        year: Year of the forecast to download.
        quarter: Quarter of the forecast to download.

    Raises:
        error.ServerException: With code ``SERVER_ERR_DOWNLOAD_FAILED`` if
            any step fails; the failure is logged first.
    """
    try:
        forecast = ts.forecast_data(year, quarter)
        forecast.set_index('code', inplace=True)
        utils.df2csv(forecast, _forecast_file_path(year, quarter))
    except Exception as exc:
        log_line = 'download_forecast(year[{}], quarter[{}]) failed.{}'.format(
            year, quarter, error.exception_string(exc))
        logger.error(log_line)
        raise error.ServerException(error.SERVER_ERR_DOWNLOAD_FAILED,
                                    error.exception_string(exc))
Exemplo n.º 7
0
def download_categories():
    """Download every category table and store each one as CSV.

    ``CATEGORY_DIR`` is emptied first. For each entry of ``_all_categories``
    (a category name mapped to a one-element tuple holding a function name)
    the function of that name is looked up on ``ts`` and called. Its result
    is indexed by the ``code`` column and written with ``utils.df2csv``.

    The first failure is logged and aborts the whole run; the directory is
    then emptied again.

    Raises:
        error.ServerException: With code ``SERVER_ERR_DOWNLOAD_FAILED`` if
            any category fails to download or to be written.
    """
    utils.clean_directory(CATEGORY_DIR)
    try:
        for category_method, (func_name,) in _all_categories.items():
            try:
                fetch = getattr(ts, func_name)
                table = fetch()
                table.set_index('code', inplace=True)
                utils.df2csv(table, _category_file_path(category_method))
            except Exception as inner:
                failure = 'download_categories[{}] failed. {}'.format(
                    category_method, error.exception_string(inner))
                logger.error(failure)
                raise inner
    except Exception as outer:
        # Drop whatever was already written so no partial set remains.
        utils.clean_directory(CATEGORY_DIR)
        raise error.ServerException(error.SERVER_ERR_DOWNLOAD_FAILED,
                                    error.exception_string(outer))
Exemplo n.º 8
0
def _get_latest_report_html(report_type, page_no, start_time=None, end_time=None):
    """Fetch one result page of the report search as decoded HTML.

    A form is POSTed to the cninfo search page and the GBK-encoded answer is
    returned as text.

    Args:
        report_type: Integer 1-4, mapped to the site's notice type code.
        page_no: Number of the result page to request.
        start_time: Start of the search window; today's date
            (``YYYY-MM-DD``) when ``None``.
        end_time: End of the search window; today's date when ``None``.

    Returns:
        The response body decoded from GBK.

    Raises:
        KeyError: If ``report_type`` is not one of 1, 2, 3, 4.
        error.ServerException: With code ``SERVER_ERR_DOWNLOAD_FAILED`` if
            the response is not successful.
    """
    notice_types = {1: '010305', 2: '010303', 3: '010307', 4: '010301'}
    now = utils.current_datetime()
    today = '%d-%02d-%02d' % (now.year, now.month, now.day)
    url = 'http://www.cninfo.com.cn/search/search.jsp'

    # Both ends of the window default to today.
    window_start = start_time if start_time is not None else today
    window_end = end_time if end_time is not None else today

    form = {
        'orderby': 'date11',
        'marketType': '',
        'stockCode': '',
        'keyword': '',
        'noticeType': notice_types[report_type],
        'pageNo': page_no,
        'startTime': window_start,
        'endTime': window_end,
    }
    resp = requests.post(url, form)
    if resp is None or not resp.ok:
        raise error.ServerException(error.SERVER_ERR_DOWNLOAD_FAILED, 'download html from {} failed'.format(url))
    return resp.content.decode('gbk')
Exemplo n.º 9
0
def crawl():
    """Start a crawl, choosing between a full and an incremental run.

    The ``crawling`` flag of ``CrawlSummary()`` is tested and set while
    holding ``_crawl_lock``, so only one caller can get past that point.
    A full crawl runs until ``full_complete`` is set; after that every call
    runs an incremental crawl.

    NOTE(review): ``CrawlSummary()`` is instantiated for every access, so it
    presumably returns one shared object -- confirm. The ``crawling`` flag is
    not reset in this function.

    Raises:
        error.ServerException: With code ``SERVER_ERR_OP_CONCURRENT`` if a
            crawl is already marked as running.
    """
    with _crawl_lock:
        if CrawlSummary().crawling:
            raise error.ServerException(error.SERVER_ERR_OP_CONCURRENT)
        CrawlSummary().crawling = True

    # Record the start of the full crawl the first time through.
    if CrawlSummary().full_time is None:
        CrawlSummary().full_start()

    if not CrawlSummary().full_complete:
        crawl_full()
        return

    # Record the start of the incremental crawl the first time through.
    if CrawlSummary().increment_time is None:
        CrawlSummary().increment_start()
    crawl_increment()
Exemplo n.º 10
0
def finance_sheet_file_path(stock_code, sheet_type, term_type, sheet_part):
    """Return the CSV path of one finance sheet of a stock.

    The file lives in ``finance_sheet_directory(stock_code)`` and is named
    ``<sheet_type>_<term_type>_<stock_code>.csv`` when ``sheet_part`` is the
    empty string, otherwise
    ``<sheet_type>_<sheet_part>_<term_type>_<stock_code>.csv``.

    Args:
        stock_code: Code of the stock.
        sheet_type: Must be a key of ``sheet_types``.
        term_type: Must be listed in ``sheet_types[sheet_type]['terms']``.
        sheet_part: Must be listed in ``sheet_types[sheet_type]['parts']``.

    Returns:
        The path of the sheet file.

    Raises:
        error.ServerException: With code ``SERVER_ERR_INTERNAL`` if the
            sheet type, term type or sheet part is not a known value.
    """
    # Validate the parameters; 'and' short-circuits, so the per-type lookups
    # only happen for a known sheet_type.
    params_ok = (sheet_type in sheet_types
                 and term_type in sheet_types[sheet_type]['terms']
                 and sheet_part in sheet_types[sheet_type]['parts'])
    if not params_ok:
        message = 'wrong param: stock_code[{}], sheet_type[{}], term_type[{}], sheet_part[{}]'.format(
            stock_code, sheet_type, term_type, sheet_part)
        raise error.ServerException(error.SERVER_ERR_INTERNAL, message)

    directory = finance_sheet_directory(stock_code)
    if sheet_part == '':
        file_name = '{}_{}_{}.csv'.format(sheet_type, term_type, stock_code)
    else:
        file_name = '{}_{}_{}_{}.csv'.format(sheet_type, sheet_part,
                                             term_type, stock_code)
    return os.path.join(directory, file_name)
Exemplo n.º 11
0
def raise_exception(request, e, content=None):
    """Log a failure and re-raise it in the form the caller expects.

    This function never returns normally. Because the traceback text comes
    from ``traceback.format_exc()``, it is meant to be called while ``e`` is
    being handled (inside an ``except`` block).

    Args:
        request: When not ``None``, a jsonrpc ``ServerError`` carrying the
            error text is raised; when ``None``, the ``ServerException``
            itself is raised.
        e: The exception to report. Anything that is not an
            ``error.ServerException`` is wrapped in one with code
            ``SERVER_ERR_INTERNAL`` and the current traceback as message.
        content: Optional description of the failed operation; when given,
            an error line is logged.

    Raises:
        jsonrpc.exceptions.ServerError: If ``request`` is not ``None``.
        error.ServerException: If ``request`` is ``None``.
    """
    from jsonrpc.exceptions import ServerError
    import traceback

    if not isinstance(e, error.ServerException):
        wrapped_msg = '{}\n{}'.format(e, traceback.format_exc())
        raise_e = error.ServerException(error.SERVER_ERR_INTERNAL, wrapped_msg)
    else:
        raise_e = e

    err_msg = 'download error code: {}, msg: {}'.format(raise_e.err_code, raise_e.err_msg)

    # Log only when the caller described what was being attempted.
    if content is not None:
        logger.error('{} failed. {}'.format(content, err_msg))

    # Without a request there is no RPC layer to translate for.
    if request is None:
        raise raise_e
    raise ServerError(err_msg)
Exemplo n.º 12
0
 def crawler_object(code):
     """Return the ``CrawlerModel`` row whose ``code`` equals ``code``.

     Args:
         code: Value of the ``code`` field to look up.

     Returns:
         The matching ``CrawlerModel`` instance.

     Raises:
         error.ServerException: With code ``SERVER_ERR_OBJECT_NOT_EXIST`` if
             no such row exists.
     """
     try:
         crawler = CrawlerModel.objects.get(code=code)
     except ObjectDoesNotExist as missing:
         raise error.ServerException(error.SERVER_ERR_OBJECT_NOT_EXIST,
                                     error.exception_string(missing))
     return crawler