示例#1
0
import requests
from bs4 import BeautifulSoup

from cnsipo.utils import retry, JobQueue, threaded
from cnsipo.shared import get_logger, ContentError, FORGIVEN_ERROR, \
    DETAIL_KINDS


# Kind codes handled by this module.
# NOTE(review): KINDS appears to be parallel to STR_SRC and STR_WHERE (same
# length, same ordering) -- confirm against callers.
KINDS = ['fmgb', 'fmsq', 'syxx', 'wgsq']
# Value sent as the 'strSources' request parameter; indexed by `kind` in
# detail_params().
STR_SRC = ['fmmost', 'fmmost', 'xxmost', 'wgmost']
# Prefix placed in front of "INDEX=1" inside the 'strWhere' request
# parameter; indexed by `kind` in detail_params().
STR_WHERE = ['GB', 'SQ', 'GB', 'SQ']
# NOTE(review): DELAY and RETRIES are not used in the visible code;
# presumably they configure the imported `retry` helper (pause between
# attempts and maximum attempts) -- confirm units and usage.
DELAY = 3
RETRIES = 1000

# Module-wide logger obtained from the project's shared helper.
logger = get_logger()


def detail_params(patent_id, kind):
    """Build the URL and request parameters for a patent detail query.

    ``kind`` is used as an index into the module-level ``STR_SRC`` and
    ``STR_WHERE`` lists; ``patent_id`` is interpolated into the
    ``strWhere`` filter expression.

    Returns a ``(url, params)`` tuple where ``params`` is a dict holding
    the keys ``strSources``, ``strWhere``, ``strLicenseCode`` and
    ``pageNow``.
    """
    source = STR_SRC[kind]
    where_clause = "申请号='{}' and {}INDEX=1".format(
        patent_id, STR_WHERE[kind])
    url = "http://epub.sipo.gov.cn/patentdetail.action"
    return url, {
        'strSources': source,
        'strWhere': where_clause,
        'strLicenseCode': "",
        'pageNow': 1,
    }


def detail_parse(bs, kind):
    # TODO: not work for kind 'wgsq'
    details = {}
    tbl = bs.table.table
示例#2
0
# -*- coding: utf-8 -*-
"""
Create a UIG(University/Industry/Government) database
"""

from __future__ import print_function

import sys
from optparse import OptionParser

import psycopg2

from cnsipo.shared import get_logger
from cnsipo.patent_parser import PatentParser

# Module-wide logger obtained from the project's shared helper.
logger = get_logger()

# String constants naming individual fields.
# NOTE(review): presumably these are database column names interpolated
# into the SQL built by functions such as gen_uig_data() -- confirm against
# the table schema.
APP_NO = 'app_no'
APP_YEAR = 'app_year'
STATE = 'state'
ADDRESS = 'address'
APPLICANT = 'applicant'
COLLAB = 'collab'
KIND = 'kind'
ORG = 'org'
ORG2 = 'org2'
# Module-level placeholder, initially None.
# NOTE(review): presumably assigned a PatentParser instance elsewhere in
# this file before use -- confirm.
patent_parser = None


def gen_uig_data(conn, table, aux_tbl, year, batch_size):
    stmt = "SELECT {}, {}, {} FROM {} WHERE {} IN "\