def _get_kindcode_and_startpage(country, patnum, kindcode, target_folder):
    """
    Get kindcode and startpage of a given patent.

    Iterates through a list of possible kindcodes; for every kindcode, the
    first page of the patent document is tried to be retrieved via
    ``download.get_pdf_page``. Whatever that call returns is taken as the
    kindcode; if it is truthy, the meta data for the description part is
    requested and its ``'DESCRIPTION'`` entry is used as the start page.
    The search stops as soon as a start page has been found.

    - country: Country code
    - patnum: Patent number
    - kindcode: Kindcode (e.g. "A1", "B", ...); tried first when given
    - target_folder: Folder that the first page is downloaded to

    Returns a tuple ``(kindcode, startpage)``. ``startpage`` is ``None``
    when no candidate yielded meta data; ``kindcode`` is then whatever was
    assigned last (the result of the last download attempt, or the value
    passed in if every attempt raised).
    """
    startpage = None

    possible_kindcodes = []
    if kindcode:
        possible_kindcodes = [kindcode]
    possible_kindcodes += ['A1', 'A2', 'A3', 'A4', 'B1']
    print(possible_kindcodes)

    for code in possible_kindcodes:
        try:
            # The return value of the download doubles as the kindcode.
            kindcode = download.get_pdf_page(country, patnum, code, 1,
                                             target_folder)
            if not kindcode:
                continue

            meta = download.get_meta_data(country, patnum, 'DESCRIPTION',
                                          kindcode, skip=True)
            print(meta)
            if meta:
                startpage = meta['DESCRIPTION']
                break
        except Exception as exc:
            # Best effort: a failing candidate must not abort the search,
            # but the failure is reported instead of being dropped silently.
            # KeyboardInterrupt/SystemExit are deliberately not caught.
            print("Unexpected error: %r" % (exc,))

    # Single formatted argument keeps the output identical on Python 2 and 3.
    print("%s %s" % (kindcode, startpage))
    return kindcode, startpage
def _parse_pages_to_download(locations, country, patnum, kindcode, folder):
    """
    Parses input and tries to retrieve pages of the description part of a
    patent.

    ``locations`` is split into pages and columns by ``parser.get_pages``.
    The kindcode and the start page of the description are determined by
    ``_get_kindcode_and_startpage``; nothing is downloaded unless both are
    truthy.

    - locations: Input handed to ``parser.get_pages``
    - country: Country code; pages are not offset for "EP"
    - patnum: Patent number
    - kindcode: Kindcode to try first (may be empty)
    - folder: Passed to ``_create_target_folder`` to obtain the target folder

    Returns the tuple ``(kindcode, startpage)`` as determined by
    ``_get_kindcode_and_startpage``.
    """
    pages, columns = parser.get_pages(locations)
    target_folder = _create_target_folder(country, patnum, folder)
    kindcode, startpage = _get_kindcode_and_startpage(country, patnum,
                                                      kindcode, target_folder)

    if not (startpage and kindcode):
        return kindcode, startpage

    # Requested page numbers are relative to the description, except for
    # "EP", where they are used as given.
    page_offset = 0
    if country != "EP":
        page_offset += startpage - 1
    for page in pages:
        download.get_pdf_page(country, patnum, kindcode, page + page_offset,
                              target_folder, "page-" + str(page))

    for column in columns:
        # Two columns per page: columns 1 and 2 map to page 1, and so on.
        column_page = int(math.ceil(column / 2.))
        # NOTE(review): columns are always offset by the start page, also
        # for "EP" (unlike pages above) - confirm this is intended.
        column_offset = startpage - 1
        print("%s %s %s" % (country, patnum, kindcode))
        download.get_pdf_page(country, patnum, kindcode,
                              column_page + column_offset, target_folder,
                              "column-" + str(column))

    return kindcode, startpage