Python Document.save примеры использования

Язык программирования: Python

Пространство имен/Пакет: search.models

Класс/Тип: Document

Метод/Функция: save

Примеров на hotexamples.com: 2

Python Document.save — найдено 2 примера. Это лучшие примеры кода на Python для search.models.Document.save, полученные из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

save(2)

Пример #1

Показать файл

Файл: spiderAll.py Проект: yuanzai/bblio2

    def parse_item(self, response):
        """Store the crawled response as a ``Document`` row for this spider's site.

        A URL ending in ``.pdf`` has its body written to a scratch file, which
        is uploaded through ``aws.ec2.copy_file_to_S3`` and then deleted
        locally; the Document records the scratch path and the encoding
        ``'PDF'``. Any other response stores its body (decoded as UTF-8 with
        undecodable bytes ignored, then re-encoded) and the text following
        ``charset=`` in the ``content-type`` header (the whole header value
        when no charset is present).

        When exactly one Document already exists for this site id and URL it
        is overwritten in place; otherwise a new Document is created.

        Args:
            response: the downloaded response; its ``url``, ``status``,
                ``headers`` and ``body`` attributes are read.

        Returns:
            None. Exceptions derived from ``Exception`` are logged and
            swallowed; ``SystemExit`` and ``KeyboardInterrupt`` propagate.
        """
        log.msg('[%s] Parsing Start: %s' % (self.id, response.url),level=log.INFO,spider=self)
        try:
            item = {
                'urlAddress': response.url,
                'domain': self.allowed_domains,
                'site': Site.objects.get(pk=self.id),
                'response_code': response.status,
                'isUsed': 0,
            }

            if response.url.endswith('.pdf'):
                pdf_name = str(self.id) + '_' + str(datetime.now().isoformat()) + '.pdf'
                path = '/home/ec2-user/bblio/scraper/pdf/'
                pdf_path = path + pdf_name
                if not os.path.exists(path):
                    os.makedirs(path)
                item.update({
                    'document_html': pdf_path,
                    'encoding': 'PDF',
                })
                log.msg('PDF path: ' + pdf_path, level=log.INFO)

                # The with-block closes the file; no explicit close() is needed.
                with open(pdf_path, "wb") as f:
                    f.write(response.body)
                try:
                    aws.ec2.copy_file_to_S3(response.url, pdf_path)
                finally:
                    # Remove the scratch copy even when the upload fails, so
                    # failed uploads do not accumulate files on local disk.
                    os.remove(pdf_path)
            else:
                item.update({
                    'encoding': response.headers['content-type'].split('charset=')[-1],
                    'document_html': (response.body).decode('utf-8', 'ignore').encode('utf-8'),
                })

            # Build the lookup once and reuse it for the count and the fetch.
            existing = Document.objects.filter(site_id=self.id).filter(urlAddress=item['urlAddress'])
            if existing.count() == 1:
                logging.info('[%s] Parsing Doc Overwrite: %s' % (self.id, response.url))
                d = existing[0]
                d.document_html = item['document_html']
                d.encoding = item['encoding']
                d.domain = item['domain']
                d.response_code = item['response_code']
                d.isUsed = 0
                d.save()
            else:
                # Zero matches, or several: insert a fresh row.
                d = Document(**item)
                d.save()

            logging.info('[%s] Parsing Success: %s' % (self.id, response.url))

            return
        except AttributeError:
            logging.info('* Cannot parse: ' + response.url)
            logging.info(sys.exc_info()[0])
            return

        except Exception:
            # Deliberately broad: log the failure and carry on, but do not
            # intercept SystemExit / KeyboardInterrupt as a bare except would.
            logging.info('* Unexpected error:' + str(sys.exc_info()[0]) + '\n' + str(sys.exc_info()[1]))
            return

Пример #2

Показать файл

Файл: load-for-search.py Проект: dekom/threepress-bookworm-read-only

    return chapter_ordinal

# Pull the document-level metadata out of the TEI source.
title = xpath('//tei:title', xml)
author = xpath('//tei:author', xml)
# NOTE(review): this shadows the builtin id(); the name is kept because code
# outside this excerpt may reference it.
id = xpath('/tei:TEI/@xml:id', xml)

# Create and persist the Document; both dates are set to the load time.
d = Document(id=id,
             title=title,
             author=author,
             add_date=datetime.now(),
             pub_date=datetime.now()
             )

d.save()

logging.info("Adding content for id %s" %  d.id)
chapter_ordinal = 1

# Do we have parts?
# Run the XPath query once and reuse the result for both the emptiness check
# and the iteration.
part_divs = xml.xpath("//tei:div[@type='part']", namespaces={'tei': TEI})
if len(part_divs) > 0:
    part_ordinal = 1
    for part in part_divs:
        part_id = xpath('@xml:id', part)
        part_title = xpath('tei:head[1]', part)
        # The message needs a %s placeholder for the argument; without one,
        # logging reports a formatting error instead of emitting the title.
        logging.debug("Adding part %s", part_title.encode('utf-8'))
        p = d.part_set.create(id=part_id,
                              title=part_title,
                              ordinal=part_ordinal,
                              label='part')