Пример #1
0
def copy(element):
    """Clone a single beautifulsoup4 node without deep-copying its children.

    Strings and comments are re-created from their own text.  For a tag, only
    the tag's own data (name, namespace, prefix, attributes and the
    ``can_be_empty_element`` / ``hidden`` flags) is duplicated; the child
    nodes are not cloned but handed to the new tag through ``append``.

    .. note::
        NOTE(review): bs4's ``append`` re-parents a node that already belongs
        to a tree, so the children end up attached to the clone and detached
        from ``element`` -- confirm this is acceptable to callers.  Clone the
        children recursively if the original must stay intact.

    Based on
    http://stackoverflow.com/questions/23057631/clone-element-with-beautifulsoup

    :param element: node to clone.
    :type element: bs4.Tag, bs4.NavigableString, bs4.Comment
    :return: the cloned node.
    """
    if isinstance(element, (NavigableString, Comment)):
        return type(element)(element)

    clone_element = Tag(None, element.builder, element.name, element.namespace,
                        element.nsprefix)
    # Copied explicitly rather than relying on the constructor; see
    # https://bugs.launchpad.net/beautifulsoup/+bug/1307471 (no builder set).
    clone_element.attrs = dict(element.attrs)
    for attr in ('can_be_empty_element', 'hidden'):
        setattr(clone_element, attr, getattr(element, attr))
    # Iterate over a snapshot: appending a child removes it from
    # ``element.contents``, and mutating that list while looping over it
    # would silently skip every other child.
    for child in list(element.contents):
        clone_element.append(child)
    return clone_element
Пример #2
0
def clone_beautiful_soup_tag(elements):
    """Recursively clone a bs4 node, or every item of an iterable of nodes.

    :param elements: a ``Tag``, ``NavigableString`` or ``BeautifulSoup``
        object, or an iterable whose items are cloned one by one.
    :return: the clone of a single node, or a list of clones when an
        iterable was given.
    :raises ElementTypeError: if ``elements`` is ``None``.
    """
    if elements is None:
        raise ElementTypeError('elements is None!')

    if not isinstance(elements, (Tag, NavigableString, BeautifulSoup)):
        # Anything that is not a single node is treated as an iterable.
        return [clone_beautiful_soup_tag(item) for item in elements]

    if isinstance(elements, NavigableString):
        # Strings are rebuilt from their own text, keeping their class.
        return type(elements)(elements)

    duplicate = Tag(None, elements.builder, elements.name,
                    elements.namespace, elements.nsprefix)

    # Attributes and flags are copied by hand; the original code links this
    # to https://bugs.launchpad.net/beautifulsoup/+bug/1307471
    # (no builder set).
    duplicate.attrs = dict(elements.attrs)
    duplicate.can_be_empty_element = elements.can_be_empty_element
    duplicate.hidden = elements.hidden

    for node in elements.contents:
        duplicate.append(clone_beautiful_soup_tag(node))
    return duplicate
Пример #3
0
def deepcopy(element):
    """Recursively clone a beautifulsoup4 node together with all descendants.

    The node's own data and every child are duplicated, so the result shares
    no nodes with the original tree.

    According to the original author's note, cloning before ``append`` avoids
    the DOM corruption seen with beautifulsoup4 4.0.2 (existing tags becoming
    unreachable, or lookups returning only part of the expected results).

    Based on
    http://stackoverflow.com/questions/23057631/clone-element-with-beautifulsoup

    :param element: node to clone.
    :type element: bs4.Tag, bs4.NavigableString, bs4.Comment
    :return: the cloned node.
    """
    if not isinstance(element, (NavigableString, Comment)):
        duplicate = Tag(None, element.builder, element.name,
                        element.namespace, element.nsprefix)
        # Attributes and flags are copied by hand; see
        # https://bugs.launchpad.net/beautifulsoup/+bug/1307471
        # (no builder set).
        duplicate.attrs = dict(element.attrs)
        duplicate.can_be_empty_element = element.can_be_empty_element
        duplicate.hidden = element.hidden
        for node in element.contents:
            duplicate.append(deepcopy(node))
        return duplicate

    # Strings and comments are rebuilt from their own text.
    return type(element)(element)
Пример #4
0
 def construct_xml(self):
     """Build the skeleton of an ``OTA_AirLowFareSearchRQ`` request document.

     The root element is created with ``etree``, parsed into BeautifulSoup
     with the ``'xml'`` parser, given its namespace/version attributes, and
     then populated with a ``POS/Source/RequestorID`` branch and an
     ``OriginDestinationInformation`` element holding an (empty)
     ``DepartureDateTime``.

     NOTE(review): the method returns nothing; decide whether callers need
     the built ``soup`` (or its serialisation) back.
     """
     soup = BeautifulSoup(etree.tostring(etree.Element('OTA_AirLowFareSearchRQ')), 'xml')
     query = soup.contents[0]
     query.attrs = {
         'xmlns':'http://www.opentravel.org/OTA/2003/05',
         'xmlns:xsi':'http://www.w3.org/2001/XMLSchema-instance',
         'PrimaryLangId':'en',
         'Version':'2.001',
         'TimeStamp':str(datetime.datetime.now().isoformat()),
         'EchoToken':str(time.mktime(time.gmtime())),
         'xsi:schemaLocation':'http://www.opentravel.org/2006A/OTA_AirLowFareSearchRQ.xsd',
     }

     # <POS><Source><RequestorID .../></Source></POS>
     t_pos = Tag(name='POS')
     t_source = Tag(name='Source')
     t_req = Tag(name='RequestorID')
     t_req.attrs = {
         'ID':'weathersick',
         'URL':'http://www.weathersick.com',
         'Type':'18',
     }
     t_source.append(t_req)
     t_pos.append(t_source)
     query.append(t_pos)

     # <OriginDestinationInformation RPH="1"><DepartureDateTime/></...>
     t_odinf = Tag(name='OriginDestinationInformation')
     t_odinf.attrs = {'RPH':1}
     t_deptime = Tag(name='DepartureDateTime')
     # NOTE(review): the content of DepartureDateTime was never specified;
     # it is attached empty, mirroring how the POS branch is assembled --
     # confirm the required value and nesting.
     t_odinf.append(t_deptime)
     query.append(t_odinf)
Пример #5
0
 def __call__(self, DOM):
     """Append a ``<script>`` element loading ``self.url`` to the document body.

     A ``<body>`` element is created first when the document has none.

     :param DOM: document object exposing ``html`` and ``body``
         (presumably a BeautifulSoup tree -- confirm against callers).
     :return: the same ``DOM`` object, modified in place.
     """
     tag = Tag(name='script', builder=BUILDER)
     tag.attrs = {
         'type': 'text/javascript',
         'src': self.url,
     }
     if not DOM.body:
         # A missing <body> belongs at the end of <html>; inserting it at
         # index 0 would place it in front of an existing <head>.
         DOM.html.append(Tag(name='body'))
     DOM.body.append(tag)
     return DOM
Пример #6
0
 def f(html):
     """Append a ``<script>`` element loading ``URL`` to the document head.

     A ``<head>`` element is inserted as the first child of ``<html>`` when
     the document has none.

     :param html: document object exposing ``html`` and ``head``.
     :return: the same ``html`` object, modified in place.
     """
     script = Tag(name='script', builder=BUILDER)
     script.attrs = {'type': 'text/javascript', 'src': URL}

     head_missing = not html.head
     if head_missing:
         html.html.insert(0, Tag(name='head'))

     html.head.append(script)
     return html
Пример #7
0
 def __call__(self, DOM):
     """Add a ``<script src=self.url>`` element at the end of the body.

     When the document has no ``<body>`` yet, one is created first.

     :param DOM: document object exposing ``html`` and ``body``
         (presumably a BeautifulSoup tree -- confirm against callers).
     :return: the same ``DOM`` object, modified in place.
     """
     tag = Tag(name='script', builder=BUILDER)
     tag.attrs = {
         'type': 'text/javascript',
         'src': self.url,
     }
     if not DOM.body:
         # Append rather than insert at index 0: a new <body> must not end
         # up ahead of an existing <head> inside <html>.
         DOM.html.append(Tag(name='body'))
     DOM.body.append(tag)
     return DOM
Пример #8
0
 def f(html):
     """Add a ``<script src=URL>`` element at the end of the document head.

     If the document lacks a ``<head>``, an empty one is inserted at the
     start of ``<html>`` before the script is attached.

     :param html: document object exposing ``html`` and ``head``.
     :return: the same ``html`` object, modified in place.
     """
     script_attrs = {
         'type': 'text/javascript',
         'src': URL,
     }
     script = Tag(name='script', builder=BUILDER)
     script.attrs = script_attrs
     if not html.head:
         html.html.insert(0, Tag(name='head'))
     html.head.append(script)
     return html
Пример #9
0
 def __call__(self, DOM):
     """Append a stylesheet ``<link>`` pointing at ``self.url`` to the head.

     A ``<head>`` element is inserted as the first child of ``<html>`` when
     the document has none.

     :param DOM: document object exposing ``html`` and ``head``.
     :return: the same ``DOM`` object, modified in place.
     """
     stylesheet = Tag(name='link', builder=BUILDER)
     stylesheet.attrs = {'type': 'text/css', 'rel': 'stylesheet', 'href': self.url}

     head_missing = not DOM.head
     if head_missing:
         DOM.html.insert(0, Tag(name='head'))

     DOM.head.append(stylesheet)
     return DOM
Пример #10
0
 def f(html):
     """Append a stylesheet ``<link>`` pointing at ``URL`` to the document head.

     A ``<head>`` element is inserted as the first child of ``<html>`` when
     the document has none.

     :param html: document object exposing ``html`` and ``head``.
     :return: the same ``html`` object, modified in place.
     """
     stylesheet = Tag(name='link', builder=BUILDER)
     stylesheet.attrs = {'type': 'text/css', 'rel': 'stylesheet', 'href': URL}

     head_missing = not html.head
     if head_missing:
         html.html.insert(0, Tag(name='head'))

     html.head.append(stylesheet)
     return html
Пример #11
0
def clone(el):
    """Return a recursive copy of a bs4 node.

    Strings are rebuilt from their own text with the same class; tags are
    rebuilt with the same builder, name, namespace, prefix, attributes and
    flags, and every child is cloned in turn.

    :param el: the ``Tag`` or ``NavigableString`` to clone.
    :return: the cloned node.
    """
    if not isinstance(el, NavigableString):
        duplicate = Tag(None, el.builder, el.name, el.namespace, el.nsprefix)
        duplicate.attrs = dict(el.attrs)
        duplicate.can_be_empty_element = el.can_be_empty_element
        duplicate.hidden = el.hidden
        for node in el.contents:
            duplicate.append(clone(node))
        return duplicate

    return type(el)(el)
Пример #12
0
 def f(html):
     """Add a ``<link rel="stylesheet">`` for ``URL`` at the end of the head.

     If the document lacks a ``<head>``, an empty one is inserted at the
     start of ``<html>`` before the link is attached.

     :param html: document object exposing ``html`` and ``head``.
     :return: the same ``html`` object, modified in place.
     """
     link_attrs = {
         'type': 'text/css',
         'rel': 'stylesheet',
         'href': URL,
     }
     link = Tag(name='link', builder=BUILDER)
     link.attrs = link_attrs
     if not html.head:
         html.html.insert(0, Tag(name='head'))
     html.head.append(link)
     return html
Пример #13
0
 def __call__(self, DOM):
     """Add a ``<link rel="stylesheet">`` for ``self.url`` at the end of the head.

     If the document lacks a ``<head>``, an empty one is inserted at the
     start of ``<html>`` before the link is attached.

     :param DOM: document object exposing ``html`` and ``head``.
     :return: the same ``DOM`` object, modified in place.
     """
     link_attrs = {
         'type': 'text/css',
         'rel': 'stylesheet',
         'href': self.url,
     }
     link = Tag(name='link', builder=BUILDER)
     link.attrs = link_attrs
     if not DOM.head:
         DOM.html.insert(0, Tag(name='head'))
     DOM.head.append(link)
     return DOM
Пример #14
0
def clone(el):
    """Return a recursive copy of a bs4 node.

    :param el: the ``Tag`` or ``NavigableString`` to clone.
    :return: a new node of the same kind; for tags, all children are
        cloned as well.
    """
    if isinstance(el, NavigableString):
        # Strings are rebuilt from their own text, keeping their class.
        text_class = type(el)
        return text_class(el)

    duplicate = Tag(None, el.builder, el.name, el.namespace, el.nsprefix)
    # Attributes and flags are copied by hand; see
    # https://bugs.launchpad.net/beautifulsoup/+bug/1307471 (no builder set).
    duplicate.attrs = dict(el.attrs)
    for flag in ('can_be_empty_element', 'hidden'):
        setattr(duplicate, flag, getattr(el, flag))
    for node in el.contents:
        duplicate.append(clone(node))
    return duplicate
Пример #15
0
def clone(el):
    """Deep-copy a bs4 node.

    Strings are rebuilt from their own text with the same class.  Tags are
    rebuilt with the same builder, name, namespace and prefix, then receive
    a copy of the attributes, the two flags and a clone of every child.

    :param el: the ``Tag`` or ``NavigableString`` to clone.
    :return: the cloned node.
    """
    if not isinstance(el, NavigableString):
        duplicate = Tag(None, el.builder, el.name, el.namespace, el.nsprefix)
        # Attributes and flags are copied by hand; see
        # https://bugs.launchpad.net/beautifulsoup/+bug/1307471
        # (no builder set).
        duplicate.attrs = dict(el.attrs)
        duplicate.can_be_empty_element = el.can_be_empty_element
        duplicate.hidden = el.hidden
        for node in el.contents:
            duplicate.append(clone(node))
        return duplicate

    return type(el)(el)
Пример #16
0
 def soup(self):
     '''
         Build this component as a BeautifulSoup tag.

         The tag is named ``self.tagname`` and uses ``self.attributes`` as
         its attribute dict (assigned directly, not copied).  Each flattened
         entry of ``self.components`` that has a ``soup`` method is rendered
         recursively; entries whose exact type is ``str`` are appended as
         text; everything else is left out.
     '''
     root = Tag(name=self.tagname, builder=BUILDER)
     root.attrs = self.attributes
     for item in flatten(self.components):
         if hasattr(item, 'soup'):
             # Nested component: render it recursively.
             root.append(item.soup())
             continue
         if type(item) is str:
             root.append(str(item))
         # Any other kind of entry is deliberately not integrated.
     return root
Пример #17
0
 def soup(self):
     '''
         Return this component rendered as a BeautifulSoup tag.

         A tag named ``self.tagname`` is created with ``self.attributes`` as
         its attributes, then filled from the flattened ``self.components``:
         objects exposing ``soup`` contribute their own rendered tag, plain
         ``str`` objects contribute text, and anything else is skipped.
     '''
     rendered = Tag(name=self.tagname, builder=BUILDER)
     rendered.attrs = self.attributes
     for part in flatten(self.components):
         is_component = hasattr(part, 'soup')
         if is_component:
             child = part.soup()
         elif type(part) is str:
             child = str(part)
         else:
             # Not a component and not a plain string: not integrated.
             continue
         rendered.append(child)
     return rendered
Пример #18
0
def clone_bs4_elem(el):
    """Return a recursive copy of a bs4 node, for use before modifying it.

    Adapted from
    http://stackoverflow.com/questions/23057631/clone-element-with-beautifulsoup

    :param el: the ``Tag`` or ``NavigableString`` to clone.
    :return: the cloned node.
    """
    if not isinstance(el, NavigableString):
        duplicate = Tag(None, el.builder, el.name, el.namespace, el.nsprefix)
        # Attributes and flags are copied by hand; see
        # https://bugs.launchpad.net/beautifulsoup/+bug/1307471
        # (no builder set).
        duplicate.attrs = dict(el.attrs)
        duplicate.can_be_empty_element = el.can_be_empty_element
        duplicate.hidden = el.hidden
        for node in el.contents:
            duplicate.append(clone_bs4_elem(node))
        return duplicate

    # Strings are rebuilt from their own text, keeping their class.
    return type(el)(el)
Пример #19
0
def clone_bs4_elem(el):
    """Deep-copy a bs4 node so the copy can be modified freely.

    Adapted from
    http://stackoverflow.com/questions/23057631/clone-element-with-beautifulsoup

    :param el: the ``Tag`` or ``NavigableString`` to clone.
    :return: a new node of the same kind; for tags, all children are
        cloned as well.
    """
    if isinstance(el, NavigableString):
        text_class = type(el)
        return text_class(el)

    duplicate = Tag(None, el.builder, el.name, el.namespace, el.nsprefix)
    # Attributes and flags are copied by hand; see
    # https://bugs.launchpad.net/beautifulsoup/+bug/1307471 (no builder set).
    duplicate.attrs = dict(el.attrs)
    for flag in ('can_be_empty_element', 'hidden'):
        setattr(duplicate, flag, getattr(el, flag))
    for node in el.contents:
        duplicate.append(clone_bs4_elem(node))
    return duplicate
Пример #20
0
def clean_tag_attributes(tag: Tag) -> None:
    """Drop every attribute of ``tag`` that is not in ``ALLOWED_ATTRIBUTES``.

    ``NavigableString`` objects are left untouched.  For anything else,
    ``tag.attrs`` is replaced by a new dict holding only the allowed keys.
    """
    if isinstance(tag, NavigableString):
        return
    kept = {}
    for name, value in tag.attrs.items():
        if name in ALLOWED_ATTRIBUTES:
            kept[name] = value
    tag.attrs = kept
Пример #21
0
def clean_tag_attributes(tag: Tag) -> None:
    """Drop every attribute of ``tag`` that is not in ``allowed_attributes``.

    ``NavigableString`` objects are left untouched.  For anything else,
    ``tag.attrs`` is replaced by a new dict holding only the allowed keys.
    """
    if isinstance(tag, NavigableString):
        return
    kept = {}
    for name, value in tag.attrs.items():
        if name in allowed_attributes:
            kept[name] = value
    tag.attrs = kept