Exemplo n.º 1
0
        status_last = Person.get_person_lookup_status( person_last )
        print( "status from both in last: " + status_last )
    
    else:
    
        # Name parsed as two words, so go with parsed name?
        pass
    
    #-- END check to see if both first and last name. --#

#-- END check to see if two-part name. --#

# Look for existing Person records whose stored full-name string matches the
#     name held in "parsed".  Sets up three names that the code below reads:
#     full_name_test (the search string), full_name_qs (the matching records)
#     and full_name_count (how many records matched).

# Get full name from parsed - render the value in "parsed" as a single string.
# NOTE(review): the "+" concatenation in the print() below requires this
#     helper to return a string type - confirm against StringHelper.
full_name_test = StringHelper.object_to_unicode_string( parsed )
print( "FULL NAME - looking for \"" + full_name_test + "\"" )

# Filter Person on full_name_string using the "iexact" lookup (presumably the
#     Django ORM's case-insensitive exact match - TODO confirm), then ask the
#     resulting query set how many records it holds.  The count decides
#     whether the matches are listed or a "no match" message is printed.
full_name_qs = Person.objects.filter( full_name_string__iexact = full_name_test )
full_name_count = full_name_qs.count()
if ( full_name_count > 0 ):

    for full_name_match in full_name_qs:
    
        print( "- FULL NAME - full name match: " + str( full_name_match ) )
        
    #-- END loop over full name matches --#
    
else:

    print( "- FULL NAME - no full name match for \"" + full_name_test + "\"" )
    
    # see if name is br.
    if ( current_name == "br" ):
    
        # yes - paragraph break!  output a message, and the string contents of the tag (just in case).
        print( "=======> paragraph break! - End of paragraph " + str( paragraph_counter ) + ".  HTML element Contents: \"" + str( current_content ) + "\"" )
        
        # add previous paragraph to paragraph list.
        paragraph_text_list = []
        for paragraph_element in current_element_list:
        
            # convert current element to just text.  Is it NavigableString?
            if ( isinstance( paragraph_element, NavigableString) ):
            
                # it is text - convert it to string.
                current_paragraph_text = StringHelper.object_to_unicode_string( paragraph_element )
            
            else:
            
                # not text - just grab all the text out of it.
                #current_paragraph_text = ' '.join( paragraph_element.findAll( text = True ) )
                current_paragraph_text = HTMLHelper.remove_html( str( paragraph_element ) )
                
            #-- END check to see if current element is text. --#

            # clean up - convert HTML entities
            current_paragraph_text = bs_helper.convert_html_entities( current_paragraph_text )
            
            # strip out extra white space
            current_paragraph_text = StringHelper.replace_white_space( current_paragraph_text )