Пример #1
0
    def xml2(self):
        """Parse an inline XML sample and print every entry it contains.

        Builds an ``InternetTreeXML`` from the literal below, prints how many
        items ``findall_users()`` returned, then makes each item the current
        tree in turn and prints it with ``print_element_tree``.
        """
        xml_source = '''
        <stuff>
            <users>
                <user x="2">
                    <id>001</id>
                    <name>Chuck</name>
                </user>
                <user x="7">
                    <id>009</id>
                    <name>Brent</name>
                </user>
            </users>
        </stuff>'''

        # Field descriptors handed to print_element_tree; presumably
        # (label, tag, kind, attribute name) -- confirm against the helper.
        fields = [
            ("Name:", "name", "text", ""),
            ("Id:", "id", "text", ""),
            ("Attribute:", "", "attr", "x"),
        ]

        utils = ExerciseUtils()
        document = utils.InternetTreeXML(xml_source)
        document.create_tree()
        users = document.findall_users()
        print('User count:', len(users))

        for user_node in users:
            document.replace_tree(user_node)
            document.print_element_tree(fields)
Пример #2
0
 def urlwords(self):
     """Open the default text document and print the result of getwords().

     Asserts that the value returned by ``getwords`` has exactly 26 entries.
     """
     print("urlwords - compute the frequency of each word in the file")
     utils = ExerciseUtils()
     handle = utils.open_url(utils.url_text_doc, None)
     text = utils.get_url_page(handle)
     word_counts = utils.getwords(text)
     assert len(word_counts) == 26
     print(word_counts)
Пример #3
0
    def curl1(self):
        """Fetch ``self.url_jpg`` in one call and write the result to a file.

        Asserts the fetched payload is 230210 items long and that
        ``write_file`` returns ``None``.
        """
        print("curl1 - get and image and write it to a file")
        utils = ExerciseUtils()
        payload = utils.open_url_small_img(self.url_jpg, None)
        size = len(payload)
        assert size == 230210
        print("Length of " + self.url_jpg + " is:", size)

        # write_file is expected to report success by returning None.
        assert utils.write_file(self.url_jpg, "wb", payload) is None
Пример #4
0
    def urllib1(self):
        """Open the default text document, read it and print its content.

        Asserts that the handle returned by ``open_url`` is not the empty
        string and that the page read from it is non-empty.
        """
        print("urllib1 - use urllib to read a web page like a file")
        utils = ExerciseUtils()

        handle = utils.open_url(utils.url_text_doc, None)
        assert handle != ""

        content = utils.get_url_page(handle)
        assert len(content) > 0

        print(content)
Пример #5
0
    def curl2(self):
        """Open ``self.url_jpg`` and copy it via get_url_large_img_and_save().

        Asserts that the opened handle is not the empty string and that the
        helper reports 230210 as the amount copied.
        """
        # The backslash continuation sits inside the string literal, so the
        # next line's leading spaces are part of the printed message.
        print("curl2 - get and image and write it to a file using a buffer to\
              read any size file")
        utils = ExerciseUtils()
        handle = utils.open_url(self.url_jpg, None)
        assert handle != ""

        copied = utils.get_url_large_img_and_save(handle, self.url_jpg)
        assert copied == 230210
        print(copied, 'characters copied.')
Пример #6
0
    def socket1(self):
        """Fetch the default text document through the socket helpers.

        Asserts that a socket was created and that ``get_page`` returns a
        value of length 2, then closes the socket.
        """
        print("socket1 - World's simplest web browser")
        utils = ExerciseUtils()
        sock, target = utils.init_socket_and_url(
            utils.url_prefix,
            utils.url_base,
            utils.url_text_doc,
        )
        assert sock is not None

        result = utils.get_page(sock, target)
        assert len(result) == 2

        # Rebind to whatever close_socket returns, as the helper's callers do.
        sock = utils.close_socket(sock)  # normal socket
Пример #7
0
 def xml1(self):
     """Parse an inline <person> XML sample and print selected fields.

     ``print_element_tree`` is called three times: with the name field
     alone, with the email ``hide`` attribute alone, and with both.
     """
     person_xml = '''
     <person>
     <name>James</name>
     <phone type="intl">
         +1 503 851 8418
     </phone>
     <email hide="yes" />
     </person>'''

     # Field descriptors; presumably (label, tag, kind, attribute name) --
     # confirm against print_element_tree.
     name_field = ("Name:", "name", "text", "")
     hide_field = ("Attr:", "email", "attr", "hide")

     utils = ExerciseUtils()
     person_tree = utils.InternetTreeXML(person_xml)
     person_tree.create_tree()
     for selection in ([name_field], [hide_field], [name_field, hide_field]):
         person_tree.print_element_tree(selection)
Пример #8
0
    def geojson(self):
        """Parse an inline JSON array and print every entry it contains.

        Builds an ``InternetTreeJSON`` from the literal below, prints the
        value of ``tree_list_count()``, then makes each item of ``tree_list``
        the current tree in turn and prints it with ``print_element_tree``.
        """
        users_json = '''
        [
        { "id" : "001",
            "x" : "2",
            "name" : "Chuck"
        } ,
        { "id" : "009",
            "x" : "7",
            "name" : "Brent"
        }
        ]'''

        utils = ExerciseUtils()
        parsed = utils.InternetTreeJSON(users_json)
        parsed.create_tree_list()
        print('User count:', parsed.tree_list_count())

        # Field descriptors; presumably (label, key, kind, attribute name) --
        # confirm against print_element_tree.
        fields = [
            ("Name:", "name", "text", ""),
            ("Id:", "id", "text", ""),
            ("Attribute:", "", "attr", "x"),
        ]
        for entry in parsed.tree_list:
            parsed.replace_tree(entry)
            parsed.print_element_tree(fields)
Пример #9
0
    def urljpeg(self):
        """Fetch ``self.url_jpg`` over a socket and save it locally.

        Asserts that a socket was created, that ``get_jpeg`` returns 230608
        items, and that ``save_picture`` reports a positive value for
        ``self.local_jpg``.
        """
        print("urljpeg - get a jpeg document")
        utils = ExerciseUtils()
        sock, target = utils.init_socket_and_url(
            utils.url_prefix,
            utils.url_base,
            self.url_jpg,
        )
        assert sock is not None

        picture = utils.get_jpeg(sock, target)
        assert len(picture) == 230608

        sock = utils.close_socket(sock)  # normal socket

        # The picture is saved only after the socket has been closed.
        assert utils.save_picture(picture, self.local_jpg) > 0
Пример #10
0
from utils import ExerciseUtils

# Document opened when the user just presses Enter at the prompt.
def_url = "http://data.pr4e.org/mbox-short.txt"

print(
    "urllib1 - World's simplest web browser for any url and displays up to 3000 characters"
)
print("  Format of url must be http(s)://urlbase/page")
print("  Example: " + def_url)

# An empty answer selects the default document.
url = input("Enter url to open(" + def_url + "): ")
if url == "":
    url = def_url

exu = ExerciseUtils()
url_prefix, url_base, url_page = exu.split_url(url)
# A missing or empty base part is treated as an unusable URL.
if url_base is None or url_base == "":
    print("Bad URL")
else:
    char_count = exu.print_page_urllib(url, 3000)
    # The expected size is only known for the default document; checking it
    # for an arbitrary user-supplied URL would abort the script before the
    # total is printed.
    if url == def_url:
        assert char_count == 94626
    print("Total characters found:", char_count)
Пример #11
0
import re

from utils import ExerciseUtils

# Document opened when the user just presses Enter at the prompt.
def_url = "http://data.pr4e.org/romeo.txt"

# One print call with a newline separator emits the same three banner lines.
print(
    "socket1 - World's simplest web browser for any url",
    "  Format of url must be http(s)://urlbase/page",
    "  Example: " + def_url,
    sep="\n",
)

# input() returns a str, so an empty answer is falsy and selects the default.
url = input("Enter url to open(" + def_url + "): ") or def_url

exu = ExerciseUtils()
url_prefix, url_base, url_page = exu.split_url(url)
# Hand the split URL parts to the helper with a limit of 3000 and
# skipheaders enabled.
total_chars = exu.print_page_limit(
    url_prefix, url_base, url_page, 3000, skipheaders=True
)  # normal socket
Пример #12
0
import re

from utils import ExerciseUtils

# Document opened when the user just presses Enter at the prompt.
def_url = "http://data.pr4e.org/mbox-short.txt"

print("socket1 - World's simplest web browser for any url and displays up to 3000 characters")
print("  Format of url must be http(s)://urlbase/page")
print("  Example: " + def_url)

# An empty answer selects the default document.
url = input("Enter url to open(" + def_url + "): ")
if url == "":
    url = def_url


exu = ExerciseUtils()
url_prefix, url_base, url_page = exu.split_url(url)
# A missing or empty base part is treated as an unusable URL.
if url_base is None or url_base == "":
    print("Bad URL")
else:
    print("opening socket to:", url)
    # init_socket_and_url also returns a URL value that replaces `url`.
    mysock, url = exu.init_socket_and_url(url_prefix, url_base, url_page)
    # Nothing is fetched or closed when no socket was created.
    if mysock is not None:

        total_chars = exu.print_page_socket(mysock, url, 3000)
        print("Total characters found:", total_chars)

        mysock = exu.close_socket(mysock)  # normal socket
Пример #13
0
 def urllinks2(self):
     """Fetch ``url_default2`` and pass its HTML to bs4_tags() for 'a' tags."""
     print("urllinks2 - Look at the parts of a tag")
     utils = ExerciseUtils()
     markup = utils.get_html(utils.url_default2)
     utils.bs4_tags(markup, 'a')
Пример #14
0
 def urllinks(self):
     """Fetch ``url_default1`` and pass its HTML to bs4_tags() for 'a' tags.

     Only the second of the four ``pflags`` switches is enabled; what each
     switch controls is defined by ``bs4_tags``.
     """
     # The backslash continuation sits inside the string literal, so the
     # next line's leading spaces are part of the printed message.
     print("urllinks - Search for link values within URL page using\
           BeatifulSoup to parse html")
     utils = ExerciseUtils()
     markup = utils.get_html(utils.url_default1)
     utils.bs4_tags(markup, 'a', pflags=[False, True, False, False])
Пример #15
0
 def urlregex(self):
     """Fetch ``url_default1`` and pass its HTML to regexlinks()."""
     print("urlregex - Search for link values within URL input")
     utils = ExerciseUtils()
     markup = utils.get_html(utils.url_default1)
     utils.regexlinks(markup)
Пример #16
0
from utils import ExerciseUtils

# Exercise banner; the backslash continuation is inside the literal, so the
# second line's leading spaces are printed as well.
print(
    'ex_11_02 - Write a program to look for lines of the form: New Revision: 39772 \n \
    Extract the number from each of the lines using a regular expression and the findall() method. Compute the average of the numbers and print out the average as an integer.'
)

# Mail-box files processed in this order.
files = ["mbox.txt", "mbox-short.txt"]

exu = ExerciseUtils()
for file in files:
    # The pattern has one capture group: the digits after "New Revision: ".
    count, avg = exu.run_findall_avg(file, "^New Revision: ([0-9]+)", True)
    # print() stringifies `count` itself; the f-string stringifies `avg`.
    print("Found ", count, f" Average is {avg} for file " + file)
Пример #17
0
from bs4 import BeautifulSoup

from utils import ExerciseUtils

print(
    "urlpara - Search for paragrapsh (tags beginning with <p>) within URL page using BeatifulSoup to parse html"
)

exu = ExerciseUtils()
# The three default pages exposed by ExerciseUtils, processed in order.
urls = [
    exu.url_default1,
    exu.url_default2,
    exu.url_default3,
]
for url in urls:
    html = exu.get_html(url)
    # Stop immediately if a page came back empty.
    assert len(html) > 0

    # Only the first of the four flags is enabled; their meaning is defined
    # by bs4_tags.
    tags = exu.bs4_tags(html, "p", [True, False, False, False])

    print("Reading: ", url)
    print("Number of paragraphs: ", len(tags))
Пример #18
0
import re

from utils import ExerciseUtils

# Document opened when the user just presses Enter at the prompt.
def_url = "http://data.pr4e.org/romeo.txt"

print("socket1 - World's simplest web browser for any url")
print("  Format of url must be http(s)://urlbase/page")
print("  Example: " + def_url)

# An empty answer selects the default document.
url = input("Enter url to open(" + def_url + "): ")
if url == "":
    url = def_url

exu = ExerciseUtils()
url_prefix, url_base, url_page = exu.split_url(url)
# A missing or empty base part is treated as an unusable URL.
if url_base is None or url_base == "":
    print("Bad URL")
else:
    print("opening socket to:", url)
    # init_socket_and_url also returns a URL value that replaces `url`.
    mysock, url = exu.init_socket_and_url(url_prefix, url_base, url_page)
    # Nothing is fetched or closed when no socket was created.
    if mysock is not None:

        page = exu.get_page(mysock, url)

        mysock = exu.close_socket(mysock)  # normal socket
        # NOTE(review): `mysock` now holds close_socket's return value, not
        # the original socket. This check relies on that value exposing a
        # private `_closed` attribute -- confirm against close_socket.
        assert mysock._closed
Пример #19
0
from utils import ExerciseUtils

print(
    'ex_11_01 - Write a simple program to simulate the operation of the grep command on Unix. Ask the user to enter a regular expression and count the number of lines that matched the regular expression'
)

regex = input("Enter a regular expression: ")
# Use the built-in sample patterns only when the user entered nothing.
# The previous check (`len(regex_list) > 0` after appending the answer) was
# always true, so the user's expression was always thrown away.
if regex == "":
    regex_list = ["^Author", "^X-", "java$"]
else:
    regex_list = [regex]

exu = ExerciseUtils()
for regex in regex_list:
    # NOTE(review): the count is not printed here; presumably run_search1
    # reports it itself -- confirm against ExerciseUtils.
    count = exu.run_search1('mbox.txt', regex, False)
Пример #20
0
from utils import ExerciseUtils

# Each entry: banner to print, name of the ExerciseUtils method to call,
# input file, regular expression, and the count the call must return.
re_checks = (
    ("re01 - Search for lines that contain 'From'",
     'run_search1', 'mbox-short.txt', 'From:', 27),
    ("re02 - Search for lines that start with 'From'",
     'run_search1', 'mbox-short.txt', '^From:', 27),
    ("re03 - Search for lines that start with 'F', followed by 2 characters, followed by 'm:'",
     'run_search1', 'mbox-short.txt', '^F..m:', 27),
    ("re04 - Search for lines that start with From and have an '@' sign",
     'run_search1', 'mbox-short.txt', '^From:.+@', 27),
    ("re05 - Search for an address",
     'run_findall', 'mbox-short5.txt', '\\S+@\\S+', 5),
)

for banner, method_name, file_name, pattern, expected in re_checks:
    print(banner)
    # A fresh helper instance is created for every check.
    exu = ExerciseUtils()
    count = getattr(exu, method_name)(file_name, pattern, False)
    assert count == expected

print("re06 - Search for lines that have an at sign between characters")
exu = ExerciseUtils()