forked from jvanasco/metadata_parser
-
Notifications
You must be signed in to change notification settings - Fork 0
/
demo.py
70 lines (58 loc) · 2.21 KB
/
demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
from metadata_parser import MetadataParser
# Disabled demo (change the guard to a truthy value to run it).
# Builds a parser for each of three news sites, prints each page's 'title'
# metadata, dumps every parser's `metadata` attribute, then exercises the
# URL-related lookups on the last parser.
if 0:
    separator = "\n-------------------------------------------------------\n"

    # Single-argument print(...) is valid on both Python 2 and Python 3.
    a = MetadataParser(url='http://cnn.com')
    print(a.get_metadata('title'))
    b = MetadataParser(url='http://nyt.com')
    print(b.get_metadata('title'))
    c = MetadataParser(url='http://thedailybeast.com')
    print(c.get_metadata('title'))

    # Dump the raw metadata of each parser, separator line before each dump.
    for parser in (a, b, c):
        print(separator)
        print(parser.metadata)
    print(separator)

    # Compare the different URL lookups for the last site.
    print(c.get_metadata('title'))
    print(c.get_metadata('canonical'))
    print(c.get_metadata('url'))
    print(c.absolute_url(c.get_metadata('canonical')))
    print(c.absolute_url(c.get_metadata('url')))
    print(c.get_discrete_url())
# Disabled demo (change the guard to a truthy value to run it).
# Parses a single short-link URL and prints each lookup under a label so the
# 'canonical' / 'url' metadata values can be compared with the results of
# absolute_url() and get_discrete_url().
if 0:
    a = MetadataParser(url='http://liqr.co/rsvpnewyork')

    # Single-argument print(...) is valid on both Python 2 and Python 3.
    print("title:")
    print(a.get_metadata('title'))
    print("canonical:")
    print(a.get_metadata('canonical'))
    print("url:")
    print(a.get_metadata('url'))
    print("absolute_url-canonical:")
    print(a.absolute_url(a.get_metadata('canonical')))
    print("absolute_url-url:")
    print(a.absolute_url(a.get_metadata('url')))
    print("get_discrete_url:")
    print(a.get_discrete_url())
# Disabled demo (change the guard to a truthy value to run it).
# Parses one page and dumps the parser instance's attribute dictionary.
if 0:
    a = MetadataParser(url='http://www.ted.com/talks/drew_curtis_how_i_beat_a_patent_troll.html')
    # Single-argument print(...) is valid on both Python 2 and Python 3.
    print(a.__dict__)
# Disabled demo (change the guard to a truthy value to run it).
# Feeds the parser HTML read from the local file 'broken.html' (via the
# `html=` argument) instead of fetching a URL, then prints the 'title'.
if 0:
    # `with` guarantees the file handle is closed once the content is read.
    with open('broken.html', 'r') as html_file:
        broken_html = html_file.read()

    # Alternative input that was tried here: fetching a live page with
    # MetadataParser(url="http://brewskeeball.com/rosenblog")
    a = MetadataParser(html=broken_html)
    # Single-argument print(...) is valid on both Python 2 and Python 3.
    print(a.get_metadata('title'))
# Disabled demo (change the guard to a truthy value to run it).
# Parses each URL in a small list and dumps the resulting parser instance's
# attribute dictionary. The list includes the same host with and without a
# trailing slash, a URL with a query string, and a direct link to an .mp3.
if 0:
    urls = [
        'http://www.cnn.com',
        'http://www.cnn.com/',
        'http://www.michaeleisen.org/blog/?p=358',
        'http://www.nasa.gov/externalflash/discovery/index.html',
        'http://hw.libsyn.com/p/d/d/6/dd6b0db2d4858640/ARIYNBF_107_JamesGunn.mp3?sid=78edb823ad1b62ff6f329d68bbb2cc6a&l_sid=35168&l_eid=&l_mid=2952818&expiration=1334720066&hwt=7acfe1754c8dedc4f134b473894c9208',
    ]
    for target_url in urls:
        a = MetadataParser(url=target_url)
        # Single-argument print(...) is valid on both Python 2 and Python 3.
        print(a.__dict__)
# Disabled demo (change the guard to a truthy value to run it).
# Parses a single SoundCloud profile URL and dumps the parser instance's
# attribute dictionary.
if 0:
    url = 'http://soundcloud.com/electricyouthmusic'
    a = MetadataParser(url=url)
    # Single-argument print(...) is valid on both Python 2 and Python 3.
    print(a.__dict__)