示例#1
0
def get_attrs(src, tag):
    """Collect the attributes of every ``<tag ...>`` element in *src*.

    The parsing is purely positional: it walks *src* from the start and
    repeatedly reads ``name="value"`` pairs (a space, the text up to ``=``,
    then the text between the next two double quotes).  The total number of
    ``="`` occurrences is divided evenly over the number of ``<tag``
    occurrences, so every element is assumed to carry the same number of
    attributes and *src* is assumed to contain only those elements.

    Args:
        src: Markup text to scan.
        tag: Tag name (without the angle bracket) whose elements are counted
            and whose ``'txt'`` entries are requested from ``mcscrp().scrp``.

    Returns:
        A dict with one entry per counted element.  The key is the matching
        item of ``mcscrp().scrp(src, tag)['txt']`` or, when that item is
        empty, ``'a<index>'``; the value is a dict of attribute name ->
        attribute value.  Elements sharing the same key overwrite each other.

    Raises:
        IndexError: if the ``'txt'`` sequence has fewer items than the number
            of ``<tag`` occurrences (assuming it is list-like).
    """
    attr_total = src.count('="')
    # Count the requested tag; the count used to be hard-coded to '<a', which
    # made the function return {} for every tag other than anchors.
    tag_count = src.count('<{}'.format(tag))
    names = mcscrp().scrp(src, tag)['txt']

    # Attributes are assumed to be spread evenly over the elements.
    attrs_per_tag = attr_total // tag_count if tag_count else 0

    result = {}
    # Index of the closing quote of the last value consumed; carried across
    # elements so each scan resumes where the previous one stopped.
    cursor = 0
    for index in range(tag_count):
        attrs = {}
        for _ in range(attrs_per_tag):
            name_start = src.find(' ', cursor + 1)
            equals = src.find('=', name_start + 1)
            quote_open = src.find('"', equals + 1)
            cursor = src.find('"', quote_open + 1)
            attrs[src[name_start + 1:equals]] = src[quote_open + 1:cursor]

        # Fall back to a positional key when the element has no text.
        if len(names[index]) == 0:
            result['a%s' % (index)] = attrs
        else:
            result[names[index]] = attrs

    return result
示例#2
0
 def scrpping(self):
     """Fetch a page and display its scraped text in ``self.tableWidget``.

     The address is read from ``self.text.text()`` and passed to ``get``
     (presumably ``requests.get`` -- confirm against the file's imports).
     Every tag reported by ``mcscrp().get_tags`` becomes one table column,
     headed by the tag itself; the column's cells are the items of
     ``scrp(data, tag)['txt']``, converted with ``str``.
     """
     sc = mcscrp()
     data = get(self.text.text()).text
     tags = sc.get_tags(data)

     # Scrape every tag up front so the table can be sized before filling.
     columns = [list(sc.scrp(data, tag)['txt']) for tag in tags]

     self.tableWidget.setColumnCount(len(tags))
     # Rows must fit the longest column.  Sizing them to len(tags) dropped
     # every cell whose row index was >= the number of tags.
     self.tableWidget.setRowCount(
         max((len(column) for column in columns), default=0))
     self.tableWidget.setHorizontalHeaderLabels(tags)

     for col_index, texts in enumerate(columns):
         for row_index, text in enumerate(texts):
             self.tableWidget.setItem(
                 row_index, col_index,
                 QtWidgets.QTableWidgetItem(str(text)))
from McScrp import mcscrp
import requests

# Download the page; the timeout keeps the script from hanging forever on an
# unresponsive server (requests waits indefinitely by default).
page_html = requests.get(
    'https://simple.wikipedia.org/wiki/List_of_colors',
    timeout=10,
).text
scraper = mcscrp()

# Take the first 'tag' entry scraped for 'tbody', then every 'tr' entry
# scraped from it.
first_tbody = scraper.scrp(page_html, 'tbody')['tag'][0]
rows = scraper.scrp(first_tbody, 'tr')['tag']

# Print what get_attr returns for 'href' on each row.
for row in rows:
    print(scraper.get_attr(row, 'href'))

示例#4
0
from McScrp import mcscrp
from requests import get
sc = mcscrp()
# Fetch the search result page; the timeout keeps the script from hanging
# forever on an unresponsive server (requests waits indefinitely by default).
data = get(
    'https://www.google.com/search?source=hp&ei=0o1VXJPjM5KvgweJ4IPYBg&q=python&btnK=%D8%A8%D8%AD%D8%AB+Google%E2%80%8F&oq=python&gs_l=psy-ab.3..35i39l2j0i203l8.289.2048..2350...1.0..0.344.2075.0j2j4j2......0....1..gws-wiz.....0..0j0i131.j8hrx2vNR3s',
    timeout=10,
).text

# Print every 'txt' item scraped for every tag reported by get_tags.
tags = sc.get_tags(data)
for tag in tags:
    for dt in sc.scrp(data, tag)['txt']:
        print(dt)