from mwclient import Site

# Subdomain prefix used to build the Wikipedia host name below.
language = 'is'
# mwclient connection to "<language>.wikipedia.org".
SITE = Site(f'{language}.wikipedia.org')
# Page listing returned by SITE.allpages(); iterated by the loop below.
ALL_PAGES = SITE.allpages()

# Print the name of every page in the listing, one per line.
for page in ALL_PAGES:
    print(page.name)
# Example #2
# 0
import re
from simple_salesforce import Salesforce

# Walk every page of the internal wiki, pick out the "Bug ..." pages, split
# their titles into a numeric id and a subject, and pull the "Case ID" values
# out of the first section of each page.
#
# NOTE(review): the Confluence password and the Salesforce security token are
# hard-coded below. They should be read from the environment or a secrets
# store, and the token that has been committed here should be rotated.
api = ConfluenceAPI('admin', '123@qwe', 'http://127.0.0.1:8090')
# NOTE(review): this handle is never closed in the visible part of the script;
# wrap its use in `with open(...)` once the code that writes to it is in view.
FileOut = open('PagesBUSList.txt', 'w')
UserAgent = 'Wiki_parser/0.1 run by DremSama'
site = Site(('http', 'wiki.support.veeam.local'),
            path='/',
            clients_useragent=UserAgent)
sf = Salesforce(username='******',
                password='******',
                security_token='dNr44yHsFXaSuRmKXunWPlzS')
PagesList = []
PagesBUGSList = []

for page in site.allpages():
    PagesList.append(page)
    if page.name.startswith(('Bug', 'bug')):
        print('----------------------------------')
        # Title layout: "Bug<sep><digits><dashes/spaces><subject>".
        # The original class `[A-z,a-z]` also accepted `[ \ ] ^ _`, the
        # backtick and a comma; `[Bb]` matches what the guard above admits.
        BugID = re.match(r'[Bb]ug\b.(\d*)[-|\s]*(.*)', page.page_title)
        if BugID:
            BugID_NUM = BugID.group(1)
            BugID_SUBJECT = BugID.group(2)
        else:
            # Reset explicitly so a title that does not parse can neither
            # leave these names unbound nor inherit the previous page's values.
            BugID_NUM = ''
            BugID_SUBJECT = ''
        # Only section 0 of the page is fetched.
        textALL = page.text(0)
        if not textALL:
            print('ERROR: Page "' + page.name + '" has no text')
        elif textALL.startswith('#REDIRECT'):
            print('Page "' + page.name + '" is only a redirect page, skipping')
        else:
            print(page.name)
            # Every run of digits that follows the bold "Case ID: " label.
            BugCaseID = re.findall(r"'''Case ID: '''(\d*)", textALL)
# Example #3
# 0
# Wiki-markup patterns, compiled once instead of on every page.
_LATITUDE_RE = re.compile(r'(?<=latitude=).{6}')
_LONGITUDE_RE = re.compile(r'(?<=longitude=).{7}')
_DISTANCE_RE = re.compile(r'(?<=Distance\|)(.*)(?=miles}})')
_DIFFICULTY_RE = re.compile(r'(?<=Difficulty\|)(.*)(?=}})')
# The original pattern carried an inline "(?s)" in the middle of the
# expression, which raises re.error on Python 3.11+; re.DOTALL is the
# equivalent flag. re.MULTILINE was dropped: the pattern has no ^ or $.
_DESCRIPTION_RE = re.compile(r'Description ===(.*?)===\s', re.DOTALL)

# Bracket and brace characters removed from every description line.
_MARKUP_CHARS = str.maketrans('', '', '[]{}')

# Description lines starting with any of these (after markup removal) are
# dropped.
_SKIPPED_PREFIXES = ('Description', '===', 'TripReports',
                     'RelatedDiscussions', '*', 'Source', '(', '=', '<')

# Scanning stops as soon as this many matching pages have been seen. The
# page that reaches the limit is itself not processed, so at most
# _MATCH_LIMIT - 1 pages are examined (behaviour kept from the original).
_MATCH_LIMIT = 30


def _match_to_float(match, failure_message):
    """Return the matched text as a float, or 0.0 if absent or non-numeric."""
    if not match:
        return 0.0
    try:
        return float(match.group())
    except ValueError:
        print(failure_message)
        return 0.0


def _extract_description(page_text):
    """Return the cleaned description section of a page, or "" if none."""
    section = _DESCRIPTION_RE.search(page_text)
    if not section:
        return ""
    kept = []
    for raw_line in section.group().splitlines():
        # Markup is stripped first so the prefix filter sees the bare text.
        line = raw_line.translate(_MARKUP_CHARS)
        if line and not line.startswith(_SKIPPED_PREFIXES):
            kept.append(line)
    # Every kept line is preceded by a newline, including the first one.
    return "".join("\n" + line for line in kept)


def Hikes(site=None, hike_factory=None):
    """Collect Portland-area hikes that have a description.

    Iterates over ``site.allpages()`` and looks only at pages whose text
    contains both ``[[Category:Hikes]]`` and ``[[Category:Portland Area]]``.
    For each such page the latitude, longitude, distance, difficulty and
    description are extracted with regular expressions. A record is added to
    the result only when the description is non-empty.

    Args:
        site: Object exposing ``allpages()``, whose items provide ``text()``
            and ``page_title``. Defaults to a new ``Site`` connection to
            www.oregonhikers.org.
        hike_factory: Callable ``(lat, long)`` returning an object that
            accepts attribute assignment and exposes ``lat`` and ``long``.
            Defaults to ``Hike``.

    Returns:
        A list of hike records with ``desc``, ``name``, ``distance`` and
        ``difficulty`` set. Values that are missing or not numeric fall back
        to ``0.0`` (``"None given"`` for the difficulty).
    """
    if site is None:
        site = Site(('http', 'www.oregonhikers.org/'))
    if hike_factory is None:
        hike_factory = Hike

    matched = 0
    hikes = []
    for item in site.allpages():
        page_text = item.text()
        if ('[[Category:Hikes]]' not in page_text
                or '[[Category:Portland Area]]' not in page_text):
            continue

        matched += 1
        if matched == _MATCH_LIMIT:
            break

        lat = _match_to_float(_LATITUDE_RE.search(page_text), "no lat given")
        lon = _match_to_float(_LONGITUDE_RE.search(page_text),
                              "no long given")
        distance = _match_to_float(_DISTANCE_RE.search(page_text),
                                   "invalid distance")
        difficulty_match = _DIFFICULTY_RE.search(page_text)
        if difficulty_match:
            difficulty = difficulty_match.group()
        else:
            difficulty = "None given"

        entry = _extract_description(page_text)

        hike = hike_factory(lat, lon)
        hike.desc = entry
        hike.name = item.page_title
        hike.distance = distance
        hike.difficulty = difficulty
        if entry:
            hikes.append(hike)
            print("added: " + hike.name)
            print("lat: " + str(hike.lat))
            print("long: " + str(hike.long))
    return hikes