def getOrgNumber(urlPage, prefix, filename):
    """Collect organisation page numbers from the paginated listing at *urlPage*.

    Writes one line per number (``prefix + number + "/"``) to *filename*
    and returns the sorted list of numbers.
    """
    result = set()
    print("getting org number:")
    html = getInfo.getInfo(urlPage)
    # The "末页" (last page) link carries the highest page index.
    G = re.search(
        r'org_type=0&field_type=0&area_type=0&province_type=0&market_type=0&keywords=&org_search=&page=(\d+)">末页</a>',
        html)
    if G:
        lastPage = int(G.group(1))
    else:
        lastPage = 2  # no last-page link found: assume a single listing page
    print("last: %d" % lastPage)
    # NOTE(review): range(1, lastPage) never fetches page `lastPage` itself —
    # preserved as-is; confirm whether the final page should be included.
    for i in range(1, lastPage):
        url = urlPage + str(i)
        print("for: %d" % i)
        html = getInfo.getInfo(url)
        result = result.union(findOrg.findOrg(html))
    # was: tmp = list(result)[:]; tmp.sort(); result = tmp — sorted() does it in one step
    result = sorted(result)
    print("writing org number to %s..." % filename)
    # `with` guarantees the file is closed even if a write fails
    with open(filename, "w+", encoding="utf-8") as fp:
        for i in result:
            fp.write(prefix + str(i) + "/" + '\n')
    print("succ writing to %s!" % filename)
    return result
def getOrgNumber(urlPage, prefix, filename):
    """Collect organisation page numbers from the paginated listing at *urlPage*.

    Returns ``(numbers, pages_fetched)`` where *numbers* is sorted.
    *prefix* and *filename* are kept for interface compatibility (the
    file-writing step is currently disabled).
    """
    result = set()
    print("getting org number:")
    html = getInfo.getInfo(urlPage)
    # The "末页" (last page) link carries the highest page index.
    G = re.search(r'page=(\d+)">末页</a>', html)
    if G:
        lastPage = int(G.group(1))
    else:
        lastPage = 2  # no last-page link found: assume a single listing page
    print("lastPage: %d" % lastPage)
    for i in range(1, lastPage):
        url = urlPage + str(i)
        print("for: %d" % i)
        html = getInfo.getInfo(url)
        print("html get suc!")
        result = result.union(findOrg.findOrg(html))
    # was: tmp = list(result)[:]; tmp.sort(); result = tmp — sorted() does it in one step
    result = sorted(result)
    return result, lastPage - 1
def readFile(nameOfFile, helper, command):
    """Read one IP per line from *nameOfFile* and run getInfo on each.

    *helper* is closed once every IP has been processed.
    """
    ip_list = []  # all IPs found in the target file (was shadowing builtin `list`)
    with open(nameOfFile, 'r') as f:  # `with` closes the file even on error
        for line in f:
            ip_list.append(getip(line)[0])
    for ip in ip_list:
        getInfo(ip, helper, command)
    helper.close()
def submit():
    """Handle the submit button: fetch coin stats/info and update the UI.

    On any lookup failure the error label is shown instead.
    """
    coin = coinEntry.get()
    coinParts = coin.split()
    if len(coinParts) > 1:
        # Title-case every word of a multi-word coin name
        Coin = ' '.join(part.capitalize() for part in coinParts)
    else:
        Coin = coin.capitalize()
    try:
        statsDict, abrv = gi.getStats(coin)
        infoDict = gi.getInfo(coin)
        infoTitle.config(text=f'{Coin} ({abrv})')
        title.config(text=f'{Coin} ({abrv})')
        fillStats(statsDict)
        fillInfo(infoDict)
        # makes it so if the error message was displayed, it will display
        # again without having to run the code over again
        errorLabel.config(fg=bg1)
    except Exception:  # was a bare `except:` — don't swallow SystemExit/KeyboardInterrupt
        errorLabel.config(fg=down)
        errorLabel.config(
            text=
            f'\'{coin}\' is not a coin. Please check spelling and try again')
        errorLabel.place(relx=0.18, rely=0.96)
    # assumes the entry should be cleared on success and failure alike — TODO confirm
    coinEntry.delete(0, 50)
def findHire(url, ngo, numHire):
    """Scrape job postings from *url* and append them to ``ngo.hire``.

    *numHire* is accepted for interface compatibility; it is not read here.
    Returns *ngo*.
    """
    html = getInfo.getInfo(url)
    loc = 0
    # `loc` never becomes -1; the loop exits via the `break` when no more rows match
    while loc != -1:
        perhire = perHire()
        m = re.search(
            r'<td><a href="(.*)" title="(.+)" target= "_blank" class="it1">(.+)</a></td>',
            html[loc:])
        if m is None:  # was `m == None` — identity test is the Python idiom
            break
        perhire.post = m.group(3)
        loc = loc + m.end()
        # The next three <td> cells are unit, worksite and update time, in order
        m = re.search(r'<td>(.*)</td>', html[loc:])
        perhire.unit = m.group(1)
        loc = loc + m.end()
        m = re.search(r'<td>(.*)</td>', html[loc:])
        perhire.worksite = m.group(1)
        loc = loc + m.end()
        m = re.search(r'<td>(.*)</td>', html[loc:])
        perhire.updateTime = m.group(1)
        loc = loc + m.end()
        ngo.hire.append(perhire)
    return ngo
def findLoc(url):
    """Return the location string embedded in the page's `var content` JS."""
    page = getInfo(url)
    found = re.search(r"var content = '(.*)'", page)
    return found.group(1).strip()
def searchMachine(keyword):
    """Query the hall machine-search API for *keyword* and build a result DataFrame.

    Returns None when the API reports no matching machines.
    """
    print(keyword)
    url = 'https://nifmbapi.maruhan.co.jp/api/v1.4/hall/machine/search'
    query = {'hall_code': 1061, 'key_word': keyword, 'ps_type': 'S'}
    # build the request URL with an encoded query string
    url_with_query = "{}?{}".format(url, urllib.parse.urlencode(query))
    response = urllib.request.urlopen(url_with_query)
    content = json.loads(response.read().decode('utf8'))
    if not (content["child_halls"]):
        # no matching machines
        return
    # Name the first model / first machine once instead of repeating deep subscripts
    model = content["child_halls"][0]["models"][0]
    machine = model["groups"][0]["machines"][0]
    model_name = model["model_name"]
    machine_number = machine["machine_number"]
    slump = getSlump.getSlump(machine["machine_id"])
    allStarts, bb, rb = getInfo.getInfo(machine["machine_id"])
    return createDF(model_name, machine_number, slump, allStarts, bb, rb)
def findHire(url, ngo, numHire):
    """Scrape job postings from *url* and append them to ``ngo.hire``.

    *numHire* is accepted for interface compatibility; it is not read here.
    Returns *ngo*.
    """
    html = getInfo.getInfo(url)
    loc = 0
    # `loc` never becomes -1; the loop exits via the `break` when no more rows match
    while loc != -1:
        perhire = perHire()
        m = re.search(
            r'<td><a href="(.*)" title="(.+)" target= "_blank" class="it1">(.+)</a></td>',
            html[loc:])
        if m is None:  # was `m == None` — identity test is the Python idiom
            break
        perhire.post = m.group(3)
        loc = loc + m.end()
        # The next three <td> cells are unit, worksite and update time, in order
        m = re.search(r'<td>(.*)</td>', html[loc:])
        perhire.unit = m.group(1)
        loc = loc + m.end()
        m = re.search(r'<td>(.*)</td>', html[loc:])
        perhire.worksite = m.group(1)
        loc = loc + m.end()
        m = re.search(r'<td>(.*)</td>', html[loc:])
        perhire.updateTime = m.group(1)
        loc = loc + m.end()
        ngo.hire.append(perhire)
    return ngo
def thread_download(num):
    """Download info for movie_names[num], num+8, num+16, … until an empty result.

    Accumulates results in the module-level ``movie_info`` dict under the
    starting index, and returns that list. An empty string from getInfo, or
    any exception, ends the run.
    """
    temp_list = list()
    temp = num  # remember the starting slot; `num` advances by the worker stride
    while True:
        try:
            check = getInfo.getInfo(movie_names[num])
            temp_list.append(check)
            if check == "":
                movie_info[temp] = temp_list
                return movie_info[temp]
            num = num + 8  # stride of 8: each worker thread handles every 8th title
        except Exception:  # was a bare `except:` — don't swallow SystemExit/KeyboardInterrupt
            movie_info[temp] = temp_list
            return movie_info[temp]
def getOrgInfo(url):
    """Scrape one organisation page into an NGO object.

    Reads the name, office address, contact info, founding year and staff
    count from the page's HTML. Returns the populated NGO.
    """
    ngo = NGO()
    url = url.strip()
    html = getInfo.getInfo(url)
    soup = BeautifulSoup(html)
    # Organisation name sits in the <h1> inside the #org-header element
    ngo.name = soup.find(id="org-header").find("h1").get_text()
    print(ngo.name)
    # "办公地址" = office address; the value is the following <p>
    location = soup.find("h3", text="办公地址")
    ngo.location = location.findNextSibling("p").get_text()
    print("location: %s" % ngo.location)
    # "联系方式" = contact info; collect every following <p>, stripped of whitespace
    connInfoList = soup.find("h3", text="联系方式").find_next_siblings("p")
    tem = list()
    for x in connInfoList:
        s = x.get_text().replace(" ", "").replace("\n", "")
        # "访问机构网站" = "visit org website": substitute the actual link href
        if s == "访问机构网站":
            s = "网站: " + x.a['href']
        tem.append(s)
    ngo.connectionInfo = tem
    # "成立时间" = founding time; pull the 4-digit year out of the parent <p>
    tem = soup.find("span", text="成立时间: ")
    if tem:
        tem = tem.find_parent("p").get_text()
        Ga = re.search("(\d*)年", tem)
        if Ga:
            if Ga.group(1):
                ngo.esTime = int(Ga.group(1))  # founding year
                print("year: %d" % ngo.esTime)
    # "全职人数" = full-time staff count, matched against the raw HTML
    Ga = re.search("<span>全职人数: </span>(\d*)", html)
    if Ga:
        if Ga.group(1):
            ngo.scale = int(Ga.group(1))  # staff count
            print("scale: %d" % ngo.scale)
    return ngo
def getOrgInfo(url):
    """Scrape an organisation profile page into an NGO object.

    Parses the free-text description to extract sponsors, partners, address,
    person in charge, staff count and founding time. Returns the NGO.
    NOTE(review): indentation reconstructed from collapsed source — the
    nesting of some statements is inferred; verify against the original file.
    """
    ngo = NGO()
    ngo.url = url
    url = url.strip()
    html = getInfo.getInfo(url)
    soup = BeautifulSoup(html)
    if soup.find("h3"):
        title = soup.find("h3")
        if title.get_text() != "":
            # first line of the <h3> text is the organisation name
            ngo.name = title.get_text().splitlines()[0]
            print(ngo.name)
        # The description lives in the <p> siblings following the title
        content = title.find_next_siblings("p")
        s = ""
        encontent = soup.find("p", class_="sch_con2 l")
        if content != []:
            per = content[0]
            for w in per.find_all("p"):
                s += replace_with_newlines(w) + "\n"
            if per.find_all("div"):
                totalDiv = per.find_all("div")
                for w in totalDiv:
                    s += w.get_text() + "\n"
        if encontent != None and encontent.find("b"):
            # the bold element of the "sch_con2 l" paragraph holds the English name
            enb = encontent.find("b")
            ngo.enName = enb.get_text()
        ngo.description = s
    # Normalise whitespace and split the description into lines for section scanning
    lines = ngo.description.replace("\t", "").replace("\xa0", " ").splitlines()
    # If the "name" starts with a Latin letter, the Chinese/English names were swapped
    if ngo.name != "" and re.search('[a-zA-Z]', ngo.name[0]):
        ngo.name, ngo.enName = ngo.enName, ngo.name
    # Sponsors: lines between a heading ending in "资助者" (sponsors) and the
    # next blank line that precedes a blank or a "合作" (cooperation) heading.
    ngo.sponsors = []
    for i in range(0, len(lines)):
        Ga = re.search(".*资助者$", lines[i])
        if Ga:
            for j in range(i + 1, len(lines)):
                if lines[j] == "" and j < len(lines) - 1 and (
                        lines[j + 1] == "" or re.search("(.*)合作", lines[j + 1])):
                    for k in range(i + 1, j):
                        if lines[k] != "":
                            # NOTE(review): "(.*)" matches the whole line, so the
                            # replace below empties it before splitting — looks
                            # suspicious; confirm the intended pattern.
                            Ga = re.search("(.*)", lines[k])
                            if Ga:
                                lines[k] = lines[k].replace(
                                    Ga.group(0), "")
                            # Split on whichever delimiter the line uses
                            if "、" in lines[k]:
                                words = lines[k].split("、")
                            elif ";" in lines[k]:
                                words = lines[k].split(";")
                            elif "," in lines[k]:
                                words = lines[k].split(",")
                            elif "," in lines[k] or "," in lines[k]:
                                words = lines[k].split(",")
                            else:
                                words = lines[k].split(" ")
                            for q in words:
                                if q:
                                    ngo.sponsors.append(q)
                    break  # sponsor section consumed
            break  # sponsor heading handled; stop scanning
    # Partners: same scheme, between "合作伙伴" (partners) and "独特性" (uniqueness)
    ngo.partners = []
    for i in range(0, len(lines)):
        Ga = re.search(".*合作伙伴$", lines[i])
        if Ga:
            for j in range(i + 1, len(lines)):
                if lines[j] == "" and j < len(lines) - 1 and (
                        lines[j + 1] == "" or re.search("(.*)独特性", lines[j + 1])):
                    for k in range(i + 1, j):
                        if lines[k] != "":
                            Ga = re.search("(.*)", lines[k])
                            if Ga:
                                lines[k] = lines[k].replace(
                                    Ga.group(0), "\n")
                            if "、" in lines[k]:
                                words = lines[k].split("、")
                            elif ";" in lines[k]:
                                words = lines[k].split(";")
                            elif "," in lines[k]:
                                words = lines[k].split(",")
                            elif "," in lines[k] or "," in lines[k]:
                                words = lines[k].split(",")
                            else:
                                words = lines[k].split(" ")
                            for q in words:
                                if q:
                                    ngo.partners.append(q)
                    break  # partner section consumed
            break  # partner heading handled; stop scanning
    # "地址" = address: prefer the text on the same line, else the next line
    Ga = re.search("(.*)地址[:|:](.*)\n(.*)", ngo.description)
    if Ga:
        if Ga.group(2):
            ngo.location = Ga.group(2)
        else:
            ngo.location = Ga.group(3)
        if " " in ngo.location:
            ngo.location = ngo.location.split(" ")[0]
    # "负责人" = person in charge
    Ga = re.search("负责人[:|:](.*)", ngo.description)
    if Ga:
        ngo.personCharge = Ga.group(1)
    # "员工人数" = staff count
    Ga = re.search("员工人数[:|:](.*)", ngo.description)
    if Ga:
        ngo.scale = Ga.group(1)
    # "成立时间" = founding time
    Ga = re.search("成立时间[:|:](.*)", ngo.description)
    if Ga:
        ngo.esTime = Ga.group(1)
    print(ngo.name, ngo.location, ngo.personCharge, ngo.scale, ngo.esTime,
          ngo.partners, ngo.sponsors)
    return ngo
import getInfo
import os
import time
import re

# Marker strings delimiting the name block on a chinadevelopmentbrief.org.cn
# organisation page: the container div, the <h1> (Chinese name) and the
# <font> (English name).
s1 = r'<div class="ml_name mt15">'
s2_1 = r'<h1>'
s2_2 = r'</h1>'
s3_1 = r'<font>'
s3_2 = r'</font>'
name = enName = esTime = location = area = scale = description = info = recruit = image = None
# NOTE(review): `fp` is opened but not used in this visible chunk — presumably
# read further down in the original file; confirm before removing.
fp = open("Pages.txt", "r", encoding="utf-8")
html = getInfo.getInfo('http://www.chinadevelopmentbrief.org.cn/org33/')
loc = html.find(s1)
if loc != -1:
    # Walk forward through the marker pairs; each find() starts after the
    # previous marker so the slices line up.
    x = html.find(s2_1, loc + len(s1))
    if x == -1: pass  # NOTE(review): dead guard — a miss is not actually handled
    y = html.find(s2_2, x + len(s2_1))
    if y == -1: pass  # NOTE(review): dead guard
    name = html[x + len(s2_1):y]
    print(name)
    m = html.find(s3_1, y + len(s2_2))
    if m == -1: pass  # NOTE(review): dead guard
    n = html.find(s3_2, m + len(s3_1))
    if (n == -1): pass  # NOTE(review): dead guard
    enName = html[m + len(s3_1):n]
    print(enName)
    # "成立时间" = founding time; capture the year digits
    Gr = re.search(r'<li><font>成立时间:</font>(\d+)年</li>', html[n:])
# NOTE(review): Python 2 source (print statements, `file` builtin).
# The leading loop appears to be the tail of a `get_uid(filename, uid_list)`
# definition whose header lies outside this view — indentation reconstructed.
fread = file(filename)
for line in fread:
    uid_list.append(line.strip())


def writefile(filename, content):
    # Append *content* to *filename*.
    fw = file(filename, 'a')
    fw.write(content)
    fw.close()


if __name__ == '__main__':
    # Credentials are intentionally blank here; fill in before running.
    username = ''
    pwd = ''
    WBLogin = weiboLogin.weiboLogin()
    if (WBLogin.login(username, pwd) == 'servertime_error'):
        print 'login failed. check out your network.'
        sys.exit()
    uid_list = []
    get_uid('C:/Result1.txt', uid_list)
    path = 'C:/weibodata'
    if not os.path.exists(path):
        os.mkdir(path)
    for uid in uid_list:
        try:
            InfoPage = getInfo.getInfo()
            InfoPage.get_info(uid)
        except Exception as e:
            # on any failure, record the uid to id.txt for later retry
            writefile('C:/id.txt', str(uid) + '\n')
from sendToLocations import sendToLocations from getNgrokUrl import getNgrokUrl from getInfo import getInfo from changeWebhookUrl import changeWebhookUrl import time previousWarningUrl = input() start = time.time() ngrokUrl = '' # Check for new warnings, ngrok url for each time looping through while True: try: message, locations = getInfo(previousWarningUrl) except IndexError: time.sleep(10) continue if message != '': responses = sendToLocations(locations, message) for response in responses: if response['status'] == 'success': print(response) else: continue else: pass url = getNgrokUrl() print(url) if url != ngrokUrl:
from getInfo import getInfo
from wInExcel import openxl
from geturl import getUrl
from getpage import getPage

# Crawl every blog URL and write the collected rows to an Excel workbook.
pages = getPage()  # was `list = getPage()` — don't shadow the builtin
urls = getUrl(pages)
rows = []  # one list per URL is returned by getInfo; flatten them all
for url in urls:
    print(url)
    rows.extend(getInfo(url))
openxl(rows)
def findImage(url):
    """Return the src of the organisation logo image on the page."""
    page = getInfo.getInfo(url)
    hit = re.search(
        r'<div class="ml_logo"><img width="230px" height="115px" src="(.*)" /></div>',
        page)
    return hit.group(1)
def getOrgInfo(url):
    """Scrape an organisation profile page into an NGO object.

    Parses the free-text description to extract sponsors, partners, address,
    person in charge, staff count and founding time. Returns the NGO.
    NOTE(review): indentation reconstructed from collapsed source — the
    nesting of some statements is inferred; verify against the original file.
    """
    ngo = NGO()
    ngo.url = url
    url = url.strip()
    html = getInfo.getInfo(url)
    soup = BeautifulSoup(html)
    if soup.find("h3"):
        title = soup.find("h3")
        if title.get_text() != "":
            # first line of the <h3> text is the organisation name
            ngo.name = title.get_text().splitlines()[0]
            print(ngo.name)
        # The description lives in the <p> siblings following the title
        content = title.find_next_siblings("p")
        s = ""
        encontent = soup.find("p", class_="sch_con2 l")
        if content != []:
            per = content[0]
            for w in per.find_all("p"):
                s += replace_with_newlines(w) + "\n"
            if per.find_all("div"):
                totalDiv = per.find_all("div")
                for w in totalDiv:
                    s += w.get_text() + "\n"
        if encontent != None and encontent.find("b"):
            # the bold element of the "sch_con2 l" paragraph holds the English name
            enb = encontent.find("b")
            ngo.enName = enb.get_text()
        ngo.description = s
    # Normalise whitespace and split the description into lines for section scanning
    lines = ngo.description.replace("\t", "").replace("\xa0", " ").splitlines()
    # If the "name" starts with a Latin letter, the Chinese/English names were swapped
    if ngo.name != "" and re.search('[a-zA-Z]', ngo.name[0]):
        ngo.name, ngo.enName = ngo.enName, ngo.name
    # Sponsors: lines between a heading ending in "资助者" (sponsors) and the
    # next blank line that precedes a blank or a "合作" (cooperation) heading.
    ngo.sponsors = []
    for i in range(0, len(lines)):
        Ga = re.search(".*资助者$", lines[i])
        if Ga:
            for j in range(i + 1, len(lines)):
                if lines[j] == "" and j < len(lines) - 1 and (
                        lines[j + 1] == "" or re.search("(.*)合作", lines[j + 1])):
                    for k in range(i + 1, j):
                        if lines[k] != "":
                            # NOTE(review): "(.*)" matches the whole line, so the
                            # replace below empties it before splitting — looks
                            # suspicious; confirm the intended pattern.
                            Ga = re.search("(.*)", lines[k])
                            if Ga:
                                lines[k] = lines[k].replace(Ga.group(0), "")
                            # Split on whichever delimiter the line uses
                            if "、" in lines[k]:
                                words = lines[k].split("、")
                            elif ";" in lines[k]:
                                words = lines[k].split(";")
                            elif "," in lines[k]:
                                words = lines[k].split(",")
                            elif "," in lines[k] or "," in lines[k]:
                                words = lines[k].split(",")
                            else:
                                words = lines[k].split(" ")
                            for q in words:
                                if q:
                                    ngo.sponsors.append(q)
                    break  # sponsor section consumed
            break  # sponsor heading handled; stop scanning
    # Partners: same scheme, between "合作伙伴" (partners) and "独特性" (uniqueness)
    ngo.partners = []
    for i in range(0, len(lines)):
        Ga = re.search(".*合作伙伴$", lines[i])
        if Ga:
            for j in range(i + 1, len(lines)):
                if lines[j] == "" and j < len(lines) - 1 and (
                        lines[j + 1] == "" or re.search("(.*)独特性", lines[j + 1])):
                    for k in range(i + 1, j):
                        if lines[k] != "":
                            Ga = re.search("(.*)", lines[k])
                            if Ga:
                                lines[k] = lines[k].replace(Ga.group(0), "\n")
                            if "、" in lines[k]:
                                words = lines[k].split("、")
                            elif ";" in lines[k]:
                                words = lines[k].split(";")
                            elif "," in lines[k]:
                                words = lines[k].split(",")
                            elif "," in lines[k] or "," in lines[k]:
                                words = lines[k].split(",")
                            else:
                                words = lines[k].split(" ")
                            for q in words:
                                if q:
                                    ngo.partners.append(q)
                    break  # partner section consumed
            break  # partner heading handled; stop scanning
    # "地址" = address: prefer the text on the same line, else the next line
    Ga = re.search("(.*)地址[:|:](.*)\n(.*)", ngo.description)
    if Ga:
        if Ga.group(2):
            ngo.location = Ga.group(2)
        else:
            ngo.location = Ga.group(3)
        if " " in ngo.location:
            ngo.location = ngo.location.split(" ")[0]
    # "负责人" = person in charge
    Ga = re.search("负责人[:|:](.*)", ngo.description)
    if Ga:
        ngo.personCharge = Ga.group(1)
    # "员工人数" = staff count
    Ga = re.search("员工人数[:|:](.*)", ngo.description)
    if Ga:
        ngo.scale = Ga.group(1)
    # "成立时间" = founding time
    Ga = re.search("成立时间[:|:](.*)", ngo.description)
    if Ga:
        ngo.esTime = Ga.group(1)
    print(ngo.name, ngo.location, ngo.personCharge, ngo.scale, ngo.esTime,
          ngo.partners, ngo.sponsors)
    return ngo
from getOrgInfo import getOrgInfo
import time
import pickle

# Crawl every listing page of ngo20map.com, scrape each organisation page,
# and pickle the resulting NGO list.
# NOTE(review): this chunk also uses `re`, `BeautifulSoup` and `getInfo`,
# whose imports are presumably elsewhere in the original file.
preSite = "http://www.ngo20map.com/Index/list_index?&p="
lastPage = 84
pat = re.compile('<a href="(.*)" target="_blank">(.*)</a>')
pre = "http://www.ngo20map.com/"
st = 1
cnt = 0
ngo = list()
for i in range(st, lastPage):
    print("page: %d\n" % i)
    url = (preSite + str(i)).strip()
    html = getInfo(url)
    soup = BeautifulSoup(html)
    # every org link on the listing page
    fSite = re.findall(pat, html)
    for j in range(0, len(fSite)):
        cnt = cnt + 1
        orgurl = pre + fSite[j][0]
        print(orgurl)
        ngo.append(getOrgInfo(orgurl))
        print("\n")
    print("\n")  # page separator (placement reconstructed — TODO confirm)
print(cnt)
# `with` guarantees the pickle file is flushed and closed (was a bare open())
with open("orginfo.pkl", "wb") as fp:
    pickle.dump(ngo, fp)
def findActive(url):
    """Return the inner HTML of the page's "flxx" div."""
    page = getInfo.getInfo(url)
    hit = re.search(r'<div class="flxx">(.*)</div>', page)
    return hit.group(1)
def findLoc(url):
    """Return the location string embedded in the page's `var content` JS."""
    source = getInfo(url)
    matched = re.search(r"var content = '(.*)'", source)
    return matched.group(1).strip()
def getOrgInfo(start, end, orgSiteList):
    """Scrape organisation pages orgSiteList[start:end] into NGO objects.

    Walks each page with string-marker offsets (the order of the find/search
    calls matters: each starts where the previous match ended). Returns the
    list of scraped NGOs.
    NOTE(review): indentation reconstructed from collapsed source; the
    placement of ans.append inside the `if loc != -1` branch is inferred
    from the `continue`-on-failure style — confirm against the original.
    """
    # HTML markers around the name block: container div, <h1> (Chinese name),
    # <font> (English name)
    s1 = r'<div class="ml_name mt15">'
    s2_1 = r'<h1>'
    s2_2 = r'</h1>'
    s3_1 = r'<font>'
    s3_2 = r'</font>'
    name = enName = esTime = location = field = scale = description = info = recruit = image = ""
    # sub-pages of each org: activity, hiring, logo image, map/location
    suff = ["org_active/", "org_hire/", "org_image/", "org_map/"]
    ans = list()
    for st in range(start, end):
        print("seq: %d" % st)
        url = orgSiteList[st].strip()
        ngo = orgClass.NGO()
        print(url[url.find(r'/org'):len(url) - 1])
        # the trailing path segment "/orgNNN/" carries the org number
        orgNumber = int(url[url.find(r'/org') + len('/org'):len(url) - 1])
        ngo.orgNumber = orgNumber
        html = getInfo.getInfo(url)
        loc = html.find(s1)
        if loc != -1:
            x = html.find(s2_1, loc + len(s1))
            if x == -1:
                continue  # malformed page: skip this org
            y = html.find(s2_2, x + len(s2_1))
            if y == -1:
                continue
            ngo.name = html[x + len(s2_1):y]
            m = html.find(s3_1, y + len(s2_2))
            if m == -1:
                continue
            n = html.find(s3_2, m + len(s3_1))
            if (n == -1):
                continue
            ngo.enName = html[m + len(s3_1):n]
            # "成立时间" = founding time (year digits)
            # NOTE(review): Gr/Ga are used without a None check below — a page
            # missing any of these <li> rows would raise AttributeError.
            Gr = re.search(r'<li><font>成立时间:</font>(\d*)年</li>', html[n:])
            if Gr.group(1):
                ngo.esTime = int(Gr.group(1))
                print("year: %d" % ngo.esTime)
            # "工作领域" = fields of work, space-separated
            Ga = re.search(r'<li><font>工作领域:</font>(.*)</li>', html[Gr.end():])
            if Ga.group(1):
                field = Ga.group(1)
                ngo.field = field.split(" ")
                print(ngo.field)
            # "机构规模" = organisation scale
            Gr = re.search(r'<li><font>机构规模:\s{0,10}</font>(.*)</li>',
                           html[Ga.end():])
            if Gr.group(1):
                ngo.scale = Gr.group(1)
                print(ngo.scale)
            # description: text between the "jgjs" div opening and the next close tag
            Ga = re.search(r'<div class="jgjs mt20">(.*)>', html[Gr.end():])
            x = Gr.end() + Ga.end()
            sec = html.find(r"</", x)
            ngo.description = html[x:sec]  # description text
            # logo and location come from the org's image and map sub-pages
            ngo.image = findImage.findImage(url + suff[2])
            ngo.location = findLoc.findLoc(url + suff[3])
            print("location: %s" % ngo.location)
            ans.append(ngo)
    return ans
def getOrgInfo(url):
    """Scrape an organisation record page into an NGO object.

    Returns -1 when the record's category is not "非政府组织" (NGO);
    otherwise returns the populated NGO.
    NOTE(review): indentation reconstructed from collapsed source; nesting
    of the trailing partners cleanup is inferred — verify.
    """
    ngo = NGO()
    url = url.strip()
    html = getInfo.getInfo(url)
    soup = BeautifulSoup(html)
    # "类别" = category: only keep non-governmental organisations
    kinddiv = soup.find("div", text="类别 : ")
    if kinddiv:
        kind = kinddiv.parent.find("div", class_="OrgInfodataItemContent").get_text()
        print(kind)
        if kind != '非政府组织':
            return -1
    # "名称" = name; strip any parenthesised suffix (ASCII or fullwidth parens)
    namediv = soup.find("div", text="名称 : ")
    if namediv:
        name = namediv.parent.find("div", class_="OrgInfodataItemContent").get_text()
        print(name)
        ga = re.search(r'(([(|(].*[)|)]))', name)
        if ga:
            name = name.replace(ga.group(1), "")
        ngo.name = name
        print(name)
    # "成立日期" = founding date: take the first run of digits as the year
    timediv = soup.find("div", text="成立日期 : ")
    if timediv:
        timestr = timediv.parent.find("div", class_="OrgInfodataItemContent").get_text()
        if re.search("(\d+)", timestr):
            time = int(re.search("(\d+)", timestr).group(1))
            ngo.esTime = time
            print(time)
    # "工作领域" = fields of work: map each line through the `fields` lookup
    fi = soup.find("div", text=re.compile("工作领域.*"))
    if fi:
        fie = fi.find_next("div", class_="OrgInfodataItemContent")
        if fie:
            ngo.field = [fields[i] for i in replace_with_newlines(fie).splitlines()]
            print(ngo.field)
    # "成立背景" = founding background → description
    des = soup.find("div", text=re.compile("成立背景.*"))
    if des:
        descri = des.find_next("div", class_="OrgInfoSectionContent")
        if descri:
            ngo.description = descri.get_text()
    # "在中国的CSR项目" = CSR projects in China → appended to description
    des = soup.find("div", text=re.compile(("在中国的CSR项目.*")))
    if des:
        descri = des.find_next("div", class_="OrgInfoSectionContent")
        if descri:
            ngo.description += replace_with_newlines(descri)
    # "主要合作伙伴" = main partners: split each line on its delimiter, dropping
    # bullet glyphs (\uf0fc) and dash-only fragments
    partnersdiv = soup.find("div", text=re.compile(".*主要合作伙伴.*"))
    if partnersdiv:
        partnerstr = replace_with_newlines(
            partnersdiv.find_next("div", class_="OrgInfoSectionContent")).splitlines()
        tem = list()
        for w in partnerstr:
            # NOTE(review): the first branch is `if`, not `elif` — a line with a
            # "," but no "、" also falls into the final else and may be split
            # twice; looks unintended but preserved as-is.
            if "," in w:
                for q in w.split(","):
                    if q.find("\uf0fc") == -1 and q.find("-") == -1:
                        tem.append(q)
            if "、" in w:
                for q in w.split("、"):
                    if q.find("\uf0fc") == -1 and q.find("-") == -1:
                        tem.append(q)
            else:
                for q in w.split():
                    if q.find("\uf0fc") == -1 and q.find("-") == -1:
                        tem.append(q)
        ngo.partners = tem
        if ngo.partners != []:
            # drop a trailing "等" ("etc.") from the last partner name
            s = ngo.partners[len(ngo.partners) - 1]
            if s[len(s) - 1] == "等":
                ngo.partners[len(ngo.partners) - 1] = s.replace(s[len(s) - 1], "")
    return ngo
# NOTE(review): Python 2 source (print statements, `file` builtin).
# The leading loop appears to be the tail of a `get_uid(filename, uid_list)`
# definition whose header lies outside this view — indentation reconstructed.
fread = file(filename)
for line in fread:
    uid_list.append(line.strip())


def writefile(filename, content):
    # Append *content* to *filename*.
    fw = file(filename, 'a')
    fw.write(content)
    fw.close()


if __name__ == '__main__':
    # Credentials are intentionally blank here; fill in before running.
    username = ''
    pwd = ''
    WBLogin = weiboLogin.weiboLogin()
    if (WBLogin.login(username, pwd) == 'servertime_error'):
        print 'login failed. check out your network.'
        sys.exit()
    uid_list = []
    get_uid('C:/Result1.txt', uid_list)
    path = 'C:/weibodata'
    if not os.path.exists(path):
        os.mkdir(path)
    for uid in uid_list:
        try:
            InfoPage = getInfo.getInfo()
            InfoPage.get_info(uid)
        except Exception as e:
            # on any failure, record the uid to id.txt for later retry
            writefile('C:/id.txt', str(uid) + '\n')
from getOrgInfo import getOrgInfo
import time
import pickle

# Crawl every listing page of ngo20map.com, scrape each organisation page,
# and pickle the resulting NGO list.
# NOTE(review): this chunk also uses `re`, `BeautifulSoup` and `getInfo`,
# whose imports are presumably elsewhere in the original file.
preSite = "http://www.ngo20map.com/Index/list_index?&p="
lastPage = 84
pat = re.compile('<a href="(.*)" target="_blank">(.*)</a>')
pre = "http://www.ngo20map.com/"
st = 1
cnt = 0
ngo = list()
for i in range(st, lastPage):
    print("page: %d\n" % i)
    url = (preSite + str(i)).strip()
    html = getInfo(url)
    soup = BeautifulSoup(html)
    # every org link on the listing page
    fSite = re.findall(pat, html)
    for j in range(0, len(fSite)):
        cnt = cnt + 1
        orgurl = pre + fSite[j][0]
        print(orgurl)
        ngo.append(getOrgInfo(orgurl))
        print("\n")
    print("\n")  # page separator (placement reconstructed — TODO confirm)
print(cnt)
# `with` guarantees the pickle file is flushed and closed (was a bare open())
with open("orginfo.pkl", "wb") as fp:
    pickle.dump(ngo, fp)
def getOrgInfo(url):
    """Scrape an organisation record page into an NGO object.

    Returns -1 when the record's category is not "非政府组织" (NGO);
    otherwise returns the populated NGO.
    NOTE(review): indentation reconstructed from collapsed source; nesting
    of the trailing partners cleanup is inferred — verify.
    """
    ngo = NGO()
    url = url.strip()
    html = getInfo.getInfo(url)
    soup = BeautifulSoup(html)
    # "类别" = category: only keep non-governmental organisations
    kinddiv = soup.find("div", text="类别 : ")
    if kinddiv:
        kind = kinddiv.parent.find("div",
                                   class_="OrgInfodataItemContent").get_text()
        print(kind)
        if kind != '非政府组织':
            return -1
    # "名称" = name; strip any parenthesised suffix (ASCII or fullwidth parens)
    namediv = soup.find("div", text="名称 : ")
    if namediv:
        name = namediv.parent.find("div",
                                   class_="OrgInfodataItemContent").get_text()
        print(name)
        ga = re.search(r'(([(|(].*[)|)]))', name)
        if ga:
            name = name.replace(ga.group(1), "")
        ngo.name = name
        print(name)
    # "成立日期" = founding date: take the first run of digits as the year
    timediv = soup.find("div", text="成立日期 : ")
    if timediv:
        timestr = timediv.parent.find(
            "div", class_="OrgInfodataItemContent").get_text()
        if re.search("(\d+)", timestr):
            time = int(re.search("(\d+)", timestr).group(1))
            ngo.esTime = time
            print(time)
    # "工作领域" = fields of work: map each line through the `fields` lookup
    fi = soup.find("div", text=re.compile("工作领域.*"))
    if fi:
        fie = fi.find_next("div", class_="OrgInfodataItemContent")
        if fie:
            ngo.field = [
                fields[i] for i in replace_with_newlines(fie).splitlines()
            ]
            print(ngo.field)
    # "成立背景" = founding background → description
    des = soup.find("div", text=re.compile("成立背景.*"))
    if des:
        descri = des.find_next("div", class_="OrgInfoSectionContent")
        if descri:
            ngo.description = descri.get_text()
    # "在中国的CSR项目" = CSR projects in China → appended to description
    des = soup.find("div", text=re.compile(("在中国的CSR项目.*")))
    if des:
        descri = des.find_next("div", class_="OrgInfoSectionContent")
        if descri:
            ngo.description += replace_with_newlines(descri)
    # "主要合作伙伴" = main partners: split each line on its delimiter, dropping
    # bullet glyphs (\uf0fc) and dash-only fragments
    partnersdiv = soup.find("div", text=re.compile(".*主要合作伙伴.*"))
    if partnersdiv:
        partnerstr = replace_with_newlines(
            partnersdiv.find_next(
                "div", class_="OrgInfoSectionContent")).splitlines()
        tem = list()
        for w in partnerstr:
            # NOTE(review): the first branch is `if`, not `elif` — a line with a
            # "," but no "、" also falls into the final else and may be split
            # twice; looks unintended but preserved as-is.
            if "," in w:
                for q in w.split(","):
                    if q.find("\uf0fc") == -1 and q.find("-") == -1:
                        tem.append(q)
            if "、" in w:
                for q in w.split("、"):
                    if q.find("\uf0fc") == -1 and q.find("-") == -1:
                        tem.append(q)
            else:
                for q in w.split():
                    if q.find("\uf0fc") == -1 and q.find("-") == -1:
                        tem.append(q)
        ngo.partners = tem
        if ngo.partners != []:
            # drop a trailing "等" ("etc.") from the last partner name
            s = ngo.partners[len(ngo.partners) - 1]
            if s[len(s) - 1] == "等":
                ngo.partners[len(ngo.partners) - 1] = s.replace(
                    s[len(s) - 1], "")
    return ngo
import getInfo
import os
import time
import re

# Marker strings delimiting the name block on a chinadevelopmentbrief.org.cn
# organisation page: the container div, the <h1> (Chinese name) and the
# <font> (English name).
s1 = r'<div class="ml_name mt15">'
s2_1 = r'<h1>'
s2_2 = r'</h1>'
s3_1 = r'<font>'
s3_2 = r'</font>'
name = enName = esTime = location = area = scale = description = info = recruit = image = None
# NOTE(review): `fp` is opened but not used in this visible chunk — presumably
# read further down in the original file; confirm before removing.
fp = open("Pages.txt", "r", encoding="utf-8")
html = getInfo.getInfo('http://www.chinadevelopmentbrief.org.cn/org33/')
loc = html.find(s1)
if loc != -1:
    # Walk forward through the marker pairs; each find() starts after the
    # previous marker so the slices line up.
    x = html.find(s2_1, loc + len(s1))
    if x == -1:
        pass  # NOTE(review): dead guard — a miss is not actually handled
    y = html.find(s2_2, x + len(s2_1))
    if y == -1:
        pass  # NOTE(review): dead guard
    name = html[x + len(s2_1):y]
    print(name)
    m = html.find(s3_1, y + len(s2_2))
    if m == -1:
        pass  # NOTE(review): dead guard
    n = html.find(s3_2, m + len(s3_1))
    if (n == -1):
        pass  # NOTE(review): dead guard
    enName = html[m + len(s3_1):n]
    print(enName)
    # "成立时间" = founding time; capture the year digits
    Gr = re.search(r'<li><font>成立时间:</font>(\d+)年</li>', html[n:])
#!/usr/bin/python #! coding:utf-8 from getInfo import getInfo import os import time if __name__ == '__main__': successCount = 0 failedCount = 0 while True: usefulInfo = getInfo(); if usefulInfo == -1: failedCount=failedCount+1 continue else: successCount=successCount+1 os.system('clear') print time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(time.time()))+"\t成功:"+str(successCount)+"\t失败:"+str(failedCount) print '----------------------------------------------------------------------' for element in usefulInfo[0:10]: if element['isTransfer']==True: print str(element["interest"]) + "\t" + str(element["borrowerLevel"])+ "\t" + str(element["displayLoanType"])+ "\t" + str(element["share"]) + "\t" + "转" else: print str(element["interest"]) + "\t" + str(element["borrowerLevel"])+ "\t" + str(element["displayLoanType"])+ "\t" + str(element["finishedRatio"]) time.sleep(5)
import getUrl
import getInfo
from urllib import request
import re
import time
import os

# Append today's CSDN blog view/rank numbers to a tab-separated log file.
targetFile = './viewAndRank.txt'
url_filename = './blogUrl.txt'

# Create the log file with a header row on first run.
if os.path.exists(targetFile):
    print(targetFile, ' is existed')
else:
    print('creat ', targetFile, " to store CSDN blog's view and rank num")
    with open(targetFile, 'w') as t:
        # `with` closes the file; the explicit t.close() was redundant
        t.write('date\t\t\tViewNum\t\tRankNum\n')

now = time.strftime('%Y-%m-%d %H:%M:%S')
view_num, rank_num = getInfo.getInfo(url_filename)
with open(targetFile, 'a') as t:
    info = now + '\t' + view_num + '\t\t' + rank_num + '\n'
    t.write(info)
def findActive(url):
    """Return the inner HTML of the page's "flxx" div."""
    source = getInfo.getInfo(url)
    matched = re.search(r'<div class="flxx">(.*)</div>', source)
    return matched.group(1)
from getInfo import getInfo import xlsxwriter import requests import traceback try: if __name__ == '__main__': info = getInfo() saveName = input("请输入结果数据文件名(不要填写尾缀名):") savePath = "\\".join( info.pathList[0].split("\\")[0:-1]) + "\\" + saveName + ".xlsx" newExl = xlsxwriter.Workbook(savePath) sheet1 = newExl.add_worksheet() #添加列标题 sheet1.write_row("A1", ["月份"] + info.exlNameList) #添加行标题 sheet1.write_column( "A2", ["留存人数", "工作量50-100h人数", "工作量100-150h人数", "工作量大于150h人数"]) #添加sheet1数据 sheet1.write_row("B2", info.differenceList) sheet1.write_row("B3", info.count50List) sheet1.write_row("B4", info.count100List) sheet1.write_row("B5", info.count150List) #添加名单 for index, name in enumerate(info.exlNameList): sheetNew = newExl.add_worksheet(name) #添加列标题 sheetNew.write_row( "A1", ["工作量50-100h人数", "工作量100-150h人数", "工作量大于150h人数"]) #添加数据 sheetNew.write_column("A2", info.time50List[index])
def findImage(url):
    """Return the src of the organisation logo image on the page."""
    source = getInfo.getInfo(url)
    matched = re.search(
        r'<div class="ml_logo"><img width="230px" height="115px" src="(.*)" /></div>',
        source)
    return matched.group(1)
def findLoc(url):
    """Return the location string embedded in the page's `var content` JS."""
    page = getInfo(url)
    hit = re.search(r"var content = '(.*)'", page)
    return hit.group(1).strip()
def show_type(name):
    """Serialize the multi-company data for *name* as a JSON string."""
    helper = getInfo()
    data = helper.getDataFromCompany_mul(name)
    return json.dumps(data)