Exemplo n.º 1
0
    def 開窗來跑(我):
        '''Open an IDLE (PyShell) window to run the translated Chinese program.

        Only the translated Chinese source (the copy living under 我.tcDir)
        may be run, and the user may edit it freely; the original English
        source must not be modified because it is part of the system.

        Basically works, but closing the window still produces some leftover
        "dirt" that remains to be cleaned up.
        '''
      
        import sys

        '''
        import idlelib
        from idlelib import PyShell       
        PyShell= importlib.reload(idlelib.PyShell)
        '''
        #import idlelib.ryPyShell as PyShell
        import idlelib.PyShell as PyShell

        # File of the module whose translated source we are about to run.
        # NOTE(review): os is used below but not imported in this method —
        # presumably imported at module level; confirm.
        fn= 我.module.__file__
        
        #print('fn= ', fn)
        tcDir= 我.tcDir #= 'tcDir'  (directory holding the translated sources)
        # Refuse to run anything that lives outside the translated-code directory.
        if os.path.abspath(os.path.dirname(fn)) != os.path.abspath(tcDir):
            msg=  '%s not in %s\n'%(fn, os.path.abspath(tcDir))
            msg+= '目前只准跑 翻譯後的 中文程式,並隨意修改之。\n'
            print(msg)
            return
            
        # '-r fn' makes IDLE's PyShell run the file on startup.
        sys.argv= ['', '-r', fn]

        #我.root.wm_protocol("WM_DELETE_WINDOW", 我._destroy)

 
        PyShell.main() #### this line is OK on Windows; reported NOT OK on mac !!!!
Exemplo n.º 2
0
    def 開窗來跑(我):
        '''Open an IDLE (PyShell) window to run the translated Chinese program.

        Only the translated Chinese source (the copy living under 我.tcDir)
        may be run, and the user may edit it freely; the original English
        source must not be modified because it is part of the system.

        Basically works, but closing the window still produces some leftover
        "dirt" that remains to be cleaned up.
        '''
        # os was used below without being imported anywhere in this method;
        # import it locally so the method is self-contained.
        import os
        import sys

        # idlelib.PyShell was renamed idlelib.pyshell in Python 3.6;
        # fall back so this works on both module layouts (the same
        # version dance is done elsewhere in this file).
        try:
            import idlelib.PyShell as PyShell
        except ImportError:
            import idlelib.pyshell as PyShell

        # File of the module whose translated source we are about to run.
        fn = 我.module.__file__

        tcDir = 我.tcDir  # directory holding the translated sources
        # Refuse to run anything that lives outside the translated-code directory.
        if os.path.abspath(os.path.dirname(fn)) != os.path.abspath(tcDir):
            msg = '%s not in %s\n' % (fn, os.path.abspath(tcDir))
            msg += '目前只准跑 翻譯後的 中文程式,並隨意修改之。\n'
            print(msg)
            return

        # '-r fn' makes IDLE's PyShell run the file on startup.
        sys.argv = ['', '-r', fn]

        #我.root.wm_protocol("WM_DELETE_WINDOW", 我._destroy)

        PyShell.main()  # NOTE(review): OK on Windows; reported broken on mac.
Exemplo n.º 3
0
#!/usr/bin/python
"""Launch IDLE's PyShell, whether IDLE is installed or PyShell is on sys.path."""

try:
    import idlelib.PyShell
except ImportError:
    # IDLE is not installed, but maybe PyShell is on sys.path:
    try:
        import PyShell
    except ImportError:
        raise
    else:
        import os
        idledir = os.path.dirname(os.path.abspath(PyShell.__file__))
        if idledir != os.getcwd():
            # We're not in the IDLE directory, help the subprocess find run.py
            pypath = os.environ.get('PYTHONPATH', '')
            if pypath:
                # Use os.pathsep (':' on POSIX, ';' on Windows) instead of a
                # hard-coded ':' so the subprocess sees a valid path on Windows.
                os.environ['PYTHONPATH'] = pypath + os.pathsep + idledir
            else:
                os.environ['PYTHONPATH'] = idledir
        PyShell.main()
else:
    idlelib.PyShell.main()
def crawl():
	global lis,bis,data1,data2,url,ratings,reviews,driver
	for uri in start_url:
		url_q.enqueue(uri.strip())
	driver,wait=define_driver()
	print "\n*****************Starting the CRAWL*********************************\n"
	while not url_q.isEmpty():
		url=url_q.dequeue()
		##Going to the Reviews Part of the page
		driver=driver_get(driver,url+'#reviews')

		print "\n************Waiting for the reviews page to load***********\n"
		while True:
			try:
				wait.until(EC.presence_of_element_located((By.CSS_SELECTOR,'#reviews > div.s-title.mbot0.borderless')))
				break
			except:
				print "......Network Error....."
		print "\n************Reviews Page LOADED********************\n"


		##getting the user_id
		print "\n***************Fetching the user_ID********************\n"
		try:
			user_id=driver.find_element_by_xpath('//div[contains(@class,"follow")]').get_attribute('data-user-id')
		except:
			user_id=-1

		try:
			no_of_reviews=driver.find_element_by_xpath('//a[@data-tab="reviews"]').text
			no_of_reviews = re.findall('\d+',no_of_reviews)
			no_of_reviews = int(no_of_reviews[0])
		except:
			no_of_reviews=0

		if user_id in users_crawled or user_id==-1:
			print "\n__________User already CRAWLED________________\n"
			continue

		try:
			if driver.find_element_by_xpath('//div[contains(@class,"usr-location")]').text.strip()!='Kolkata':
				continue
		except:
			pass
		if no_of_reviews!=0:
			print "\n__________________New USER... Starting the crawl__________________\n"

			#Getting and Clicking the LOAD MORE button
			print "\n**********Clicking the LOAD_MORE button***********\n"
			try:
				load_more = driver.find_element_by_class_name('load-more')
				while True:
					try:
						s=wait.until(EC.element_to_be_clickable((By.CLASS_NAME,'load-more')))
						load_more.click()
						time.sleep(2)
					except Exception,e:
						print "E1: ",str(e)
						break
			except Exception,e:
				print "E2 :",str(e)

			print "\n************ALL data LOADED****************\n"

			##Getting the reviews DIV block
			print "\n********Wait while we fetch Reviews and other data**********\n"
			try:
				elem=driver.find_elements_by_xpath('//*[@id="reviewFeed"]/div')
			except Exception,e:
				print str(e)

			##Getting the total review blocks
			g=elem[0].find_elements_by_xpath("//div[contains(@class,'rev-text')]")

			##Getting the reviews and ratings
			ratings = []
			reviews = []
			for block in g:
				rating = block.find_element_by_tag_name('div').get_attribute('aria-label')
				review = block.text
				if rating!=None:
					rating = rating.strip()
					if review not in reviews and review!='' and review!=' ':
						reviews.append(review)
						ratings.append(rating)

			##Getting ReviewId,RestaurantId,RestaurantName
			##RestaurantAddress and datetime
			lis = []
			bis = []
			for block in elem:
				rev_id = block.get_attribute('data-review_id')
				res_id = block.find_element_by_class_name('snippet__name').find_element_by_class_name('snippet__link').get_attribute('data-entity_id')
				res_name = block.find_element_by_class_name('snippet__name').text
				res_addr = block.find_element_by_class_name('snippet__location').text
				datetime = block.find_element_by_tag_name('time').get_attribute('datetime')
				if (rev_id,res_id) not in lis:
					lis.append([rev_id,res_id])
					bis.append([res_name,res_addr,datetime])
			data1=[]
			data2=[]
			for i in xrange(len(lis)):
				if lis[i] not in data1:
					data1.append(lis[i])
					data2.append(bis[i])

			##Getting other necessary details
			# no_of_reviews=driver.find_element_by_xpath('//a[@data-tab="reviews"]').text
			# no_of_reviews = re.findall('\d+',no_of_reviews)
			# no_of_reviews = int(no_of_reviews[0])
			# user_id=driver.find_element_by_xpath('//div[contains(@class,"follow")]').get_attribute('data-user-id')
			user_link = url
			user_name = driver.find_element_by_class_name('full-name').text
			print no_of_reviews,len(data1),len(ratings),len(reviews)
			print "\n********ALL data for %s fetched**************\n"%user_name

			## Pause for user intervention if the no. of reviews does not equal the list length
			if no_of_reviews!=len(data1) or no_of_reviews!=len(ratings) or no_of_reviews!=len(reviews):
				pyshel.main()



			print "\n**********Writing %s's data to the file************\n"%user_name
			with open(r'..\dust_bin\user_data.csv','ab') as c:
				f=csv.writer(c)
				f.writerow([user_id,user_name,user_link,no_of_reviews])
			with open(r'..\dust_bin\review_data.csv','ab') as c:
				f=csv.writer(c)
				for i in xrange(len(data1)):
					f.writerow([user_id]+data1[i]+map(lambda x:x.encode('utf-8'),data2[i])+[reviews[i].encode('utf-8')]+[ratings[i]])
			print "\n**********Data Written to file************\n"
			##Addding the crawled user
			users_crawled.append(user_id)
			print "\n************ User %s crawled **************\n"%user_name
def crawl():
    global lis, bis, data1, data2, url, ratings, reviews, driver
    for uri in start_url:
        url_q.enqueue(uri.strip())
    driver, wait = define_driver()
    print "\n*****************Starting the CRAWL*********************************\n"
    while not url_q.isEmpty():
        url = url_q.dequeue()
        ##Going to the Reviews Part of the page
        driver = driver_get(driver, url + '#reviews')

        print "\n************Waiting for the reviews page to load***********\n"
        while True:
            try:
                wait.until(
                    EC.presence_of_element_located(
                        (By.CSS_SELECTOR,
                         '#reviews > div.s-title.mbot0.borderless')))
                break
            except:
                print "......Network Error....."
        print "\n************Reviews Page LOADED********************\n"

        ##getting the user_id
        print "\n***************Fetching the user_ID********************\n"
        try:
            user_id = driver.find_element_by_xpath(
                '//div[contains(@class,"follow")]').get_attribute(
                    'data-user-id')
        except:
            user_id = -1

        try:
            no_of_reviews = driver.find_element_by_xpath(
                '//a[@data-tab="reviews"]').text
            no_of_reviews = re.findall('\d+', no_of_reviews)
            no_of_reviews = int(no_of_reviews[0])
        except:
            no_of_reviews = 0

        if user_id in users_crawled or user_id == -1:
            print "\n__________User already CRAWLED________________\n"
            continue

        try:
            if driver.find_element_by_xpath(
                    '//div[contains(@class,"usr-location")]').text.strip(
                    ) != 'Kolkata':
                continue
        except:
            pass
        if no_of_reviews != 0:
            print "\n__________________New USER... Starting the crawl__________________\n"

            #Getting and Clicking the LOAD MORE button
            print "\n**********Clicking the LOAD_MORE button***********\n"
            try:
                load_more = driver.find_element_by_class_name('load-more')
                while True:
                    try:
                        s = wait.until(
                            EC.element_to_be_clickable(
                                (By.CLASS_NAME, 'load-more')))
                        load_more.click()
                        time.sleep(2)
                    except Exception, e:
                        print "E1: ", str(e)
                        break
            except Exception, e:
                print "E2 :", str(e)

            print "\n************ALL data LOADED****************\n"

            ##Getting the reviews DIV block
            print "\n********Wait while we fetch Reviews and other data**********\n"
            try:
                elem = driver.find_elements_by_xpath(
                    '//*[@id="reviewFeed"]/div')
            except Exception, e:
                print str(e)

            ##Getting the total review blocks
            g = elem[0].find_elements_by_xpath(
                "//div[contains(@class,'rev-text')]")

            ##Getting the reviews and ratings
            ratings = []
            reviews = []
            for block in g:
                rating = block.find_element_by_tag_name('div').get_attribute(
                    'aria-label')
                review = block.text
                if rating != None:
                    rating = rating.strip()
                    if review not in reviews and review != '' and review != ' ':
                        reviews.append(review)
                        ratings.append(rating)

            ##Getting ReviewId,RestaurantId,RestaurantName
            ##RestaurantAddress and datetime
            lis = []
            bis = []
            for block in elem:
                rev_id = block.get_attribute('data-review_id')
                res_id = block.find_element_by_class_name(
                    'snippet__name').find_element_by_class_name(
                        'snippet__link').get_attribute('data-entity_id')
                res_name = block.find_element_by_class_name(
                    'snippet__name').text
                res_addr = block.find_element_by_class_name(
                    'snippet__location').text
                datetime = block.find_element_by_tag_name(
                    'time').get_attribute('datetime')
                if (rev_id, res_id) not in lis:
                    lis.append([rev_id, res_id])
                    bis.append([res_name, res_addr, datetime])
            data1 = []
            data2 = []
            for i in xrange(len(lis)):
                if lis[i] not in data1:
                    data1.append(lis[i])
                    data2.append(bis[i])

            ##Getting other necessary details
            # no_of_reviews=driver.find_element_by_xpath('//a[@data-tab="reviews"]').text
            # no_of_reviews = re.findall('\d+',no_of_reviews)
            # no_of_reviews = int(no_of_reviews[0])
            # user_id=driver.find_element_by_xpath('//div[contains(@class,"follow")]').get_attribute('data-user-id')
            user_link = url
            user_name = driver.find_element_by_class_name('full-name').text
            print no_of_reviews, len(data1), len(ratings), len(reviews)
            print "\n********ALL data for %s fetched**************\n" % user_name

            ## Pause for user intervention if the no. of reviews does not equal the list length
            if no_of_reviews != len(data1) or no_of_reviews != len(
                    ratings) or no_of_reviews != len(reviews):
                pyshel.main()

            print "\n**********Writing %s's data to the file************\n" % user_name
            with open(r'..\dust_bin\user_data.csv', 'ab') as c:
                f = csv.writer(c)
                f.writerow([user_id, user_name, user_link, no_of_reviews])
            with open(r'..\dust_bin\review_data.csv', 'ab') as c:
                f = csv.writer(c)
                for i in xrange(len(data1)):
                    f.writerow([user_id] + data1[i] +
                               map(lambda x: x.encode('utf-8'), data2[i]) +
                               [reviews[i].encode('utf-8')] + [ratings[i]])
            print "\n**********Data Written to file************\n"
            ##Addding the crawled user
            users_crawled.append(user_id)
            print "\n************ User %s crawled **************\n" % user_name
Exemplo n.º 6
0
import os
import sys

# IDLE's module layout was renamed in Python 3, so pick the matching names.
if sys.version_info[0] == 2:
    from idlelib import PyShell as pyshell
    from idlelib.configHandler import idleConf, IdleUserConfParser
else:
    from idlelib import pyshell
    from idlelib.config import idleConf, IdleUserConfParser

# Point IDLE at user config files living next to this script instead of
# the ones in the user's home directory.
_here = os.path.dirname(__file__)
for _kind in idleConf.config_types:
    _parser = IdleUserConfParser(os.path.join(_here, "config-%s.cfg" % _kind))
    idleConf.userCfg[_kind] = _parser
    _parser.Load()

# With the configs swapped in, start IDLE as usual.
pyshell.main()