Example #1
def Clients():
    A = GoogleQuery()
    Mozarr = []
    with open('credentials.json') as listO:
        B = json.load(listO)
    zip_list = zip(A, cycle(B)) if len(A) > len(B) else zip(cycle(A), B)
    for zipList in zip_list:
        try:
            print(zipList[0]['rootDomain'])
            client = Mozscape(zipList[-1]['key'], zipList[-1]['value'])
            authorities = client.urlMetrics(zipList[0]['root_domain'],
                                            Mozscape.UMCols.domainAuthority)
            Links = client.urlMetrics(
                zipList[0]['rootDomain'], Mozscape.UMCols.pageAuthority
                | Mozscape.UMCols.mozRank | Mozscape.UMCols.links)
            internal_dictionary = {}
            internal_dictionary['backURL'] = zipList[0]['rootDomain']
            internal_dictionary['PA'] = Links['upa']
            internal_dictionary['DA'] = authorities['pda']
            internal_dictionary['MozRank'] = Links['umrp']
            internal_dictionary['links'] = Links['uid']
            #print internal_dictionary['backURL']
            Mozarr.append(internal_dictionary)
        except MozscapeError:
            sleep(11)
            continue
    with open('mozscapedata.json', 'w') as outfile:
        json.dump(Mozarr, outfile, indent=4)
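
# The zip/cycle idiom above spreads the longer of the two lists (URLs or
# credentials) across the shorter one, so no single Mozscape key absorbs
# every rate-limited call. A minimal sketch of the pattern on its own, with
# hypothetical URLs and keys:
from itertools import cycle

urls = ['a.example', 'b.example', 'c.example']      # hypothetical work items
creds = [{'key': 'id-1', 'value': 'secret-1'},      # hypothetical credentials
         {'key': 'id-2', 'value': 'secret-2'}]

pairs = zip(urls, cycle(creds)) if len(urls) > len(creds) else zip(cycle(urls), creds)
for url, cred in pairs:
    print(url, cred['key'])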
Example #2
def calBackLinkCount(index):
    __location__ = os.path.realpath(
        os.path.join(os.getcwd(), os.path.dirname(__file__)))
    # for testing purpose to limit to 10.
    # file = os.path.join(__location__, '../Sites/sites10.txt')
    file = configs.getSiteFile(index)
    client = MongoClient()
    # A new ACCESS_KEY & SECRET_KEY has to be generated for each new user
    # "https://moz.com/community/join"
    mozClient = Mozscape(
        'mozscape-211f9c0fa5',  # ACCESS ID
        'f38b3766d3d991a4054f38f9746d5d2d'  # SECRET_KEY
    )

    with open(file) as f:
        totalLength = sum(1 for line in f)
    x = 0
    tempTenSites = []
    counter = 0
    # backLinkObjects = []

    # file is opened and each url is read
    with open(file) as sites:
        for site in sites:
            counter = counter + 1
            site = site.rstrip('\n')

            if (
                    x < 9 or counter == totalLength - 1
            ):  # logic to read 10 websites from the file before calling the API
                tempTenSites.append(site)
                x = x + 1
            else:
                print("Completed for 10 websites..!")
                tempTenSites.append(site)
                # resetting the counter
                x = 0
                # MOZ api free tier has a limit of accessing the api once in 10 seconds
                # Each api call can process 10 websites and respond accordingly
                if counter > 11:
                    time.sleep(10)
                urlMetrics = mozClient.urlMetrics(
                    tempTenSites)  # MOZ API call for url metrics
                for urlMetric in urlMetrics:
                    # backLinkObjects.append(urlMetric)
                    saveBacklinkCount(urlMetric['uid'], urlMetric['uu'],
                                      client)
                # empty the temp array
                tempTenSites = []

    print("Backlink calculation completed!!")
    print("Backlink count has been added to innovationCount.")
    client.close()
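
# The counter bookkeeping above is easy to get wrong; the same "ten URLs per
# call, one call every ten seconds" constraint can also be expressed by
# slicing the site list into fixed-size batches. A sketch, assuming a client
# object with the same urlMetrics(list_of_urls) signature:
import time

def chunked(seq, size=10):
    # yield fixed-size slices of seq; the last slice may be shorter
    for start in range(0, len(seq), size):
        yield seq[start:start + size]

def fetch_all_metrics(moz_client, sites):
    results = []
    for batch in chunked(sites, 10):
        results.extend(moz_client.urlMetrics(batch))
        time.sleep(10)  # free tier allows roughly one call every ten seconds
    return results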
Example #3
def retrieve_mozrank(url, accessid, secret_key):
    # if access id or secret is not provided, don't query Mozscape metrics.
    # https://moz.com/help/guides/moz-api/mozscape/api-reference/url-metrics
    if not accessid or not secret_key:
        return {}
    # If URL is local, skip it too.
    client = Mozscape(accessid, secret_key)
    for i in range(0, 3):
        try:
            return client.urlMetrics([url])[0]
        except Exception as e:
            print("mozscape failed trial %s: %s (%s)" % (i, url, str(e)))
            time.sleep(11)
    return {}
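
# Called like this (hypothetical credentials), the helper degrades to an
# empty dict after three failed attempts instead of raising:
metrics = retrieve_mozrank('https://example.com/', 'my-access-id', 'my-secret-key')
print(metrics.get('pda'), metrics.get('upa'))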
Example #4
class Moz_Search(object):
    def __init__(self, access_id, secret_key):
        self.client = Mozscape(access_id, secret_key)

    def search_backlinks(self, url, limit=5):
        """
        Return a list of urls
        Args:
            limit: maximum number of results to return
        """
        urls = []
        try:
            results = self.client.links(url,
                                        scope="page_to_page",
                                        sort="page_authority",
                                        filters=["external"],
                                        limit=limit)
            #results = self.client.links(url, scope="page_to_page", sort="spam_score", filters=["external"], limit=limit)
            #results = self.client.links(url, scope="page_to_page", sort="page_authority")

            for res in results:
                if 'uu' in res:
                    url = URLUtility.normalize(res['uu'])
                    if url:
                        urls.append(url)
                else:
                    print "Error: key does not exisit"
                    print res
        except:
            traceback.print_exc()

        return urls
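
# A short usage sketch (hypothetical keys; URLUtility.normalize comes from
# the surrounding project):
searcher = Moz_Search('my-access-id', 'my-secret-key')
backlinks = searcher.search_backlinks('https://moz.com/blog', limit=5)
print(backlinks)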
Example #5
def moz_metrics(url):
    """
    Return data from moz.com for exact URL.
    """
    try:
        client = Mozscape('mozscape-d6c8be9444', '3801b7906546108d881d9153131b412e')
        metrics = client.urlMetrics(
            [url],
            Mozscape.UMCols.links | Mozscape.UMCols.domainAuthority | Mozscape.UMCols.pageAuthority)

        metrics = metrics[0]
        metrics['pda'] = round(metrics['pda'], 2)
        metrics['upa'] = round(metrics['upa'], 2)

    except MozscapeError:  # maybe use general exception
        metrics = {'uid': None, 'pda': None, 'upa': None}

    return metrics
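
# Usage sketch: on success the dict carries Mozscape's short column aliases,
# on failure the same keys map to None:
data = moz_metrics('https://example.com/')
print(data['uid'], data['pda'], data['upa'])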
Example #6
def MozscapeData(query):
    urls = Google.query.filter_by(googleQuery=query).all()
    A = []
    Mozarr = []
    for items in urls:
        mozMiniDict = {}
        mozMiniDict['full_url'] = items.googleFullURL
        mozMiniDict['root_domain'] = items.googleRootDomain
        A.append(mozMiniDict)
    with open('credentials.json') as listO:
        B = json.load(listO)
    for zipList in zip(A, cycle(B)):
        try:
            client = Mozscape(zipList[-1]['key'], zipList[-1]['value'])
            authorities = client.urlMetrics(str(zipList[0]['root_domain']),
                                            Mozscape.UMCols.domainAuthority)
            Links = client.urlMetrics(
                str(zipList[0]['full_url']), Mozscape.UMCols.pageAuthority
                | Mozscape.UMCols.mozRank | Mozscape.UMCols.links)
            internal_dictionary = {}
            internal_dictionary['root_domain'] = zipList[0]['root_domain']
            internal_dictionary['backURL'] = zipList[0]['full_url']
            internal_dictionary['PA'] = Links['upa']
            internal_dictionary['DA'] = authorities['pda']
            internal_dictionary['MozRank'] = Links['umrp']
            internal_dictionary['links'] = Links['uid']
            Mozarr.append(internal_dictionary)
        except MozscapeError:
            print "Moz threw error!"
            sleep(11)
            continue

    for updateMoz in Mozarr:
        update = Google.query.filter_by(
            googleRootDomain=updateMoz['root_domain']).first()
        update.Links = updateMoz['links']
        update.PA = updateMoz['PA']
        update.DA = updateMoz['DA']
        update.moz_rank = updateMoz['MozRank']
        db.session.commit()
Example #7
rearr = []
print(len(biggerArr))
d = cycle(m_dictionary.items())
for items in biggerArr[:2]:
    eachQuery = items
    domainArray = []
    eachPageWhoisResult = []
    async_list = []
    url_list = []
    for eachQueryString in eachQuery:
        bingDictionary = {}

        bingDictionary['prospect_url'] = eachQueryString['displayUrl']
        try:
            defined = next(d)
            client = Mozscape(str(defined[0]), str(defined[1]))

            mozscape_dictionary = {}
            metrics = client.urlMetrics(str(bingDictionary['prospect_url']))
            bingDictionary['PA'] = metrics['upa']
            bingDictionary['DA'] = metrics['pda']
            bingDictionary['MozRank'] = metrics['ut']  # note: 'ut' is the title alias; MozRank proper is 'umrp'
        except Exception:
            bingDictionary['PA'] = 0
            bingDictionary['DA'] = 0
            bingDictionary['MozRank'] = 0
        try:
            if "https://" in str(bingDictionary['prospect_url']):
                response = requests.get('http://graph.facebook.com/?id=' +
                                        str(eachQueryString['displayUrl']))
Example #8
#!/usr/bin/env python

from mozscape import Mozscape
from datetime import datetime
import csv

# Mozscape API Details
client = Mozscape('mozscape-f03b16db58', '5f7418e041cf61841d72ef26c6f7a905')

domain = "smythson.com"

smythsonMetrics = client.urlMetrics(domain)
smythsonDA = smythsonMetrics['pda']

# smythsonMetrics contains other metrics, as well as DA. Alternatively, un-comment the following to find PA:
# authorities = client.urlMetrics(
#    ('www.smythson.com'),
#    Mozscape.UMCols.domainAuthority | Mozscape.UMCols.pageAuthority)

# Get current month in user-friendly format.
now = datetime.now()
month = now.strftime("%B-%y")

update = [month, domain, str(smythsonDA)]
print(update)

with open('smythsonda.csv', 'a', newline='') as fd:
    wr = csv.writer(fd, delimiter=',')
    wr.writerow(update)
Example #9
from time import sleep
from mozscape import Mozscape
import csv

client = Mozscape('******', '******')
# Now I can make API calls!


def get_MozscapeData(url_list):

    authorities = client.urlMetrics(
        #['www.example.com',],
        url_list,
        Mozscape.UMCols.pageAuthority | Mozscape.UMCols.equityExternalLinks
        | Mozscape.UMCols.domainAuthority)
    sleep(11)
    return (authorities)


with open('sheet1.csv', 'r') as myFile:
    row_count = sum(1 for row in myFile)
print("row_count in file:", row_count)

myFile = open('sheet1.csv', 'r')
reader = csv.reader(myFile)
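
# An alternative sketch that avoids opening the file twice: read the rows
# once and count the resulting list.
with open('sheet1.csv', newline='') as f:
    rows = list(csv.reader(f))
print("row_count in file:", len(rows))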
Example #10
#!/usr/bin/env python

from mozscape import Mozscape

client = Mozscape('my-access-id', 'my-secret-key')

# As you may have noticed, there are lots of columns available
# I did what I could to make them easily-accessible, but there
# are a lot, and the names are long. So, the API calls have
# defaults

# Let's get some URL metrics. Results are now an array of dictionaries
# the i'th dictionary is the results for the i'th URL
metrics = client.urlMetrics(['www.moz.com', 'www.moz.com/blog'])
# Now let's say we only want specific columns in the results
authorities = client.urlMetrics(['www.moz.com'],
                                Mozscape.UMCols.domainAuthority
                                | Mozscape.UMCols.pageAuthority)
# Or if you just need results for one URL
mozMetrics = client.urlMetrics('www.moz.com')

# Now for some anchor text results
anchorResults = client.anchorText('www.moz.com/blog')
# Or for just specific columns
anchorTermResults = client.anchorText('www.moz.com/blog',
                                      cols=Mozscape.ATCols.term)

# Now for some links results
links = client.links('www.moz.com')
# The links API has more columns to specify, as well as sort, scope, etc.
links = client.links('www.moz.com',
Example #11
from mozscape import Mozscape
from contextlib import closing
import traceback
import time
import pandas as pd
from urllib.parse import urlparse
import csv
import random

# Input your member ID and secret key here
client = Mozscape('mozscape-c75d15e616', '5128f75efae32ed916a6eac395831ee3')
data = pd.read_csv('ICO-URL-Symbol.csv', encoding="utf8")
url_column = data['URL']
remove_row_index = []
urls = data.drop(data.index[remove_row_index])
url_columns = urls['URL']
clean_list = [i for i in url_columns]

# keep only the part before the first slash (the bare domain)
clean_urls = [i.split('/', 1)[0] for i in clean_list]
urls['index_URL'] = clean_urls
urls_10 = clean_urls

for i in urls_10:
#    metrics = client.urlMetrics(i)
Example #12
#!/usr/bin/env python

from mozscape import Mozscape
import argparse
import csv
import sys
import pandas as pd

client = Mozscape('my-access-id', 'my-secret-key')

# The links API has more columns to specify, as well as sort, scope, etc.
links = client.links('ENTER URL',
                     scope='page_to_page',
                     sort='page_authority',
                     filters=['internal'],
                     targetCols=Mozscape.UMCols.url)

## PUT ABOVE INTO A PANDAS DATAFRAME ##
#df = pd.DataFrame(authorities.items())
df = pd.DataFrame(links)
df = df.rename(columns={'upa': 'Page Authority', 'uu': 'URL'})

print(df)
df.to_excel('mozLinks.xls', index=False)
# use xlsx to stop limiting of row export - check csv module

anchorTermResults = client.anchorText('ENTER_URL',
                                      cols=Mozscape.ATCols.freeCols2)

df2 = pd.DataFrame(anchorTermResults)
df2 = df2.rename(
Example #13
from mozscape import Mozscape
from contextlib import closing
import traceback
import time
import pandas as pd

# Input your member ID and secret key here
client = Mozscape('mozscape-d3909f420d', 'a223a4a9f87f71c8ba7c353598ec43ff')

url = 'https://filecoin.io/'

# results for a single URL come back as one dict of short column aliases
mozMetrics = client.urlMetrics(url)

df = pd.DataFrame([mozMetrics])  # wrap in a list: the dict holds scalar values

# https://moz.com/help/guides/moz-api/mozscape/api-reference/url-metrics
# fmrp: normalized measure of the MozRank of the subdomain of the target URL
# fmrr: raw measure of the MozRank of the subdomain of the target URL
# pda: domain authority of the target URL's paid-level domain
# ueid: The number of external equity links from other root domains to pages on the target URL's root domain
# uid: number of internal and external, equity and non-equity links to the subdomain of the target URL
# umrp: normalized (ten-point, logarithmically-scaled) MozRank of the target URL
# umrr: raw (zero to one, linearly-scaled) MozRank of the target URL
# upa: normalized (zero to one hundred, logarithmically-scaled) page authority of the target URL
# us: HTTP status of the target URL
# ut: title of the target URL, if a title is available
# uu: canonical form of the source URL

df = df.rename(index=str,
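
# The rename call above is cut off in the original; a plausible completion,
# using the glossary comments as the mapping (the long names are illustrative):
alias = {
    'fmrp': 'subdomain_mozrank_normalized',
    'fmrr': 'subdomain_mozrank_raw',
    'pda': 'domain_authority',
    'ueid': 'external_equity_links',
    'uid': 'links',
    'umrp': 'mozrank_normalized',
    'umrr': 'mozrank_raw',
    'upa': 'page_authority',
    'us': 'http_status',
    'ut': 'title',
    'uu': 'canonical_url',
}
df = df.rename(index=str, columns=alias)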
Example #14
def querygoogle():
    form = QueryGoogle(request.form)
    arr = []
    index = 0
    if form.validate_on_submit():
        arr = []
        query = form.QueryGoogle.data
        response = requests.get('https://www.google.com/search?num=3&q=' +
                                query + '&oq=' + query + '&start=10',
                                headers=headers,
                                proxies=proxies).text
        soup = BeautifulSoup(response, 'html.parser')
        title = soup.findAll('div', attrs={'class': 'g'})
        for titles in title:
            try:
                dictionary = {}
                index += 1
                dictionary['index#'] = str(index)
                dictionary['meta_title'] = titles.find('h3').text
                dictionary['full_url'] = titles.find('a')['href']
                rootDomain = dictionary['full_url'].replace('/url?q=', '')
                parsed_uri = urlparse(rootDomain)
                dictionary['rootDomain'] = rootDomain
                domain = '{uri.scheme}://{uri.netloc}/'.format(uri=parsed_uri)
                dictionary['root_domain'] = domain
                #print dictionary['root_domain']
                dictionary['description'] = titles.find('span',
                                                        attrs={
                                                            'class': 'st'
                                                        }).text
                addUser = Google(googleQuery=query,
                                 googleMetaTitle=dictionary['meta_title'],
                                 googleFullURL=dictionary['rootDomain'],
                                 googleRootDomain=dictionary['root_domain'])
                db.session.add(addUser)
                arr.append(dictionary)
            except AttributeError:
                continue
        db.session.commit()

        Mozarr = []
        secondaryURL = []
        listO = open('credentials.json')
        B = json.load(listO)
        A = arr
        zip_list = zip(A, cycle(B)) if len(A) > len(B) else zip(cycle(A), B)
        for zipList in zip_list:
            try:
                client = Mozscape(zipList[-1]['key'], zipList[-1]['value'])
                authorities = client.urlMetrics(
                    zipList[0]['root_domain'], Mozscape.UMCols.domainAuthority)
                Links = client.urlMetrics(
                    zipList[0]['rootDomain'], Mozscape.UMCols.pageAuthority
                    | Mozscape.UMCols.mozRank | Mozscape.UMCols.links)
                internal_dictionary = {}
                internal_dictionary['backURL'] = zipList[0]['rootDomain']
                internal_dictionary['PA'] = Links['upa']
                internal_dictionary['DA'] = authorities['pda']
                internal_dictionary['MozRank'] = Links['umrp']
                internal_dictionary['links'] = Links['uid']
                #print internal_dictionary['backURL']
                Mozarr.append(internal_dictionary)
                secondaryURL.append(zipList[0]['rootDomain'])
            except MozscapeError:
                sleep(11)
                continue

        removeDuplicates = list(set(secondaryURL))
        for addtoDb in removeDuplicates:
            updateDatabase = Google.query.filter_by(
                googleRootDomain=addtoDb).all()
            print(updateDatabase)

        return render_template('google.html', form=form, arr=arr)
    return render_template('google.html', form=form, arr=arr)
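
# list(set(...)) above deduplicates but scrambles order; on Python 3.7+ the
# same dedup can keep first-seen order (a sketch using the same variable):
removeDuplicates = list(dict.fromkeys(secondaryURL))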
Example #15
#!/usr/bin/env python

from mozscape import Mozscape

client = Mozscape(
    'my-access-id',
    'my-secret-key')

# As you may have noticed, there are lots of columns available
# I did what I could to make them easily-accessible, but there
# are a lot, and the names are long. So, the API calls have
# defaults

# Let's get some URL metrics. Results are now an array of dictionaries
# the i'th dictionary is the results for the i'th URL
metrics = client.urlMetrics(['www.moz.com', 'www.moz.com/blog'])
# Now let's say we only want specific columns in the results
authorities = client.urlMetrics(
    ['www.moz.com'],
    Mozscape.UMCols.domainAuthority | Mozscape.UMCols.pageAuthority)
# Or if you just need results for one URL
mozMetrics = client.urlMetrics('www.moz.com')

# Now for some anchor text results
anchorResults = client.anchorText('www.moz.com/blog')
# Or for just specific columns
anchorTermResults = client.anchorText(
    'www.moz.com/blog', cols=Mozscape.ATCols.term)

# Now for some links results
links = client.links('www.moz.com')
Example #16
from mozscape import Mozscape
# import Optimization_Return_Functions as orf  (previously used to fetch data with keys)
import time
# an Instance of the Optimization_Return_Functions class

# arguments are required to use this tool
moz_client = Mozscape('', '')
"""Through constructors for each selection option -- we can use same url
for all functions"""
"""Begin cols= Calls & desired data Returns"""


def return_canonical_url(url):
    time.sleep(1.5)
    data = moz_client.urlMetrics(url, cols=4)
    return data['uu']
    # this function uses the key within itself
    # uses no outside objects


def return_external_links(url):
    time.sleep(1.5)
    data = moz_client.urlMetrics(url, cols=549755813888)
    return data['ued']  # int
    #  for the URL
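
# The numeric cols= values above are raw Mozscape bit masks (4 selects the
# canonical-URL column). The library also exposes named flags for these
# masks; a sketch, assuming Mozscape.UMCols.url is the canonical-URL flag:
data = moz_client.urlMetrics('moz.com', Mozscape.UMCols.url)
canonical = data['uu']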


def return_external_root_domain_links(url):
    time.sleep(1.5)
Example #17
def parse_sites(sites_file):

	#get the list of websites for testing
	with open(sites_file) as sl:
		# str.lstrip strips a character *set*, not a prefix, so slice the
		# scheme off instead of calling lstrip('http://')
		sites = [x.strip() for x in sl]
		sites = [s[len('http://'):] if s.startswith('http://') else s for s in sites]
		sites = [s.rstrip('/') for s in sites]

	print(sites)
	

	#get MOZ API credentials
	with open('keys.txt') as fp:
		credentials = [x.strip() for x in fp.readlines()]

	moz_id = credentials[0]
	moz_key = credentials[1]

	#access the MOZ client using credentials
	client = Mozscape(moz_id, moz_key)

	#creating new list for storing websites MOZ DA data
	sites_da = []
	sites_backlinks = []
	sites_ips = []

	for i in sites:
		print('Getting DA data for', i)
		i = client.urlMetrics([i], Mozscape.UMCols.domainAuthority)
		sites_da.append(math.ceil(i[0]['pda']))
		print('Complete. Going to next domain in list')
		print('...')
		time.sleep(5)

	for k in sites:
		print('Getting total backlinks for', k)
		k = client.urlMetrics([k], Mozscape.UMCols.links)
		sites_backlinks.append(k[0]['uid'])
		print('Complete. Going to next domain in list')
		print('...')
		time.sleep(5)

	for s in sites:
		try:
			print('Getting the ip address of', s)
			s = socket.gethostbyname(s)
			sites_ips.append(s)
		except socket.gaierror:
			print('Could not get ip address for', s)
			sites_ips.append('Error getting ip')
			continue

	#create a dictionary for storing the websites, their DA score and number of backlinks
	sites_metrics = dict(zip(sites, zip(sites_da, sites_backlinks, sites_ips)))

	print(sites_metrics)

	df = pd.DataFrame.from_dict(data=sites_metrics, orient='index')
	df.columns = ['Domain Authority', 'Backlinks', 'IP']
	
	print(df)

	to_file(df)
Example #18
def OutReacherDesk(query):
    m_dictionary = {}
    m_dictionary['member-79ea116cb0'] = '43053334ef958fa5668a8afd8018195b'
    m_dictionary['member-89df24f83c'] = '0d08685d31a8f724047decff5e445861'
    m_dictionary['member-aad6e04a94'] = '8a08a4f2477b3eda0a7b3afa8eb6faaf'
    m_dictionary['member-1e51eae111'] = '4f1deaa49d0f4ec8f36778b80a58dba5'
    m_dictionary['member-c1d37816b1'] = '47501159d505413721caac9687818f68'
    m_dictionary['member-700eebf334'] = '0e7136b3468cd832f6dda555aa917661'
    m_dictionary['member-774cfbde7e'] = '481981b24f4a4f08d7c7dc9d5038428f'
    m_dictionary['member-34c9052fba'] = '999d2d727bfc11256421c42c529331de'
    m_dictionary['member-587eb1767c'] = '8c36e3b36b7d6d352fd943429d97837e'
    m_dictionary['member-5fa34d7383'] = '3986edd244ae54e1aa96c71404914578'

    headers = {
        'User-Agent':
        'Mozilla/5.0 (iPhone; CPU iPhone OS 10_1 like Mac OS X)AppleWebKit/600.1.3 (KHTML, like Gecko) Version/8.0'
    }
    arr = ['0', '23', '37', '51', '65', '79']
    appendArr = []
    biggerArr = []
    for i in arr:
        response = requests.get(
            'https://c.bingapis.com/api/custom/opal/otherpage/search?q=' +
            str(query) + '&first=' + str(i) + '&rnoreward=1',
            headers=headers).text
        LoadAsJson = json.loads(response)
        with open('check_thisoutput.json', 'w') as outfile:
            json.dump(LoadAsJson, outfile, indent=4)
        actualItem = LoadAsJson['answers'][0]['webResults']
        appendArr.append(actualItem)

        try:
            biggerArr.append(appendArr[0] + appendArr[1] + appendArr[2] +
                             appendArr[3] + appendArr[4] + appendArr[5])
        except:
            pass

    with open("check_output.json", 'wb') as outfile:
        json.dump(biggerArr, outfile, indent=4)

    rearr = []
    d = cycle(m_dictionary.items())
    for items in biggerArr:
        eachQuery = items
        domainArray = []
        eachPageWhoisResult = []
        async_list = []
        url_list = []
        for eachQueryString in eachQuery:
            try:
                ## moz goes here

                bingDictionary = {}

                bingDictionary['prospect_url'] = eachQueryString['displayUrl']
                try:
                    defined = next(d)
                    client = Mozscape(str(defined[0]), str(defined[1]))

                    mozscape_dictionary = {}
                    metrics = client.urlMetrics(
                        str(eachQueryString['displayUrl']))
                    print(metrics)
                    bingDictionary['PA'] = metrics['upa']
                    bingDictionary['DA'] = metrics['pda']
                    bingDictionary['MozRank'] = metrics['ut']  # note: 'ut' is the title alias; MozRank proper is 'umrp'
                except Exception:
                    bingDictionary['PA'] = "none"
                    bingDictionary['DA'] = "none"
                    bingDictionary['MozRank'] = "none"

                response = requests.get('http://graph.facebook.com/?id=' +
                                        str(eachQueryString['displayUrl']))
                loadAsJson = json.loads(response.text)
                try:
                    bingDictionary['facebook_shares'] = loadAsJson['share'][
                        'share_count']
                except:
                    bingDictionary['facebook_shares'] = "null"

                bingDictionary['meta_title'] = eachQueryString[
                    'shortTitle'].encode('ascii', 'ignore')
                url = urlparse(eachQueryString['url'])
                domain = '{uri.scheme}://{uri.netloc}/'.format(uri=url)
                bingDictionary['root_domain'] = domain
                likes = requests.get(
                    'https://c.bingapis.com/api/custom/opal/otherpage/search?q=site:facebook.com%20'
                    + str(domain)).text
                loadAsJson = json.loads(likes)
                try:
                    parse_likes = loadAsJson['answers'][0]['webResults']
                    for items in parse_likes:
                        if "likes" in items['snippet']:
                            list_of_words = items['snippet'].split()
                            next_word = list_of_words[
                                list_of_words.index("likes") - 1]
                            bingDictionary['facebook_likes'] = next_word
                            bingDictionary['facebook_url'] = items['url']
                except:
                    bingDictionary['facebook_likes'] = None
                    bingDictionary['facebook_url'] = None

                twitter = requests.get(
                    'https://c.bingapis.com/api/custom/opal/otherpage/search?q=site:twitter.com%20'
                    + str(domain)).text
                loadAsJson = json.loads(twitter)
                try:
                    parse_likes = loadAsJson['answers'][0]['webResults']
                    bingDictionary['twitter_url'] = parse_likes[0]['url']
                    bingDictionary['twitter_followers'] = parse_likes[0][
                        'formattedFacts'][0]['items'][0]['text']
                except:
                    bingDictionary['twitter_followers'] = None
                    bingDictionary['twitter_url'] = None

                google_plus = requests.get(
                    'https://c.bingapis.com/api/custom/opal/otherpage/search?q=site:https://plus.google.com%20http://www.jackdaniels.com/'
                ).text
                loadAsJson = json.loads(google_plus)
                try:
                    parse_likes = loadAsJson['answers'][0]['webResults']
                    dictionary = {}
                    for items in parse_likes:
                        list_of_words = items['snippet'].split()
                        for items in list_of_words:
                            if "follower" in items:
                                next_word = list_of_words[
                                    list_of_words.index(items) - 1]
                                bingDictionary[
                                    'google_plus_followers'] = next_word
                                bingDictionary['google_plus_url'] = items[
                                    'url']
                except:
                    bingDictionary['google_plus_followers'] = None
                    bingDictionary['google_plus_url'] = None
                formatDomain = str(domain).replace('http://',
                                                   '').replace('https://', '')
                fixedDomain = formatDomain.split('/')[0].replace(
                    'https://www.', '').replace('http://www.',
                                                '').replace('www.', '')
                print(fixedDomain)
                whoisAPI = 'http://api.whoxy.com/?key=f5bd9ed47568013u5c00d35155ec3884&whois=' + \
                    str(fixedDomain)
                domainArray.append(whoisAPI)
                bingDictionary['whoisData'] = "None"
                bingDictionary['social_shares'] = "None"
                miniArz = []
                try:
                    response = requests.get('http://104.131.43.184/whois/' +
                                            str(fixedDomain)).text
                    min_text = 'http://104.131.43.184/whois/' + str(
                        fixedDomain)
                    url_list.append(str(min_text))
                    loadAsJson = json.loads(response)
                except:
                    pass
                whoisDictionary = {}
                try:
                    whoisDictionary['domain_name'] = loadAsJson['domain_name']
                except:
                    whoisDictionary['domain_name'] = "None"
                try:
                    whoisDictionary['whois_full_name'] = loadAsJson[
                        'registrant']['name']
                except:
                    whoisDictionary['whois_full_name'] = "None"
                try:
                    whoisDictionary['whois_city_name'] = loadAsJson[
                        'registrant']['city_name']
                except:
                    whoisDictionary['whois_city_name'] = "None"
                try:
                    whoisDictionary['whois_country_code'] = loadAsJson[
                        'registrant']['country_code']
                except:
                    whoisDictionary['whois_country_code'] = "None"
                try:
                    whoisDictionary['whois_email_address'] = loadAsJson[
                        'registrant']['email']
                except:
                    whoisDictionary['whois_email_address'] = "None"
                try:
                    whoisDictionary['whois_phone_number'] = loadAsJson[
                        'registrant']['phone_number']
                except:
                    whoisDictionary['whois_phone_number'] = "None"
                email_response = ''
                try:
                    email_response = requests.get(domain).text
                except Exception:
                    pass
                phoneRegex = re.compile(
                    r'''
                    # 415-555-0000, 555-9999, (415) 555-0000, 555-000 ext 12345, ext. 12345 x12345
                    (
                    ((\d\d\d) | (\(\d\d\d\)))?          #area code (optional)
                    (\s|-)                              #first separator
                    \d\d\d                              #first 3 digits
                    -                                   #second separator
                    \d\d\d\d                            #last 4 digits
                    (((ext(\.)?\s) |x)                  #extension word-part (optional)
                    (\d{2,5}))?                         #extension number-part (optional)
                    )                                   
                    ''', re.VERBOSE)

                soup = BeautifulSoup(email_response, 'html.parser')
                # print soup
                extractedPhone = phoneRegex.findall(str(soup))
                RSS_ARR = []
                for link in soup.find_all("link",
                                          {"type": "application/rss+xml"}):
                    href = link.get('href')
                    RSS_ARR.append(href)
                allPhoneNumbers = []
                for phoneNumber in extractedPhone:
                    allPhoneNumbers.append(phoneNumber[0])
                email_arr = []
                bingDictionary['phone_numbers'] = allPhoneNumbers
                bingDictionary['RSS_URL'] = RSS_ARR
                emails = re.search(r'[\w\.-]+@[\w\.-]+', str(soup))
                if emails:
                    if "." in emails.group()[-1]:
                        new_emails = emails.group()[:-1]
                        email_validator = lepl.apps.rfc3696.Email()
                        if not email_validator(new_emails):
                            pass
                        else:
                            email_arr.append(new_emails)
                    else:
                        email_string = emails.group()
                        email_validator = lepl.apps.rfc3696.Email()
                        if not email_validator(email_string):
                            pass
                        else:
                            email_arr.append(email_string)
                bingDictionary['emails'] = email_arr
                miniArz.append(whoisDictionary)
                bingDictionary['whoisData'] = miniArz
                rearr.append(bingDictionary)
            except (KeyError, RuntimeError):
                pass
Example #19
from mozscape import Mozscape, MozscapeError
client = Mozscape('member-79ea116cb0', '43053334ef958fa5668a8afd8018195b')
Links = client.urlMetrics(
    'https://www.benfolds.com/&sa=U&ved=0ahUKEwjSxZKtiP_PAhXIqFQKHaomCSsQFggsMAM&usg=AFQjCNGQFScP5dGElKPvz8zL1VyZvkQlxg',
    Mozscape.UMCols.pageAuthority | Mozscape.UMCols.mozRank
    | Mozscape.UMCols.links)
DA = client.urlMetrics('https://www.benfolds.com/',
                       Mozscape.UMCols.domainAuthority)
print(DA)
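
# urlMetrics also accepts a list, so the two lookups can share one request;
# a sketch with a hypothetical page URL:
both = client.urlMetrics(
    ['https://www.benfolds.com/some-page', 'https://www.benfolds.com/'],
    Mozscape.UMCols.pageAuthority | Mozscape.UMCols.domainAuthority)
print(both[0]['upa'], both[1]['pda'])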
Example #20
# client = Mozscape('member-34c9052fba','999d2d727bfc11256421c42c529331de')
# metrics = client.urlMetrics('roc7o.1homevideos.us/dY')
# print(metrics)
# zip_list = zip(B, cycle(dictionary.items()))
d = cycle(dictionary.items())
for items in B:
    defined = next(d)
    try:
        client = Mozscape(defined[0], defined[1])
        metrics = client.urlMetrics(str(items))
        print(metrics)
    except Exception:
        print(defined[0] + ' ' + defined[1] + ' ' + items)
Example #21
def __init__(self, access_id, secret_key):
    self.client = Mozscape(access_id, secret_key)
Example #22
import json
import time
from datetime import datetime
from mozscape import Mozscape, MozscapeError, Alias
from domain_importer import domains, chunks
from data_utils import write_chunk, log, log_domain
import report
import settings

domain_chunks = chunks(domains, 10)

client = Mozscape(settings.ACCESS_ID, settings.SECRET_KEY)


def get_result(metric):
    try:
        canonical_url = metric[Alias.CANONICAL_URL]
    except KeyError:
        canonical_url = ''

    try:
        mozrank = round(metric[Alias.MOZRANK], 2)
    except KeyError:
        mozrank = 0

    try:
        subdomain_mozrank = round(metric[Alias.MOZRANK_SUBDOMAIN], 2)
    except KeyError:
        subdomain_mozrank = 0

    try:
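
# The excerpt is cut off above, but the repeated try/except KeyError blocks
# can be collapsed with dict.get; a sketch returning the same defaults:
def get_result_compact(metric):
    return {
        'canonical_url': metric.get(Alias.CANONICAL_URL, ''),
        'mozrank': round(metric.get(Alias.MOZRANK, 0), 2),
        'subdomain_mozrank': round(metric.get(Alias.MOZRANK_SUBDOMAIN, 0), 2),
    }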
Example #23
from mozscape import Mozscape

client = Mozscape('XXXXX', 'XXXXX')
Example #24
import argparse, csv, time, math
import requests
from urllib.parse import urlparse
from bs4 import BeautifulSoup
from mozscape import Mozscape
import pandas as pd

with open('keys.txt', 'r') as fp:
    credentials = [x.strip() for x in fp.readlines()]

moz_id = credentials[0]
moz_key = credentials[1]

client = Mozscape(moz_id, moz_key)


def page_parser(url):

    links_list = []
    r = requests.get(url)
    html = r.text
    soup = BeautifulSoup(html, "html.parser")
    print(soup.title)
    for link in soup.find_all('a'):
        links_list.append(link.get('href'))

    ignore_domains = [
        'twitter.com', 'facebook.com', 'linkedin.com', 'listiller.com'
    ]