Example #1
def createMention(url, res):
    # Use the resolved (unshortened) URL when available.
    resolved = resolve(url)
    furl = resolved if resolved is not None else url
    url = hashlib.md5(furl).hexdigest()
    response = urllib2.urlopen("http://feeds.delicious.com/v2/json/url/" + url)
    delRes = json.loads(response.read())
    delRes = []  # The Delicious RSS feed for URLs is not working; the one for users is still active.
    twitRes = twittApi.search(furl)
    for a in delRes:
        print a
        author = a["a"]
        s = SocialProfile.objects.filter(username=author)
        if len(s) == 0:
            user_url = "https://delicious.com/" + author
            s = SocialProfile.objects.create(username=author, social_network="Delicious", url=user_url)
            print "Created user " + str(s)
            Mention.objects.create(profile=s, resource=res)
            print "Created mention for delicious"
    for r in twitRes:
        author = r.user.screen_name
        s = SocialProfile.objects.filter(username=author)
        if len(s) == 0:
            user_url = "https://twitter.com/" + author
            sn = SocialNetwork.objects.create(name="Twitter", url="http://twitter.com/")
            s = SocialProfile.objects.create(username=author, social_network=sn, url=user_url)
            print "Created user " + str(s)
            Mention.objects.create(profile=s, resource=res)
            print "Created mention for twitter"
Example #2
def createResource(url):
    # Use the resolved (unshortened) URL when available.
    resolved = resolve(url)
    if resolved is not None:
        url = resolved
    if len(url) > 200:
        print "Long duckduckgo links do not work"
        return None
    g = Goose()
    a = g.extract(url=url)
    r = Resource.objects.filter(url=url)
    if len(r) > 0:
        print "The resource already existed"
        r = r[0]
    else:
        # Fall back to a placeholder when Goose finds no title.
        if a.title is None or a.title == "":
            title = "notitle"
        else:
            title = a.title
        try:
            r = Resource.objects.create(title=title, url=url)
        except Exception:
            print "Creating the resource failed"
            print title
            print url
            return None
        print "Created the resource for " + url
    return r
Example #3
def clean2(self, url):
    # Follow a chain of shortened URLs, up to 5 hops, and
    # return the fully resolved URL.
    i = 0
    resolved = resolve(url)
    while resolved is not None and i < 5:
        url = resolved
        resolved = resolve(url)
        i += 1
        print i
    return url
Example #4
def get_expand(url, user, tag):
    tagl = [str(tag)]
    relatedToTweet = []

    response = twittApi.user_timeline(screen_name=user, count=10)
    for tweet in response:
        ht = extract_hash_tags(tweet.text)
        intersect = list(set(tagl) & set(ht))
        if len(intersect) > 0:
            #relatedToTweet.append(tweet)
            # Check whether the tweet text contains links;
            # process each link in the text.
            links = extract_urls(tweet.text.encode('utf-8'))
            for link in links:
                link = resolve(link)
                if link != url:
                    print link
                    print "Date: " + str(tweet.created_at)
                    #call_command('add', URL=link)
                    feed = feedfinder.feed(link)
                    print feed
                    if feed:
                        rc = ResourceContainer.objects.get_or_create(rss=feed, url=link)
                        add_feed(feed)
Example #5
def process_url(self, url):
    # Unshorten the URL; fall back to the original if it is not shortened.
    expanded_url = resolve(url)
    if expanded_url is None:
        expanded_url = url
    response = urllib2.urlopen(expanded_url)
    page = response.read()
    # Extract the numeric user id embedded in the page markup.
    user_id = re.findall("\"id\":\"[0-9]+\"", page)
    user_id = "{" + user_id[0] + "}"
    user_id = json.loads(user_id)
    user_id = user_id["id"]
    print user_id
    api = FoursquareAPI()
    listids = api.user_friendship(user_id)
    api.user_tips(user_id)
    api.user_venueslikes(user_id)
    # Tell the coordinating server to update this id at level 0.
    server = socket.socket()
    server.connect((HOST, PORT))
    server.send(json.dumps({"command": "UPDATE_ID", "clientid": client_id, "idlist": user_id, "level": 0}))
    message = json.loads(server.recv(BUFFER))
    server.close()
    print listids
    api.user_basic_info(user_id)
    # Queue every friend id for later processing.
    for friend_id in listids:
        server = socket.socket()
        server.connect((HOST, PORT))
        server.send(json.dumps({"command": "INSERT_ID", "clientid": client_id, "idlist": friend_id}))
        server.close()
Example #6
def writeCells(ws, user_id, username, message, keyword, url, email):
    sheet_copy = ws  # same worksheet object, not a copy
    row_number = ws.max_row + 1
    url = resolve(url) or url  # keep the original URL if it is not shortened
    date_now = datetime.datetime.now()
    formatted_time = date_now.strftime('%Y-%m-%d %H:%M')
    sheet_copy.cell(row=row_number, column=1, value=user_id)
    sheet_copy.cell(row=row_number, column=2, value=username.strip().encode('utf-8'))
    sheet_copy.cell(row=row_number, column=3, value=message.strip().encode('utf-8'))
    sheet_copy.cell(row=row_number, column=4, value=keyword)
    sheet_copy.cell(row=row_number, column=5, value=formatted_time)
    sheet_copy.cell(row=row_number, column=6, value=url)
    sheet_copy.cell(row=row_number, column=7, value=email)
    return sheet_copy
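
A hedged usage sketch for writeCells above, assuming an openpyxl worksheet; the workbook, file name, and sample row values are illustrative, not from the original project:

from openpyxl import Workbook

wb = Workbook()
ws = wb.active
# Append one row; resolve() inside writeCells expands the shortlink first.
ws = writeCells(ws, 42, "alice", "hello world", "python",
                "http://bit.ly/dunMgV", "alice@example.com")
wb.save("mentions.xlsx")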
Example #7
def get_expand(url, user, tag, social_network):
    tagl = [str(tag)]
    relatedToTweet = []
    if social_network == "Twitter":
        print "----------------------------"
        print "On Twitter for user " + user + " and tag " + str(tag) + ": "
        response = twittApi.user_timeline(screen_name=user, count=10)
        for tweet in response:
            ht = extract_hash_tags(tweet.text)
            intersect = list(set(tagl) & set(ht))
            if len(intersect) > 0:
                #relatedToTweet.append(tweet)
                # Check whether the tweet text contains links;
                # process each link in the text.
                links = extract_urls(tweet.text.encode('utf-8'))
                for link in links:
                    link = resolve(link)
                    if link != url:
                        print link
                        print "Date: " + str(tweet.created_at)
                        #call_command('add', URL=link)
                        feed = feedfinder.feed(link)
                        print feed
                        if feed:
                            rc = ResourceContainer.objects.get_or_create(rss=feed, url=link)
                            add_feed(feed)
        print "__________________________"
        print ""
    elif social_network == "delicious":
        print "----------------------------"
        print "On delicious for user " + user + " and tag " + str(tag) + ": "
        url_to_call = "http://feeds.delicious.com/v2/json/" + str(user) + "/" + urllib2.quote(str(tag), '')
        response = urllib2.urlopen(url_to_call)
        response = json.loads(response.read())
        for res in response:
            if url != str(res["u"]):
                print str(res["u"])
                print "Date: " + res["dt"]
                call_command('add', URL=str(res["u"]))
                feed = feedfinder.feed(str(res["u"]))
                if feed:
                    rc = ResourceContainer.objects.get_or_create(rss=feed, url=str(res["u"]))
                    add_feed(feed)
        print "__________________________"
        print ""
    else:
        print "This link has nothing from twitter or delicious"
Example #8
def url_expand(match):
    """Generate links with expanded URLs.

    Args:
        match (SRE_Match): Regular expression match object

    Returns:
        str: HTML formatted link for URL
    """
    url = match.group()
    if url not in URLS:
        if urlunshort.is_shortened(url):
            URLS[url] = GLib.markup_escape_text(urlunshort.resolve(url))
        else:
            URLS[url] = GLib.markup_escape_text(url)
    return '<a href="{}">{}</a>'.format(URLS[url], URLS[url])
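
Since url_expand takes a match object, it is meant to be used as a re.sub callback. A minimal usage sketch, assuming a hypothetical URL pattern (the project's real regex is not shown in this excerpt):

import re

# Illustrative pattern only; the original project's URL regex may differ.
URL_RE = re.compile(r'https?://\S+')
text = 'see http://bit.ly/dunMgV for details'
html = URL_RE.sub(url_expand, text)  # each URL becomes an <a> element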
Example #9
def verifyUrl(url):
    # Use the resolved URL when available; otherwise check the original.
    resolved_url = resolve(url)
    if resolved_url is None:
        resolved_url = url
    parts = urlsplit(resolved_url)
    hostname = parts.hostname
    valid = checkHostname(hostname)
    if valid:
        base_url = "{0.scheme}://{0.netloc}".format(parts)
        return base_url
    else:
        return None
Example #10
def url_expand(match):
    """Generate links with expanded URLs

    # Test mocks
    >>> URLS["http://bit.ly/dunMgV"] = "terminal.png"
    >>> from mock import Mock
    >>> match = Mock()
    >>> match.group = Mock(return_value=URLS.keys()[0])

    >>> url_expand(match)
    '<a href="terminal.png">terminal.png</a>'

    :param SRE_Match match: Regular expression match object
    :rtype: ``str``
    :return: HTML formatted link for URL
    """
    url = match.group()
    if url not in URLS:
        if urlunshort.is_shortened(url):
            URLS[url] = glib.markup_escape_text(urlunshort.resolve(url))
        else:
            URLS[url] = glib.markup_escape_text(url)
    return '<a href="%s">%s</a>' % (URLS[url], URLS[url])
Example #11
from urlunshort import resolve
import csv

# List of shortlinks to expand:
urls = ['http://zite.to/12Uq1nW']

# CSV file to store the expanded links.
outfile = 'longlinks.csv'
myfile = open(outfile, "wb")
w = csv.writer(myfile)

for url in urls:
    w.writerow([resolve(url)])

myfile.close()
Example #12
def entrypoint_urlunshort():
    parser = optparse.OptionParser(usage="%prog url")
    options, args = parser.parse_args()
    if args:
        print(resolve(args[0]))
Example #13
        },
        "threatInfo": {
            "threatTypes": ["MALWARE", "SOCIAL_ENGINEERING"],
            "platformTypes": ["ALL_PLATFORMS"],
            "threatEntryTypes": ["URL"],
            "threatEntries": [
                {
                    "url": url
                },
            ]
        }
    }
    headers = {'content-type': 'application/json'}
    r = requests.post(req_url, data=json.dumps(payload), headers=headers)
    resp = json.loads(r.text)
    return resp


for k in dic:
    print k
    # Strip the trailing two characters before resolving the shortlink.
    k = resolve(k[:-2])
    resp = google_url_shorten(k)
    #print resp
    if len(resp) != 0:
        malicious[k] = resp
        print malicious[k]
    # Checkpoint the results after every URL.
    with open(r"C:\Users\KC-L\Documents\maliciouscollection1.txt",
              "wb") as myFile:
        pickle.dump(malicious, myFile)
Example #14
import json
from pyphishtank import PhishTank
from urlunshort import resolve

# Read the tweets: the file holds one JSON object per line.
new_tweets = []
for line in open('data.json'):
    new_tweets.append(json.loads(line))

# Collect the already-expanded URLs from the tweets.
urls = []
for tweet in new_tweets:
    if 'expanded_url' in tweet:
        urls.append(tweet['expanded_url'])

api = PhishTank()

# Unshorten each URL and check it against PhishTank.
for url in urls:
    unshortenUrl = resolve(url)
    print(api.check(str(unshortenUrl)))
Example #15
# Body of a retry loop in the surrounding script (domain, malicious, k and
# google_url_shorten are defined by the enclosing code).
try:
    # Build a candidate short URL with a random 3-character suffix.
    rand = ''.join(
        random.choice(string.ascii_lowercase + string.ascii_uppercase +
                      string.digits) for _ in range(3))
    rand = '6n' + rand
    print rand
    url = domain + rand
    r = requests.get(url)
    status = r.status_code
    print status
    if status != 404 and status != 403:
        new = resolve(url)  # resolve once and reuse the result
        out = open(r"C:\Users\KC-L\Documents\qgs_collection", "a")
        out.write(url + '\n')
        out.write(new)
        print url
        out.flush()
        out.close()
        print new
        resp = google_url_shorten(new)
        print resp
        if len(resp) != 0:
            malicious[k] = resp
            print malicious[k]
        with open(r"C:\Users\KC-L\Documents\maliciouscollection2.txt",
                  "wb") as myFile:
            pickle.dump(malicious, myFile)
except Exception:
    time.sleep(1)  # back off briefly before the next attempt
    continue
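
A pattern that recurs in Examples #1, #2, #5, #6, and #9 above is calling resolve() and falling back to the original URL when it returns None (i.e. the URL was not shortened). A minimal standalone sketch of that idiom; the helper name unshorten is hypothetical:

from urlunshort import resolve

def unshorten(url):
    # resolve() returns the expanded URL, or None if url is not shortened.
    resolved = resolve(url)
    return resolved if resolved is not None else url

print(unshorten('http://bit.ly/dunMgV'))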