Exemplo n.º 1
0
def leerBaloncestoLigas():
    
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.3'}

    req = Request(
        url='https://www.centroapuesta.com/apuestas/futbol/espana/laliga/', headers=headers)
    html = urlopen(req).read()
    soup2 = BeautifulSoup(html)

    content = soup2.find("div", {"id": "content"})
    menu_apuestas_deportes = content.find(
        "div", {"id": "menu-apuestas-deportes"})
    sports_menu = menu_apuestas_deportes.find("nav", {"class": "sports-menu"})
    ul = sports_menu.find("ul")
    '''SPORTS'''
    li = ul.find("li", {"class": "baloncesto"})
    ul = li.find("ul")
    '''COUNTRIES'''
    lis = ul.findAll("li")
    for li in lis:
        ul = li.find("ul")
        if ul is not None:

            links = ul.findAll('a')
            for link in links:
                cuotasBaloncesto(link["href"])
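For context, a minimal import sketch this excerpt appears to assume (cuotasBaloncesto is a helper defined elsewhere in the original module):

from urllib.request import Request, urlopen
from bs4 import BeautifulSoup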
Exemplo n.º 2
0
def downloadFile( sourceurl, targetfname ):
  mem_file = ''
  good_read = False
  xbrlfile = None
  if os.path.isfile( targetfname ):
    print('Local copy already exists')
    return True
  else: 
    print('Downloading:', sourceurl)
    try:
      xbrlfile = urlopen( sourceurl )
      try:
        mem_file = xbrlfile.read()
        good_read = True
      finally:
        xbrlfile.close()
    except HTTPError as e:
      print('HTTP Error:', e.code)
    except URLError as e:
      print('URL Error:', e.reason)
    except TimeoutError as e:
      print('Timeout Error:', e.reason)
    except socket.timeout:
      print('Socket Timeout Error')
    if good_read:
      output = open( targetfname, 'wb')
      output.write( mem_file )
      output.close()
    return good_read
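A minimal usage sketch for downloadFile; the URL and filename here are hypothetical, and the imports are the ones the function body relies on:

import os
import socket
from urllib.request import urlopen
from urllib.error import HTTPError, URLError

# Hypothetical example: fetch one filing and report the outcome.
if downloadFile('https://www.sec.gov/example/filing.xml', 'filing.xml'):
    print('Download OK')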
Exemplo n.º 3
0
def lookup_cik(ticker, name=None):
  # Given a ticker symbol, retrieves the CIK
  good_read = False
  ticker = ticker.strip().upper()
  url = 'http://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK={cik}&count=10&output=xml'.format(cik=ticker)

  try:
    xmlFile = urlopen( url )
    try:
      xmlData = xmlFile.read()
      good_read = True
    finally:
      xmlFile.close()
  except HTTPError as e:
    print('HTTP Error', e.code)
  except URLError as e:
    print('Url Error', e.reason)
  except TimeoutError as e:
    print('Timeout Error', e.reason)
  except socket.timeout:
    print('Socket Timeout Error')
  if not good_read:
    print('Unable to lookup CIK for ticker', ticker)
    return
  try:
    root = ET.fromstring(xmlData)
  except ET.ParseError as perr:
    print('XML Parser error:', perr)
    return

  try:
    cikElement = list(root.iter('CIK'))[0]
    return int(cikElement.text)
  except IndexError:
    pass
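A minimal usage sketch for lookup_cik, listing the imports the excerpt assumes; the ticker is just an illustration:

import socket
import xml.etree.ElementTree as ET
from urllib.request import urlopen
from urllib.error import HTTPError, URLError

cik = lookup_cik('AAPL')  # illustrative ticker
if cik is not None:
    print('CIK:', cik)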
Exemplo n.º 4
0
def getCountry(ipAddress):
    try:
        response = urlopen("http://freegeoip.net/json/" +
                           ipAddress).read().decode('utf-8')
    except HTTPError:
        return None
    responseJson = json.loads(response)
    return responseJson.get("country_code")
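A minimal usage sketch; the IP address is a placeholder, the imports are what the function above assumes, and the freegeoip.net service appears to have been retired, so the request may fail:

import json
from urllib.request import urlopen
from urllib.error import HTTPError

print(getCountry('8.8.8.8'))  # placeholder IP; prints a country code, or None on an HTTP error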
Exemplo n.º 5
0
def main(geo):
    # find the FTP address from [url=https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GEO]https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GEO[/url]
    response = urlopen(
        "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc={}".format(geo))
    pattern = re.compile(r'<a href="(.*?)">\(ftp\)</a>')
    # use wget from shell to download SRA data
    ftp_address = re.search(pattern, response.read().decode('utf-8')).group(1)
    os.system('wget -nd -r -l 1 -A "*.sra" ' + ftp_address)
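A safer variant of the shell call above, using subprocess.run with an argument list instead of os.system (an alternative sketch, not part of the original example; ftp_address is the value computed in main()):

import subprocess

# Equivalent of the wget call in main(): -nd (no directories), -r -l 1 (recurse one level),
# -A '*.sra' (accept only .sra files); the argument list avoids shell quoting issues.
subprocess.run(['wget', '-nd', '-r', '-l', '1', '-A', '*.sra', ftp_address], check=True)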
Exemplo n.º 6
0
def check_network():  
    while True:  
        try:  
            result=urlopen('http://www.google.com').read()  
            print("Network is Ready!")  
            break  
        except Exception:  
            print("Network is not ready,Sleep 5s....")
            time.sleep(5)  
    return True
Exemplo n.º 7
0
def cnn():
    try:
        jsonObj=urlopen("https://newsapi.org/v2/top-headlines?sources=cnn&apiKey=######")
        data=json.load(jsonObj)
        print("                ============CNN News==================")
        i=1
        for item in data['articles']:
            print(str(i)+'.'+item['title']+'\n'+item['description']+'\n')
            i+=1
    except Exception as e:
        print(str(e))
Exemplo n.º 8
0
def new_scientist():
    try:
        jsonObj=urlopen('https://newsapi.org/v2/top-headlines?sources=new-scientist&apiKey=######')
        data=json.load(jsonObj)
        i=1
        print('                   ==================New Scientist============')
        for item in data['articles']:
            print(str(i)+'.'+item['title']+'\n'+item['description']+'\n')
            i+=1
    except Exception as e:
        print(str(e))
Exemplo n.º 9
0
def google_india_news():
    try:
        jsonObj=urlopen('https://newsapi.org/v2/top-headlines?sources=google-news-in&apiKey=######')
        data=json.load(jsonObj)
        i=1
        print('                   ==================Google India News============')
        for item in data['articles']:
            print(str(i)+'.'+item['title']+'\n'+item['description']+'\n')
            i+=1
    except Exception as e:
        print(str(e))
Exemplo n.º 10
0
def read_url(url):
    url = url.replace(" ","%20")
    req = Request(url)
    a = urlopen(req).read()
    soup = BeautifulSoup(a, 'html.parser')
    x = (soup.find_all('a'))
    for i in x:
        file_name = i.extract().get_text()
        url_new = url + file_name
        url_new = url_new.replace(" ","%20")
        if(file_name[-1]=='/' and file_name[0]!='.'):
            read_url(url_new)
        print(url_new)
Exemplo n.º 11
0
def times_of_india():
    try:
        jsonObj=urlopen('https://newsapi.org/v2/top-headlines?sources=techcrunch&apiKey=######')
        data=json.load(jsonObj)
        i=1
        print('''             ==============TIMES OF INDIA============'''
              + '\n')
        for item in data['articles']:
            print(str(i) + '. ' + item['title'] + '\n')
            print(item['description'] + '\n')
            i += 1
    except Exception as e:
        print(str(e))
Exemplo n.º 12
0
def html_doc(soup):
    html_doc = urlopen('http://').read()
    soup = BeautifulSoup(html_doc)
    #print(soup)
    #print(soup.title)
    #print(soup.title.string)
    for meta in soup.find_all('meta'):
        meta.get('content')
    for link in soup.find_all('a'):
        link.get('href')
    for link in soup.find_all('a'):
        link.contents[0]
    soup.find('div', 'content')
    soup.find('div', id='top_menu')
Exemplo n.º 13
0
    def Extract_subcript(self, video_id):

        url = "http://video.google.com/timedtext?lang=en&v=" + video_id
        html_page = urlopen(url)
        bs_obj = BeautifulSoup(html_page, "html.parser")
        lines = bs_obj.transcript.find_all("text")  # collect every caption <text> tag with BeautifulSoup
        captions = [""]  # list that will hold the captions line by line
        caption = ""  # string that will hold the full transcript
        for line in lines:
            one_line = html.unescape(line.get_text())
            # lowercase everything if needed
            # one_line = html.unescape(line.get_text()).lower()
            one_line = one_line.replace("\n", " ")
            one_line = one_line.split(" ")
            print(one_line)
            captions += one_line
        return captions
Exemplo n.º 14
0
def weatherAPISearch(location):

    state = 'mn'  # Input for state
    city = 'state%20jello'  # Input for city, %20 used as placeholder for space since user can't enter space

    # Get the dataset
    response = urlopen('http://api.wunderground.com/api/' + 'key' + '/' + 'temp_f' + '/' + 'q' + '/' + state + '/' + city + '.json')
    json_string = response.read().decode('utf-8')
    parsed_json = json.loads(json_string)

    # Get key data pairs
    location = parsed_json['location']['city']
    temp_f = parsed_json['current_observation']['temp_f']

    # Print temperature based on city location
    print("Current temperature in %s is: %s" % (location, temp_f))
Exemplo n.º 15
0
def SECdownload(year, month):
  root = None
  feedFile = None
  feedData = None
  good_read = False
  itemIndex = 0
  edgarFilingsFeed = 'http://www.sec.gov/Archives/edgar/monthly/xbrlrss-' + \
                      str(year) + '-' + str(month).zfill(2) + '.xml'
  print(edgarFilingsFeed)
  if not os.path.exists( "sec/" + str(year)):
    os.makedirs("sec/" + str(year))
  if not os.path.exists( "sec/" + str(year) + '/' + str(month).zfill(2)):
    os.makedirs("sec/" + str(year) + '/' + str(month).zfill(2) )
  target_dir = "sec/" + str(year) + '/' + str(month).zfill(2) +'/'
  try:
    feedFile = urlopen( edgarFilingsFeed )
    try:
      feedData = feedFile.read()
      good_read = True
    finally:
      feedFile.close()
  except HTTPError as e:
    print("HTTP Error:", e.code)
Exemplo n.º 16
0
 import sys
 
 
+try:
+    from urllib.request import urlopen
+except ImportError:
+    # py2
+    from urllib2 import urlopen
+
+
 # Environment variable for the socket url
 # (used by clients to locate the socket [http, zmq(unix, tcp)])
 CTX_SOCKET_URL = 'CTX_SOCKET_URL'
@@ -60,9 +65,8 @@ def zmq_client_req(socket_url, request, timeout):
 
 
 def http_client_req(socket_url, request, timeout):
-    response = urllib2.urlopen(socket_url,
-                               data=json.dumps(request),
-                               timeout=timeout)
+    response = urlopen(
+        socket_url, data=json.dumps(request).encode('utf-8'), timeout=timeout)
     if response.code != 200:
         raise RuntimeError('Request failed: {0}'.format(response))
     return json.loads(response.read())
@@ -137,3 +141,4 @@ def main(args=None):
 
 if __name__ == '__main__':
     main()
+
Exemplo n.º 17
0
@@ -3371,7 +3371,7 @@
         else:
             color = colors_bgdark[id_]
         msg = '\033[3%sm%s\033[m' % (color, msg)
-    print "++ %s: %s" % (ids[id_], msg)
+    print("++ %s: %s" % (ids[id_], msg))
 
 
 def Readfile(file_path, remove_linebreaks=0, ignore_error=0):
@@ -3388,7 +3388,7 @@
     # URL
     elif PathMaster().is_url(file_path):
         try:
-            from urllib import urlopen
+            from urllib.request import urlopen
             f = urlopen(file_path)
             if f.getcode() == 404:  # URL not found
                 raise
@@ -3409,7 +3409,7 @@
                 Error(_("Cannot read file:") + ' ' + file_path)
 
     if remove_linebreaks:
-        data = map(lambda x: re.sub('[\n\r]+$', '', x), data)
+        data = [re.sub('[\n\r]+$', '', x) for x in data]
 
     Message(_("File read (%d lines): %s") % (len(data), file_path), 2)
     return data
@@ -3427,13 +3427,13 @@
     cont = []
     if CONF['target'] in ('aat', 'rst', 'txt'):
         for line in contents:
Exemplo n.º 18
0
# -*- coding: utf-8 -*-
"""
Created on Wed Jul 25 10:38:47 2018

@author: Kritika Mishra
"""
from urllib.request import urlopen
from bs4 import BeautifulSoup
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

url = "http://news.bbc.co.uk/2/hi/health/2284783.stm"
html = urlopen(url).read()
soup = BeautifulSoup(html)

# kill all script and style elements
for script in soup(["script", "style"]):
    script.extract()  # rip it out

# get text
text = soup.get_text()

# break into lines and remove leading and trailing space on each
lines = (line.strip() for line in text.splitlines())
# break multi-headlines into a line each
chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
# drop blank lines
text = '\n'.join(chunk for chunk in chunks if chunk)

print(text)
Exemplo n.º 19
0
from urllib.request import Request, urlopen

headers = {'Authorization': '08e15c0a-4119-4ed5-8852-bf512a3cd434'}
request = Request(
    'https://api.wizenoze.com/v1/customSearchEngines?reading=reading%20english&english=&grade%201=',
    headers=headers)

response_body = urlopen(request).read()
print(response_body)
Exemplo n.º 20
0
    def main():
        keywordPath = "features.txt"  # this should be the same keywords list/order used for training the ML Model
        count_vect = loadKeywords(keywordPath, False)
        keywords = count_vect.vocabulary_
        print("keywords:")
        print(keywords)
        sorted_keywords = sortingDict(keywords)
        kList = []
        for item in sorted_keywords:
            kList.append(item[0])
        print(kList)
        modelBin = 'ocean.bin'
        listTopN = closeWordsList(modelBin, kList, 5)
        print(listTopN)

        x_train = []
        y_train = []

        with open('train.csv', 'r') as csvfile:
            reader = csv.reader(csvfile, delimiter=',')
            for row in reader:
                y_train.append(int(row[1]))
        noneContents = []
        x_n = None
        y_n = array(y_train)
        for i in range(1, 201):
            parsed = parser.from_file('/data/search_term_generation/200_files/' + str(i))
            content = parsed["content"]
            if content is not None:
                tempX = count_vect.transform(parsed["content"].split())
                x_train.append(tempX)
                print(str(i) + ":")
                print(tempX.toarray().sum(axis=0))
                if x_n is None:
                    x_n = array([tempX.toarray().sum(axis=0)])
                else:
                    x_n = np.concatenate((x_n, [tempX.toarray().sum(axis=0)]), axis=0)
            else:
                noneContents.append(i)
        print(noneContents)

        np.savetxt('x_n.txt', x_n, fmt='%d')
        np.savetxt('y_n.txt', y_n, fmt='%d')

        x = np.loadtxt('x_n.txt', dtype=int)
        x_with_closeWords = addCloseCounts(listTopN, x)

        y = np.loadtxt('y_n.txt', dtype=int)
        mergeAllContents(y)
        x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=50)
        print(y_test)
        cv = ShuffleSplit(n_splits=5, test_size=0.1, random_state=0)

        clf = GaussianNB()
        scoreNB = cross_val_score(clf, x, y, cv=cv)
        print(scoreNB)
        print("performance with close words added:")
        clf11 = GaussianNB()
        scoreNB2 = cross_val_score(clf11, x_with_closeWords, y, cv=cv)
        print(scoreNB2)
        clf1 = GaussianNB().fit(x_train, y_train)
        y_pred = clf1.predict(x_test)
        accNum = accuracy(y_pred, y_test)
        print("Model: Naive Bayes")
        acc = (y_test == y_pred).sum() / float(len(y_test))
        print("Test Accuracy:" + str(acc))
        print("Test Accuracy with 3 classes:" + str(accNum / 20))
        acc_train = (y_train == clf1.predict(x_train)).sum() / float(len(y_train))
        print("Train Accuracy:" + str(acc_train))
        from sklearn import linear_model

        clf22 = linear_model.SGDClassifier()
        scoreSVM = cross_val_score(clf22, x, y, cv=cv)
        print(scoreSVM)
        print("performance with close words added:")
        clf222 = linear_model.SGDClassifier()
        scoreSVM2 = cross_val_score(clf222, x_with_closeWords, y, cv=cv)
        print(scoreSVM2)
        clf2 = linear_model.SGDClassifier().fit(x_train, y_train)

        y_pred2 = clf2.predict(x_test)
        accNum2 = accNum = accuracy(y_pred2, y_test)
        print("Model: SVM")
        acc = (y_test == y_pred2).sum() / float(len(y_test))
        print("Test Accuracy:" + str(acc))
        print("Test Accuracy with 3 classes:" + str(accNum2 / 20))

        acc_train = (y_train == clf2.predict(x_train)).sum() / float(len(y_train))
        print("Train Accuracy:" + str(acc_train))
        print("******************")

        clf33 = MLPClassifier(max_iter=2000, learning_rate='adaptive')
        scoreNN = cross_val_score(clf33, x, y, cv=cv)
        print(scoreNN)
        print("performance with close words added:")
        clf333 = MLPClassifier(max_iter=2000, learning_rate='adaptive')
        scoreNN3 = cross_val_score(clf333, x_with_closeWords, y, cv=cv)
        print(scoreNN3)
        clf3 = MLPClassifier(max_iter=2000, learning_rate='adaptive').fit(x_train, y_train)
        y_pred3 = clf3.predict(x_test)

        accNum3 = accNum = accuracy(y_pred3, y_test)

        print("Model: Neural Network")
        acc = (y_test == y_pred3).sum() / float(len(y_test))
        print("Test Accuracy:" + str(acc))
        print("Test Accuracy with 3 classes:" + str(accNum3 / 20))
        acc_train = (y_train == clf3.predict(x_train)).sum() / float(len(y_train))
        print("Train Accuracy:" + str(acc_train))
        print("******************")

        from sklearn.ensemble import RandomForestClassifier

        clf44 = RandomForestClassifier(n_estimators=100)
        scoreRF = cross_val_score(clf44, x, y, cv=cv)
        print(scoreRF)
        clf444 = RandomForestClassifier(n_estimators=100)
        print("performance with close words added:")
        scoreRF4 = cross_val_score(clf444, x_with_closeWords, y, cv=cv)
        print(scoreRF4)

        clf4 = RandomForestClassifier(n_estimators=100).fit(x_train, y_train)
        y_pred4 = clf4.predict(x_test)
        accNum4 = accNum = accuracy(y_pred4, y_test)

        print("Model: Random Forest")
        acc = (y_test == y_pred4).sum() / float(len(y_test))
        print("Test Accuracy:" + str(acc))
        print("Test Accuracy with 3 classes:" + str(accNum4 / 20))
        acc_train = (y_train == clf4.predict(x_train)).sum() / float(len(y_train))
        print("Train Accuracy:" + str(acc_train))
        noneContents = array(noneContents)
        xOut = TemporaryFile()
        yOut = TemporaryFile()
        noneContentsOut = TemporaryFile()
        np.save(xOut, x_n)
        np.save(yOut, y_n)
        np.save(noneContentsOut, noneContents)

        with open('train.csv', 'r') as csvfile:
            reader = csv.reader(csvfile, delimiter=',')
            i = 0
            for row in reader:
                i += 1
                if i > 30:
                    url = row[0]
                    print(url)
                    print(download_file("http://" + url, i))
                requests.get("http://" + url, stream=True, headers={'User-agent': 'Mozilla/5.0'})
                label = row[1]
                reqLink = urlopen("http://" + url)
                content = reqLink.read()
                contentFeatures = count_vect.transform(content.split())
Exemplo n.º 21
0
    "class %%%(object):\n\tdef __init__(self, ***)":
        "class %%% has-a __init__ that takes self and *** parameters",
    "class %%%(object):n\tdef ***(self, @@@)":
        "class %%% has-a function named *** that takes self and @@@ parameters",
        "*** = %%%()":
            "Set *** to an instance of class %%%",
        "***.***(@@@)":
            "From *** get the *** function, and call it with parameters self, @@@",
        "***.*** = '***'":
            "From *** get the *** attribute and set it to '***"."

}

PHRASE_FIRST = False
if len(sys.argv) == 2 and sys.argv[1] == "english":
    PHRASE_FIRST = True

for word in urlopen(WORD_URL).readlines():
    WORDS.append(word.strip())

def convert(snippet, phrase):
    class_names = [w.capitalize() for w in
                   random.sample(WORDS, snippet.count("%%%"))]
    other_name = random.sample(WORDS, snippet.count("***"))
    results = []
    param_names = []

    for i in range(0, snippet.count("@@@")):
        param_count = random.randint(1,3)
        param_names.append(', '.join(
            random.sample(WORDS, param_count)))
Exemplo n.º 22
0
#List of column names
COLUMN_NAMES = [
    'employer','download','location','union',
    'local', 'naics', 'num_workers', 'expiration_date'
]

STATES = [
    'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA',
    'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD',
    'MA', 'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ',
    'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC',
    'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY'
]

page = urlopen('http://www.dol.gov/olms/regs/compliance/cba/Cba_CaCn.htm')
rawtext = page.read()
html = fromstring(rawtext)

#print tostring(html)

tables = html.cssselect('table')
table = tables[2]

for tr in table.cssselect('tr')[1:]:
    cellvalues = [td.text_content() for td in tr.cssselect('td')]
    data = dict(zip(COLUMN_NAMES, cellvalues))
    print(data)
    data['num_workers'] = int(data['num_workers'])
    data['expiration_date'] = \
    datetime.datetime.strptime(data['expiration_date'], '%m-%d-%y').date()
Exemplo n.º 23
0
#-*- coding: utf-8 -*-

from urllib.request import urlopen
from bs4 import BeautifulSoup

url = "https://www.rottentomatoes.com/"
html = urlopen(url)
source = html.read()  # read the page source as bytes
html.close()  # close the handle once the urlopen call is done

soup = BeautifulSoup(
    source, "html5lib"
)  # pass the document to the BeautifulSoup constructor to build the parse object, conventionally called soup
table = soup.find(id="Top-Box-Office")
movies = table.find_all(class_="middle_col")

for movie in movies:
    title = movie.get_text()
    print(title)
    link = movie.a.get('href')
    url = 'https://www.rottentomatoes.com' + link
    print(url)
Exemplo n.º 24
0
# from urllib2 import urlopen
try:
    from urllib.request import urlopen
except ImportError:
    from urllib2 import urlopen

from bs4 import BeautifulSoup

# Read the URL and save text in html1 and then in text.
url1 = "https://www.theguardian.com/politics/2018/sep/20/the-death-of-consensus-how-conflict-came-back-to-politics"
html1 = urlopen(url1).read().decode('utf8')
BeautifulSoup(html1).get_text()
soup = BeautifulSoup(html1, 'lxml')

# Read the PDF and save text in pdfString.
url2 = "http://eprints.lse.ac.uk/86880/7/Cox_Rise%20of%20populism%20published_2018.pdf"
import PyPDF2
from io import BytesIO

pdf2 = BytesIO(urlopen(url2).read())  # open() cannot read a URL directly; fetch the PDF bytes over HTTP
fileReader = PyPDF2.PdfFileReader(pdf2)

pdfString = ""
for x in range(11):
    pageObj = fileReader.getPage(x)
    pdfString = pdfString + pageObj.extractText()

# Print text from url2. Left commented out, but you can enable it.
# print(pdfString)
Exemplo n.º 25
0
from passwordmeter import test
from urllib.request import urlopen
from os.path import isfile
from random import choice, randint

if not isfile('words.txt'):
    print('Downloading words.txt...')
    url = 'https://raw.githubusercontent.com/dwyl/english-words/master/words.txt'
    with open('words.txt', 'wb') as f:
        f.write(urlopen(url).read())

words = open('words.txt', 'r').read().split("\n")
special_chars = ['!', '?']


def create_password(num_words=2, num_numbers=4, num_special=1):
    pass_str = ''

    for _ in range(num_words):
        pass_str += choice(words).lower().capitalize()
    for _ in range(num_numbers):
        pass_str += str(randint(0, 9))
    for _ in range(num_special):
        pass_str += choice(special_chars)

    return pass_str


def main():
    pass_str = create_password()
    strength, _ = test(pass_str)
Exemplo n.º 26
0
#!/usr/bin/env python
# sample usage: checksites.py eriwen.com nixtutor.com yoursite.org
import pickle
import os
import logging
import time
import re
from optparse import OptionParser, OptionValueError
from smtplib import SMTP
from getpass import getuser
from socket import gethostname, setdefaulttimeout
try:
    from urllib2 import urlopen
except ImportError:
    from urllib.request import urlopen


def generate_email_alerter(to_addrs, from_addr=None, use_gmail=False,
                           username=None, password=None, hostname=None, port=25):
    if not from_addr:
        from_addr = getuser() + "@" + gethostname()
    if use_gmail:
        if username and password:
            server = SMTP('smtp.gmail.com', 587)
            server.starttls()
        else:
            raise OptionValueError('You must provide a username and password to use GMail')
    else:
        if hostname:
            server = SMTP(hostname, port)
        else:
            server = SMTP()
            # server.connect()
        server.starttls()
    if username and password:
        server.login(username, password)

    def email_alerter(message, subject='You have an alert'):
        server.sendmail(from_addr, to_addrs,
                        'To: %s\r\nFrom: %s\r\nSubject: %s\r\n\r\n%s'
                        % (", ".join(to_addrs), from_addr, subject, message))

    return email_alerter, server.quit


def get_site_status(url):
    try:
        urlfile = urlopen(url)
        status_code = urlfile.code
        if status_code in (200, 302):
            return 'up', urlfile
    except:
        pass
    return 'down', None


def get_headers(url):
    '''Gets all headers from URL request and returns'''
    try:
        return urlopen(url).info().as_string()
    except:
        return 'Headers unavailable'


def compare_site_status(prev_results, alerter):
    '''Report changed status based on previous results'''

    def is_status_changed(url):
        startTime = time.time()
        status, urlfile = get_site_status(url)
        endTime = time.time()
        elapsedTime = endTime - startTime
        msg = "%s took %s" % (url, elapsedTime)
        logging.info(msg)
        if status != "up":
            elapsedTime = -1
        friendly_status = '%s is %s. Response time: %s' % (
            url, status, elapsedTime)
        print(friendly_status)
        if url in prev_results and prev_results[url]['status'] != status:
            logging.warning(status)
            # Email status messages
            alerter(str(get_headers(url)), friendly_status)
        # Create dictionary for url if one doesn't exist (first time url was
        # checked)
        if url not in prev_results:
            prev_results[url] = {}
        # Save results for later pickling and utility use
        prev_results[url]['status'] = status
        prev_results[url]['headers'] = None if urlfile is None else urlfile.info().headers
        prev_results[url]['rtime'] = elapsedTime

    return is_status_changed


def is_internet_reachable():
    '''Checks Google then Yahoo just in case one is down'''
    statusGoogle, urlfileGoogle = get_site_status('http://www.google.com')
    statusYahoo, urlfileYahoo = get_site_status('http://www.yahoo.com')
    if statusGoogle == 'down' and statusYahoo == 'down':
        return False
    return True


def load_old_results(file_path):
    '''Attempts to load most recent results'''
    pickledata = {}
    if os.path.isfile(file_path):
        picklefile = open(file_path, 'rb')
        pickledata = pickle.load(picklefile)
        picklefile.close()
    return pickledata


def store_results(file_path, data):
    '''Pickles results to compare on next run'''
    output = open(file_path, 'wb')
    pickle.dump(data, output)
    output.close()


def normalize_url(url):
    '''If a url doesn't have a http/https prefix, add http://'''
    if not re.match('^http[s]?://', url):
        url = 'http://' + url
    return url
Exemplo n.º 27
0
def make_soup(url):
    html = urlopen(url).read()
    return BeautifulSoup(html)
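A minimal sketch of the same helper with its imports and an explicit parser, which avoids BeautifulSoup's default-parser warning (the URL below is a placeholder):

from urllib.request import urlopen
from bs4 import BeautifulSoup

def make_soup(url):
    # same helper as above, but with an explicit parser
    html = urlopen(url).read()
    return BeautifulSoup(html, 'html.parser')

print(make_soup('http://example.com').title)  # placeholder URL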
Exemplo n.º 28
0
with open('input.json', 'r') as f:
    inputjson = json.load(f)

# Remove an element from a JSON object
# JSON-to-Python conversion:
#   Object -> dict
#   Array  -> list
#   String -> str
#   false  -> False
#   null   -> None

#Remove element crime from state Object
for state in inputjson['states']:
    del state['crime']
#export python dict to Json file

with open('output.json','w') as w:
    json.dump(inputjson,w,indent=2)


####
# Get and parse JSON from a feed or site

import json
from urllib.request import urlopen

with urlopen('https://mysite') as response:
    source = response.read()
#convert string to python object
data = json.loads(source)

Exemplo n.º 29
0
    "*** = %%%()":
        "Set *** to an instance of class %%%.",
    "***.***(@@@)":
        "From *** get the *** function, call it with params self, @@@.",
    "***.*** = '***'":
        "From *** get the *** attribute and set it to '***'."
}

# drill phrases first?
if len(sys.argv) == 2 and sys.argv[1] == "english":
    phrase_first = True
else:
    phrase_first = False

# load words from the website
for word in urlopen(word_url).readlines():
    words.append(str(word.strip(), encoding="utf-8"))


def convert(snippet,phrase):
    class_names = [w.capitalize() for w in
                   random.sample(words, snippet.count("%%%"))]
    other_names = random.sample(words, snippet.count("***"))
    results = []
    param_names = []

    for i in range(0, snippet.count("@@@")):
        param_count = random.randint(1,3)
        param_names.append(', '.join(
            random.sample(words, param_count)))
Exemplo n.º 30
0
    }
    x = requests.post(url, data=objeto)
    print(x.text)


'''---------------------------------------------------------------'''
today = date.today()
d1 = today.strftime("%d-%m-%Y")
headers = {
    'User-Agent':
    'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.3'
}

req = Request(url='http://www.elcomparador.com/futbol/' + str(d1),
              headers=headers)
html = urlopen(req).read()
soup2 = BeautifulSoup(html)
contSurebets = 0
contPartidos = 0
tomorrow = today + datetime.timedelta(days=1)
print(tomorrow)
div_contenedor = soup2.find("div", {"id": "contenedor_lista_partidos"})
div_partido = div_contenedor.findAll("div", {"id": "contenedor_evento"})
for partido in div_partido:

    fila_evento = partido.find("div", {"id": "fila_evento"})
    celda_evento_fecha = fila_evento.find("div", {"id": "celda_evento_fecha"})
    horas = celda_evento_fecha.findAll("span", {"class": "hora"})
    hora_text = ''
    for hora in horas:
        hora_text = hora.text
Exemplo n.º 31
0
# -*-coding:utf-8 -*-

from urllib.request import urlopen  # import urlopen, which can open a URL and operate on the connection

resp = urlopen(
    'https://en.wikipedia.org/wiki/List_of_bicycle-sharing_systems#Cities')
html_data = resp.read().decode('utf-8')
print(html_data)
Exemplo n.º 32
0
def leer():
    today = date.today()
    d1 = today.strftime("%d-%m-%Y")
    headers = {
        'User-Agent':
        'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.3'
    }

    req = Request(url='http://www.elcomparador.com/futbol/' + str(d1),
                  headers=headers)
    html = urlopen(req).read()
    soup2 = BeautifulSoup(html)
    contSurebets = 0
    contPartidos = 0
    tomorrow = today + datetime.timedelta(days=1)
    print(tomorrow)
    div_contenedor = soup2.find("div", {"id": "contenedor_lista_partidos"})
    div_partido = div_contenedor.findAll("div", {"id": "contenedor_evento"})
    for partido in div_partido:

        fila_evento = partido.find("div", {"id": "fila_evento"})
        celda_evento_fecha = fila_evento.find("div",
                                              {"id": "celda_evento_fecha"})
        horas = celda_evento_fecha.findAll("span", {"class": "hora"})
        hora_text = ''
        for hora in horas:
            hora_text = hora.text
        celda_evento_partido = fila_evento.find("div",
                                                {"id": "celda_evento_partido"})
        franja_equipos = celda_evento_partido.findAll("span",
                                                      {"class": "equipo"})
        contNombre = 0

        team1 = ''
        team2 = ''
        odd1 = 0
        odd2 = 0
        odd3 = 0
        bookie1 = ''
        bookie1_id = 0
        bookie2 = ''
        bookie2_id = 0
        bookie3 = ''
        bookie3_id = 0
        bookie = ''
        bookieId = 0
        celda_evento_cuotas = fila_evento.find("div",
                                               {"id": "celda_evento_cuotas"})
        contenedor_cuotas = celda_evento_cuotas.findAll(
            "div", {"id": "contenedor_cuotas"})
        for contenedor_cuota in contenedor_cuotas:
            fila_cuotas = contenedor_cuota.findAll("div",
                                                   {"id": "fila_cuotas"})
            contCuota = 0
            for fila_cuota in fila_cuotas:
                celda_cuotas = fila_cuota.find("div", {"class": "verde"})
                if celda_cuotas is not None:
                    a_link = celda_cuotas.find('a')

                    link = a_link['href']
                    if "bet365" in link:
                        bookie = "bet365"
                        bookieId = "Bet365"
                    if "codere" in link:
                        bookie = "codere"
                        bookieId = "Codere"
                    if "bwin" in link:
                        bookie = "bwin"
                        bookieId = "BWin"
                    if "marathonbet" in link:
                        bookie = "marathon bet"
                        bookieId = "MarathonBet"
                    if "luckia" in link:
                        bookie = "luckia"
                        bookieId = "Luckia"
                    if "sportium" in link:
                        bookie = "sportium"
                        bookieId = "Sportium"
                    if "betway" in link:
                        bookie = "betway"
                        bookieId = "Betway"
                    if "marcaapuestas" in link:
                        bookie = "marca apuestas"
                        bookieId = "MarcarApuestas"
                    if "willhill" in link:
                        bookie = "william hill"
                        bookieId = "WilliamHill"
                    if "sport888" in link:
                        bookie = "888 sport"
                        bookieId = "888Sport"
                    if "betfair" in link:
                        bookie = "betfair"
                        bookieId = "Betfair"
                    if "interwetten" in link:
                        bookie = "interwetten"
                        bookieId = "Interwetten"

                    if contCuota == 0:
                        odd1 = float(celda_cuotas.text)
                        bookie1 = bookie
                        bookie1_id = bookieId
                    if contCuota == 1:
                        odd2 = float(celda_cuotas.text)
                        bookie2 = bookie
                        bookie2_id = bookieId
                    if contCuota == 2:
                        odd3 = float(celda_cuotas.text)
                        bookie3 = bookie
                        bookie3_id = bookieId
                    contCuota = contCuota + 1

        for equipo in franja_equipos:
            if contNombre == 0:
                team1 = equipo.text
            if contNombre == 1:
                team2 = equipo.text
            contNombre = contNombre + 1

        if hora_text != '':
            if odd1 != 0 and odd2 != 0 and odd3 != 0:
                contPartidos = contPartidos + 1
                percentage = (1 / odd1) + (1 / odd2) + (1 / odd3)
                if percentage < 1:
                    contPartidos = contPartidos + 1
                    match = team1 + " vs " + team2
                    percentage = percentage * 100
                    percentage = 100 - percentage
                    print(match + " " + str(percentage))
                    print(bookie1 + " " + str(odd1))
                    print(bookie2 + " " + str(odd2))

                    print(bookie3 + " " + str(odd3))
                    enviarPost(match, d1, team1, team2, odd1, odd2, odd3,
                               bookie1_id, bookie2_id, bookie3_id, percentage,
                               today)
    print("bucle")
Exemplo n.º 33
0
def cuotasFutbol(link):

    arrayLink = link.split("/")

    sport = arrayLink[5]
    country = arrayLink[4]
    league = arrayLink[6]

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.3'}

    req = Request(
        url=link, headers=headers)
    html = urlopen(req).read()
    soup2 = BeautifulSoup(html)

    content = soup2.find("div", {"id": "content"})
    main_column = content.find("div", {"class": "main-column"})
    article = main_column.find("article")
    matches_table = article.find("table")
    tbody = matches_table.find("tbody")
    trs = tbody.findAll("tr")
    for tr in trs:
        tds = tr.findAll("td")
        cont = 0
        fecha = ''
        hora = ''
        match = ''
        cuota1 = 0
        cuota2 = 0
        cuota3 = 0
        casa1 = ''
        casa2 = ''
        casa3 = ''
        for td in tds:

            if cont == 0:
                fecha = td.find("span", {"class": "date"})
                hora = td.find("span", {"class": "time"})
                if fecha is not None:
                    fecha = fecha.text
                    hora = hora.text

            if cont == 1:
                partido = td.find("a")
                if partido is not None:
                    match = partido.text.strip()

            if cont == 2:
                cuota1 = td.find("span")
                if cuota1 is not None:
                    casa1 = td.find("img")
                    cuota1 = float(cuota1.text)
                    casa1 = casa1["alt"]

            if cont == 3:
                cuota2 = td.find("span")
                if cuota2 is not None:
                    casa2 = td.find("img")
                    cuota2 = float(cuota2.text)
                    casa2 = casa2["alt"]
            if cont == 4:
                cuota3 = td.find("span")
                if cuota3 is not None:
                    casa3 = td.find("img")
                    cuota3 = float(cuota3.text)
                    casa3 = casa3["alt"]
            if cuota1 is not None and cuota2 is not None and cuota3 is not None:
                if cuota1 > 0 and cuota2 > 0 and cuota3 > 0:
                    percentage = (1/cuota1)+(1/cuota2)+(1/cuota3)
                    arrayFecha = fecha.split(" ")
                    mes = 0
                    dia = 0
                    ano = 0

                    if arrayFecha[1] == "Feb":
                        mes = "02"
                    dia = int(arrayFecha[0].replace(",", ""))
                    ano = int(arrayFecha[2])
                    fecha = str(ano)+"-"+str(mes)+"-"+str(dia)+" "+str(hora)
                    equipoarray = match.split(" — ")
                    team1 = equipoarray[0]
                    team2 = equipoarray[1]
                    odd1 = cuota1
                    odd2 = cuota2
                    odd3 = cuota3
                    bookie1 = casa1
                    bookie2 = casa2
                    bookie3 = casa3

                    if bookie1 == "William Hill":
                        bookie1 = "WilliamHill"
                    if bookie2 == "William Hill":
                        bookie2 = "WilliamHill"
                    if bookie3 == "William Hill":
                        bookie3 = "WilliamHill"
                    enviarPost(match, fecha, team1, team2, odd1, odd2, odd3,
                               bookie1, bookie2, bookie3, percentage, sport, country, league)
            cont = cont+1
Exemplo n.º 34
0
import numpy as np
from urllib.request import urlopen
# url with dataset
url = "http://archive.ics.uci.edu/ml/machine-learning-databases/pima-indians-diabetes/pima-indians-diabetes.data"
# download the file
raw_data = urlopen(url)
# load the CSV file as a numpy matrix
dataset = np.loadtxt(raw_data, delimiter=",")
# separate the data from the target attributes
X = dataset[:, 0:7]
y = dataset[:, 8]
print(raw_data)
Exemplo n.º 35
0
f = open('pets.txt', 'r')
pets = json.loads(f.read())
f.close()

pprint(pets)


########################################

#Read page with a slice

from urllib2 import urlopen

# Add your code here!
website = urlopen('http://placekitten.com/') 
kittens = website.read()

print kittens[559:1000]	


########################################

#Poll NPR by story ID# and print story titles

from urllib2 import urlopen
from json import load

url = "http://api.npr.org/query?apiKey="

key = "API_KEY"
Exemplo n.º 36
0
from urllib.request import urlopen

html = urlopen("http://www.baidu.com")
print('hello world')
print(html.read())
Exemplo n.º 37
0
from urllib.request import urlopen
from bs4 import BeautifulSoup
import requests
import re

url = "https://www.americanas.com.br/"
html = urlopen("http://www.pythonscraping.com/pages/page3.html")

#request  =  requests.get(url)

#soup4 = BeautifulSoup(request.text,'lxml')

#verifyId = soup4.find('div', id='sas_30352')
#verifyClass = soup4.find('div', class_="card-product-image placeholder picture")

bsObj = BeautifulSoup(html)
images = bsObj.findAll("img", {"src": re.compile(r"\.\.\/img\/gifts/img.*\.jpg")})
for image in images:
 print(image["src"])

# print(verifyClass.split())  # verifyClass is commented out above