Example #1
def grab(channel, timespan):
    tz = pytz.timezone("Europe/Athens")
    now = datetime.datetime.now(tz)
    charset = "windows-1253"
    shows = []
    a = 0
    if now.time().hour < 7:
        a = -1
    for i in range(a, 14):
        date = now + datetime.timedelta(days=i)
        text = helper.download("https://program.ert.gr/Ert1/index.asp?id=" +
                               channel + "&pdate=" + date.strftime("%d/%m/%Y"),
                               encoding=charset)
        if text is None:
            continue

        sections = helper.split(
            text, "<td width=\"50\" align=\"center\" class=\"table\">",
            "</tr></table>")
        laststart = datetime.datetime.min.replace(tzinfo=tz)
        for section in sections:
            show = {}

            temp = re.search("(\d\d):(\d\d)", section)
            show["start"] = date.replace(hour=int(temp.group(1)),
                                         minute=int(temp.group(2)),
                                         second=0,
                                         microsecond=0)
            if show["start"] < laststart:
                date += datetime.timedelta(days=1)
                show["start"] += datetime.timedelta(days=1)

            if (show["start"] - now).total_seconds() / 3600 > timespan:
                lastshow = True
            else:
                lastshow = False

            laststart = show["start"]

            temp = re.search("<a class=\"black\".*href=\"(.*)\">(.*)</a>",
                             section)

            show["title"] = temp.group(2)

            subtitle = helper.cut(
                section, "<td width=\"3\"></td><td><font color=\"#6e6868\">",
                "</font>")
            if subtitle is not None and subtitle:
                show["sub-title"] = subtitle

            link = temp.group(1)
            if link[0] == "/":
                link = "https://program.ert.gr" + link
            if link:
                show["details-url"] = link

            shows.append(show)
            if lastshow:
                return shows
    return shows
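Every snippet on this page leans on a project-specific helper module (and assumes module-level imports such as datetime, pytz, re and json that the extracts do not show). The module itself never appears here, and its download signature clearly varies between projects, but judging only from how the EPG scrapers above and below use it, a minimal Python 3 sketch of the string-scraping variant might look like the following. The names, return values and error handling are assumptions, not the real implementation.

import re
import urllib.request


def download(url, encoding="utf-8", headers=None):
    """Fetch a URL and return its body as text, or None on any failure."""
    try:
        request = urllib.request.Request(url, headers=headers or {})
        with urllib.request.urlopen(request) as response:
            return response.read().decode(encoding, errors="replace")
    except OSError:
        return None


def cut(text, start, end):
    """Return the text between the first `start` marker and the next `end`, or None."""
    begin = text.find(start)
    if begin == -1:
        return None
    begin += len(start)
    finish = text.find(end, begin)
    if finish == -1:
        return None
    return text[begin:finish]


def split(text, start, end):
    """Return one string per start..end section, keeping the start marker."""
    sections = []
    position = 0
    while True:
        begin = text.find(start, position)
        if begin == -1:
            return sections
        finish = text.find(end, begin)
        if finish == -1:
            return sections
        sections.append(text[begin:finish])
        position = finish + len(end)


def cleanup(text):
    """Strip tags and collapse whitespace in a scraped HTML fragment."""
    text = re.sub(r"<[^>]*>", "", text)
    return re.sub(r"\s+", " ", text).strip()

Other examples below use a download with a different shape, saving to a file path (Examples #21 and #22) or taking a headers argument (Example #33), so treat this sketch as a rough stand-in only.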
Example #2
def grabdetails(url):
    charset = "windows-1253"
    text = helper.download(url, encoding=charset)
    if text is None:
        return None
    show = {}
    temp = helper.split(text, "<div align=\"justify\" class=\"black\">",
                        "</div>")
    description = ""
    for d in temp:
        description += d
    if description:
        show["desc"] = helper.cleanup(description)
    director = re.search("Σκηνοθεσία</b>: (.*?)(?:\n|<br>)", text)
    if director is not None:
        show["director"] = helper.cleanup(director.group(1))
    presenter = re.search("Παρουσίαση</b>: (.*?)(?:\n|<br>)", text)
    if presenter is not None:
        show["presenter"] = helper.cleanup(presenter.group(1))
    producer = re.search("Οργάνωση παραγωγής: (.*?)(?:\n|<br>)", text)
    if producer is not None:
        show["producer"] = helper.cleanup(producer.group(1))
    writer = re.search("Αρχισυνταξία: (.*?)(?:\n|<br>)", text)
    if writer is not None:
        show["writer"] = helper.cleanup(writer.group(1))
    return show
Example #3
def grab(channel, timespan):
    tz = pytz.timezone("UTC")
    now = datetime.datetime.now(tz)
    shows = []
    for i in range(9):
        text = helper.download("https://www.freesat.co.uk/tv-guide/api/" +
                               str(i) + "/?channel=" + channel)
        if text is None:
            continue
        events = json.loads(text)[0]["event"]

        for event in events:
            show = {}
            show["start"] = datetime.datetime.fromtimestamp(
                event["startTime"], tz)
            if (show["start"] - now).total_seconds() / 3600 > timespan:
                return shows
            show["stop"] = show["start"] + datetime.timedelta(
                seconds=event["duration"])
            show["title"] = event["name"]
            show["desc"] = event["description"]
            if "episodeNo" in event:
                show["episode-num"] = (event["episodeNo"], "onscreen")

            shows.append(show)
    return shows
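The grab functions in these EPG examples follow the same implicit contract: they take a channel id and a timespan in hours, and return a list of dicts whose "start" (and usually "stop") values are timezone-aware datetimes, alongside keys like "title", "desc" or "sub-title". A hypothetical caller, with a function name and output format invented purely for illustration, could turn that into a flat listing:

def print_listing(channel, timespan=24):
    # Illustrative only; the channel id format depends on the site being scraped.
    for show in grab(channel, timespan):
        start = show["start"].strftime("%Y-%m-%d %H:%M")
        stop = show["stop"].strftime("%H:%M") if "stop" in show else "?"
        print("%s - %s  %s" % (start, stop, show["title"]))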
Example #4
def channellist():
	text = helper.download("http://programm.ard.de/")
	channels = helper.split(text, "Tagesprogramm::", "</a>")
	result = []
	for channel in channels:
		temp = re.search("Tagesprogramm::(.*?)\".*\?sender\=-?(.*?)\&", channel)
		result.append((temp.group(2), temp.group(1), temp.group(1)))
	return result
Example #5
def channellist():
    text = helper.download("https://www.freesat.co.uk/tv-guide/api/")
    channels = json.loads(text)
    result = []
    for channel in channels:
        result.append((channel["channelid"], channel["channelname"],
                       channel["channelname"]))
    return result
Example #6
def channellist():
    text = helper.download("http://www.ishow.gr/tvNow.asp")
    channels = helper.split(text, "<b><a style=\"color:#E1D8BE\"", "</a>")
    result = []
    for channel in channels:
        temp = re.search("\?cid=(.*?)\">(.*)</a>", channel)
        result.append((temp.group(1), temp.group(2), temp.group(2)))
    result.sort(key=lambda r: int(r[0]))
    return result
Example #7
def grabdetails(url):
	text = helper.download(url)
	if text is None:
		return None
	show = {}
	description = helper.cut(text, "<meta name=\"description\" content=\"", "\" />")
	if description is not None:
		show["desc"] = helper.cleanup(description)
	return show
Example #8
def grab(channel, timespan):
    tz = pytz.timezone("Europe/Athens")
    now = datetime.datetime.now(tz)
    shows = []
    a = 0
    if now.time().hour < 4:
        a = -1
    for i in range(a, 6):
        date = now + datetime.timedelta(days=i)
        text = helper.download(
            "http://ishow.gr/showTodayChannelProgramm.asp?cid=" + channel +
            "&gotoDay=" + str(i))
        if text is None:
            continue

        sections = helper.split(text, "<tr id=\"progTr", "</tr>")
        laststart = datetime.datetime.min.replace(tzinfo=tz)
        for section in sections:
            show = {}

            temp = re.search(
                "<td class=\"progTd progTdTime\".*?>(\d\d):(\d\d)", section)
            show["start"] = date.replace(hour=int(temp.group(1)),
                                         minute=int(temp.group(2)),
                                         second=0,
                                         microsecond=0)
            if show["start"] < laststart:
                date += datetime.timedelta(days=1)
                show["start"] += datetime.timedelta(days=1)

            if (show["start"] - now).total_seconds() / 3600 > timespan:
                lastshow = True
            else:
                lastshow = False

            laststart = show["start"]

            title = re.search("<div class=\"grandTitle\".*>(.+)\s*?</div>",
                              section)
            show["title"] = helper.cleanup(title.group(1))

            subtitle = helper.cut(section, "<div class=\"subTitle\">",
                                  "</div>")
            if subtitle is not None and subtitle:
                show["sub-title"] = helper.cleanup(subtitle)

            temp = re.search("<div class=\"grandTitle\">.*?href=\"(.*?)\"",
                             section)
            if temp is not None:
                show["details-url"] = "http://ishow.gr" + temp.group(1)

            shows.append(show)
            if lastshow:
                return shows
    return shows
Example #9
def grabdetails(url):
    text = helper.download(url)
    if text is None:
        return None
    show = {}
    temp = re.search(
        "<meta property=\"og:description\" content=\"(.*?)(?:\"/>|<)", text)
    if temp is not None:
        description = temp.group(1)
        if description:
            show["desc"] = helper.cleanup(description)
    return show
Example #10
	def update(self):
		'''update(self) - Fill Queue with new Pastie IDs'''
		print '[*] Retrieving Pastie ID\'s'
		results = [tag for tag in BeautifulSoup(helper.download(self.BASE_URL + '/pastes')).find_all('p','link') if tag.a]	
		new_pastes = []
		if not self.ref_id: results = results[:60]
		for entry in results:
			paste = PastiePaste(entry.a['href'].replace(self.BASE_URL + '/pastes', ''))
			# Check to see if we found our last checked URL
			if paste.id == self.ref_id:
				break
			new_pastes.append(paste)
		for entry in new_pastes[::-1]:
			print '[+] Adding URL: ' + entry.url
			self.put(entry)
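This update method constructs PastiePaste objects and later code reads their id, url and text attributes, but the class itself is not shown on this page. A minimal stand-in consistent with that usage, where the attribute layout is inferred and the base URL is an assumption, could be:

class PastiePaste(object):
    # Minimal stand-in; the real class in the source project is likely richer.
    def __init__(self, paste_id, base_url='http://pastie.org'):
        self.id = paste_id.strip('/')
        self.url = base_url + '/pastes/' + self.id
        self.text = None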
Example #11
	def update(self):
		'''update(self) - Fill Queue with new Slexy IDs'''
		print '[*] Retrieving Slexy ID\'s'
		results = BeautifulSoup(helper.download(self.BASE_URL + '/recent')).find_all(lambda tag: tag.name=='td' and tag.a and '/view/' in tag.a['href'])	
		new_pastes = []
		if not self.ref_id: results = results[:60]
		for entry in results:
			paste = SlexyPaste(entry.a['href'].replace('/view/', ''))
			# Check to see if we found our last checked URL
			if paste.id == self.ref_id:
				break
			new_pastes.append(paste)
		for entry in new_pastes[::-1]:
			print '[+] Adding URL: ' + entry.url
			self.put(entry)
Example #12
def grabdetails(url):
	text = helper.download(url)
	if text is None:
		return None
	show = {}
	subtitle = helper.cut(text, "<h3 class=\"overlay-subtitle\">", "</h3>")
	if subtitle is not None and subtitle:
		show["sub-title"] = helper.cleanup(subtitle)

	description = helper.cut(text, "<p class=\"overlay-text\">", "</p>")
	if description is not None and description:
		show["desc"] = helper.cleanup(description)

	if text.find("Untertitel für Hörgeschädigte") != -1:
		show["subtitles"] = True
	return show
Example #13
def grab(channel, timespan):
	tz = pytz.timezone("Europe/Berlin")
	now = datetime.datetime.now(tz)
	shows = []
	a = 0
	if now.time().hour < 7:
		a = -1

	for i in range(a, 14):
		date = now + datetime.timedelta(days=i)
		text = helper.download("http://www.zdf.de/live-tv?airtimeDate=" + date.strftime("%Y-%m-%d"))
		if text is None:
			continue

		text = helper.cut(text, "<section class=\"b-epg-timeline timeline-" + channel, "</section>")

		sections = helper.split(text, "<li", "</li>")
		laststart = datetime.datetime.min.replace(tzinfo=tz)
		for section in sections:
			show = {}

			temp = helper.cut(section, "<span class=\"time\">", "</span>")
			temp = re.search("(\d\d):(\d\d) - (\d\d):(\d\d)", temp)	
			show["start"] = date.replace(hour=int(temp.group(1)), minute=int(temp.group(2)), second=0, microsecond=0)
			if show["start"] < laststart:
				date += datetime.timedelta(days=1)
				show["start"] += datetime.timedelta(days=1)

			if (show["start"] - now).total_seconds() / 3600 > timespan:
				return shows

			laststart = show["start"]
			show["stop"] = date.replace(hour=int(temp.group(3)), minute=int(temp.group(4)), second=0, microsecond=0)
			if show["stop"] < show["start"]:
				show["stop"] += datetime.timedelta(days=1)
			temp = re.search("<span class=\"overlay-link-category\">(.*?)<span class=\"visuallyhidden\">:</span></span>\s*(?:<.*>)*\s*(.*?)\s*?</a>", section)
			if temp.group(1):
				show["title"] = helper.cleanup(temp.group(1) + " - " + temp.group(2))
			else:
				show["title"] = helper.cleanup(temp.group(2))

			temp = re.search("contentUrl\": \"(.*)\"", section)
			if temp is not None:
				show["details-url"] = "http://www.zdf.de" + temp.group(1)

			shows.append(show)
	return shows
Example #14
 def update(self):
     '''update(self) - Fill Queue with new Slexy IDs'''
     logging.info('[*] Retrieving Slexy ID\'s')
     results = BeautifulSoup(helper.download(self.BASE_URL + '/recent')).find_all(
         lambda tag: tag.name == 'td' and tag.a and '/view/' in tag.a['href'])
     new_pastes = []
     if not self.ref_id:
         results = results[:60]
     for entry in results:
         paste = SlexyPaste(entry.a['href'].replace('/view/', ''))
         # Check to see if we found our last checked URL
         if paste.id == self.ref_id:
             break
         new_pastes.append(paste)
     for entry in new_pastes[::-1]:
         logging.info('[+] Adding URL: ' + entry.url)
         self.put(entry)
Example #15
def grab(channel, timespan):
	tz = pytz.timezone("Europe/Berlin")
	now = datetime.datetime.now(tz)
	shows = []
	a = 0
	if now.time().hour < 7:
		a = -1
	for i in range(a, 14):
		date = now + datetime.timedelta(days=i)
		text = helper.download("http://programm.ard.de/TV/Programm/Sender?datum=" + date.strftime("%d.%m.%Y") + "&hour=0&sender=" + channel)
		if text is None:
			continue

		sections = helper.split(text, "<li class=\"eid", "</li>")
		laststart = datetime.datetime.min.replace(tzinfo=tz)
		for section in sections:
			show = {}
			temp = re.search("<span class=\"date[\s\S]*?(\d\d):(\d\d)", section)
			show["start"] = date.replace(hour=int(temp.group(1)), minute=int(temp.group(2)), second=0, microsecond=0)
			if show["start"] < laststart:
				date += datetime.timedelta(days=1)
				show["start"] += datetime.timedelta(days=1)

			if (show["start"] - now).total_seconds() / 3600 > timespan:
				lastshow = True
			else:
				lastshow = False

			laststart = show["start"]

			show["title"] = helper.cleanup(re.search("<span class=\"title[\s\S]*?>\s*([^<]*?)[\t\n]", section).group(1))
			temp = re.search("<span class=\"subtitle[\s\S]*?>\s*([^<]*?)[\t\n]", section)
			if temp is not None:
				subtitle = temp.group(1)
				if subtitle:
					show["sub-title"] = helper.cleanup(subtitle)

			temp = re.search("<a class=\"sendungslink[\s\S]*?href=\"(.*?)\"", section)
			if temp is not None:
				show["details-url"] = "http://programm.ard.de" + temp.group(1)
			shows.append(show)
			if lastshow:
				return shows
	return shows
Example #16
 def update(self):
     '''update(self) - Fill Queue with new Pastie IDs'''
     logging.info('Retrieving Pastie ID\'s')
     results = [
         tag for tag in BeautifulSoup(
             helper.download(self.BASE_URL +
                             '/pastes'), 'lxml').find_all('p', 'link')
         if tag.a
     ]
     new_pastes = []
     if not self.ref_id:
         results = results[:60]
     for entry in results:
         paste = PastiePaste(entry.a['href'].replace(
             self.BASE_URL + '/pastes/', ''))
         # Check to see if we found our last checked URL
         if paste.id == self.ref_id:
             break
         new_pastes.append(paste)
     for entry in new_pastes[::-1]:
         if self.put(entry):
             logging.debug('Adding URL: ' + entry.url)
Example #17
	def monitor(self, bot, l_lock, t_lock):
		self.update()
		while(1):
			while not self.empty():
				paste = self.get()
				self.ref_id = paste.id
				with l_lock:
					helper.log('[*] Checking ' + paste.url)
				paste.text = helper.download(paste.url)
				with l_lock:
					tweet = helper.build_tweet(paste)
				if tweet:
					print tweet
					with t_lock:
						helper.record(tweet)
						bot.PostUpdate(tweet)
			self.update()
			# If no new results... sleep for 5 sec
			while self.empty():
				with l_lock:
					helper.log('[*] No results... sleeping')
				sleep(SLEEP_SLEXY)
				self.update()
Example #18
	def monitor(self, bot, l_lock, t_lock):
		self.update()
		while(1):
			while not self.empty():
				paste = self.get()
				self.ref_id = paste.id
				with l_lock:
					helper.log('[*] Checking ' + paste.url)
				# goober pastie - Not actually showing *raw* text.. Still need to parse it out
				paste.text = BeautifulSoup(helper.download(paste.url)).pre.text
				with l_lock:
					tweet = helper.build_tweet(paste)
				if tweet:
					print tweet
					with t_lock:
						helper.record(tweet)
						bot.PostUpdate(tweet)
			self.update()
			# If no new results... sleep for 5 sec
			while self.empty():
				with l_lock:
					helper.log('[*] No results... sleeping')
				sleep(SLEEP_PASTIE)
				self.update()
Example #19
def grab(channel, timespan):
    # for olympia this is https://olympia.zdf.de
    eventurl = "https://european-championships.zdf.de"
    # length of the event in days
    eventduration = 11

    tz = pytz.timezone("UTC")
    now = datetime.datetime.now(tz)
    shows = []

    for i in range(eventduration + 1):
        text = helper.download(eventurl + "/feeds/epg-" + str(i))
        if text is None:
            continue
        events = json.loads(text)["epg-" +
                                  str(i)]["data"][int(channel)]["shows"]

        for event in events:
            show = {}
            show["start"] = datetime.datetime.fromtimestamp(event["start"], tz)
            if (show["start"] - now).total_seconds() / 3600 > timespan:
                return shows
            show["stop"] = datetime.datetime.fromtimestamp(event["end"], tz)
            title = event["title"]
            category = event["category"]["name"]
            if category in title:
                show["title"] = title
            else:
                show["title"] = category + ": " + title
            show["desc"] = event["text"]
            show["presenter"] = event["presenter"]
            show["url"] = eventurl + event["url"]
            show["icon"] = "https:" + event["image"]

            shows.append(show)
    return shows
Example #20
def grab(channel, timespan):
    tz = pytz.timezone("UTC")
    now = datetime.datetime.now(tz)
    shows = []

    laststart = datetime.datetime.min.replace(tzinfo=tz)
    for i in range(1 + timespan // 4):
        timestamp = int(time.time()) + i * 14400
        text = helper.download("https://www.dw.com/epg/data/4765/1/" +
                               str(timestamp) + "000")
        if text is None:
            continue

        channeldata = helper.cut(text, "data-channel-id=\"" + channel + "\"",
                                 "data-channel-id")
        if not channeldata:
            try:
                channeldata = text.split("data-channel-id=\"" + channel +
                                         "\"")[1]
            except IndexError:
                continue
        sections = helper.split(channeldata, "<div class=\"epgProgram\"",
                                "<div class=\"broadcastlinks\">")

        for section in sections:
            show = {}
            day = helper.cut(section, "data-day=\"", "\"")
            begintime = helper.cut(section, "data-begin-time=\"", "\"")
            endtime = helper.cut(section, "data-end-time=\"", "\"")

            show["start"] = pytz.utc.localize(
                datetime.datetime.strptime(day + begintime, "%Y-%m-%d%H:%M"))
            if show["start"] <= laststart:
                continue
            if (show["start"] - now).total_seconds() / 3600 > timespan:
                return shows
            laststart = show["start"]

            show["stop"] = pytz.utc.localize(
                datetime.datetime.strptime(day + endtime, "%Y-%m-%d%H:%M"))
            if show["stop"] < show["start"]:
                show["stop"] += datetime.timedelta(days=1)

            show["title"] = helper.cleanup(
                helper.cut(section, "<h2 class=\"title\">", "</h2>"))
            url = helper.cut(section, "<a href=\"", "\">")
            if url is not None and url:
                show["url"] = "https://www.dw.com" + url
            description = helper.cleanup(
                helper.cut(section, "<ul class=\"topics\">", "</ul>"))
            if description is not None and description:
                show["desc"] = description

            try:
                icon = re.search("<img[\s\S]*?/>", section).group(0)
                width = helper.cut(icon, "width=\"", "\"")
                height = helper.cut(icon, "height=\"", "\"")
                src = "https://www.dw.com" + helper.cut(icon, "src=\"", "\"")
                show["icon"] = (src, {"width": width, "height": height})
            except (AttributeError, IndexError):
                pass

            shows.append(show)
    return shows
Example #21
import requests
from helper import download
from bs4 import BeautifulSoup

url = "https://en.wikipedia.org/wiki/List_of_Dragon_Ball_Z_episodes"

response = requests.get(url)

soup = BeautifulSoup(response.content, 'html5lib')

outer_div = soup.find('div', {'class': 'thumb tright'})
img_tag = outer_div.find('img')
img_url = 'https:' + img_tag['src']

print(img_url)
download(img_url, "Goku.jpg")
Example #22
import os
import numpy as np
import cntk as C
import helper

# Check for an environment variable defined in CNTK's test infrastructure
envvar = 'CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'
def is_test(): return envvar in os.environ

path = './models/vgg16_weights.bin'
url = 'https://cntk.ai/jup/models/vgg16_weights.bin'

# We check for the model locally
if not os.path.exists(path):
    # If not there we might be running in CNTK's test infrastructure
    if is_test():
        path = os.path.join(os.environ[envvar],'PreTrainedModels','Vgg16','v0',path)
    else:
        #If neither is true we download the file from the web
        print('downloading VGG model (~0.5GB)')
        helper.download(url, path)

layers = helper.load_vgg(path)

print('loaded VGG model')

# A convolutional layer in the VGG network
def vggblock(x, arrays, layer_map, name):
    f = arrays[0]
    b = arrays[1]
    k = C.constant(value=f)
    t = C.constant(value=np.reshape(b, (-1, 1, 1)))
    y = C.relu(C.convolution(k, x, auto_padding=[False, True, True]) + t)
    layer_map[name] = y
    return y
Example #23
def final():
    helper.download(session['title'])
    return send_file('{}.mp3'.format(session['title']),
                     as_attachment=True,
                     attachment_filename='{}.mp3'.format(session['title']))
Example #24
 def get_paste_text(self, paste):
     return BeautifulSoup(helper.download(paste.url)).pre.text
Example #25
 def get_paste_text(self, paste):
     return helper.download(paste.scraping_url)
Example #26
def test_download():
    url = 'https://www.google.com/logos/doodles/2020/wear-a-mask-save-lives-copy-6753651837108810-s.png'
    img1 = os.path.join('sample_data', 'download.png')
    helper.download(url, img1)
    helper.show(img1)
Example #27
import json
import pandas as pd
import os
from helper import url_paths, download

# read user inputs
with open('nam_download_inputs.json', 'r') as f:
    inputs = json.load(f)

# format list of dates to download NAM data for
dates_to_download = pd.date_range(inputs['start_date'], inputs['end_date'])
months_to_download = [1, 2, 3, 4, 5, 11, 12]  # only scrape months during ski season
dates_to_download = dates_to_download[dates_to_download.month.isin(months_to_download)]

# function to create list of url paths
url_paths = url_paths(dates_to_download)  # comment out once list of paths generated

# create dataframe of url paths
df_url_paths = pd.DataFrame(url_paths)
df_url_paths.columns = ['url_paths']

# export url paths to csv
df_url_paths.to_csv(os.path.join(inputs['relative_data_path'], 'nam_data_url_paths.csv'), index=False)

# download model wind speed and direction data
df_output = download(os.path.join(inputs['relative_data_path'], 'nam_data_url_paths.csv'), inputs['requested_lat'], inputs['requested_lon'], inputs['pressure_levels'])

# export data to gzip compressed csv
df_output.to_csv(os.path.join(inputs['relative_data_path'], 'nam_data.csv.gz'), index=False, compression='gzip')
Example #28
import requests
from lxml import html
from helper import download

for i in range(2):

    url = "https://arxiv.org/search/advanced?advanced=&terms-0-operator=AND&terms-0-term=&terms-0-field=title&classification-computer_science=y&classification-physics_archives=all&classification-include_cross_list=include&date-filter_by=all_dates&date-year=&date-from_date=&date-to_date=&date-date_type=announced_date_first&abstracts=hide&size=50&order=-announced_date_first&start={page_num}"
    url = url.format(page_num=i * 50)

    response = requests.get(url)

    tree = html.fromstring(response.content)

    range_of_files_per_page = "position()<=2"

    filename_xpath = '/html/body/main/div[2]/ol/li[{range_files}]/div/p/a'
    element_xpath = '/html/body/main/div[2]/ol/li[{range_files}]/div/p/span/a[1]'

    filename_xpath = filename_xpath.format(range_files=range_of_files_per_page)
    element_xpath = element_xpath.format(range_files=range_of_files_per_page)

    filenames = tree.xpath(filename_xpath)
    elements = tree.xpath(element_xpath)

    for filename, element in zip(filenames, elements):
        pdf_file_name = filename.text
        pdf_file_name = pdf_file_name.replace(':', '_')
        pdf_file_name = pdf_file_name.replace('.', '_')
        pdf_file_name += ".pdf"

        pdf_file_link = element.attrib['href']
        pdf_file_link += ".pdf"

        download(pdf_file_link, pdf_file_name)
Example #29
                                dest='tags',
                                help='Tags to Search for',
                                nargs='+',
                                required=True)
    parserDownload.add_argument('-f',
                                '--folder',
                                dest='downloadFolder',
                                help='Folder to Download Images to',
                                required=True)
    parserDownload.add_argument('-b',
                                '--booru',
                                dest='booru',
                                help='Booru Site to Search',
                                required=True)
    parserDownload.add_argument('-p',
                                '--page',
                                dest='page',
                                help='Page to start downloading images from',
                                nargs='?',
                                default=1)

    #parserLoad = subParsers.add_parser('load', help='Load New Folders/Images')
    #parserLoad.add_argument('-f', '--folder', dest='imageFolder', help='Location of Folder to Load', required=True)

    args = parser.parse_args()

    config = startup(args.configLoc)

    if args.downloadFolder:
        helper.download(config['banned_tags'], config['ratings'], args.tags,
                        args.downloadFolder, args.booru, args.page)
Example #30
"""
Vector representation of Words
Neural Probablistic Approach
Skip-Gram Model
Theano Implementation
Github : peymanbey
"""
from __future__ import division
from helper import download, read_data, build_dataset, gen_batch
from math import sqrt
import numpy as np
from six.moves import urllib
from six.moves import xrange  # pylint: disable=redefined-builtin
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
#%%
name = download('text8.zip')
#%%
# store the data into a list
# the list contains the text as sequential words
words = read_data(name)
print 'Data size', len(words)
#%%
# Build a Dictionary and replace rare words with UNK tokken
# translate the input text in terms of unique numerical IDs
vocab_size = 50000
data, count, dictionary, reverse_dictionary = build_dataset(words, vocab_size)
#%%
# you can delete the 'words' to reduce memory usage
del words
print 'Most common words: ', count[:5]
print 'Sample data:', '\n', data[:10], '\n', [
    reverse_dictionary[i] for i in data[:10]]
Example #31
import os
import helper

project_name = "project"

# Create the project's directory if it doesn't exist already.

helper.softcreate(project_name)

# Download the tutorial's repository

tutorial_path = os.path.join(project_name, "tutorial.zip")

helper.download(
    "https://github.com/EdjeElectronics/TensorFlow-Object-Detection-API-Tutorial-Train-Multiple-Objects-Windows-10/archive/master.zip",
    tutorial_path)

helper.unzip(
    tutorial_path, project_name,
    "TensorFlow-Object-Detection-API-Tutorial-Train-Multiple-Objects-Windows-10-master"
)

# Download the model's directory

model_path = os.path.join(project_name, "model.zip")

helper.download("https://github.com/tensorflow/models/archive/master.zip",
                model_path)

helper.unzip(model_path, project_name, "models-master")
Example #32
 def get_paste_text(self, paste):
     return BeautifulSoup(helper.download(paste.url)).pre.text
Example #33
 def get_paste_text(self, paste):
     return helper.download(paste.url, paste.headers)