Пример #1
0
def checker():
    """Re-fetch every song in the collection and sync the index with its page.

    For each song returned by ``load_collection()`` the song's URL is
    requested again:

    * If the response status is not 200, the song is deleted from the
      collection, all of its tokens are removed from the index, and
      processing moves on to the next song.
    * Otherwise artist, title and lyrics are re-parsed from the HTML. If the
      resulting ``Song`` differs from the stored one, tokens that disappeared
      are removed from the index and tokens that are new are added.

    A ``CHECKED: <filename>`` line is printed for every processed song.
    """
    collection = load_collection()
    for song in collection:
        resp = r.get(song.url)

        if resp.status_code != 200:
            # The page is no longer available: drop the song and its tokens.
            delete_from_collection(song.filename)
            remove_from_index(preprocess(song.lyrics, lemma=False),
                              song.filename)
            print(f'CHECKED: {song.filename}')
            # Do not fall through: the body of a failed response is not a
            # lyrics page, so parsing it would crash or re-index garbage for
            # a song that has just been deleted.
            continue

        soup = BS(resp.text, features='html.parser')

        # Get artist, title and text from the doc
        # NOTE(review): the [:-13] slice drops the last 13 characters of the
        # artist element's text -- presumably a fixed suffix on the page;
        # confirm against the site's markup.
        artist = soup.find(class_='lyric-artist').text[:-13]
        title = soup.find(class_='lyric-title').text
        lyrics = soup.find(class_='lyric-body').text

        song2 = Song(artist, title, lyrics, song.url)
        # Only touch the index when the freshly parsed song differs from the
        # stored one (relies on Song's own comparison).
        if song != song2:
            # The `-` below assumes preprocess() returns sets -- TODO confirm.
            tokens_1 = preprocess(song.lyrics, lemma=False)
            tokens_2 = preprocess(song2.lyrics, lemma=False)

            # Tokens of the stored version that are absent from the update
            to_remove = tokens_1 - tokens_2
            remove_from_index(to_remove, song.filename)

            # Tokens that are new in the update
            to_update = tokens_2 - tokens_1
            update_index(to_update, song.filename)

        print(f'CHECKED: {song.filename}')
Пример #2
0
def parse_loop():
	logfile = config.logdir + "/aprs.log"
	os.system("mkdir -p " + config.logdir)
	os.system("touch " + logfile)
	#start aprs_decoder
	os.system("./aprs_decoder >%s&" % logfile)

	try:
		#start web updaters
		thread.start_new_thread(uploader_thread, ())
		thread.start_new_thread(downloader_thread, ())


		wait_start = True
		where = 0
		while 1:
			file = open(logfile,'r')
			file.seek(where)
			d = file.readline()
			where = file.tell()
			file.close()
			if not d:
				time.sleep(0.1)
			else:
				if wait_start:
					#Check for correct config.sender address
					if d.startswith("AFSK1200: fm %s" % config.sender.upper()):
						wait_start = False
				else:
					wait_start = True
					if d[1:7].isdigit() and d[7] == 'h':
						name = d[1:7]
					elif config.log_all_messages:
						now = datetime.utcnow()
						name = "%02d%02d%02d" % (now.hour, now.minute, now.second)
					else:
						print "Unhandled message:", d.strip()
						continue

					msg_name = config.logdir + "/" + name

					found = False
					for msg in glob.glob(config.logdir + "/" + "[0-9]" * 6 + "*"):
						if msg.startswith(msg_name):
							print "Message", msg, "already present"
							found = True
							break

					if not found:
						utils.write_file(config.logdir + "/" + name, d)
						utils.write_file(config.logdir + "/" + name + ".unsent", d)
						utils.update_index(config.logdir)

	except KeyboardInterrupt:
		print "\nCTRL-C pressed, exit"
	finally:
		os.system("killall aprs_decoder")
Пример #3
0
#!/usr/bin/env python

import config

import utils

def web_index():
	"""Fetch config.msg_index_url via utils.http_get and return the result of .split() on it."""
	body = utils.http_get(config.msg_index_url)
	entries = body.split()
	return entries

def parse_index(index):
	"""Return the set of entries of *index*, each with surrounding whitespace stripped.

	*index* is any iterable of strings (a list of names, or an open file
	iterated line by line). Duplicates collapse into one element.
	"""
	return set(entry.strip() for entry in index)

if __name__ == "__main__":
	try:
		l = open(config.logdir + "/" + config.msg_index)
		local = parse_index(l)
	except IOError:
		local = set()

	web = parse_index(web_index())
	diff_web = web - local
	for d in reversed(sorted(diff_web)):
		print "Getting", d
		msg = utils.http_get(config.base_url + d)
		utils.write_file(config.logdir + "/" + d, msg)
		utils.update_index(config.logdir)