Example #1
    def __init__(self, verbose=False):
        if verbose:
            print("Initializing Scraper")

        self.verbose = verbose
        self.parser = Parser(verbose)
        self.persister = Persister()
Example #2
    def OnAddedToSpace(self, ballpark, dbspacecomponent):
        persister = Persister(ballpark.solarsystemID, self.itemID,
                              dbspacecomponent)
        bountyEscrowBonus, bounties = persister.GetStateForSystem()
        iskRegistry = IskRegistry(bounties)
        iskMover = IskMover(ballpark.broker.account)
        itemCreator = GetItemCreator(ballpark.inventory2, ballpark,
                                     self.attributes.tagTypeIDs.keys())
        escrow = Escrow(self, ballpark, iskRegistry, iskMover, itemCreator,
                        persister)
        item = ballpark.inventory2.GetItem(self.itemID)
        eventLogger = EventLogger(ballpark.broker.eventLog,
                                  ballpark.solarsystemID, item.ownerID,
                                  self.itemID)
        notifier = Notifier(ballpark.broker.notificationMgr)
        self.rangeNotifier = RangeNotifier(ballpark.solarsystemID, ballpark,
                                           ballpark.broker.machoNet,
                                           self.GetWallclockTime)
        ballpark.proximityRegistry.RegisterForProximity(
            self.itemID, 30000, self.rangeNotifier.PlayerInRange)
        lock = Lock(self)
        self.warpScrambler = WarpScrambler(self.itemID, lock, ballpark.dogmaLM)
        self.Initialize(ballpark, escrow, lock, persister, eventLogger,
                        notifier)
        self.escrow.SetBonus(bountyEscrowBonus)
Example #3
class Scraper():
    OPEN_URL_ONE   = "http://games.crossfit.com/scores/leaderboard.php?stage=0&sort=0&page="
    OPEN_URL_TWO   = "&division=1&region=0&numberperpage=100&competition=0&frontpage=0&expanded=0&year=" 
    OPEN_URL_THREE = "&full=0&showtoggles=1&hidedropdowns=1&showathleteac=0&=&is_mobile=&scaled=0&fittest=1&fitSelect=0"
    ATHLETE_URL    = "http://games.crossfit.com/athlete/"

    # Start Region Public Methods

    def scrape_athletes(self, starting_id=0):
        athlete_ids = self.persister.get_athlete_ids()
        for athlete_id in athlete_ids:
            self.scrape_athlete(athlete_id)

    def scrape_open(self, start=1, end=50):
        while start <= end:
            url = self.get_crossfit_games_url(str(start))
            self.scrape_open_url(url)
            start = start + 1

    # End Region Public Methods

    # Start Region Private Methods
    def __init__(self, verbose=False):
        if verbose:
            print("Initializing Scraper")

        self.verbose = verbose
        self.parser = Parser(verbose)
        self.persister = Persister()

    def scrape_athlete(self, athlete_id):
        if self.verbose:
            print("Scraping athlete " + str(athlete_id))

        response = requests.get(self.ATHLETE_URL + str(athlete_id))
        response.raise_for_status()
        athlete = self.parser.parse_athlete(athlete_id, response)
        self.persister.persist_athlete(athlete)

    def scrape_open_url(self, url):
        response = requests.get(url)
        response.raise_for_status()
        
        event_data = self.parser.parse_open(response)
        self.persister.persist_events(event_data)

    def get_crossfit_games_url(self, page="1", year="16"):
        url = self.OPEN_URL_ONE + page + self.OPEN_URL_TWO + year + self.OPEN_URL_THREE

        if self.verbose:
            print("Getting URL:\n" + url + "\n") 

        return url

    # End Region Private Methods
Example #4
class Scraper():
    OPEN_URL_ONE = "http://games.crossfit.com/scores/leaderboard.php?stage=0&sort=0&page="
    OPEN_URL_TWO = "&division=1&region=0&numberperpage=100&competition=0&frontpage=0&expanded=0&year="
    OPEN_URL_THREE = "&full=0&showtoggles=1&hidedropdowns=1&showathleteac=0&=&is_mobile=&scaled=0&fittest=1&fitSelect=0"
    ATHLETE_URL = "http://games.crossfit.com/athlete/"

    # Start Region Public Methods

    def scrape_athletes(self, starting_id=0):
        athlete_ids = self.persister.get_athlete_ids()
        for athlete_id in athlete_ids:
            self.scrape_athlete(athlete_id)

    def scrape_open(self, start=1, end=50):
        while start <= end:
            url = self.get_crossfit_games_url(str(start))
            self.scrape_open_url(url)
            start = start + 1

    # End Region Public Methods

    # Start Region Private Methods
    def __init__(self, verbose=False):
        if verbose:
            print("Initializing Scraper")

        self.verbose = verbose
        self.parser = Parser(verbose)
        self.persister = Persister()

    def scrape_athlete(self, athlete_id):
        if self.verbose:
            print("Scraping athlete " + str(athlete_id))

        response = requests.get(self.ATHLETE_URL + str(athlete_id))
        response.raise_for_status()
        athlete = self.parser.parse_athlete(athlete_id, response)
        self.persister.persist_athlete(athlete)

    def scrape_open_url(self, url):
        response = requests.get(url)
        response.raise_for_status()

        event_data = self.parser.parse_open(response)
        self.persister.persist_events(event_data)

    def get_crossfit_games_url(self, page="1", year="16"):
        url = self.OPEN_URL_ONE + page + self.OPEN_URL_TWO + year + self.OPEN_URL_THREE

        if self.verbose:
            print("Getting URL:\n" + url + "\n")

        return url
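
A minimal usage sketch for the Scraper class above, assuming Parser, Persister and requests are importable in the same project (the excerpt does not include an entry point, so the call sequence below is illustrative only):

# Hypothetical driver for the Scraper shown in Examples #3 and #4.
scraper = Scraper(verbose=True)
scraper.scrape_open(start=1, end=3)  # scrape the first three Open leaderboard pages
scraper.scrape_athletes()            # scrape every athlete id already stored by the Persister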
Example #5
    def __init__(self, verbose=False):
        if verbose:
            print("Initializing Scraper")

        self.verbose = verbose
        self.parser = Parser(verbose)
        self.persister = Persister()
Example #6
    def run(self):
        logger.info('starting skyline analyzer')
        pid = getpid()
        Analyzer(pid).start()
        Persister(pid).start()

        while 1:
            sleep(100)
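
In Example #6 both Analyzer and Persister are started as background workers from the agent's run() loop. A minimal sketch of what such a Persister worker could look like, assuming a threading.Thread subclass (the actual skyline implementation may differ):

from threading import Thread
from time import sleep

class Persister(Thread):
    # Hypothetical worker: the real class, its constructor arguments and its
    # persistence logic are not shown in the excerpt above.
    def __init__(self, parent_pid):
        super(Persister, self).__init__()
        self.parent_pid = parent_pid
        self.daemon = True

    def run(self):
        while True:
            # flush collected state to storage here
            sleep(10)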
Example #7
    def __init__(self, user_id):
        """
        Initialize a Presenter object to read and process data.

        Params:
            None

        Return:
            None
        """
        self.user_id = user_id
        self.persister = Persister(self.user_id)
        self.result = self.persister.read()  # init

        self.config = self.result.get('config')
        self.data = self.result.get('data')

        self.TF_data = self.data.get('top_favorites')
        self.LF_data = self.data.get('latent_factors')
        self.NN_data = self.data.get('nearest_neighbours')
Example #8
		i += 1
		pass

	config_file = os.path.join(statedir, config_file)

	config = Config(basedir = statedir, plugin_basedir = '.')
	try:
		config.load(config_file)
	except ConfigError, err:
		print >>sys.stderr, "In "+config_file+":"
		print >>sys.stderr, err
		return 1
	if verbose:
		config["verbose"] = True

	persister = Persister(os.path.join(statedir, "state"), Rawdog, locking)
	try:
		rawdog = persister.load()
	except KeyboardInterrupt:
		return 1
	except:
		print "An error occurred while reading state from " + statedir + "/state."
		print "This usually means the file is corrupt, and removing it will fix the problem."
		return 1

	if not rawdog.check_state_version():
		print "The state file " + statedir + "/state was created by an older"
		print "version of rawdog, and cannot be read by this version."
		print "Removing the state file will fix it."
		return 1
Example #9
from applog import logger

TEACHER_ID = -1
RH3K_ID = -2
OK = 'OK'
DISCONNECTED = 'disconnected'
STUDENT_NS = '/student'
TEACHER_NS = '/teacher'
ALL_NS = (TEACHER_NS, STUDENT_NS)

names: List[str] = []
stations: List[Dict[str, Any]] = [{} for i in range(settings['columns'] * settings['rows'])]
teacher_password = ''  # Change this
authenticated = False

persister = Persister()

app = Flask(__name__)
app.config['SECRET_KEY'] = 'secret!'
socketio = SocketIO(app, ping_interval=20)  # A bit more frequent than the default of 25, to try to avoid timeouts causing disconnections


@app.route('/')
def index():
    r = request
    seat_index = persister.seat_indexes_by_ip.get(r.remote_addr, -1)
    logger.info(f'Student page requested from {r.remote_addr} (last seat index: {seat_index})')
    return render_template('student.html', settings=json.dumps(settings), names=names, lastSeatIndex=seat_index)


@app.route('/teacher')
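
The excerpt above is cut off at the /teacher route. A typical entry point for a Flask-SocketIO application like this one (not part of the excerpt) would look roughly like:

if __name__ == '__main__':
    # socketio.run wraps app.run and serves both the HTTP routes and the
    # WebSocket namespaces registered above.
    socketio.run(app, host='0.0.0.0', port=5000)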
Example #10
from geoetl import GeoEtl
from persister import Persister

if __name__ == "__main__":
    getl = GeoEtl()
    getl.obtainData()
    getl.getDataKpis()

    persist = Persister(getl.dataframe)
    print("Data de paises preparada satisfactoriamente")
Example #11
"""
    build_data.py - Build data
    Author: Hoanh An ([email protected])
    Date: 04/27/18
"""

from persister import Persister
from config import *

if __name__ == "__main__":
    for i in range(MIN_ID, MAX_ID):
        persister = Persister(i)
        persister.write()
        print(persister.read())
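
Example #11 only exercises Persister(i).write() and persister.read(). A hypothetical file-backed Persister consistent with this usage (the project's real implementation is not shown in the excerpt) could look like:

import json
import os

class Persister:
    # Hypothetical sketch: one JSON document per id, stored under basedir.
    def __init__(self, user_id, basedir='data'):
        self.path = os.path.join(basedir, '{}.json'.format(user_id))

    def write(self, payload=None):
        os.makedirs(os.path.dirname(self.path), exist_ok=True)
        with open(self.path, 'w') as f:
            json.dump(payload if payload is not None else {}, f)

    def read(self):
        with open(self.path) as f:
            return json.load(f)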
Example #12
		print "No " + statedir + " directory"
		return 1

	sys.path.append(".")

	config = Config()
	try:
		config.load("config")
	except ConfigError, err:
		print >>sys.stderr, "In config:"
		print >>sys.stderr, err
		return 1
	if verbose:
		config["verbose"] = True

	persister = Persister("state", Rawdog, locking)
	try:
		rawdog = persister.load(no_block = no_lock_wait)
		if rawdog is None:
			return 0
	except KeyboardInterrupt:
		return 1
	except:
		print "An error occurred while reading state from " + statedir + "/state."
		print "This usually means the file is corrupt, and removing it will fix the problem."
		return 1

	if not rawdog.check_state_version():
		print "The state file " + statedir + "/state was created by an older"
		print "version of rawdog, and cannot be read by this version."
		print "Removing the state file will fix it."
Example #13
class Presenter():
    def __init__(self, user_id):
        """
        Initialize a Presenter object to read and process data.

        Params:
            None

        Return:
            None
        """
        self.user_id = user_id
        self.persister = Persister(self.user_id)
        self.result = self.persister.read()  # init

        self.config = self.result.get('config')
        self.data = self.result.get('data')

        self.TF_data = self.data.get('top_favorites')
        self.LF_data = self.data.get('latent_factors')
        self.NN_data = self.data.get('nearest_neighbours')

    def process(self, data):
        """
        Parse string to URL scheme.

        Params:
            data <list>: List of data to process

        Return:
            List of workable string endpoints.
        """
        endpoints = []
        for item in data:
            # Chop off unnecessary part to get a broader search result
            item = item.split('(')[0]

            # Parse URL
            query = urllib.parse.quote(item)
            endpoint = MOVIE_SEARCH_ENDPOINT + query + '&page=1'
            endpoints.append(endpoint)

        return endpoints

    def get_config(self):
        """
        Get configurations.

        Params:
            None

        Return:
            Configuration data in dictionary format.
        """
        return self.config

    def get_data(self, method):
        """
        Get trained data for a specific method.

        Params:
            method <str>: Method to get

        Return:
            Data in dictionary format.
        """
        if method == 'TF':
            data = self.TF_data
        elif method == 'NN':
            data = self.NN_data
        elif method == 'LF':
            data = self.LF_data
        else:
            # Avoid an UnboundLocalError for an unrecognized method name.
            data = None

        return data

    def get_endpoints(self, method):
        """
        Get processed endpoints for a specific method.

        Params:
            method <str>: Method to get

        Return:
            List of workable string endpoints.
        """
        data = self.get_data(method)

        endpoints = []
        for item in self.process(data):
            endpoints.append(item)

        return endpoints

    def get_posters(self, method, n):
        """
        Get posters with titles and links for a specific method.

        Params:
            method <str>: Method to get
            n <int>: Number of returned posters

        Return:
            List of poster objects with titles and links.
        """
        poster_paths = []
        for endpoint in self.get_endpoints(method):
            poster_path_r = requests.get(endpoint)
            try:
                first_result = json.loads(poster_path_r.text).get('results')[0]
            except Exception as e:
                print(e)
                first_result = ""

            if first_result != "":
                # pprint(first_result)
                original_title = first_result.get('original_title')
                poster_path = first_result.get('poster_path')
                poster = 'https://image.tmdb.org/t/p/w500{}'.format(
                    poster_path)
            else:
                poster = ""

            poster_paths.append(poster)

        titles = self.get_data(method)
        posters = []
        for i in range(len(titles)):
            posters.append({'title': titles[i], 'link': poster_paths[i]})

        return posters[:n]
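
A short usage sketch for the Presenter above, assuming the persisted training data and MOVIE_SEARCH_ENDPOINT from the project's config are in place:

# Hypothetical caller; user id 1 is just an example value.
presenter = Presenter(1)
print(presenter.get_config())
for poster in presenter.get_posters('TF', 5):
    print(poster['title'], poster['link'])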
Example #14
def upgrade(olddir, newdir):
    """Given a rawdog 1.x state directory and a rawdog 2.x state directory,
	copy the ordering information from the old one into the new one. Since
	rawdog 2.0 mangles articles in a slightly different way, this needs to
	do approximate matching to find corresponding articles."""
    print "Importing state from " + olddir + " into " + newdir

    print "Loading old state"
    f = open(olddir + "/state")
    oldrawdog = pickle.load(f)

    print "Loading new state"
    os.chdir(newdir)
    persister = Persister("state", Rawdog)
    newrawdog = persister.load()

    print "Copying feed state"
    oldfeeds = {}
    newfeeds = {}
    for url, oldfeed in oldrawdog.feeds.items():
        if newrawdog.feeds.has_key(url):
            last_update = oldfeed.last_update
            print "Setting feed", url, "last update time to", format_time(last_update)
            newrawdog.feeds[url].last_update = last_update
            oldfeeds[url] = {}
            newfeeds[url] = {}
        else:
            print "Old feed", url, "not in new state"

    print "Copying article state"

    # Separate out the articles by feed.
    for oldhash, oldarticle in oldrawdog.articles.items():
        if oldfeeds.has_key(oldarticle.feed):
            oldfeeds[oldarticle.feed][oldhash] = oldarticle
    for newhash, newarticle in newrawdog.articles.items():
        if newfeeds.has_key(newarticle.feed):
            newfeeds[newarticle.feed][newhash] = newarticle

    # Now fuzzily match articles.
    for url, oldarticles in oldfeeds.items():
        for newhash, newarticle in newfeeds[url].items():
            matches = []
            for oldhash, oldarticle in oldarticles.items():
                score = 0

                olink = oldarticle.link
                nlink = newarticle.entry_info.get("link")
                if olink is not None and nlink is not None and olink == nlink:
                    score += 1

                otitle = oldarticle.title
                ntitle = newarticle.entry_info.get("title")
                if otitle is not None and ntitle is not None and approximately_equal(otitle, ntitle):
                    score += 1

                odesc = oldarticle.description
                ndesc = newarticle.entry_info.get("description")
                if odesc is not None and ndesc is not None and approximately_equal(odesc, ndesc):
                    score += 1

                matches.append((score, oldhash))

            matches.sort()
            if matches != [] and matches[-1][0] > 1:
                oldhash = matches[-1][1]
                oldarticle = oldarticles[oldhash]
                newarticle.sequence = oldarticle.sequence
                newarticle.last_seen = oldarticle.last_seen
                newarticle.added = oldarticle.added
                print "Matched new", newhash, "to old", oldhash, "in", url
            else:
                print "No match for", newhash, "in", url

    print "Saving new state"
    newrawdog.modified()
    persister.save()

    print "Done"