Пример #1
0
def start_local_cluster() -> Client:
    """Return the shared Dask client, lazily creating the local cluster.

    The module-level ``cluster``/``client`` pair is created exactly once,
    on the first call; later calls return the same client.
    """
    global cluster, client
    if cluster is None:
        cluster = LocalCluster(n_workers=config.dask_workers)
        client = Client(cluster)
    log.info('Cluster info: {}'.format(str(cluster)))
    return client
Пример #2
0
    def dl_dir(self):
        """Download every entry of this CTDisk directory.

        Walks the cached web listing (self.status['web']), starts one
        FileThread per file that is not yet marked as downloaded in
        self.status['loc'] (throttled by the global semaphore g_sem), and
        recurses synchronously into subfolders.

        Returns:
            (True, None) on success, otherwise (False, error_code) when a
            file link times out or a subdirectory download fails.
        """
        loc = self.status['loc']
        web = self.status['web']

        # mkdir: create the local target directory if needed
        dir_fullname = os.path.join(self.parent_dir, loc['name'])
        log.info(dir_fullname)
        if not os.path.exists(dir_fullname):
            os.mkdir(dir_fullname)

        # start a thread to watch the download process
        dir_thread = DirThread(self)
        dir_thread.start()

        # each listing row is HTML; parse the cells with pyquery
        for i in web['aaData']:
            p = pq(i[0])
            value = p('input').attr('value')
            dl_type = p('input').attr('name')
            p = pq(i[1])
            name = p('a').text()
            if 'file' in dl_type:
                url = p('a').attr('href')
                if loc.get(value) and loc[value]:
                    log.info('file {} {} downloaded, skip it'.format(
                        value, name))
                else:
                    # Rebuild the file URL on the same scheme/host: keep
                    # scheme+netloc, swap in the new path, and clear the
                    # params/query/fragment components.
                    lst = list(self.urlparsed)
                    lst[3] = lst[4] = lst[5] = ""
                    lst[2] = url
                    ct_file = CtFile(urlunparse(lst), self.args, name, value,
                                     dir_fullname, self.s)
                    file_thread = FileThread(ct_file)

                    # Acquire the semaphore before starting the thread; poll
                    # with a timeout so we can abort promptly when the watcher
                    # thread reports a link timeout.
                    r = False
                    while not r:
                        r = g_sem.acquire(timeout=1)
                        if dir_thread.link_timeout:
                            dir_thread.quit = True
                            return False, DL_ERROR_FILELINKTIMEOUT

                    file_thread.start()
                    dir_thread.add(file_thread)

            elif 'folder' in dl_type:
                log.debug('{} {} is a folder'.format(value, name))
                # Recurse into the subfolder synchronously; its own dl_dir
                # manages the per-file threading.
                ct_subdir = CtDir(self.args, dir_fullname, value)
                ct_subdir.get_dir_list(True)
                success, error = ct_subdir.dl_dir()
                if not success:
                    dir_thread.quit = True
                    return success, error

        # All entries processed (downloaded or skipped): stop the watcher.
        dir_thread.quit = True
        return True, None
Пример #3
0
    def get_dir_list(self, get_dir_from_web):
        """Fetch and cache this directory's listing in self.status.

        Args:
            get_dir_from_web: when True, re-fetch from the website even if a
                cached 'web' listing already exists in self.status.
        """
        log.info('Get list for {}'.format(self.url))
        if get_dir_from_web or not self.status.get('web'):
            loc = self.status['loc']
            log.info('get list from website')
            headers = {'origin': self.urlparsed.netloc}
            # parameters for the directory-info API call
            params = {
                'd': self.urlparsed.path.split('/')[-1],
                'folder_id': self.urlparsed.query,
                'passcode': '',
                'r': str(random.random()),
                'ref': '',
            }
            r = self.s.get(GET_DIR_URL, params=params, headers=headers)
            # re-decode with utf-8-sig to strip a possible BOM before parsing
            j = json.loads(r.text.encode().decode('utf-8-sig'))
            requests_debug(r)
            if j.get('code') == 404 or j.get('code') == 503:
                log.error('dl_dir_list error: {}, {}'.format(
                    self.url, j.get('message')))
                # BUG FIX: bail out on error — the payload carries no folder
                # data, so indexing j['folder_name'] below would raise KeyError.
                return

            loc['name'] = j['folder_name']
            loc['url'] = j['url']  # real url
            log.info('folder name: {}'.format(loc['name']))
            log.info('folder url: {}'.format(loc['url']))

            r = self.s.get(WEBAPI_HOST + loc['url'])
            self.status['web'] = json.loads(r.text)

            self.save_status()
Пример #4
0
    async def on_ready():
        """Generate Nest embeds and post or edit them in their channels.

        For each (channel_id, area) pair, searches the channel history for an
        embed this bot posted earlier with the same title; if found it is
        edited in place, otherwise a new message is sent. The bot always logs
        out at the end, even after an exception.
        """
        # NOTE: removed large blocks of dead commented-out emote-server code
        # that previously lived here as inert triple-quoted strings.
        try:
            log.info(
                "Connected to Discord. Generating Nest messages and sending them."
            )
            emote_refs = await get_emotes(bot, nesting_mons, config)
            for d, area in discord_message_data:
                channel = await bot.fetch_channel(d)
                found = False
                embed_dict, _ = area.get_nest_text(config, emote_refs)
                embed = discord.Embed().from_dict(embed_dict)
                # Look for a previous message of ours with the same embed
                # title so reruns edit in place instead of duplicating.
                async for message in channel.history():
                    if message.author == bot.user:
                        embeds = message.embeds
                        if len(embeds) > 0:
                            if embeds[0].title == embed.title:
                                found = True
                                break
                if found:
                    await message.edit(embed=embed)
                    log.success(
                        f"Found existing Nest message for {area.name} and edited it"
                    )
                else:
                    await channel.send(embed=embed)
                    log.success(f"Sent a new Nest message for {area.name}")
        except Exception as e:
            # Log the failure but still fall through to log out below.
            log.exception(e)
        await bot.logout()
Пример #5
0
def get_osm_data(bbox, date, osm_file_name):
    """Fetch OSM/overpass data for *bbox*, retrying until a full result arrives.

    Waits for a free overpass API slot, downloads the data, and sleeps/retries
    whenever the response contains no elements (usually rate limiting). On
    success the JSON is written to *osm_file_name* and returned.
    """
    while True:
        # Block until overpass reports a free request slot for us.
        while True:
            status_text = requests.get("http://overpass-api.de/api/status").text
            if "available now" in status_text:
                break
            if "Slot available after" in status_text:
                rate_seconds = int(
                    status_text.split(", in ")[1].split(" seconds.")[0]) + 15
                log.warning(
                    f"Overpass is rate-limiting you. Gonna have to wait {rate_seconds} seconds before continuing"
                )
                time.sleep(rate_seconds)
            else:
                log.warning(
                    "Had trouble finding out about your overpass status. Waiting 1 minute before trying again"
                )
                time.sleep(60)

        log.info(
            "Getting OSM data. This will take ages if this is your first run.")
        osm_time_start = timeit.default_timer()
        nest_json = fetch_data(bbox, date)
        seconds = round(timeit.default_timer() - osm_time_start, 1)
        if nest_json.get("elements", []):
            # Got real data; leave the retry loop and persist it below.
            break
        log.error(
            f"Did not get any data from overpass in {seconds} seconds. This probably means that you were rate-limited by overpass. Sleeping 5 minutes and trying again.\nIf you want, you can share the below log entry in Discord"
        )
        log.error(nest_json.get("remark"))
        time.sleep(60 * 5)

    with open(osm_file_name, mode='w', encoding="utf-8") as osm_file:
        osm_file.write(json.dumps(nest_json, indent=4))
    log.success(
        f"Done. Got all OSM data in {seconds} seconds and saved it.")
    return nest_json
Пример #6
0
def main():
    """CLI entry point: download a CTDisk directory or a single file."""
    global g_sem
    parser = argparse.ArgumentParser(description='Download from CTDisk.')
    parser.add_argument('-d', '--dir', help='download a directory')
    parser.add_argument('-f', '--file', help='download a file')
    parser.add_argument('-s',
                        '--split',
                        type=int,
                        default=SPLIT_CNT,
                        help='split a file into parts')
    parser.add_argument('-c',
                        '--dl_cnt',
                        type=int,
                        default=DOWNLOAD_CNT,
                        help='download files concurrently')
    args = parser.parse_args()

    # Global semaphore capping the number of concurrent file downloads.
    g_sem = threading.Semaphore(args.dl_cnt)

    if args.dir:
        stop = False
        get_dir_from_web = False
        error = None
        ct_dir = CtDir(args)
        while not stop:
            # After a link timeout the cached listing is stale, so force a
            # fresh directory fetch on the retry.
            if error == DL_ERROR_FILELINKTIMEOUT:
                get_dir_from_web = True
            else:
                get_dir_from_web = False
            ct_dir.get_dir_list(get_dir_from_web)
            success, error = ct_dir.dl_dir()
            if success:
                stop = True
                log.info('download finished')
    elif args.file:
        ct_file = CtFile(args.file, args)
        ct_file.dl()
        log.info('download finished')
Пример #7
0
async def get_emotes(bot, nesting_mons, config):
    """Ensure a custom emote exists for every nesting mon.

    Loads the known emote servers from data/emotes.json, creates missing
    emotes (creating a fresh guild whenever all known guilds are at the
    50-emoji limit), deletes emotes for mons that no longer nest, and writes
    the updated mapping back to disk.

    Returns:
        dict mapping int mon_id -> int emote_id.
    """
    try:
        with open("data/emotes.json", "r") as f:
            emote_servers = json.load(f)
    except (IOError, OSError):
        emote_servers = {}
        log.info("This seems to be your first run. Your bot will now create 1 server and fill it with emotes, so prepare for some wait time.")
    emotes = {}        # str mon_id -> {emote_id, server_id}; deletion candidates
    final_emotes = {}  # int mon_id -> int emote_id; returned to the caller
    for server, data in emote_servers.items():
        for monid, emoteid in data.items():
            final_emotes[int(monid)] = int(emoteid)
            emotes[monid] = {
                "emote_id": int(emoteid),
                "server_id": int(server)
            }
    log.info("Comparing your bot's emotes to needed nesting mons.")
    for monid in nesting_mons:
        if monid in emotes.keys():
            # Emote already exists: keep it by dropping it from the
            # deletion-candidate set.
            emotes.pop(monid)
            continue
        # Find a guild with a free emoji slot (Discord caps guilds at 50).
        free_emotes = False
        for guild_id in emote_servers.keys():
            # JSON keys are strings; fetch_guild expects an int snowflake.
            guild = await bot.fetch_guild(int(guild_id))
            if len(guild.emojis) < 50:
                free_emotes = True
                break
        if not free_emotes:
            guild = await bot.create_guild("Nest Emotes")
            channel = await guild.create_text_channel("hello")
            invite = await channel.create_invite()
            # BUG FIX: key with str(guild.id) so the key matches the string
            # keys produced by json.load on later runs.
            emote_servers[str(guild.id)] = {}
            log.info(f"Created Emote Server. Invite code: {invite.code}")

        emote_name = "m" + monid
        image_url = config.icon_repo + f"pokemon_icon_{monid.zfill(3)}_00.png"
        image = requests.get(image_url).content

        emote = await guild.create_custom_emoji(name=emote_name, image=image)
        # BUG FIX: guild.id is an int, but servers loaded from emotes.json are
        # keyed by string — indexing with the raw int raised KeyError.
        emote_servers[str(guild.id)][monid] = emote.id
        final_emotes[int(monid)] = emote.id

    # Whatever is left in `emotes` belongs to mons that no longer nest.
    for monid, data in emotes.items():
        guild = await bot.fetch_guild(data["server_id"])
        emote = await guild.fetch_emoji(data["emote_id"])
        await emote.delete()

    with open("data/emotes.json", "w+") as f:
        f.write(json.dumps(emote_servers, indent=4))
    return final_emotes
Пример #8
0
def analyze_nests(config, area, nest_mons, queries, reset_time, nodelete):
    """Find Pokémon nests in *area* from OSM park polygons and DB data.

    Loads (or fetches) the overpass OSM dump for the area, builds polygons
    for OSM ways and relations, merges parks that the saved area file marks
    as connected, filters candidates through geometry/spawnpoint/mon
    thresholds, inserts every surviving nest into the DB via *queries*, and
    rewrites the per-area JSON metadata file.

    Args:
        config: tool configuration (scanner, less_queries, pokestop_pokemon...).
        area: area object exposing name, bbox, polygon, sql_fence, settings.
        nest_mons: iterable of nesting mon ids used in DB queries.
        queries: DB accessor (spawns/stops/mons/nest_insert/nest_delete/...).
        reset_time: cutoff timestamp for counting mon records.
        nodelete: when True, keep existing nest rows for this area.

    Returns:
        List of park objects that qualified as nests.
    """
    OSM_DATE = osm_date()
    # Getting OSM/overpass data

    osm_file_name = f"data/osm_data/{area.name} {OSM_DATE.replace(':', '')}.json"
    try:
        with open(osm_file_name, mode="r", encoding="utf-8") as osm_file:
            nest_json = json.load(osm_file)
    except Exception:
        # Cache miss or unreadable/corrupt cache: re-fetch from overpass.
        # (Was a bare `except:`, which also swallowed KeyboardInterrupt.)
        nest_json = get_osm_data(area.bbox, OSM_DATE, osm_file_name)

    # Getting area data

    area_file_name = f"data/area_data/{area.name}.json"
    area_file_data = {}
    try:
        with open(area_file_name, mode="r", encoding="utf-8") as area_file:
            log.info("Found area data file. Reading and using data from it now")
            area_file_data_raw = json.load(area_file)
        # JSON keys are strings; park ids are ints, so convert on load.
        for k, v in area_file_data_raw.items():
            area_file_data[int(k)] = v

    except FileNotFoundError:
        pass

    if not nodelete:
        queries.nest_delete(area.sql_fence)

    log.info(f"Got all relevant information. Searching for nests in {area.name} now")

    # Split the OSM elements into node coordinates, way parks, relation parks.
    nodes = {}
    ways = []
    relations = []
    for element in nest_json['elements']:
        if "type" not in element:
            continue
        if element["type"] == "node":
            nodes[element["id"]] = {
                "lat": element["lat"],
                "lon": element["lon"]
            }
        elif element["type"] == "way":
            # BUG FIX: was `and`, which raised KeyError for ways without a
            # "nodes" key instead of skipping them.
            if "nodes" not in element or not element["nodes"]:
                continue
            ways.append(WayPark(element, config))
        elif element["type"] == "relation":
            # BUG FIX: same `and` -> `or` correction as for ways above.
            if "members" not in element or not element["members"]:
                continue
            relations.append(RelPark(element, config))

    parks = ways + relations

    # Check Relations

    failed_nests = defaultdict(int)
    failed_nests["Total Nests found"] = 0
    double_ways = []

    start = timeit.default_timer()

    if config.less_queries:
        # Pull all spawns/mons once up front and filter in Python per park.
        log.info("Getting DB data")
        all_spawns = [(str(_id), geometry.Point(lon, lat)) for _id, lat, lon in queries.spawns(area.sql_fence)]
        all_mons = queries.all_mons(str(tuple(nest_mons)), str(reset_time), area.sql_fence)
        all_mons = [(_id, geometry.Point(lon, lat)) for _id, lat, lon in all_mons]

    with Progress() as progress:
        for park in relations:
            double_ways = park.get_polygon(nodes, ways, double_ways)
        for park in ways:
            park.get_polygon(nodes)

        # Merge parks that the saved area file says are connected.
        for osm_id, data in area_file_data.items():
            for connect_id in data["connect"]:
                for i, park in enumerate(parks):
                    if park.id == osm_id:
                        big_park = park
                        big_park_i = i
                    if park.id == connect_id:
                        small_park = park
                        small_park_i = i

                parks[big_park_i].connect.append(connect_id)
                parks[big_park_i].polygon = cascaded_union([big_park.polygon, small_park.polygon])
                parks.pop(small_park_i)

        # NOW CHECK ALL AREAS ONE AFTER ANOTHER
        check_nest_task = progress.add_task("Nests found: 0", total=len(parks))
        nests = []

        for park in parks:
            progress.update(check_nest_task, advance=1, description=f"Nests found: {failed_nests['Total Nests found']}")

            if not park.is_valid:
                failed_nests["Geometry is not valid"] += 1
                continue

            if not area.polygon.contains(park.polygon):
                failed_nests["Not in Geofence"] += 1
                continue

            if park.id in double_ways:
                failed_nests["Avoiding double nests"] += 1
                continue

            pokestop_in = None
            stops = []
            if config.scanner == "rdm" and config.pokestop_pokemon:
                # Get all Pokestops with id, lat and lon
                for pkstp in queries.stops(park.sql_fence):
                    stops.append(str(pkstp[0]))
                pokestop_in = "'{}'".format("','".join(stops))

            if config.less_queries:
                spawns = [s[0] for s in all_spawns if park.polygon.contains(s[1])]
            else:
                spawns = [str(s[0]) for s in queries.spawns(park.sql_fence)]

            if not stops and not spawns:
                failed_nests["No Stops or Spawnpoints"] += 1
                continue
            if (len(stops) < 1) and (len(spawns) < area.settings['min_spawnpoints']):
                failed_nests["Not enough Spawnpoints"] += 1
                continue
            spawnpoint_in = "'{}'".format("','".join(spawns))
            if spawnpoint_in == "''": spawnpoint_in = "NULL" # This will handle the SQL warning since a blank string shouldn't be used for a number

            if config.less_queries:
                mons = [s[0] for s in all_mons if park.polygon.contains(s[1])]
                if len(mons) == 0:
                    failed_nests["No Pokemon"] += 1
                    continue
                # Most frequent mon id in this park and its count.
                most_id = max(set(mons), key=mons.count)
                poke_data = [most_id, mons.count(most_id)]

            else:
                poke_data = queries.mons(spawnpoint_in, str(tuple(nest_mons)), str(reset_time), pokestop_in)

                if poke_data is None:
                    failed_nests["No Pokemon"] += 1
                    continue
            park.mon_data(poke_data[0], poke_data[1], area.settings['scan_hours_per_day'], len(spawns) + len(stops))

            if park.mon_count < area.settings['min_pokemon']:
                failed_nests["Not enough Pokemon"] += 1
                continue
            if park.mon_avg < area.settings['min_average']:
                failed_nests["Average spawnrate too low"] += 1
                continue
            if park.mon_ratio < area.settings['min_ratio']:
                failed_nests["Average spawn ratio too low"] += 1
                continue

            try:
                park.generate_details(area_file_data, failed_nests["Total Nests found"])
            except TopologicalError:
                # NOTE(review): the nest is still counted and inserted below
                # even when detail generation fails — confirm this is intended.
                failed_nests["Geometry is not valid"] += 1

            # Insert Nest data to db
            insert_args = {
                "nest_id": park.id,
                "name": park.name,
                "form": park.mon_form,
                "lat": park.lat,
                "lon": park.lon,
                "pokemon_id": park.mon_id,
                "type": 0,
                "pokemon_count": park.mon_count,
                "pokemon_avg": park.mon_avg,
                "pokemon_ratio": park.mon_ratio,
                "poly_path": json.dumps(park.path),
                "poly_type": 1 if isinstance(park, RelPark) else 0,
                "current_time": int(time.time())
            }

            failed_nests["Total Nests found"] += 1
            nests.append(park)

            queries.nest_insert(insert_args)
    stop = timeit.default_timer()
    log.success(f"Done finding nests in {area.name} ({round(stop - start, 1)} seconds)")
    for k, v in failed_nests.items():
        log.info(f" - {k}: {v}")

    def sort_avg(nest):
        # Sort key: average spawn count, highest first (see reverse=True).
        return nest.mon_avg

    new_area_data = {}
    for nest in sorted(nests, key=sort_avg, reverse=True):
        new_area_data[nest.id] = {
            "name": nest.name,
            "center": [nest.lat, nest.lon],
            "connect": nest.connect
        }
    # Keep entries for parks that didn't qualify this run so their names and
    # connections survive into future runs.
    for oid, data in area_file_data.items():
        if oid not in [n.id for n in nests]:
            new_area_data[oid] = {
                "name": data["name"],
                "center": data["center"],
                "connect": data["connect"]
            }
    with open(area_file_name, mode="w+") as area_file:
        area_file.write(json.dumps(new_area_data, indent=4))

        log.info("Saved area data")
    log.success(f"All done with {area.name}\n")

    return nests
Пример #9
0
import time

from filters.barcode import get_collisions
from loaders.file_provenance import load_file_provenance
from tasks.check_barcodes import config
from utils.logging import log

## load fpr: read the file provenance report into a DataFrame
start_time = time.time()
log.info('Loading file provenance report from: {}'.format(config.fpr_path))
fpr = load_file_provenance(config.fpr_path)
log.info(
    'Completed loading file provenance report in {:.1f}s'.format(time.time() -
                                                                 start_time))

## select only CASAVA lane_name + sample_name + workflow_run + barcode

lanes = fpr.loc[(fpr['Workflow Name'] == "CASAVA") &
                (fpr['Sample Name'].notnull()),
                ['Lane Name', 'Sample Name', 'Workflow Run SWID', 'IUS Tag'
                 ]].drop_duplicates()
# BUG FIX: reset_index() returns a new DataFrame; the original call discarded
# the result, making it a no-op. drop=True renumbers the rows without adding
# the old index as a column, keeping the column set unchanged.
lanes = lanes.reset_index(drop=True)

## compute barcode collisions per lane
start_time = time.time()
collisions_df = get_collisions(lanes, config.collision_threshold,
                               config.collision_operator)
log.info('get_collisions completed in {:.1f}s'.format(time.time() -
                                                      start_time))
log.info('Collisions (threshold = {}, operator = {}) = {}'.format(
    config.collision_threshold, str(config.collision_operator.__name__),
    len(collisions_df)))
Пример #10
0
    def dl(self):
        """Download self.url into self.parent_dir using split-range threads.

        Resolves the real download URL via the two-step CTDisk API, creates
        an empty temp file of the final size, downloads self.args.split byte
        ranges concurrently, then renames the temp file into place.

        Returns:
            (True, None) on success, or (False, message) when the link is
            invalid or any split thread fails with a 404.
        """
        log.info('download {}'.format(self.url))

        # step 1: look up the file metadata (id, checksum, size)
        headers = {
            'origin': self.urlparsed.netloc,
        }

        # parameters
        params = {
            'f': self.url.split('/')[-1],
            'passcode': '',
            'r': str(random.random()),
            'ref': '',
        }
        r = self.s.get(GET_FILE_URL1, params=params, headers=headers)
        j = json.loads(r.text)
        log.debug('step 1')
        requests_debug(r)

        # link error handler
        if j.get('code') == 404:
            log.error('dl_file error: {}'.format(j.get('message')))
            if j.get('message') == DL_ERROR_FILELINKTIMEOUT:
                log.error('need get dir list again')
            return False, j.get('message')

        if not self.filename:
            self.filename = j['file_name']

        # step 2: obtain the real download URL; retry with a fresh random
        # token while the API answers 503
        params = {
            'uid': j['userid'],
            'fid': j['file_id'],
            'folder_id': 0,
            'file_chk': j['file_chk'],
            'mb': 0,
            'app': 0,
            'acheck': 1,
            'verifycode': '',
            'rd': str(random.random())
        }
        while True:
            r = self.s.get(GET_FILE_URL2, params=params, headers=headers)
            j = json.loads(r.text)
            log.debug('step 2')
            requests_debug(r)
            if j.get('code') == 503:
                params['rd'] = str(random.random())
            else:
                break

        # create an empty file of the final size so threads can write ranges
        filename = os.path.join(self.parent_dir, self.filename)
        filesize = int(j['file_size'])
        temp_filename = filename + '.ctdown'
        log.debug('create empty file {} size {}'.format(
            temp_filename, filesize))
        with open(temp_filename, 'wb') as fd:
            fd.truncate(filesize)

        # download with threads, one byte range per thread
        threads = []
        for i in range(self.args.split):
            start = i * filesize // self.args.split
            end = (
                i + 1
            ) * filesize // self.args.split - 1 if i != self.args.split - 1 else filesize

            t = SplitThread(i, j['downurl'].replace(r'\/', r'/'), params,
                            headers, filename, start, end)

            log.debug('dl-{:03d} download range start={} end={}'.format(
                i + 1, start, end))

            threads.append(t)
            t.start()

        progressbar = tqdm.tqdm(total=filesize,
                                desc=filename,
                                ascii=' #',
                                unit="B",
                                unit_scale=True,
                                unit_divisor=1024)
        downloaded_bytes = 0
        last_downloaded_bytes = 0
        download_success = True
        while downloaded_bytes < filesize:
            downloaded_bytes = 0
            for t in threads:
                if t._status == DL_Thread_status.E404:
                    log.error('dl-{:03d} download {} Fail'.format(
                        t._index, filename))
                    download_success = False
                    break
                downloaded_bytes += t.downloaded_bytes()

            if not download_success:
                log.error('exit')
                break

            progressbar.update(downloaded_bytes - last_downloaded_bytes)
            last_downloaded_bytes = downloaded_bytes
            log.debug("{} {}".format(downloaded_bytes, filesize))
            time.sleep(1)

        # release the tqdm handle on both the success and failure paths
        progressbar.close()
        log.debug('quit')
        for i in range(self.args.split):
            threads[i].join()

        # BUG FIX: the original renamed the temp file and returned success
        # even when a split thread hit a 404. Keep the partial .ctdown file
        # and report the failure instead.
        if not download_success:
            return False, 'download failed'

        os.rename(temp_filename, filename)
        return True, None
Пример #11
0
        event_start = datetime.strptime(event["start"], "%Y-%m-%d %H:%M")
        if event_start > local_time:
            continue
        event_end = datetime.strptime(event["end"], "%Y-%m-%d %H:%M")

        if event_end <= last_migration:
            continue

        if event_start <= last_migration:
            continue

        if event_end < local_time:
            td = local_time - event_end
            last_migration = event_end
            log.info(
                f"Overwriting nest migration with the end time of {event['name']}"
            )
        else:
            td = local_time - event_start
            last_migration = event_start
            log.info(
                f"Overwriting nest migration with the start time of {event['name']}"
            )

    days, seconds = td.days, td.seconds
    config.hours_since_change = math.floor(days * 24 + seconds / 3600)
    log.success(f"Hours since last migration: {config.hours_since_change}")

if args.hours is not None:
    config.hours_since_change = int(args.hours)
    log.info(