Example #1
def _analyze(recording):
    # We pass a tuple because SQLAlchemy objects are not fully process-safe.
    # They can be pickled and unpickled, but they won't be bound to a Session
    # anymore. This means they can't refresh their attributes, which is
    # something they try to do after a commit() from inside the main process.
    recording_id, sonogram_url_small = recording
    try:
        # Create one fetcher per process.
        global _sonogram_fetcher # pylint: disable=global-statement
        if not _sonogram_fetcher:
            _sonogram_fetcher = fetcher.Fetcher(cache_group='xc_sonograms_small', pool_size=1)

        sonogram = None
        try:
            sonogram = _sonogram_fetcher.fetch_cached(sonogram_url_small)
        except fetcher.FetchError:
            logging.warning(f'Sonogram for recording {recording_id} could not be fetched', exc_info=True)

        sonogram_quality = -999999
        if sonogram:
            sonogram_quality = analysis.sonogram_quality(recording_id, sonogram)

        return (recording_id, sonogram_quality)
    except Exception as ex:
        # Re-raise as something that's guaranteed to be pickleable.
        logging.error('Exception during analysis', exc_info=True)
        raise RuntimeError(f'Exception during analysis: {ex}')
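The tuple-unpacking comment above implies a driver that queries recordings in the parent process and ships plain tuples to a worker pool. A minimal sketch of such a driver, assuming a hypothetical Recording model and SQLAlchemy session (neither appears in the snippet):

import multiprocessing

def analyze_all(session):
    # Build plain (id, url) tuples in the parent process; unlike
    # Session-bound SQLAlchemy objects, these pickle cleanly into workers.
    rows = [(r.recording_id, r.sonogram_url_small)
            for r in session.query(Recording)]  # Recording is hypothetical
    with multiprocessing.Pool() as pool:
        for recording_id, quality in pool.imap_unordered(_analyze, rows):
            print(recording_id, quality)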
Example #2
def _process_image(image):
    '''
    Entry point for parallel processing.
    '''

    global _fetcher  # pylint: disable=global-statement
    if not _fetcher:
        _fetcher = fetcher.Fetcher('wp_images', pool_size=1)

    full_output_file_name = os.path.join(_args.image_output_dir,
                                         image.output_file_name)
    if os.path.exists(full_output_file_name) and not _args.recreate_images:
        return image.output_file_name

    image_data = _fetcher.fetch_cached(image.image_file_url)

    pil_image = PIL.Image.open(io.BytesIO(image_data))

    if pil_image.width > _args.image_size or pil_image.height > _args.image_size:
        if pil_image.width >= pil_image.height:
            output_width = _args.image_size
            output_height = round(output_width / pil_image.width *
                                  pil_image.height)
        else:
            output_height = _args.image_size
            output_width = round(output_height / pil_image.height *
                                 pil_image.width)
        pil_image = pil_image.resize((output_width, output_height),
                                     resample=PIL.Image.LANCZOS)

    pil_image.save(full_output_file_name,
                   format='WebP',
                   quality=_args.image_quality)

    return image.output_file_name
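The resize branch above preserves the aspect ratio by hand. Pillow's built-in Image.thumbnail does essentially the same shrink-only, aspect-preserving scaling in place, so a rough equivalent (a sketch reusing the names above, not the original code) would be:

# Rough equivalent using Pillow's built-in helper: thumbnail() only ever
# shrinks the image and keeps its aspect ratio, like the manual
# width/height arithmetic above (rounding may differ slightly).
pil_image.thumbnail((_args.image_size, _args.image_size),
                    resample=PIL.Image.LANCZOS)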
Example #3
    def __init__(self, weekfile, weeknumber):
        self.weekreview = fetcher.Fetcher(weekfile, end_ind="tag")
        self.weekreview.read_file()
        self.weektable = tablemd.tablemd(self.weekreview.content_data)
        weektitles = [self.weektable.readcell(0, 2 * i + 1)
                      for i in range(3)]
        # Pull the week number out of each title cell.
        weeknumbers = [int(''.join(filter(str.isdigit, title)))
                       for title in weektitles]
        self.week_index = 1 + weeknumbers.index(weeknumber) * 2
Example #4
def FetchAsyncWithAuth(*args, **kwargs):
  credential = model.GetOAuth2Credential('github')
  if credential:
    url_auth_suffix = ('?client_id={0}&client_secret={1}'
                       .format(credential.client_id,
                               credential.client_secret))
  else:
    url_auth_suffix = ''
  return fetcher.Fetcher(*args, url_auth_suffix=url_auth_suffix, **kwargs)
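Appending client_id and client_secret as query parameters was GitHub's older OAuth-app style; GitHub has since removed it in favor of HTTP Basic authentication. A sketch of the modern equivalent using requests (not part of the fetcher API shown here):

import requests

# Sketch only: GitHub now expects OAuth app credentials via HTTP Basic
# auth rather than ?client_id=...&client_secret=... query parameters.
def fetch_with_auth(url, client_id, client_secret):
    response = requests.get(url, auth=(client_id, client_secret))
    response.raise_for_status()
    return response.content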
Example #5
    def PopulateRepos(self):
        shared.EnsureRunningInTask()  # gives us automatic retries
        baseurl = self.repo_collection.key.id()
        fetched = fetcher.Fetcher(baseurl, follow_redirects=True)
        page = fetched.content
        candidate_repos = self._GetChildPaths(page)
        fetches = []

        # we found a project in the root directory
        if 'app.yaml' in candidate_repos:
            candidate_repos.insert(0, '')

        if common.IsDevMode():
            # fetch fewer repos during development
            candidate_repos = candidate_repos[:1]

        for c in candidate_repos:
            if c and not c.endswith('/'):
                continue
            project_url = '{0}{1}'.format(baseurl, c)
            app_yaml_url = '{0}app.yaml'.format(project_url)
            fetched = fetcher.Fetcher(app_yaml_url, follow_redirects=True)
            fetches.append((c, project_url, app_yaml_url, fetched))

        repos = []
        for c, project_url, app_yaml_url, fetched in fetches:
            try:
                content = fetched.content
                shared.i('found app.yaml: {}'.format(app_yaml_url))
                name = c.rstrip('/') or project_url
                description = 'Sample code from {0}'.format(project_url)
                model.CreateRepoAsync(repo_url=project_url,
                                      html_url=project_url,
                                      name=name,
                                      description=description,
                                      open_files=[])
            except urlfetch_errors.Error:
                exc_info = sys.exc_info()
                formatted_exception = traceback.format_exception(
                    exc_info[0], exc_info[1], exc_info[2])
                shared.w('skipping {0}'.format(project_url))
                for line in filter(None, formatted_exception):
                    shared.w(line)
Example #6
async def main(delay_rate, server_upper_limit):
    server_upper_limit = asyncio.Semaphore(server_upper_limit)
    obj = fetcher.Fetcher(delay_rate=delay_rate,
                          server_upper_limit=server_upper_limit)
    resultDataSet = await obj.main()
    resultDataSet = {"database": resultDataSet}
    with open('./data.json', 'w') as json_file:
        json_file.write(json.dumps(resultDataSet))
        print("The entire data has been stored into the data.json file")

    time.sleep(0.1)
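main() wraps the integer server_upper_limit in an asyncio.Semaphore before handing it to the Fetcher, which presumably acquires it around each request so that at most that many requests are in flight. A self-contained sketch of the pattern (the real Fetcher internals are not shown here):

import asyncio

async def fetch_one(sem: asyncio.Semaphore, url: str) -> str:
    # At most `server_upper_limit` coroutines hold the semaphore at once,
    # bounding concurrency against the server.
    async with sem:
        await asyncio.sleep(0.1)  # stand-in for the real network call
        return url

async def demo():
    sem = asyncio.Semaphore(3)
    urls = ['https://example.com/{0}'.format(i) for i in range(10)]
    return await asyncio.gather(*(fetch_one(sem, u) for u in urls))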
Example #7
    def crawl(self, config):
        print('start crawling')
        # Instantiate the multithreaded fetch module, using 3 threads.
        f = fetcher.Fetcher(config)
        f.start()

        while f.get_running_count() > 0:
            print('------------ running threads : %s ------------' %
                  f.get_running_count())
            time.sleep(5)

        f.printFinishLog()
Example #8
    def __init__(self):
        self.working = True
        self.threads = []

        self.log = log.Log(self)

        self.fetcher = fetcher.Fetcher(self)
        self.process = process.Process(self)
        self.db = db.Db(self)

        self.fetcher.start()
        self.process.start()
        self.db.start()
Example #9
    def test_login_failure(self):
        fd, cookieFile = tempfile.mkstemp()
        os.close(fd)  # close the descriptor; only the path is needed
        try:
            os.remove(cookieFile)
            get_HTML = fetcher.Fetcher(lambda x: sys.stdout.write(x + '\n'),
                                       lambda x: cookieFile).getHTML
            user = User(get_HTML)
            userID, real_name = user.login(self.username,
                                           self.password + "not")
            self.assertIsNone(userID)
            self.assertIsNone(real_name)
        finally:
            os.remove(cookieFile)
Example #10
    def test_login_success(self):
        fd, cookieFile = tempfile.mkstemp()
        os.close(fd)  # close the descriptor; only the path is needed
        try:
            os.remove(cookieFile)
            get_HTML = fetcher.Fetcher(lambda x: sys.stdout.write(x + '\n'),
                                       lambda x: cookieFile).getHTML
            user = User(get_HTML)
            userID, real_name = user.login(self.username, self.password)
            # Weak assertions, but we don't want to tie the test to a
            # particular user.
            self.assertIsNotNone(userID)
            self.assertIsNotNone(real_name)
        finally:
            os.remove(cookieFile)
Example #11
    def add_files(dirname):
        url = os.path.join(repo_url, dirname)
        fetched = fetcher.Fetcher(url, follow_redirects=True)
        page = fetched.content
        paths = self._GetChildPaths(page)
        shared.i('{0} -> {1}', url, paths)
        if not paths:
            shared.i('- {0}'.format(dirname))
            tree.SetFile(dirname, page)
        for path in paths:
            if common.GetExtension(path) in settings.SKIP_EXTENSIONS:
                continue
            relpath = os.path.join(dirname, path)
            add_files(relpath)
Example #12
def get_wiktionary_fields(entry, language):
    """
    Fetch Wiktionary info about the word and update fields.
    """

    myFetcher = fetcher.Fetcher()
    fetchedPage = myFetcher.fetch(entry)

    if fetchedPage:
        myParser = parser.Parser()
        myParser.setLanguage(language)
        myParser.setWordName(entry)
        extractedWord = myParser.extractData(fetchedPage)
        return extractedWord
    else:
        return False
Example #13
def scan(settings: Settings):
    logger.warning("Scan houses from web site")

    storage: fetcher.HouseStore = store.LiteHouseStore(
        settings.storage["sqlite"]["path"])

    notifier: fetcher.Notifier
    if settings.core["should_notify"]:
        notifier = notify.IftttNotifier(
            logger,
            fetcher.get_591_url,
            settings.notify["ifttt"]["webhook_token"],
            settings.notify["ifttt"]["event_name"],
        )
    else:
        notifier = notify.EmptyNotifier()

    agent = fetcher.Fetcher(logger, settings.core["page_delay"])
    scanner = fetcher.Scanner(logger, storage, notifier, agent,
                              settings.core["batch_delay"])

    scanner.scan_house(settings.query)
Example #14
def files():
    return fetcher.Fetcher(datadir).fetch_files(
        'benchdens', 'e0(2|4|6|8)n((100))\\.00(0|1|2)\\.in')
Example #15
def fileshighhk90():
    return fetcher.Fetcher(datadir).fetch_files(
        'holmefinal', 'n90d(6|8)0p(\\d\\d)\\.00(0|1|2|3|4)\\.in')
Example #16
def filesn90d60fixedpart():
    return fetcher.Fetcher(datadir).fetch_files(
        'fixedpart', 'p(\\d)n90d0(6)\\.00(0|1)\\.in')
Example #17
def fileshigh90():
    return fetcher.Fetcher(datadir).fetch_files(
        'benchdens', 'e0(6|8)n((90))\\.00(0|1|2|3|4)\\.in')
Example #18
def fileshigh():
    return fetcher.Fetcher(datadir).fetch_files(
        'benchdens', 'e0(6|8)n((100))\\.00(3|4)\\.in')
Example #19
#
# Fetch full images from the library for a random subset of records.

from optparse import OptionParser
import fetcher
import random
import record


if __name__ == '__main__':
  parser = OptionParser()
  parser.add_option("-n", "--num", dest="num",
                    default=100,
                    help="How many images to fetch.")
  parser.add_option("", "--seed", dest="seed", default=12345,
                    help="Random number seed.")
  parser.add_option("-c", "--output_dir", dest="cache_dir", default="images",
                    help="Images destination dir")
  parser.add_option("-s", "--secs", dest="secs", default=5,
                    help="Number of seconds to wait between fetches.")

  (options, args) = parser.parse_args()

  rs = record.AllRecords()
  rand = random.Random(options.seed)
  rand.shuffle(rs)
  f = fetcher.Fetcher(options.cache_dir, int(options.secs))
  for i, r in enumerate(rs[0:int(options.num)]):
    print "%03d Fetching %s" % (i, r.photo_url)
    f.Fetch(r.photo_url)
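optparse has been deprecated since Python 3.2; a sketch of the same flags using argparse (same defaults, with the int() conversions moved into the parser):

import argparse

parser = argparse.ArgumentParser(
    description='Fetch full images for a random subset of records.')
parser.add_argument('-n', '--num', type=int, default=100,
                    help='How many images to fetch.')
parser.add_argument('--seed', type=int, default=12345,
                    help='Random number seed.')
parser.add_argument('-c', '--output_dir', dest='cache_dir', default='images',
                    help='Images destination dir')
parser.add_argument('-s', '--secs', type=int, default=5,
                    help='Number of seconds to wait between fetches.')
options = parser.parse_args()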
Example #20
    def __init__(self):
        self.database = database.Database()
        self.fetcher = fetcher.Fetcher()
        current_version = self.fetcher.version()
        if current_version > self.database.version():
            self.update_database(str(current_version))
Example #21
def filesp1():
    return fetcher.Fetcher(datadir).fetch_files(
        'singlepart', 'e0(2|4|6|8)n((100))p1\\.00(0|1|2)\\.in')
Example #22
# Example for Fetcher.cpp

import cv2
import fetcher

myFetcher = fetcher.Fetcher()

ptNumber = 0


def onMouse(event, x, y, flags, param):
    global ptNumber

    if event == cv2.EVENT_LBUTTONUP:
        myFetcher.setPt(x, y, ptNumber)
        ptNumber = ptNumber + 1


cv2.namedWindow("Video Captured")

cv2.setMouseCallback("Video Captured", onMouse)

cap = cv2.VideoCapture(0)
while True:
    # Capture frame-by-frame
    ret, frame = cap.read()

    key = cv2.waitKey(1)

    # Our operations on the frame come here
    fileName = myFetcher.calibrate(frame, 5, 200, key)
Example #23
#!/usr/bin/python

import record
import fetcher
import os

rs = record.AllRecords()
f = fetcher.Fetcher('images', 0)
rs = [r for r in rs if (r.photo_url and f.InCache(r.photo_url))]

for idx, r in enumerate(rs):
  in_image = f.CacheFile(r.photo_url)
  out_image = 'thumbnails/%s.jpg' % r.photo_id()
  cmd = 'convert %s -resize 200x200 %s' % (in_image, out_image)
  print('%05d %s' % (idx, cmd))
  os.system(cmd)
Example #24
def fileshk():
    return fetcher.Fetcher(datadir).fetch_files(
        'holme', 'n100d0(1|2|3|4)\\.00(3|4)\\.in')
Example #25
    def __init__(self, cap, key=1):
        self.cap = cap
        self.key = key
        self.patch = None
        self.myFetcher = fetcher.Fetcher()
        signal.signal(signal.SIGTSTP, self.handler)
Example #26
def filesvlow():
    return fetcher.Fetcher(datadir).fetch_files(
        'benchdens', 'e0(2)n((100))\\.00(3|4)\\.in')
Example #27
consumer_key = os.getenv("CONSUMER_KEY", None)
consumer_secret = os.getenv("CONSUMER_SECRET", None)
access_token = os.getenv("ACCESS_TOKEN", None)
access_token_secret = os.getenv("ACCESS_TOKEN_SECRET", None)

if (
    consumer_key is None
    or consumer_secret is None
    or access_token is None
    or access_token_secret is None
):
    print("No credentials provided")
    sys.exit(1)

fetcher = fetcher.Fetcher(
    consumer_key, consumer_secret, access_token, access_token_secret
)
app = flask.Flask(__name__)


@app.route("/", methods=["GET", "POST"])
def handler():
    if flask.request.method == "GET":
        return {"status": "ok"}, 200
    else:
        if flask.request.json is None:
            app.logger.error("POST req without req body received")
            return {"status": "error"}, 500
        else:
            try:
                ids = flask.request.json["ids"]
Example #28
    # Parse the command-line arguments and load the config file.
    args = get_args()
    config = configreader.Configurator(args.settings)
    cfg, notices = config.read_settings('spatial')
    #cfg, notices = configreader.read_settings(args.settings)

    today = datetime.date.today()
    if args.weekly:
        searchdate = today + datetime.timedelta(days=-7)
        dbname = 'w_' + cfg['localdb']
    else:
        searchdate = today
        dbname = cfg['localdb']
        #test date please ignore.
        #searchdate = datetime.date(2014, 11, 8)

    print('Fetching from {0} to now...'.format(searchdate.isoformat()))

    lamagetter = fetcher.Fetcher(cfg['connection'])
    localdb = LocalStore(dbname)
    for notice in notices:
        try:
            rows = lamagetter.fetch(notice['sql'].format(searchdate))
        except fetcher.FetcherError as e:
            print(' - Error accessing DB: {0}'.format(e))
            sys.exit(1)
        localdb.save_data(notice['table'], rows, notice['uidfield'])
        print(' - {0} rows saved to table {1}'.format(len(rows), notice['table']))
    localdb.close_db()

    print('NoticeLAMA Complete!')
Example #29
def trim_recording(recording,
                   skip_if_exists=True,
                   skip_write=False,
                   debug_otsu_threshold=False,
                   debug_utterances=False):
    '''
    Trims the given recording and stores it to a file.
    Returns the file name, or None if this recording is permanently untrimmable for some reason.
    '''

    global _fetcher  # pylint: disable=global-statement
    if not _fetcher:
        _fetcher = fetcher.Fetcher('recordings', pool_size=1)

    output_file_name = trimmed_recording_file_name(recording)
    if skip_if_exists and os.path.exists(output_file_name):
        return output_file_name

    try:
        data = _fetcher.fetch_cached(recording.audio_url)
    except fetcher.FetchError as ex:
        logging.error(f'Error fetching {recording.recording_id}: {ex}')
        return None

    try:
        sound = pydub.AudioSegment.from_file(io.BytesIO(data), 'mp3')
    except Exception as ex:  # pylint: disable=broad-except
        # These errors can get extremely long.
        logging.error(
            f'Failed to decode audio file for {recording.url} '
            f'(cache file {_fetcher.cache_file_name(recording.audio_url)}): {str(ex)[:5000]}'
        )
        return None

    # pydub does everything in milliseconds, and so do we, unless otherwise
    # specified.
    sound = sound[:1000 * _AUDIO_SCAN_DURATION]
    sound = sound.set_channels(1)
    sound = sound.set_frame_rate(_AUDIO_SAMPLE_RATE)

    min_duration = round(1000 * _MIN_AUDIO_DURATION)
    max_duration = round(1000 * _MAX_AUDIO_DURATION)
    padding_duration = round(1000 * _AUDIO_PADDING_DURATION)
    fade_duration = round(1000 * _AUDIO_FADE_DURATION)

    # Find longest utterance, the end of which is a good place to cut off the
    # sample.
    utterances = list(
        _detect_utterances(sound, debug_otsu_threshold=debug_otsu_threshold))
    # This should not happen, because the threshold is such that there is
    # always something above it.
    assert utterances, f'No utterances detected in {recording.url}'

    # Exhaustively search all possible ranges of consecutive utterances that we
    # want to include, and score them by desirability.
    candidates = []
    # We try to start only from the first three utterances, because recordists
    # tend to trim the audio such that it starts on a relevant bit. This seems
    # to help to avoid including (unlabelled) background species and other
    # noise.
    for i, start_utterance in enumerate(utterances[:3]):
        start_ms = max(0, start_utterance[0] - padding_duration)
        utterance_duration = 0
        for end_utterance in utterances[i:]:
            utterance_duration += end_utterance[1] - end_utterance[0]
            end_ms = min(len(sound), end_utterance[1] + padding_duration)
            total_duration = end_ms - start_ms
            # First criterion: it must be long enough. More negative is worse.
            longness_score = min(0.0, total_duration - min_duration)
            # Second criterion: it must not be too long. More negative is worse.
            shortness_score = min(0.0, max_duration - total_duration)
            # Third criterion: it must have a good utterance to silence ratio.
            utterance_score = utterance_duration / total_duration
            score_vector = (longness_score, shortness_score, utterance_score)
            candidates.append((score_vector, (start_ms, end_ms)))
    _, (start_ms, end_ms) = max(candidates)
    duration_ms = end_ms - start_ms
    # Never go above the maximum duration.
    if duration_ms > max_duration:
        end_ms = start_ms + max_duration
    # Never go below the minimum duration.
    if duration_ms < min_duration:
        # Try adding half of the missing duration before and half after.
        margin_ms = (min_duration - duration_ms + 1) // 2
        start_ms -= margin_ms
        end_ms += margin_ms
        if start_ms < 0:
            # Running up to the start of the sound.
            start_ms = 0
            end_ms = min(len(sound), start_ms + min_duration)
        if end_ms > len(sound):
            # Running up to the end of the sound.
            end_ms = len(sound)
            start_ms = max(0, end_ms - min_duration)

    sound = sound[start_ms:end_ms]
    sound = sound.fade_in(fade_duration).fade_out(fade_duration)
    sound = pydub.effects.normalize(sound)

    if debug_utterances:
        import subprocess  # pylint: disable=import-outside-toplevel
        import tempfile  # pylint: disable=import-outside-toplevel
        from PIL import Image, ImageDraw  # pylint: disable=import-outside-toplevel
        sonogram_data = _fetcher.fetch_cached(recording.sonogram_url_full)
        sonogram = Image.open(io.BytesIO(sonogram_data))
        draw = ImageDraw.Draw(sonogram, mode='RGBA')

        def highlight(start_ms, end_ms, color):
            # Fixed parameters for full sonograms drawn by xeno-canto.
            # Visual left margin is at 62px, but it seems the audio starts
            # 4px later.
            margin_left = 66
            px_per_ms = 75 / 1000
            left_px = margin_left + px_per_ms * start_ms
            right_px = margin_left + px_per_ms * end_ms
            draw.rectangle(((left_px, 0), (right_px, sonogram.height)),
                           fill=color)

        highlight(start_ms, end_ms, (128, 128, 255, 32))
        for (s, e) in utterances:
            highlight(s, e, (128, 255, 128, 64))
        with tempfile.NamedTemporaryFile() as f:
            sonogram.save(f, format='png')
            subprocess.run(['eog', f.name], check=False)

    if skip_write:
        return None

    tmp_file_name = output_file_name + '.tmp'
    sound.export(tmp_file_name,
                 format='ogg',
                 parameters=['-q:a', str(_AUDIO_QUALITY)])
    os.rename(tmp_file_name, output_file_name)

    return output_file_name
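The max(candidates) call above leans on Python's lexicographic tuple comparison: a clip that violates the minimum or maximum length loses on the first or second score before the utterance ratio is ever consulted. A tiny worked illustration (score values invented):

candidates = [
    ((-500.0, 0.0, 0.9), (0, 1500)),  # too short: loses on the first axis
    ((0.0, 0.0, 0.6), (0, 2500)),     # within both length bounds
    ((0.0, 0.0, 0.8), (500, 3000)),   # same bounds, better utterance ratio
]
_, (start_ms, end_ms) = max(candidates)
assert (start_ms, end_ms) == (500, 3000)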
Example #30
#!/usr/bin/env python3
"""Download all the images referenced from an images.ndjson file.

Usage: ./fetch_images.py images.ndjson
"""

import fileinput
import json
import os

import requests

import fetcher

if __name__ == '__main__':
    f = fetcher.Fetcher()
    os.makedirs('images', exist_ok=True)

    for i, line in enumerate(fileinput.input()):
        image = json.loads(line)
        url = image.get('imageLink')
        if not url:
            continue
        path = os.path.join('images', os.path.basename(url))
        if os.path.exists(path):
            continue
        try:
            content = f.fetch_url(url)
        except requests.exceptions.HTTPError as e:
            if e.response.status_code == 404:
                continue  # sadly, some images are just missing