Пример #1
0
def on_mouse(event, x, y, flags, param):
    """
    OpenCV mouse callback: on left-click, find the detected keypoint
    closest to the clicked pixel and record it.

    Args:
        event: OpenCV mouse event code; only cv2.EVENT_LBUTTONDOWN is handled.
        x, y: click coordinates in image space.
        flags: OpenCV event flags (unused).
        param: user data forwarded by OpenCV; passed straight to
            get_closest_keypoint, so presumably the keypoint list —
            TODO confirm against the caller.

    Side effects:
        Appends to corner_image_points and corner_keypoints (assumed
        module-level accumulators — not visible in this chunk).

    NOTE(review): uses Python 2 print statements; Python 2 only.
    """
    #=====[ Step 1: only accept button down events	]=====
    if not event == cv2.EVENT_LBUTTONDOWN:
        return

    #=====[ Step 2: get corresponding points	]=====
    print "=====[ Enter board coordinates: ]====="
    # board_x = int(raw_input ('>>> x: '))
    # board_y = int(raw_input ('>>> y: '))
    # board_point = (board_x, board_y)
    # nearest detected keypoint to the clicked pixel
    keypoint = get_closest_keypoint((x, y), param)
    image_point = keypoint.pt
    print "Stored as: "
    # print "	- board_point: ", board_point
    print "	- image_point: ", image_point
    # corner_board_points.append (board_point)
    corner_image_points.append(image_point)
    corner_keypoints.append(keypoint)
    print_message("ESC to exit")
Пример #2
0
def on_mouse(event, x, y, flags, param):
	"""
	OpenCV mouse callback: on left-click, find the detected keypoint
	closest to the clicked pixel and record it.

	Duplicate of the previous snippet, tab-indented variant.

	Args:
		event: OpenCV mouse event code; only cv2.EVENT_LBUTTONDOWN is handled.
		x, y: click coordinates in image space.
		flags: OpenCV event flags (unused).
		param: forwarded to get_closest_keypoint — presumably the keypoint
			list; TODO confirm against the caller.

	Side effects:
		Appends to corner_image_points and corner_keypoints (assumed
		module-level accumulators — not visible in this chunk).

	NOTE(review): uses Python 2 print statements; Python 2 only.
	"""
	#=====[ Step 1: only accept button down events	]=====
	if not event == cv2.EVENT_LBUTTONDOWN:
		return

	#=====[ Step 2: get corresponding points	]=====
	print "=====[ Enter board coordinates: ]====="
	# board_x = int(raw_input ('>>> x: '))
	# board_y = int(raw_input ('>>> y: '))
	# board_point = (board_x, board_y)
	# nearest detected keypoint to the clicked pixel
	keypoint = get_closest_keypoint ((x, y), param)
	image_point = keypoint.pt
	print "Stored as: "
	# print "	- board_point: ", board_point
	print "	- image_point: ", image_point
	# corner_board_points.append (board_point)
	corner_image_points.append (image_point)
	corner_keypoints.append (keypoint)
	print_message ("ESC to exit")
Пример #3
0
def tf_idf():
    """Run the tf-idf calculation over every word-count file.

    Ensures the tf-idf output directory exists, then feeds each file
    found under settings.WORD_COUNT_FILE_PATH to Calculator.tf_idf.
    """
    util.print_message('Start counting tf-idf...', debug=True)

    # Create the output directory on first run.
    out_dir = settings.TFIDF_FILE_PATH
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    calculator = Calculator()
    for name in util.get_file_list(settings.WORD_COUNT_FILE_PATH):
        util.print_message('Processing tf-idf on {0}', arg=name)
        calculator.tf_idf(name, None, None)
Пример #4
0
def youtube_dl_download(url):
    """Download the audio at *url* as mp3 via youtube-dl.

    Args:
        url: YouTube/SoundCloud URL to download.

    On failure the user is notified and the program exits
    (util.print_message with exit=True).
    """
    util.print_message('Downloading mp3 from URL...')
    try:
        with youtube_dl.YoutubeDL(conf.YOUTUBE_DL_OPTS) as ydl:
            ydl.download([url])
    # Narrowed from a bare "except:" so that SystemExit and
    # KeyboardInterrupt still propagate normally.
    except Exception:
        util.print_message('Failed to download file from {}\n'
                           'Please try again...'.format(url),
                           color='red',
                           exit=True)
Пример #5
0
def push_to_mongo(db, message):
    """Insert *message* into the ``arch`` collection unless its URL is
    already stored.

    Args:
        db: Mongo database handle exposing an ``arch`` collection.
        message: object whose ``__dict__`` is the document to insert;
            must have a ``url`` attribute used as the uniqueness key.
    """
    try:
        record = db.arch.find_one({'url': message.url})
        if record is None:
            db.arch.insert_one(message.__dict__)
        else:
            util.print_message(
                '{0} already exists in mongo'.format(message.url))
    # Narrowed from a bare except; report the exception itself rather
    # than sys.exc_info()[0], which only named the exception class.
    except Exception as e:
        util.print_message('Exception happened {0}'.format(e))
Пример #6
0
def main():
    """Entry point: take a YouTube/SoundCloud URL (argv or prompt),
    download and fingerprint it, then offer to add the matched track
    to the user's Spotify playlist.
    """
    if len(sys.argv) == 2:
        url = sys.argv[1]
    elif len(sys.argv) > 2:
        # TODO: Reject all and prompt or quit with usage.
        error_warning('WARNING: Only using first command line argument',
                      color='yellow')
        # Honour the warning above: actually use the first argument
        # (previously nothing was assigned, so the user was re-prompted).
        url = sys.argv[1]

    try:
        util.display_intro()

        # NOTE(review): this statement pair was corrupted in the scraped
        # source ("raw_input(...: '******'url' not in locals():");
        # reconstructed from context — confirm against the upstream repo.
        username = raw_input('Enter your Spotify username: ')
        if 'url' not in locals():
            url = raw_input('Please enter a valid YouTube/SoundCloud URL: ')

        youtube_dl_download(url)

        result = acoustid_search()
        percentage = float(result[0]) * 100
        match_track = clean_track(str(result[2]))
        match_artist = clean_artist(str(result[3]))
        util.print_message('{:04.2f}% MATCH: {} by {}' \
                           .format(percentage, match_track, match_artist),
                           color='cyan')

        track = spotted.search_song(match_track, match_artist)
        full_title = '{0} by {1}'.format(track['name'],
                                         track['artists'][0]['name'])

        # Loop until the user gives a usable y/n answer.
        while True:
            try:
                prompt = 'Would you like to add {} to {}? (y/n): ' \
                         .format(full_title, spotted.playlist['name'])
                status = raw_input(prompt).lower()[0]
                if status == 'n':
                    util.print_message('Thanks for trying! Come again.',
                                       exit=True)
                    return
                elif status == 'y':
                    spotted.add_song_to_playlist(track)
                    return
                else:
                    util.print_message('Select from either Y or N.',
                                       color='red')
            # raw_input returning nothing indexable lands here.
            except TypeError:
                util.print_message('Enter a valid option: Y or N.',
                                   color='red')

    except KeyboardInterrupt:
        util.print_message('\nExiting Spotted on Spotify...', exit=True)
Пример #7
0
def count_words():
    """Compute per-document term frequencies for every post in Mongo.

    Each post's body is handed to Calculator.tf, which is given a
    '<post-id>.txt' file name for its output.
    """
    util.print_message('Start counting words...', debug=True)

    if not os.path.exists(settings.WORD_COUNT_FILE_PATH):
        os.mkdir(settings.WORD_COUNT_FILE_PATH)

    db = MongoClient(settings.MONGO_HOST, 27017)[settings.MONGO_DATABASE]
    calculator = Calculator()

    for post in db.arch.find():
        file_name = '{0}.txt'.format(post['_id'])
        calculator.tf(file_name, post['url'], post['body'])
Пример #8
0
def authenticate(username,password,token):
    """Validate credentials against the SOAP backend and render a result page.

    Args:
        username: login name; falsy means "no credentials yet", which just
            redirects back to the auth form.
        password: password forwarded to the backend.
        token: request token; the string "0" marks the status-page flow.

    Relies on module globals: bend_url (SOAP endpoint), print_exceptions,
    plus ip / flog / util (assumed module-level — not visible in this chunk).
    """
    global print_exceptions,bend_url

    pagename = "Authentication %s"
    caption = "Authentication %s"
    msg = 'authentication %s!'

    try:
        if username:
            bend = SOAPpy.SOAPProxy(bend_url)
            success,ref = bend.authenticate(ip,username,password,token)

            xref = ref
            if token == "0":
                xref = "/cgi-bin/auth.py?a=z&token=0"

            if success:
                flog.info("User %s authentication successful from %s" % (username,ip))
                xmsg = msg % ("successful! You will be now redirected to originally requested site",)
                # Fixed: compared against the integer 0 although token is
                # handled as the string "0" everywhere else (see xref
                # above), so this branch could never fire.
                if token == "0":
                    xmsg = msg % ("successful! You will be redirected to your status page",)

                util.print_message(pagename % ("succeeded",),
                                    caption % ("succeeded",),
                                    xmsg,
                                    redirect_url=xref,
                                    redirect_time=0)

            else:
                flog.info("User %s authentication failed from %s" % (username,ip))
                util.print_message(pagename % ("failed",),
                                caption % ("failed",),
                                msg % ("failed",),
                                redirect_url=xref,
                                redirect_time=1)
        else:
            # No username submitted yet: bounce back to the auth form.
            util.print_message(" ",
                            " ",
                            "...",
                            redirect_url="/cgi-bin/auth.py?token=%s" % (token,),
                            redirect_time=0)

    # Python-2.6+/3 compatible except syntax (was "except Exception,e").
    except Exception as e:
        flog.error("exception caught: %s" % (str(e),))
        if print_exceptions:
            # Fixed: the backend URL was concatenated OUTSIDE the call —
            # added to print_message's return value (TypeError on None)
            # and never shown. It is now part of the message argument.
            util.print_message("Whoops!","Exception caught!",
                               str(e) + " " + traceback.format_exc(100) +
                               " backend URL %s" % (bend_url,))
        else:
            util.print_message("Authentication failed","Authentication failed","There was a problem to validate your credentials. Please contact system administrator.")
Пример #9
0
def get_keywords_map():
    """Count, across all documents, how many documents include each word
    among their top-20 tf-idf terms.

    Returns nothing; the sorted map is persisted via util.save_sorted_dict.
    """
    util.print_message('Start counting keywords map...', debug=True)
    keywords_map = {}
    for file_name in util.get_file_list(settings.TFIDF_FILE_PATH):
        util.print_message('Processing keywords on {0}', arg=file_name)
        tf_idf_dict = util.file2dict(settings.TFIDF_FILE_PATH, file_name)
        # Top 20 terms by score, highest first.
        top_items = sorted(tf_idf_dict.items(),
                           key=operator.itemgetter(1), reverse=True)[:20]
        for word, _score in top_items:
            # dict.get replaces the Python-2-only dict.has_key check.
            keywords_map[word] = keywords_map.get(word, 0) + 1
    util.save_sorted_dict(settings.DATA_PATH, settings.KEYWORD_MAP, keywords_map)
Пример #10
0
def test_keywords():
    """Smoke-check the tf-idf output: print each of the first 10 posts'
    titles followed by their top-10 scored terms.

    NOTE(review): the .encode('gbk') / .decode('utf8').encode('gbk')
    round-trips assume Python 2 byte strings and a GBK-capable console —
    confirm before porting, since Python 3 str has no .decode().
    """
    client = MongoClient(settings.MONGO_HOST, settings.MONGO_PORT)
    db = client[settings.MONGO_DATABASE]

    cursor = db.arch.find().limit(10)
    for post in cursor:
        # 'id' shadows the builtin; left unchanged to keep code byte-identical.
        id = post['_id']
        title = post['title']
        # tf-idf scores were written to '<post-id>.txt' by the tf step.
        tf_idf_dict = util.file2dict(
            settings.TFIDF_FILE_PATH, str(id) + '.txt')
        util.print_message(title.encode('gbk'))
        # Top 10 terms by tf-idf score, highest first.
        for item in sorted(tf_idf_dict.items(), key=operator.itemgetter(1), reverse=True)[:10]:
            util.print_message('{0}:{1}'.format(item[0].decode(
                'utf8').encode('gbk'), item[1]))
 def respond(self, response):
     """POST the completed job's output back to the frontend poller.

     Args:
         response: job output payload sent as the 'complete' request body.
     """
     msg = "Sending complete job to the dashboard with response {}".format(response)
     print_message(msg, 'ok')
     request = json.dumps({
         'job_id': self.options.get('job_id'),
         'request': 'complete',
         'output': response
     })
     url = 'http://' + FRONTEND_POLLER_HOST
     # The previous try/except re-raised the exception unchanged
     # ("except Exception as e: raise e"), which only truncated the
     # traceback — let errors propagate naturally instead.
     requests.post(url, request)
Пример #12
0
def update_keywords():
    """Tag every post in Mongo with its top-20 tf-idf terms.

    Reads each post's '<id>.txt' tf-idf file and stores the 20
    highest-scoring words back on the document under 'tags'.
    """
    util.print_message('Start updating keywords...', debug=True)
    client = MongoClient(settings.MONGO_HOST, settings.MONGO_PORT)
    db = client[settings.MONGO_DATABASE]

    for post in db.arch.find():
        # 'post_id' instead of 'id' to avoid shadowing the builtin.
        post_id = post['_id']
        tf_idf_dict = util.file2dict(settings.TFIDF_FILE_PATH, str(post_id) + '.txt')
        # Top 20 words by score, highest first.
        tags = [word for word, _score in
                sorted(tf_idf_dict.items(),
                       key=operator.itemgetter(1), reverse=True)[:20]]
        util.print_message(' '.join(tags))
        db.arch.update_one({'_id': post_id}, {'$set': {'tags': tags}})
 def sanitize_input(self, options):
     """Keep only the options this handler understands.

     Args:
         options: raw option mapping from the job request.

     Returns:
         A new dict holding just the recognised keys; unknown keys are
         reported via print_message and dropped.
     """
     print_message(options, 'ok')
     expected_params = ('server', 'username', 'password', 'path', 'job_id')
     validated = {}
     for name in options:
         if name not in expected_params:
             print_message('Unexpected option {}'.format(name))
             continue
         validated[name] = options[name]
     return validated
Пример #14
0
def count_all_words():
    """Aggregate the per-file word counts into one global frequency file.

    Sums every word's count across all files under WORD_COUNT_FILE_PATH
    and writes 'word:count' lines, most frequent first, to
    settings.WORD_COUNT_TOTAL.
    """
    util.print_message('Start counting all words...', debug=True)
    all_word_count = {}
    file_names = util.get_file_list(settings.WORD_COUNT_FILE_PATH)
    for file_name in file_names:
        util.print_message('Processing all word count on {0}', arg=file_name)
        word_count_dict = util.file2dict(settings.WORD_COUNT_FILE_PATH, file_name, True)
        # .items()/.get() replace the Python-2-only iteritems()/has_key().
        for key, value in word_count_dict.items():
            all_word_count[key] = all_word_count.get(key, 0) + value
    # 'with' guarantees the file is closed even if a write fails.
    with open(settings.WORD_COUNT_TOTAL, 'w+') as f:
        for item in sorted(all_word_count.items(), key=operator.itemgetter(1), reverse=True):
            f.write('{0}:{1}\n'.format(item[0], item[1]))
Пример #15
0
def onpress (event):
    """Key-press callback: on ESC, persist the BoardImage and quit.

    Any key other than 'escape' is ignored.
    """
    if event.key != 'escape':
        return

    # Save the current board state before exiting.
    image_name = 'board_image.bi'
    board_image.save(image_name)
    print_message("BoardImage saved to " + image_name)

    exit()
 def handle(self):
     """Log in to the diagnostics viewer and upload the configured path.

     Returns:
         JSON string {'dataset_id': ...} on success, -1 on any failure
         (login or upload errors are reported via print_debug).

     NOTE(review): the fallback constant is spelled DIAG_VIEWER_HSOT
     (sic) — presumably a typo for ..._HOST; referenced as-is because
     it is defined elsewhere in this module.
     """
     server = self.options.get('server', DIAG_VIEWER_HSOT)
     client = DiagnosticsViewerClient(server=server, cert=False)

     try:
         user_id, user_key = client.login(
             self.options['username'],
             self.options['password'])
     except Exception as err:
         print_debug(err)
         return -1

     upload_path = self.options.get('path')
     print_message('Uploading directory {}'.format(upload_path))
     try:
         dataset_id = client.upload_package(upload_path)
     except Exception as err:
         print_debug(err)
         return -1
     return json.dumps({'dataset_id': dataset_id})
    def handle(self):
        """Run the configured job as a shell command and archive its output.

        Launches self.call_args through a shell, waits for completion,
        then gzips the configured output_dir into the user's run_archives
        folder (unless that archive already exists).

        Returns:
            The (stdout, stderr) tuple from Popen.communicate().

        NOTE(review): shell=True with a joined argument string executes
        through the shell — safe only if call_args is fully trusted;
        confirm the args never contain user-controlled text.
        """
        args = ' '.join(self.call_args)
        msg = "Starting job: {}".format(args)
        print_message(msg, 'ok')
        process = Popen(args, stdin=PIPE, stdout=PIPE, stderr=PIPE, shell=True)
        # Blocks until the job finishes.
        output = process.communicate()
        archive_path = '{prefix}{user}/run_archives/{run_name}_{id}/'.format(
            prefix=USER_DATA_PREFIX,
            user=self.config.get('user', 'default'),
            id=self.config.get('job_id'),
            run_name=self.config.get('run_name', 'default'))
        archive_filename = archive_path + 'output_archive'

        # make_archive appends the '.tar.gz' suffix itself, hence the
        # suffix-free archive_filename below.
        if not os.path.exists(archive_filename + '.tar.gz'):
            try:
                print_message(
                    'creating output archive {}'.format(archive_filename + '.tar.gz'),
                    'ok')
                shutil.make_archive(
                    archive_filename,
                    'gztar',
                    self.config.get('output_dir'))
            # No-op handler: kept only to mark archiving as best-effort-visible;
            # the exception still propagates.
            except:
                raise
        else:
            print_message('archive {} already exists'.format(archive_filename + '.tar.gz'))
        return output
Пример #18
0
def recover_privkey_helper(pubkey, keysize, keyparams):
    """Brute-force the prime factor p of *pubkey* using leaked half-size
    key parameters, then rebuild the private key from it.

    The first len(r2) bytes of n are treated as E(p XOR r1) XOR r2 under
    the half-size key pair from *keyparams*; candidate r2 and r1 values
    are searched until unmasking yields a safe prime p (p and (p-1)//2
    both prime).

    Args:
        pubkey: RSA public key whose modulus n embeds the masked prime.
        keysize: bit size of *pubkey*; keyparams are taken at keysize // 2.
        keyparams: provider of the half-size pubkey/privkey and base64
            r1/r2 masks (project type — semantics inferred from use;
            confirm against its definition).

    Returns:
        The recovered private key, or None if the search space is
        exhausted.
    """
    print('[*] [RSA-%d] Finding the prime number p...' % keysize)
    n_bytes = util.to_bytes(pubkey.n)

    # Half-size helper key material used to mask p.
    kp_keysize = keysize // 2
    kp_pubkey = RSA.importKey(keyparams.get_pubkey(kp_keysize))
    kp_r2_bytes = base64.b64decode(keyparams.get_r2(kp_keysize))
    kp_r2 = util.to_number(kp_r2_bytes)
    kp_privkey = RSA.importKey(keyparams.get_privkey(kp_keysize))

    # Leading bytes of n hold E(p XOR r1) XOR r2.
    encrypted_p_xor_r1_xor_r2 = n_bytes[0:len(kp_r2_bytes)]
    original_kp_r1_bytes = base64.b64decode(keyparams.get_r1(kp_keysize))

    for i in range(0, 0xffffff):  # candidate r2 values, incremented from the leak
        kp_r2_bytes = util.to_bytes(kp_r2)
        encrypted_p_xor_r1 = bytes_xor(encrypted_p_xor_r1_xor_r2, kp_r2_bytes)
        p_xor_r1_bytes = util.rsa_decrypt(kp_privkey,
                                          bytes(encrypted_p_xor_r1))
        kp_r1 = util.to_number(original_kp_r1_bytes)
        for j in range(0, 0xa):  # r1 permutation rounds
            for k in range(0, 0xa):  # r1 increments within a round
                kp_r1_bytes = util.to_bytes(kp_r1)
                p_bytes = bytes_xor(p_xor_r1_bytes, kp_r1_bytes)
                p = util.to_number(p_bytes)
                util.print_message('[RSA-%d] [%d:%d:%d] %d' %
                                   (keysize, i, j, k, p))
                # Accept only safe primes: p and (p-1)/2 both prime.
                if sympy.isprime(p) and sympy.isprime((p - 1) // 2):
                    privkey = util.rsa_construct_private_key(p, pubkey)
                    if privkey:
                        print('\n[+] [RSA-%d] p = %d' % (keysize, p))
                        print('[+] [RSA-%d] Private key is recovered' %
                              keysize)
                        return privkey
                kp_r1 += 1
            kp_r1 = permute_r_key(kp_r1, keysize)
        kp_r2 += 1
    print('\n[-] [RSA-%d] Cannot recover the private key' % keysize)
    return None
Пример #19
0
def idf():
    """Compute inverse document frequency for every dictionary word.

    Counts, per word, the number of word-count files containing it
    (restricted to words present in the user dictionary), saves that
    document-frequency map, then derives idf = log(N / (1 + df)).

    Returns:
        dict mapping word -> idf score (also persisted via
        util.save_sorted_dict).
    """
    word_doc_freq = {}
    util.print_message('Start counting idf...', debug=True)
    tag_dict = util.load_dictionary(settings.DATA_PATH, settings.USER_DICT)
    file_names = util.get_file_list(settings.WORD_COUNT_FILE_PATH)
    for file_name in file_names:
        util.print_message('Processing all word count on {0}', arg=file_name)
        word_count_dict = util.file2dict(settings.WORD_COUNT_FILE_PATH, file_name)
        # Plain iteration / 'in' replace the Python-2-only
        # iterkeys()/has_key().
        for key in word_count_dict:
            if key not in tag_dict:
                continue
            word_doc_freq[key] = word_doc_freq.get(key, 0) + 1
    util.save_sorted_dict(settings.DATA_PATH, settings.WDF_FILE, word_doc_freq)

    doc_number = len(file_names)
    # +1 in the denominator avoids division by zero for unseen words.
    inverse_doc_freq = {k: math.log(float(doc_number) / (1 + v))
                        for k, v in word_doc_freq.items()}

    util.save_sorted_dict(settings.DATA_PATH, settings.IDF_FILE, inverse_doc_freq)
    return inverse_doc_freq
Пример #20
0
def get_first_valid_encoding(results):
    """Return the first acoustid result whose track and artist names are
    ASCII-representable after normalising curly quotes, or exit.

    Args:
        results: iterable of (score, id, track, artist) tuples; entries
            containing a None field are skipped.

    Returns:
        The matching result as a list, with smart quotes in the track
        and artist fields replaced by ASCII quotes. Exits the program
        (util.print_message with exit=True) when nothing qualifies.
    """
    def _to_ascii(text):
        # Normalise typographic quotes, then require strict ASCII.
        # The original used encode('ascii', 'ignore'), which can never
        # raise, so the UnicodeEncodeError filter below was dead code
        # and non-ASCII results were passed through unfiltered.
        text = text.replace(u'\u2018', u'\'') \
                   .replace(u'\u2019', u'\'') \
                   .replace(u'\u201C', u'\"') \
                   .replace(u'\u201D', u'\"')
        text.encode('ascii')
        return text

    for result in results:
        if all(_ is not None for _ in result):
            result = list(result)
            try:
                result[2] = _to_ascii(result[2])
                result[3] = _to_ascii(result[3])
                return result
            except UnicodeEncodeError:
                continue
    util.print_message(
        'ERROR: There were no matches for your track with valid '
        'ascii encodings. Could not search for your track on '
        'Spotify.',
        color='red',
        exit=True)
Пример #21
0
def finishup(config, job_sets, state_path, event_list, status, display_event,
             thread_list, kill_event):
    """Post-run cleanup: remove temp files, email a status report, and
    shut down worker threads.

    Args:
        config: master config dict.
        job_sets: per-year-set job containers used to build the report.
        state_path: unused here (kept for interface compatibility).
        event_list: EventList receiving progress messages.
        status: 1 when every job succeeded, anything else otherwise.
        display_event: threading.Event that stops the display loop.
        thread_list: worker threads to join.
        kill_event: threading.Event signalled to stop the workers.
    """
    message = 'Performing post run cleanup'
    event_list.push(message=message)
    # Fixed inverted condition: the original removed the tmp directory
    # when no_cleanup was True and printed "Not cleaning up" when
    # cleanup was actually requested — exactly backwards.
    if config.get('global').get('no_cleanup', False):
        print('Not cleaning up temp directories')
    else:
        tmp = os.path.join(config['global']['output_path'], 'tmp')
        if os.path.exists(tmp):
            rmtree(tmp)

    message = 'All processing complete' if status == 1 else "One or more job failed"
    emailaddr = config.get('global').get('email')
    if emailaddr:
        event_list.push(
            message='Sending notification email to {}'.format(emailaddr))
        try:
            if status == 1:
                msg = 'Post processing for {exp} has completed successfully\n'.format(
                    exp=config['global']['experiment'])
            else:
                msg = 'One or more job(s) for {exp} failed\n\n'.format(
                    exp=config['global']['experiment'])

            # One report line per job, grouped by year set.
            for job_set in job_sets:
                msg += '\nYearSet {start}-{end}: {status}\n'.format(
                    start=job_set.set_start_year,
                    end=job_set.set_end_year,
                    status=job_set.status)
                for job in job_set.jobs:
                    if job.status == JobStatus.COMPLETED:
                        if job.config.get('host_url'):
                            msg += '    > {job} - COMPLETED  :: output hosted :: {url}\n'.format(
                                url=job.config['host_url'], job=job.type)
                        else:
                            msg += '    > {job} - COMPLETED  :: output located :: {output}\n'.format(
                                output=job.output_path, job=job.type)
                    elif job.status in [JobStatus.FAILED, JobStatus.CANCELLED]:
                        output_path = os.path.join(
                            job.config['run_scripts_path'],
                            '{job}_{start:04d}_{end:04d}.out'.format(
                                job=job.type,
                                start=job.start_year,
                                end=job.end_year))
                        msg += '    > {job} - {status} :: console output :: {output}\n'.format(
                            output=output_path,
                            job=job.type,
                            status=job.status)
                    else:
                        msg += '    > {job} - {state}\n'.format(
                            job=job.type, state=job.status)
                msg += '\n\n'

            m = Mailer(src='*****@*****.**', dst=emailaddr)
            m.send(status=message, msg=msg)
        except Exception as e:
            # Email is best-effort; failures are logged, not fatal.
            print_debug(e)
    event_list.push(message=message)
    display_event.set()
    print_type = 'ok' if status == 1 else 'error'
    print_message(message, print_type)
    logging.info("All processes complete")
    for t in thread_list:
        # Setting per-thread is redundant but harmless; preserved.
        kill_event.set()
        t.join(timeout=1.0)
    time.sleep(2)
Пример #22
0
def poll():
    """Fetch the next job from the frontend poller, dispatch it to the
    matching handler, run it, and report the result back.

    Returns:
        (retval, job_id): retval is 0 on success, -1 on handler or
        protocol errors, -2 when no job is queued, -3 when the poller
        cannot be reached; job_id is only set when an option-parsing
        error can still be attributed to a job.
    """
    params = {'request': 'next'}
    url = 'http://' + FRONTEND_POLLER_HOST
    options = {}
    try:
        job = requests.get(url, params).content
        job = json.loads(job)
        print_message(job, 'ok')
    except ConnectionError as ce:
        print_message("Error requesting job from frontend poller")
        # Fixed: previously printed the undefined name 'e', raising a
        # NameError instead of logging the ConnectionError.
        print_debug(ce)
        return -3, None

    if not job:
        return -2, None

    try:
        options['user'] = job.get('user')
        options['run_name'] = job.get('run_name')
        options['job_id'] = job.get('job_id')
        if not job.get('diag_type'):
            options['diag_type'] = 'amwg'
        print_message('job options: {}'.format(options), 'ok')
    except Exception as e:
        print_debug(e)
        return -1, options['job_id']

    run_type = job.get('run_type')
    if not run_type:
        print_message("No run type in job request")
        return -1, None

    # Pick a handler for the requested run type.
    if run_type == 'diagnostic':
        try:
            sets = json.loads(job.get('diag_set'))
        except Exception as e:
            print_message('Unable to unpack diag_set')
            sets = '5'  # fall back to the default diagnostic set
        options['set'] = sets
        options['model_path'] = job.get('model_path')
        options['obs_path'] = job.get('obs_path')
        options['output_dir'] = job.get('output_dir')
        print_message('Got a new job with parameters:\n{}'.format(options), 'ok')
        handler = StartDiagHandler(options)
    elif run_type == 'model':
        handler = StartModelHandler(options)
    elif run_type == 'update':
        handler = UpdateJobHandler(options)
    elif run_type == 'upload_to_viewer':
        options['server'] = job.get('request_attr').get('server')
        options['username'] = job.get('request_attr').get('username')
        options['password'] = job.get('request_attr').get('password')
        options['path'] = job.get('request_attr').get('path')
        handler = UploadOutputHandler(options)
    else:
        print_message("Unrecognized request: {}".format(run_type))
        return -1, None

    try:
        response = handler.handle()
    except Exception as e:
        print_message("Error in job handler with options \n {}".format(options))
        print_debug(e)
        return -1, None
    try:
        print_message('Sending message to frontend poller: {}'.format(response))
        handler.respond(response)
    except Exception as e:
        print_message("Error sending response to job \n {}".format(options))
        print_debug(e)
        return -1, None

    return 0, None
Пример #23
0
        handler.respond(response)
    except Exception as e:
        print_message("Error sending response to job \n {}".format(options))
        print_debug(e)
        return -1, None

    return 0, None


if __name__ == "__main__":
    while(True):
        retval, id = poll()
        if retval == 0:
            continue
        elif retval == -2:
            print_message('No new jobs', 'ok')
            time.sleep(5)
            continue
        elif retval == -3:
            time.sleep(5)
            continue
        if retval:
            print_message('Job run error')
            # send error message to frontend poller
            request = json.dumps({
                'job_id': id,
                'request': 'error',
            })
            url = 'http://' + FRONTEND_POLLER_HOST
            try:
                r = requests.post(url, request)
Пример #24
0
            % (ip, ref, token))

        print auth_page % (style, tok_str, str(port) + "-" + tenant_name +
                           "-" + str(tenant_index))
    else:
        if ip:
            bend = SOAPpy.SOAPProxy(bend_url)
            logon_info = bend.whois(ip)

            flog.debug("logon_info: " + str(logon_info))

            if logon_info != []:

                if logoff == "0":
                    if status > 0:
                        print logged_page_small % (style_small, logon_info[1])
                    else:
                        print logged_page % (style, logon_info[1])
                else:
                    bend.deauthenticate(ip)
                    print auth_page % (style, "0", str(port) + "-" +
                                       tenant_name + "-" + str(tenant_index))

            else:
                print auth_page % (style, "0", str(port) + "-" + tenant_name +
                                   "-" + str(tenant_index))

except Exception, e:
    util.print_message("Error", "Error occured:", str(e), "/error.html")
    flog.error("auth.py: exception caught: " + str(e))
Пример #25
0
def acoustid_search():
    """Fingerprint the downloaded file and look it up on AcoustID.

    Returns:
        The best ASCII-encodable match (see get_first_valid_encoding).
        Exits with an error message on fingerprinting/lookup failure or
        when no match is found.
    """
    util.print_message('Analyzing audio fingerprint...')
    try:
        matches = acoustid.match(conf.ACOUSTID_API_KEY,
                                 conf.YOUTUBE_DL_OPTS['outtmpl'])
        search = list(matches)
    except acoustid.NoBackendError:
        util.print_message('ERROR: Chromaprint library/tool not found.',
                           color='red',
                           exit=True)
    except acoustid.FingerprintGenerationError:
        util.print_message('ERROR: Audio fingerprint could not be calculated.',
                           color='red',
                           exit=True)
    except acoustid.WebServiceError as exc:
        util.print_message('ERROR: Web service request failed: {}.' \
                           .format(exc.message), color='red', exit=True)
    except Exception as ecx:
        util.print_message('ERROR: {}'.format(ecx.args[1]),
                           color='red',
                           exit=True)
    if not search:
        util.print_message(
            'Failed to find a match for your track in the '
            'MusicBrainz database.',
            color='red',
            exit=True)
    # Highest-scoring results first.
    return get_first_valid_encoding(sorted(search, reverse=True))
 def __init__(self, options=None):
     """Store the sanitised job options and echo them for the log.

     Args:
         options: raw option mapping; filtered through sanitize_input.
     """
     self.options = self.sanitize_input(options)
     print_message(self.options)
 def sanitize_input(self):
     """Translate self.config into a metadiags command-line argument list.

     Returns:
         A list of argument strings starting with 'metadiags', or -1
         when the configured package is not AMWG.
     """
     args = ['metadiags']
     path_prefix = "path=" + USER_DATA_PREFIX
     for x in self.config:
         print_message('key: {}\nval: {}'.format(x, self.config.get(x)))
         option_key = ''
         option_val = ''
         if x == 'diag_type':
             option_key = '--package'
             # Check for valid package
             if self.config.get(x) != 'AMWG' and self.config.get(x) != 'amwg':
                 print_message("{} is not a valid package".format(self.config.get(x)))
                 return -1
             option_val = self.config.get(x)
         elif x == 'model_path':
             # Trailing space removed (was "--model ") so the assembled
             # command line uses single spaces like every other option.
             option_key = '--model'
             # Check for valid paths
             if os.path.exists(self.config.get(x)):
                 option_val = 'path=' + self.config.get(x) + ',climos=yes'
             else:
                 print_message('model_path {} does not exist'.format(self.config.get(x)))
         elif x == 'obs_path':
             option_key = '--obs'
             # Check for valid obs path
             if os.path.exists(self.config.get(x)):
                 option_val = 'path=' + self.config.get(x) + ',climos=yes'
             else:
                 # Fixed copy/paste: this message previously said 'model_path'.
                 print_message('obs_path {} does not exist'.format(self.config.get(x)))
         elif x == 'output_dir':
             option_key = '--outputdir'
             if not os.path.exists(self.config.get(x)):
                 print_message('output_dir {} does not exist'.format(self.config.get(x)))
             else:
                 option_val = self.config.get(x)
             print_message(option_val)
         elif x == 'set':
             option_key = '--set'
             sets = []
             for s in self.config.get(x):
                 if s not in self.allowed_sets:
                     print_message('invalid set: {}'.format(s))
                 else:
                     sets.append(s)
             # Check for valid set
             option_val = ' '.join(sets)
         #
         # etc etc etc moar options
         #
         else:
             # Parenthesised print works under Python 2 and 3
             # (was a Python-2-only print statement).
             print("Unrecognized option passed to diag handler: {}".format(x))
             continue
         args.append(option_key)
         args.append(option_val)
     return args
Пример #28
0
def initialize(argv, **kwargs):
    """
    Parse the commandline arguments, and setup the master config dict

    Parameters:
        argv (list): a list of arguments
        event_list (EventList): The main list of events
        kill_event (threading.Event): An event used to kill all running threads
        version (str): the current version number for processflow
        branch (str): the branch this version was built from

    Returns:
        (config, filemanager, runmanager) on success, otherwise
        (False, False, False). NOTE(review): a directory-setup failure
        calls sys.exit(1) instead of returning.
    """
    # Setup the parser
    pargs = parse_args(argv=argv)
    # --version: print and stop immediately, nothing else is set up
    if pargs.version:
        msg = 'Processflow version {}'.format(kwargs['version'])
        print msg
        sys.exit(0)
    # A config file path is mandatory; show usage when it is missing
    if not pargs.config:
        parse_args(print_help=True)
        return False, False, False
    event_list = kwargs['event_list']
    event = kwargs['kill_event']
    print_line(line='Entering setup', event_list=event_list)

    # check if globus config is valid, else remove it
    globus_config = os.path.join(os.path.expanduser('~'), '.globus.cfg')
    if os.path.exists(globus_config):
        try:
            conf = ConfigObj(globus_config)
        # NOTE(review): bare except deletes ~/.globus.cfg on ANY error,
        # including KeyboardInterrupt — consider `except Exception`
        except:
            os.remove(globus_config)

    if not os.path.exists(pargs.config):
        print "Invalid config, {} does not exist".format(pargs.config)
        return False, False, False

    # Check that there are no white space errors in the config file
    # (check_config_white_space returns the 1-based offending line, 0 if clean)
    line_index = check_config_white_space(pargs.config)
    if line_index != 0:
        print '''
ERROR: line {num} does not have a space after the \'=\', white space is required.
Please add a space and run again.'''.format(num=line_index)
        return False, False, False

    # read the config file and setup the config dict
    try:
        config = ConfigObj(pargs.config)
    except Exception as e:
        print_debug(e)
        print "Error parsing config file {}".format(pargs.config)
        parse_args(print_help=True)
        return False, False, False

    # run validator for config file
    messages = verify_config(config)
    if messages:
        for message in messages:
            print_message(message)
        return False, False, False

    try:
        setup_directories(pargs, config)
    except Exception as e:
        print_message('Failed to setup directories')
        print_debug(e)
        # hard exit: nothing can run without the project directory layout
        sys.exit(1)

    # resource_path: CLI override wins, otherwise use the installed share dir
    if pargs.resource_path:
        config['global']['resource_path'] = os.path.abspath(
            pargs.resource_path)
    else:
        config['global']['resource_path'] = os.path.join(
            sys.prefix, 'share', 'processflow', 'resources')

    # Setup boolean config flags
    config['global']['host'] = True if config.get('img_hosting') else False
    config['global']['always_copy'] = True if pargs.always_copy else False
    config['global']['dryrun'] = True if pargs.dryrun else False
    config['global']['debug'] = True if pargs.debug else False
    config['global']['verify'] = True if pargs.verify else False
    # NOTE(review): max_jobs stores False (not None) when the flag is absent
    config['global']['max_jobs'] = pargs.max_jobs if pargs.max_jobs else False

    # setup logging
    if pargs.log:
        log_path = pargs.log
    else:
        log_path = os.path.join(config['global']['project_path'], 'output',
                                'processflow.log')
    print_line(line='Log saved to {}'.format(log_path), event_list=event_list)
    # outside of tests, reset the logging module so basicConfig below
    # takes effect even if logging was already configured
    if not kwargs.get('testing'):
        from imp import reload
        reload(logging)
    config['global']['log_path'] = log_path
    # rotate any previous log to <log_path>.bak before filemode='w' truncates it
    if os.path.exists(log_path):
        logbak = log_path + '.bak'
        if os.path.exists(logbak):
            os.remove(logbak)
        copyfile(log_path, log_path + '.bak')
    log_level = logging.DEBUG if pargs.debug else logging.INFO
    logging.basicConfig(format='%(asctime)s:%(levelname)s: %(message)s',
                        datefmt='%m/%d/%Y %I:%M:%S %p',
                        filename=log_path,
                        filemode='w',
                        level=log_level)
    # silence noisy third-party loggers
    logging.getLogger('globus_sdk').setLevel(logging.ERROR)
    logging.getLogger('globus_cli').setLevel(logging.ERROR)

    logging.info("Running with config:")
    msg = json.dumps(config, sort_keys=False, indent=4)
    logging.info(msg)

    if pargs.max_jobs:
        print_line(line="running with maximum {} jobs".format(pargs.max_jobs),
                   event_list=event_list)

    if not config['global']['host'] or not config.get('img_hosting'):
        print_line(line='Not hosting img output', event_list=event_list)

    msg = 'processflow version {} branch {}'.format(kwargs['version'],
                                                    kwargs['branch'])
    logging.info(msg)

    # Copy the config into the input directory for safe keeping
    input_config_path = os.path.join(config['global']['project_path'], 'input',
                                     'run.cfg')
    try:
        copy(pargs.config, input_config_path)
    except:
        # best-effort copy; a failure here should not abort setup,
        # though the bare except also hides real errors
        pass

    if config['global']['always_copy']:
        msg = 'Running in forced-copy mode, previously hosted diagnostic output will be replaced'
    else:
        msg = 'Running without forced-copy, previous hosted output will be preserved'
    print_line(line=msg, event_list=event_list)

    # initialize the filemanager
    db = os.path.join(config['global'].get('project_path'), 'output',
                      'processflow.db')
    msg = 'Initializing file manager'
    print_line(msg, event_list)
    filemanager = FileManager(database=db,
                              event_list=event_list,
                              config=config)

    filemanager.populate_file_list()
    msg = 'Starting local status update'
    print_line(msg, event_list)

    filemanager.update_local_status()
    msg = 'Local status update complete'
    print_line(msg, event_list)

    msg = filemanager.report_files_local()
    print_line(msg, event_list)

    filemanager.write_database()
    all_data = filemanager.all_data_local()
    if all_data:
        msg = 'all data is local'
    else:
        msg = 'Additional data needed'
    print_line(msg, event_list)

    logging.info("FileManager setup complete")
    logging.info(str(filemanager))

    # globus transfer auth is only needed when some data is not local yet
    if all_data:
        print_line(line="skipping globus setup", event_list=event_list)
    else:
        if config['global'].get('local_globus_uuid'):
            endpoints = [endpoint for endpoint in filemanager.get_endpoints()]
            local_endpoint = config['global'].get('local_globus_uuid')
            if local_endpoint:
                endpoints.append(local_endpoint)
            msg = 'Checking authentication for {} endpoints'.format(endpoints)
            print_line(line=msg, event_list=event_list)
            setup_success = setup_globus(endpoints=endpoints,
                                         event_list=event_list)

            if not setup_success:
                print "Globus setup error"
                return False, False, False
            else:
                print_line(line='Globus authentication complete',
                           event_list=event_list)
    # setup the runmanager
    runmanager = RunManager(event_list=event_list,
                            event=event,
                            config=config,
                            filemanager=filemanager)
    runmanager.setup_cases()
    runmanager.setup_jobs()
    runmanager.write_job_sets(
        os.path.join(config['global']['project_path'], 'output', 'state.txt'))
    return config, filemanager, runmanager
Пример #29
0
def _build_net(network_type, embed_size, num_hidden_nodes, num_hidden_layers,
               dropout_rate):
    """Build the ranking network for the given architecture name.

    Parameters:
        network_type (str): "append" or "residual".
        embed_size (int): input embedding dimension.
        num_hidden_nodes (int): width of each hidden layer.
        num_hidden_layers (int): number of hidden layers.
        dropout_rate (float): dropout probability.

    Returns:
        The constructed AppendNet or ResidualNet (not yet moved to a device).

    Raises:
        ValueError: on an unrecognized network_type. Previously an unknown
            type left ``net`` unbound and the code crashed later with a
            confusing NameError.
    """
    if network_type == "append":
        return AppendNet(embed_size=embed_size,
                         num_hidden_nodes=num_hidden_nodes,
                         num_hidden_layers=num_hidden_layers,
                         dropout_rate=dropout_rate)
    if network_type == "residual":
        return ResidualNet(embed_size=embed_size,
                           num_hidden_nodes=num_hidden_nodes,
                           num_hidden_layers=num_hidden_layers,
                           dropout_rate=dropout_rate)
    raise ValueError("unknown network_type: {}".format(network_type))


def main():
    """Train a passage-ranking network, checkpointing after every epoch.

    Reads all settings from the command line (see get_opts), optionally
    restores a pretrained checkpoint, then runs num_epochs of training,
    appending the per-epoch loss to a CSV log and saving a full resumable
    checkpoint each epoch.
    """
    opts = get_opts()
    # Paths and device
    current_device = opts.device
    train_data_path = opts.data_dir
    pretrained_path = opts.pretrain_model_path
    model_path = opts.out_dir
    # training settings
    pretrained = opts.pretrained_option
    num_epochs = opts.num_epochs
    learning_rate = opts.learning_rate
    num_query = opts.num_query
    num_passage = opts.num_passage
    active_learning = opts.active_learning_stage
    # network settings
    network_type = opts.network_type
    embed_size = opts.embed_size
    num_hidden_nodes = opts.num_hidden_nodes
    num_hidden_layers = opts.num_hidden_layers
    dropout_rate = opts.dropout_rate

    if not os.path.exists(model_path):
        os.makedirs(model_path)
    # fixed seed so repeated runs are reproducible
    torch.manual_seed(318)

    if pretrained == "Yes":
        # Architecture hyperparameters come from the checkpoint, not the CLI,
        # so the restored weights always match the network shape.
        checkpoint = torch.load(pretrained_path)
        network_type = checkpoint['network_type']
        embed_size = checkpoint['embed_size']
        num_hidden_nodes = checkpoint['num_hidden_nodes']
        num_hidden_layers = checkpoint['num_hidden_layers']
        dropout_rate = checkpoint['dropout_rate']
        net = _build_net(network_type, embed_size, num_hidden_nodes,
                         num_hidden_layers, dropout_rate)
        net.load_state_dict(checkpoint['model'])
        net.to(current_device)
        optimizer = optim.Adam(net.parameters(), lr=learning_rate)
        optimizer.load_state_dict(checkpoint['optimizer'])
    else:
        net = _build_net(network_type, embed_size, num_hidden_nodes,
                         num_hidden_layers,
                         dropout_rate).to(current_device)
        optimizer = optim.Adam(net.parameters(), lr=learning_rate)
    print("Loading data")
    train_pos_dict, train_neg_dict, query_dict, passage_dict = load(
        train_data_path)
    print("Data successfully loaded.")
    print("Negative Pair dict size: " + str(len(train_neg_dict)))
    print("Positive Pair dict size: " + str(len(train_pos_dict)))
    print("Num of queries: " + str(len(query_dict)))
    print("Num of passages: " + str(len(passage_dict)))
    print("Finish loading.")

    # Encode the run configuration into the output file names
    arg_str = active_learning + "_" + network_type + "_" + str(
        num_query) + "_" + "query" + "_" + str(num_passage) + "_" + "passage"
    unique_path = model_path + arg_str + ".model"
    output_path = model_path + arg_str + ".csv"
    print("Total number of parameters: {}".format(net.parameter_count()))

    for ep_idx in range(num_epochs):
        train_loss = train(net, optimizer, opts, train_pos_dict,
                           train_neg_dict, query_dict, passage_dict)
        print_message([ep_idx, train_loss])
        # append this epoch's loss to the CSV log
        with open(output_path, mode='a+') as output:
            output_writer = csv.writer(output)
            output_writer.writerow([ep_idx, train_loss])
        # Save a full checkpoint (weights + optimizer + hyperparameters)
        # so training can be resumed with pretrained == "Yes"
        torch.save(
            {
                "model": net.state_dict(),
                "optimizer": optimizer.state_dict(),
                "n_epoch": ep_idx,
                "train_loss": train_loss,
                "network_type": network_type,
                "embed_size": embed_size,
                "num_hidden_nodes": num_hidden_nodes,
                "num_hidden_layers": num_hidden_layers,
                "dropout_rate": dropout_rate,
                "num_passage": num_passage,
                "num_query": num_query
            }, unique_path)
Пример #30
0

def push_to_mongo(db, message):
    """Insert *message* into the ``arch`` collection if its URL is new.

    Parameters:
        db: a Mongo database handle exposing an ``arch`` collection with
            ``find_one`` / ``insert_one``.
        message: an object with a ``url`` attribute; its ``__dict__`` is
            stored as the document.

    Any error is reported via util.print_message and swallowed, so a
    failed insert never crashes the consumer loop.
    """
    try:
        record = db.arch.find_one({'url': message.url})
        if record is None:
            db.arch.insert_one(message.__dict__)
        else:
            util.print_message(
                '{0} already exists in mongo'.format(message.url))
    except Exception as e:
        # Was a bare `except:` that also swallowed SystemExit and
        # KeyboardInterrupt, and logged only the exception *class*
        # (sys.exc_info()[0]); log the exception instance instead.
        util.print_message('Exception happened {0}'.format(e))


if __name__ == '__main__':
    # Consume crawled pages from Kafka, tag each via tf-idf, and archive
    # the result in Mongo.
    tfidf_calc = Calculator()
    mongo_db = connect_mongo()
    kafka_consumer = KafkaConsumer(settings.KAFKA_TOPIC,
                                   bootstrap_servers=settings.KAFKA_SERVERS)
    for record in kafka_consumer:
        page = util.json2obj(record.value)
        util.print_message(page.url)
        # per-page document name derived from the URL hash
        doc_name = util.get_md5_hash(page.url)
        scores = tfidf_calc.tf_idf(doc_name + '.txt', page.url, page.body)
        archived = Post(page.url, page.title, page.body,
                        getattr(page, 'keywords', ''),
                        getattr(page, 'excerpt', ''),
                        pick_tags(scores))
        push_to_mongo(mongo_db, archived)