Example #1
def upgrade_model_batch():
    enable_crossfeature = Config(ALGOCONFIG_PATH).get('enable-cross-feature')
    _model = generate_model(REALTIME_MODE, cross_features=enable_crossfeature)
    model_name = export_model(_model, REALTIME_MODE, enable_crossfeature)
    Config(ALGOCONFIG_PATH).set('model-name/realtime', model_name)
    _model = generate_model(BATCH_MODE, cross_features=enable_crossfeature)
    model_name = export_model(_model, BATCH_MODE, enable_crossfeature)
    Config(ALGOCONFIG_PATH).set('model-name/batch', model_name)
Example #2
 def __init__(self):
     access_key_id = Config(DEPLOY_CONFIG_PATH).get('nectar/access_key_id')
     access_key_secret = Config(DEPLOY_CONFIG_PATH).get('nectar/access_key_secret')
     region = RegionInfo(name='melbourne', endpoint='nova.rc.nectar.org.au')
     self.ec2_conn = boto.connect_ec2(aws_access_key_id=access_key_id,
                                      aws_secret_access_key=access_key_secret,
                                      is_secure=True,
                                      region=region,
                                      port=8773,
                                      path='/Services/Cloud',
                                      validate_certs=False)
Example #3
def export_stats(stats, mode, enable_crossfeature):
    json_data = stats.to_json()
    stat_name = _generate_stat_name(mode, enable_crossfeature)
    filename = MODEL_FILE_BASE_PATH + "{}.json".format(stat_name)
    if mode == REALTIME_MODE:
        Config(ALGOCONFIG_PATH).set('model-name/train-stats/realtime',
                                    "{}.json".format(stat_name))
    else:
        Config(ALGOCONFIG_PATH).set('model-name/train-stats/batch',
                                    "{}.json".format(stat_name))
    with open(filename, "w") as json_file:
        json_file.write(json_data)
Example #4
 def add_instance(self):
     instance_type = Config(DEPLOY_CONFIG_PATH).get('nectar/instance_type')
     keypair_name = Config(DEPLOY_CONFIG_PATH).get('nectar/keypair_name')
     security_groups = Config(DEPLOY_CONFIG_PATH).get('nectar/security_groups')
     zone = Config(DEPLOY_CONFIG_PATH).get('nectar/zone')
     image_id = self._get_image_id(Config(DEPLOY_CONFIG_PATH).get('nectar/image_name'))
     reservation = self.ec2_conn.run_instances(image_id=image_id,
                                               key_name=keypair_name,
                                               instance_type=instance_type,
                                               security_groups=security_groups,
                                               placement=zone)
     instance = reservation.instances[0]
     instance_info = {"id": instance.id, "ip": instance.private_ip_address, "state": instance.state}
     logger.info('Successfully created instance: {}'.format(instance_info))
     return instance_info
Example #5
 def _calc(self, data, mode):
     model_name = Config(ALGOCONFIG_PATH).get('model-name/{}'.format(
         mode_name[mode]))
     stat_path = MODEL_FILE_BASE_PATH + Config(ALGOCONFIG_PATH).get(
         'model-name/train-stats/{}'.format(mode_name[mode]))
     model = import_model(model_name)
     train_stats = import_stats(stat_path)
     data['vector']['label'] = -1
     # enable_crossfeature is a module-level flag read from the algorithm
     # config (see the full module in Example #17).
     feature_vectors = generate_feature_vectors(
         items=[data], mode=mode, cross_features=enable_crossfeature)
     test_set = generate_dataset(data_list=feature_vectors,
                                 mode=mode,
                                 cross_features=enable_crossfeature)
     test_set.pop('label')
     normed_test_data = norm(test_set, train_stats)
     test_predictions = model.predict(normed_test_data).flatten()
     return _limit_range(test_predictions[0], lower=0, upper=1)
Example #6
 def __init__(self, db_name):
     server_config = Config(CONFIG_PATH).get('couchdb')
     self.client = CouchDB(server_config['username'],
                           server_config['password'],
                           url=server_config['server_addr'],
                           connect=True,
                           auto_renew=True)
     self.select_db(db_name)
Example #7
def uclassify_topics(text):
    try:
        key = Config(CONFIG_PATH).get('uclassify/apikey')
        url = 'https://api.uclassify.com/v1/uClassify/Topics/classify'
        data = {'texts': [text]}
        header = {'Authorization': 'Token {}'.format(key), 'Content-Type': 'application/json'}
        response = requests.post(url=url, data=json.dumps(data), headers=header)
        if response.status_code == 200:
            resp_data = ast.literal_eval(response.text)[0]['classification']
            res = {x['className']: x['p'] for x in resp_data}
            return res
        else:
            keygen = UclassifyKeyGenerator()
            new_key = keygen.get_key()
            keygen.close()
            Config(CONFIG_PATH).set('uclassify/apikey', new_key)
            return uclassify_topics(text)
    except Exception as ex:
        logger.error('Error when uClassifying text: {}'.format(ex))
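As an aside: `ast.literal_eval(response.text)` in the success branch only works while the uClassify payload happens to be valid Python literal syntax. A hypothetical variant using requests' built-in JSON decoding, assuming the same response shape (className/p pairs), and raising on HTTP errors instead of recursing with a fresh key:

import requests

def classify_topics(text, key):
    # Hypothetical rewrite of the success branch above, not the repo's code.
    url = 'https://api.uclassify.com/v1/uClassify/Topics/classify'
    headers = {'Authorization': 'Token {}'.format(key),
               'Content-Type': 'application/json'}
    response = requests.post(url, json={'texts': [text]}, headers=headers)
    response.raise_for_status()  # surface HTTP errors early
    classification = response.json()[0]['classification']
    return {x['className']: x['p'] for x in classification}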
Example #8
 def add_volume(self, instance_id):
     zone = Config(DEPLOY_CONFIG_PATH).get('nectar/zone')
     volume = self.ec2_conn.create_volume(size=50, zone=zone)
     info = self.get_volume_info(volume.id)
     while info["status"] != "available":
         logger.info("Volume state: " + info["status"])
         time.sleep(3)
         info = self.get_volume_info(volume.id)
     successful = volume.attach(instance_id, "/dev/vdc")
     if successful:
         logger.info("volume: " + volume.id + " set and bounded to /dev/vdc")
     else:
         logger.error("ERROR: volume creating failed.")
Example #9
class Mrisa:
    port = Config(CONFIG_PATH).get('mrisa/port')
    service = Config(CONFIG_PATH).get('mrisa/server-path')
    proc = None

    def start(self):
        try:
            self.proc = subprocess.Popen(['python3', self.service, '--port', str(self.port)])
            time.sleep(3)
            logger.info('MRISA service started at port {}.'.format(self.port))
        except Exception as ex:
            logger.error('Error occurred when starting MRISA: {}'.format(ex))

    def get_image_info(self, image_url):
        data = json.dumps({"image_url": image_url})
        url = 'http://localhost/search'

        storage = BytesIO()
        c = pycurl.Curl()
        c.setopt(c.URL, str(url))
        c.setopt(c.PORT, self.port)
        c.setopt(c.HTTPHEADER, ['Content-Type: application/json'])
        c.setopt(pycurl.POST, 1)
        c.setopt(pycurl.POSTFIELDS, data)
        c.setopt(c.WRITEFUNCTION, storage.write)
        c.perform()
        c.close()

        returned_json = storage.getvalue().decode('UTF-8')
        return json.loads(returned_json)

    def stop(self):
        logger.info('Terminating process pid: {}'.format(self.proc.pid))
        self.proc.terminate()
        self.proc.kill()
        time.sleep(1)
        os.system('kill -9 {}'.format(self.proc.pid))
        logger.info('MRISA service stopped.')
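A note on `stop()` above: it layers `terminate()`, `kill()`, and a shell `kill -9` on top of one another. The usual graceful-then-forceful pattern needs only `subprocess`; a hypothetical helper, not the repo's code:

import subprocess

def stop_process(proc, timeout=5):
    # Ask politely first (SIGTERM), escalate to SIGKILL only on timeout.
    proc.terminate()
    try:
        proc.wait(timeout=timeout)
    except subprocess.TimeoutExpired:
        proc.kill()
        proc.wait()  # reap the child to avoid leaving a zombie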
Example #10
    def __vectorize(self, info1, info2, mode):
        # todo: handle modes.
        result = {}
        profile1, profile2 = info1['profile'], info2['profile']
        logger.info('Evaluating username...')
        result['username'] = singleword_similarity(profile1, profile2)
        logger.info('Evaluating profile image...')
        result['profileImage'] = profile_img_sim(profile1['image'],
                                                 profile2['image'])
        logger.info('Evaluating self description text...')
        result['self_desc'] = self.semantic_sim.similarity(
            profile1.get('description', ''), profile2.get('description', ''))

        logger.info('Evaluating self description url...')
        result['desc_overlap_url_count'] = desc_overlap_url(
            {
                'platform': info1['platform'],
                'username': profile1['username'],
                'desc': profile1.get('description', '')
            }, {
                'platform': info2['platform'],
                'username': profile2['username'],
                'desc': profile2.get('description', '')
            })

        posts1 = info1.get('posts_content', [])
        posts2 = info2.get('posts_content', [])

        if mode == REALTIME_MODE and len(posts1) == 0 and len(posts2) == 0:
            return result

        logger.info('Evaluating writing style...')
        result['writing_style'] = writing_style_sim(posts1, posts2)
        logger.info('Evaluating post similarity...')

        max_post_enabled = bool(
            Config(ALGOCONFIG_PATH).get('max-post-similarity-enabled'))
        if max_post_enabled:
            result['post_text'] = self.max_post_sim(posts1, posts2)
        else:
            result['post_text'] = self.overall_post_sim(posts1, posts2)

        logger.info('Evaluating uClassify topical similarity...')
        result['uclassify'] = uclassify_similarity(
            " ".join(_get_post_text(posts1)), " ".join(_get_post_text(posts2)))

        logger.info('Calculation finished.')
        return result
Example #11
import ast
import random
import os

from constant import CONFIG_PATH
from similarity.Config import Config

CONFIG = Config(CONFIG_PATH)
PAIRING_FILE_PATH = CONFIG.get("sampler/pairing_file")
MODE = CONFIG.get("sampler/mode")
INSTA_FOLDER = CONFIG.get("sampler/instagram_folder")
TWITTER_FOLDER = CONFIG.get("sampler/twitter_folder")


class Sampler:
    def __init__(self):
        self.items = []
        if MODE == 'file':
            with open(PAIRING_FILE_PATH, "r") as file:
                for line in file:
                    self.items.append(ast.literal_eval(line))
            # Map each pairing in both directions for direct lookups.
            self.ins_to_twi_dict = {x['instagram']: x['twitter'] for x in self.items}
            self.twi_to_ins_dict = {x['twitter']: x['instagram'] for x in self.items}
Example #12
 def __init__(self):
     keyfile = Config(CONFIG_PATH).get('google/keyfile_path')
     os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = keyfile
Example #13
 def __init__(self):
     subscription_key = Config(CONFIG_PATH).get('microsoft/subscription_key')
     self.headers = {'Ocp-Apim-Subscription-Key': subscription_key}
Example #14
    def getResultTable(self):
        while True:
            try:
                target = self.browser.find_element_by_id("ctl00_ContentPlaceHolder1_tblChart")
                return target
            except Exception as e:
                time.sleep(5)
                continue

    def close(self):
        self.browser.quit()


tea_enabled = bool(Config(ALGOCONFIG_PATH).get('tea-enabled'))


def query_writing_style(text):
    if len(text) == 0:
        return {}
    # Keep only characters in the Basic Multilingual Plane (drops emoji etc.).
    text = ''.join(c for c in text if c <= '\uFFFF')
    try:
        readability_metrics = dict(readability.getmeasures(text, lang='en')['readability grades'])
    except ValueError:
        readability_metrics = 0.5
        logger.warning('Text is empty; readability defaults to 0.5.')
    if tea_enabled:
        text = ' '.join(text.split(' ')[:300])
        tea = TeaUtils()
        tea_metrics = tea.getTextMetrics(text)
Example #15
 def __init__(self):
     self.config = Config(CONFIG_PATH)
     self.semantic_sim = TensorSimilarity()
Example #16
import datetime

from constant import CONFIG_PATH
from similarity.Config import Config

INFO = 0
WARNING = 1
ERROR = 2

enable_color = Config(CONFIG_PATH).get('logger/color-enabled')
write_to_file = Config(CONFIG_PATH).get('logger/write-to-file')
log_file_path = Config(CONFIG_PATH).get('logger/logfile-path')


def info(message):
    __print_message(message, INFO)


def error(message):
    __print_message(message, ERROR)


def warning(message):
    __print_message(message, WARNING)


def __print_message(message, level):
    color = {INFO: '\033[0m', WARNING: '\033[93m ', ERROR: '\033[91m '}
    prefix = {INFO: '(INFO)', WARNING: '(WARNING)', ERROR: '(ERROR)'}
    time = str(datetime.datetime.now().strftime("[%Y-%m-%d %H:%M:%S]"))
    log_body = '{prefix} {time} {message}'.format(prefix=prefix[level], time=time, message=message)
    # The snippet is cut off here in the source; the lines below are an
    # assumed completion that honors the module-level flags read above.
    if enable_color:
        log_body = color[level] + log_body + '\033[0m'
    print(log_body)
    if write_to_file:
        with open(log_file_path, 'a') as log_file:
            log_file.write(log_body + '\n')
Example #17
from automation.batch.ModelUpgrade import import_model, import_stats, generate_dataset, norm, generate_feature_vectors
from constant import ALGOCONFIG_PATH, BATCH_MODE, REALTIME_MODE, MODEL_FILE_BASE_PATH
from similarity.Config import Config

mode_name = {BATCH_MODE: 'batch', REALTIME_MODE: 'realtime'}
enable_crossfeature = Config(ALGOCONFIG_PATH).get('enable-cross-feature')


class OverallSimilarityCalculator:
    def _calc(self, data, mode):
        model_name = Config(ALGOCONFIG_PATH).get('model-name/{}'.format(
            mode_name[mode]))
        stat_path = MODEL_FILE_BASE_PATH + Config(ALGOCONFIG_PATH).get(
            'model-name/train-stats/{}'.format(mode_name[mode]))
        model = import_model(model_name)
        train_stats = import_stats(stat_path)
        data['vector']['label'] = -1
        feature_vectors = generate_feature_vectors(
            items=[data], mode=mode, cross_features=enable_crossfeature)
        test_set = generate_dataset(data_list=feature_vectors,
                                    mode=mode,
                                    cross_features=enable_crossfeature)
        test_set.pop('label')
        normed_test_data = norm(test_set, train_stats)
        test_predictions = model.predict(normed_test_data).flatten()
        return _limit_range(test_predictions[0], lower=0, upper=1)

    def calc(self, data):
        if has_full_property(data):
            return self._calc(data, BATCH_MODE)
        return self._calc(data, REALTIME_MODE)
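Every example in this listing drives the same `Config` helper from `similarity.Config`: `get` and `set` take slash-separated key paths such as `'model-name/realtime'` or `'nectar/zone'`. The class itself never appears above, so as orientation only, here is a minimal sketch of what such a helper could look like, assuming a JSON file backing; the real implementation may differ:

import json

class Config:
    """Sketch (assumption) of a config helper keyed by slash-separated paths."""

    def __init__(self, path):
        self.path = path
        with open(path) as f:
            self.data = json.load(f)

    def get(self, key):
        # Walk nested dicts one path segment at a time, e.g.
        # get('model-name/realtime') -> data['model-name']['realtime'].
        node = self.data
        for part in key.split('/'):
            node = node[part]
        return node

    def set(self, key, value):
        # Create intermediate dicts as needed, then persist the change.
        parts = key.split('/')
        node = self.data
        for part in parts[:-1]:
            node = node.setdefault(part, {})
        node[parts[-1]] = value
        with open(self.path, 'w') as f:
            json.dump(self.data, f, indent=2)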