Пример #1
0
def main():
        """Save station-to-station pairs for a fixed list of train codes.

        For every train code the ordered stop names are read from
        ``train_line_stop``; each ordered pair of distinct stops is then
        stored through ``DataService.save_s2s`` together with the pinyin
        alias of both stations.
        """
        temp_trains = [
            'C6903', 'C6905', 'C6907', 'C6909', 'C6911', 'C6913', 'C6915', 'C6917',
            'C6919', 'C6921', 'C6923', 'C6925', 'C6953', 'C6955', 'C6957', 'C6959',
            'C6961', 'C6963', 'C6965', 'C6967', 'C6969', 'C6971', 'C6973', 'C6975',
            'C6902', 'C6904', 'C6906', 'C6908', 'C6910', 'C6912', 'C6918', 'C6920',
            'C6922', 'C6924', 'C6926', 'C6952', 'C6954', 'C6956', 'C6958', 'C6960',
            'C6962', 'C6966', 'C6968', 'C6970', 'C6972', 'C6974', 'C6976', 'C6929',
            'C6914', 'C6928', 'C6930', 'C6978', 'C6964', 'C6980', 'C6977', 'C6979',
            'C6981',
        ]
        stop_query = "select name from train_line_stop where train_code = '%s' order by sequence"

        def pinyin_for(station):
            # Prefer the stored alias; fall back to a generated pinyin string.
            alias = DataService.get_alia_by_station(station)
            if alias == '':
                alias = py_util.hanzi2pinyin_split(string=station, split="", firstcode=False)
            return alias

        for train_code in temp_trains:
            rows = DataBaseUtil.select(stop_query % train_code)
            if len(rows) <= 0:
                continue
            # First column of every row is the stop name, already in sequence order.
            stations = [row[0] for row in rows]
            for src_pos, start_station in enumerate(stations):
                for dst_pos, end_station in enumerate(stations):
                    if src_pos == dst_pos:
                        continue
                    try:
                        start_py = pinyin_for(start_station)
                        end_py = pinyin_for(end_station)
                        DataService.save_s2s(start_station, start_py, end_station, end_py)
                    except:
                        t, v, tb = sys.exc_info()
Пример #2
0
def register_topic(topic):
    """Create an agent for ``topic`` from the JSON request body and insert it.

    The request body must carry ``agent_ip`` and ``agent_name``. Returns
    whatever ``DataService.insertAgent`` returns.
    """
    payload = request.get_json()
    # Read both fields before building any object, in the same order as before.
    agent_ip, agent_name = payload['agent_ip'], payload['agent_name']
    service = DataService(Agent("", topic, agent_name, agent_ip), "", "")
    return service.insertAgent()
Пример #3
0
class DataServiceTest(unittest.TestCase):
    # Unit tests for DataService, exercised through a mocked data access object.

    def __init__(self, methodName='runTest'):
        # Keep TestCase's own default so the class can still be instantiated
        # without an explicit method name.
        super().__init__(methodName)
        self.dao = None     # DOC
        self.service = None # SUT

    def setUp(self):
        # Fresh stand-in for DataAccessObject before every test, so no
        # recorded calls or configured behaviour leak between tests.
        self.dao = Mock()

    def test_csv_data(self):
        # Setup
        self.dao.read_data.return_value = [0.8273, 0.7822, 0.9731, 0.1239, 0.9898]
        self.service = DataService(self.dao)

        # Exercise
        values = self.service.csv_data()

        # State verification
        expected = "0.8273,0.7822,0.9731,0.1239,0.9898"
        self.assertEqual(expected, values)
        # Behavioral verification
        self.dao.read_data.assert_called_once()
        # Explicit count as well: on mock versions without assert_called_once
        # the call above would be auto-created and pass silently.
        self.assertEqual(1, self.dao.read_data.call_count)

    def test_data_access_error(self):
        # Setup
        self.dao.read_data.side_effect = DataAccessError('Can not read data!')
        self.service = DataService(self.dao)
        # Exercise
        with self.assertRaises(ServiceError):
            self.service.csv_data()
Пример #4
0
 def test_data_access_error(self):
     # A DataAccessError raised by the DAO must surface as a ServiceError.
     # Setup: stand-in for DataAccessObject whose read_data always fails
     self.dao = failing_dao = Mock()
     failing_dao.read_data.side_effect = DataAccessError('Can not read data!')
     self.service = DataService(failing_dao)
     # Exercise and verify
     self.assertRaises(ServiceError, self.service.csv_data)
Пример #5
0
 def parse_content(self, content, link_job):
     """Parse a JSON shop listing and persist every shop it contains.

     ``content`` is a JSON document whose top-level ``list`` array holds
     shop objects with ``categoryId``, ``cityId`` and ``id``. Each shop is
     saved through ``DataService.save_dp_shop`` with a URL built from
     ``self.shop_url_pattern``. ``link_job`` is not used by this method.

     A failure on one shop is logged and does not stop the remaining shops.
     Errors from decoding ``content`` or a missing ``list`` key propagate.
     """
     # Named ``shops`` rather than ``list`` so the builtin is not shadowed.
     shops = json.loads(content)['list']
     for shop in shops:
         try:
             category_id = shop['categoryId']
             city_id = shop['cityId']
             shop_id = shop['id']
             url = self.shop_url_pattern % shop_id
             DataService.save_dp_shop(url, shop_id, city_id, category_id)
         except Exception:
             # Exception (not a bare except) so KeyboardInterrupt and
             # SystemExit still stop the crawl.
             t, v, tb = sys.exc_info()
             log.error("%s,%s,%s" % (t, v, traceback.format_tb(tb)))
Пример #6
0
    def test_csv_data(self):
        # csv_data() must join the DAO's values with commas and read them once.
        # Setup
        readings = [0.8273, 0.7822, 0.9731, 0.1239, 0.9898]
        self.dao = Mock()  # Mock() replaces DataAccessObject
        self.dao.read_data.return_value = readings
        self.service = DataService(self.dao)

        # Exercise
        actual = self.service.csv_data()

        # State verification
        self.assertEqual("0.8273,0.7822,0.9731,0.1239,0.9898", actual)
        # Behavioral verification
        read_data = self.dao.read_data
        read_data.assert_called_once()
        self.assertEqual(1, read_data.call_count)
Пример #7
0
def data_service_handler():
    """Return the current tenant's DataService, registering one on first use."""
    tenant = tenant_handler.tenant()
    existing = tenant_handler.handler('data', 'data', tenant)
    if existing is not None:
        return existing
    # No handler registered yet for this tenant: create and register one.
    return tenant_handler.register_handler(
        'data', tenant, DataService(tenant, app.logger))
def sample_test():
    """Sample from randomly initialised parameters and print the result.

    Parameters are drawn from a fixed seed, so the random inputs handed to
    ``ModelService.sample`` are reproducible.
    """
    data, char_to_ix, ix_to_char = DataService.get_data()
    vocab_size = len(set(data))
    data_size = len(data)

    np.random.seed(2)
    n_a = 100
    # Dict entries are evaluated top to bottom, which keeps the random draws
    # in the order Wax, Waa, Wya, b, by.
    parameters = {
        "Wax": np.random.randn(n_a, vocab_size),
        "Waa": np.random.randn(n_a, n_a),
        "Wya": np.random.randn(vocab_size, n_a),
        "b": np.random.randn(n_a, 1),
        "by": np.random.randn(vocab_size, 1),
    }

    sampled = ModelService.sample(parameters, char_to_ix, 0)
    print("Sampling:")
    print("list of sampled indices:\n", sampled)
    print("list of sampled characters:\n", [ix_to_char[ix] for ix in sampled])
Пример #9
0
# API object bound to `app`, carrying the service's version, title and
# description metadata.
# NOTE(review): `doc='/api/'` is presumably the path of the generated API
# documentation - confirm against the Api class in use.
api = Api(
    app,
    version='1.0',
    title='Data service API',
    description='API for QWC Data service',
    default_label='Data edit operations',
    doc='/api/',
)
# disable verbose 404 error message
app.config['ERROR_404_HELP'] = False

# Setup the Flask-JWT-Extended extension
jwt = jwt_manager(app, api)

# create data service
data_service = DataService(app.logger)

# Api models
# 'CRS Properties' model: a single required string field `name`, described
# as an OGC CRS URN.
geojson_crs_properties = create_model(api, 'CRS Properties', [
    [
        'name',
        fields.String(required=True,
                      description='OGC CRS URN',
                      example='urn:ogc:def:crs:EPSG::3857')
    ],
])

geojson_crs = create_model(
    api, 'CRS', [[
        'type',
        fields.String(required=True, description='CRS type', example='name')
Пример #10
0
 def parse_content(self,content,link_job):
     """Parse a train timetable response and persist its stops and station pairs.

     ``content`` is JSON whose ``data.data`` entry is a list of stop records
     (keys read here: ``train_class_name``, ``start_time``, ``arrive_time``,
     ``station_no``, ``station_name``, ``stopover_time``). ``link_job.task``
     supplies ``train_no`` and ``train_code``.

     First every stop is saved through ``DataService.save_train_stop``; then
     every ordered pair of distinct saved stations is stored through
     ``DataService.save_s2s``. Save failures are logged and skipped.

     Raises ``Exception`` when a record other than the first reports
     ``station_no`` 1.
     """
     json_obj = json.loads(content)
     data = json_obj['data']['data']
     typeName = data[0]['train_class_name']
     start_time = data[0]['start_time']
     pre_time = start_time
     duration = 0
     name_list = {}
     days = 0
     train_no = link_job.task.train_no
     # Save the stop data
     for i,d in enumerate(data):
         arrive_time = d['arrive_time']
         depart_time = d['start_time']
         # The first stop has no arrival; it is stored as the string 'null'.
         if i==0 :
             arrive_time = 'null'
         if i != 0:
             # NOTE(review): compareSS presumably compares two clock times and a
             # negative result is treated as crossing into the next day - confirm.
             if compareSS(pre_time,arrive_time)<0:
                 days +=1
             # NOTE(review): duration accumulates getDaysSS(...) across stops;
             # confirm whether a running sum (rather than the latest value) is intended.
             duration = duration + getDaysSS(start_time,d['arrive_time'],days)
             pre_time = arrive_time
             # A later record claiming to be station 1 aborts the whole parse.
             if int(d['station_no'])==1:
                 raise  Exception
         stayTime = 0
         # Only intermediate stops carry a stopover time.
         if i!=0 and i!= len(data)-1 :
             stayTime = d['stopover_time']
             try:
                 # Keep the text before the minutes marker; fall back to 0
                 # when the marker is missing or the value cannot be sliced.
                 stayTime = stayTime[0:str(stayTime).index('分')]
             except Exception as e:
                 stayTime=0
                 # raise e
         # The last stop has no departure; it is stored as the string 'null'.
         if i == len(data)-1:
             depart_time = 'null'
         try:
             train_stop = TrainStop(link_job.task.train_code,d['station_name'],int(d['station_no']),arrive_time,stayTime,days,duration,depart_time,typeName,train_no)
             DataService.save_train_stop(train_stop)
             log.info((link_job.task.train_code,d['station_name'],int(d['station_no']),arrive_time,stayTime,days,duration,depart_time,typeName,train_no))
             # Only stops that were saved successfully take part in the pairing below.
             name_list.update({d['station_no']:d['station_name']})
         except:
             t, v, tb = sys.exc_info()
             log.error("%s,%s,%s" % (t, v, traceback.format_tb(tb)))
     # Save the station-to-station pairs
     kvs = name_list.items()
     # i and j are positional counters used to skip pairing a station with itself.
     i = -1
     for ki,vi in kvs:
         i += 1
         j = -1
         for kj,vj in  kvs:
             j += 1
             if i == j:
                 continue
             start_station = vi
             end_station = vj
             try:
                 # An empty alias falls back to a pinyin string built from the name.
                 start_py = DataService.get_alia_by_station(start_station)
                 if start_py == '':
                     start_py =  py_util.hanzi2pinyin_split(string=start_station, split="", firstcode=False)
                 end_py = DataService.get_alia_by_station(end_station)
                 if end_py == '':
                     end_py =  py_util.hanzi2pinyin_split(string=end_station, split="", firstcode=False)
                 DataService.save_s2s(start_station,start_py,end_station,end_py)
             except:
                 t, v, tb = sys.exc_info()
                 log.error("%s,%s,%s" % (t, v, traceback.format_tb(tb)))
Пример #11
0
from data_service import DataService
from model_service import ModelService

# Load the data together with both character/index lookup tables, then hand
# them to ModelService.model with verbose output enabled.
data, char_to_ix, ix_to_char = DataService.get_data()
parameters = ModelService.model(data, ix_to_char, char_to_ix, verbose=True)
Пример #12
0
from environment import Environment
from data_service import DataService
import threading
import time

'''the application iterates at a frequency defined at runtime, triggering the data-service task on each iteration'''


def iterate():
    '''Run the data-service task once, then requeue itself on a timer thread.

    The next run is scheduled ``freq`` seconds after this run started. If the
    task takes longer than ``freq``, the next run starts immediately instead
    of being scheduled with a negative delay.
    '''
    deadline = time.time() + freq
    ds.task()
    # Compute the remaining wait once so the printed value and the actual
    # timer delay agree, and clamp it at zero in case the task overran.
    remaining = max(0.0, deadline - time.time())
    print('completed.. waiting {} seconds before next run'.format(
        int(round(remaining))))
    threading.Timer(remaining, iterate).start()


# Module-level state read by iterate(): the service instance and the run
# period in seconds.
ds = DataService()
freq = Environment.get_frequency()
# Start the first run; iterate() schedules every following run itself.
iterate()
Пример #13
0
def getTopic(gitUrl):
    """Look up the topic for ``gitUrl`` through DataService, print and return it."""
    ds = DataService()
    result = ds.get_topic(gitUrl)
    # Single-argument print(...) produces the same output on Python 2 and 3;
    # the former print statements were a SyntaxError on Python 3.
    print('**********')
    print(result)
    return result
Пример #14
0
#!/usr/bin/env python3

from config import Config
from data_service import DataService

from parser import run

import logging
logging.basicConfig(filename='parse.log', level=logging.INFO)

if __name__ == '__main__':
    # Build the data service from configuration, then start parsing at the
    # configured start URL.
    config = Config()
    db_url = config.get('db_connection_url')
    db_name = config.get('db_name')
    data_service = DataService(db_url, db_name)
    start_url = config.get('start_url')
    run(start_url, data_service)
Пример #15
0
def main():
    """Exploratory script over news sources and stored article data.

    Pulls articles per news source, loads every document of the
    ``historic_political_article_data`` collection from a local MongoDB,
    caps each article's entities at five, inserts the cleaned articles into
    ``cleaned_breitbart_test`` and finally collects type-hierarchy paths of
    highly relevant entities and keywords. Nothing is returned.
    """

    data_service = DataService()
    news_sources = data_service.refresh_and_return_news_sources()

    source_articles = {}
    for news_source in news_sources:
        articles = data_service.pull_articles_from_source(news_source['url'])
        source_articles[news_source['_id']] = articles
        # NOTE(review): source_articles is keyed by news_source['_id'], so this
        # branch only runs if some source id equals 'docs' - confirm intent.
        if 'docs' in source_articles.keys():
            for article in source_articles['docs']['source']['enriched'][
                    'url']:
                pass

        # stop_here is never read in this function; it looks like a debugger
        # breakpoint anchor.
        stop_here = ""
    stop_here = ""

    # query_url and parameters are only used by the commented-out request below.
    query_url = 'https://topics.mediacloud.org/api/collections/9139458/stories'
    parameters = {
        'snapshotId': '1477',
        'timespandId': '39849',
        'sort': 'inlink',
        'limit': '5000'
    }
    # response = requests.get(query_url, params=parameters)
    # stop_here = ""

    client = MongoClient('localhost', 27017)
    database = client['AkashicRecords']
    # From here on `articles` is the MongoDB collection, not the list pulled above.
    articles = database['historic_political_article_data']
    cbt = database['cleaned_breitbart_test']

    # Materialise every stored article in memory.
    articles_found = []
    for article in articles.find():
        articles_found.append(article)

    cleaned_articles = []
    # Truncate each article's entity list to its first five entries, in place.
    for article in articles_found:
        if len(article['entities']) > 5:
            article['entities'] = article['entities'][:5]

            stop_here = ""

    # Walks every relation; the computed values are not stored or used, and
    # any exception is printed and skipped.
    for article in articles_found:
        relation_by_sent_id = {}
        for relation in article['relations']:
            try:
                sent_id = hash(relation['sentence'])
                if 'subject' in relation.keys():
                    if 'object' in relation.keys():
                        pass
                if 'object' in relation.keys():
                    pass
            except Exception as e:
                print(e.args)

    # watson_service = WatsonService()

    # articles_found = data_service.pull_from_source('http://www.breitbart.com')
    cleaned_articles = data_service.clean_source_articles(
        'breitbart.com', articles_found)
    cbt.insert_many(cleaned_articles)

    # For each stored article, gather the type-hierarchy path segments of
    # entities and keywords whose relevance exceeds the threshold.
    article_data_list = []
    for article in articles.find():
        article_element_types = []
        relevance_threshold = 0.80

        for entity in article['entities']:
            if entity['relevance'] > relevance_threshold:
                if 'knowledgeGraph' in entity.keys():
                    if 'typeHierarchy' in entity['knowledgeGraph'].keys():
                        # Split on '/' and drop the first segment.
                        article_element_types.append(
                            entity['knowledgeGraph']['typeHierarchy'].split(
                                '/')[1:])

        for keyword in article['keywords']:
            if keyword['relevance'] > relevance_threshold:
                if 'knowledgeGraph' in keyword.keys():
                    if 'typeHierarchy' in keyword['knowledgeGraph'].keys():
                        article_element_types.append(
                            keyword['knowledgeGraph']['typeHierarchy'].split(
                                '/')[1:])

        article_data_list.append(article_element_types)

    stop_here = ""
Пример #16
0
 def parse_content(self, content, link_job):
     """Parse a train price response and persist one Price per train.

     ``content`` is JSON wrapped in a call-style envelope: only the text
     between the first ``(`` and the last ``)`` is decoded. Each entry of
     ``data.trains`` is converted into a ``Price`` and stored through
     ``DataService.save_train_price``. A failure on one train is logged and
     does not stop the remaining trains. ``link_job`` is not used here.
     """
     # Strip the wrapper around the JSON payload.
     content = content[content.index('(') + 1:content.rindex(')')]
     json_obj = json.loads(content)
     trains = json_obj['data']['trains']
     for train in trains:
         try:
             train_code = train['trainNum']
             start_station = train['fromCity']
             end_station = train['toCity']
             origin = train['beginPlace']
             terminal = train['endPlace']
             depart_time = train['fromTime']
             arrive_time = train['toTime']
             # NOTE(review): presumably usedTimeInt is in minutes and is
             # converted to seconds here - confirm.
             duration = int(train['usedTimeInt']) * 60
             note = train['note']
             # One price per seat class, all looked up in the same ticketState.
             A1 = get_price('hardseat', train['ticketState'])
             A2 = get_price('softseat', train['ticketState'])
             A3 = get_price('hardsleepermid', train['ticketState'])
             A4 = get_price('softsleeperdown', train['ticketState'])
             A6 = get_price('advancedsoftsleeper', train['ticketState'])
             A9 = get_price('businessseat', train['ticketState'])
             O = get_price('secondseat', train['ticketState'])
             M = get_price('firstseat', train['ticketState'])
             P = get_price('specialseat', train['ticketState'])
             # Defaults used when the train or its stations are not found.
             sequence = 0
             days = 0
             stayTime = 0
             grade = ''
             state = 0
             train_no = ''
             exist = DataService.check_traincode_exist(train_code)
             # is_correct 0: erroneous information, 1: correct
             is_correct = 1
             if len(train['ticketState']) == 0:
                 is_correct = 0
             #  state 0: normal, 1: this train is not recorded,
             #  2: this train is recorded but this station has been cancelled
             if not exist:
                 state = 1
             else:
                 station_s = DataService.find_station(
                     train_code, start_station)
                 station_e = DataService.find_station(
                     train_code, end_station)
                 if len(station_e) != 0 and len(station_s) != 0:
                     # NOTE(review): the column positions used below are defined
                     # by the rows DataService.find_station returns - confirm
                     # their meaning against that query.
                     days_s = int(station_s[0][6])
                     if date_util.compareSS(station_s[0][4],
                                            station_s[0][8]) < 0:
                         days_s += 1
                     sequence = station_e[0][3]
                     days = station_e[0][6] - days_s
                     stayTime = station_e[0][5]
                     grade = station_e[0][9]
                     train_no = station_e[0][10]
                 else:
                     state = 2
             # NOTE(review): end_station is passed before start_station -
             # confirm this matches the Price constructor's parameter order.
             price = Price(train_code, end_station, start_station,
                           depart_time, arrive_time, duration, A1, A2, A3,
                           A4, O, M, A6, A9, grade, days, P, origin,
                           terminal, sequence, train_no, stayTime,
                           is_correct, state, note)
             DataService.save_train_price(price)
         except:
             t, v, tb = sys.exc_info()
             log.error("%s,%s,%s" % (t, v, traceback.format_tb(tb)))
Пример #17
0
from data_service import DataService
from plot_drawing import PlotDrawingService

# Build a DataService from four newline-separated rows of three
# space-separated values each.
ds = DataService("1 1 2\n2 2 3\n3 3 4\n4 4 5")
# NOTE(review): presumably column 0 is the x series and columns 1 and 2 are
# the y series - confirm against DataService.columns.
x, ys = ds.columns(0, [1, 2])
PlotDrawingService().draw_plot(x, ys, "time", "mass", "plot1.png")
Пример #18
0
import logging

from constants import LOG_FILENAME
from data_service import DataService

# Send INFO-and-above log records to the file named by LOG_FILENAME.
logging.basicConfig(filename=LOG_FILENAME, level=logging.INFO)
data_service = DataService()
# data_service.execute_scout_data_generation_cycle(page_number=6014)
# Only the geo-hash copy step runs; the generation cycle above is disabled.
data_service.copy_geo_hash()
Пример #19
0
def main():
    """Exploratory script over news journals and stored article data.

    Steps, in order:

    1. Query each known news journal for its articles over a timeframe.
       The results are not kept; a failed query only prints a placeholder
       message.
    2. Fetch the articles of every journal by URL, keyed by the journal's
       ``_id``.
    3. Load every document of ``historic_political_article_data`` from the
       local MongoDB database ``AkashicRecords``, cap each article's
       ``entities`` at five and walk its ``relations``.
    4. Clean the loaded articles with
       ``data_service.clean_source_articles`` and insert the result into
       ``cleaned_breitbart_test``.
    5. Collect the ``typeHierarchy`` path segments of entities and keywords
       whose relevance exceeds 0.80, one list per stored article.

    Nothing is returned.
    """

    data_service = DataService()
    news_journals = data_service.get_known_online_news_journals()

    news_journal_articles = []
    for news_journal in news_journals:
        try:
            articles_of_journal_over_timeframe = data_service.get_articles_by_new_journal_over_timeframe(
                news_journal['url'])

        except Exception as query_failure:
            print(
                'source name, source id, resulted in xmany articles or none at all within the timeframe'
            )

    source_articles_by_source_id = {}
    # Iterate the journals fetched above: they are the only source list in
    # scope. The previous loop variable source, `news_sources`, was never
    # defined in this function and raised NameError.
    for news_source in news_journals:
        articles = data_service.get_articles_from_online_news_journal_by_url(
            news_source['url'])
        source_articles_by_source_id[news_source['_id']] = articles
        # NOTE(review): the dict is keyed by news_source['_id'], so this branch
        # only runs if some source id equals 'docs' - confirm intent.
        if 'docs' in source_articles_by_source_id.keys():
            for article in source_articles_by_source_id['docs']['source'][
                    'enriched']['url']:
                pass

    # find the best possible news across one or more sources the most both singularly about a topic and with an
    # interesting or strong perspective
    # want news that is unique due to its paring of strange characters doing odd things, expressing a strong
    # and unique opinion towards common actions performed by a character in novel way

    # Hierarchy of boxes model, where the color of the lies of each rectange can convey different information, same as the
    # spacing between the rectangles

    # query_url and parameters are only used by the commented-out request below.
    query_url = 'https://topics.mediacloud.org/api/collections/9139458/stories'
    parameters = {
        'snapshotId': '1477',
        'timespandId': '39849',
        'sort': 'inlink',
        'limit': '5000'
    }
    # response = requests.get(query_url, params=parameters)

    client = MongoClient('localhost', 27017)
    database = client['AkashicRecords']
    # From here on `articles` is the MongoDB collection, not a fetched list.
    articles = database['historic_political_article_data']
    cbt = database['cleaned_breitbart_test']

    # Materialise every stored article in memory.
    articles_found = []
    for article in articles.find():
        articles_found.append(article)

    # Truncate each article's entity list to its first five entries, in place.
    for article in articles_found:
        if len(article['entities']) > 5:
            article['entities'] = article['entities'][:5]

    # Walks every relation; the computed values are not stored or used, and
    # any exception is printed and skipped.
    for article in articles_found:
        relation_by_sent_id = {}
        for relation in article['relations']:
            try:
                sent_id = hash(relation['sentence'])
                if 'subject' in relation.keys():
                    if 'object' in relation.keys():
                        pass
                if 'object' in relation.keys():
                    pass
            except Exception as e:
                print(e.args)

    # watson_service = WatsonService()

    # articles_found = data_service.pull_from_source('http://www.breitbart.com')
    cleaned_articles = data_service.clean_source_articles(
        'breitbart.com', articles_found)
    cbt.insert_many(cleaned_articles)

    article_data_list = []
    for article in articles.find():
        article_element_types = []
        relevance_threshold = 0.80

        for entity in article['entities']:
            if entity['relevance'] > relevance_threshold:
                if 'knowledgeGraph' in entity.keys():
                    if 'typeHierarchy' in entity['knowledgeGraph'].keys():
                        # Split on '/' and drop the first segment.
                        article_element_types.append(
                            entity['knowledgeGraph']['typeHierarchy'].split(
                                '/')[1:])

        for keyword in article['keywords']:
            if keyword['relevance'] > relevance_threshold:
                if 'knowledgeGraph' in keyword.keys():
                    if 'typeHierarchy' in keyword['knowledgeGraph'].keys():
                        article_element_types.append(
                            keyword['knowledgeGraph']['typeHierarchy'].split(
                                '/')[1:])

        article_data_list.append(article_element_types)