Example #1
def api_issue():
    log_request(request, 'api-issue')
    # prepare constants
    headers = {'Content-Type': 'application/json; charset=utf-8'}
    bad_payload = {
        'success': False,
        'num': None,
        'url': None,
    }
    bad_request_response = (dump_to_json(bad_payload), 400, headers)
    # get and validate user's data
    valid, why = is_issue_request_valid(request)
    if not valid:
        log_request(request, 'api-issue-errors', why)
        return bad_request_response
    user_data = request.get_json()
    user_data = escape_sel_context(user_data)
    # get template
    exp_keys = {'owner', 'repo', 'title', 'body', 'labels', 'assignees'}
    issue_tmpl = get_config('issue_template', exp_keys)
    # specify template
    as_is_keys = {'owner', 'repo', 'labels', 'assignees'}
    issue = {k: issue_tmpl[k] for k in as_is_keys}
    issue['title'] = issue_tmpl['title'].format_map(user_data)
    issue['body'] = ''
    line_prefix = ''
    for line in issue_tmpl['body']:
        if isinstance(line, dict):
            line_prefix = line['line_prefix']
        else:
            line = line.format_map(user_data)
            line = ('\n' + line_prefix).join(line.split('\n'))
            issue['body'] += line_prefix + line
    issue['body'] = issue['body'].rstrip('\n')
    # create issue
    try:
        num, url = gitHub.create_issue(**issue)
        payload = {
            'success': True,
            'num': num,
            'url': url,
        }
    except Exception:
        why = 'The exception raised in api_issue:\n' + format_exc()
        app.logger.warning(why)
        log_request(request, 'api-issue-errors', why)
        return bad_request_response
    return (dump_to_json(payload), 200, headers)
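The loop that builds issue['body'] above treats issue_tmpl['body'] as a mixed list: plain strings are rendered with str.format_map(user_data) and given the current prefix, while a dict entry only switches line_prefix for the lines that follow. A minimal sketch of a template with that shape; the field names ({name}, {email}, {message}) and repository values are made up for illustration, and the real template is whatever get_config('issue_template', ...) returns:

# Hypothetical issue_template config (illustrative values only).
issue_template = {
    'owner': 'example-org',                 # copied into the issue as-is
    'repo': 'example-repo',
    'labels': ['from-web-form'],
    'assignees': ['maintainer'],
    'title': 'Feedback from {name}',        # filled via str.format_map(user_data)
    'body': [
        'Reported by {name} <{email}>\n',   # plain line: formatted, then prefixed
        {'line_prefix': '> '},              # dict: switch the prefix for later lines
        '{message}\n',                      # user text ends up block-quoted with '> '
    ],
}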
Example #2
File: expanded.py Project: anderscui/nlpy3
def main():
    docs = {}
    batch = 1
    for fname in glob.glob('text/*/wiki*', recursive=True):
        print(fname)
        with open(fname) as f:
            in_doc = False
            cur_doc = {}
            cur_lines = []
            for line in f:
                if not in_doc:
                    if line.startswith('<doc id="'):
                        in_doc = True
                        doc_id, title = extract_title_id(line)
                        cur_doc['id'] = doc_id
                        cur_doc['title'] = clean_title(title)
                    continue

                if line.startswith('</doc>'):
                    doc_id = cur_doc['id']
                    del cur_doc['id']

                    text = ''.join(cur_lines)
                    cats = RE_CAT.findall(text)
                    cats = [c.split('|')[0].strip() for _, c, _ in cats]
                    if cats:
                        cur_doc['cats'] = cats
                    is_disam = any(disam in text for disam in DISAMS)
                    if is_disam:
                        cur_doc['dis'] = 1

                    docs[doc_id] = cur_doc

                    in_doc = False
                    cur_doc = {}
                    cur_lines = []

                else:
                    cur_lines.append(line)

        if len(docs) >= 100000:
            dump_to_json(docs, 'expanded/expanded_{}.json'.format(batch))
            docs = {}
            batch += 1

    if docs:
        dump_to_json(docs, 'expanded/expanded_{}.json'.format(batch))
        docs = {}
Example #3
File: process_idp.py Project: adieyal/PMIS
def process_file(filename, processor):
    all_projects = []
    workbook = spreadsheet.WorkBook(filename)
    for sheet in workbook.sheets():
        #yesno = raw_input("Process this sheet? ")
        #if yesno.lower() == "y":
        #if "(ONGOING)" in sheet.name or "(COMPLETE" in sheet.name:
        for label in ["ONGOING", "ON GOING", "ON-GOING", "ON-GO"]:
            if label in sheet.name.upper():
                
                projects = process_sheet(sheet, processor)
                all_projects.extend(projects)
                break
        #else:
        #    print sheet.name
    print(utils.dump_to_json(all_projects))
Example #4
def log_request(request, name, why=None):
    """ Log request with particular logger (to corresponding file) """
    logger = logging.getLogger(LOGGER_NAME_PREFIX + name)
    logger.info('=================')
    logger.info('Method: {method}'.format(method=request.method))
    # headers from WSGI environment are not sorted
    headers = dump_to_json(dict(request.headers))
    logger.info('---- Headers ----\n' + headers)
    if request.is_json:
        json = dump_to_json(request.get_json())
        logger.info('---- JSON ----\n' + json)
    else:
        data = request.get_data().decode()
        logger.info('---- Data ----\n' + data)
    if why:
        logger.info('---- Why bad ----\n' + why)
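log_request() above only looks up a logger by name; the docstring's "corresponding file" implies that each LOGGER_NAME_PREFIX + name logger is wired to its own file elsewhere in the project. A minimal sketch of such a setup, with the prefix value and log paths assumed purely for illustration:

import logging

# Assumed prefix and directory -- the real values live elsewhere in the project.
LOGGER_NAME_PREFIX = 'request.'

def setup_request_logger(name, log_dir='logs'):
    """Attach a dedicated file handler to the logger that log_request() resolves."""
    logger = logging.getLogger(LOGGER_NAME_PREFIX + name)
    logger.setLevel(logging.INFO)
    handler = logging.FileHandler('{}/{}.log'.format(log_dir, name))
    handler.setFormatter(logging.Formatter('%(asctime)s %(message)s'))
    logger.addHandler(handler)
    return logger

# e.g. setup_request_logger('api-issue') and setup_request_logger('api-issue-errors')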
Example #5
def main(data_dir):
    redirects = {}
    batch = 1
    for fname in glob.glob(data_dir + '/*/wiki*', recursive=False):
        print(fname)
        with open(fname) as f:
            in_doc = False
            cur_doc = {}
            cur_lines = []
            for line in f:
                if not in_doc:
                    if line.startswith('<doc id="'):
                        in_doc = True
                        doc_id, title = extract_title_id(line)
                        cur_doc['id'] = doc_id
                        cur_doc['title'] = clean_title(title)
                    continue

                if line.startswith('</doc>'):
                    doc_id = cur_doc['id']
                    del cur_doc['id']

                    for cur_line in cur_lines:
                        m = RE_REDIRECT.search(cur_line)
                        if m:
                            cur_doc['redirect'] = m.group(1)
                            break

                    if 'redirect' in cur_doc:
                        redirects[doc_id] = cur_doc

                    in_doc = False
                    cur_doc = {}
                    cur_lines = []

                else:
                    cur_lines.append(line)

        if len(redirects) >= 100000:
            dump_to_json(redirects, 'expanded/expanded_{}.json'.format(batch))
            redirects = {}
            batch += 1

    if redirects:
        dump_to_json(redirects, 'expanded/expanded_{}.json'.format(batch))
        redirects = {}
Example #6
File: database.py Project: adieyal/PMIS
@classmethod
def edit(cls, uuid):
    data = connection.get('/project/%s/edit' % (uuid))
    if not data:
        details = Project.get(uuid, as_json=True)
        data = dump_to_json(details)
        connection.set('/project/%s/edit' % (uuid), data)
    else:
        details = json.loads(data)
    project = cls(details)
    project.edit = True
    return project
Example #7
File: database.py Project: adieyal/PMIS
def save(self):
    uuid = self._uuid
    timestamp = str(uuid1())
    self._details['_uuid'] = uuid
    self._details['_timestamp'] = timestamp
    data = dump_to_json(self._details)
    if self.edit:
        connection.set('/project/%s/edit' % (uuid), data)
    else:
        connection.sadd('/project', uuid)
        connection.sadd('/project/%s' % (uuid), timestamp)
        connection.set('/project/%s/%s' % (uuid, timestamp), data)
Example #8
def main(args):
    filename = args.input_file
    with open(filename, "rb") as f:
        html_string = f.read()

    filename = remove_path(filename)
    g = Goose()
    article = g.extract(raw_html=html_string)
    data = {}
    data["text"] = article.cleaned_text  # article body extracted by Goose
    data["id"] = filename
    data = dump_to_json(data)
    return data
Example #9
def main(args):
    filename = args.input_file
    if args.no_byte:
        with open(filename, "r") as f:
            html_string = f.read()
    else:
        with open(filename, "rb") as f:
            html_string = f.read()

    filename = remove_path(filename)
    extractor = Extractor(extractor='ArticleExtractor', html=html_string)
    extracted_text = extractor.getText()
    data = {}
    data["text"] = extracted_text
    data["id"] = filename
    data = dump_to_json(data)
    return data
Example #10
        stdout = stdout.decode('utf-8').replace('\n', '\t').split('\t')
        return stdout[1]
    except CalledProcessError as e:
        print(e)


if __name__ == "__main__":
    jardir = "/emw_pipeline_nf/bin/DTC_Nextflow"
    args = get_args()
    with open("asd", "w") as f:
        f.write(args.data)
    data = load_from_json(args.data)
    filename = args.input_dir + "/" + data["id"]

    if args.no_byte:
        with open(filename, 'r') as fr:
            html_string = str(fr.read())
    else:
        with open(filename, 'rb') as fr:
            html_string = str(fr.read())

    compileCmd = 'javac -cp .:{0}/dct-finder-2015-01-22.jar:{0}/commons-lang3-3.8.1.jar:{0}/commons-cli-1.4.jar  {0}/main.java -d .'.format(
        jardir)
    executeCmd = 'java -cp .:{0}/dct-finder-2015-01-22.jar:{0}/commons-lang3-3.8.1.jar:{0}/commons-cli-1.4.jar DTC_Nextflow.main {1}'.format(
        jardir, html_string)
    compiled = compile_java(compileCmd)
    pd = execute_java(executeCmd)
    data["publish_time"] = pd

    print(dump_to_json(data))
Example #11
def retrieve_prices_by_compare(all_styles, compare_url, years, out_dir):
    '''
    - all_styles {make: models}
        - models [model1, model2, ...]
            - model {name: 'name', year: styles}
                - styles [style1, style2, ...]
                    - style {'name', 'id', 'attr1', 'attr2', ...}
                             (this function adds 'price' attr to it)
    '''

    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    for make in all_styles:
        models = all_styles[make]
        for model in models:
            model_name = model.get('name')
            if model_name is None:
                cprint("Empty model name in list for make {}".format(make),
                       'err')
                continue

            for year in years:
                styles = model.get(year)
                if styles is None:
                    continue
                for style in styles:
                    vehicle_id = style.get('id')
                    if vehicle_id is None:
                        if style.get('name') is None:
                            cprint(
                                "Empty vehicle id for {} {} {} UNKNOWN TYPE".
                                format(year, make, model_name), 'r')
                        else:
                            cprint(
                                "Empty vehicle id for {} {} {} style {}".
                                format(year, make, model_name,
                                       style['name']), 'r')
                        continue

                    url = '{}{}-{}-{}-{}/'.format(compare_url, year, make,
                                                  model_name, vehicle_id)
                    try:
                        page = urllib.urlopen(url)
                    except Exception:
                        cprint("Failed to open url: {}".format(url), 'err')
                        continue

                    soup = BeautifulSoup(page, 'lxml')
                    if page_not_found(soup):
                        cprint(
                            "Compare page for {} {} {} {} does not exist".
                            format(year, make, model_name, vehicle_id), 'r')
                        continue

                    for td in soup.find_all('td', {'class': ''}):
                        spans = td.find_all('span')
                        if len(spans) == 2 and spans[0].text == 'KBB Suggested Retail':
                            style['price'] = spans[1].text
                            cprint("Suggested price {} for {} {} {} style {}".format(
                                style['price'], year, make, model_name,
                                style['name']), 'g')
        cprint("Saving data for make {}".format(make), 'hi')
        out_file = os.path.join(out_dir, make + '.json')
        dump_to_json(all_styles[make], out_file)
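The docstring of retrieve_prices_by_compare describes the nested all_styles structure only in outline. A small hypothetical instance (make, model, year, and style ids invented for illustration) shows the shape the loops above traverse:

# Hypothetical all_styles input matching the docstring's description.
all_styles = {
    'Honda': [                                 # make -> list of models
        {
            'name': 'Civic',                   # model name
            '2018': [                          # year -> list of styles
                {'name': 'LX Sedan', 'id': '401693898'},   # 'price' is added by the function
                {'name': 'EX Coupe', 'id': '401693900'},
            ],
        },
    ],
}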
Example #12
def main(args):

    # Source 1 times
    # Source 2 newind
    # Source 3 ind
    # Source 4 thehin
    # Source 5 scm
    # Source 6 people

    data = load_from_json(args.data)
    filename = args.input_dir + "/" + data["id"]

    with open(filename, "rb") as g:
        html_string = g.read()

    text = data["text"].splitlines()

    stoplist1 = None
    stoplist2 = None
    stoplist3 = None
    stoplist4 = None
    if args.source == 1:
        text = deletesamesubstr(text)
        stoplist1 = [
            "RELATED", "From around the web", "More from The Times of India",
            "Recommended By Colombia", "more from times of india Cities",
            "You might also", "You might also like",
            "more from times of india", "All Comments ()+^ Back to Top",
            "more from times of india News", "more from times of india TV",
            "more from times of india Sports",
            "more from times of india Entertainment",
            "more from times of india Life & Style",
            "more from times of india Business"
        ]
        stoplist2 = ["FOLLOW US", "FOLLOW PHOTOS", "FOLLOW LIFE & STYLE"]

    elif args.source == 3:
        stoplist1 = [
            "Tags:", "ALSO READ", "Please read our before posting comments",
            "TERMS OF USE: The views expressed in comments published on indianexpress.com are those of the comment writer's alone. They do not represent the views or opinions of The Indian Express Group or its staff. Comments are automatically posted live; however, indianexpress.com reserves the right to take it down at any time. We also reserve the right not to publish comments that are abusive, obscene, inflammatory, derogatory or defamatory."
        ]

    elif args.source == 4:
        stoplist3 = [
            "ShareArticle", "Updated:", "MoreIn", "SpecialCorrespondent",
            "METRO PLUS", "EDUCATION PLUS", "PROPERTY PLUS", "CINEMA PLUS",
            "DISTRICT PLUS"
        ]
        stoplist4 = [
            "METRO PLUS", "EDUCATION PLUS", "PROPERTY PLUS", "CINEMA PLUS",
            "DISTRICT PLUS"
        ]

    elif args.source == 5:
        stoplist1 = ["Print Email", "Video"]
        stoplist2 = [
            "Viewed", "Associated Press", "Get updates direct to your inbox",
            "Opinion"
        ]

    elif args.source == 6:
        stoplist2 = [
            'Email | Print', '+', 'stumbleupon', 'More Pictures',
            'Save Article',
            'Click the "PLAY" button and listen. Do you like the online audio service here?',
            'Good, I like it', 'Do you have anything to say?', 'Name'
        ]
        text = [line for line in text if not line.startswith("Source")]

    if text:
        text = deletecertainstr(text,
                                stoplist1=stoplist1,
                                stoplist2=stoplist2,
                                stoplist3=stoplist3)
        if text:
            text, data = addnewstime(text,
                                     html_string,
                                     data,
                                     args.source,
                                     stoplist=stoplist4)
            if args.source == 1:
                text = deletesamesubstr(text)
            if text:
                text = "".join([
                    line.strip() + "\n" if line.strip() != "" else ""
                    for line in text
                ])[:-1]
                data["text"] = text
                data = dump_to_json(data)

    return data
Example #13
    parser.add_argument('--out_dir', help="output folder")
    args = parser.parse_args()

    return (args)


def request(id, text):
    # POST the document to the local service as form-encoded data
    r = requests.post(url="http://localhost:5000/queries",
                      data={
                          'identifier': id,
                          'text': text
                      })
    return json.loads(r.text)


if __name__ == "__main__":
    args = get_args()
    data = load_from_json(args.data)

    rtext = request(id=data["id"], text=data["text"])
    data["doc_label"] = int(rtext["output"])
    data["length"] = len(data["text"])

    if data["doc_label"] == 0:
        write_to_json(data, data["id"], extension="json", out_dir=args.out_dir)
    else:
        data["sentences"] = rtext["event_sentences"]

    print(dump_to_json(data, add_label=True))
Example #14
def replace_old_testing_json(raw_orders, json_fname: str):
    '''deletes old json, exports raw orders to json file'''
    output_dir = get_output_dir(client_file=False)
    json_path = os.path.join(output_dir, json_fname)
    delete_file(json_path)
    dump_to_json(raw_orders, json_fname)
Example #15
    dataset = loader.load(dataset_name,
                          encoding='utf8',
                          batch_size=params.batch_size,
                          to_tensor=True,
                          to_cuda=params.cuda)
    logger.info("- done.")

    # add datasets parameters into params
    params.update(datasets_params)

    # create model, optimizer and so on.
    model, optimizer, criterion, metrics = model_factory(params)

    # restore model, optimizer
    status = Serialization(checkpoint_dir=model_dir).restore(
        model=model, checkpoint=checkpoint)
    
    if not status:
        logger.error("Restore model from the checkpoint: {}, failed".format(
            checkpoint))

    logger.info("Starting evaluate model on test dataset...")
    metrics_result = evaluate(model, dataset, criterion, metrics)
    logger.info("- done.")

    logger.info("Save metrics results...")
    metrics_file = os.path.join(model_dir, 
                                metrics_filename.format(checkpoint))
    dump_to_json(metrics_result, metrics_file)
    logger.info("- done.")