Example #1
def main():
    if len(sys.argv) != 3:
        raise Exception(
            "Usage: python summarize.py <timestamp_file> <transcript_file>")

    _, timestamp_path, transcript_path = sys.argv
    transcript_json = parse.get_transcript_json(transcript_path)
    # print(len(transcript_json))
    selected_texts = ht.text_generator(timestamp_path, transcript_json)
    # print(len(selected_texts))

    key_points = []
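    # Summarize each selected text and collect its keywords.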
    for selected_text in selected_texts:
        key_point = [
            summary.get_summary(selected_text),
            summary.get_keywords(selected_text)
        ]
        key_points.append(key_point)
    # print(key_points)

    with open("output.txt", "w") as f:
        for selected_text, (text_summary, text_keywords) in zip(selected_texts, key_points):
            f.write("Selected Texts:\n")
            f.write(selected_text)
            f.write("\n\n")
            f.write("Summary:\n")
            f.write(text_summary)
            f.write("\nKeywords:\n")
            f.write(text_keywords)
            f.write("\n\n")
Example #2
def index():
    if request.method == 'POST':
        text = request.form['text']
        summaries = get_summary(text)
        return render_template("summary.html", summaries=summaries)

    return render_template("index.html")
Example #3
def content():

    link = request.args.get('url')
    title = request.args.get('title')
    updated = request.args.get('updated')
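    # Download the page and strip the markup down to its visible text.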
    html = requests.get(link).text
    soup = BeautifulSoup(html, 'html.parser')
    text = ''.join(soup.findAll(text=True))

    text_summary = ''

    try:
        text_summary = summary.get_summary(text.encode('utf-8'))
        getEntityGraph.kindamain(text.encode('utf-8'))
    except Exception:
        pass

    info, entity = analyze_policy.analyze(text)

    return render_template('index.html',
                           info=info,
                           entity=entity,
                           sum=text_summary,
                           title=title,
                           updated=updated)  # return ''
Example #4
def init():
    """Inits the bot."""

    reddit = praw.Reddit(client_id=config.APP_ID, client_secret=config.APP_SECRET,
                         user_agent=config.USER_AGENT, username=config.REDDIT_USERNAME,
                         password=config.REDDIT_PASSWORD)

    processed_posts = load_log()
    whitelist = load_whitelist()

    for subreddit in config.SUBREDDITS:

        for submission in reddit.subreddit(subreddit).new():

            if submission.id not in processed_posts:

                clean_url = submission.url.replace("amp.", "")
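                # Reduce the URL to its registered domain so it can be checked against the whitelist.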
                ext = tldextract.extract(clean_url)
                domain = "{}.{}".format(ext.domain, ext.suffix)

                if domain in whitelist:

                    try:
                        article, title = extract_article_from_url(clean_url)
                        summary_dict = summary.get_summary(article, title)
                    except Exception as e:
                        log_error("{},{}".format(clean_url, e))
                        update_log(submission.id)
                        print("Failed:", submission.id)
                        continue

                    # To reduce low quality submissions, we only process those that made a meaningful summary.
                    if summary_dict["reduction"] >= MINIMUM_REDUCTION_THRESHOLD and summary_dict["reduction"] <= MAXIMUM_REDUCTION_THRESHOLD:

                        # Create a wordcloud, upload it to Imgur and get back the url.
                        image_url = cloud.generate_word_cloud(
                            summary_dict["article_words"])

                        # We start creating the comment body.
                        post_body = ""

                        for sentence in summary_dict["top_sentences"]:
                            post_body += """> {}\n\n""".format(sentence)

                        top_words = ""

                        for index, word in enumerate(summary_dict["top_words"]):
                            top_words += "{}^#{} ".format(word, index+1)

                        post_message = HEADER.format(
                            summary_dict["title"], submission.url,
                            summary_dict["reduction"]) + post_body + FOOTER.format(
                                image_url, top_words)

                        submission.reply(post_message)
                        update_log(submission.id)
                        print("Replied to:", submission.id)
                    else:
                        update_log(submission.id)
                        print("Skipped:", submission.id)
Example #5
def summary():
    json_data = get_summary(connect=conn, my_log=LOG)
    module_list = json_data['module_list']
    host_list = json_data['host_list']
    return render_template('Summary.html',
                           data=json_data,
                           host_list=host_list,
                           module_list=module_list)
Example #6
async def articlePost(req):

    start = time.time()

    article = req.json
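    # Convert the PDF referenced by the request to plain text before preprocessing and summarizing.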
    article_text = pdf_to_string_process(article['path'])

    article['text'] = article_text
    article['tokentree'] = preprocessing_article(article_text)
    article['summary'] = get_summary(article_text, 10)

    end = time.time()
    print(f"Total time in python create new article: {end - start:.2f} s")

    return res.json(article)
Example #7
def get_families(**kwargs):
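    # Default template context; values passed in via **kwargs override these entries.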
    base = {
        "title": "Family",
        "families": db.execute('''
            SELECT family, COUNT(DISTINCT genus)
            FROM fgs GROUP BY family
            ''').fetchall(),
        "f": "",
        "genera": [],
        "g": "",
        "species": [],
        "s": ""
    }
    base.update(kwargs)
    base.update({"summary": get_summary(base["f"], base["g"], base["s"])})
    return render_template("display.html", **base)
Example #8
def sumry():
    if request.method == 'POST':
        text = request.form['content']
        title = request.form['title']
        updated = request.form['updated']

        text_summary = summary.get_summary(text)
        return json.dumps({
            "modal": [{
                "title": "Summary",
                "content": text_summary,
                "type": "text"
            }],
            "title": title,
            "updated": updated,
            "status": "success"
        })
    return '{"status": "error", "message": "invalid request method"}'
Example #9
def infer(database: sqlite3.Connection, table: schema.Node,
          column: schema.Node) -> str:
    tname = schema.get_attributes(table, 'name')
    cname, type_ = schema.get_attributes(column, 'name', 'type')
    if not type_:
        return UNKNOWN
    type_ = type_.lower()
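    # The checks below map the column's SQL type (and, for text columns, its value statistics) to a chart kind.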

    if _is_boolean(type_) or _is_single_char(type_):
        return BAR_CHART

    if _is_characters(type_):
        col_summary = summary.get_summary(database, schema.get_name(table),
                                          schema.get_name(column))
        if col_summary['distinct'] <= 100 and col_summary['max'] <= 100:
            return HORIZONTAL_BAR_CHART
        return SUMMARY

    if _is_date(type_) or _is_numeric(type_):
        return HISTOGRAM

    return SUMMARY
Example #10
def init():
    """Inits the bot."""

    reddit = praw.Reddit(client_id=config.APP_ID, client_secret=config.APP_SECRET,
                         user_agent=config.USER_AGENT, username=config.REDDIT_USERNAME,
                         password=config.REDDIT_PASSWORD)

    processed_posts = load_log()
    whitelist = load_whitelist()

    for subreddit in config.SUBREDDITS:

        for submission in reddit.subreddit(subreddit).new(limit=50):

            if submission.id not in processed_posts:

                clean_url = submission.url.replace("amp.", "")
                ext = tldextract.extract(clean_url)
                domain = "{}.{}".format(ext.domain, ext.suffix)

                if domain in whitelist:

                    try:
                        with requests.get(clean_url, headers=HEADERS, timeout=10) as response:

                            # Sometimes Requests makes an incorrect guess, we force it to use utf-8
                            if response.encoding == "ISO-8859-1":
                                response.encoding = "utf-8"

                            html_source = response.text

                        article_title, article_date, article_body = scraper.scrape_html(
                            html_source)

                        summary_dict = summary.get_summary(article_body)
                    except Exception as e:
                        log_error("{},{}".format(clean_url, e))
                        update_log(submission.id)
                        print("Failed:", submission.id)
                        continue

                    # To reduce low quality submissions, we only process those that made a meaningful summary.
                    if summary_dict["reduction"] >= MINIMUM_REDUCTION_THRESHOLD and summary_dict["reduction"] <= MAXIMUM_REDUCTION_THRESHOLD:

                        # Create a wordcloud, upload it to Imgur and get back the url.
                        image_url = cloud.generate_word_cloud(
                            summary_dict["article_words"])

                        # We start creating the comment body.
                        post_body = "\n\n".join(
                            ["> " + item for item in summary_dict["top_sentences"]])

                        top_words = ""

                        for index, word in enumerate(summary_dict["top_words"]):
                            top_words += "{}^#{} ".format(word, index+1)

                        post_message = TEMPLATE.format(
                            article_title, clean_url, summary_dict["reduction"],
                            article_date, post_body, image_url, top_words)

                        submission.reply(post_message)
                        update_log(submission.id)
                        print("Replied to:", submission.id)
                    else:
                        update_log(submission.id)
                        print("Skipped:", submission.id)
Example #11
import os
import summary as sum
#import same_doc_tf_idf_summary as sum
#import retuers_corpus_tf_idf_summary as sum
article_text=""
with open("in.txt", "r") as lines:
    for line in lines:
        article_text=article_text+line

summary_text=sum.get_summary(article_text)
f = open("out.txt", "w") 
f.write(summary_text)
f.close()


Example #12
def get_summary():
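    # Check the request's auth headers, then return a JSON summary of the posted text.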
    check_auth(request.headers)
    summary_data = request.json
    summary_text = summary.get_summary(summary_data["text"])
    return jsonify({"summary": summary_text})
Example #13
def train(gpu, args):
    # Initialize workers
    # NOTE : the worker with gpu=0 will do logging
    dist.init_process_group(backend='nccl',
                            init_method='env://',
                            world_size=args.num_gpus,
                            rank=gpu)
    torch.cuda.set_device(gpu)

    # Prepare dataset
    data = get_data(args)

    data_train = data(args, 'train')
    data_val = data(args, 'val')

    sampler_train = DistributedSampler(data_train,
                                       num_replicas=args.num_gpus,
                                       rank=gpu)
    sampler_val = DistributedSampler(data_val,
                                     num_replicas=args.num_gpus,
                                     rank=gpu)

    batch_size = args.batch_size // args.num_gpus

    loader_train = DataLoader(dataset=data_train,
                              batch_size=batch_size,
                              shuffle=False,
                              num_workers=args.num_threads,
                              pin_memory=True,
                              sampler=sampler_train,
                              drop_last=True)
    loader_val = DataLoader(dataset=data_val,
                            batch_size=1,
                            shuffle=False,
                            num_workers=args.num_threads,
                            pin_memory=True,
                            sampler=sampler_val,
                            drop_last=False)

    # Network
    model = get_model(args)
    net = model(args)
    net.cuda(gpu)

    if gpu == 0:
        if args.pretrain is not None:
            assert os.path.exists(args.pretrain), \
                "file not found: {}".format(args.pretrain)

            checkpoint = torch.load(args.pretrain)
            net.load_state_dict(checkpoint['net'])

            print('Load network parameters from : {}'.format(args.pretrain))

    # Loss
    loss = get_loss(args)
    loss = loss(args)
    loss.cuda(gpu)

    # Optimizer
    optimizer, scheduler = utility.make_optimizer_scheduler(args, net)

    net = apex.parallel.convert_syncbn_model(net)
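    # Patch the model and optimizer for mixed-precision training (NVIDIA apex AMP).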
    net, optimizer = amp.initialize(net,
                                    optimizer,
                                    opt_level=args.opt_level,
                                    verbosity=0)

    if gpu == 0:
        if args.pretrain is not None:
            if args.resume:
                try:
                    optimizer.load_state_dict(checkpoint['optimizer'])
                    scheduler.load_state_dict(checkpoint['scheduler'])
                    amp.load_state_dict(checkpoint['amp'])

                    print('Resume optimizer, scheduler and amp '
                          'from : {}'.format(args.pretrain))
                except KeyError:
                    print('State dicts for resume are not saved. '
                          'Use --save_full argument')

            del checkpoint

    net = DDP(net)

    metric = get_metric(args)
    metric = metric(args)
    summary = get_summary(args)

    if gpu == 0:
        utility.backup_source_code(args.save_dir + '/code')
        try:
            os.makedirs(args.save_dir, exist_ok=True)
            os.makedirs(args.save_dir + '/train', exist_ok=True)
            os.makedirs(args.save_dir + '/val', exist_ok=True)
        except OSError:
            pass

    if gpu == 0:
        writer_train = summary(args.save_dir, 'train', args, loss.loss_name,
                               metric.metric_name)
        writer_val = summary(args.save_dir, 'val', args, loss.loss_name,
                             metric.metric_name)

        with open(args.save_dir + '/args.json', 'w') as args_json:
            json.dump(args.__dict__, args_json, indent=4)

    if args.warm_up:
        warm_up_cnt = 0.0
        warm_up_max_cnt = len(loader_train) + 1.0

    for epoch in range(1, args.epochs + 1):
        # Train
        net.train()

        sampler_train.set_epoch(epoch)

        if gpu == 0:
            current_time = time.strftime('%y%m%d@%H:%M:%S')

            list_lr = []
            for g in optimizer.param_groups:
                list_lr.append(g['lr'])

            print('=== Epoch {:5d} / {:5d} | Lr : {} | {} | {} ==='.format(
                epoch, args.epochs, list_lr, current_time, args.save_dir))

        num_sample = len(
            loader_train) * loader_train.batch_size * args.num_gpus

        if gpu == 0:
            pbar = tqdm(total=num_sample)
            log_cnt = 0.0
            log_loss = 0.0

        for batch, sample in enumerate(loader_train):
            sample = {
                key: val.cuda(gpu)
                for key, val in sample.items() if val is not None
            }

            if epoch == 1 and args.warm_up:
                warm_up_cnt += 1

                for param_group in optimizer.param_groups:
                    lr_warm_up = param_group['initial_lr'] \
                                 * warm_up_cnt / warm_up_max_cnt
                    param_group['lr'] = lr_warm_up

            optimizer.zero_grad()

            output = net(sample)

            loss_sum, loss_val = loss(sample, output)

            # Divide by batch size
            loss_sum = loss_sum / loader_train.batch_size
            loss_val = loss_val / loader_train.batch_size

            with amp.scale_loss(loss_sum, optimizer) as scaled_loss:
                scaled_loss.backward()

            optimizer.step()

            if gpu == 0:
                metric_val = metric.evaluate(sample, output, 'train')
                writer_train.add(loss_val, metric_val)

                log_cnt += 1
                log_loss += loss_sum.item()

                current_time = time.strftime('%y%m%d@%H:%M:%S')
                error_str = '{:<10s}| {} | Loss = {:.4f}'.format(
                    'Train', current_time, log_loss / log_cnt)

                if epoch == 1 and args.warm_up:
                    list_lr = []
                    for g in optimizer.param_groups:
                        list_lr.append(round(g['lr'], 6))
                    error_str = '{} | Lr Warm Up : {}'.format(
                        error_str, list_lr)

                pbar.set_description(error_str)
                pbar.update(loader_train.batch_size * args.num_gpus)

        if gpu == 0:
            pbar.close()

            writer_train.update(epoch, sample, output)

            if args.save_full or epoch == args.epochs:
                state = {
                    'net': net.module.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'scheduler': scheduler.state_dict(),
                    'amp': amp.state_dict(),
                    'args': args
                }
            else:
                state = {'net': net.module.state_dict(), 'args': args}

            torch.save(state,
                       '{}/model_{:05d}.pt'.format(args.save_dir, epoch))

        # Val
        torch.set_grad_enabled(False)
        net.eval()

        num_sample = len(loader_val) * loader_val.batch_size * args.num_gpus

        if gpu == 0:
            pbar = tqdm(total=num_sample)
            log_cnt = 0.0
            log_loss = 0.0

        for batch, sample in enumerate(loader_val):
            sample = {
                key: val.cuda(gpu)
                for key, val in sample.items() if val is not None
            }

            output = net(sample)

            loss_sum, loss_val = loss(sample, output)

            # Divide by batch size
            loss_sum = loss_sum / loader_val.batch_size
            loss_val = loss_val / loader_val.batch_size

            if gpu == 0:
                metric_val = metric.evaluate(sample, output, 'train')
                writer_val.add(loss_val, metric_val)

                log_cnt += 1
                log_loss += loss_sum.item()

                current_time = time.strftime('%y%m%d@%H:%M:%S')
                error_str = '{:<10s}| {} | Loss = {:.4f}'.format(
                    'Val', current_time, log_loss / log_cnt)
                pbar.set_description(error_str)
                pbar.update(loader_val.batch_size * args.num_gpus)

        if gpu == 0:
            pbar.close()

            writer_val.update(epoch, sample, output)
            print('')

            writer_val.save(epoch, batch, sample, output)

        torch.set_grad_enabled(True)

        scheduler.step()
Example #14
def test(args):
    # Prepare dataset
    data = get_data(args)

    data_test = data(args, 'test')

    loader_test = DataLoader(dataset=data_test,
                             batch_size=1,
                             shuffle=False,
                             num_workers=args.num_threads)

    # Network
    model = get_model(args)
    net = model(args)
    net.cuda()

    if args.pretrain is not None:
        assert os.path.exists(args.pretrain), \
            "file not found: {}".format(args.pretrain)

        checkpoint = torch.load(args.pretrain)
        key_m, key_u = net.load_state_dict(checkpoint['net'], strict=False)

        if key_u:
            print('Unexpected keys :')
            print(key_u)

        if key_m:
            print('Missing keys :')
            print(key_m)
            raise KeyError

    net = nn.DataParallel(net)

    metric = get_metric(args)
    metric = metric(args)
    summary = get_summary(args)

    try:
        os.makedirs(args.save_dir, exist_ok=True)
        os.makedirs(args.save_dir + '/test', exist_ok=True)
    except OSError:
        pass

    writer_test = summary(args.save_dir, 'test', args, None,
                          metric.metric_name)

    net.eval()

    num_sample = len(loader_test) * loader_test.batch_size

    pbar = tqdm(total=num_sample)

    t_total = 0

    for batch, sample in enumerate(loader_test):
        sample = {
            key: val.cuda()
            for key, val in sample.items() if val is not None
        }

        t0 = time.time()
        output = net(sample)
        t1 = time.time()

        t_total += (t1 - t0)

        metric_val = metric.evaluate(sample, output, 'train')

        writer_test.add(None, metric_val)

        # Save data for analysis
        if args.save_image:
            writer_test.save(args.epochs, batch, sample, output)

        current_time = time.strftime('%y%m%d@%H:%M:%S')
        error_str = '{} | Test'.format(current_time)
        pbar.set_description(error_str)
        pbar.update(loader_test.batch_size)

    pbar.close()

    writer_test.update(args.epochs, sample, output)

    t_avg = t_total / num_sample
    print('Elapsed time : {} sec, '
          'Average processing time : {} sec'.format(t_total, t_avg))
Example #15
            if step == 3:
                system, step = get_matrixproperties(system, materials, step)
            if step == 4: system, step = get_sorptionproperties(system, step)
            if step == 5: system, step = get_layerproperties(system, step)
            if step == 6: system, step = get_reactionproperties(system, step)
            if step == 7: system, step = get_reactioncoefficients(system, step)
            if step == 8: system, step = get_systemproperties(system, step)
            if step == 9: system, step = get_layerconditions(system, step)
            if step == 10:
                system, step = get_solidlayerconditions(system, step)
            if step == 11: system, step = get_solveroptions(system, step)
            if step == 12: system, step = get_inputoptions(system, step)
            if step == 13:
                while True:
                    #show the summary window
                    system = get_summary(system, database, materials)
                    #run the simulation
                    if system is not None:
                        output, main = solve_system(system)
                        #postprocess
                        if output is not None:
                            main = postprocess_data(system, output)
                        if main == 1: break
                    else: break

    # Loads an existing cpsm file
    if option == 1:
        with open(filename, 'rb') as cpsmfile:
            system = pickle.load(cpsmfile)
Example #16
    def run(self):

        if callable(self.dataset):
            X, y = self.dataset()
            loader = self.dataset
        else:
            X, y = self.dataset
            loader = lambda: (X, y)

        for name, estimator, grid in self.estimators:
            print(name)
            cache_dir = '%s/%s/' % (self.dir, name)

            if hasattr(self.cv, '__len__'):
                cv = list(self.cv)
            elif callable(self.cv):
                cv = list(self.cv(y))
            else:
                raise NotImplementedError()

            meta = {
                'X_shape': X.shape,
                'y_unique': np.unique(y),
                'cv': cv,
                'name': name,
                'estimator': estimator,
                'grid': grid,
                'search': self.search,
                'search_kwargs': self.search_kwargs
            }

            old_meta = None
            meta_filename = cache_dir + 'meta.pkl'
            if os.path.exists(meta_filename):
                try:
                    with open(meta_filename, 'rb') as f:
                        old_meta = dill.load(f)
                except Exception:
                    pass

            if old_meta:
                validate_cache(meta, old_meta)

            cacher = MultipleFilesCacher(cache_dir, flush_every_n=5)

            callback = TqdmCallback()

            def record_metadata(index, fit_arguments):
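                # Cache the fit arguments plus held-out predictions and ground truth for each fit, keyed by index.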
                meta_cacher = RemoteMultipleFilesCacher(
                    cache_dir,
                    flush_every_n=1,
                    file_name_source=lambda key: '%d_meta.pkl' % key)
                X = fit_arguments.pop('X')
                y = fit_arguments.pop('y')
                estimator = fit_arguments.pop('estimator')
                test = fit_arguments['test']
                y_pred = estimator.predict(X[test])
                fit_arguments['y_pred'] = y_pred
                fit_arguments['y_true'] = y[test]
                meta_cacher[index] = fit_arguments

            search = self.search(estimator,
                                 grid,
                                 scoring=self.scorer,
                                 cv=cv,
                                 callback=callback,
                                 cacher=cacher,
                                 loader=loader,
                                 mapper=self.mapper,
                                 fit_callback=record_metadata,
                                 **self.search_kwargs)

            try:

                with open(meta_filename, 'wb') as f:
                    dill.dump(meta, f, -1)

                search.fit(X, y)

                cacher.save()

                print(name, search.best_score_)

                summary = get_summary(self.experiment_name, X, y, cv,
                                      estimator, search.grid_scores_)
                print(summary)

                meta['best_score'] = search.best_score_
                meta['grid_scores'] = search.grid_scores_
                meta['summary'] = summary
                self.results[name] = meta

                with open(meta_filename, 'wb') as f:
                    dill.dump(meta, f, -1)

            except Exception as e:

                e_type, e_value, e_tb = sys.exc_info()
                tb = ''.join(traceback.format_tb(e_tb))
                print(e_type, e_value)
                print(tb)

            summary = '\n'.join(
                map(lambda m: m['summary'], self.results.values()))
            with open(self.dir + 'summary.txt', 'w') as f:
                f.write(summary)
Example #17
def summariser():
    text = request.form['text']
    if text:
        print("got it")
    text, text_summary = get_summary(text)
    return render_template('output.html', text=text, text_summary=text_summary)