示例#1
0
def select_file(title, *file_types):
    """Run the file-selection helper script and return the chosen file.

    The helper module ``_a_big_red_button.support.select_file`` is executed
    in a child Python process with ``DEPLOYMENT_ROOT`` as working directory,
    and whatever it prints on standard output is read back as the result.

    :param title: forwarded to the helper as its first argument.
    :param file_types: each one is an iterable whose items are appended, in
        order, to the helper's command line.
    :return: a ``Path`` built from the helper's output when that output
        starts with ``S``; ``None`` in every other case. Each failure is
        logged on the ``export`` logger.
    """
    # we do the import here because we don't want the initialisation
    # output to scramble with the output
    from _a_big_red_button.support.directory import DEPLOYMENT_ROOT
    from _a_big_red_button.support.log import get_logger

    # prepare logger
    logger = get_logger('export')

    # assemble select arguments and run the selecting script
    command = [
        sys.executable, "-m", "_a_big_red_button.support.select_file", title,
        *(item for file_type in file_types for item in file_type),
    ]
    completed_process = subprocess.run(command,
                                       cwd=DEPLOYMENT_ROOT,
                                       capture_output=True)

    # check the return code
    if completed_process.returncode != 0:
        logger.error(f"cannot select file: error code "
                     f"{completed_process.returncode}")
        return None

    # parse the result; an undecodable answer is reported like any other
    # failure instead of escaping as an exception
    try:
        output = completed_process.stdout.decode()
    except UnicodeDecodeError as error:
        logger.error(f"cannot select file: undecodable output: {error}")
        return None
    if not output:
        logger.error("cannot select file: unknown error")
        return None

    # the first character is the status marker, the payload starts at index 2
    status, payload = output[0], output[2:]
    if status == 'E':
        logger.error(f"cannot select file: {payload}")
        return None
    if status == 'S':
        selected = Path(payload.strip())
        logger.info(f"selected file: {selected}")
        return selected

    # unknown marker: log the complete output, because dropping its first
    # two characters would hide exactly the part that was not understood
    logger.error(f"cannot select file: unrecognised output: {output}")
    return None
示例#2
0
export scripts, such as listing, creating new, exporting
with a selected script, etc.

Kevin Ni, [email protected].
"""

import os
from pathlib import Path
import importlib
from _a_big_red_button.support.log import get_logger
from _a_big_red_button.support.configuration import get_config
from _a_big_red_button.crawler.export_helper import WokPersistentSessionExportHelper
from _a_big_red_button.crawler.db import WokPersistentSession

# module-wide logger, obtained under the name 'export'
logger = get_logger('export')

# configuration obtained under the name 'export'
config = get_config('export')

# the export scripts live in the 'export' directory next to this file
EXPORT_SCRIPT_DIR: Path = Path(__file__).parent / 'export'


class WokPersistentSessionExportScript:
    """An export script identified by name inside ``EXPORT_SCRIPT_DIR``."""

    def __init__(self, name: str):
        """Resolve the script file for *name*.

        :param name: script name without the ``.py`` extension; the file
            looked up is ``EXPORT_SCRIPT_DIR/<name>.py``.
        :raises RuntimeError: if that file does not exist.
        """
        script_full_path = EXPORT_SCRIPT_DIR.joinpath(f'{name}.py')
        # fail early, naming both the requested script and the resolved path
        if not script_full_path.exists():
            raise RuntimeError(f'the requested export script does not exist: '
                               f'{name} => {script_full_path}')
示例#3
0
"""
This script implements utilities to parse and manage a print page
from Web of Science.

Kevin Ni, [email protected].
"""

from typing import *
from lxml import etree
from _a_big_red_button.support.configuration import get_config
from _a_big_red_button.support.log import get_logger
from _a_big_red_button.crawler.article_attribute_parser import *

# module-wide logger, obtained under the name 'crawler'
logger = get_logger('crawler')

# configuration obtained under the name 'crawler'
config = get_config('crawler')


def normalize_name_abbr(name: str):
    """Normalise a name abbreviation.

    Leading and trailing whitespace is trimmed first, then every comma and
    full stop is dropped, and the remaining text is upper-cased.
    """
    trimmed = name.strip()
    kept_characters = [char for char in trimmed if char not in (',', '.')]
    return ''.join(kept_characters).upper()


class WoKCitation:
    @staticmethod
    def make_empty():
        """Return a ``WoKCitation`` built from fixed placeholder arguments."""
        # NOTE(review): 2100 looks like a sentinel value — confirm which
        # constructor parameter it fills and why this number was chosen
        return WoKCitation('', '', 2100, None, '', None, None)

    def __init__(self,
                 journal: str,
示例#4
0
Implements analyser functions for Social Network Analysis.

Kevin Ni, [email protected].
"""

import json
from pathlib import Path
from typing import *
import subprocess
import sys
from _a_big_red_button.support.log import get_logger
from _a_big_red_button.support.singleton import Singleton
from _a_big_red_button.support.directory import DEPLOYMENT_ROOT

# module-wide logger, obtained under the name 'analyser'
logger = get_logger('analyser')


class WokAnalyser(metaclass=Singleton):
    """Analyser built with the ``Singleton`` metaclass."""

    def __init__(self):
        # file to work on; None until one has been chosen
        self.file: Optional[Path] = None

    def select_file(self):
        """Run the file-selection helper script in a child process."""
        # show a file selection dialogue
        completed_process = subprocess.run(
            [sys.executable, "-m", "_a_big_red_button.support.select_file"],
            cwd=DEPLOYMENT_ROOT,
            capture_output=True)
        # a non-zero exit status of the helper is logged as an error
        if completed_process.returncode != 0:
            logger.error(f"cannot select file: error code "
                         f"{completed_process.returncode}")
示例#5
0
from typing import *
import re
import csv
import requests
import time
from io import StringIO
from lxml import etree
from queue import Queue, Empty, Full
import threading
from _a_big_red_button.support.configuration import get_config
from _a_big_red_button.support.log import get_logger
from _a_big_red_button.crawler.print_list import WoKPrintList
from _a_big_red_button.consolesync import CONSOLE_SYNC_HANDLER

# module-wide logger, obtained under the name 'crawler'; CONSOLE_SYNC_HANDLER
# is passed along as well
# NOTE(review): presumably attached as an extra handler — confirm in
# support.log what force_add_additional does
logger = get_logger('crawler', CONSOLE_SYNC_HANDLER, force_add_additional=True)

# configuration obtained under the name 'crawler'
_config = get_config('crawler')


class WokSearchResult:
    """Holds the details of one search result set and the session used for it."""

    def __init__(self, result_url: str, result_count: int,
                 search_id: str, search_term: str,
                 session: requests.Session, headers: Dict[str, str]):
        """Store the given search details on the instance.

        :param result_url: stored as ``self.result_url``.
        :param result_count: stored as ``self.result_count``.
        :param search_id: stored as ``self.search_id``.
        :param search_term: stored as ``self.search_term``.
        :param session: ``requests`` session, stored as ``self.session``.
        :param headers: header mapping, stored as ``self.headers``.
        """
        self.result_url, self.result_count = result_url, result_count
        self.search_id, self.search_term = search_id, search_term
        self.session, self.headers = session, headers

        # threading primitives
        # task queue created without a size limit
        self.task_queue = Queue()
示例#6
0
from pathlib import Path
from flask import Flask, render_template, request

from _a_big_red_button.crawler.controller import Wok
from _a_big_red_button.crawler.db import WokPersistentStorage, WokPersistentSession
from _a_big_red_button.crawler.db_meta import WokPersistentSessionMeta
from _a_big_red_button.support.select_file import select_file
from _a_big_red_button.crawler.export_script_helper import available_export_scripts, get_export_script
from _a_big_red_button.crawler.export_worker import WokPersistentSessionExportScriptThreadedRunner
from _a_big_red_button.support.response import good, bad
from _a_big_red_button.support.log import get_logger
from _a_big_red_button.support.configuration import get_config
from _a_big_red_button.crawler.db_search import search_in_all_sessions

# module-wide logger, obtained under the name 'controller-front'
logger = get_logger('controller-front')


def poll_search_progress():
    """Report the state of the current search as a ``good``/``bad`` response.

    * search still running -> ``good(finished=False)``
    * no finished search to report -> ``bad`` asking whether a search was made
    * search finished with a failure -> ``bad`` carrying the failure reason,
      or a generic hint when no reason was recorded
    * search finished successfully -> ``good(finished=True, result_count=...)``
    """
    # still running: nothing to report yet
    if Wok().is_searching:
        return good(finished=False)

    # neither running nor done
    if not Wok().search_done:
        return bad("cannot poll search progress: have you searched?")

    # done without trouble: hand back the number of results
    if not Wok().search_went_wrong:
        return good(finished=True, result_count=Wok().search_result_count)

    # done, but something failed: pass the reason on when there is one
    reason = Wok().search_what_went_wrong
    if reason is not None:
        return bad(f"search failed: {reason}")
    return bad("search failed on unknown error, "
               "check your search term and search again")