Exemplo n.º 1
0
def load_X_Y_nn(table_name,
                top100_labels=False,
                validation_set=False,
                test_set=False):
    """Load sparse bag-of-words features and ICD-9 label rows for a table.

    Walks the cursor returned by ``get_bag_of_words_vectors`` and collects the
    unpickled values and column indices of every record into three parallel
    lists (values, row indices, column indices); the row index of a record is
    its position in the cursor.

    :param table_name: table name passed to ``get_bag_of_words_vectors``.
    :param top100_labels: when true ``Y`` has 100 columns, otherwise 10.
    :param validation_set: forwarded to ``get_icd9_codes``.
    :param test_set: forwarded to ``get_icd9_codes``.
    :return: ``(data, row_ind, col_ind, n_patients, 40000, Y)``.
    """
    manager = DatabaseManager()
    cursor = manager.get_bag_of_words_vectors(table_name)
    n_patients = cursor.rowcount

    n_labels = 100 if top100_labels else 10
    Y = np.zeros((n_patients, n_labels))

    code_to_column = get_icd9_codes_map(top100_labels=top100_labels)

    values, rows, cols = [], [], []
    for row, (subject_id, _, pickled_cols, pickled_values) in enumerate(cursor):
        record_cols = pickle.loads(pickled_cols)
        record_values = pickle.loads(pickled_values)

        values.extend(record_values)
        rows.extend([row] * len(record_cols))
        cols.extend(record_cols)

        # Mark every ICD-9 code recorded for this subject in its label row.
        subject_codes = manager.get_icd9_codes(subject_id=subject_id,
                                               validation_set=validation_set,
                                               test_set=test_set)
        for icd9_code in subject_codes:
            column = code_to_column[icd9_code]
            Y[row, column] = 1

    return values, rows, cols, n_patients, 40000, Y  # TODO
Exemplo n.º 2
0
def main():
    """
    Start the submission server and its background worker threads.

    A bottle server listens for submissions. Each submission is handed to
    put_submission_on_lb, which makes sure the user is on the leaderboard and
    that the leaderboard reflects their most recent submission.

    That method then enqueues the submission for the concordance check.
    """
    np.random.seed(1337)

    create_logger()
    db_manager = DatabaseManager()
    fm = FileManager('/tmp/', logging)
    root_logger = logging.getLogger()
    root_logger.info("Creating servers")

    server_thread = threading.Thread(
        target=run, kwargs={'host': '0.0.0.0', 'port': int(PORT)})
    server_thread.start()
    root_logger.info("Spawning new threads to score concordance")

    # Both workers receive the same database manager and file manager.
    for worker in (put_submission_on_lb, score_concordance):
        worker_thread = threading.Thread(
            target=worker,
            kwargs={'db_manager': db_manager, 'filemanager': fm})
        worker_thread.start()

    # Clean up the /tmp folder so we don't run out of disk space.
    cleanup_thread = threading.Thread(target=schedule_cleanup,
                                      kwargs={'filemanager': fm})
    cleanup_thread.start()
Exemplo n.º 3
0
def main():
    """Group the hosts of the 'host' relation and start one WatchDog per group.

    Hosts are collected in relation order into groups of ``number_of_comps``
    entries; a final, smaller group is created for any hosts left over.
    Each WatchDog receives a 1-based sequence number, the address string
    ``dm`` and its list of hosts. All watchdogs are started at the end.
    """
    dm = '0.0.0.0:5000'
    db_manager = DatabaseManager()
    host_relation = db_manager.get_relation('host')

    watchdogs = []
    batch = []
    for idx in range(len(host_relation)):
        if len(batch) < number_of_comps:
            batch.append(host_relation[idx]['host'])
            if len(batch) >= number_of_comps:
                # Group is full: hand it to a new watchdog and start afresh.
                watchdogs.append(WatchDog(len(watchdogs) + 1, dm, batch))
                batch = []

    # Leftover hosts that did not fill a complete group.
    if 0 < len(batch) < number_of_comps:
        watchdogs.append(WatchDog(len(watchdogs) + 1, dm, batch))

    for watchdog in watchdogs:
        watchdog.start()
    def __init__(self, iface, parent=None):
        """Initialise the dialog, its UI, its state attributes and its database manager.

        :param iface: interface object; stored on the dialog and asked for its
            active layer via ``iface.activeLayer()``.
        :param parent: parent object passed to the base-class initialisers.
        """
        super(DownloadDialog, self).__init__(parent)
        # Set up the user interface from Designer.
        # After setupUI you can access any designer object by doing
        # self.<objectname>, and you can use autoconnect slots - see
        # http://qt-project.org/doc/qt-4.8/designer-using-a-ui-file.html
        # #widgets-and-dialogs-with-auto-connect
        # NOTE(review): together with the super() call above this looks like
        # the base dialog is initialised twice - confirm this is intentional.
        QtGui.QDialog.__init__(self, parent)
        self.setupUi(self)
        self.message_bar = None
        self.iface = iface
        self.populate_combo_box()

        # The layer that is active when the dialog is created.
        self.site_layer = iface.activeLayer()
        # Attributes below start unset and are filled in elsewhere.
        self.parcel_layer = None
        self.sg_code_field = None
        self.output_directory = None
        self.all_features = None
        self.log_file = None

        # NOTE(review): sg_diagrams_database is not defined in this method;
        # presumably a module-level name - verify.
        self.database_manager = DatabaseManager(sg_diagrams_database)

        self.restore_state()
Exemplo n.º 5
0
    def getAll(categoryId):
        """Return one Question per record stored for the given category.

        :param categoryId: identifier passed to ``DatabaseManager.getQuestions``.
        :return: list of objects built by ``Question.getObjectFromDictionary``.
        """
        records = DatabaseManager().getQuestions(categoryId)
        return [Question.getObjectFromDictionary(record) for record in records]
Exemplo n.º 6
0
    def getAll(questionId):
        """Return one QuestionLog per record stored for the given question.

        :param questionId: identifier passed to ``DatabaseManager.getQuestionLogs``.
        :return: list of objects built by ``QuestionLog.getObjectFromDictionary``.
        """
        records = DatabaseManager().getQuestionLogs(questionId)
        return [QuestionLog.getObjectFromDictionary(record) for record in records]
Exemplo n.º 7
0
    def getAll():
        """Return one User per record returned by ``DatabaseManager.getUsers``.

        :return: list of objects built by ``User.getObjectFromDictionary``.
        """
        records = DatabaseManager().getUsers()
        return [User.getObjectFromDictionary(record) for record in records]
Exemplo n.º 8
0
def set_background(request, channel_id):
    """Store the background named in the request for the given channel.

    Reads the ``new_background`` GET parameter, passes it to the channel's
    ``DatabaseManager.set_background`` and logs how long the call took.

    :param request: request object whose ``GET`` mapping holds ``new_background``.
    :param channel_id: channel identifier used to create the DatabaseManager.
    :return: ``HttpResponse(1)``.
    """
    timer = Clock(logger=logger)
    timer.start()
    manager = DatabaseManager(channel=channel_id)
    manager.set_background(background=request.GET['new_background'])
    elapsed = timer.stop()
    logger.info("set_background returned in %f seconds" % elapsed)
    return HttpResponse(1)
Exemplo n.º 9
0
def run_ui_test():
    """
    Build the user interface on top of the 'data.json' database manager
    and run its main loop. Meant for running a UI test when this file is
    executed as the main Python program.
    """
    UserInterface(DatabaseManager('data.json')).mainloop()
Exemplo n.º 10
0
 def __init__(self):
     """Read the database credentials and create the "nonogramDB" manager.

     The user name and password are the first two lines of
     ``config/database.config``; both that file and the ``db_storage``
     directory are resolved against ``os.path.abspath("")``.
     """
     base_dir = os.path.abspath("")
     config_path = os.path.join(base_dir, "config", "database.config")
     db_storage = os.path.join(base_dir, "db_storage")
     with open(config_path) as config:
         credentials = [config.readline().strip() for _ in range(2)]
     user, pwd = credentials
     self.database = DatabaseManager(user, pwd, "nonogramDB", db_storage)
Exemplo n.º 11
0
 def __init__(self, channel):
     """Create the queue state for a channel.

     :param channel: channel passed to ``DatabaseManager``; must not be None.
     """
     assert channel is not None
     # Initial state: no connection, paused, pause timestamp at zero.
     self.conn = None
     self.paused = True
     self.__start_pause_ts = 0
     self.db_manager = DatabaseManager(channel=channel)
     self.yth = YoutubeHandler()
     logger.info("Queue started")
Exemplo n.º 12
0
def word_cloud():
    """Return the 25 most frequent lower-cased words of comments matching ``query``.

    Responds with a JSON error and status 400 when the ``query`` request
    argument is missing; otherwise with a JSON object mapping word to count.
    """
    query = request.args.get("query")
    if query is None:
        return jsonify({"error": "missing query"}), 400

    tally = collections.Counter()
    for comment in DatabaseManager().comments_containing(query):
        tally.update(token.lower() for token in comment.content.split())
    return jsonify(dict(tally.most_common(25)))
Exemplo n.º 13
0
    def __init__(self, width, height, path):
        """Build the launcher window, its layout constants and its database manager.

        The constructor ends by calling ``construct_launcher()`` and then
        ``self.root.mainloop()``.

        :param width: window width used for the geometry and widget positions.
        :param height: window height used for the geometry and widget positions.
        :param path: path whose parent directory holds ``data/EngineData.db``.
        """
        self.width = width
        self.height = height

        # presumably the screen width/height (GetSystemMetrics 0 and 1) - TODO confirm
        user32 = windll.user32
        self.x_off = user32.GetSystemMetrics(0)
        self.y_off = user32.GetSystemMetrics(1)

        self.root = tkinter.Tk()
        self.root.withdraw()
        self.root.resizable(False, False)
        self.window = FloatingWindow(self.root)
        # Offsets are half of the space left over around the window.
        left = int((self.x_off - self.width) / 2)
        top = int((self.y_off - self.height) / 2)
        self.window.geometry("{}x{}+{}+{}".format(self.width, self.height, left, top))
        self.window.overrideredirect(True)

        # Widget sizes and paddings.
        self.exit_button_size = (30, 30)
        self.sign_in_button_size = (10, 1)
        self.create_account_button_size = (15, 1)
        self.exit_button_padding = 10
        self.exit_button_pressed_padding = 0
        self.entry_size = 25
        self.entry_label_padding = 30

        # Widget positions, mostly relative to the window centre.
        half_w = self.width / 2
        half_h = self.height / 2
        self.header_label_x = half_w
        self.header_label_y = 115
        self.entry_label_x = half_w - 175
        self.entry_label_y = half_h
        self.entry_x = half_w - 60
        self.entry_y = half_h + 5
        self.login_header_x = half_w
        self.login_header_y = 125
        self.sign_in_button_x = half_w + 169
        self.sign_in_button_y = half_h + 65
        self.error_label_x = half_w
        self.error_label_y = half_h + 100
        self.signup_label_x = self.width - 150
        self.signup_label_y = self.height - 8
        self.create_account_button_x = self.width - 5
        self.create_account_button_y = self.height - 5

        # Colours.
        self.bg_colour = '#171717'
        self.highlight_bg_colour = '#2b2b2b'
        self.highlight_fg_colour = '#7612db'
        self.hover_bg_colour = '#3b3b3b'
        self.text_colour = '#ffffff'
        self.error_colour = '#ffffff'

        # Fonts as (family, size) pairs.
        self.header_font = ('Montserrat ExtraLight', '35')
        self.entry_label_font = ('Montserrat Light', '16')
        self.entry_font = ('Montserrat Light', '10')
        self.sign_in_button_font = ('Montserrat Medium', '10')
        self.error_label_font = ('Montserrat Medium', '10')
        self.signup_label_font = ('Montserrat Light', '12')

        self.parent_dir = os.path.abspath(os.path.join(path, os.pardir))
        database_file = r'{}/data/EngineData.db'.format(self.parent_dir)
        self.db_manager = DatabaseManager(database_file)
        self._launcher_closed = False
        self.error_text = None
        self.account_created_text = None
        self.new_sign_in_button_canvas = None
        self.path = path

        self.construct_launcher()
        self.root.mainloop()
Exemplo n.º 14
0
def index(request, channel_id):
    """Render the channel index page.

    :param request: request object passed through to ``render``.
    :param channel_id: channel identifier used to create the DatabaseManager
        and exposed to the template.
    :return: the result of rendering 'channels/index.html'.
    """
    timer = Clock(logger=logger)
    timer.start()
    # Only the list of backgrounds is used; the directory is ignored.
    backgrounds, _backgrounds_directory = __get_backgrounds()
    manager = DatabaseManager(channel=channel_id)
    context = dict(
        title=manager.get_title(),
        backgrounds=backgrounds,
        current_background='',
        channel_id=channel_id,
    )
    elapsed = timer.stop()
    logger.info("Index returned in %f seconds" % elapsed)
    return render(request, 'channels/index.html', context)
Exemplo n.º 15
0
def most_upvoted():
    """Return comments matching ``query`` as JSON, ordered by votes, highest first.

    Requires the request arguments ``query`` and ``n`` (parsed as int);
    responds with a JSON error and status 400 when either one is missing.
    """
    required = {
        "query": request.args.get("query"),
        "n": request.args.get("n", type=int),
    }
    for name, value in required.items():
        if value is None:
            return jsonify({"error": "missing " + name}), 400

    comments = DatabaseManager().most_upvoted_comments_containing(
        required["query"], required["n"])
    comments.sort(key=lambda comment: comment.votes, reverse=True)
    return jsonify([comment.to_dict() for comment in comments])
Exemplo n.º 16
0
def store_and_delete(mailing_list: str):
    """Store every message in the list's "new" subdir and re-file it under "cur".

    Each such message is processed, stored through the DatabaseManager,
    removed from the Maildir and added again with its subdir set to "cur".

    :param mailing_list: list name; used for the DatabaseManager and to build
        the Maildir path under ``settings.STEPOUT_BASE_PATH``.
    """
    with DatabaseManager(mailing_list) as db:
        maildir_path = settings.STEPOUT_BASE_PATH + mailing_list + '/Maildir'
        md = mailbox.Maildir(maildir_path)
        # NOTE(review): the mailbox is modified while it is being iterated -
        # confirm this is safe for the Maildir implementation in use.
        for key, msg in md.iteritems():
            if msg.get_subdir() != "new":
                # Only new messages are handled.
                continue
            db.store_item(process(msg))
            md.remove(key)
            msg.set_subdir("cur")
            md.add(msg)
Exemplo n.º 17
0
    def __init__(self, previous_start_pos, previous_square_size):
        """Build the "Configure Settings" dialog and wire up its input validation.

        :param previous_start_pos: value passed on to ``RoomSelect``.
        :param previous_square_size: initial text of the square-size field;
            ``DEFAULT_SQUARE_SIZE`` is used when this is None.
        """
        super(ConfigDialog, self).__init__()
        self.setWindowTitle("Configure Settings")

        self.database = DatabaseManager()
        self.main_layout = QVBoxLayout()

        # Robot select
        self.robot_layout = RobotSelect(self.database)
        self.main_layout.addLayout(self.robot_layout)

        # Room select
        # NOTE(review): validate_input is handed over before the square-size
        # widgets below exist - confirm RoomSelect does not call it right away.
        self.room_layout = RoomSelect(self.database, previous_start_pos,
                                      self.validate_input)
        self.main_layout.addLayout(self.room_layout)

        # Square size
        self.square_size_layout = QHBoxLayout()
        self.square_size_label = QLabel("Square Size: ")
        self.square_size_text_edit = QLineEdit()
        # Fall back to the default when no previous size is given.
        if previous_square_size is None:
            previous_square_size = DEFAULT_SQUARE_SIZE
        self.square_size_text_edit.setText(str(previous_square_size))

        self.square_size_layout.addWidget(self.square_size_label)
        self.square_size_layout.addWidget(self.square_size_text_edit)
        self.main_layout.addLayout(self.square_size_layout)

        # Done button
        self.done_button = QPushButton("Done")
        self.main_layout.addWidget(self.done_button)
        self.done_button.clicked.connect(self.close)
        self.done_button.setDefault(True)
        # The robot layout's signal drives both the default state and a
        # repaint of the done button.
        self.robot_layout.set_main_button_default.connect(
            self.done_button.setDefault)
        self.robot_layout.set_main_button_default.connect(
            self.done_button.repaint)

        # Get all info together for input validation
        self.start_pos_line_edits = self.room_layout.get_start_pos_line_edits()
        self.room_select_dropdown = self.room_layout.room_select_dropdown
        self.robot_select_dropdown = self.robot_layout.robot_select_dropdown

        # Re-validate whenever any of the inputs changes.
        self.start_pos_line_edits[0].textChanged.connect(self.validate_input)
        self.start_pos_line_edits[1].textChanged.connect(self.validate_input)
        self.room_select_dropdown.currentIndexChanged.connect(
            self.validate_input)
        self.robot_select_dropdown.currentIndexChanged.connect(
            self.validate_input)
        self.square_size_text_edit.textChanged.connect(self.validate_input)
        # Validate the initial values once.
        self.validate_input()

        self.setLayout(self.main_layout)
Exemplo n.º 18
0
def parse_semantic_scholar_corpus_file(path, database_path="aip.db"):
    """Parse one Semantic Scholar corpus file and store its papers.

    Files that the database reports as already parsed are skipped. Otherwise
    every publication line with a non-empty venue is inserted or updated, and
    the file is recorded as parsed afterwards.

    The database is closed on every exit path, including the early return for
    an already-parsed file and any exception raised while parsing.

    :param path: corpus file to read; a path ending in "gz" is read with the
        gzip line loader.
    :param database_path: location passed to ``DatabaseManager``.
    :return: True.
    """
    database = DatabaseManager(location=database_path)
    try:
        file_hash, parsed = database.did_parse_file(path)
        if parsed:
            return True

        file_iterator_func = (iterload_file_lines_gzip
                              if path.endswith("gz") else iterload_file_lines)
        # The json files contain stacked json objects rather than one JSON
        # array, so each object is lazily loaded line by line.
        for publication in file_iterator_func(path):
            if publication is None:  # Corrupt JSON line possibly. Skip it.
                continue

            if "venue" not in publication:  # Some lines have no 'venue' key.
                continue

            # Wrap in str() as the venue sometimes is an int.
            venue_string = str(publication['venue'])
            if not venue_string:
                continue

            publication_title = publication['title']
            publication_abstract = publication['paperAbstract']
            publication_year = publication.get('year', -1)
            # Empty for conferences.
            publication_journal_volume = publication['journalVolume'].replace(
                " ", "_")
            publication_id = publication['id']

            num_citations = len(publication.get("inCitations", ()))

            # Fall back to the DOI embedded in the DOI url when no DOI is set.
            publication_doi = publication.get('doi')
            if not publication_doi:
                doi_url = publication.get('doiUrl') or ""
                _, found, tail = doi_url.partition("doi.org/")
                if found:
                    publication_doi = tail

            database.update_or_insert_paper(
                id=publication_id,
                doi=publication_doi,
                title=publication_title,
                abstract=publication_abstract,
                raw_venue_string=venue_string,
                year=publication_year,
                volume=publication_journal_volume,
                num_citations=num_citations)
        # database.flush_missing_venues()
        database.add_parsed_file(file_hash)
        return True
    finally:
        database.close()
Exemplo n.º 19
0
def load_X_Y_rnn(table_name,
                 chunk_idx,
                 total_chunks,
                 top100_labels=False,
                 validation_set=False,
                 test_set=False):
    """Load one chunk of per-note bag-of-words sequences and ICD-9 label rows.

    The unique subject ids of the table are split into ``total_chunks`` parts
    and the part at ``chunk_idx`` is loaded into a dense array of shape
    ``(n_patients, 20, 40000)``; each note is written at sequence position
    ``note_in_seq - 1``.

    :param table_name: table name passed to the database queries.
    :param chunk_idx: index of the chunk to load.
    :param total_chunks: number of parts the subject ids are split into.
    :param top100_labels: when true ``Y`` has 100 columns, otherwise 10.
    :param validation_set: forwarded to ``get_icd9_codes``.
    :param test_set: forwarded to ``get_icd9_codes``.
    :return: ``(ret, n_patients, n_features, Y)``.
    """
    manager = DatabaseManager()

    all_subject_ids = manager.unique_subject_ids(table_name)
    subject_id_chunk = np.array_split(np.array(all_subject_ids),
                                      total_chunks)[chunk_idx]
    # Position of every subject id inside this chunk.
    row_of_subject = {
        subject_id: position
        for position, subject_id in enumerate(subject_id_chunk)
    }

    n_patients = subject_id_chunk.shape[0]
    n_labels = 100 if top100_labels else 10
    Y = np.zeros((n_patients, n_labels))

    code_to_column = get_icd9_codes_map(top100_labels=top100_labels)

    first_patient = subject_id_chunk[0].item()
    last_patient = subject_id_chunk[-1].item()

    seq_length = 20  # TODO
    n_features = 40000  # TODO
    ret = np.zeros((n_patients, seq_length, n_features))

    cursor = manager.get_bag_of_words_vectors_rnn(table_name, first_patient,
                                                  last_patient)

    for (note_in_seq, _row_id, subject_id, _chart_date, pickled_cols,
         pickled_values) in cursor:
        note_cols = pickle.loads(pickled_cols)
        note_values = pickle.loads(pickled_values)

        for column, value in zip(note_cols, note_values):
            ret[row_of_subject[subject_id], note_in_seq - 1, column] = value

        # Mark every ICD-9 code recorded for this subject in its label row.
        subject_codes = manager.get_icd9_codes(subject_id=subject_id,
                                               validation_set=validation_set,
                                               test_set=test_set)
        for icd9_code in subject_codes:
            label_column = code_to_column[icd9_code]
            Y[row_of_subject[subject_id], label_column] = 1

    return ret, n_patients, n_features, Y
Exemplo n.º 20
0
    def __init__(self):
        """Create the database manager and its databases, and an empty hall grid."""
        self.manager = DatabaseManager()
        self.manager.create_databases()

        # 10 x 10 grid of independent rows, every cell set to '.'.
        self.cinema_hall = [['.'] * 10 for _ in range(10)]
Exemplo n.º 21
0
 def aggregate_counts(self, seconds: int, top: int):
     """Periodically snapshot the counter, derive rates and persist the counts.

     Runs forever. On every pass the current counter is copied and replaced
     by a fresh one; for each symbol in the copy the velocity (count minus
     previous count) and acceleration (velocity minus previous velocity) are
     updated and a ``(timestamp, symbol, count)`` row is inserted.

     :param seconds: time slept between two passes.
     :param top: value passed to ``generate_output``.
     """
     database_manager = DatabaseManager(self.db_file)
     while True:
         snapshot = self.counter.copy()
         self.counter = collections.Counter()
         timestamp = time.time()
         for symbol, count in snapshot.items():
             speed = count - self.prev_counter[symbol]
             self.velocity[symbol] = speed
             self.acceleration[symbol] = speed - self.prev_velocity[symbol]
             database_manager.insert_row_data((timestamp, symbol, count))
         # Keep this pass around as the baseline for the next one.
         self.prev_counter = snapshot.copy()
         self.prev_velocity = self.velocity.copy()
         self.generate_output(top)
         time.sleep(seconds)
Exemplo n.º 22
0
    def __init__(self, width, height, login_sys, path):
        """Set up colours, state flags, the settings database, GUI, engine and pygame.

        :param width: passed on to ``GUI``.
        :param height: passed on to ``GUI``.
        :param login_sys: stored as ``self.login_sys``.
        :param path: NOTE(review): this argument is overwritten with
            ``os.getcwd()`` below before it is used - confirm this is intended.
        """

        # RGB colour tuples.
        self.line_colour = (255, 255, 255)
        self.point_colour = (255, 255, 255)
        self.bg_colour = (35, 35, 35)
        self.fps_colour = (255, 255, 255)
        self.relative_line_colour = (118, 18, 219)

        self.displacement_arrows = 0
        
        self.fps_array, self.time_array = [], []
        self.fps_array_max_length = 500
        self.fps_graph_interval = 500
        self.start_time = time.time()

        # Initialising all variables used with chosen point and chosen rotation anchor to None
        self.chosen_point, self.chosen_rotation_anchor, self.input_boxes, self.responsive_text = None, None, None, None
        self.clickable_radius = 5 # The radius at which a point can be clicked beyond its shown radius
        self.translating, self.translating_x, self.translating_y = False, False, False
        self.use_custom_rotation_anchor, self.running = False, True

        self.login_sys = login_sys        
        path = os.getcwd()
        self.parent_dir = os.path.join(path, os.pardir)
        # Make sure the data directory and the database file exist before opening the database.
        Path(r'{}/data'.format(self.parent_dir)).mkdir(parents=True, exist_ok=True)
        Path(r'{}/data/EngineData.db'.format(self.parent_dir)).touch(exist_ok=True)
        self.db_manager = DatabaseManager(r'{}/data/EngineData.db'.format(self.parent_dir))


        # load_user_settings(self) is unpacked into the fifteen settings attributes below.
        # lighting_factor: Controls the contrast of colour in objects, higher means more contrast
        self.display_surfaces, self.display_lines, self.display_points, \
        self.debug_mode, self.display_hud, self.display_logo, \
        self.rotation_factor, self.scaling_factor, self.translation_factor, \
        self.movement_factor, self.max_frame_rate, self.max_render_distance, \
        self.min_render_distance, self.lighting_factor , self.point_radius = self.db_manager.load_user_settings(self)

        self.camera = Camera(self)
        self.gui = GUI(self, self.db_manager, width, height, path)
        self.engine = Engine3D('orthographic', self.gui.viewer_centre)

        # The pygame window is sized from the GUI's viewer dimensions.
        pygame.init()
        self.viewer = pygame.display.set_mode((self.gui.viewer_width, self.gui.viewer_height))
        self.clock = pygame.time.Clock()
        self.font = pygame.font.Font(r'{}/fonts/Montserrat-SemiBold.ttf'.format(self.parent_dir), 16)
        # presumably (delay, interval) for held keys - TODO confirm against the pygame docs
        pygame.key.set_repeat(1, self.movement_factor)
        self.logo = pygame.image.load(r'{}/images/FL3D_small.png'.format(self.parent_dir))
        self.logo_size = (197, 70)
    def __init__(self):
        """Build the "Load Scenario" dialog: a scenario drop-down and a Done button.

        The drop-down is filled with the names the database returns for the
        "Scenario" table; the Done button closes the dialog.
        """
        super(ScenarioLoader, self).__init__()
        self.setWindowTitle("Load Scenario")

        layout = QVBoxLayout()
        self.database = DatabaseManager()

        # Scenario drop-down.
        self.scenario_select = QComboBox()
        self.scenario_select.addItems(
            self.database.get_names_in_table("Scenario"))
        layout.addWidget(self.scenario_select)

        # Done button, default button of the dialog.
        done = QPushButton("Done")
        self.done_button = done
        layout.addWidget(done)
        done.clicked.connect(self.close)
        done.setDefault(True)

        self.setLayout(layout)
Exemplo n.º 24
0
    def __init__(self):
        """Create the database manager, interfaces and buttons, then show the window."""
        # The base constructor must be called explicitly when __init__ is
        # overridden (when inheriting from QWidget).
        super().__init__()

        # Database first, then the interfaces and the button layout.
        self.db = DatabaseManager()
        self.init_interfaces()
        self.init_buttons()

        # Basic window set-up; show() is required for the window to appear.
        self.resize(400, 400)
        self.setWindowTitle("COMP370 Project")
        self.show()
Exemplo n.º 25
0
def main():
    """
    Start the submission server and the scoring worker threads.

    A bottle server listens for submissions. Each submission is handed to
    put_submission_on_lb, which makes sure the user is on the leaderboard and
    that the leaderboard reflects their most recent submission.

    That method then enqueues the submission for the concordance and
    originality checks. ``--num_threads`` minus three originality workers are
    started, next to one leaderboard and one concordance worker.
    """
    np.random.seed(1337)

    parser = argparse.ArgumentParser(
        description="Score if submissions are original.")
    parser.add_argument("--num_threads",
                        dest="num_threads",
                        type=int,
                        default=32,
                        help="Number of threads to use.")
    parser.set_defaults(local=False)
    args = parser.parse_args()

    create_logger()
    db_manager = DatabaseManager()
    fm = FileManager('/tmp/', logging)
    root_logger = logging.getLogger()

    def spawn(target, **kwargs):
        # Start ``target`` in a new thread with the given keyword arguments.
        threading.Thread(target=target, kwargs=kwargs).start()

    root_logger.info("Creating servers")
    spawn(run, host='0.0.0.0', port=int(PORT))

    root_logger.info(
        "Spawning new threads to score originality and concordance")
    spawn(put_submission_on_lb, db_manager=db_manager, filemanager=fm)
    for _ in range(args.num_threads - 3):
        spawn(score_originality, db_manager=db_manager, filemanager=fm)
    spawn(score_concordance, db_manager=db_manager, filemanager=fm)
Exemplo n.º 26
0
def courses_by_popularity():
    """Return courses with their average comment sentiment, highest first.

    For every course of the CourseTracker the comments containing the course
    name are analysed; courses without such comments are left out. The
    response is a JSON list of ``{"course", "avg_sentiment"}`` objects.
    """
    course_tracker = CourseTracker()
    db = DatabaseManager()
    analyzer = SentimentAnalyzer()

    ranking = []
    for course in course_tracker.get_all_courses():
        matching = db.comments_containing(course.name)
        if len(matching) == 0:
            continue
        scores = [analyzer.analyze_sentiment(c.content) for c in matching]
        ranking.append({
            "course": course.name,
            "avg_sentiment": sum(scores) / len(scores)
        })

    ranking.sort(key=lambda entry: entry["avg_sentiment"], reverse=True)
    return jsonify(ranking)
Exemplo n.º 27
0
    def __init__(self, iface):
        """Initialise the map tool on the interface's map canvas.

        :param iface: A QGIS QgisInterface instance.
        :type iface: QgisInterface
        """
        canvas = iface.mapCanvas()
        QgsMapTool.__init__(self, canvas)
        self.iface = iface
        self.canvas = canvas

        # These start unset; restore_state() runs once they exist.
        self.message_bar = None
        self.progress_bar = None
        self.output_directory = None
        self.log_file = None

        self.restore_state()

        self.db_manager = DatabaseManager(
            os.path.join(DATA_DIR, 'sg_diagrams.sqlite'))
Exemplo n.º 28
0
def add_semantic_scholar_cites_data(path, database_path="aip"):
    """Store the in- and out-citations of every publication in a corpus file.

    Lines for which the loader yields ``None`` are skipped, as the other
    corpus parsers in this file do for possibly corrupt JSON lines. The
    database is closed even when reading or inserting raises.

    :param path: corpus file to read; a path ending in "gz" is read with the
        gzip line loader.
    :param database_path: location passed to ``DatabaseManager``.
    :return: True.
    """
    database = DatabaseManager(location=database_path)
    try:
        file_iterator_func = (iterload_file_lines_gzip
                              if path.endswith("gz") else iterload_file_lines)

        for publication in file_iterator_func(path):
            if publication is None:  # Corrupt JSON line possibly. Skip it.
                continue

            # Missing citation lists are treated as empty.
            database.insert_cites(publication['id'],
                                  publication.get("inCitations", []),
                                  publication.get("outCitations", []))

        # TODO: add hashing of the file so that it doesn't recompute already
        #  computed files in case of multiple restarts??
    finally:
        database.close()
    return True
Exemplo n.º 29
0
 def __init__(self):
     """Set the test database / table file paths under ``_PATH`` and open the database."""
     data_dir = _PATH + "\\data"
     self.databasePath = data_dir + "\\database\\test.sqlite3"
     self.table_path = data_dir + "\\tables.json"
     self.table_init_path = data_dir + "\\test_db_init.json"
     self.DM = DatabaseManager(self.databasePath)
Exemplo n.º 30
0
def parse_aminer_corpus_file(path, database_path="aip", logger_disabled=False):
    """Parse one AMiner corpus file and store its papers.

    Files that the database reports as already parsed are skipped. Lines
    without a venue or title are logged and skipped. The file is recorded as
    parsed once all of its lines have been handled.

    The database is closed on every exit path, including the early return for
    an already-parsed file and any exception raised while parsing.

    :param path: corpus file to read; a path ending in "gz" is read with the
        gzip line loader.
    :param database_path: location passed to ``DatabaseManager``.
    :param logger_disabled: value assigned to ``logger.disabled``.
    :return: True.
    """
    logger.disabled = logger_disabled
    database = DatabaseManager(location=database_path)
    try:
        file_hash, parsed = database.did_parse_file(path)
        if parsed:
            return True

        # The json files contain stacked json objects rather than one JSON
        # array, so each object is lazily loaded.
        file_iterator_func = (iterload_file_lines_gzip
                              if path.endswith("gz") else iterload_file_lines)
        for publication in tqdm(file_iterator_func(path)):
            if publication is None:  # Corrupt JSON line possibly. Skip it.
                continue

            # Warning: contrary to the documentation, the key is "venue"
            # NOT "venue.raw"!
            if 'venue' not in publication:
                logger.warning("Skipping line missing venue: %s in %s.",
                               publication, path)
                continue

            if 'title' not in publication:
                logger.warning("Skipping line missing title: %s in %s.",
                               publication, path)
                continue

            venue_string = publication['venue']
            # Sometimes the venue string is yet another dict...
            if isinstance(venue_string, dict) and "raw" in venue_string:
                venue_string = venue_string["raw"]

            publication_title = str(publication['title']).rstrip(".")
            publication_abstract = publication.get('abstract', "")
            publication_year = publication.get('year')
            publication_journal_volume = publication.get('volume')
            publication_id = publication['id']

            publication_doi = publication.get('doi')
            # Sometimes a doi link is among the urls; take the DOI from the
            # first such link when no DOI is given.
            if not publication_doi:
                for candidate_url in publication.get('url', []):
                    _, found, tail = candidate_url.partition("doi.org/")
                    if found:
                        publication_doi = tail
                        break

            database.update_or_insert_paper(id=publication_id,
                                            doi=publication_doi,
                                            title=publication_title,
                                            abstract=publication_abstract,
                                            raw_venue_string=venue_string,
                                            year=publication_year,
                                            volume=publication_journal_volume)
        # database.flush_missing_venues()
        database.add_parsed_file(file_hash)
        return True
    finally:
        database.close()