Exemplo n.º 1
0
def _read_keywords_file(filename, input_path):
    """Return the keywords listed one per line in *filename*.

    The file is looked up first as given (relative to the current working
    directory or absolute), then inside *input_path*.  Each line is stripped
    of surrounding whitespace so that entries look the same as the ones
    produced by the list input mode.  Returns ``None`` when the file cannot
    be found in either location.
    """
    for candidate in (filename, Path(input_path) / filename):
        if os.path.isfile(candidate):
            with open(candidate, mode='r', encoding='utf-8') as keywords_file:
                return [line.strip() for line in keywords_file]
    return None


def main():
    """Parse the command line and run the selected crawler.

    Command line:
        module       one of ``questions``, ``answers`` or ``users``
        -f           treat ``input`` as the path of a keywords file
        -l           treat ``input`` as a comma separated list, optionally
                     wrapped in square brackets
        input        the file path or the list, depending on -f / -l
        -i, --index  position in the keyword list from which to start

    Results are written under ``<My Documents>/QuoraScraperData/<module>``.
    If the keywords file cannot be found an error is printed and nothing is
    crawled.
    """
    start_time = datetime.now()

    # Input folder: keyword files may be dropped here instead of the CWD.
    input_path = Path(userpaths.get_my_documents()) / "QuoraScraperData" / "input"
    input_path.mkdir(parents=True, exist_ok=True)

    # Read arguments.
    # NOTE: the long option names (--verbose / --quiet) do not describe what
    # the flags do; they are kept unchanged so existing invocations still work.
    parser = argparse.ArgumentParser()
    parser.add_argument("module", choices=['questions', 'answers', 'users'], help="type of crawler")
    group = parser.add_mutually_exclusive_group()
    group.add_argument("-f", "--verbose", action="store_true", help="input keywords file path ")
    group.add_argument("-l", "--quiet", action="store_true", help="input keywords list")
    parser.add_argument("input", help=" Input filepath or input list")
    parser.add_argument("-i", "--index", type=int, default=0, help="index from which to start scraping ")
    args = parser.parse_args()

    # Build the list of keywords to crawl.
    if args.verbose:
        # Input is a file path.
        filename = args.input
        print("Input file is : ", filename)
        keywords_list = _read_keywords_file(filename, input_path)
        if keywords_list is None:
            print()
            print("Reading file error: Please put the file in the program directory: ", Path.cwd(), " or in the QuoraScraperData folder :", input_path, "  and try again")
            print()
            # Nothing to crawl: do not start a crawler on an empty list.
            return
    elif args.quiet:
        # Input is an inline list such as "[a, b, c]".
        keywords_list = [item.strip() for item in args.input.strip('[]').split(',')]
    else:
        # Neither -f nor -l was given: the positional input is ignored.
        keywords_list = []

    # Apply the starting index first so that it keeps referring to the
    # original line / item position, then drop empty entries.
    keywords_list = [keyword for keyword in keywords_list[args.index:] if keyword]

    # Create the output folder for the selected module.
    module_name = args.module
    save_path = Path(userpaths.get_my_documents()) / "QuoraScraperData" / module_name
    save_path.mkdir(parents=True, exist_ok=True)

    # Launch the scraper; argparse ``choices`` guarantees a valid key.
    crawlers = {
        'questions': questions,
        'answers': answers,
        'users': users,
    }
    crawlers[module_name](keywords_list, save_path)

    end_time = datetime.now()
    print(' Crawling tooks a total time of  : ', end_time - start_time)
Exemplo n.º 2
0
    def __init__(self, master, **kw):
        """Build the PDF Renamer application window inside *master*.

        Any extra keyword arguments are passed through to ``Frame.__init__``.
        """
        Frame.__init__(self, master, **kw)

        toplevel = self.winfo_toplevel()
        toplevel.title(config.NAME)

        # ---- Application state ------------------------------------------

        # Files currently displayed, and the position of the selected one
        self._files = []
        self._selected_index = 0

        # Absolute path of the selected file.  Held in a StringVar so the
        # same variable can drive the radio buttons in the Go menu.
        self._selected_file = StringVar()

        # Basename (without extension) being edited for the current file
        self._new_name = StringVar()

        # Directories remembered for the Browse dialog and for the
        # "Rename and Move" feature; both start at the documents folder
        self._browse_dir = userpaths.get_my_documents()
        self._rename_and_move_dir = userpaths.get_my_documents()

        # ---- Rename controls --------------------------------------------

        toolbar = Frame(self)
        toolbar.pack(side="top", fill="x")

        # Keep references to the icons on the instance
        self._icon_back = PhotoImage(data=icons.action_back_gif)
        self._icon_forward = PhotoImage(data=icons.action_forward_gif)
        self._icon_wand = PhotoImage(data=icons.icon_wand_gif)

        # Navigation buttons, packed against the left edge in this order
        navigation = (
            ("Prev", self._icon_back, self.go_previous),
            ("Next", self._icon_forward, self.go_next),
        )
        for caption, icon, callback in navigation:
            nav_button = Toolbutton(toolbar,
                                    width=4,
                                    text=caption,
                                    image=icon,
                                    command=callback)
            nav_button.pack(side="left", ipadx=4, fill="y")

        # Rename button, packed against the right edge
        rename_button = Toolbutton(toolbar,
                                   width=6,
                                   text="Rename",
                                   image=self._icon_wand,
                                   command=self.rename_and_go_next)
        rename_button.pack(side="right", ipadx=6, fill="y")

        # Entry box for the new filename takes the remaining space
        entry = Entry(toolbar, textvariable=self._new_name)
        self.filename_entry = entry
        entry.pack(side="left", expand=1, fill="both", padx=1)

        # Keyboard shortcuts available while typing in the entry box
        entry_bindings = (
            ("<Return>", self.rename_and_go_next),
            ("<Shift-Return>", self.rename),
            ("<Control-z>", self.reset_new_name),
        )
        for sequence, handler in entry_bindings:
            entry.bind(sequence, handler)

        # Divider between the controls and the document viewer
        divider = Separator(self)
        divider.pack(fill="x")

        # ---- Document viewer --------------------------------------------

        viewer = DocViewer(self,
                           borderwidth=0,
                           scrollbars="vertical",
                           use_ttk=True)
        self.viewer = viewer

        # Sized to show most of a letter-size page on a widescreen display
        viewer.fit_page(8.5, 11.0 * 3 / 5)

        # Let the mouse wheel scroll the viewer even when the entry has focus
        viewer.bind_scroll_wheel(entry)

        viewer.pack(side="top", expand=1, fill="both")

        # Route the viewer's virtual events to their handlers
        viewer_events = (
            ("<<DocumentStarted>>", self._handle_document_started),
            ("<<PageCount>>", self._handle_page_count),
            ("<<PageFinished>>", self._handle_page_finished),
            ("<<DocumentFinished>>", self._handle_document_finished),
            ("<<RenderingError>>", self._handle_rendering_error),
        )
        for event_name, handler in viewer_events:
            viewer.bind(event_name, handler)

        # ---- Status bar -------------------------------------------------

        # The outer frame is created here but not packed; it is displayed
        # as needed when a rendering process is active.
        status_outer = Frame(self)
        self._status_frame_outer = status_outer

        status_inner = Frame(status_outer)
        self._status_frame = status_inner
        status_inner.grid_columnconfigure(1, weight=1)
        status_inner.pack(side="top", fill="x", padx=2, pady=2)

        # The progress bar is created but not placed in the grid here
        self._progress_bar = Progressbar(status_inner, length=120)

        status_label = Label(status_inner)
        self._status_text = status_label
        status_label.grid(row=0, column=1, sticky="we")

        # ---- Menus, key bindings and window events ----------------------

        self._create_menus()
        self._bind_keys()

        # Custom handler for window configuration events
        self.bind("<Configure>", self._handle_configure)

        # Closing the window via the title bar goes through close_window()
        toplevel.protocol("WM_DELETE_WINDOW", self.close_window)

        # Finally restore the saved configuration options
        self._load_config()
Exemplo n.º 3
0
    'Windows': "NI.ico",
    "Linux": "NI.xbm",
    "Darwin": "NI.icns"
}

# Resolve the icon file bundled in the ``config`` package for this platform.
# Every platform except Windows gets the path prefixed with "@".
# NOTE(review): the path is only guaranteed to exist inside the ``with``
# block if the resource had to be extracted to a temporary file - confirm
# the package is always installed unpacked.
with pkg_resources.path(config, _icon_to_use[PLATFORM]) as p:
    if PLATFORM == 'Windows':
        PROGRAM_ICON = str(p)
    else:
        PROGRAM_ICON = "@" + str(p)

# Location of the Rscript executable.  Outside Windows the bare command name
# is used; on Windows the directory is read from the bundled "rloc.txt" and
# "bin/Rscript.exe" is appended to it.
if PLATFORM != "Windows":
    R_LOC = "Rscript"
else:
    with pkg_resources.open_text(config, "rloc.txt") as rloc:
        R_LOC = os.path.join(rloc.read().strip('\n'), "bin/Rscript.exe")


# Per-user working directory inside the user's documents folder.
USER_PATH = os.path.join(userpaths.get_my_documents(), "niclassify")


_required_folders = [
    os.path.join(USER_PATH),
    os.path.join(USER_PATH, "config"),
    os.path.join(USER_PATH, "output"),
    os.path.join(USER_PATH, "data"),
    os.path.join(USER_PATH, "output/classifiers"),
    os.path.join(USER_PATH, "logs"),
    os.path.join(USER_PATH, "logs/delim"),
    os.path.join(USER_PATH, "logs/delim/tree"),
    os.path.join(USER_PATH, "logs/delim/delim"),
    os.path.join(USER_PATH, "logs/ftgen"),
    os.path.join(MAIN_PATH, "data"),
    os.path.join(USER_PATH, "data/unprepared")