Exemplo n.º 1
0
 def get_surveys(self):
     """Fill ``self.surveys`` on first use and log what was found.

     When ``self.surveys`` is already non-empty nothing happens. Otherwise
     every item returned by ``get_csv_files(self.input_dir)`` is wrapped in
     an ``InputFile`` whose id is its 1-based position in that list.
     """
     if self.surveys:
         return

     found = get_csv_files(self.input_dir)
     self.surveys = [
         InputFile(number, name) for number, name in enumerate(found, 1)
     ]

     logger.info("Surveys found: {}".format(len(self.surveys)))
     for entry in self.surveys:
         logger.info("\t--> {}.{}".format(entry.id, entry.name))
Exemplo n.º 2
0
def main():
    """Parse the command line and export the parsed survey data.

    Unless ``args.only_to_db`` is set, the five tab-separated outputs are
    written, and the database is loaded afterwards only if
    ``args.write_to_db`` is set as well. With ``args.only_to_db`` the tab
    outputs are skipped and only the database is written.
    """
    global args, PROBLEMS
    PROBLEMS = 0

    parse_cli_opts()

    parser = surveyDI_lib.Parser(args.input_dir)

    db_only = args.only_to_db
    if db_only:
        logger.info("Writing only to database.")
    else:
        # One call per output file, in the order the tables are listed:
        # Surveys, Questions, SurveysQuestions, Respondents,
        # QuestionResponses.
        parser.write_surveys(make_output_path(Globals.OPUT_S))
        parser.write_questions(make_output_path(Globals.OPUT_Q))
        parser.write_surveysquestions(make_output_path(Globals.OPUT_SQ))
        parser.write_respondents(make_output_path(Globals.OPUT_R))
        parser.write_responses(make_output_path(Globals.OPUT_QR))

    # The database load is mandatory in db-only mode and opt-in otherwise.
    if db_only or args.write_to_db:
        parser.write_all_to_mysql(
            Globals.SERVER_NAME,
            Globals.USER,
            Globals.PASS,
            Globals.DB_NAME,
        )
Exemplo n.º 3
0
def read_csv2dict(input_file):
    """
    Read a csv file into a csv reader dictionary

    :input: input_file - csv input file
    :return: a csv.DictReader over the lines of the file. The lines are
             loaded into memory before the file is closed, so the reader
             can be iterated by the caller after this function returns.
    """

    logger.info("Reading input file {}".format(input_file))
    with open(input_file) as csv_file:
        # csv.DictReader pulls rows lazily from its source. Handing it the
        # file object and returning after the ``with`` block would leave
        # the caller with a reader bound to a closed file, so the lines
        # (line endings kept, as the csv module expects) are read here.
        lines = csv_file.readlines()
    return csv.DictReader(lines)
Exemplo n.º 4
0
    def get_respondents(self):
        """Collect respondents and their responses from every survey file.

        Always calls ``self.get_questions()`` first. If ``self.respondents``
        is already non-empty nothing else happens. Otherwise each file in
        ``self.surveys`` is read as CSV: the columns up to and including the
        "Custom Data" header describe the respondent, the columns after it
        are question responses.

        For every data row:
          * the respondent columns, prefixed with the file id, are appended
            to ``self.respondents``;
          * one ``(question id, user id, response)`` tuple per remaining
            question column is appended to ``self.qresponses``; the user id
            is the first column of the row.

        Questions listed in ``self.qduplicates`` for the file are collapsed
        into the column at ``q.order[0]``; the responses found at all
        ``q.order`` positions are joined with ";" after dropping the ones
        that are contained in the question text.
        NOTE(review): this assumes ``q.order`` holds column positions that
        are relative to the first question column of this file -- confirm
        against the Question class.

        Any exception raised while processing a file is reported through
        ``write_exception`` and processing continues with the next file;
        rows already collected from that file are kept.
        """
        self.get_questions()
        if self.respondents:
            return

        questions_delim = "Custom Data"
        self.get_surveys()
        for input_file in self.surveys:
            try:
                with open(input_file.name, 'rb') as csv_file:
                    fileid = input_file.get_id()
                    reader = csv.reader(csv_file)
                    headers = next(reader)
                    qstart_idx = headers.index(questions_delim) + 1
                    questions = headers[qstart_idx:]

                    dup_qs = [
                        q for q in self.qduplicates if fileid in q.fileid
                    ]
                    # Every position except the first one of each duplicate
                    # group is dropped. Deleting from the highest index down
                    # keeps the remaining indexes valid; deleting in any
                    # other order shifts the columns that are still to be
                    # removed and drops the wrong ones.
                    drop_idxs = sorted(
                        {idx for q in dup_qs for idx in q.order[1:]},
                        reverse=True)
                    for idx in drop_idxs:
                        del questions[idx]

                    for row in reader:
                        # respondent part of the row
                        user_details = row[:qstart_idx]
                        user_details.insert(0, fileid)
                        self.respondents.append(user_details)

                        # response part of the row
                        user_responses = row[qstart_idx:]
                        # Merge every duplicate group first, while all the
                        # original positions are still in place ...
                        for q in dup_qs:
                            q_responses = [
                                user_responses[idx] for idx in q.order
                            ]
                            valid_responses = [
                                r for r in q_responses if r not in q.text
                            ]
                            user_responses[q.order[0]] = ";".join(
                                valid_responses)
                        # ... and only then remove the surplus columns, so
                        # the list lines up with ``questions`` again.
                        for idx in drop_idxs:
                            del user_responses[idx]

                        user_id = user_details[1]
                        for qtext, response in zip(questions,
                                                   user_responses):
                            q = self.get_question_by_text(qtext)
                            self.qresponses.append((q.id, user_id,
                                                    response))
            except Exception:
                # Not a bare ``except``: KeyboardInterrupt and SystemExit
                # must still be able to stop the run.
                write_exception(
                    "While reading file '{}'".format(input_file))
        logger.info("Respondents found: {}".format(len(self.respondents)))
Exemplo n.º 5
0
 def write_all_to_mysql(self, server_name, user, passw, db_name):
     """Write the five parsed tables to the database ``db_name``.

     A connection is obtained from ``connect(server_name, user, passw,
     db_name)`` and used as a context manager; inside it one cursor is
     created and passed to ``write_sql_table`` for the surveys, questions,
     survey-questions, respondents and responses tables, in that order.
     What entering and leaving the connection context does is defined by
     the database driver and is not visible here.
     """
     logger.info("Writing do database {}:".format(db_name))
     conn = connect(server_name, user, passw, db_name)
     with conn:
         cursor = conn.cursor()

         def store(table, header, rows):
             # Every table goes through the same cursor and database.
             write_sql_table(cursor, db_name, table, header, rows)

         store(self.stable, self.fheader,
               [(survey.id, survey.name) for survey in self.surveys])
         store(self.qtable, self.qheader,
               [(question.id, question.text)
                for question in self.questions])
         store(self.sqtable, self.sqheader, self.squestions)
         store(self.rtable, self.rheader, self.respondents)
         store(self.qrtable, self.qrheader, self.qresponses)
Exemplo n.º 6
0
def write_to_csv(output_file, headers, values_list, delim='\t'):
    """
    Write header and corresponding values to a csv file

    :input: desired output file name
            a list representing the csv file header
            a list of lists, each representing the corresponding values
            delimiter
    :return: None
    :notes: Failures are not raised; they are reported through
            write_exception together with the row that was being written
            (None when the failure happened before the first row).
    """
    # Bound before the try block: the error report below formats it, and a
    # failure in open() or in the header write happens before the loop has
    # assigned anything to it.
    values = None
    try:
        logger.info("\tWriting file {}...".format(output_file))
        with open(output_file, 'wb') as csv_file:
            writer = csv.writer(csv_file, delimiter=delim)
            writer.writerow(headers)
            for values in values_list:
                writer.writerow(values)
    except Exception:
        # Not a bare ``except``: KeyboardInterrupt and SystemExit must not
        # be turned into a log entry.
        write_exception("Trying to write values: {}".format(values))
Exemplo n.º 7
0
 def get_questions(self):
     """Build ``self.questions`` and ``self.qduplicates`` from the surveys.

     Does nothing when ``self.questions`` is already non-empty. Otherwise
     the header row of every file in ``self.surveys`` is read and every
     column after the "Custom Data" header is treated as a question text.
     One ``Question`` is created per distinct text (ids start at 1 and
     follow the iteration order of a ``set`` of the texts); every further
     occurrence of the same text is recorded on that instance through
     ``add_fieldid`` and ``add_order``.

     Files that cannot be read (``IOError``) are reported through
     ``write_exception`` and skipped. Finally ``self.qduplicates`` is set
     to the questions that have more than one file id and for which
     ``is_duplicate()`` is true, and the statistics are logged.
     """
     if self.questions:
         return

     all_questions_list = []
     questions_delim = "Custom Data"
     self.get_surveys()
     for input_file in self.surveys:
         try:
             with open(input_file.name, 'rb') as csv_file:
                 fileid = input_file.get_id()
                 reader = csv.reader(csv_file)
                 headers = next(reader)
                 qstart_idx = headers.index(questions_delim) + 1
                 # everything after the delimiter column is a question
                 qlist = headers[qstart_idx:]
                 all_questions_list.extend(
                     [(text, fileid, qorder)
                      for qorder, text in enumerate(qlist)])
         except IOError:
             write_exception(
                 "While reading file '{}'".format(input_file))

     total = len(all_questions_list)

     # Group the occurrences by question text once, keeping the order in
     # which they were read, instead of rescanning the complete list for
     # every distinct question.
     occurrences = {}
     for qtext, qfileid, qorder in all_questions_list:
         occurrences.setdefault(qtext, []).append((qfileid, qorder))

     uniq_questions_list = list(set([q[0] for q in all_questions_list]))
     for qid, text in enumerate(uniq_questions_list, 1):
         q = self.get_question_by_text(text)
         for qfileid, qorder in occurrences[text]:
             if not q:
                 # first occurrence: create and register the question
                 q = Question(qid, text, qfileid, qorder)
                 self.questions.append(q)
             else:
                 # later occurrence: remember where else it shows up
                 q.add_fieldid(qfileid)
                 q.add_order(qorder)

     # display questions statistics
     commons = [q for q in self.questions if len(q.fileid) > 1]
     self.qduplicates = [q for q in commons if q.is_duplicate()]
     logger.info("Distinct questions found: {} (total {})".format(
         len(self.questions), total))
     logger.info("Common questions to multiple input files: {}".format(
         len(commons) - len(self.qduplicates)))
     logger.info(
         "Duplicate questions in the same input file: {}".format(
             len(self.qduplicates)))
Exemplo n.º 8
0
def write_sql_table(cursor, db_name, table_name, headers_list, values_list):
    """
    Write info to an SQL table.

    :input: cursor - MySQLdb cursor object as obtained prior to connecting
                     to the database
            db_name - name of the database to create the table in
            table_name - name of table to be created
            headers_list - table headers
            values_list - list of lists each containing a table row
    :return: None
    :notes: Tables will be dropped and recreated if already exist
    Column names will be the CSV headers with spaces and round
    brackets removed. Every column is created as VARCHAR(3000) and every
    value is stored as its str() representation.
    Row values are passed to cursor.execute() as query parameters, never
    spliced into the SQL text.
    Population stops at the first row that fails to insert; the failure is
    reported through write_exception.
    """
    # DROP, CREATE and INSERT all address the same fully qualified table.
    qualified_name = "{}.{}".format(db_name, table_name)

    logger.info("\n\tDropping table {}.{}...".format(db_name, table_name))
    cursor.execute("DROP TABLE IF EXISTS {}".format(qualified_name))

    logger.info("\tCreating table {}.{}...".format(db_name, table_name))
    # Strip spaces and round brackets. Done per character rather than with
    # str.translate(None, ...), which only exists for Python 2 byte strings.
    db_headers = [
        "".join(ch for ch in header if ch not in "() ")
        for header in headers_list
    ]
    # Inside a backtick-quoted MySQL identifier a backtick is escaped by
    # doubling it.
    columns = ["`" + name.replace("`", "``") + "`" for name in db_headers]
    create_cmd = "CREATE TABLE {}({})".format(
        qualified_name,
        ", ".join([column + " VARCHAR(3000)" for column in columns]))
    cursor.execute(create_cmd)

    logger.info("\tPopulating table {}.{}...".format(db_name, table_name))
    # The driver %-formats the statement when parameters are supplied, so a
    # literal percent sign in a table or column name has to be doubled.
    insert_head = "INSERT INTO {}({}) VALUES".format(
        qualified_name, ", ".join(columns)).replace("%", "%%")
    for row in values_list:
        values = [str(x) for x in row]
        # One placeholder per value actually present in the row; a row whose
        # length differs from the header is rejected by the server and
        # reported below.
        insert_cmd = "{}({})".format(
            insert_head, ", ".join(["%s"] * len(values)))
        try:
            cursor.execute(insert_cmd, values)
        except Exception:
            if len(db_headers) != len(row):
                write_exception('''
Different sizes in header ({}) and row ({}): 
header:{}
row:   {}'''.format(len(db_headers), len(row), db_headers, row))
            else:
                write_exception(
                    "SQL error while executing command:\n\t{}\n\tvalues: {}".
                    format(insert_cmd, values))
            return