def run(self):
        """Load the lines of ``self.filename`` into the ``restaurant_inbox`` table.

        Each line of the file becomes one ``(file_name, rest_desc_json)`` row;
        an empty file yields a single row whose ``rest_desc_json`` is ``None``.
        Rows previously stored under the same file name are deleted first.
        After the database step, ``"Done"`` is written to the task output.

        Any exception raised by the database calls or the output write is
        caught and printed rather than propagated. Errors raised while
        opening or reading the input file are not caught.
        """
        # The last 31 characters of the path are used as the file-name key.
        jsonfilename = self.filename[-31:]

        # The context manager closes the handle on every path, including the
        # empty-file case and a failed read.
        with open(self.filename, 'r') as json_file:
            json_content = json_file.readlines()

        if json_content:
            insert_list = [(jsonfilename, line) for line in json_content]
        else:
            # Empty file: still record the file name, with no JSON payload.
            insert_list = [(jsonfilename, None)]

        qry_string = "insert into restaurant_inbox (file_name, rest_desc_json) values (%s, %s)"
        dbobj = dbUtil(config.db_config['host'], config.db_config['username'], config.db_config['password'],
                        config.db_config['database_name'])
        try:
            if dbobj:
                # NOTE(review): this DELETE is built by string concatenation.
                # Switch to a bound parameter if dbUtil.executeQuery accepts
                # one -- its signature is not visible from here.
                dbobj.executeQuery("delete from restaurant_inbox where file_name = '" + jsonfilename + "'")
                dbobj.executeManyQuery(qry_string, insert_list)

            # NOTE(review): the marker is written even when dbobj is falsy and
            # nothing was inserted -- confirm that this is intended.
            with self.output().open('w') as output:
                output.write("Done")
        except Exception as e:
            print('Failed Insert ', self.filename, ': ', e)
    def main(self, sc, *args):
        """Compute the top-ranked ``dba`` per ``boro`` and store it in MySQL.

        Reads ``vw_restaurant_inbox`` over JDBC, averages ``grade`` and counts
        rows per (``boro``, ``dba``), then keeps the first row of each ``boro``
        when ordered by average grade and then row count, both descending.
        The surviving rows are inserted into ``recommendation_boro`` with the
        current local time as both timestamps, the ``is_latest`` flags are
        updated around the insert, and ``"Done"`` is written to the task
        output.

        Args:
            sc: Spark context used to build the ``SQLContext``.
            *args: Unused.
        """
        import time

        sql_ctx = SQLContext(sc)

        # Load the source view from MySQL as a data frame.
        jdbc_reader = sql_ctx.read.format("jdbc").options(
            url="jdbc:mysql://192.168.1.221:3306/restaurantdb",
            driver="com.mysql.jdbc.Driver",
            dbtable="vw_restaurant_inbox",
            user="******",
            password="******")
        inbox_df = jdbc_reader.load()

        # Aggregate per (boro, dba), ignoring rows without a boro.
        with_boro_df = inbox_df.filter("boro is not null")
        per_dba_df = with_boro_df.groupBy("boro", "dba").agg(
            F.avg("grade").alias("grade_avg"),
            F.count("*").alias("grade_count"))

        # Rank within each boro and keep only the first-ranked row.
        boro_ranking = Window.partitionBy("boro").orderBy(
            F.desc("grade_avg"), F.desc("grade_count"))
        ranked_df = per_dba_df.select(
            "boro", "dba", "grade_avg", "grade_count",
            F.row_number().over(boro_ranking).alias("row_num"))
        best_per_boro_df = ranked_df.filter("row_num = 1")

        # One tuple per row: the four aggregates plus created/modified stamps.
        insert_list = [
            (row.boro, row.dba, row.grade_avg, row.grade_count,
             time.strftime('%Y-%m-%d %H:%M:%S'),
             time.strftime('%Y-%m-%d %H:%M:%S'))
            for row in best_per_boro_df.collect()
        ]

        dbobj = dbUtil("192.168.1.221", "restuser", "restuser", "restaurantdb")
        if dbobj:
            # Clear is_latest on existing rows, insert the new ones, then set
            # is_latest on every row where it is still NULL.
            dbobj.executeQuery('update recommendation_boro set is_latest = 0;')
            dbobj.executeManyQuery(
                "insert into recommendation_boro (boro, dba, grade_average, grade_count, date_created, date_modified) values (%s, %s, %s, %s, %s, %s)",
                insert_list)
            dbobj.executeQuery(
                'update recommendation_boro set is_latest = 1 where is_latest is NULL;'
            )
            dbobj.executeCommit()

            with self.output().open('w') as output:
                output.write("Done")

        # startTime is not defined in this method; presumably a module-level
        # value set when the job starts -- TODO confirm.
        print(datetime.now() - startTime)
    def test_conn(self):
        """
            Test database connection success

            Runs ``select 1 col1`` against the database described by
            ``config.db_config`` and asserts that at least one row comes back.
        """
        settings = config.db_config
        dbconn = dbUtil(settings['host'], settings['username'],
                        settings['password'], settings['database_name'])

        rows = dbconn.executeQuery("select 1 col1")
        # A non-empty result is taken as proof that the connection works.
        got_rows = len(rows) > 0
        self.assertEqual(got_rows, True)
    def requires(self):
        """Return one ``SaveRestaurantData`` task per file still to be loaded.

        A file qualifies when all of the following hold:

        * it matches ``restaurant_data_*.json`` under
          ``<config.download_root_folder>/data``;
        * the 10 characters before its ``.json`` suffix appear in the value
          returned by ``date_util.getDateListString(self.dt)`` (per the
          original comments, dates within roughly the last month);
        * the last 31 characters of its path are not already present as a
          ``file_name`` in ``restaurant_inbox``.

        Returns:
            list: ``SaveRestaurantData`` tasks, ordered by file path.
        """
        valid_datelist = date_util.getDateListString(self.dt)

        file_folder = os.path.abspath(config.download_root_folder + '/data/')
        file_pattern = 'restaurant_data_*.json'

        # Every JSON file that has been downloaded and saved so far.
        saved_list = glob.glob(file_folder + '/' + file_pattern)

        # Keep only files whose embedded date (path[-15:-5]) is still valid.
        validated_datelist = sorted(
            path for path in saved_list if path[-15:-5] in valid_datelist)

        dbobj = dbUtil(config.db_config['host'], config.db_config['username'], config.db_config['password'],
                        config.db_config['database_name'])

        # Default to "nothing processed yet" so that a falsy handle no longer
        # raises NameError when resultobj is read below.
        resultobj = []
        if dbobj:
            resultobj = dbobj.executeQuery("select distinct file_name from restaurant_inbox order by file_name desc")

        # File names already stored in the database; a set makes each
        # membership test constant time.
        processed_files = {' '.join(item) for item in resultobj}

        # validated_datelist is already sorted, so filtering keeps the order.
        dbinsert_datelist = [
            path for path in validated_datelist
            if path[-31:] not in processed_files
        ]

        return [SaveRestaurantData(filename) for filename in dbinsert_datelist]