I want to create a nested JSON file in PySpark from the following data.
I want to convert this into a nested JSON file with the following structure:
{    "NewData" : [          {"id":"1","number":"smith","name":"uber","age":12},         {"id":"2","number":"jon","name":"lunch","age":13},          {"id":"3","number":"jocelyn","name":"rental","age":15},         {"id":"4","number":"megan","name":"sds","age":15}
             ]  }
How can I write the correct output to a JSON file?
Can you help me achieve this?
# Sample rows as (id, age, number, name) tuples, in the same order as the
# fields declared in `schema` below.
data = [
    (1, 12, "smith", "uber"),
    (2, 13, "jon", "lunch"),
    (3, 15, "jocelyn", "rental"),
    (4, 15, "megan", "sds"),
]

schema = StructType([
    StructField('id', IntegerType(), True),
    StructField('age', IntegerType(), True),
    StructField('number', StringType(), True),
    StructField('name', StringType(), True),
])
df = spark.createDataFrame(data, schema)

df.show(truncate=False)

# Collapse all rows into a single row whose only column, "NewData", is an
# array of structs. The elements are deliberately kept as structs rather than
# passed through F.to_json: pre-serialising each element produces an array of
# JSON *strings*, which are emitted as escaped text instead of nested objects
# when the row itself is serialised.
# `id` is cast to string so it is rendered quoted ("1"), while `age` stays
# numeric, matching the target structure.
df2 = df.agg(
    F.collect_list(
        F.struct(
            F.col('id').cast('string').alias('id'),
            'number',
            'name',
            'age',
        )
    ).alias('NewData')
)

df2.show(truncate=False)

# NOTE(review): df2 holds exactly one row, so serialising it (for example via
# df2.write.json(<path>) or df2.toJSON()) should give one {"NewData": [...]}
# object -- confirm the output location and format against the job's needs.
 
    