"""Minimal PySpark RDD example: filter, map, collect and reduce.

Builds an RDD holding the integers 0..99, keeps only the odd ones, squares
them, then prints the resulting list followed by the sum of its elements.
"""

import pyspark  # import pyspark package


## Filter function: keep only odd numbers
def fOdd(x):
    """Return True when ``x`` is odd and False when it is even."""
    return x % 2 != 0


## Map function: square each element
def sQuare(x):
    """Return ``x`` multiplied by itself."""
    return x * x


## Reduce function: add all elements up
def mySum(x, y):
    """Return the sum of ``x`` and ``y`` (binary operator passed to reduce)."""
    return x + y


### Step 1: get the Spark Context object.
###
### Only one active SparkContext is allowed at a time, so we call the
### classmethod SparkContext.getOrCreate() to reuse an existing context when
### this code is rerun.  It must be called on the class itself: writing
### SparkContext().getOrCreate() would run the constructor first, and the
### constructor fails when a context is already active.
sc = pyspark.SparkContext.getOrCreate()

### Step 2: generate the initial RDD - numbers from 0 to 99
rdd1 = sc.range(100)

## Step 3: apply transformations to the initial RDD
##
## First transformation: filter out even numbers
## Second transformation: square all remaining numbers
rdd2 = rdd1.filter(fOdd).map(sQuare)

## Step 4: action - materialize the RDD
output1 = rdd2.collect()

## Step 5: action - compute the sum of all remaining numbers
output2 = rdd2.reduce(mySum)

## Print out the contents of rdd2 and the sum
print(output1)
print("!!!!!!!!!")
print(output2)