spark wordcount test
카테고리 없음 · 2015. 11. 20. 10:46
spark-1.3.1
./conf/spark-defaults.conf
--> default 값 세팅
# Word-frequency count over the wiki dump, written out in descending order of
# count as (count, word) pairs.
#
# Only the RDD that feeds sortByKey is persisted. Every earlier RDD is consumed
# exactly once by the next transformation, so caching each of them (including
# the raw 10GB input) would only add memory pressure without saving any
# recomputation.
# NOTE(review): PySpark's sortByKey samples its input to choose range
# boundaries before running the sort itself, so that input is read more than
# once -- confirm for the Spark version in use.
wordcounts = (
    sc.textFile('../wiki_10GB')
    # Treat ',', '.' and '-' as word separators and fold everything to lower case.
    .map(lambda line: line.replace(',', ' ').replace('.', ' ').replace('-', ' ').lower())
    .flatMap(lambda line: line.split())
    .map(lambda word: (word, 1))
    .reduceByKey(lambda left, right: left + right)
    # Swap to (count, word) so the count becomes the sort key.
    .map(lambda pair: (pair[1], pair[0]))
    .persist(StorageLevel.MEMORY_ONLY)
    .sortByKey(False)
)

# saveAsTextFile is an action and returns None; it is kept as a separate
# statement so that `wordcounts` still refers to the sorted RDD afterwards.
wordcounts.saveAsTextFile("./wiki_rdd/rdd7")