Skip to main content

Scala 3 and Spark?

Picture of Filip Zybała

Filip Zybała

Oct 21, 2021|7 min read
// Coordinates of a library to analyze (Maven-style: organization, artifact name, version).
case class Library(org: String, name: String, version: String)

// A single TASTy file extracted from a library's jar: its origin, path inside the jar,
// and the raw file bytes.
case class TastyFile(lib: Library, path: String, content: Array[Byte])

// One tree occurrence found while walking a TASTy file:
// where it came from (lib, sourceFile, method), what kind of tree it is,
// its position (index) and nesting (depth), and — when resolvable —
// the top-level type it refers to (None when there is no such type).
case class TreeInfo(
  lib: Library,
  sourceFile: String,
  method: String,
  treeKind: String,
  index: Int,
  depth: Int,
  topLevelType: Option[String]
)
scala-cli run https://raw.githubusercontent.com/VirtusLab/types-usage-counter/master/tasty.scala -- org.typelevel cats-core_3 2.6.1
io.github.vincenzobaz::spark-scala3:…
import scala3encoders.given
value toDF is not a member of … — did you mean libs.coll?
/** Expands each library into the serialized tree infos of all its TASTy files.
  *
  * Both stages (loading TASTy files, processing each file) return `Either`;
  * failures are logged as warnings and skipped, so one broken library or file
  * does not abort the whole Spark job.
  *
  * @param libs dataset of library coordinates to analyze
  * @return dataset of tree infos, in a Spark-serializable wrapper form
  */
def processLibraries(libs: Dataset[Library]): Dataset[SerializedTreeInfo] =
  libs
    .flatMap { lib =>
      // Resolve and read the library's TASTy files; Left carries an error message.
      val tastyFiles: Either[String, Seq[TastyFile]] = loadTastyFiles(lib)
      tastyFiles.left.foreach(log.warn)
      // Left becomes an empty Seq here, dropping the failed library from the dataset.
      tastyFiles.toSeq.flatten.map(SerializedTastyFile.from)
    }
    .flatMap { serialized =>
      // Round-trip through the Serialized* wrappers keeps rows encodable by Spark.
      val treeInfos: Either[String, Seq[TreeInfo]] =
        processTastyFile(serialized.toTastyFile)
      treeInfos.left.foreach(log.warn)
      treeInfos.toSeq.flatten.map(SerializedTreeInfo.from)
    }
/** Entry point: reads library coordinates from a CSV (first CLI arg, default
  * "libs.csv"), extracts tree infos from each library's TASTy files, and prints
  * the 10 most frequently used top-level types.
  */
@main def spark(args: String*) =
  val csvPath = args.headOption.getOrElse("libs.csv")

  // Local-mode session; sufficient for a single-machine analysis run.
  val spark = SparkSession.builder().master("local").getOrCreate()
  import spark.implicits._

  // CSV header row must match the Library field names (org, name, version).
  val libs = spark.read.option("header", true).csv(csvPath).as[Library]
  val treeInfos = processLibraries(libs)

  treeInfos
    .filter(col("topLevelType").=!=(NoType)) // drop entries with no resolved type
    .groupBy("topLevelType")
    .count()
    .sort(col("count").desc)
    .show(10, false) // false: don't truncate long type names

  spark.stop()
scala-cli . --main-class spark -- libs.csv
scala-cli https://gist.github.com/romanowski/ff5266cac98ac387bbfe648909b28ea0

Subscribe to our newsletter and never miss an article