Skip to main content

How to mine Scala 3 compiler metadata using TASTy files

Picture of Andrzej Ratajczak, Kotlin, Scala Developer

Andrzej Ratajczak

Kotlin, Scala Developer
Jun 28, 2023|15 min read
How_to_mine_Scala_3_compiler_metadata_using_TASTy_files_image-min.jpg
// Collects "groupId:artifactId:version" coordinates for Scala 3 artifacts by scraping
// index.scala-lang.org in three stages. Only the last two stages tolerate failed requests;
// a failure while fetching a search page propagates.
val elems =
  // Stage 1: text of every <h4> on search result pages 1..64 (fetched in parallel).
  // NOTE(review): presumably each heading is an "organization/repository" path — it is
  // spliced into the URLs below as-is.
  val projectHeaders = (1 to 64).par.flatMap { pageNumber =>
    val searchUrl =
      s"https://index.scala-lang.org/search?sort=stars&languages=3.x&q=*&page=$pageNumber"
    Jsoup.connect(searchUrl).get().select("h4").eachText.asScala
  }

  // Stage 2: for each project, pair every <option> text (used as the artifact name in
  // stage 3) with the project header and the text of ".head-last-version".
  // Projects whose page cannot be fetched are skipped.
  val artifactRefs = projectHeaders.flatMap { header =>
    val versionsPage = Try(
      Jsoup
        .connect(s"https://index.scala-lang.org/$header/artifacts/version")
        .get()
    ).toOption
    versionsPage.toList.flatMap { doc =>
      val version = doc.select(".head-last-version").text.trim
      doc.select("option").eachText.asScala.map(name => (name, (header, version)))
    }
  }

  // Stage 3: parse the "#copy-maven" text of each artifact page as XML and keep only
  // artifacts whose artifactId ends with "_3". Failed fetches/parses are skipped.
  artifactRefs.flatMap { case (name, (header, version)) =>
    val artifactUrl =
      s"https://index.scala-lang.org/$header/artifacts/$name/$version?binary-versions=_3"
    for
      pom <- Try {
        val mavenSnippet = Jsoup.connect(artifactUrl).get().select("#copy-maven").text
        Jsoup.parse(mavenSnippet, "", Parser.xmlParser())
      }.toOption
      if pom.select("artifactId").text.endsWith("_3")
    yield
      val groupId = pom.select("groupId").text
      val artifactId = pom.select("artifactId").text
      val pomVersion = pom.select("version").text
      s"$groupId:$artifactId:$pomVersion"
  }
// The single dependency to fetch, built from the surrounding `organization`, `module`
// and `version` values.
val requestedDependency =
  Dependency(Module(Organization(organization), ModuleName(module)), version)

// Run the fetch against the configured repositories.
// NOTE(review): presumably yields the downloaded artifact files — confirm against the Fetch API.
Fetch()
  .withRepositories(repositories)
  .withDependencies(Seq(requestedDependency))
  .run
/** TASTy inspector that writes one line per documented method definition to a file.
  *
  * Each line holds five fields separated by `␟`: the AST rendering, the bytecode
  * rendering, the source rendering with full names, the source rendering without full
  * names, and the cleaned doc comment.
  *
  * @param fileOutputName path of the output file; it is opened for writing as soon as the
  *                       inspector is constructed
  * @param classpath      not referenced in this listing; the `byteCode` helper used below
  *                       reads a `classpath` value — presumably this one (TODO confirm)
  */
class MyInspector(fileOutputName: String, classpath: String) extends Inspector:
  val file: File = new File(fileOutputName)
  val bw: BufferedWriter = new BufferedWriter(new FileWriter(file))

  /** Collects every `DefDef` in the given TASTy trees and writes a record for each one that
    * has a docstring, a non-blank cleaned comment and a defined right-hand side.
    *
    * The writer is closed when this method finishes, whether it succeeds or throws, so the
    * inspector can only be used for a single `inspect` call.
    *
    * @param tastys the TASTy files handed over by the inspector framework
    */
  def inspect(using Quotes)(tastys: List[Tasty[quotes.type]]): Unit =
    import quotes.reflect.*

    // Accumulates DefDefs in reverse visiting order (prepending is O(1)); callers reverse
    // the result once to restore traversal order.
    object Traverser extends TreeAccumulator[List[DefDef]]:
      def foldTree(defdefs: List[DefDef], tree: Tree)(
          owner: Symbol
      ): List[DefDef] =
        val collected = tree match
          case d: DefDef => d :: defdefs
          case _         => defdefs
        foldOverTree(collected, tree)(owner)
    end Traverser

    try
      tastys
        .flatMap { tasty =>
          val root = tasty.ast
          Traverser.foldTree(Nil, root)(root.symbol).reverse
        }
        .flatMap { defdef =>
          defdef.symbol.docstring
            .map(doc => Cleaner.clean(doc).mkString(" "))
            .filter(comment => !comment.isBlank && defdef.rhs.isDefined)
            .map { comment =>
              s"${astCode(defdef)}␟${byteCode(defdef)}␟${sourceCode(defdef, fullNames = true)}␟${sourceCode(defdef, fullNames = false)}␟${comment}\n"
            }
        }
        .foreach(bw.write)
    finally
      // Always release the file handle (and flush buffered output), even if traversal or
      // rendering throws part-way through.
      bw.close()

  extension (s: String)
    /** Replaces every match of the pattern with a single space: characters in the Unicode
      * "Other" category, whitespace runs, a trailing carriage return, and the literal
      * two-character sequences `\t`, `\n` and `\r`.
      */
    def removeNewLines: String =
      s.replaceAll("\\p{C}|\\s+|\\r$|\\\\t|\\\\n|\\\\r", " ")

  /** Renders the tree structure of `defdef` via `Extractors.showTree`, with
    * [[removeNewLines]] applied to the result.
    */
  def astCode(using Quotes)(defdef: quotes.reflect.DefDef): String =
    Extractors.showTree(defdef).removeNewLines
// The first classpath entry is passed as the only element of the second argument and the
// remaining entries as the third; no standalone .tasty files are passed.
// NOTE(review): presumably the second argument is the jars to inspect and the third the
// dependency classpath — confirm against the TastyInspector API.
val jarsToInspect = List(classpath.head)
val dependencyEntries = classpath.tail.toList
// Constructed last, as in the original call: creating the inspector opens the output file.
// NOTE(review): `classpath` is used as a collection here, while MyInspector declares its
// second parameter as String — confirm which value is meant to be passed.
val inspector = new MyInspector(coordinates, classpath)

TastyInspector.inspectAllTastyFiles(Nil, jarsToInspect, dependencyEntries)(inspector)
/** Renders `defdef` as source text using `SourceCode.showTree` with plain (no) syntax
  * highlighting, then applies `removeNewLines` to the result.
  *
  * @param defdef    the method definition to render
  * @param fullNames forwarded unchanged to `SourceCode.showTree`
  * @return the rendered text, or `"NO_SOURCECODE"` if rendering throws a non-fatal exception
  */
def sourceCode(using Quotes)(
    defdef: quotes.reflect.DefDef,
    fullNames: Boolean
): String =
  Try {
    val rendered = SourceCode.showTree(defdef)(SyntaxHighlight.plain, fullNames)
    rendered.removeNewLines
  }.getOrElse("NO_SOURCECODE")
/** Looks up the compiled method for `defdef` and renders its bytecode as text.
  *
  * The owner's full name, with every `$.` replaced by `$`, is loaded from `classpath`;
  * the first method whose name equals the symbol's name is used, so overloads are not
  * distinguished.
  *
  * @param defdef the method definition whose bytecode is wanted
  * @return the bytecode text with `removeNewLines` applied, or `"NO_BYTECODE"` when the class
  *         cannot be loaded, no method matches, or the method has no code attribute
  */
def byteCode(using Quotes)(defdef: quotes.reflect.DefDef): String =
  // Only class loading is guarded; failures there fall through to the sentinel below.
  val loadedMethods = Try {
    SyntheticRepository
      .getInstance(ClassPath(classpath))
      .loadClass(defdef.symbol.owner.fullName.replaceAll("\\$\\.", "\\$"))
      .getMethods()
  }

  val rendered =
    for
      methods <- loadedMethods.toOption
      method <- methods.find(_.getName == defdef.symbol.name)
      code <- Option(method.getCode) // null when the method has no code attribute
    yield Utility
      .codeToString(code.getCode, code.getConstantPool, 0, -1, true)
      .removeNewLines

  rendered.getOrElse("NO_BYTECODE")

Curated by Sebastian Synowiec

Subscribe to our newsletter and never miss an article