Let's connect
Let's connect

How to mine Scala 3 compiler metadata using TASTy files

Picture of Andrzej Ratajczak, Kotlin, Scala Developer

Andrzej Ratajczak

Kotlin, Scala Developer

15-minute read

scala

// Scrapes index.scala-lang.org (Scaladex) and yields Maven-style
// "groupId:artifactId:version" coordinates for artifacts whose artifactId
// ends in "_3". Every network/parsing step is best-effort: anything that
// throws inside a `Try` is silently dropped from the result.
val elems = {
  // Stage 1: text of every <h4> on search result pages 1..64, fetched in parallel.
  // NOTE(review): each <h4> text is presumably an "organization/repository"
  // path, since it is spliced straight into the URLs below — confirm.
  val projects = (1 to 64).par.flatMap { pageNo =>
    val searchUrl =
      s"https://index.scala-lang.org/search?sort=stars&languages=3.x&q=*&page=$pageNo"
    Jsoup.connect(searchUrl).get().select("h4").eachText.asScala
  }

  // Stage 2: for each project, pair every <option> text on its version page
  // with the project and the text of the ".head-last-version" element.
  val artifacts = projects
    .flatMap { project =>
      val versionsUrl = s"https://index.scala-lang.org/$project/artifacts/version"
      Try(Jsoup.connect(versionsUrl).get()).toOption.map { versionPage =>
        val latest = versionPage.select(".head-last-version").text.trim
        versionPage
          .select("option")
          .eachText
          .asScala
          .map(artifact => (artifact, (project, latest)))
      }
    }
    .flatten

  // Stage 3: read the "#copy-maven" text of the artifact page, parse it as
  // XML and keep only documents whose artifactId ends with "_3".
  artifacts.flatMap { case (artifact, (project, latest)) =>
    Try {
      val artifactUrl =
        s"https://index.scala-lang.org/$project/artifacts/$artifact/$latest?binary-versions=_3"
      val mavenSnippet = Jsoup.connect(artifactUrl).get().select("#copy-maven").text
      Jsoup.parse(mavenSnippet, "", Parser.xmlParser())
    }.toOption
      .filter(_.select("artifactId").text.endsWith("_3"))
      .map { pom =>
        val groupId = pom.select("groupId").text
        val artifactId = pom.select("artifactId").text
        val version = pom.select("version").text
        s"$groupId:$artifactId:$version"
      }
  }
}

scala

// Builds a fetch request for exactly one dependency and runs it.
// `repositories`, `organization`, `module` and `version` are defined outside
// this snippet.
// NOTE(review): Fetch / Dependency / Module / Organization / ModuleName look
// like the coursier API, in which case `run` would resolve the dependency
// (presumably including transitive ones) and return the downloaded files —
// confirm against the coursier version in use.
Fetch()
 .withRepositories(repositories)
 .withDependencies(
   Seq(
     Dependency(
       // The module is identified by its organization and module name.
       Module(Organization(organization), ModuleName(module)),
       version
     )
   )
 )
 .run

scala

/** TASTy inspector that writes one text record per documented method
 *  definition found in the inspected TASTy files.
 *
 *  A record is emitted only for a `DefDef` that has a docstring, whose cleaned
 *  docstring is not blank, and that has a right-hand side. Each record is one
 *  line of five fields separated by `␟`: AST dump, bytecode, source code with
 *  full names, source code without full names, cleaned comment.
 *
 *  @param fileOutputName path of the output file; it is opened for writing as
 *                        soon as the inspector is constructed
 *  @param classpath      not read by the members in this block; presumably
 *                        consumed by the `byteCode` helper — confirm
 */
class MyInspector(fileOutputName: String, classpath: String) extends Inspector:
 val file = new File(fileOutputName)
 val bw = new BufferedWriter(new FileWriter(file))

 /** Collects all method definitions from `tastys`, renders the documented
  *  ones and writes them to [[bw]]. The writer is closed when this method
  *  finishes, whether it succeeds or throws, so the inspector is single-use.
  */
 def inspect(using Quotes)(tastys: List[Tasty[quotes.type]]): Unit =
   import quotes.reflect.*

   // Accumulates every DefDef in a tree. Matches are prepended (O(1)) rather
   // than appended (O(n) on a List); the caller reverses the result once to
   // get the definitions back in traversal order.
   object Traverser extends TreeAccumulator[List[DefDef]]:
     def foldTree(defdefs: List[DefDef], tree: Tree)(
         owner: Symbol
     ): List[DefDef] =
       val collected = tree match
         case d: DefDef => d :: defdefs
         case _         => defdefs
       foldOverTree(collected, tree)(owner)
   end Traverser

   // try/finally guarantees the output file is flushed and closed even when
   // traversal or one of the rendering helpers throws.
   try
     tastys
       .flatMap { tasty =>
         val tree = tasty.ast
         Traverser.foldTree(List.empty, tree)(tree.symbol).reverse
       }
       .flatMap { defdef =>
         for
           docstring <- defdef.symbol.docstring
           // NOTE(review): Cleaner.clean is defined elsewhere; its result is
           // joined with single spaces here.
           comment = Cleaner.clean(docstring).mkString(" ")
           if !comment.isBlank && defdef.rhs.isDefined
         yield s"${astCode(defdef)}␟${byteCode(defdef)}␟${sourceCode(defdef, true)}␟${sourceCode(defdef, false)}␟${comment}\n"
       }
       .foreach(bw.write)
   finally bw.close()


 extension (s: String)
   /** Replaces with a single space: every character in Unicode category C
    *  (`\p{C}`), every run of whitespace, and every literal two-character
    *  escape sequence backslash-t, backslash-n or backslash-r. This keeps
    *  each rendered field on one line of the output file.
    */
   def removeNewLines: String =
     s.replaceAll("\\p{C}|\\s+|\\r$|\\\\t|\\\\n|\\\\r", " ")


 /** Single-line rendering of `defdef` as produced by `Extractors.showTree`. */
 def astCode(using Quotes)(defdef: quotes.reflect.DefDef): String =
   Extractors.showTree(defdef).removeNewLines

scala

// Runs a fresh MyInspector via TastyInspector.inspectAllTastyFiles.
// First argument list: an empty list, a one-element list holding the first
// classpath entry, and the remaining classpath entries.
// NOTE(review): these are presumably (tastyFiles, jars, dependenciesClasspath),
// i.e. the head is the artifact to inspect and the tail its dependencies —
// confirm against the TastyInspector API.
// NOTE(review): if `classpath` is a standard Scala collection, `head`/`tail`
// throw when it is empty — verify callers never pass an empty classpath.
// NOTE(review): `classpath` is used as a collection here, while MyInspector
// declares `classpath: String` — verify the intended types.
TastyInspector.inspectAllTastyFiles(
 Nil,
 List(classpath.head),
 classpath.tail.toList
)(
 // `coordinates` is passed as MyInspector's `fileOutputName`.
 new MyInspector(coordinates, classpath)
)

scala

/** Renders `defdef` as single-line source text.
 *
 *  The tree is printed with `SourceCode.showTree` using plain (no) syntax
 *  highlighting and then flattened with `removeNewLines`.
 *
 *  @param defdef    the method definition to print
 *  @param fullNames forwarded to the printer unchanged; presumably selects
 *                   fully-qualified names — confirm
 *  @return the rendered code, or `"NO_SOURCECODE"` if printing throws a
 *          non-fatal exception
 */
def sourceCode(using Quotes)(
   defdef: quotes.reflect.DefDef,
   fullNames: Boolean
): String =
 Try {
   val rendered = SourceCode.showTree(defdef)(SyntaxHighlight.plain, fullNames)
   rendered.removeNewLines
 }.getOrElse("NO_SOURCECODE")

scala

/** Renders the bytecode of the method behind `defdef` as single-line text.
 *
 *  The class named after the symbol's owner (with every `$.` in the full name
 *  collapsed to `$`) is loaded through a `SyntheticRepository` built on
 *  `classpath`, and the first method whose name equals the symbol's name is
 *  printed with `Utility.codeToString`. Matching is by name only, so for
 *  overloaded methods the first one found is used.
 *
 *  @return the flattened bytecode listing, or `"NO_BYTECODE"` when loading the
 *          class throws a non-fatal exception, no method has that name, or the
 *          matched method has no code
 */
def byteCode(using Quotes)(defdef: quotes.reflect.DefDef): String =
 val listing =
   for
     // Only class loading is guarded by Try; rendering below is not.
     methods <- Try {
       val ownerClassName =
         defdef.symbol.owner.fullName.replaceAll("\\$\\.", "\\$")
       SyntheticRepository
         .getInstance(ClassPath(classpath))
         .loadClass(ownerClassName)
         .getMethods()
     }.toOption
     method <- methods.find(_.getName == defdef.symbol.name)
     // getCode may be null; Option(...) turns that into None.
     code <- Option(method.getCode)
   yield Utility
     .codeToString(code.getCode, code.getConstantPool, 0, -1, true)
     .removeNewLines
 listing.getOrElse("NO_BYTECODE")

Curated by

Sebastian Synowiec

Liked the article?

Share it with others!

explore more on

Take the first step to a sustained competitive edge for your business

Get your free consultation

VirtusLab's work has met the mark several times over, and their latest project is no exception. The team is efficient, hard-working, and trustworthy. Customers can expect a proactive team that drives results.

Stephen Rooke
Stephen Rooke, Director of Software Development @ Extreme Reach

VirtusLab's engineers are truly Strapi extensions experts. Their knowledge and expertise in the area of Strapi plugins gave us the opportunity to lift our multi-brand CMS implementation to a different level.

facile logo
Leonardo Podda, Engineering Manager @ Facile.it

VirtusLab has been an incredible partner since the early development of Scala 3, essential to a mature and stable Scala 3 ecosystem.

Martin_Odersky
Martin Odersky, Head of Programming Research Group @ EPFL

The VirtusLab team's in-depth knowledge, understanding, and experience of technology have been invaluable to us in developing our product. The team is professional and delivers on time – we greatly appreciated this efficiency when working with them.

Michael_Grant
Michael Grant, Director of Development @ Cyber Sec Company