Skip to content

Quick Start

This guide walks you from install to parsing, code intelligence, and LLM chunking.


1. Install

pip install tree-sitter-language-pack
npm install @kreuzberg/tree-sitter-language-pack
cargo add tree-sitter-language-pack
brew install kreuzberg-dev/tap/ts-pack

!!! Tip "Other ecosystems"
    Go, Java, Ruby, Elixir, PHP, and WebAssembly are also supported. See Installation for the full list.


2. Download Parsers

Parsers download automatically on first use. For production, CI, Docker, or offline environments, pre-download them.

Specific languages

CLI
# Download specific languages
ts-pack download python javascript rust go

# Download all available languages
ts-pack download --all

# Download a language group
ts-pack download --groups web,systems

# Fresh download (clear cache first)
ts-pack download --fresh python

# Check what's cached
ts-pack list --downloaded
Python
import tree_sitter_language_pack as tslp

# Pre-download specific languages
tslp.download(["python", "javascript", "rust"])

# Or initialize with config
tslp.init(tslp.PackConfig(languages=["python", "go"], cache_dir="/tmp/parsers"))

# Check what's cached
print(tslp.downloaded_languages())
print(tslp.manifest_languages()[:5])
Node.js
import {
  init,
  download,
  downloadedLanguages,
  manifestLanguages,
} from "@kreuzberg/tree-sitter-language-pack";

// Pre-download specific languages
const count = download(["python", "javascript", "rust"]);
console.log(`Downloaded ${count} languages`);

// Or initialize with config
init({ languages: ["python", "go"], cacheDir: "/tmp/parsers" });

// Check what's cached
console.log(downloadedLanguages());
console.log(manifestLanguages().slice(0, 5));
Ruby
require "tree_sitter_language_pack"

config = TreeSitterLanguagePack::PackConfig.new(languages: ["ruby", "python"])
TreeSitterLanguagePack.init(config)

count = TreeSitterLanguagePack.download(["rust", "javascript"])
puts "Ensured #{count} languages"

TreeSitterLanguagePack.downloaded_languages.each do |name|
  puts "cached: #{name}"
end
PHP
<?php

use Tree\Sitter\Language\Pack\PackConfig;
use Tree\Sitter\Language\Pack\TreeSitterLanguagePack;

$config = new PackConfig(
    cacheDir: null,
    languages: ["php", "javascript"],
    groups: null,
);
TreeSitterLanguagePack::init($config);

$count = TreeSitterLanguagePack::download(["python", "rust"]);
echo "Ensured {$count} languages\n";

foreach (TreeSitterLanguagePack::downloadedLanguages() as $name) {
    echo "cached: {$name}\n";
}
Go
package main

import (
    "fmt"
    "log"

    "github.com/kreuzberg-dev/tree-sitter-language-pack/packages/go"
)

func main() {
    config := tspack.PackConfig{
        Languages: []string{"go", "python"},
    }
    if err := tspack.Init(config); err != nil {
        log.Fatal(err)
    }

    count, err := tspack.Download([]string{"rust", "javascript"})
    if err != nil {
        log.Fatal(err)
    }
    if count != nil {
        fmt.Printf("Ensured %d languages\n", *count)
    }

    for _, name := range tspack.DownloadedLanguages() {
        fmt.Println("cached:", name)
    }
}
Java
import dev.kreuzberg.treesitterlanguagepack.PackConfig;
import dev.kreuzberg.treesitterlanguagepack.TreeSitterLanguagePack;

import java.util.List;
import java.util.Optional;

class Main {
    public static void main(String[] args) throws Exception {
        PackConfig config = PackConfig.builder()
                .withLanguages(Optional.of(List.of("java", "kotlin")))
                .build();
        TreeSitterLanguagePack.init(config);

        long ensured = TreeSitterLanguagePack.download(List.of("python", "rust"));
        System.out.println("Ensured " + ensured + " languages");

        for (String name : TreeSitterLanguagePack.downloadedLanguages()) {
            System.out.println("cached: " + name);
        }
    }
}
C#
using TreeSitterLanguagePack;

var dm = DownloadManager.New("1.9.0");

dm.DownloadAllBestEffort();

var downloaded = dm.InstalledLanguages();
Console.WriteLine($"Downloaded languages: {string.Join(", ", downloaded)}");

var registry = LanguageRegistry.Default();
var available = registry.AvailableLanguages();
Console.WriteLine($"Total available: {available.Count}");

dm.Dispose();
registry.Dispose();
Elixir
{:ok, nil} = TreeSitterLanguagePack.init(~s({"languages": ["elixir", "erlang"]}))

{:ok, count} = TreeSitterLanguagePack.download(["python", "rust"])
IO.puts("Ensured #{count} languages")

langs = TreeSitterLanguagePack.downloaded_languages()
IO.inspect(langs, label: "cached")
Dart
import 'package:tree_sitter_language_pack/tree_sitter_language_pack.dart';
import 'package:tree_sitter_language_pack/src/tree_sitter_language_pack_bridge_generated/frb_generated.dart'
    show RustLib;

void main() async {
  await RustLib.init();

  // Pre-download specific languages.
  final count = await TreeSitterLanguagePackBridge.download(
    ['python', 'javascript', 'rust'],
  );
  print('Downloaded $count languages');

  // Or initialize with config.
  await TreeSitterLanguagePackBridge.init(
    const PackConfig(languages: ['python', 'go'], cacheDir: '/tmp/parsers'),
  );

  // Inspect cache state.
  print(await TreeSitterLanguagePackBridge.downloadedLanguages());
  final manifest = await TreeSitterLanguagePackBridge.manifestLanguages();
  print(manifest.take(5).toList());
}
Swift
import TreeSitterLanguagePack
import RustBridge

// Pre-download specific languages.
let names = RustVec<String>()
names.push(value: "python")
names.push(value: "javascript")
names.push(value: "rust")
let installed = try download(names)
print("Downloaded \(installed) parsers")

// Or initialize with config (cache_dir + languages).
let packConfig = try packConfigFromJson(
    #"{"cache_dir":"/tmp/parsers","languages":["python","go"]}"#
)
try init(packConfig)

// Inspect downloaded state.
let cached = downloadedLanguages().map { $0.as_str().toString() }
let manifest = try manifestLanguages().map { $0.as_str().toString() }
print("Cached: \(cached)")
print("Manifest sample: \(manifest.prefix(5))")
Zig
const std = @import("std");
const tslp = @import("tree_sitter_language_pack");

pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    _ = gpa.allocator();

    // Pre-download specific languages (names passed as JSON array).
    const count = try tslp.download("[\"python\", \"javascript\", \"rust\"]");
    std.debug.print("languages available after download: {d}\n", .{count});

    // Inspect what is cached locally — returned as a JSON array string.
    const installed = try tslp.downloaded_languages();
    defer std.heap.c_allocator.free(installed);
    std.debug.print("downloaded: {s}\n", .{installed});

    // Report the effective cache directory.
    const dir = try tslp.cache_dir();
    defer std.heap.c_allocator.free(dir);
    std.debug.print("cache dir: {s}\n", .{dir});
}
Kotlin (Android)
import dev.kreuzberg.tslp.android.TreeSitterLanguagePack
import dev.kreuzberg.tslp.android.PackConfig
import android.app.Application
import java.nio.file.Paths

class MyApp : Application() {
    override fun onCreate() {
        super.onCreate()

        val cacheDir = Paths.get(cacheDir.absolutePath, "tree-sitter")
        val config = PackConfig(
            cacheDir = cacheDir,
            languages = listOf("kotlin", "java", "xml"),
            groups = null
        )
        TreeSitterLanguagePack.init(config)

        val downloaded = TreeSitterLanguagePack.downloadedLanguages()
        println("Downloaded parsers: $downloaded")

        val count = TreeSitterLanguagePack.languageCount()
        println("Total available languages: $count")
    }
}
WebAssembly
// Note: the WASM build ships with statically compiled parsers — no download step needed.
import {
  availableLanguages,
  hasLanguage,
  languageCount,
} from "@kreuzberg/tree-sitter-language-pack-wasm";

console.log(`Has Python: ${hasLanguage("python")}`);
console.log(`Has Rust: ${hasLanguage("rust")}`);
console.log(`Total bundled languages: ${languageCount()}`);
console.log(`Sample: ${availableLanguages().slice(0, 10).join(", ")}`);
Rust
// Requires feature = "download" (enabled by default).
use std::path::PathBuf;
use tree_sitter_language_pack::{PackConfig, download, downloaded_languages, init};

fn main() -> anyhow::Result<()> {
    // Pre-download specific languages; returns count of ensured languages.
    let _count = download(&["python", "javascript", "rust"])?;

    // Or initialize with config (cache_dir is PathBuf, not String).
    let config = PackConfig {
        languages: Some(vec!["python".into(), "go".into()]),
        cache_dir: Some(PathBuf::from("/tmp/parsers")),
        groups: None,
    };
    init(&config)?;

    println!("{:?}", downloaded_languages());
    Ok(())
}

All 306 languages

ts-pack download --all
from tree_sitter_language_pack import download_all

download_all()
import { downloadAll } from "@kreuzberg/tree-sitter-language-pack";

await downloadAll();
use tree_sitter_language_pack::download_all;

download_all()?;

By language group

Groups bundle related languages: web, systems, scripting, data, jvm, functional.

# Download the web and data language groups (web covers HTML, CSS, JS, TS, Vue, Svelte, …)
ts-pack download --groups web,data

# See what's cached
ts-pack list --downloaded
from tree_sitter_language_pack import init

init({"groups": ["web", "data"]})
import { init } from "@kreuzberg/tree-sitter-language-pack";

await init({ groups: ["web", "data"] });
use tree_sitter_language_pack::{PackConfig, init};

let config = PackConfig {
    groups: Some(vec!["web".into(), "data".into()]),
    ..Default::default()
};
init(&config)?;

Docker and CI

Pre-download parsers during your build to avoid runtime network calls:

Dockerfile
FROM python:3.12-slim
RUN pip install tree-sitter-language-pack
# Pre-download at build time — no network needed at runtime
RUN python -c "from tree_sitter_language_pack import download_all; download_all()"
GitHub Actions
- name: Install and pre-download parsers
  run: |
    pip install tree-sitter-language-pack
    python -c "from tree_sitter_language_pack import download; download(['python', 'javascript', 'rust'])"

Configuration file

Declare which languages your project needs in a language-pack.toml:

language-pack.toml
languages = ["python", "javascript", "rust", "go"]
# groups = ["web", "systems"]
# cache_dir = "/tmp/parsers"

Then download everything declared in the config:

# Reads language-pack.toml automatically
ts-pack download
from tree_sitter_language_pack import init

# Reads language-pack.toml from current directory
init()

!!! Info "Cache location"
    Parsers cache to ~/.cache/tree-sitter-language-pack/ on Linux/macOS and %LOCALAPPDATA%\tree-sitter-language-pack\ on Windows. Override with cache_dir in language-pack.toml or the programmatic API. See Download Model for full details.


3. Parse Code

Build a concrete syntax tree from source code.

CLI
# Download parsers
ts-pack download python javascript rust

# Parse a file
ts-pack parse main.py --format json

# Run code intelligence
ts-pack process src/app.py --all

# List available languages
ts-pack list --manifest
Python
import tree_sitter_language_pack as tslp

# Parsers download automatically on first use
result = tslp.process(
    "def hello():\n    print('world')\n",
    tslp.ProcessConfig(language="python", structure=True, imports=True),
)

print(f"Language: {result.language}")
print(f"Functions: {len(result.structure)}")
Node.js
import { process } from "@kreuzberg/tree-sitter-language-pack";

const result = process("function hello() { console.log('world'); }", {
  language: "javascript",
  structure: true,
  imports: true,
});

console.log(`Language: ${result.language}`);
console.log(`Functions: ${result.structure?.length ?? 0}`);
Ruby
require "tree_sitter_language_pack"

config = TreeSitterLanguagePack::PackConfig.new(languages: ["ruby"])
TreeSitterLanguagePack.init(config)

puts "Ruby available: #{TreeSitterLanguagePack.has_language("ruby")}"
puts "Languages: #{TreeSitterLanguagePack.language_count}"
PHP
<?php

use Tree\Sitter\Language\Pack\TreeSitterLanguagePack;

if (TreeSitterLanguagePack::hasLanguage("php")) {
    echo "PHP grammar is available\n";
}

echo "Total languages: " . TreeSitterLanguagePack::languageCount() . "\n";
Go
package main

import (
    "fmt"
    "log"

    "github.com/kreuzberg-dev/tree-sitter-language-pack/packages/go"
)

func main() {
    parser, err := tspack.GetParser("go")
    if err != nil {
        log.Fatal(err)
    }
    defer parser.Free()

    tree := parser.Parse("package main\nfunc hello() {}")
    defer tree.Free()

    root := tree.RootNode()
    defer root.Free()

    kind := root.Kind()
    if kind != nil {
        fmt.Println("Root:", *kind)
    }
}
Java
import dev.kreuzberg.treesitterlanguagepack.PackConfig;
import dev.kreuzberg.treesitterlanguagepack.TreeSitterLanguagePack;

import java.util.List;
import java.util.Optional;

class Main {
    public static void main(String[] args) throws Exception {
        PackConfig config = PackConfig.builder()
                .withLanguages(Optional.of(List.of("java")))
                .build();
        TreeSitterLanguagePack.init(config);

        System.out.println("Java available: " + TreeSitterLanguagePack.hasLanguage("java"));
        System.out.println("Languages: " + TreeSitterLanguagePack.languageCount());
    }
}
C#
using TreeSitterLanguagePack;

var parser = Parser.Default();
parser.SetLanguage("python");

var tree = parser.Parse("def hello():\n    print('world')\n");
var root = tree!.RootNode();

Console.WriteLine($"Root kind: {root.Kind()}");

parser.Dispose();
Elixir
{:ok, nil} = TreeSitterLanguagePack.init(~s({"languages": ["elixir"]}))

count = TreeSitterLanguagePack.language_count()
IO.puts("Languages: #{count}")

IO.puts("Elixir available: #{TreeSitterLanguagePack.has_language("elixir")}")
Dart
import 'package:tree_sitter_language_pack/tree_sitter_language_pack.dart';
import 'package:tree_sitter_language_pack/src/tree_sitter_language_pack_bridge_generated/frb_generated.dart'
    show RustLib;

void main() async {
  await RustLib.init();

  final parser = await TreeSitterLanguagePackBridge.getParser('python');
  final tree = await parser.parse(source: "def hello():\n    print('world')\n");
  final root = await tree!.rootNode();

  print('Root kind: ${await root.kind()}');
}
Swift
import TreeSitterLanguagePack
import RustBridge

// Parsers download automatically on first use.
let config = try processConfigFromJson(#"{"language":"swift","structure":true}"#)
let result = try process("func greet() { print(\"hello\") }", config)

print("Language: \(result.language().toString())")
print("Functions: \(result.structure().count)")
print("Total lines: \(result.metrics().total_lines())")
Zig
const std = @import("std");
const tslp = @import("tree_sitter_language_pack");

pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    _ = gpa.allocator();

    var parser = try tslp.get_parser("rust");
    defer parser.free();

    const source = "fn main() { println!(\"hello\"); }";
    var tree = (try parser.parse(source)) orelse return error.ParseFailed;
    defer tree.free();

    var root = tree.root_node();
    defer root.free();

    const kind = try root.kind();
    defer std.heap.c_allocator.free(kind);
    std.debug.print("root kind: {s}\n", .{kind});
}
Kotlin (Android)
import dev.kreuzberg.tslp.android.TreeSitterLanguagePack
import dev.kreuzberg.tslp.android.PackConfig
import java.nio.file.Paths

suspend fun main() {
    val config = PackConfig(
        cacheDir = Paths.get("/data/data/com.example.app/cache/parsers"),
        languages = listOf("kotlin"),
        groups = null
    )
    TreeSitterLanguagePack.init(config)

    val lang = TreeSitterLanguagePack.getLanguage("kotlin")
    val parser = TreeSitterLanguagePack.getParser("kotlin")
    println("Root node kind: ${parser.parse("fun hello() {}").rootNode().type}")
}
WebAssembly
import {
  availableLanguages,
  getParser,
  hasLanguage,
  languageCount,
} from "@kreuzberg/tree-sitter-language-pack-wasm";

console.log(`${languageCount()} languages available`);
console.log(`Python available: ${hasLanguage("python")}`);
console.log(`First 5: ${availableLanguages().slice(0, 5).join(", ")}`);

const parser = getParser("python");
try {
  const tree = parser.parse("def hello(): pass");
  try {
    console.log(`Root: ${tree.rootNode().kind()}`);
  } finally {
    tree.free();
  }
} finally {
  parser.free();
}
Rust
use tree_sitter_language_pack::{ProcessConfig, process};

fn main() -> anyhow::Result<()> {
    let config = ProcessConfig::new("rust").all();
    let result = process("fn main() { println!(\"hello\"); }", &config)?;

    println!("Language: {}", result.language);
    println!("Functions: {}", result.structure.len());
    Ok(())
}

4. Extract Code Intelligence

Go beyond the raw syntax tree. Extract functions, classes, imports, docstrings, and more with `process`.

CLI
# Parse and show S-expression
ts-pack parse main.py --language python

# Parse as JSON
echo "fn main() {}" | ts-pack parse - --language rust --format json

# Full code intelligence
ts-pack process src/app.py --language python --all

# Structure + imports only
ts-pack process src/app.py --structure --imports
Python
import tree_sitter_language_pack as tslp

config = tslp.ProcessConfig(
    language="python",
    structure=True,
    imports=True,
    comments=True,
    chunk_max_size=1000,
)

result = tslp.process('''
import os
from pathlib import Path

def read_file(path: str) -> str:
    """Read a file and return its contents."""
    return Path(path).read_text()

class FileReader:
    def __init__(self, base_dir: str):
        self.base_dir = base_dir
''', config)

for item in result.structure:
    print(f"{item.kind}: {item.name}")

for imp in result.imports:
    print(f"import: {imp.source}")
Node.js
import { process } from "@kreuzberg/tree-sitter-language-pack";

const result = process(
  `
import { readFile } from 'fs/promises';

export async function loadConfig(path: string): Promise<Config> {
  const data = await readFile(path, 'utf-8');
  return JSON.parse(data);
}

export class ConfigManager {
  constructor(private basePath: string) {}
}
`,
  { language: "typescript", structure: true, imports: true, exports: true, comments: true },
);

if (result.structure) {
  for (const item of result.structure) {
    console.log(`${item.kind}: ${item.name}`);
  }
}
Ruby
require "tree_sitter_language_pack"

config = TreeSitterLanguagePack::ProcessConfig.new(
  language: "ruby",
  structure: true,
  imports: true,
)

result = TreeSitterLanguagePack.process(
  "require 'json'\ndef parse(data)\n  JSON.parse(data)\nend",
  config
)

puts "Language: #{result.language}"
if result.structure
  result.structure.each do |item|
    puts "#{item.kind}: #{item.name}"
  end
end

if result.imports
  result.imports.each do |imp|
    puts "import: #{imp.source}"
  end
end
PHP
<?php

use Tree\Sitter\Language\Pack\ProcessConfig;
use Tree\Sitter\Language\Pack\TreeSitterLanguagePack;

$config = new ProcessConfig(
    language: "php",
    structure: true,
    imports: true,
    exports: true,
    comments: false,
    docstrings: false,
    symbols: false,
    diagnostics: false,
    chunkMaxSize: null,
);

$result = TreeSitterLanguagePack::process(
    "<?php namespace App; class Controller { public function index() {} }",
    $config,
);

echo "Language: " . $result->language . "\n";
foreach ($result->structure as $item) {
    echo $item->kind->value . ": " . ($item->name ?? "(anonymous)") . "\n";
}
Go
package main

import (
    "fmt"
    "log"

    "github.com/kreuzberg-dev/tree-sitter-language-pack/packages/go"
)

func main() {
    config := tspack.NewProcessConfig(
        tspack.WithProcessConfigLanguage("go"),
        tspack.WithProcessConfigStructure(true),
        tspack.WithProcessConfigImports(true),
    )
    result, err := tspack.Process(
        "package main\nimport \"fmt\"\nfunc hello() { fmt.Println(\"hi\") }",
        *config,
    )
    if err != nil {
        log.Fatal(err)
    }

    fmt.Println("Language:", result.Language)
    for _, item := range result.Structure {
        fmt.Printf("%s: %s\n", item.Kind, item.Name)
    }
    for _, imp := range result.Imports {
        fmt.Println("import:", imp.Source)
    }
}
Java
import dev.kreuzberg.treesitterlanguagepack.ProcessConfig;
import dev.kreuzberg.treesitterlanguagepack.ProcessResult;
import dev.kreuzberg.treesitterlanguagepack.StructureItem;
import dev.kreuzberg.treesitterlanguagepack.TreeSitterLanguagePack;

class Main {
    public static void main(String[] args) throws Exception {
        ProcessConfig config = ProcessConfig.builder()
                .withLanguage("java")
                .withStructure(true)
                .withImports(true)
                .build();

        ProcessResult result = TreeSitterLanguagePack.process(
                "import java.util.List;\npublic class App { public void run() {} }",
                config);

        System.out.println("Language: " + result.language());
        for (StructureItem item : result.structure()) {
            System.out.println(item.kind() + ": " + item.name());
        }
    }
}
C#
using TreeSitterLanguagePack;

var registry = LanguageRegistry.Default();

var config = new ProcessConfig
{
    Language = "csharp",
    Structure = true,
    Imports = true
};

var result = registry.Process("public class Greeter { }", config);

foreach (var item in result.Structure)
{
    Console.WriteLine($"Kind: {item.Kind}, Name: {item.Name}");
}

registry.Dispose();
Elixir
{:ok, json} =
  TreeSitterLanguagePack.process(
    "defmodule MyApp do\n  def hello, do: :world\nend",
    ~s({"language": "elixir", "structure": true, "imports": true})
  )

result = Jason.decode!(json)
IO.puts("Language: #{result["language"]}")

for item <- result["structure"] do
  IO.puts("#{item["kind"]}: #{item["name"]}")
end
Dart
import 'package:tree_sitter_language_pack/tree_sitter_language_pack.dart';
import 'package:tree_sitter_language_pack/src/tree_sitter_language_pack_bridge_generated/frb_generated.dart'
    show RustLib;

void main() async {
  await RustLib.init();

  const config = ProcessConfig(
    language: 'python',
    structure: true,
    imports: true,
    exports: false,
    comments: false,
    docstrings: false,
    symbols: false,
    diagnostics: false,
  );

  const source = '''
import os
from pathlib import Path

def read_file(path: str) -> str:
    return Path(path).read_text()

class FileReader:
    def __init__(self, base_dir: str):
        self.base_dir = base_dir
''';

  final result = await TreeSitterLanguagePackBridge.process(source, config);

  for (final item in result.structure) {
    print('${item.kind}: ${item.name ?? "<anonymous>"}');
  }
}
Swift
import TreeSitterLanguagePack
import RustBridge

let config = try processConfigFromJson(#"""
{
  "language": "python",
  "structure": true,
  "imports": true
}
"""#)

let source = """
import os
from pathlib import Path

def read_file(path: str) -> str:
    return Path(path).read_text()

class FileReader:
    def __init__(self, base_dir: str):
        self.base_dir = base_dir
"""

let result = try process(source, config)

for item in result.structure() {
    let kind = item.kind().toString()
    let name = item.name()?.toString() ?? "<anonymous>"
    print("\(kind): \(name)")
}

for imp in result.imports() {
    print("import: \(imp.source().toString())")
}
Zig
const std = @import("std");
const tslp = @import("tree_sitter_language_pack");

pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    const source = "def hello():\n    pass\n\nimport os\n";
    const config_json =
        \\{"language":"python","structure":true,"imports":true,"exports":false,
        \\"comments":false,"docstrings":false,"symbols":false,"diagnostics":false,
        \\"chunk_max_size":null}
    ;

    const result_json = try tslp.process(source, config_json);
    defer std.heap.c_allocator.free(result_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, allocator, result_json, .{});
    defer parsed.deinit();

    const structure = parsed.value.object.get("structure").?.array;
    for (structure.items) |item| {
        const kind_value = item.object.get("kind").?;
        const kind_name = switch (kind_value) {
            .string => |s| s,
            .object => |obj| obj.keys()[0],
            else => "unknown",
        };
        const name_value = item.object.get("name") orelse std.json.Value{ .null = {} };
        const name_str = if (name_value == .string) name_value.string else "<anonymous>";
        std.debug.print("{s}: {s}\n", .{ kind_name, name_str });
    }
}
Kotlin (Android)
import dev.kreuzberg.tslp.android.TreeSitterLanguagePack
import dev.kreuzberg.tslp.android.ProcessConfig

suspend fun analyzeCode(source: String) {
    val config = ProcessConfig(
        language = "kotlin",
        structure = true,
        imports = true,
        exports = false,
        comments = false,
        docstrings = false,
        symbols = false,
        diagnostics = false,
        chunks = null
    )

    val result = TreeSitterLanguagePack.processAsync(source, config)

    println("Language: ${result.language}")
    println("Detected ${result.structure.size} structural items")

    for (item in result.structure) {
        println("${item.kind}: ${item.name}")
        for (child in item.children) {
            println("  └ ${child.kind}: ${child.name}")
        }
    }
}
WebAssembly
import { process } from "@kreuzberg/tree-sitter-language-pack-wasm";

const result = process("function add(a, b) { return a + b; }", {
  language: "javascript",
  structure: true,
  imports: true,
  exports: true,
  comments: false,
  docstrings: false,
  symbols: false,
  diagnostics: false,
});

console.log(`Language: ${result.language}`);
for (const item of result.structure) {
  console.log(`${item.kind}: ${item.name ?? "(anonymous)"}`);
}
Rust
use tree_sitter_language_pack::{ProcessConfig, process};

fn main() -> anyhow::Result<()> {
    let config = ProcessConfig::new("python")
        .all()
        .with_chunking(1000);

    let result = process("def hello(): pass\ndef world(): pass", &config)?;

    for item in &result.structure {
        // item.kind implements Debug (not Display); item.name is Option<String>
        println!("{:?}: {}", item.kind, item.name.as_deref().unwrap_or("<unnamed>"));
    }
    for chunk in &result.chunks {
        println!("chunk: lines {}-{}", chunk.start_line, chunk.end_line);
    }
    Ok(())
}

5. Run Extraction Queries

Use `extract` to run custom tree-sitter queries and get structured results with captured text and metadata.

import tree_sitter_language_pack as tslp

source = """
def greet(name: str) -> str:
    return f"Hello, {name}!"

def farewell(name: str) -> str:
    return f"Goodbye, {name}!"
"""

result = tslp.extract(source, {
    "language": "python",
    "patterns": {
        "functions": {
            "query": "(function_definition name: (identifier) @name)",
            "capture_output": "Text",
        }
    }
})

for match in result["results"]["functions"]["matches"]:
    print(match["captures"][0]["text"])
# greet
# farewell

6. Chunk for LLMs

Split code at natural boundaries so language models receive coherent, complete units. This is ideal for embedding pipelines and context windows.

from tree_sitter_language_pack import process, ProcessConfig

with open("large_module.py") as f:
    source = f.read()

config = ProcessConfig(
    language="python",
    chunk_max_size=1500,  # max bytes per chunk
    structure=True,
)
result = process(source, config)

for i, chunk in enumerate(result["chunks"]):
    print(f"Chunk {i}: lines {chunk['start_line']}-{chunk['end_line']} "
          f"({chunk['end_byte'] - chunk['start_byte']} bytes)")
import { process } from "@kreuzberg/tree-sitter-language-pack";
import { readFileSync } from "fs";

const source = readFileSync("large_module.ts", "utf8");

const result = await process(source, {
  language: "typescript",
  chunkMaxSize: 1500,
  structure: true,
});

result.chunks.forEach((chunk, i) => {
  console.log(`Chunk ${i}: lines ${chunk.startLine}-${chunk.endLine} (${chunk.endByte - chunk.startByte} bytes)`);
});
# Chunk a file for LLM ingestion
ts-pack process large_module.py --chunk-size 1500 \
  | jq '.chunks[] | {start: .start_line, end: .end_line, bytes: (.end_byte - .start_byte)}'

You now have the full workflow: install, download, parse, extract intelligence, run queries, and chunk for LLMs. Go further with the following guides:

Edit this page on GitHub