Skip to content

Quick Start

This guide walks you from install to parsing, code intelligence, and LLM chunking.


1. Install

pip install tree-sitter-language-pack
npm install @kreuzberg/tree-sitter-language-pack
cargo add tree-sitter-language-pack
brew trust kreuzberg-dev/tap
brew install kreuzberg-dev/tap/ts-pack

Tip (other ecosystems): Go, Java, C#, Ruby, Elixir, PHP, Dart, Kotlin (Android), Swift, Zig, C FFI, and WebAssembly are also supported. See Installation for the full list.


2. Download Parsers

Parsers download automatically on first use. For production, CI, Docker, or offline environments, pre-download them.

Specific languages

CLI
# Download specific languages
ts-pack download python javascript rust go

# Download all available languages
ts-pack download --all

# Download a language group
ts-pack download --groups web,systems

# Fresh download (clear cache first)
ts-pack download --fresh python

# Check what's cached
ts-pack list --downloaded
Python
import tree_sitter_language_pack as tslp

# Pre-download specific languages
tslp.download(["python", "javascript", "rust"])

# Or initialize with config
tslp.init(tslp.PackConfig(languages=["python", "go"], cache_dir="/tmp/parsers"))

# Check what's cached
print(tslp.downloaded_languages())
print(tslp.manifest_languages()[:5])
Node.js
import {
  init,
  download,
  downloadedLanguages,
  manifestLanguages,
} from "@kreuzberg/tree-sitter-language-pack";

// Pre-download specific languages
const count = download(["python", "javascript", "rust"]);
console.log(`Downloaded ${count} languages`);

// Or initialize with config
init({ languages: ["python", "go"], cacheDir: "/tmp/parsers" });

// Check what's cached
console.log(downloadedLanguages());
console.log(manifestLanguages().slice(0, 5));
Ruby
require "tree_sitter_language_pack"

config = TreeSitterLanguagePack::PackConfig.new(languages: ["ruby", "python"])
TreeSitterLanguagePack.init(config)

count = TreeSitterLanguagePack.download(["rust", "javascript"])
puts "Ensured #{count} languages"

TreeSitterLanguagePack.downloaded_languages.each do |name|
  puts "cached: #{name}"
end
PHP
<?php

use Tree\Sitter\Language\Pack\PackConfig;
use Tree\Sitter\Language\Pack\TreeSitterLanguagePack;

$config = new PackConfig(
    cacheDir: null,
    languages: ["php", "javascript"],
    groups: null,
);
TreeSitterLanguagePack::init($config);

$count = TreeSitterLanguagePack::download(["python", "rust"]);
echo "Ensured {$count} languages\n";

foreach (TreeSitterLanguagePack::downloadedLanguages() as $name) {
    echo "cached: {$name}\n";
}
Go
package main

import (
    "fmt"
    "log"

    "github.com/kreuzberg-dev/tree-sitter-language-pack/packages/go"
)

func main() {
    config := tspack.PackConfig{
        Languages: []string{"go", "python"},
    }
    if err := tspack.Init(config); err != nil {
        log.Fatal(err)
    }

    count, err := tspack.Download([]string{"rust", "javascript"})
    if err != nil {
        log.Fatal(err)
    }
    if count != nil {
        fmt.Printf("Ensured %d languages\n", *count)
    }

    for _, name := range tspack.DownloadedLanguages() {
        fmt.Println("cached:", name)
    }
}
Java
import dev.kreuzberg.treesitterlanguagepack.PackConfig;
import dev.kreuzberg.treesitterlanguagepack.TreeSitterLanguagePack;

import java.util.List;
import java.util.Optional;

class Main {
    public static void main(String[] args) throws Exception {
        PackConfig config = PackConfig.builder()
                .withLanguages(Optional.of(List.of("java", "kotlin")))
                .build();
        TreeSitterLanguagePack.init(config);

        long ensured = TreeSitterLanguagePack.download(List.of("python", "rust"));
        System.out.println("Ensured " + ensured + " languages");

        for (String name : TreeSitterLanguagePack.downloadedLanguages()) {
            System.out.println("cached: " + name);
        }
    }
}
C#
using TreeSitterLanguagePack;

var dm = DownloadManager.New("1.9.0");

dm.DownloadAllBestEffort();

var downloaded = dm.InstalledLanguages();
Console.WriteLine($"Downloaded languages: {string.Join(", ", downloaded)}");

var registry = LanguageRegistry.Default();
var available = registry.AvailableLanguages();
Console.WriteLine($"Total available: {available.Count}");

dm.Dispose();
registry.Dispose();
Elixir
{:ok, nil} = TreeSitterLanguagePack.init(~s({"languages": ["elixir", "erlang"]}))

{:ok, count} = TreeSitterLanguagePack.download(["python", "rust"])
IO.puts("Ensured #{count} languages")

langs = TreeSitterLanguagePack.downloaded_languages()
IO.inspect(langs, label: "cached")
Dart
import 'package:tree_sitter_language_pack/tree_sitter_language_pack.dart';
import 'package:tree_sitter_language_pack/src/tree_sitter_language_pack_bridge_generated/frb_generated.dart'
    show RustLib;

void main() async {
  await RustLib.init();

  // Pre-download specific languages.
  final count = await TreeSitterLanguagePackBridge.download(
    ['python', 'javascript', 'rust'],
  );
  print('Downloaded $count languages');

  // Or initialize with config.
  await TreeSitterLanguagePackBridge.init(
    const PackConfig(languages: ['python', 'go'], cacheDir: '/tmp/parsers'),
  );

  // Inspect cache state.
  print(await TreeSitterLanguagePackBridge.downloadedLanguages());
  final manifest = await TreeSitterLanguagePackBridge.manifestLanguages();
  print(manifest.take(5).toList());
}
Swift
import TreeSitterLanguagePack
import RustBridge

// Pre-download specific languages.
let names = RustVec<String>()
names.push(value: "python")
names.push(value: "javascript")
names.push(value: "rust")
let installed = try download(names)
print("Downloaded \(installed) parsers")

// Or initialize with config (cache_dir + languages).
let packConfig = try packConfigFromJson(
    #"{"cache_dir":"/tmp/parsers","languages":["python","go"]}"#
)
try init(packConfig)

// Inspect downloaded state.
let cached = downloadedLanguages().map { $0.as_str().toString() }
let manifest = try manifestLanguages().map { $0.as_str().toString() }
print("Cached: \(cached)")
print("Manifest sample: \(manifest.prefix(5))")
Zig
const std = @import("std");
const tslp = @import("tree_sitter_language_pack");

pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    _ = gpa.allocator();

    // Pre-download specific languages (names passed as JSON array).
    const count = try tslp.download("[\"python\", \"javascript\", \"rust\"]");
    std.debug.print("languages available after download: {d}\n", .{count});

    // Inspect what is cached locally — returned as a JSON array string.
    const installed = try tslp.downloaded_languages();
    defer std.heap.c_allocator.free(installed);
    std.debug.print("downloaded: {s}\n", .{installed});

    // Report the effective cache directory.
    const dir = try tslp.cache_dir();
    defer std.heap.c_allocator.free(dir);
    std.debug.print("cache dir: {s}\n", .{dir});
}
Kotlin (Android)
import dev.kreuzberg.tslp.android.TreeSitterLanguagePack
import dev.kreuzberg.tslp.android.PackConfig
import android.app.Application
import java.nio.file.Paths

class MyApp : Application() {
    override fun onCreate() {
        super.onCreate()

        val cacheDir = Paths.get(cacheDir.absolutePath, "tree-sitter")
        val config = PackConfig(
            cacheDir = cacheDir,
            languages = listOf("kotlin", "java", "xml"),
            groups = null
        )
        TreeSitterLanguagePack.init(config)

        val downloaded = TreeSitterLanguagePack.downloadedLanguages()
        println("Downloaded parsers: $downloaded")

        val count = TreeSitterLanguagePack.languageCount()
        println("Total available languages: $count")
    }
}
WebAssembly
// Note: the WASM build ships with statically compiled parsers — no download step needed.
import {
  availableLanguages,
  hasLanguage,
  languageCount,
} from "@kreuzberg/tree-sitter-language-pack-wasm";

console.log(`Has Python: ${hasLanguage("python")}`);
console.log(`Has Rust: ${hasLanguage("rust")}`);
console.log(`Total bundled languages: ${languageCount()}`);
console.log(`Sample: ${availableLanguages().slice(0, 10).join(", ")}`);
Rust
// Requires feature = "download" (enabled by default).
use std::path::PathBuf;
use tree_sitter_language_pack::{PackConfig, download, downloaded_languages, init};

fn main() -> anyhow::Result<()> {
    // Pre-download specific languages; returns count of ensured languages.
    let _count = download(&["python", "javascript", "rust"])?;

    // Or initialize with config (cache_dir is PathBuf, not String).
    let config = PackConfig {
        languages: Some(vec!["python".into(), "go".into()]),
        cache_dir: Some(PathBuf::from("/tmp/parsers")),
        groups: None,
    };
    init(&config)?;

    println!("{:?}", downloaded_languages());
    Ok(())
}

All 306 languages

ts-pack download --all
from tree_sitter_language_pack import download_all

download_all()
import { downloadAll } from "@kreuzberg/tree-sitter-language-pack";

await downloadAll();
use tree_sitter_language_pack::download_all;

download_all()?;

By language group

Groups bundle related languages: web, systems, scripting, data, jvm, functional.

# Download the web and data language groups (web includes HTML, CSS, JS, TS, Vue, Svelte, …)
ts-pack download --groups web,data

# See what's cached
ts-pack list --downloaded
from tree_sitter_language_pack import init

init({"groups": ["web", "data"]})
import { init } from "@kreuzberg/tree-sitter-language-pack";

await init({ groups: ["web", "data"] });
use tree_sitter_language_pack::{PackConfig, init};

let config = PackConfig {
    groups: Some(vec!["web".into(), "data".into()]),
    ..Default::default()
};
init(&config)?;

Docker and CI

Pre-download parsers during your build to avoid runtime network calls:

Dockerfile
FROM python:3.12-slim
RUN pip install tree-sitter-language-pack
# Pre-download at build time — no network needed at runtime
RUN python -c "from tree_sitter_language_pack import download_all; download_all()"
GitHub Actions
- name: Install and pre-download parsers
  run: |
    pip install tree-sitter-language-pack
    python -c "from tree_sitter_language_pack import download; download(['python', 'javascript', 'rust'])"

Configuration file

Declare which languages your project needs in a language-pack.toml:

language-pack.toml
languages = ["python", "javascript", "rust", "go"]
# groups = ["web", "systems"]
# cache_dir = "/tmp/parsers"

Then download everything declared in the config:

# Reads language-pack.toml automatically
ts-pack download
from tree_sitter_language_pack import init

# Reads language-pack.toml from current directory
init()

Info (cache location): Parsers are cached in ~/.cache/tree-sitter-language-pack/ on Linux/macOS and %LOCALAPPDATA%\tree-sitter-language-pack\ on Windows. Override the location with cache_dir in language-pack.toml or through the programmatic API. See Download Model for full details.


3. Parse Code

Build a concrete syntax tree from source code.

CLI
# Download parsers
ts-pack download python javascript rust

# Parse a file
ts-pack parse main.py --format json

# Run code intelligence
ts-pack process src/app.py --all

# List available languages
ts-pack list --manifest
Python
import tree_sitter_language_pack as tslp

# Parsers download automatically on first use
result = tslp.process(
    "def hello():\n    print('world')\n",
    tslp.ProcessConfig(language="python", structure=True, imports=True),
)

print(f"Language: {result.language}")
print(f"Functions: {len(result.structure)}")
Node.js
import { process } from "@kreuzberg/tree-sitter-language-pack";

const result = process("function hello() { console.log('world'); }", {
  language: "javascript",
  structure: true,
  imports: true,
});

console.log(`Language: ${result.language}`);
console.log(`Functions: ${result.structure?.length ?? 0}`);
Ruby
require "tree_sitter_language_pack"

config = TreeSitterLanguagePack::PackConfig.new(languages: ["ruby"])
TreeSitterLanguagePack.init(config)

puts "Ruby available: #{TreeSitterLanguagePack.has_language("ruby")}"
puts "Languages: #{TreeSitterLanguagePack.language_count}"
PHP
<?php

use Tree\Sitter\Language\Pack\TreeSitterLanguagePack;

if (TreeSitterLanguagePack::hasLanguage("php")) {
    echo "PHP grammar is available\n";
}

echo "Total languages: " . TreeSitterLanguagePack::languageCount() . "\n";
Go
package main

import (
    "fmt"
    "log"

    "github.com/kreuzberg-dev/tree-sitter-language-pack/packages/go"
)

func main() {
    parser, err := tspack.GetParser("go")
    if err != nil {
        log.Fatal(err)
    }
    defer parser.Free()

    tree := parser.Parse("package main\nfunc hello() {}")
    defer tree.Free()

    root := tree.RootNode()
    defer root.Free()

    kind := root.Kind()
    if kind != nil {
        fmt.Println("Root:", *kind)
    }
}
Java
import dev.kreuzberg.treesitterlanguagepack.PackConfig;
import dev.kreuzberg.treesitterlanguagepack.TreeSitterLanguagePack;

import java.util.List;
import java.util.Optional;

class Main {
    public static void main(String[] args) throws Exception {
        PackConfig config = PackConfig.builder()
                .withLanguages(Optional.of(List.of("java")))
                .build();
        TreeSitterLanguagePack.init(config);

        System.out.println("Java available: " + TreeSitterLanguagePack.hasLanguage("java"));
        System.out.println("Languages: " + TreeSitterLanguagePack.languageCount());
    }
}
C#
using TreeSitterLanguagePack;

var parser = Parser.Default();
parser.SetLanguage("python");

var tree = parser.Parse("def hello():\n    print('world')\n");
var root = tree!.RootNode();

Console.WriteLine($"Root kind: {root.Kind()}");

parser.Dispose();
Elixir
{:ok, nil} = TreeSitterLanguagePack.init(~s({"languages": ["elixir"]}))

count = TreeSitterLanguagePack.language_count()
IO.puts("Languages: #{count}")

IO.puts("Elixir available: #{TreeSitterLanguagePack.has_language("elixir")}")
Dart
import 'package:tree_sitter_language_pack/tree_sitter_language_pack.dart';
import 'package:tree_sitter_language_pack/src/tree_sitter_language_pack_bridge_generated/frb_generated.dart'
    show RustLib;

void main() async {
  await RustLib.init();

  final parser = await TreeSitterLanguagePackBridge.getParser('python');
  final tree = await parser.parse(source: "def hello():\n    print('world')\n");
  final root = await tree!.rootNode();

  print('Root kind: ${await root.kind()}');
}
Swift
import TreeSitterLanguagePack
import RustBridge

// Parsers download automatically on first use.
let config = try processConfigFromJson(#"{"language":"swift","structure":true}"#)
let result = try process("func greet() { print(\"hello\") }", config)

print("Language: \(result.language().toString())")
print("Functions: \(result.structure().count)")
print("Total lines: \(result.metrics().total_lines())")
Zig
const std = @import("std");
const tslp = @import("tree_sitter_language_pack");

pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    _ = gpa.allocator();

    var parser = try tslp.get_parser("rust");
    defer parser.free();

    const source = "fn main() { println!(\"hello\"); }";
    var tree = (try parser.parse(source)) orelse return error.ParseFailed;
    defer tree.free();

    var root = tree.root_node();
    defer root.free();

    const kind = try root.kind();
    defer std.heap.c_allocator.free(kind);
    std.debug.print("root kind: {s}\n", .{kind});
}
Kotlin (Android)
import dev.kreuzberg.tslp.android.TreeSitterLanguagePack
import dev.kreuzberg.tslp.android.PackConfig
import java.nio.file.Paths

suspend fun main() {
    val config = PackConfig(
        cacheDir = Paths.get("/data/data/com.example.app/cache/parsers"),
        languages = listOf("kotlin"),
        groups = null
    )
    TreeSitterLanguagePack.init(config)

    val lang = TreeSitterLanguagePack.getLanguage("kotlin")
    val parser = TreeSitterLanguagePack.getParser("kotlin")
    println("Root node kind: ${parser.parse("fun hello() {}").rootNode().type}")
}
WebAssembly
import {
  availableLanguages,
  getParser,
  hasLanguage,
  languageCount,
} from "@kreuzberg/tree-sitter-language-pack-wasm";

console.log(`${languageCount()} languages available`);
console.log(`Python available: ${hasLanguage("python")}`);
console.log(`First 5: ${availableLanguages().slice(0, 5).join(", ")}`);

const parser = getParser("python");
try {
  const tree = parser.parse("def hello(): pass");
  try {
    console.log(`Root: ${tree.rootNode().kind()}`);
  } finally {
    tree.free();
  }
} finally {
  parser.free();
}
Rust
use tree_sitter_language_pack::{ProcessConfig, process};

fn main() -> anyhow::Result<()> {
    let config = ProcessConfig::new("rust").all();
    let result = process("fn main() { println!(\"hello\"); }", &config)?;

    println!("Language: {}", result.language);
    println!("Functions: {}", result.structure.len());
    Ok(())
}

4. Extract Code Intelligence

Go beyond the raw syntax tree. Extract functions, classes, imports, docstrings, and more with process.

CLI
# Parse and show S-expression
ts-pack parse main.py --language python

# Parse as JSON
echo "fn main() {}" | ts-pack parse - --language rust --format json

# Full code intelligence
ts-pack process src/app.py --language python --all

# Structure + imports only
ts-pack process src/app.py --structure --imports
Python
import tree_sitter_language_pack as tslp

config = tslp.ProcessConfig(
    language="python",
    structure=True,
    imports=True,
    comments=True,
    chunk_max_size=1000,
)

result = tslp.process('''
import os
from pathlib import Path

def read_file(path: str) -> str:
    """Read a file and return its contents."""
    return Path(path).read_text()

class FileReader:
    def __init__(self, base_dir: str):
        self.base_dir = base_dir
''', config)

for item in result.structure:
    print(f"{item.kind}: {item.name}")

for imp in result.imports:
    print(f"import: {imp.source}")
Node.js
import { process } from "@kreuzberg/tree-sitter-language-pack";

const result = process(
  `
import { readFile } from 'fs/promises';

export async function loadConfig(path: string): Promise<Config> {
  const data = await readFile(path, 'utf-8');
  return JSON.parse(data);
}

export class ConfigManager {
  constructor(private basePath: string) {}
}
`,
  { language: "typescript", structure: true, imports: true, exports: true, comments: true },
);

if (result.structure) {
  for (const item of result.structure) {
    console.log(`${item.kind}: ${item.name}`);
  }
}
Ruby
require "tree_sitter_language_pack"

config = TreeSitterLanguagePack::ProcessConfig.new(
  language: "ruby",
  structure: true,
  imports: true,
)

result = TreeSitterLanguagePack.process(
  "require 'json'\ndef parse(data)\n  JSON.parse(data)\nend",
  config
)

puts "Language: #{result.language}"
if result.structure
  result.structure.each do |item|
    puts "#{item.kind}: #{item.name}"
  end
end

if result.imports
  result.imports.each do |imp|
    puts "import: #{imp.source}"
  end
end
PHP
<?php

use Tree\Sitter\Language\Pack\ProcessConfig;
use Tree\Sitter\Language\Pack\TreeSitterLanguagePack;

$config = new ProcessConfig(
    language: "php",
    structure: true,
    imports: true,
    exports: true,
    comments: false,
    docstrings: false,
    symbols: false,
    diagnostics: false,
    chunkMaxSize: null,
);

$result = TreeSitterLanguagePack::process(
    "<?php namespace App; class Controller { public function index() {} }",
    $config,
);

echo "Language: " . $result->language . "\n";
foreach ($result->structure as $item) {
    echo $item->kind->value . ": " . ($item->name ?? "(anonymous)") . "\n";
}
Go
package main

import (
    "fmt"
    "log"

    "github.com/kreuzberg-dev/tree-sitter-language-pack/packages/go"
)

func main() {
    config := tspack.NewProcessConfig(
        tspack.WithProcessConfigLanguage("go"),
        tspack.WithProcessConfigStructure(true),
        tspack.WithProcessConfigImports(true),
    )
    result, err := tspack.Process(
        "package main\nimport \"fmt\"\nfunc hello() { fmt.Println(\"hi\") }",
        *config,
    )
    if err != nil {
        log.Fatal(err)
    }

    fmt.Println("Language:", result.Language)
    for _, item := range result.Structure {
        fmt.Printf("%s: %s\n", item.Kind, item.Name)
    }
    for _, imp := range result.Imports {
        fmt.Println("import:", imp.Source)
    }
}
Java
import dev.kreuzberg.treesitterlanguagepack.ProcessConfig;
import dev.kreuzberg.treesitterlanguagepack.ProcessResult;
import dev.kreuzberg.treesitterlanguagepack.StructureItem;
import dev.kreuzberg.treesitterlanguagepack.TreeSitterLanguagePack;

class Main {
    public static void main(String[] args) throws Exception {
        ProcessConfig config = ProcessConfig.builder()
                .withLanguage("java")
                .withStructure(true)
                .withImports(true)
                .build();

        ProcessResult result = TreeSitterLanguagePack.process(
                "import java.util.List;\npublic class App { public void run() {} }",
                config);

        System.out.println("Language: " + result.language());
        for (StructureItem item : result.structure()) {
            System.out.println(item.kind() + ": " + item.name());
        }
    }
}
C#
using TreeSitterLanguagePack;

var registry = LanguageRegistry.Default();

var config = new ProcessConfig
{
    Language = "csharp",
    Structure = true,
    Imports = true
};

var result = registry.Process("public class Greeter { }", config);

foreach (var item in result.Structure)
{
    Console.WriteLine($"Kind: {item.Kind}, Name: {item.Name}");
}

registry.Dispose();
Elixir
{:ok, json} =
  TreeSitterLanguagePack.process(
    "defmodule MyApp do\n  def hello, do: :world\nend",
    ~s({"language": "elixir", "structure": true, "imports": true})
  )

result = Jason.decode!(json)
IO.puts("Language: #{result["language"]}")

for item <- result["structure"] do
  IO.puts("#{item["kind"]}: #{item["name"]}")
end
Dart
import 'package:tree_sitter_language_pack/tree_sitter_language_pack.dart';
import 'package:tree_sitter_language_pack/src/tree_sitter_language_pack_bridge_generated/frb_generated.dart'
    show RustLib;

void main() async {
  await RustLib.init();

  const config = ProcessConfig(
    language: 'python',
    structure: true,
    imports: true,
    exports: false,
    comments: false,
    docstrings: false,
    symbols: false,
    diagnostics: false,
  );

  const source = '''
import os
from pathlib import Path

def read_file(path: str) -> str:
    return Path(path).read_text()

class FileReader:
    def __init__(self, base_dir: str):
        self.base_dir = base_dir
''';

  final result = await TreeSitterLanguagePackBridge.process(source, config);

  for (final item in result.structure) {
    print('${item.kind}: ${item.name ?? "<anonymous>"}');
  }
}
Swift
import TreeSitterLanguagePack
import RustBridge

let config = try processConfigFromJson(#"""
{
  "language": "python",
  "structure": true,
  "imports": true
}
"""#)

let source = """
import os
from pathlib import Path

def read_file(path: str) -> str:
    return Path(path).read_text()

class FileReader:
    def __init__(self, base_dir: str):
        self.base_dir = base_dir
"""

let result = try process(source, config)

for item in result.structure() {
    let kind = item.kind().toString()
    let name = item.name()?.toString() ?? "<anonymous>"
    print("\(kind): \(name)")
}

for imp in result.imports() {
    print("import: \(imp.source().toString())")
}
Zig
const std = @import("std");
const tslp = @import("tree_sitter_language_pack");

pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    const source = "def hello():\n    pass\n\nimport os\n";
    const config_json =
        \\{"language":"python","structure":true,"imports":true,"exports":false,
        \\"comments":false,"docstrings":false,"symbols":false,"diagnostics":false,
        \\"chunk_max_size":null}
    ;

    const result_json = try tslp.process(source, config_json);
    defer std.heap.c_allocator.free(result_json);

    var parsed = try std.json.parseFromSlice(std.json.Value, allocator, result_json, .{});
    defer parsed.deinit();

    const structure = parsed.value.object.get("structure").?.array;
    for (structure.items) |item| {
        const kind_value = item.object.get("kind").?;
        const kind_name = switch (kind_value) {
            .string => |s| s,
            .object => |obj| obj.keys()[0],
            else => "unknown",
        };
        const name_value = item.object.get("name") orelse std.json.Value{ .null = {} };
        const name_str = if (name_value == .string) name_value.string else "<anonymous>";
        std.debug.print("{s}: {s}\n", .{ kind_name, name_str });
    }
}
Kotlin (Android)
import dev.kreuzberg.tslp.android.TreeSitterLanguagePack
import dev.kreuzberg.tslp.android.ProcessConfig

suspend fun analyzeCode(source: String) {
    val config = ProcessConfig(
        language = "kotlin",
        structure = true,
        imports = true,
        exports = false,
        comments = false,
        docstrings = false,
        symbols = false,
        diagnostics = false,
        chunks = null
    )

    val result = TreeSitterLanguagePack.processAsync(source, config)

    println("Language: ${result.language}")
    println("Detected ${result.structure.size} structural items")

    for (item in result.structure) {
        println("${item.kind}: ${item.name}")
        for (child in item.children) {
            println("  └ ${child.kind}: ${child.name}")
        }
    }
}
WebAssembly
import { process } from "@kreuzberg/tree-sitter-language-pack-wasm";

const result = process("function add(a, b) { return a + b; }", {
  language: "javascript",
  structure: true,
  imports: true,
  exports: true,
  comments: false,
  docstrings: false,
  symbols: false,
  diagnostics: false,
});

console.log(`Language: ${result.language}`);
for (const item of result.structure) {
  console.log(`${item.kind}: ${item.name ?? "(anonymous)"}`);
}
Rust
use tree_sitter_language_pack::{ProcessConfig, process};

fn main() -> anyhow::Result<()> {
    let config = ProcessConfig::new("python")
        .all()
        .with_chunking(1000);

    let result = process("def hello(): pass\ndef world(): pass", &config)?;

    for item in &result.structure {
        // item.kind implements Debug (not Display); item.name is Option<String>
        println!("{:?}: {}", item.kind, item.name.as_deref().unwrap_or("<unnamed>"));
    }
    for chunk in &result.chunks {
        println!("chunk: lines {}-{}", chunk.start_line, chunk.end_line);
    }
    Ok(())
}

5. Inspect Query Sources

process() covers the built-in intelligence fields. There is no public extract() helper for arbitrary query execution. For custom analysis, fetch the bundled query source with helpers such as get_tags_query() (available as of v1.9), then either run the tree-sitter query APIs yourself or walk the AST manually.

from tree_sitter_language_pack import get_tags_query

tags_query = get_tags_query("python")
if tags_query is not None:
    print(tags_query.splitlines()[0])
use tree_sitter_language_pack::{get_parser, get_tags_query};

let query_source = get_tags_query("rust");
let mut parser = get_parser("rust")?;
let tree = parser.parse("fn main() {}").ok_or("failed to parse")?;
println!("{}", tree.root_node().kind());

6. Chunk for LLMs

Split code at natural boundaries so that language models receive coherent, complete units. This is ideal for embedding pipelines and context windows.

from tree_sitter_language_pack import process, ProcessConfig

with open("large_module.py") as f:
    source = f.read()

config = ProcessConfig(
    language="python",
    chunk_max_size=1500,  # max bytes per chunk
    structure=True,
)
result = process(source, config)

for i, chunk in enumerate(result.chunks):
    print(f"Chunk {i}: lines {chunk.start_line}-{chunk.end_line} "
          f"({chunk.end_byte - chunk.start_byte} bytes)")
import { process } from "@kreuzberg/tree-sitter-language-pack";
import { readFileSync } from "fs";

const source = readFileSync("large_module.ts", "utf8");

const result = await process(source, {
  language: "typescript",
  chunkMaxSize: 1500,
  structure: true,
});

result.chunks.forEach((chunk, i) => {
  console.log(`Chunk ${i}: lines ${chunk.startLine}-${chunk.endLine} (${chunk.endByte - chunk.startByte} bytes)`);
});
# Chunk a file for LLM ingestion
ts-pack process large_module.py --chunk-size 1500 \
  | jq '.chunks[] | {start: .start_line, end: .end_line, bytes: (.end_byte - .start_byte)}'

You now have the full workflow: install, download, parse, extract intelligence, inspect query sources, and chunk for LLMs. Go further with the following guides:

Edit this page on GitHub