Quick Start
This guide walks you from install to parsing, code intelligence, and LLM chunking.
1. Install¶
!!! Tip "Other ecosystems"
    Go, Java, Ruby, Elixir, PHP, and WebAssembly are also supported. See Installation for the full list.
2. Download Parsers¶
Parsers download automatically on first use. For production, CI, Docker, or offline environments, pre-download them.
Specific languages¶
# Download specific languages
ts-pack download python javascript rust go
# Download all available languages
ts-pack download --all
# Download a language group
ts-pack download --groups web,systems
# Fresh download (clear cache first)
ts-pack download --fresh python
# Check what's cached
ts-pack list --downloaded
import tree_sitter_language_pack as tslp
# Pre-download specific languages
tslp.download(["python", "javascript", "rust"])
# Or initialize with config
tslp.init(tslp.PackConfig(languages=["python", "go"], cache_dir="/tmp/parsers"))
# Check what's cached
print(tslp.downloaded_languages())
print(tslp.manifest_languages()[:5])
import {
init,
download,
downloadedLanguages,
manifestLanguages,
} from "@kreuzberg/tree-sitter-language-pack";
// Pre-download specific languages
const count = download(["python", "javascript", "rust"]);
console.log(`Downloaded ${count} languages`);
// Or initialize with config
init({ languages: ["python", "go"], cacheDir: "/tmp/parsers" });
// Check what's cached
console.log(downloadedLanguages());
console.log(manifestLanguages().slice(0, 5));
require "tree_sitter_language_pack"
config = TreeSitterLanguagePack::PackConfig.new(languages: ["ruby", "python"])
TreeSitterLanguagePack.init(config)
count = TreeSitterLanguagePack.download(["rust", "javascript"])
puts "Ensured #{count} languages"
TreeSitterLanguagePack.downloaded_languages.each do |name|
puts "cached: #{name}"
end
<?php
use Tree\Sitter\Language\Pack\PackConfig;
use Tree\Sitter\Language\Pack\TreeSitterLanguagePack;
$config = new PackConfig(
cacheDir: null,
languages: ["php", "javascript"],
groups: null,
);
TreeSitterLanguagePack::init($config);
$count = TreeSitterLanguagePack::download(["python", "rust"]);
echo "Ensured {$count} languages\n";
foreach (TreeSitterLanguagePack::downloadedLanguages() as $name) {
echo "cached: {$name}\n";
}
package main
import (
"fmt"
"log"
"github.com/kreuzberg-dev/tree-sitter-language-pack/packages/go"
)
func main() {
config := tspack.PackConfig{
Languages: []string{"go", "python"},
}
if err := tspack.Init(config); err != nil {
log.Fatal(err)
}
count, err := tspack.Download([]string{"rust", "javascript"})
if err != nil {
log.Fatal(err)
}
if count != nil {
fmt.Printf("Ensured %d languages\n", *count)
}
for _, name := range tspack.DownloadedLanguages() {
fmt.Println("cached:", name)
}
}
import dev.kreuzberg.treesitterlanguagepack.PackConfig;
import dev.kreuzberg.treesitterlanguagepack.TreeSitterLanguagePack;
import java.util.List;
import java.util.Optional;
class Main {
public static void main(String[] args) throws Exception {
PackConfig config = PackConfig.builder()
.withLanguages(Optional.of(List.of("java", "kotlin")))
.build();
TreeSitterLanguagePack.init(config);
long ensured = TreeSitterLanguagePack.download(List.of("python", "rust"));
System.out.println("Ensured " + ensured + " languages");
for (String name : TreeSitterLanguagePack.downloadedLanguages()) {
System.out.println("cached: " + name);
}
}
}
using TreeSitterLanguagePack;
var dm = DownloadManager.New("1.9.0");
dm.DownloadAllBestEffort();
var downloaded = dm.InstalledLanguages();
Console.WriteLine($"Downloaded languages: {string.Join(", ", downloaded)}");
var registry = LanguageRegistry.Default();
var available = registry.AvailableLanguages();
Console.WriteLine($"Total available: {available.Count}");
dm.Dispose();
registry.Dispose();
import 'package:tree_sitter_language_pack/tree_sitter_language_pack.dart';
import 'package:tree_sitter_language_pack/src/tree_sitter_language_pack_bridge_generated/frb_generated.dart'
show RustLib;
void main() async {
await RustLib.init();
// Pre-download specific languages.
final count = await TreeSitterLanguagePackBridge.download(
['python', 'javascript', 'rust'],
);
print('Downloaded $count languages');
// Or initialize with config.
await TreeSitterLanguagePackBridge.init(
const PackConfig(languages: ['python', 'go'], cacheDir: '/tmp/parsers'),
);
// Inspect cache state.
print(await TreeSitterLanguagePackBridge.downloadedLanguages());
final manifest = await TreeSitterLanguagePackBridge.manifestLanguages();
print(manifest.take(5).toList());
}
import TreeSitterLanguagePack
import RustBridge
// Pre-download specific languages.
let names = RustVec<String>()
names.push(value: "python")
names.push(value: "javascript")
names.push(value: "rust")
let installed = try download(names)
print("Downloaded \(installed) parsers")
// Or initialize with config (cache_dir + languages).
let packConfig = try packConfigFromJson(
#"{"cache_dir":"/tmp/parsers","languages":["python","go"]}"#
)
try init(packConfig)
// Inspect downloaded state.
let cached = downloadedLanguages().map { $0.as_str().toString() }
let manifest = try manifestLanguages().map { $0.as_str().toString() }
print("Cached: \(cached)")
print("Manifest sample: \(manifest.prefix(5))")
const std = @import("std");
const tslp = @import("tree_sitter_language_pack");
pub fn main() !void {
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
defer _ = gpa.deinit();
_ = gpa.allocator();
// Pre-download specific languages (names passed as JSON array).
const count = try tslp.download("[\"python\", \"javascript\", \"rust\"]");
std.debug.print("languages available after download: {d}\n", .{count});
// Inspect what is cached locally — returned as a JSON array string.
const installed = try tslp.downloaded_languages();
defer std.heap.c_allocator.free(installed);
std.debug.print("downloaded: {s}\n", .{installed});
// Report the effective cache directory.
const dir = try tslp.cache_dir();
defer std.heap.c_allocator.free(dir);
std.debug.print("cache dir: {s}\n", .{dir});
}
import dev.kreuzberg.tslp.android.TreeSitterLanguagePack
import dev.kreuzberg.tslp.android.PackConfig
import android.app.Application
import java.nio.file.Paths
class MyApp : Application() {
override fun onCreate() {
super.onCreate()
val cacheDir = Paths.get(cacheDir.absolutePath, "tree-sitter")
val config = PackConfig(
cacheDir = cacheDir,
languages = listOf("kotlin", "java", "xml"),
groups = null
)
TreeSitterLanguagePack.init(config)
val downloaded = TreeSitterLanguagePack.downloadedLanguages()
println("Downloaded parsers: $downloaded")
val count = TreeSitterLanguagePack.languageCount()
println("Total available languages: $count")
}
}
// Note: the WASM build ships with statically compiled parsers — no download step needed.
import {
availableLanguages,
hasLanguage,
languageCount,
} from "@kreuzberg/tree-sitter-language-pack-wasm";
console.log(`Has Python: ${hasLanguage("python")}`);
console.log(`Has Rust: ${hasLanguage("rust")}`);
console.log(`Total bundled languages: ${languageCount()}`);
console.log(`Sample: ${availableLanguages().slice(0, 10).join(", ")}`);
// Requires feature = "download" (enabled by default).
use std::path::PathBuf;
use tree_sitter_language_pack::{PackConfig, download, downloaded_languages, init};
fn main() -> anyhow::Result<()> {
// Pre-download specific languages; returns count of ensured languages.
let _count = download(&["python", "javascript", "rust"])?;
// Or initialize with config (cache_dir is PathBuf, not String).
let config = PackConfig {
languages: Some(vec!["python".into(), "go".into()]),
cache_dir: Some(PathBuf::from("/tmp/parsers")),
groups: None,
};
init(&config)?;
println!("{:?}", downloaded_languages());
Ok(())
}
All 306 languages¶
By language group¶
Groups bundle related languages: web, systems, scripting, data, jvm, functional.
Docker and CI¶
Pre-download parsers during your build to avoid runtime network calls:
FROM python:3.12-slim
RUN pip install tree-sitter-language-pack
# Pre-download at build time — no network needed at runtime
RUN python -c "from tree_sitter_language_pack import download_all; download_all()"
- name: Install and pre-download parsers
run: |
pip install tree-sitter-language-pack
python -c "from tree_sitter_language_pack import download; download(['python', 'javascript', 'rust'])"
Configuration file¶
Declare which languages your project needs in a language-pack.toml:
languages = ["python", "javascript", "rust", "go"]
# groups = ["web", "systems"]
# cache_dir = "/tmp/parsers"
Then download everything declared in the config:
!!! Info "Cache location"
    Parsers cache to ~/.cache/tree-sitter-language-pack/ on Linux/macOS and %LOCALAPPDATA%\tree-sitter-language-pack\ on Windows. Override with cache_dir in language-pack.toml or the programmatic API. See Download Model for full details.
3. Parse Code¶
Build a concrete syntax tree from source code.
import tree_sitter_language_pack as tslp
# Parsers download automatically on first use
result = tslp.process(
"def hello():\n print('world')\n",
tslp.ProcessConfig(language="python", structure=True, imports=True),
)
print(f"Language: {result.language}")
print(f"Functions: {len(result.structure)}")
import { process } from "@kreuzberg/tree-sitter-language-pack";
const result = process("function hello() { console.log('world'); }", {
language: "javascript",
structure: true,
imports: true,
});
console.log(`Language: ${result.language}`);
console.log(`Functions: ${result.structure?.length ?? 0}`);
package main
import (
"fmt"
"log"
"github.com/kreuzberg-dev/tree-sitter-language-pack/packages/go"
)
func main() {
parser, err := tspack.GetParser("go")
if err != nil {
log.Fatal(err)
}
defer parser.Free()
tree := parser.Parse("package main\nfunc hello() {}")
defer tree.Free()
root := tree.RootNode()
defer root.Free()
kind := root.Kind()
if kind != nil {
fmt.Println("Root:", *kind)
}
}
import dev.kreuzberg.treesitterlanguagepack.PackConfig;
import dev.kreuzberg.treesitterlanguagepack.TreeSitterLanguagePack;
import java.util.List;
import java.util.Optional;
class Main {
public static void main(String[] args) throws Exception {
PackConfig config = PackConfig.builder()
.withLanguages(Optional.of(List.of("java")))
.build();
TreeSitterLanguagePack.init(config);
System.out.println("Java available: " + TreeSitterLanguagePack.hasLanguage("java"));
System.out.println("Languages: " + TreeSitterLanguagePack.languageCount());
}
}
import 'package:tree_sitter_language_pack/tree_sitter_language_pack.dart';
import 'package:tree_sitter_language_pack/src/tree_sitter_language_pack_bridge_generated/frb_generated.dart'
show RustLib;
void main() async {
await RustLib.init();
final parser = await TreeSitterLanguagePackBridge.getParser('python');
final tree = await parser.parse(source: "def hello():\n print('world')\n");
final root = await tree!.rootNode();
print('Root kind: ${await root.kind()}');
}
import TreeSitterLanguagePack
import RustBridge
// Parsers download automatically on first use.
let config = try processConfigFromJson(#"{"language":"swift","structure":true}"#)
let result = try process("func greet() { print(\"hello\") }", config)
print("Language: \(result.language().toString())")
print("Functions: \(result.structure().count)")
print("Total lines: \(result.metrics().total_lines())")
const std = @import("std");
const tslp = @import("tree_sitter_language_pack");
pub fn main() !void {
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
defer _ = gpa.deinit();
_ = gpa.allocator();
var parser = try tslp.get_parser("rust");
defer parser.free();
const source = "fn main() { println!(\"hello\"); }";
var tree = (try parser.parse(source)) orelse return error.ParseFailed;
defer tree.free();
var root = tree.root_node();
defer root.free();
const kind = try root.kind();
defer std.heap.c_allocator.free(kind);
std.debug.print("root kind: {s}\n", .{kind});
}
import dev.kreuzberg.tslp.android.TreeSitterLanguagePack
import dev.kreuzberg.tslp.android.PackConfig
import java.nio.file.Paths
suspend fun main() {
val config = PackConfig(
cacheDir = Paths.get("/data/data/com.example.app/cache/parsers"),
languages = listOf("kotlin"),
groups = null
)
TreeSitterLanguagePack.init(config)
val lang = TreeSitterLanguagePack.getLanguage("kotlin")
val parser = TreeSitterLanguagePack.getParser("kotlin")
println("Root node kind: ${parser.parse("fun hello() {}").rootNode().type}")
}
import {
availableLanguages,
getParser,
hasLanguage,
languageCount,
} from "@kreuzberg/tree-sitter-language-pack-wasm";
console.log(`${languageCount()} languages available`);
console.log(`Python available: ${hasLanguage("python")}`);
console.log(`First 5: ${availableLanguages().slice(0, 5).join(", ")}`);
const parser = getParser("python");
try {
const tree = parser.parse("def hello(): pass");
try {
console.log(`Root: ${tree.rootNode().kind()}`);
} finally {
tree.free();
}
} finally {
parser.free();
}
use tree_sitter_language_pack::{ProcessConfig, process};
fn main() -> anyhow::Result<()> {
let config = ProcessConfig::new("rust").all();
let result = process("fn main() { println!(\"hello\"); }", &config)?;
println!("Language: {}", result.language);
println!("Functions: {}", result.structure.len());
Ok(())
}
4. Extract Code Intelligence¶
Go beyond the raw syntax tree. Extract functions, classes, imports, docstrings, and more with process.
# Parse and show S-expression
ts-pack parse main.py --language python
# Parse as JSON
echo "fn main() {}" | ts-pack parse - --language rust --format json
# Full code intelligence
ts-pack process src/app.py --language python --all
# Structure + imports only
ts-pack process src/app.py --structure --imports
import tree_sitter_language_pack as tslp
config = tslp.ProcessConfig(
language="python",
structure=True,
imports=True,
comments=True,
chunk_max_size=1000,
)
result = tslp.process('''
import os
from pathlib import Path
def read_file(path: str) -> str:
"""Read a file and return its contents."""
return Path(path).read_text()
class FileReader:
def __init__(self, base_dir: str):
self.base_dir = base_dir
''', config)
for item in result.structure:
print(f"{item.kind}: {item.name}")
for imp in result.imports:
print(f"import: {imp.source}")
import { process } from "@kreuzberg/tree-sitter-language-pack";
const result = process(
`
import { readFile } from 'fs/promises';
export async function loadConfig(path: string): Promise<Config> {
const data = await readFile(path, 'utf-8');
return JSON.parse(data);
}
export class ConfigManager {
constructor(private basePath: string) {}
}
`,
{ language: "typescript", structure: true, imports: true, exports: true, comments: true },
);
if (result.structure) {
for (const item of result.structure) {
console.log(`${item.kind}: ${item.name}`);
}
}
require "tree_sitter_language_pack"
config = TreeSitterLanguagePack::ProcessConfig.new(
language: "ruby",
structure: true,
imports: true,
)
result = TreeSitterLanguagePack.process(
"require 'json'\ndef parse(data)\n JSON.parse(data)\nend",
config
)
puts "Language: #{result.language}"
if result.structure
result.structure.each do |item|
puts "#{item.kind}: #{item.name}"
end
end
if result.imports
result.imports.each do |imp|
puts "import: #{imp.source}"
end
end
<?php
use Tree\Sitter\Language\Pack\ProcessConfig;
use Tree\Sitter\Language\Pack\TreeSitterLanguagePack;
$config = new ProcessConfig(
language: "php",
structure: true,
imports: true,
exports: true,
comments: false,
docstrings: false,
symbols: false,
diagnostics: false,
chunkMaxSize: null,
);
$result = TreeSitterLanguagePack::process(
"<?php namespace App; class Controller { public function index() {} }",
$config,
);
echo "Language: " . $result->language . "\n";
foreach ($result->structure as $item) {
echo $item->kind->value . ": " . ($item->name ?? "(anonymous)") . "\n";
}
package main
import (
"fmt"
"log"
"github.com/kreuzberg-dev/tree-sitter-language-pack/packages/go"
)
func main() {
config := tspack.NewProcessConfig(
tspack.WithProcessConfigLanguage("go"),
tspack.WithProcessConfigStructure(true),
tspack.WithProcessConfigImports(true),
)
result, err := tspack.Process(
"package main\nimport \"fmt\"\nfunc hello() { fmt.Println(\"hi\") }",
*config,
)
if err != nil {
log.Fatal(err)
}
fmt.Println("Language:", result.Language)
for _, item := range result.Structure {
fmt.Printf("%s: %s\n", item.Kind, item.Name)
}
for _, imp := range result.Imports {
fmt.Println("import:", imp.Source)
}
}
import dev.kreuzberg.treesitterlanguagepack.ProcessConfig;
import dev.kreuzberg.treesitterlanguagepack.ProcessResult;
import dev.kreuzberg.treesitterlanguagepack.StructureItem;
import dev.kreuzberg.treesitterlanguagepack.TreeSitterLanguagePack;
class Main {
public static void main(String[] args) throws Exception {
ProcessConfig config = ProcessConfig.builder()
.withLanguage("java")
.withStructure(true)
.withImports(true)
.build();
ProcessResult result = TreeSitterLanguagePack.process(
"import java.util.List;\npublic class App { public void run() {} }",
config);
System.out.println("Language: " + result.language());
for (StructureItem item : result.structure()) {
System.out.println(item.kind() + ": " + item.name());
}
}
}
using TreeSitterLanguagePack;
var registry = LanguageRegistry.Default();
var config = new ProcessConfig
{
Language = "csharp",
Structure = true,
Imports = true
};
var result = registry.Process("public class Greeter { }", config);
foreach (var item in result.Structure)
{
Console.WriteLine($"Kind: {item.Kind}, Name: {item.Name}");
}
registry.Dispose();
{:ok, json} =
TreeSitterLanguagePack.process(
"defmodule MyApp do\n def hello, do: :world\nend",
~s({"language": "elixir", "structure": true, "imports": true})
)
result = Jason.decode!(json)
IO.puts("Language: #{result["language"]}")
for item <- result["structure"] do
IO.puts("#{item["kind"]}: #{item["name"]}")
end
import 'package:tree_sitter_language_pack/tree_sitter_language_pack.dart';
import 'package:tree_sitter_language_pack/src/tree_sitter_language_pack_bridge_generated/frb_generated.dart'
show RustLib;
void main() async {
await RustLib.init();
const config = ProcessConfig(
language: 'python',
structure: true,
imports: true,
exports: false,
comments: false,
docstrings: false,
symbols: false,
diagnostics: false,
);
const source = '''
import os
from pathlib import Path
def read_file(path: str) -> str:
return Path(path).read_text()
class FileReader:
def __init__(self, base_dir: str):
self.base_dir = base_dir
''';
final result = await TreeSitterLanguagePackBridge.process(source, config);
for (final item in result.structure) {
print('${item.kind}: ${item.name ?? "<anonymous>"}');
}
}
import TreeSitterLanguagePack
import RustBridge
let config = try processConfigFromJson(#"""
{
"language": "python",
"structure": true,
"imports": true
}
"""#)
let source = """
import os
from pathlib import Path
def read_file(path: str) -> str:
return Path(path).read_text()
class FileReader:
def __init__(self, base_dir: str):
self.base_dir = base_dir
"""
let result = try process(source, config)
for item in result.structure() {
let kind = item.kind().toString()
let name = item.name()?.toString() ?? "<anonymous>"
print("\(kind): \(name)")
}
for imp in result.imports() {
print("import: \(imp.source().toString())")
}
const std = @import("std");
const tslp = @import("tree_sitter_language_pack");
pub fn main() !void {
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
defer _ = gpa.deinit();
const allocator = gpa.allocator();
const source = "def hello():\n pass\n\nimport os\n";
const config_json =
\\{"language":"python","structure":true,"imports":true,"exports":false,
\\"comments":false,"docstrings":false,"symbols":false,"diagnostics":false,
\\"chunk_max_size":null}
;
const result_json = try tslp.process(source, config_json);
defer std.heap.c_allocator.free(result_json);
var parsed = try std.json.parseFromSlice(std.json.Value, allocator, result_json, .{});
defer parsed.deinit();
const structure = parsed.value.object.get("structure").?.array;
for (structure.items) |item| {
const kind_value = item.object.get("kind").?;
const kind_name = switch (kind_value) {
.string => |s| s,
.object => |obj| obj.keys()[0],
else => "unknown",
};
const name_value = item.object.get("name") orelse std.json.Value{ .null = {} };
const name_str = if (name_value == .string) name_value.string else "<anonymous>";
std.debug.print("{s}: {s}\n", .{ kind_name, name_str });
}
}
import dev.kreuzberg.tslp.android.TreeSitterLanguagePack
import dev.kreuzberg.tslp.android.ProcessConfig
suspend fun analyzeCode(source: String) {
val config = ProcessConfig(
language = "kotlin",
structure = true,
imports = true,
exports = false,
comments = false,
docstrings = false,
symbols = false,
diagnostics = false,
chunks = null
)
val result = TreeSitterLanguagePack.processAsync(source, config)
println("Language: ${result.language}")
println("Detected ${result.structure.size} structural items")
for (item in result.structure) {
println("${item.kind}: ${item.name}")
for (child in item.children) {
println(" └ ${child.kind}: ${child.name}")
}
}
}
import { process } from "@kreuzberg/tree-sitter-language-pack-wasm";
const result = process("function add(a, b) { return a + b; }", {
language: "javascript",
structure: true,
imports: true,
exports: true,
comments: false,
docstrings: false,
symbols: false,
diagnostics: false,
});
console.log(`Language: ${result.language}`);
for (const item of result.structure) {
console.log(`${item.kind}: ${item.name ?? "(anonymous)"}`);
}
use tree_sitter_language_pack::{ProcessConfig, process};
fn main() -> anyhow::Result<()> {
let config = ProcessConfig::new("python")
.all()
.with_chunking(1000);
let result = process("def hello(): pass\ndef world(): pass", &config)?;
for item in &result.structure {
// item.kind implements Debug (not Display); item.name is Option<String>
println!("{:?}: {}", item.kind, item.name.as_deref().unwrap_or("<unnamed>"));
}
for chunk in &result.chunks {
println!("chunk: lines {}-{}", chunk.start_line, chunk.end_line);
}
Ok(())
}
5. Run Extraction Queries¶
Use extract to run custom tree-sitter queries and get structured results with captured text and metadata.
import tree_sitter_language_pack as tslp
source = """
def greet(name: str) -> str:
return f"Hello, {name}!"
def farewell(name: str) -> str:
return f"Goodbye, {name}!"
"""
result = tslp.extract(source, {
"language": "python",
"patterns": {
"functions": {
"query": "(function_definition name: (identifier) @name)",
"capture_output": "Text",
}
}
})
for match in result["results"]["functions"]["matches"]:
print(match["captures"][0]["text"])
# greet
# farewell
6. Chunk for LLMs¶
Split code at natural boundaries so language models receive coherent, complete units — ideal for embedding pipelines and context windows.
from tree_sitter_language_pack import process, ProcessConfig
with open("large_module.py") as f:
source = f.read()
config = ProcessConfig(
language="python",
chunk_max_size=1500, # max bytes per chunk
structure=True,
)
result = process(source, config)
for i, chunk in enumerate(result["chunks"]):
print(f"Chunk {i}: lines {chunk['start_line']}-{chunk['end_line']} "
f"({chunk['end_byte'] - chunk['start_byte']} bytes)")
import { process } from "@kreuzberg/tree-sitter-language-pack";
import { readFileSync } from "fs";
const source = readFileSync("large_module.ts", "utf8");
const result = await process(source, {
language: "typescript",
chunkMaxSize: 1500,
structure: true,
});
result.chunks.forEach((chunk, i) => {
console.log(`Chunk ${i}: lines ${chunk.startLine}-${chunk.endLine} (${chunk.endByte - chunk.startByte} bytes)`);
});
You now have the full workflow: install, download, parse, extract intelligence, run queries, and chunk for LLMs. Go further with the following guides:
- Parsing guide — syntax trees, error handling, and incremental parsing
- Configuration — language-pack.toml and advanced options
- API Reference — full API docs for every binding