Quick Start
This guide walks you from installation to parsing code in 5 minutes.
Step 1 — Install
Step 2 — Get a Parser
Parsers are downloaded automatically on first use. You can also pre-download them for offline use.
Step 3 — Parse Code
With a parser in hand, build a concrete syntax tree from source code.
from tree_sitter_language_pack import get_parser
parser = get_parser("python")
source = b"""
def greet(name: str) -> str:
return f"Hello, {name}!"
result = greet("world")
"""
tree = parser.parse(source)
root = tree.root_node
print(root.type) # module
print(root.child_count) # 2
print(root.sexp()[:120]) # S-expression of the tree
import { getParser } from "@kreuzberg/tree-sitter-language-pack";
const parser = await getParser("javascript");
const source = `
function greet(name) {
return \`Hello, \${name}!\`;
}
greet("world");
`;
const tree = parser.parse(source);
const root = tree.rootNode;
console.log(root.type); // program
console.log(root.childCount); // 2
console.log(root.toString().slice(0, 120));
use ts_pack_core::get_parser;
fn main() -> anyhow::Result<()> {
let mut parser = get_parser("rust")?;
let source = r#"
fn greet(name: &str) -> String {
format!("Hello, {}!", name)
}
"#;
let tree = parser.parse(source, None).unwrap();
let root = tree.root_node();
println!("{}", root.kind()); // source_file
println!("{}", root.child_count()); // 1
println!("{}", root.to_sexp());
Ok(())
}
Step 4 — Extract Code Intelligence
Go beyond the raw syntax tree. Extract functions, classes, imports, and more with the `process` function.
from tree_sitter_language_pack import process, ProcessConfig
source = """
import os
from pathlib import Path
def read_file(path: str) -> str:
\"\"\"Read and return the contents of a file.\"\"\"
return Path(path).read_text()
class FileManager:
def __init__(self, base_dir: str):
self.base_dir = base_dir
def get(self, name: str) -> str:
return read_file(os.path.join(self.base_dir, name))
"""
config = ProcessConfig(
language="python",
structure=True, # functions and classes
imports=True, # import statements
comments=True, # inline comments
docstrings=True, # docstring extraction
)
result = process(source, config)
print(f"Imports: {[i['name'] for i in result['imports']]}")
print(f"Symbols: {[s['name'] for s in result['structure']]}")
print(f"Docstring: {result['structure'][0]['docstring']}")
import { process } from "@kreuzberg/tree-sitter-language-pack";
const source = `
import fs from "fs";
import { join } from "path";
/**
* Read and return the contents of a file.
*/
function readFile(path: string): string {
return fs.readFileSync(path, "utf8");
}
class FileManager {
constructor(private baseDir: string) {}
get(name: string): string {
return readFile(join(this.baseDir, name));
}
}
`;
const result = await process(source, {
language: "typescript",
structure: true,
imports: true,
docstrings: true,
});
console.log("Imports:", result.imports.map(i => i.name));
console.log("Symbols:", result.structure.map(s => s.name));
use ts_pack_core::{process, ProcessConfig};
fn main() -> anyhow::Result<()> {
let source = r#"
use std::fs;
use std::path::Path;
/// Read and return the contents of a file.
fn read_file(path: &str) -> String {
fs::read_to_string(path).unwrap()
}
struct FileManager {
base_dir: String,
}
"#;
let config = ProcessConfig::new("rust")
.structure(true)
.imports(true)
.docstrings(true);
let result = process(source, &config)?;
println!("Imports: {:?}", result.imports.iter().map(|i| &i.name).collect::<Vec<_>>());
println!("Symbols: {:?}", result.structure.iter().map(|s| &s.name).collect::<Vec<_>>());
Ok(())
}
Step 5 — Chunk for LLMs
Split code at natural boundaries so language models receive coherent, complete units.
from tree_sitter_language_pack import process, ProcessConfig
with open("large_module.py") as f:
source = f.read()
config = ProcessConfig(
language="python",
chunk_max_size=1500, # max tokens per chunk
structure=True,
)
result = process(source, config)
for i, chunk in enumerate(result["chunks"]):
print(f"Chunk {i}: {chunk['start_line']}-{chunk['end_line']} "
f"({chunk['token_count']} tokens)")
import { process } from "@kreuzberg/tree-sitter-language-pack";
import { readFileSync } from "fs";
const source = readFileSync("large_module.ts", "utf8");
const result = await process(source, {
language: "typescript",
chunkMaxSize: 1500,
structure: true,
});
result.chunks.forEach((chunk, i) => {
console.log(`Chunk ${i}: lines ${chunk.startLine}-${chunk.endLine} (${chunk.tokenCount} tokens)`);
});
What's Next
- Concepts — Understand the architecture, the download model, and what code intelligence extracts.
- Guides — Deep dives on specific features and real-world use cases.
- API Reference — Full API documentation for every language binding.