Skip to main content


LangChain 提供多种针对特定标记语言和编程语言的文本分割器,可根据各语言自身的语法来分割文本。这样得到的文本块语义更完整、更加自包含,也更适合用于向量存储或其他检索器。JavaScript、Python、Rust 等流行编程语言,以及 LaTeX、HTML 和 Markdown 均受支持。




import {
  SupportedTextSplitterLanguages,
  RecursiveCharacterTextSplitter,
} from "langchain/text_splitter";

console.log(SupportedTextSplitterLanguages); // Array of supported languages

/*
  [
    'cpp', 'go',
    'java', 'js',
    'php', 'proto',
    'python', 'rst',
    'ruby', 'rust',
    'scala', 'swift',
    'markdown', 'latex',
    'html'
  ]
*/

const jsCode = `function helloWorld() {
  console.log("Hello, World!");
}
// Call the function
helloWorld();`;

const splitter = RecursiveCharacterTextSplitter.fromLanguage("js", {
  chunkSize: 32,
  chunkOverlap: 0,
});
const jsOutput = await splitter.createDocuments([jsCode]);


Document {
pageContent: 'function helloWorld() {',
metadata: { loc: [Object] }
Document {
pageContent: 'console.log("Hello, World!");',
metadata: { loc: [Object] }
Document {
pageContent: '}\n// Call the function',
metadata: { loc: [Object] }
Document {
pageContent: 'helloWorld();',
metadata: { loc: [Object] }


import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";

const pythonCode = `def hello_world():
  print("Hello, World!")
# Call the function
hello_world()`;

const splitter = RecursiveCharacterTextSplitter.fromLanguage("python", {
  chunkSize: 32,
  chunkOverlap: 0,
});

const pythonOutput = await splitter.createDocuments([pythonCode]);


Document {
pageContent: 'def hello_world():',
metadata: { loc: [Object] }
Document {
pageContent: 'print("Hello, World!")',
metadata: { loc: [Object] }
Document {
pageContent: '# Call the function',
metadata: { loc: [Object] }
Document {
pageContent: 'hello_world()',
metadata: { loc: [Object] }


import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";

const text = `<!DOCTYPE html>
<html>
  <head>
    <title>🦜️🔗 LangChain</title>
    <style>
      body {
        font-family: Arial, sans-serif;
      }
      h1 {
        color: darkblue;
      }
    </style>
  </head>
  <body>
    <div>
      <h1>🦜️🔗 LangChain</h1>
      <p>⚡ Building applications with LLMs through composability ⚡</p>
    </div>
    <div>
      As an open source project in a rapidly developing field, we are extremely open to contributions.
    </div>
  </body>
</html>`;

const splitter = RecursiveCharacterTextSplitter.fromLanguage("html", {
  chunkSize: 175,
  chunkOverlap: 20,
});
const output = await splitter.createDocuments([text]);


Document {
pageContent: '<!DOCTYPE html>\n<html>',
metadata: { loc: [Object] }
Document {
pageContent: '<head>\n <title>🦜️🔗 LangChain</title>',
metadata: { loc: [Object] }
Document {
pageContent: '<style>\n' +
' body {\n' +
' font-family: Arial, sans-serif;\n' +
' }\n' +
' h1 {\n' +
' color: darkblue;\n' +
' }\n' +
' </style>\n' +
' </head>',
metadata: { loc: [Object] }
Document {
pageContent: '<body>\n' +
' <div>\n' +
' <h1>🦜️🔗 LangChain</h1>\n' +
' <p>⚡ Building applications with LLMs through composability ⚡</p>\n' +
' </div>',
metadata: { loc: [Object] }
Document {
pageContent: '<div>\n' +
' As an open source project in a rapidly developing field, we are extremely open to contributions.\n' +
' </div>\n' +
' </body>\n' +
'</html>',
metadata: { loc: [Object] }


import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";

const text = `\\begin{document}
\\title{🦜️🔗 LangChain}
⚡ Building applications with LLMs through composability ⚡

\\section{Quick Install}

\\begin{verbatim}
Hopefully this code block isn't split
yarn add langchain
\\end{verbatim}

As an open source project in a rapidly developing field, we are extremely open to contributions.

\\end{document}`;

const splitter = RecursiveCharacterTextSplitter.fromLanguage("latex", {
  chunkSize: 100,
  chunkOverlap: 0,
});
const output = await splitter.createDocuments([text]);


Document {
pageContent: '\\begin{document}\n' +
'\\title{🦜️🔗 LangChain}\n' +
'⚡ Building applications with LLMs through composability ⚡',
metadata: { loc: [Object] }
Document {
pageContent: '\\section{Quick Install}',
metadata: { loc: [Object] }
Document {
pageContent: '\\begin{verbatim}\n' +
"Hopefully this code block isn't split\n" +
'yarn add langchain\n' +
'\\end{verbatim}',
metadata: { loc: [Object] }
Document {
pageContent: 'As an open source project in a rapidly developing field, we are extremely open to contributions.',
metadata: { loc: [Object] }
Document {
pageContent: '\\end{document}',
metadata: { loc: [Object] }