Kimi K2 File API Deep Dive: Long Document Processing and Structured Extraction
Complete Kimi K2 File API tutorial: PDF/Word/Excel/PPT file upload, long-document Q&A, table structured extraction, batch document processing. Includes Moonshot platform 128K context hands-on code.
What This Tutorial Solves
Kimi’s biggest advantage is its 128K ultra-long context + file understanding. You will learn:
- Upload PDF/Word/Excel/PPT/image files
- Analyze multiple documents simultaneously with 128K context
- Extract structured table data
- Intelligent batch document processing
- File conversation API integration
🎯 Kimi’s file processing capability is among the strongest of all Chinese AI models. 128K context = read a 200,000-word book in one go.
Moonshot File API Basics
from openai import OpenAI
import os
# Shared API client: the stock OpenAI SDK client pointed at Moonshot's base URL.
# The key is read from the MOONSHOT_API_KEY environment variable.
client = OpenAI(
    base_url="https://api.moonshot.cn/v1",
    api_key=os.getenv("MOONSHOT_API_KEY"),
)
Uploading Files
def upload_file(file_path: str) -> str:
    """Upload a local file to Moonshot for content extraction.

    Args:
        file_path: Path of the file to upload.

    Returns:
        The ``id`` of the file object created by the API.
    """
    # Use a context manager so the file handle is closed even if the upload
    # raises, instead of leaking an open descriptor per call.
    with open(file_path, "rb") as fh:
        file_object = client.files.create(
            file=fh,
            purpose="file-extract",  # Extract file contents
        )
    print(f"File uploaded: {file_object.id} ({file_object.bytes} bytes)")
    return file_object.id
# Example: upload one PDF and one spreadsheet, keeping the returned file IDs
pdf_id = upload_file(file_path="annual_report.pdf")
excel_id = upload_file(file_path="sales_data.xlsx")
Listing / Deleting Files
# Enumerate the files returned by the file-list endpoint
files = client.files.list()
for f in files.data:
    print(f"{f.id}: {f.filename} ({f.bytes} bytes, {f.created_at})")

# Remove a file by its ID
client.files.delete(pdf_id)
Single Document Deep Q&A
def chat_with_document(file_id: str, question: str) -> str:
    """Answer a question using the extracted text of an uploaded file.

    Args:
        file_id: ID of a file uploaded with purpose "file-extract".
        question: The user's question about the document.

    Returns:
        The text of the model's first choice (empty string if the API
        returned no text).
    """
    # A bare file ID in a message is just an opaque string to the model.
    # Moonshot's documented flow is to fetch the extracted text first and
    # send that text as a system message.
    file_content = client.files.content(file_id=file_id).text

    response = client.chat.completions.create(
        model="moonshot-v1-128k",
        messages=[
            {
                "role": "system",
                "content": "你是 Kimi,一个擅长长文档分析的 AI 助手。用用户的文件内容回答问题,引用具体页码和段落。",
            },
            {
                "role": "system",
                "content": file_content,  # Extracted document text
            },
            {"role": "user", "content": question},
        ],
        temperature=0.3,
    )
    return response.choices[0].message.content or ""
# Example: ask for the report's key conclusions and supporting data
answer = chat_with_document(
    file_id=pdf_id,
    question="这份报告的核心结论是什么?用了哪些关键数据支撑?",
)
print(answer)
Multi-Document Comparative Analysis
def compare_documents(file_ids: list[str], compare_aspect: str) -> str:
    """Compare several uploaded documents on a given aspect.

    Args:
        file_ids: IDs of files uploaded with purpose "file-extract".
        compare_aspect: The aspect(s) to compare; inserted into the user prompt.

    Returns:
        The text of the model's first choice (empty string if the API
        returned no text).
    """
    system_content = (
        "你是文档分析专家。对比以下文档,找出它们在指定方面的异同。\n"
        "引用时标明来源文档。"
    )
    messages = [{"role": "system", "content": system_content}]

    # One system message per document. The extracted text is sent, not the
    # file ID, which the model would otherwise see as an opaque string. Each
    # message is labelled so the model can cite its source document.
    for i, fid in enumerate(file_ids, 1):
        file_content = client.files.content(file_id=fid).text
        messages.append({
            "role": "system",
            "content": f"[Document {i}]\n{file_content}",
        })

    messages.append({
        "role": "user",
        "content": f"请对比这些文档在「{compare_aspect}」方面的主要内容、异同点和各自特点。",
    })

    response = client.chat.completions.create(
        model="moonshot-v1-128k",
        messages=messages,
        temperature=0.3,
        max_tokens=4096,
    )
    return response.choices[0].message.content or ""
# Example: compare three competitive analysis reports.
# NOTE(review): pdf_id_1 / pdf_id_2 / pdf_id_3 are not defined in this snippet;
# presumably they are file IDs returned by earlier upload_file() calls - confirm.
comparison = compare_documents(
    file_ids=[pdf_id_1, pdf_id_2, pdf_id_3],
    compare_aspect="定价策略、目标客户群、核心功能差异",
)
print(comparison)
Excel Table Structured Extraction
import json
def _parse_json_array(text: str):
    """Return the first JSON array embedded in *text*, or None if there is none."""
    decoder = json.JSONDecoder()
    pos = text.find("[")
    while pos != -1:
        try:
            # raw_decode stops at the end of the first complete JSON value,
            # so trailing prose (even prose containing "]") is ignored.
            value, _ = decoder.raw_decode(text, pos)
        except json.JSONDecodeError:
            pass
        else:
            if isinstance(value, list):
                return value
        pos = text.find("[", pos + 1)
    return None


def extract_table_data(file_id: str) -> list[dict]:
    """Extract structured table data from an uploaded Excel/CSV file.

    Args:
        file_id: ID of a file uploaded with purpose "file-extract".

    Returns:
        The JSON array parsed from the model's reply. If no JSON array can be
        parsed, a single-element list ``[{"raw": <reply text>}]`` is returned
        so the caller still sees the raw reply.
    """
    # Send the extracted text itself: a file ID embedded in the prompt is
    # only an opaque string to the model.
    file_content = client.files.content(file_id=file_id).text

    response = client.chat.completions.create(
        model="moonshot-v1-128k",
        messages=[
            {
                "role": "system",
                "content": (
                    "你是数据分析助手。\n"
                    "请提取文件中的所有表格数据,以 JSON 数组格式输出。\n"
                    "每条记录包含所有列字段。如果有多个 Sheet,用 sheet_name 区分。"
                ),
            },
            {
                "role": "system",
                "content": file_content,
            },
            {
                "role": "user",
                "content": "请提取这个 Excel 文件中的所有数据,以 JSON 格式返回。",
            },
        ],
        temperature=0,  # Use temperature=0 for data extraction
        max_tokens=8192,
    )
    # message.content may be None; normalise so the parsing below cannot crash.
    content = response.choices[0].message.content or ""

    rows = _parse_json_array(content)
    if rows is not None:
        return rows
    return [{"raw": content}]
# Example: extract the spreadsheet and preview the first five records
data = extract_table_data(file_id=excel_id)
for row in data[:5]:
    print(row)
Intelligent Table Q&A
def query_table(file_id: str, query: str) -> str:
    """Answer a natural-language question about an uploaded table file.

    Args:
        file_id: ID of a file uploaded with purpose "file-extract".
        query: The question to answer from the table data.

    Returns:
        The text of the model's first choice (empty string if the API
        returned no text).
    """
    # The model cannot look a file up by ID; fetch the extracted text and
    # send it as its own system message.
    file_content = client.files.content(file_id=file_id).text

    response = client.chat.completions.create(
        model="moonshot-v1-128k",
        messages=[
            {
                "role": "system",
                "content": "你是一个数据分析师。基于以下文件中的数据回答问题。",
            },
            {
                "role": "system",
                "content": file_content,
            },
            {
                "role": "user",
                "content": (
                    "基于表格数据回答:\n"
                    f"{query}\n"
                    "请给出具体数值和分析,不要只说结论。"
                ),
            },
        ],
        temperature=0.1,
    )
    return response.choices[0].message.content or ""
# Example: ask a question about the spreadsheet in plain language
result = query_table(
    file_id=excel_id,
    query="Q3销售额最高的前5个产品是什么?它们的环比增长率是多少?",
)
print(result)
Batch Document Processing Pipeline
import time
from concurrent.futures import ThreadPoolExecutor
def batch_process_documents(
    file_paths: list[str],
    task: str,
    max_workers: int = 2,
    delay: float = 1.0,
) -> list[dict]:
    """Upload and process documents in batch.

    Each file is uploaded, analysed with *task* as the instruction, and then
    deleted from the server. Failures are reported per file and never raised.

    Args:
        file_paths: Paths of the documents to process.
        task: Instruction applied to every document.
        max_workers: Maximum number of documents processed concurrently.
        delay: Seconds to wait between task submissions (rate limiting).
            Use 0 to disable.

    Returns:
        One dict per input path, in input order. On success it holds
        ``file``, ``status="success"`` and ``result``; on failure ``file``,
        ``status="error"`` and ``error``. If deleting the uploaded file
        fails, a ``cleanup_error`` key is added.
    """

    def process_one(path: str) -> dict:
        # Upload
        try:
            file_id = upload_file(path)
        except Exception as e:
            return {"file": path, "status": "error", "error": str(e)}

        # Analyze
        try:
            # Send the extracted text; a bare file ID is an opaque string
            # to the model.
            file_content = client.files.content(file_id=file_id).text
            response = client.chat.completions.create(
                model="moonshot-v1-128k",
                messages=[
                    {
                        "role": "system",
                        "content": f"你是文档处理助手。{task}",
                    },
                    {
                        "role": "system",
                        "content": file_content,
                    },
                    {"role": "user", "content": f"请对这份文档执行:{task}"},
                ],
                temperature=0.3,
            )
            outcome = {
                "file": path,
                "status": "success",
                "result": response.choices[0].message.content,
            }
        except Exception as e:
            outcome = {"file": path, "status": "error", "error": str(e)}

        # Clean up on both success and failure so failed analyses do not
        # leave uploaded files behind on the server.
        try:
            client.files.delete(file_id)
        except Exception as e:
            outcome["cleanup_error"] = str(e)
        return outcome

    # Parallel processing (mind API rate limits). The pause sits between
    # submissions: sleeping while collecting results would not slow down
    # requests that have already been queued.
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = []
        for index, path in enumerate(file_paths):
            if index and delay > 0:
                time.sleep(delay)
            futures.append(executor.submit(process_one, path))
        return [future.result() for future in futures]
# Example: extract the key clauses from a set of contracts
contracts = ["contract1.pdf", "contract2.pdf", "contract3.pdf"]
results = batch_process_documents(
    file_paths=contracts,
    task="提取合同的关键条款:签约方、金额、期限、违约责任",
)
Kimi Context Management Strategy
class KimiSession:
    """Manage a multi-turn Kimi conversation grounded in uploaded files.

    Public fields:
        messages: Conversation history as chat-message dicts.
        files: Mapping of display name -> uploaded file ID.
        total_tokens: Current estimate of the tokens occupying the context.
    """

    # Context size (tokens) used for the usage warning.
    CONTEXT_LIMIT = 128000
    # Number of most recent history messages sent with each request
    # (10 messages == 5 user/assistant turns).
    HISTORY_WINDOW = 10

    def __init__(self):
        self.messages = []
        self.files = {}
        self.total_tokens = 0
        # Extracted text per file ID, so each file is fetched only once.
        self._file_texts = {}

    def add_file(self, name: str, file_path: str) -> None:
        """Upload *file_path* and register it in the session under *name*."""
        file_id = upload_file(file_path)
        self.files[name] = file_id
        # Files also consume tokens. This is a provisional estimate; the next
        # ask() replaces it with the usage figure reported by the API.
        self.total_tokens += self._estimate_file_tokens(file_path)

    def _file_text(self, file_id: str) -> str:
        """Return the extracted text for *file_id*, fetching it on first use."""
        if file_id not in self._file_texts:
            self._file_texts[file_id] = client.files.content(file_id=file_id).text
        return self._file_texts[file_id]

    def ask(self, question: str) -> str:
        """Ask a question against all session files plus recent history.

        Args:
            question: The user's question.

        Returns:
            The text of the model's first choice.
        """
        current_msg = [
            {"role": "system", "content": "你是 Kimi,擅长综合分析多个文档。"},
        ]
        # Add all files. The extracted text is sent, not the file ID, which
        # the model would only see as an opaque string.
        for name, fid in self.files.items():
            current_msg.append({
                "role": "system",
                "content": f"[{name}]\n{self._file_text(fid)}",
            })
        # Keep system messages (e.g. the summary written by summarize_so_far)
        # even after they have scrolled out of the recent-history window.
        older = self.messages[:-self.HISTORY_WINDOW]
        current_msg.extend(m for m in older if m.get("role") == "system")
        # Add the most recent conversation history
        current_msg.extend(self.messages[-self.HISTORY_WINDOW:])
        # Add new question
        current_msg.append({"role": "user", "content": question})

        response = client.chat.completions.create(
            model="moonshot-v1-128k",
            messages=current_msg,
            temperature=0.5,
        )
        answer = response.choices[0].message.content

        # Save to history
        self.messages.append({"role": "user", "content": question})
        self.messages.append({"role": "assistant", "content": answer})

        # usage.total_tokens already covers the whole prompt (files + history)
        # plus this reply, so it *is* the current context size. Summing it
        # across turns would count the same files and history repeatedly.
        self.total_tokens = response.usage.total_tokens

        # Check context usage
        usage_pct = self.total_tokens / self.CONTEXT_LIMIT * 100
        if usage_pct > 70:
            print(f"⚠️ Context usage at {usage_pct:.0f}%, consider clearing")
        return answer

    def _estimate_file_tokens(self, file_path: str) -> int:
        """Estimate a file's token count from its size on disk."""
        size = os.path.getsize(file_path)
        # Crude heuristic: 0.7 tokens per byte. For binary formats the byte
        # size is only loosely related to the length of the extracted text.
        return int(size * 0.7)

    def summarize_so_far(self) -> str:
        """Compress context by replacing the history with a model-written summary.

        The summary is produced through ask(), so it covers the messages that
        ask() sends (the recent-history window plus any earlier summary).

        Returns:
            The summary text.
        """
        summary_prompt = "请总结以上的对话要点,保留关键信息和结论。"
        summary = self.ask(summary_prompt)
        # Replace history with summary
        self.messages = [
            {"role": "system", "content": f"之前的对话摘要:{summary}"},
        ]
        return summary
Supported File Formats
| Format | Extensions | Max Size | Notes |
|---|---|---|---|
| PDF | .pdf | 100 MB | Includes scanned document OCR |
| Word | .doc .docx | 100 MB | Includes tables/images |
| Excel | .xls .xlsx | 100 MB | Multiple sheets |
| PPT | .ppt .pptx | 100 MB | Includes notes |
| Images | .jpg .png | 20 MB | OCR text extraction |
| Text | .txt .md | 10 MB | Raw text |
FAQ
Q: Can the 128K context really be fully utilized?
A: Theoretically yes, but in practice, beyond 80K tokens, the model’s attention to the middle portion declines. Recommendation: place key information at the beginning or end, and preprocess middle sections with summaries.
Q: How long are uploaded files retained?
A: Moonshot files are retained on the server for 7 days. For long-term use, upload, analyze, and save results immediately.
Next Steps
📝 Based on Moonshot API + Kimi K2, tested June 2026.