diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..f5f67b6 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,74 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## 项目概述 + +AI 智能客服系统,基于 Spring AI Alibaba + 通义千问 + PGVector,支持 RAG 知识库检索、多轮对话、结构化数据提取、知识库全生命周期管理。 + +## 构建与运行 + +```bash +# 编译 +./mvnw compile + +# 运行(端口 9090) +./mvnw spring-boot:run + +# 运行测试 +./mvnw test + +# 运行单个测试类 +./mvnw test -Dtest=SupportBotApplicationTests + +# 运行单个测试方法 +./mvnw test -Dtest=SupportBotApplicationTests#testRag +``` + +**前提条件**: PostgreSQL 12+ 需运行且安装 PGVector 扩展,数据库 `support_bot` 需存在。`knowledge_category` 和 `knowledge_document` 表由 `DatabaseInitConfig` 自动创建,无需手动建表。 + +## 核心架构决策 + +### 主启动类排除了 PgVectorStoreAutoConfiguration +`SupportBotApplication.java` 中 `@SpringBootApplication(exclude = PgVectorStoreAutoConfiguration.class)`,因为项目在 `PgVectorStoreConfig` 中手动配置 PgVectorStore Bean(标记 `@Primary`),不使用自动配置。另有一个 `InMemoryVectorStoreConfig` 作为开发备选。 + +### Spring AI 集成模式 +- **ChatClient Builder**: 所有对话通过 `ChatClient.builder(dashscopeChatModel)` 构建 +- **Advisor 链**: `MessageChatMemoryAdvisor`(记忆) → `MyLoggerAdvisor`(日志) → `QuestionAnswerAdvisor`(RAG) +- **结构化输出**: `ProductInfoApp` 使用 `.entity(ProductInfo.class)` 提取结构化数据 +- **SSE 流式**: 三种实现 — `Flux<String>`、`Flux<ServerSentEvent<String>>`、`SseEmitter` + +### ChatMemory 持久化 +当前使用 `DatabaseChatMemory`(PostgreSQL 持久化),`FileBasedChatMemory`(Kryo 序列化)已注释掉。`ProductInfoApp` 单独使用 `InMemoryChatMemory`。 + +### RAG 双模式 +1. **QuestionAnswerAdvisor 模式**(生产使用): 预检索优化 + `QuestionAnswerAdvisor` +2. 
**RetrievalAugmentationAdvisor 模式**(实验性): `doChatWithRagEnhance()` 中 `queryTransformers` 和 `multiQueryExpander` 未生效 + +### 文档处理管道 +`DocumentService.uploadDocument()` 统一流程:文档提取 → `MyTokenTextSplitter` 分块 → `MyKeywordEnricher` AI 关键词提取 → `pgVectorVectorStore.add()` 向量化存储。每个分块的 metadata 中注入 `documentId`、`chunkIndex`、`sourceName`、`title` 以关联 `knowledge_document` 表。 + +### 预检索查询优化 +四种策略在 `rag/preretrieval/` 下,由 `AssistantApp.doChatWithRagStrategy()` 根据 `strategy` 参数动态选择:REWRITE / TRANSLATION / COMPRESSION / MULTI_QUERY。另存在 Bean 配置版本(`QueryTransformerConfig`、`QueryExpanderConfig`),但实际使用自定义 Rewriter 组件。 + +## 关键配置 + +- `application.yml` 含 API Key,已被 `.gitignore` 排除 +- MyBatis Plus 逻辑删除字段: `isDelete`,主键策略: `assign_id`(雪花算法) +- PostgreSQL JSONB 字段使用自定义 `PostgresJsonTypeHandler`(期望 JSON 对象,非数组) +- 向量维度: 1536,距离类型: COSINE_DISTANCE,索引: HNSW + +## API 路由约定 + +- AI 对话: `/ai/*`(`AiController`) +- 文档上传: `/upload/*`(`DocumentController`) +- 文档管理: `/document/*`(`DocumentController`) +- 分类管理: `/category/*`(`DocumentController`) + +## 已知 TODO + +- `AssistantApp.doChatWithRagEnhance()`: `queryTransformers` 未生效 +- `DocumentService.updateDocumentMetadata()`: Spring AI 无直接更新 vector_store metadata 的 API,向量元数据同步留后续 +- `DocumentService.searchDocuments()`: Spring AI 1.0.0-M6 的 filter 支持有限,分类过滤暂未实现 +- `CompressionQueryRewriter`: 当前传入空历史列表 +- MyBatis Plus 3.5.12 的 `mybatis-plus-spring-boot3-starter` 不含 `PaginationInnerInterceptor`,分页通过 SQL `LIMIT/OFFSET` 手动实现 diff --git a/frontend.html b/frontend.html index dc3ad60..221f444 100644 --- a/frontend.html +++ b/frontend.html @@ -723,7 +723,7 @@ async function loadCategories() { $('categoryList').innerHTML = list.map(c => `
${c.name} ${c.description||''} - +
` ).join(''); } @@ -794,7 +794,7 @@ async function loadDocuments(page=1) { } else { $('docTableBody').innerHTML = docs.map(d => { const statusClass = d.status==='READY'?'status-ready':d.status==='PROCESSING'?'status-processing':'status-failed'; - const catName = categoryMap[d.categoryId] || (d.categoryId>0?'未知':'未分类'); + const catName = categoryMap[d.categoryId] || (d.categoryId && d.categoryId!=='0'?'未知':'未分类'); return ` ${d.id} ${d.title}
${d.sourceName||''} @@ -803,9 +803,9 @@ async function loadDocuments(page=1) { ${d.chunkCount} ${formatDate(d.createTime)} - - - + + + `; }).join(''); @@ -844,7 +844,7 @@ async function viewDocDetail(id) { const d = detailJson.data; const chunks = chunksJson.success ? (chunksJson.data || []) : []; const statusClass = d.status==='READY'?'status-ready':d.status==='PROCESSING'?'status-processing':'status-failed'; - const catName = categoryMap[d.categoryId] || (d.categoryId>0?'未知':'未分类'); + const catName = categoryMap[d.categoryId] || (d.categoryId && d.categoryId!=='0'?'未知':'未分类'); let html = `
diff --git a/src/main/java/com/wok/supportbot/entity/KnowledgeCategory.java b/src/main/java/com/wok/supportbot/entity/KnowledgeCategory.java index b2f9d50..5c71b81 100644 --- a/src/main/java/com/wok/supportbot/entity/KnowledgeCategory.java +++ b/src/main/java/com/wok/supportbot/entity/KnowledgeCategory.java @@ -1,6 +1,8 @@ package com.wok.supportbot.entity; import com.baomidou.mybatisplus.annotation.*; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import com.fasterxml.jackson.databind.ser.std.ToStringSerializer; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; @@ -25,6 +27,7 @@ public class KnowledgeCategory implements Serializable { private static final long serialVersionUID = 1L; @TableId(value = "id", type = IdType.ASSIGN_ID) + @JsonSerialize(using = ToStringSerializer.class) private Long id; /** @@ -43,6 +46,7 @@ public class KnowledgeCategory implements Serializable { * 父分类ID - 0表示顶级分类 */ @TableField("parent_id") + @JsonSerialize(using = ToStringSerializer.class) private Long parentId; /** diff --git a/src/main/java/com/wok/supportbot/entity/KnowledgeDocument.java b/src/main/java/com/wok/supportbot/entity/KnowledgeDocument.java index dc09f3e..83dff08 100644 --- a/src/main/java/com/wok/supportbot/entity/KnowledgeDocument.java +++ b/src/main/java/com/wok/supportbot/entity/KnowledgeDocument.java @@ -1,6 +1,8 @@ package com.wok.supportbot.entity; import com.baomidou.mybatisplus.annotation.*; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import com.fasterxml.jackson.databind.ser.std.ToStringSerializer; import com.wok.supportbot.handler.PostgresJsonTypeHandler; import lombok.AllArgsConstructor; import lombok.Builder; @@ -27,6 +29,7 @@ public class KnowledgeDocument implements Serializable { private static final long serialVersionUID = 1L; @TableId(value = "id", type = IdType.ASSIGN_ID) + @JsonSerialize(using = ToStringSerializer.class) private Long id; /** @@ -63,6 +66,7 @@ public class 
KnowledgeDocument implements Serializable { * 所属分类ID - 0表示未分类 */ @TableField("category_id") + @JsonSerialize(using = ToStringSerializer.class) private Long categoryId; /** diff --git a/src/main/resources/static/frontend.html b/src/main/resources/static/frontend.html index dc3ad60..221f444 100644 --- a/src/main/resources/static/frontend.html +++ b/src/main/resources/static/frontend.html @@ -723,7 +723,7 @@ async function loadCategories() { $('categoryList').innerHTML = list.map(c => `
${c.name} ${c.description||''} - +
` ).join(''); } @@ -794,7 +794,7 @@ async function loadDocuments(page=1) { } else { $('docTableBody').innerHTML = docs.map(d => { const statusClass = d.status==='READY'?'status-ready':d.status==='PROCESSING'?'status-processing':'status-failed'; - const catName = categoryMap[d.categoryId] || (d.categoryId>0?'未知':'未分类'); + const catName = categoryMap[d.categoryId] || (d.categoryId && d.categoryId!=='0'?'未知':'未分类'); return ` ${d.id} ${d.title}
${d.sourceName||''} @@ -803,9 +803,9 @@ async function loadDocuments(page=1) { ${d.chunkCount} ${formatDate(d.createTime)} - - - + + + `; }).join(''); @@ -844,7 +844,7 @@ async function viewDocDetail(id) { const d = detailJson.data; const chunks = chunksJson.success ? (chunksJson.data || []) : []; const statusClass = d.status==='READY'?'status-ready':d.status==='PROCESSING'?'status-processing':'status-failed'; - const catName = categoryMap[d.categoryId] || (d.categoryId>0?'未知':'未分类'); + const catName = categoryMap[d.categoryId] || (d.categoryId && d.categoryId!=='0'?'未知':'未分类'); let html = `