From 54f6da84c8ea921d08d01ba1820ca2671776184d Mon Sep 17 00:00:00 2001 From: wanghanlin <1533525126@qq.com> Date: Tue, 23 Jun 2026 08:37:22 +0800 Subject: [PATCH] =?UTF-8?q?=E4=B8=80=E6=9C=9F-=E9=9B=AA=E8=8A=B1=E7=AE=97?= =?UTF-8?q?=E6=B3=95=E7=94=9F=E6=88=90=E7=9A=84=20Long=20=E7=B1=BB?= =?UTF-8?q?=E5=9E=8B=20ID=EF=BC=8818-19=20=E4=BD=8D=E6=95=B0=E5=AD=97?= =?UTF-8?q?=EF=BC=89=E8=B6=85=E8=BF=87=20JavaScript=20=E5=AE=89=E5=85=A8?= =?UTF-8?q?=E6=95=B4=E6=95=B0=E8=8C=83=E5=9B=B4=EF=BC=882^53-1=EF=BC=89?= =?UTF-8?q?=EF=BC=8CJSON=20=20=20=20=20=20=E5=8F=8D=E5=BA=8F=E5=88=97?= =?UTF-8?q?=E5=8C=96=E6=97=B6=E7=B2=BE=E5=BA=A6=E4=B8=A2=E5=A4=B1=EF=BC=8C?= =?UTF-8?q?=E5=AF=BC=E8=87=B4=E5=89=8D=E7=AB=AF=E6=98=BE=E7=A4=BA=E5=92=8C?= =?UTF-8?q?=E4=BC=A0=E5=8F=82=E7=9A=84=20ID=20=E4=B8=8E=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E5=BA=93=E4=B8=AD=E4=B8=8D=E4=B8=80=E8=87=B4=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CLAUDE.md | 74 +++++++++++++++++++ frontend.html | 12 +-- .../supportbot/entity/KnowledgeCategory.java | 4 + .../supportbot/entity/KnowledgeDocument.java | 4 + src/main/resources/static/frontend.html | 12 +-- 5 files changed, 94 insertions(+), 12 deletions(-) create mode 100644 CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..f5f67b6 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,74 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. 
+ +## 项目概述 + +AI 智能客服系统,基于 Spring AI Alibaba + 通义千问 + PGVector,支持 RAG 知识库检索、多轮对话、结构化数据提取、知识库全生命周期管理。 + +## 构建与运行 + +```bash +# 编译 +./mvnw compile + +# 运行(端口 9090) +./mvnw spring-boot:run + +# 运行测试 +./mvnw test + +# 运行单个测试类 +./mvnw test -Dtest=SupportBotApplicationTests + +# 运行单个测试方法 +./mvnw test -Dtest=SupportBotApplicationTests#testRag +``` + +**前提条件**: PostgreSQL 12+ 需运行且安装 PGVector 扩展,数据库 `support_bot` 需存在。`knowledge_category` 和 `knowledge_document` 表由 `DatabaseInitConfig` 自动创建,无需手动建表。 + +## 核心架构决策 + +### 主启动类排除了 PgVectorStoreAutoConfiguration +`SupportBotApplication.java` 中 `@SpringBootApplication(exclude = PgVectorStoreAutoConfiguration.class)`,因为项目在 `PgVectorStoreConfig` 中手动配置 PgVectorStore Bean(标记 `@Primary`),不使用自动配置。另有一个 `InMemoryVectorStoreConfig` 作为开发备选。 + +### Spring AI 集成模式 +- **ChatClient Builder**: 所有对话通过 `ChatClient.builder(dashscopeChatModel)` 构建 +- **Advisor 链**: `MessageChatMemoryAdvisor`(记忆) → `MyLoggerAdvisor`(日志) → `QuestionAnswerAdvisor`(RAG) +- **结构化输出**: `ProductInfoApp` 使用 `.entity(ProductInfo.class)` 提取结构化数据 +- **SSE 流式**: 三种实现 — `Flux<String>`、`Flux<ServerSentEvent<String>>`、`SseEmitter` + +### ChatMemory 持久化 +当前使用 `DatabaseChatMemory`(PostgreSQL 持久化),`FileBasedChatMemory`(Kryo 序列化)已注释掉。`ProductInfoApp` 单独使用 `InMemoryChatMemory`。 + +### RAG 双模式 +1. **QuestionAnswerAdvisor 模式**(生产使用): 预检索优化 + `QuestionAnswerAdvisor` +2. 
**RetrievalAugmentationAdvisor 模式**(实验性): `doChatWithRagEnhance()` 中 `queryTransformers` 和 `multiQueryExpander` 未生效 + +### 文档处理管道 +`DocumentService.uploadDocument()` 统一流程:文档提取 → `MyTokenTextSplitter` 分块 → `MyKeywordEnricher` AI 关键词提取 → `pgVectorVectorStore.add()` 向量化存储。每个分块的 metadata 中注入 `documentId`、`chunkIndex`、`sourceName`、`title` 以关联 `knowledge_document` 表。 + +### 预检索查询优化 +四种策略在 `rag/preretrieval/` 下,由 `AssistantApp.doChatWithRagStrategy()` 根据 `strategy` 参数动态选择:REWRITE / TRANSLATION / COMPRESSION / MULTI_QUERY。另存在 Bean 配置版本(`QueryTransformerConfig`、`QueryExpanderConfig`),但实际使用自定义 Rewriter 组件。 + +## 关键配置 + +- `application.yml` 含 API Key,已被 `.gitignore` 排除 +- MyBatis Plus 逻辑删除字段: `isDelete`,主键策略: `assign_id`(雪花算法) +- PostgreSQL JSONB 字段使用自定义 `PostgresJsonTypeHandler`(期望 JSON 对象,非数组) +- 向量维度: 1536,距离类型: COSINE_DISTANCE,索引: HNSW + +## API 路由约定 + +- AI 对话: `/ai/*`(`AiController`) +- 文档上传: `/upload/*`(`DocumentController`) +- 文档管理: `/document/*`(`DocumentController`) +- 分类管理: `/category/*`(`DocumentController`) + +## 已知 TODO + +- `AssistantApp.doChatWithRagEnhance()`: `queryTransformers` 未生效 +- `DocumentService.updateDocumentMetadata()`: Spring AI 无直接更新 vector_store metadata 的 API,向量元数据同步留后续 +- `DocumentService.searchDocuments()`: Spring AI 1.0.0-M6 的 filter 支持有限,分类过滤暂未实现 +- `CompressionQueryRewriter`: 当前传入空历史列表 +- MyBatis Plus 3.5.12 的 `mybatis-plus-spring-boot3-starter` 不含 `PaginationInnerInterceptor`,分页通过 SQL `LIMIT/OFFSET` 手动实现 diff --git a/frontend.html b/frontend.html index dc3ad60..221f444 100644 --- a/frontend.html +++ b/frontend.html @@ -723,7 +723,7 @@ async function loadCategories() { $('categoryList').innerHTML = list.map(c => `
${c.name} ${c.description||''} - +
` ).join(''); } @@ -794,7 +794,7 @@ async function loadDocuments(page=1) { } else { $('docTableBody').innerHTML = docs.map(d => { const statusClass = d.status==='READY'?'status-ready':d.status==='PROCESSING'?'status-processing':'status-failed'; - const catName = categoryMap[d.categoryId] || (d.categoryId>0?'未知':'未分类'); + const catName = categoryMap[d.categoryId] || (d.categoryId && d.categoryId!=='0'?'未知':'未分类'); return ` ${d.id} ${d.title}
${d.sourceName||''} @@ -803,9 +803,9 @@ async function loadDocuments(page=1) { ${d.chunkCount} ${formatDate(d.createTime)} - - - + + + `; }).join(''); @@ -844,7 +844,7 @@ async function viewDocDetail(id) { const d = detailJson.data; const chunks = chunksJson.success ? (chunksJson.data || []) : []; const statusClass = d.status==='READY'?'status-ready':d.status==='PROCESSING'?'status-processing':'status-failed'; - const catName = categoryMap[d.categoryId] || (d.categoryId>0?'未知':'未分类'); + const catName = categoryMap[d.categoryId] || (d.categoryId && d.categoryId!=='0'?'未知':'未分类'); let html = `
diff --git a/src/main/java/com/wok/supportbot/entity/KnowledgeCategory.java b/src/main/java/com/wok/supportbot/entity/KnowledgeCategory.java index b2f9d50..5c71b81 100644 --- a/src/main/java/com/wok/supportbot/entity/KnowledgeCategory.java +++ b/src/main/java/com/wok/supportbot/entity/KnowledgeCategory.java @@ -1,6 +1,8 @@ package com.wok.supportbot.entity; import com.baomidou.mybatisplus.annotation.*; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import com.fasterxml.jackson.databind.ser.std.ToStringSerializer; import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; @@ -25,6 +27,7 @@ public class KnowledgeCategory implements Serializable { private static final long serialVersionUID = 1L; @TableId(value = "id", type = IdType.ASSIGN_ID) + @JsonSerialize(using = ToStringSerializer.class) private Long id; /** @@ -43,6 +46,7 @@ public class KnowledgeCategory implements Serializable { * 父分类ID - 0表示顶级分类 */ @TableField("parent_id") + @JsonSerialize(using = ToStringSerializer.class) private Long parentId; /** diff --git a/src/main/java/com/wok/supportbot/entity/KnowledgeDocument.java b/src/main/java/com/wok/supportbot/entity/KnowledgeDocument.java index dc09f3e..83dff08 100644 --- a/src/main/java/com/wok/supportbot/entity/KnowledgeDocument.java +++ b/src/main/java/com/wok/supportbot/entity/KnowledgeDocument.java @@ -1,6 +1,8 @@ package com.wok.supportbot.entity; import com.baomidou.mybatisplus.annotation.*; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import com.fasterxml.jackson.databind.ser.std.ToStringSerializer; import com.wok.supportbot.handler.PostgresJsonTypeHandler; import lombok.AllArgsConstructor; import lombok.Builder; @@ -27,6 +29,7 @@ public class KnowledgeDocument implements Serializable { private static final long serialVersionUID = 1L; @TableId(value = "id", type = IdType.ASSIGN_ID) + @JsonSerialize(using = ToStringSerializer.class) private Long id; /** @@ -63,6 +66,7 @@ public class 
KnowledgeDocument implements Serializable { * 所属分类ID - 0表示未分类 */ @TableField("category_id") + @JsonSerialize(using = ToStringSerializer.class) private Long categoryId; /** diff --git a/src/main/resources/static/frontend.html b/src/main/resources/static/frontend.html index dc3ad60..221f444 100644 --- a/src/main/resources/static/frontend.html +++ b/src/main/resources/static/frontend.html @@ -723,7 +723,7 @@ async function loadCategories() { $('categoryList').innerHTML = list.map(c => `
${c.name} ${c.description||''} - +
` ).join(''); } @@ -794,7 +794,7 @@ async function loadDocuments(page=1) { } else { $('docTableBody').innerHTML = docs.map(d => { const statusClass = d.status==='READY'?'status-ready':d.status==='PROCESSING'?'status-processing':'status-failed'; - const catName = categoryMap[d.categoryId] || (d.categoryId>0?'未知':'未分类'); + const catName = categoryMap[d.categoryId] || (d.categoryId && d.categoryId!=='0'?'未知':'未分类'); return ` ${d.id} ${d.title}
${d.sourceName||''} @@ -803,9 +803,9 @@ async function loadDocuments(page=1) { ${d.chunkCount} ${formatDate(d.createTime)} - - - + + + `; }).join(''); @@ -844,7 +844,7 @@ async function viewDocDetail(id) { const d = detailJson.data; const chunks = chunksJson.success ? (chunksJson.data || []) : []; const statusClass = d.status==='READY'?'status-ready':d.status==='PROCESSING'?'status-processing':'status-failed'; - const catName = categoryMap[d.categoryId] || (d.categoryId>0?'未知':'未分类'); + const catName = categoryMap[d.categoryId] || (d.categoryId && d.categoryId!=='0'?'未知':'未分类'); let html = `