diff --git a/chat.html b/chat.html new file mode 100644 index 0000000..8163662 --- /dev/null +++ b/chat.html @@ -0,0 +1,178 @@ + + + + + + AI 智能客服 - 对话窗口 + + + +
+
+ 🤖 AI 智能客服基于通义千问 · 支持多轮对话 +
+
+ 会话ID: --- + 📖 API文档 +
+
+
+
🤖
+
+ 您好!我是电商智能客服助手,可以帮您解答关于商品、订单、支付、物流和售后等问题。
请问有什么可以帮您的? +
+
+
+
+ + +
+
+ + + + diff --git a/frontend.html b/frontend.html new file mode 100644 index 0000000..b39f915 --- /dev/null +++ b/frontend.html @@ -0,0 +1,684 @@ + + + + + +AI 智能客服系统 - Support Bot + + + +
+ AI 智能客服系统 + + 📖 API 文档 + +
+ +
+ + + +
+ +
+ + +
+
+

💬 智能客服对话

+

基于通义千问 · 电商客服场景 · 支持多轮对话上下文记忆

+ +
+ + + + +
+ +
+
+
🤖
+
您好!我是电商智能客服助手。
可以帮您解答商品、订单、支付、物流和售后问题。

💡 提示:右侧下拉可切换对话模式,切换新会话开始全新对话。
+
+
+ +
+ + +
+
+
+ + +
+
+
+ GET + /ai/product_info_app/chat/sync +
+

🏷️ 商品信息结构化提取

+

输入商品描述文本,AI 自动提取:标题、描述、价格、评分、评论数、品牌、分类

+ + + +
+ + + +
+ + +
+
+ + +
+
+

📄 知识库文档管理

+

上传文档到 RAG 知识库,自动分词 → 向量化 → 存入 PGVector,即可用于 AI 检索问答

+ + +
+ + + + + + +
+ + +
+
POST/document/upload/file(Tika 多格式解析)
+
+
📎

点击或拖拽上传,支持多文件(PDF / Word / Excel / PPT / TXT 等)

+ +
+
+ +
+
+ + + + + + + + + + + + + + + +
+ + +
+

📋 最近上传记录

+
暂无上传记录
+
+
+ +
+ +
+ + + + diff --git a/pom.xml b/pom.xml index d79948a..55c6202 100644 --- a/pom.xml +++ b/pom.xml @@ -34,6 +34,13 @@ org.springframework.boot spring-boot-starter-web + + + + org.springframework.boot + spring-boot-starter-logging + + com.alibaba.cloud.ai diff --git a/src/main/java/com/wok/supportbot/app/AssistantApp.java b/src/main/java/com/wok/supportbot/app/AssistantApp.java index a899162..3ee6ce1 100644 --- a/src/main/java/com/wok/supportbot/app/AssistantApp.java +++ b/src/main/java/com/wok/supportbot/app/AssistantApp.java @@ -50,8 +50,8 @@ public class AssistantApp { private final ChatClient chatClient; - private static final String SYSTEM_PROMPT = "你是一名电商平台的智能客服助手,负责解答用户关于商品、订单、支付、物流和售后等问题。" + - "请主动引导用户提供关键信息(如订单号、商品名),并尽量在不转人工的情况下解决问题。保持专业、耐心、礼貌。"; + private static final String SYSTEM_PROMPT = "你是一名智能客服助手,负责解答用户问题。" + + "请主动引导用户提供关键信息,并尽量在不转人工的情况下解决问题。保持专业、耐心、礼貌。"; /** * 初始化 ChatClient @@ -150,6 +150,92 @@ public class AssistantApp { return chatResponse.getResult().getOutput().getText(); } + /** + * 和 RAG 知识库进行对话(支持动态选择查询重写策略) + * + * @param message 用户消息 + * @param chatId 会话ID + * @param strategy 查询重写策略:NONE/REWRITE/TRANSLATION/COMPRESSION/MULTI_QUERY + * @return AI 回答 + */ + public String doChatWithRagStrategy(String message, String chatId, String strategy) { + // 对于 MULTI_QUERY 策略,需要使用特殊的处理方式 + if ("MULTI_QUERY".equalsIgnoreCase(strategy)) { + return doChatWithMultiQueryRag(message, chatId); + } + + // 其他策略:单查询处理 + String rewrittenMessage = message; + + // 根据策略选择对应的 Query Rewriter + if (strategy != null && !strategy.isEmpty()) { + switch (strategy.toUpperCase()) { + case "REWRITE": + rewrittenMessage = rewriteQueryRewriter.doQueryRewrite(message); + break; + case "TRANSLATION": + rewrittenMessage = translationQueryRewriter.doQueryRewrite(message); + break; + case "COMPRESSION": + // 查询压缩需要对话历史,这里传入空列表(简化处理) + // 如果需要利用多轮对话上下文,应该从 chatMemory 中获取历史消息 + rewrittenMessage = compressionQueryRewriter.doQueryRewrite(message, java.util.Collections.emptyList()); + 
break; + case "NONE": + default: + rewrittenMessage = message; + break; + } + } + + ChatResponse chatResponse = chatClient + .prompt() + .user(rewrittenMessage) + .advisors(spec -> spec.param(CHAT_MEMORY_CONVERSATION_ID_KEY, chatId) + .param(CHAT_MEMORY_RETRIEVE_SIZE_KEY, 10)) + // 应用 RAG 知识库问答 + .advisors(QuestionAnswerAdvisor.builder(pgVectorVectorStore) + // 相似度阈值为 0.0,并返回最相关的前 4 个结果 + .searchRequest(SearchRequest.builder().similarityThreshold(0.0).topK(4).build()) + .build()) + .call() + .chatResponse(); + return chatResponse.getResult().getOutput().getText(); + } + + /** + * 使用多路查询扩展的 RAG 对话 + * 将原始查询扩展为多个语义不同的查询,分别检索后合并结果 + * + * @param message 用户消息 + * @param chatId 会话ID + * @return AI 回答 + */ + private String doChatWithMultiQueryRag(String message, String chatId) { + // 执行多路查询扩展,得到多个查询文本 + List expandedQueries = multiQueryExpanderRewriter.doQueryRewrite(message); + + log.info("多路查询扩展结果: {}", expandedQueries); + + // 对每个扩展后的查询执行向量检索,收集所有文档 + // 这里我们使用第一个查询作为主查询进行 RAG 对话 + // (更复杂的实现可以合并多个查询的检索结果) + String primaryQuery = expandedQueries.isEmpty() ? 
message : expandedQueries.get(0); + + ChatResponse chatResponse = chatClient + .prompt() + .user(primaryQuery) + .advisors(spec -> spec.param(CHAT_MEMORY_CONVERSATION_ID_KEY, chatId) + .param(CHAT_MEMORY_RETRIEVE_SIZE_KEY, 10)) + // 应用 RAG 知识库问答 + .advisors(QuestionAnswerAdvisor.builder(pgVectorVectorStore) + // 多路查询时增加 topK 以获取更多相关文档 + .searchRequest(SearchRequest.builder().similarityThreshold(0.0).topK(8).build()) + .build()) + .call() + .chatResponse(); + return chatResponse.getResult().getOutput().getText(); + } @Autowired private List queryTransformers; diff --git a/src/main/java/com/wok/supportbot/config/DatabaseInitConfig.java b/src/main/java/com/wok/supportbot/config/DatabaseInitConfig.java new file mode 100644 index 0000000..2058888 --- /dev/null +++ b/src/main/java/com/wok/supportbot/config/DatabaseInitConfig.java @@ -0,0 +1,117 @@ +package com.wok.supportbot.config; + +import jakarta.annotation.PostConstruct; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.jdbc.core.JdbcTemplate; +import org.springframework.stereotype.Component; + +/** + * 数据库初始化配置 + * 应用启动时检查并创建必要的表 + */ +@Component +@Slf4j +public class DatabaseInitConfig { + + @Autowired + private JdbcTemplate jdbcTemplate; + + @PostConstruct + public void init() { + try { + // 检查 knowledge_category 表是否存在 + boolean categoryTableExists = checkTableExists("knowledge_category"); + if (!categoryTableExists) { + log.info("创建知识库分类表 knowledge_category"); + createCategoryTable(); + } + + // 检查 knowledge_document 表是否存在 + boolean documentTableExists = checkTableExists("knowledge_document"); + if (!documentTableExists) { + log.info("创建知识文档表 knowledge_document"); + createDocumentTable(); + } else { + // 修复已存在表的 tags 默认值(从数组改为对象) + fixTagsDefaultValue(); + } + + log.info("数据库初始化完成"); + } catch (Exception e) { + log.error("数据库初始化失败", e); + } + } + + private boolean checkTableExists(String tableName) { + try { + String sql = "SELECT 1 
FROM " + tableName + " LIMIT 1"; + jdbcTemplate.queryForObject(sql, Integer.class); + return true; + } catch (Exception e) { + return false; + } + } + + private void createCategoryTable() { + String sql = """ + CREATE TABLE IF NOT EXISTS knowledge_category ( + id BIGSERIAL PRIMARY KEY, + name VARCHAR(100) NOT NULL, + description TEXT, + parent_id BIGINT DEFAULT 0 NOT NULL, + sort_order INTEGER DEFAULT 0 NOT NULL, + document_count INTEGER DEFAULT 0 NOT NULL, + create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL, + is_delete BOOLEAN DEFAULT FALSE NOT NULL + ) + """; + jdbcTemplate.execute(sql); + + // 创建索引 + jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_knowledge_category_parent ON knowledge_category (parent_id)"); + } + + private void createDocumentTable() { + String sql = """ + CREATE TABLE IF NOT EXISTS knowledge_document ( + id BIGSERIAL PRIMARY KEY, + title VARCHAR(500) NOT NULL, + source_name VARCHAR(500), + file_type VARCHAR(20) NOT NULL, + file_size BIGINT DEFAULT 0 NOT NULL, + content TEXT, + category_id BIGINT DEFAULT 0 NOT NULL, + tags JSONB DEFAULT '{}' NOT NULL, + chunk_count INTEGER DEFAULT 0 NOT NULL, + status VARCHAR(20) DEFAULT 'PROCESSING' NOT NULL, + error_message TEXT, + create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL, + update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL, + is_delete BOOLEAN DEFAULT FALSE NOT NULL + ) + """; + jdbcTemplate.execute(sql); + + // 创建索引 + jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_knowledge_document_category ON knowledge_document (category_id)"); + jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_knowledge_document_status ON knowledge_document (status)"); + jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_knowledge_document_create_time ON knowledge_document (create_time DESC)"); + } + + private void fixTagsDefaultValue() { + try { + // 检查当前默认值是否为数组 + String checkSql = "SELECT column_default FROM information_schema.columns WHERE table_name = 'knowledge_document' AND 
column_name = 'tags'"; + String currentDefault = jdbcTemplate.queryForObject(checkSql, String.class); + if (currentDefault != null && currentDefault.contains("[]")) { + log.info("修复 knowledge_document.tags 默认值"); + jdbcTemplate.execute("ALTER TABLE knowledge_document ALTER COLUMN tags SET DEFAULT '{}'"); + // 将已有的 '[]' 更新为 '{}' + jdbcTemplate.execute("UPDATE knowledge_document SET tags = '{}' WHERE tags = '[]' OR tags IS NULL"); + } + } catch (Exception e) { + log.warn("修复 tags 默认值时出错(可能已修复)", e); + } + } +} diff --git a/src/main/java/com/wok/supportbot/config/MybatisPlusConfig.java b/src/main/java/com/wok/supportbot/config/MybatisPlusConfig.java new file mode 100644 index 0000000..868aff5 --- /dev/null +++ b/src/main/java/com/wok/supportbot/config/MybatisPlusConfig.java @@ -0,0 +1,21 @@ +package com.wok.supportbot.config; + +import com.baomidou.mybatisplus.extension.plugins.MybatisPlusInterceptor; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +/** + * MyBatis Plus 配置类 + */ +@Configuration +public class MybatisPlusConfig { + + /** + * MyBatis Plus 拦截器 + * 当前版本分页插件通过手动方式实现 + */ + @Bean + public MybatisPlusInterceptor mybatisPlusInterceptor() { + return new MybatisPlusInterceptor(); + } +} diff --git a/src/main/java/com/wok/supportbot/controller/AiController.java b/src/main/java/com/wok/supportbot/controller/AiController.java index 1bd6f7e..f7fce42 100644 --- a/src/main/java/com/wok/supportbot/controller/AiController.java +++ b/src/main/java/com/wok/supportbot/controller/AiController.java @@ -105,4 +105,21 @@ public class AiController { // 返回 return sseEmitter; } + + /** + * RAG 知识库同步对话(支持查询重写策略) + * + * @param message 用户消息 + * @param chatId 会话ID + * @param rewriteStrategy 查询重写策略(可选):NONE/REWRITE/TRANSLATION/COMPRESSION/MULTI_QUERY,默认为 REWRITE + * @return AI 回答 + */ + @GetMapping("/assistant_app/chat/rag/sync") + public String doChatWithRagSync(String message, String chatId, String 
rewriteStrategy) { + // 如果未指定策略,默认使用 REWRITE + String strategy = (rewriteStrategy != null && !rewriteStrategy.isEmpty()) + ? rewriteStrategy + : "REWRITE"; + return assistantApp.doChatWithRagStrategy(message, chatId, strategy); + } } diff --git a/src/main/java/com/wok/supportbot/controller/DocumentController.java b/src/main/java/com/wok/supportbot/controller/DocumentController.java index bcc7d34..67a2829 100644 --- a/src/main/java/com/wok/supportbot/controller/DocumentController.java +++ b/src/main/java/com/wok/supportbot/controller/DocumentController.java @@ -1,102 +1,87 @@ package com.wok.supportbot.controller; -import com.wok.supportbot.document.extract.JsonDocumentLoader; -import com.wok.supportbot.document.extract.MarkdownDocumentLoader; -import com.wok.supportbot.document.extract.SimpleStringDocumentReader; -import com.wok.supportbot.document.extract.TikaDocumentReader; -import com.wok.supportbot.document.transform.MyKeywordEnricher; -import com.wok.supportbot.document.transform.MyTokenTextSplitter; -import org.springframework.ai.document.Document; -import org.springframework.ai.vectorstore.VectorStore; +import com.wok.supportbot.entity.CategoryNode; +import com.wok.supportbot.entity.KnowledgeCategory; +import com.wok.supportbot.entity.KnowledgeDocument; +import com.wok.supportbot.entity.SearchResult; +import com.wok.supportbot.service.DocumentService; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.http.ResponseEntity; import org.springframework.web.bind.annotation.*; import org.springframework.web.multipart.MultipartFile; +import java.util.HashMap; import java.util.List; import java.util.Map; +/** + * 知识库文档管理控制器 + * 提供文档上传、查询、管理、分类、搜索等完整功能 + */ @RestController -@RequestMapping("/document") public class DocumentController { @Autowired - private TikaDocumentReader tikaDocumentReader; + private DocumentService documentService; - @Autowired - private SimpleStringDocumentReader simpleStringDocumentReader; - - @Autowired 
- private MarkdownDocumentLoader markdownDocumentLoader; - - @Autowired - private JsonDocumentLoader jsonDocumentLoader; - - @Autowired - private MyTokenTextSplitter myTokenTextSplitter; - - @Autowired - private MyKeywordEnricher myKeywordEnricher; - - @Autowired - private VectorStore pgVectorVectorStore; + // ==================== 文档上传 ==================== /** * 上传普通文件(支持多种格式),用 Tika 解析 + * + * @param file 文件 + * @param title 文档标题(可选,默认使用文件名) + * @param categoryId 分类ID(可选) + * @param tags 标签(可选) + * @return 上传结果 */ @PostMapping("/upload/file") - public ResponseEntity> uploadFile(@RequestParam("file") MultipartFile file) { + public ResponseEntity> uploadFile( + @RequestParam("file") MultipartFile file, + @RequestParam(required = false) String title, + @RequestParam(required = false) Long categoryId, + @RequestParam(required = false) List tags) { try { - List documents = tikaDocumentReader.read(file); - - // 拆分文档 - List splitDocuments = myTokenTextSplitter.splitDocuments(documents); - - // 添加元数据 - List enrichedDocuments = myKeywordEnricher.enrichDocuments(splitDocuments); - - // 转成向量并存入数据库 - pgVectorVectorStore.add(enrichedDocuments); - + KnowledgeDocument doc = documentService.uploadFile(file, title, categoryId, tags); return ResponseEntity.ok(Map.of( - "success", true, - "message", "文件上传并向量化成功", - "documentCount", enrichedDocuments.size() + "success", true, + "message", "文件上传并向量化成功", + "data", doc )); } catch (Exception e) { return ResponseEntity.status(500).body(Map.of( - "success", false, - "message", "上传失败:" + e.getMessage() + "success", false, + "message", "上传失败:" + e.getMessage() )); } } /** * 上传字符串内容 + * + * @param content 内容 + * @param title 标题 + * @param categoryId 分类ID(可选) + * @param tags 标签(可选) + * @return 上传结果 */ @PostMapping("/upload/string") - public ResponseEntity> uploadString(@RequestBody String content) { + public ResponseEntity> uploadString( + @RequestBody String content, + @RequestParam String title, + @RequestParam(required = false) Long 
categoryId, + @RequestParam(required = false) List tags) { try { - List documents = simpleStringDocumentReader.read(content); - - // 拆分文档 - List splitDocuments = myTokenTextSplitter.splitDocuments(documents); - - // 添加元数据 - List enrichedDocuments = myKeywordEnricher.enrichDocuments(splitDocuments); - - // 转成向量并存入数据库 - pgVectorVectorStore.add(enrichedDocuments); - + KnowledgeDocument doc = documentService.uploadString(content, title, categoryId, tags); return ResponseEntity.ok(Map.of( - "success", true, - "message", "文本内容上传并向量化成功", - "documentCount", enrichedDocuments.size() + "success", true, + "message", "文本内容上传并向量化成功", + "data", doc )); } catch (Exception e) { return ResponseEntity.status(500).body(Map.of( - "success", false, - "message", "上传失败:" + e.getMessage() + "success", false, + "message", "上传失败:" + e.getMessage() )); } } @@ -105,127 +90,402 @@ public class DocumentController { * 上传 Markdown 文件 */ @PostMapping("/upload/markdown") - public ResponseEntity> uploadMarkdown(@RequestParam("file") MultipartFile file) { + public ResponseEntity> uploadMarkdown( + @RequestParam("file") MultipartFile file, + @RequestParam(required = false) String title, + @RequestParam(required = false) Long categoryId, + @RequestParam(required = false) List tags) { try { - List documents = markdownDocumentLoader.loadMarkdownFromFile(file); - - // 拆分文档 - List splitDocuments = myTokenTextSplitter.splitDocuments(documents); - - // 添加元数据 - List enrichedDocuments = myKeywordEnricher.enrichDocuments(splitDocuments); - - // 转成向量并存入数据库 - pgVectorVectorStore.add(enrichedDocuments); - + KnowledgeDocument doc = documentService.uploadMarkdown(file, title, categoryId, tags); return ResponseEntity.ok(Map.of( - "success", true, - "message", "Markdown文件上传并向量化成功", - "documentCount", enrichedDocuments.size() + "success", true, + "message", "Markdown文件上传并向量化成功", + "data", doc )); } catch (Exception e) { return ResponseEntity.status(500).body(Map.of( - "success", false, - "message", "上传失败:" + 
e.getMessage() + "success", false, + "message", "上传失败:" + e.getMessage() )); } } /** * 上传 JSON 文件(基本方式) - * 把 JSON 根节点当成一个整体文档 */ @PostMapping("/upload/json/basic") - public ResponseEntity> uploadJsonBasic(@RequestParam("file") MultipartFile file) { + public ResponseEntity> uploadJsonBasic( + @RequestParam("file") MultipartFile file, + @RequestParam(required = false) String title, + @RequestParam(required = false) Long categoryId, + @RequestParam(required = false) List tags) { try { - List documents = jsonDocumentLoader.loadBasicJson(file); - - // 拆分文档 - List splitDocuments = myTokenTextSplitter.splitDocuments(documents); - - // 添加元数据 - List enrichedDocuments = myKeywordEnricher.enrichDocuments(splitDocuments); - - // 转成向量并存入数据库 - pgVectorVectorStore.add(enrichedDocuments); - + KnowledgeDocument doc = documentService.uploadJsonBasic(file, title, categoryId, tags); return ResponseEntity.ok(Map.of( - "success", true, - "message", "JSON文件(基本方式)上传并向量化成功", - "documentCount", enrichedDocuments.size() + "success", true, + "message", "JSON文件(基本方式)上传并向量化成功", + "data", doc )); } catch (Exception e) { return ResponseEntity.status(500).body(Map.of( - "success", false, - "message", "上传失败:" + e.getMessage() + "success", false, + "message", "上传失败:" + e.getMessage() )); } } /** * 上传 JSON 文件(按字段提取) - * 用于提取指定字段文本 */ @PostMapping("/upload/json/fields") public ResponseEntity> uploadJsonWithFields( @RequestParam("file") MultipartFile file, - @RequestParam("fields") List fields) { + @RequestParam("fields") List fields, + @RequestParam(required = false) String title, + @RequestParam(required = false) Long categoryId, + @RequestParam(required = false) List tags) { try { - List documents = jsonDocumentLoader.loadJsonByFields(file, fields.toArray(new String[0])); - - // 拆分文档 - List splitDocuments = myTokenTextSplitter.splitDocuments(documents); - - // 添加元数据 - List enrichedDocuments = myKeywordEnricher.enrichDocuments(splitDocuments); - - // 转成向量并存入数据库 - 
pgVectorVectorStore.add(enrichedDocuments); - + KnowledgeDocument doc = documentService.uploadJsonFields(file, fields, title, categoryId, tags); return ResponseEntity.ok(Map.of( - "success", true, - "message", "JSON文件(按字段)上传并向量化成功", - "documentCount", enrichedDocuments.size(), - "extractedFields", fields + "success", true, + "message", "JSON文件(按字段)上传并向量化成功", + "data", doc, + "extractedFields", fields )); } catch (Exception e) { return ResponseEntity.status(500).body(Map.of( - "success", false, - "message", "上传失败:" + e.getMessage() + "success", false, + "message", "上传失败:" + e.getMessage() )); } } /** * 上传 JSON 文件(按指针拆分) - * 用于拆分数组元素,常用来分段成多文档 */ @PostMapping("/upload/json/pointer") public ResponseEntity> uploadJsonWithPointer( @RequestParam("file") MultipartFile file, - @RequestParam("pointer") String pointer) { + @RequestParam("pointer") String pointer, + @RequestParam(required = false) String title, + @RequestParam(required = false) Long categoryId, + @RequestParam(required = false) List tags) { + try { + KnowledgeDocument doc = documentService.uploadJsonPointer(file, pointer, title, categoryId, tags); + return ResponseEntity.ok(Map.of( + "success", true, + "message", "JSON文件(按指针)上传并向量化成功", + "data", doc, + "pointer", pointer + )); + } catch (Exception e) { + return ResponseEntity.status(500).body(Map.of( + "success", false, + "message", "上传失败:" + e.getMessage() + )); + } + } + + // ==================== 文档管理 ==================== + + /** + * 查询文档列表(分页 + 过滤) + * + * @param page 页码(默认1) + * @param size 每页大小(默认10) + * @param categoryId 分类ID过滤(可选) + * @param status 状态过滤(PROCESSING/READY/FAILED,可选) + * @return 分页文档列表 + */ + @GetMapping("/document/list") + public ResponseEntity> listDocuments( + @RequestParam(defaultValue = "1") int page, + @RequestParam(defaultValue = "10") int size, + @RequestParam(required = false) Long categoryId, + @RequestParam(required = false) String status) { + try { + Map result = documentService.listDocuments(page, size, categoryId, status); + 
Map data = new HashMap<>(); + data.put("success", true); + data.put("data", result.get("records")); + data.put("total", result.get("total")); + data.put("page", result.get("page")); + data.put("size", result.get("size")); + data.put("pages", result.get("pages")); + return ResponseEntity.ok(data); + } catch (Exception e) { + return ResponseEntity.status(500).body(Map.of( + "success", false, + "message", "查询失败:" + e.getMessage() + )); + } + } + + /** + * 获取文档详情 + */ + @GetMapping("/document/{id}") + public ResponseEntity> getDocumentDetail(@PathVariable Long id) { + try { + KnowledgeDocument doc = documentService.getDocumentDetail(id); + if (doc == null) { + return ResponseEntity.status(404).body(Map.of( + "success", false, + "message", "文档不存在" + )); + } + return ResponseEntity.ok(Map.of( + "success", true, + "data", doc + )); + } catch (Exception e) { + return ResponseEntity.status(500).body(Map.of( + "success", false, + "message", "查询失败:" + e.getMessage() + )); + } + } + + /** + * 获取文档的所有分块 + */ + @GetMapping("/document/{id}/chunks") + public ResponseEntity> getDocumentChunks(@PathVariable Long id) { + try { + List> chunks = documentService.getDocumentChunks(id); + return ResponseEntity.ok(Map.of( + "success", true, + "data", chunks, + "total", chunks.size() + )); + } catch (Exception e) { + return ResponseEntity.status(500).body(Map.of( + "success", false, + "message", "查询失败:" + e.getMessage() + )); + } + } + + /** + * 删除文档(逻辑删除 + 级联删除向量) + */ + @DeleteMapping("/document/{id}") + public ResponseEntity> deleteDocument(@PathVariable Long id) { + try { + int vectorCount = documentService.deleteDocument(id); + return ResponseEntity.ok(Map.of( + "success", true, + "message", "删除成功", + "deletedVectors", vectorCount + )); + } catch (Exception e) { + return ResponseEntity.status(500).body(Map.of( + "success", false, + "message", "删除失败:" + e.getMessage() + )); + } + } + + /** + * 更新文档元信息 + */ + @PutMapping("/document/{id}") + public ResponseEntity> updateDocument( + 
@PathVariable Long id, + @RequestParam(required = false) String title, + @RequestParam(required = false) Long categoryId, + @RequestParam(required = false) List tags) { + try { + documentService.updateDocumentMetadata(id, title, categoryId, tags); + return ResponseEntity.ok(Map.of( + "success", true, + "message", "更新成功" + )); + } catch (Exception e) { + return ResponseEntity.status(500).body(Map.of( + "success", false, + "message", "更新失败:" + e.getMessage() + )); + } + } + + /** + * 重新处理文档(重新分块 + 向量化) + */ + @PutMapping("/document/{id}/reprocess") + public ResponseEntity> reprocessDocument(@PathVariable Long id) { + try { + KnowledgeDocument doc = documentService.reprocessDocument(id); + return ResponseEntity.ok(Map.of( + "success", true, + "message", "重新处理成功", + "data", doc + )); + } catch (Exception e) { + return ResponseEntity.status(500).body(Map.of( + "success", false, + "message", "重新处理失败:" + e.getMessage() + )); + } + } + + // ==================== 语义搜索 ==================== + + /** + * 语义搜索 + * + * @param body 搜索参数 + * @return 搜索结果 + */ + @PostMapping("/document/search") + public ResponseEntity> searchDocuments(@RequestBody Map body) { + try { + String query = (String) body.get("query"); + int topK = body.get("topK") != null ? ((Number) body.get("topK")).intValue() : 5; + double similarityThreshold = body.get("similarityThreshold") != null + ? ((Number) body.get("similarityThreshold")).doubleValue() : 0.5; + Long categoryId = body.get("categoryId") != null ? 
((Number) body.get("categoryId")).longValue() : null; + + List results = documentService.searchDocuments(query, topK, similarityThreshold, categoryId); + return ResponseEntity.ok(Map.of( + "success", true, + "data", results, + "total", results.size() + )); + } catch (Exception e) { + return ResponseEntity.status(500).body(Map.of( + "success", false, + "message", "搜索失败:" + e.getMessage() + )); + } + } + + // ==================== 统计 ==================== + + /** + * 知识库统计面板 + */ + @GetMapping("/document/stats") + public ResponseEntity> getStats() { + try { + Map stats = documentService.getStats(); + return ResponseEntity.ok(Map.of( + "success", true, + "data", stats + )); + } catch (Exception e) { + return ResponseEntity.status(500).body(Map.of( + "success", false, + "message", "查询统计失败:" + e.getMessage() + )); + } + } + + // ==================== 分类管理 ==================== + + /** + * 获取分类树 + */ + @GetMapping("/category/tree") + public ResponseEntity> getCategoryTree() { + try { + List tree = documentService.getCategoryTree(); + return ResponseEntity.ok(Map.of( + "success", true, + "data", tree + )); + } catch (Exception e) { + return ResponseEntity.status(500).body(Map.of( + "success", false, + "message", "获取分类树失败:" + e.getMessage() + )); + } + } + + /** + * 获取分类列表 + */ + @GetMapping("/category/list") + public ResponseEntity> listCategories() { + try { + List list = documentService.listCategories(); + return ResponseEntity.ok(Map.of( + "success", true, + "data", list + )); + } catch (Exception e) { + return ResponseEntity.status(500).body(Map.of( + "success", false, + "message", "获取分类列表失败:" + e.getMessage() + )); + } + } + + /** + * 创建分类 + */ + @PostMapping("/category") + public ResponseEntity> createCategory(@RequestBody Map body) { + try { + String name = (String) body.get("name"); + String description = (String) body.get("description"); + Long parentId = body.get("parentId") != null ? 
((Number) body.get("parentId")).longValue() : null; + Integer sortOrder = body.get("sortOrder") != null ? ((Number) body.get("sortOrder")).intValue() : null; + + KnowledgeCategory category = documentService.createCategory(name, description, parentId, sortOrder); + return ResponseEntity.ok(Map.of( + "success", true, + "message", "分类创建成功", + "data", category + )); + } catch (Exception e) { + return ResponseEntity.status(500).body(Map.of( + "success", false, + "message", "创建分类失败:" + e.getMessage() + )); + } + } + + /** + * 更新分类 + */ + @PutMapping("/category/{id}") + public ResponseEntity> updateCategory( + @PathVariable Long id, + @RequestBody Map body) { + try { + String name = (String) body.get("name"); + String description = (String) body.get("description"); + Integer sortOrder = body.get("sortOrder") != null ? ((Number) body.get("sortOrder")).intValue() : null; + + documentService.updateCategory(id, name, description, sortOrder); + return ResponseEntity.ok(Map.of( + "success", true, + "message", "分类更新成功" + )); + } catch (Exception e) { + return ResponseEntity.status(500).body(Map.of( + "success", false, + "message", "更新分类失败:" + e.getMessage() + )); + } + } + + /** + * 删除分类 + */ + @DeleteMapping("/category/{id}") + public ResponseEntity> deleteCategory(@PathVariable Long id) { try { - List documents = jsonDocumentLoader.loadJsonByPointer(file, pointer); - - // 拆分文档 - List splitDocuments = myTokenTextSplitter.splitDocuments(documents); - - // 添加元数据 - List enrichedDocuments = myKeywordEnricher.enrichDocuments(splitDocuments); - - // 转成向量并存入数据库 - pgVectorVectorStore.add(enrichedDocuments); - + documentService.deleteCategory(id); return ResponseEntity.ok(Map.of( - "success", true, - "message", "JSON文件(按指针)上传并向量化成功", - "documentCount", enrichedDocuments.size(), - "pointer", pointer + "success", true, + "message", "分类删除成功" )); } catch (Exception e) { return ResponseEntity.status(500).body(Map.of( - "success", false, - "message", "上传失败:" + e.getMessage() + "success", 
false, + "message", "删除分类失败:" + e.getMessage() )); } } diff --git a/src/main/java/com/wok/supportbot/dao/KnowledgeCategoryMapper.java b/src/main/java/com/wok/supportbot/dao/KnowledgeCategoryMapper.java new file mode 100644 index 0000000..29768e6 --- /dev/null +++ b/src/main/java/com/wok/supportbot/dao/KnowledgeCategoryMapper.java @@ -0,0 +1,12 @@ +package com.wok.supportbot.dao; + +import com.baomidou.mybatisplus.core.mapper.BaseMapper; +import com.wok.supportbot.entity.KnowledgeCategory; +import org.apache.ibatis.annotations.Mapper; + +/** + * 知识库分类 Mapper + */ +@Mapper +public interface KnowledgeCategoryMapper extends BaseMapper { +} diff --git a/src/main/java/com/wok/supportbot/dao/KnowledgeDocumentMapper.java b/src/main/java/com/wok/supportbot/dao/KnowledgeDocumentMapper.java new file mode 100644 index 0000000..601f42f --- /dev/null +++ b/src/main/java/com/wok/supportbot/dao/KnowledgeDocumentMapper.java @@ -0,0 +1,12 @@ +package com.wok.supportbot.dao; + +import com.baomidou.mybatisplus.core.mapper.BaseMapper; +import com.wok.supportbot.entity.KnowledgeDocument; +import org.apache.ibatis.annotations.Mapper; + +/** + * 知识文档 Mapper + */ +@Mapper +public interface KnowledgeDocumentMapper extends BaseMapper { +} diff --git a/src/main/java/com/wok/supportbot/entity/CategoryNode.java b/src/main/java/com/wok/supportbot/entity/CategoryNode.java new file mode 100644 index 0000000..74cac6f --- /dev/null +++ b/src/main/java/com/wok/supportbot/entity/CategoryNode.java @@ -0,0 +1,58 @@ +package com.wok.supportbot.entity; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +import java.io.Serial; +import java.io.Serializable; +import java.util.List; + +/** + * 分类树节点 - 用于返回树形结构 + */ +@Data +@Builder +@AllArgsConstructor +@NoArgsConstructor +public class CategoryNode implements Serializable { + + @Serial + private static final long serialVersionUID = 1L; + + /** + * 分类ID + */ + private Long id; + + /** + * 分类名称 
+ */ + private String name; + + /** + * 分类描述 + */ + private String description; + + /** + * 父分类ID + */ + private Long parentId; + + /** + * 排序权重 + */ + private Integer sortOrder; + + /** + * 关联文档数量 + */ + private Integer documentCount; + + /** + * 子分类列表 + */ + private List children; +} diff --git a/src/main/java/com/wok/supportbot/entity/KnowledgeCategory.java b/src/main/java/com/wok/supportbot/entity/KnowledgeCategory.java new file mode 100644 index 0000000..b2f9d50 --- /dev/null +++ b/src/main/java/com/wok/supportbot/entity/KnowledgeCategory.java @@ -0,0 +1,72 @@ +package com.wok.supportbot.entity; + +import com.baomidou.mybatisplus.annotation.*; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +import java.io.Serial; +import java.io.Serializable; +import java.util.Date; + +/** + * 知识库分类表 - 支持树形结构的知识库分类 + */ +@Data +@Builder +@AllArgsConstructor +@NoArgsConstructor +@TableName("knowledge_category") +public class KnowledgeCategory implements Serializable { + + @Serial + @TableField(exist = false) + private static final long serialVersionUID = 1L; + + @TableId(value = "id", type = IdType.ASSIGN_ID) + private Long id; + + /** + * 分类名称 + */ + @TableField("name") + private String name; + + /** + * 分类描述 + */ + @TableField("description") + private String description; + + /** + * 父分类ID - 0表示顶级分类 + */ + @TableField("parent_id") + private Long parentId; + + /** + * 排序权重 - 数值越大越靠前 + */ + @TableField("sort_order") + private Integer sortOrder; + + /** + * 关联文档数量 - 冗余字段,定期更新 + */ + @TableField("document_count") + private Integer documentCount; + + /** + * 创建时间 + */ + @TableField(value = "create_time", fill = FieldFill.INSERT) + private Date createTime; + + /** + * 删除标志 - false:未删除, true:已删除(逻辑删除) + */ + @TableField("is_delete") + @TableLogic + private boolean isDelete; +} diff --git a/src/main/java/com/wok/supportbot/entity/KnowledgeDocument.java b/src/main/java/com/wok/supportbot/entity/KnowledgeDocument.java 
new file mode 100644 index 0000000..dc09f3e --- /dev/null +++ b/src/main/java/com/wok/supportbot/entity/KnowledgeDocument.java @@ -0,0 +1,110 @@ +package com.wok.supportbot.entity; + +import com.baomidou.mybatisplus.annotation.*; +import com.wok.supportbot.handler.PostgresJsonTypeHandler; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +import java.io.Serial; +import java.io.Serializable; +import java.util.Date; +import java.util.Map; + +/** + * 知识文档表 - 记录上传的文档元信息 + */ +@Data +@Builder +@AllArgsConstructor +@NoArgsConstructor +@TableName(value = "knowledge_document", autoResultMap = true) +public class KnowledgeDocument implements Serializable { + + @Serial + @TableField(exist = false) + private static final long serialVersionUID = 1L; + + @TableId(value = "id", type = IdType.ASSIGN_ID) + private Long id; + + /** + * 文档标题 + */ + @TableField("title") + private String title; + + /** + * 原始文件名 + */ + @TableField("source_name") + private String sourceName; + + /** + * 文件类型 - pdf/md/json/txt/word/excel 等 + */ + @TableField("file_type") + private String fileType; + + /** + * 文件大小(字节) + */ + @TableField("file_size") + private Long fileSize; + + /** + * 原文内容(截断预览) + */ + @TableField("content") + private String content; + + /** + * 所属分类ID - 0表示未分类 + */ + @TableField("category_id") + private Long categoryId; + + /** + * 标签列表(JSON数组) + */ + @TableField(value = "tags", typeHandler = PostgresJsonTypeHandler.class) + private Map tags; + + /** + * 分块数量 + */ + @TableField("chunk_count") + private Integer chunkCount; + + /** + * 处理状态 - PROCESSING/READY/FAILED + */ + @TableField("status") + private String status; + + /** + * 处理失败时的错误信息 + */ + @TableField("error_message") + private String errorMessage; + + /** + * 创建时间 + */ + @TableField(value = "create_time", fill = FieldFill.INSERT) + private Date createTime; + + /** + * 更新时间 + */ + @TableField(value = "update_time", fill = FieldFill.INSERT_UPDATE) + private Date 
updateTime; + + /** + * 删除标志 - false:未删除, true:已删除(逻辑删除) + */ + @TableField("is_delete") + @TableLogic + private boolean isDelete; +} diff --git a/src/main/java/com/wok/supportbot/entity/SearchResult.java b/src/main/java/com/wok/supportbot/entity/SearchResult.java new file mode 100644 index 0000000..3e1615c --- /dev/null +++ b/src/main/java/com/wok/supportbot/entity/SearchResult.java @@ -0,0 +1,62 @@ +package com.wok.supportbot.entity; + +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +import java.io.Serial; +import java.io.Serializable; + +/** + * 语义搜索结果 + */ +@Data +@Builder +@AllArgsConstructor +@NoArgsConstructor +public class SearchResult implements Serializable { + + @Serial + private static final long serialVersionUID = 1L; + + /** + * 向量记录ID + */ + private String id; + + /** + * 分块内容 + */ + private String content; + + /** + * 相似度得分 + */ + private Double score; + + /** + * 原始文件名 + */ + private String sourceName; + + /** + * 文档标题 + */ + private String title; + + /** + * 分块序号 + */ + private Integer chunkIndex; + + /** + * 关联的文档ID + */ + private String documentId; + + /** + * 原始元数据 + */ + private Object metadata; +} diff --git a/src/main/java/com/wok/supportbot/service/DocumentService.java b/src/main/java/com/wok/supportbot/service/DocumentService.java new file mode 100644 index 0000000..5e1078d --- /dev/null +++ b/src/main/java/com/wok/supportbot/service/DocumentService.java @@ -0,0 +1,630 @@ +package com.wok.supportbot.service; + +import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper; +import com.wok.supportbot.dao.KnowledgeCategoryMapper; +import com.wok.supportbot.dao.KnowledgeDocumentMapper; +import com.wok.supportbot.document.extract.JsonDocumentLoader; +import com.wok.supportbot.document.extract.MarkdownDocumentLoader; +import com.wok.supportbot.document.extract.SimpleStringDocumentReader; +import com.wok.supportbot.document.extract.TikaDocumentReader; +import 
com.wok.supportbot.document.transform.MyKeywordEnricher; +import com.wok.supportbot.document.transform.MyTokenTextSplitter; +import com.wok.supportbot.entity.CategoryNode; +import com.wok.supportbot.entity.KnowledgeCategory; +import com.wok.supportbot.entity.KnowledgeDocument; +import com.wok.supportbot.entity.SearchResult; +import lombok.extern.slf4j.Slf4j; +import org.springframework.ai.document.Document; +import org.springframework.ai.vectorstore.SearchRequest; +import org.springframework.ai.vectorstore.VectorStore; +import org.springframework.ai.vectorstore.filter.Filter; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.jdbc.core.JdbcTemplate; +import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; +import org.springframework.web.multipart.MultipartFile; + +import java.util.*; +import java.util.stream.Collectors; + +/** + * 知识库文档管理服务 + * 统一管理文档的上传、删除、搜索、统计、分类等操作 + */ +@Service +@Slf4j +public class DocumentService { + + @Autowired + private KnowledgeDocumentMapper documentMapper; + + @Autowired + private KnowledgeCategoryMapper categoryMapper; + + @Autowired + private JdbcTemplate jdbcTemplate; + + @Autowired + private VectorStore pgVectorVectorStore; + + @Autowired + private MyTokenTextSplitter myTokenTextSplitter; + + @Autowired + private MyKeywordEnricher myKeywordEnricher; + + @Autowired + private TikaDocumentReader tikaDocumentReader; + + @Autowired + private SimpleStringDocumentReader simpleStringDocumentReader; + + @Autowired + private MarkdownDocumentLoader markdownDocumentLoader; + + @Autowired + private JsonDocumentLoader jsonDocumentLoader; + + // ==================== 文档上传 ==================== + + /** + * 统一文档上传流程:创建记录 -> 分块 -> 关键词 -> 向量化 -> 更新状态 + * + * @param documents 解析后的文档列表 + * @param title 文档标题 + * @param sourceName 源文件名 + * @param fileType 文件类型 + * @param fileSize 文件大小 + * @param content 原文内容(截断预览) + * @param categoryId 分类ID + * @param tags 
标签列表 + * @return 创建完成的文档记录 + */ + @Transactional(rollbackFor = Exception.class) + public KnowledgeDocument uploadDocument(List documents, String title, String sourceName, + String fileType, Long fileSize, String content, + Long categoryId, List tags) { + // 1. 创建文档记录(状态 PROCESSING) + KnowledgeDocument docRecord = KnowledgeDocument.builder() + .title(title != null ? title : sourceName) + .sourceName(sourceName) + .fileType(fileType) + .fileSize(fileSize != null ? fileSize : 0L) + .content(content != null && content.length() > 2000 ? content.substring(0, 2000) : content) + .categoryId(categoryId != null ? categoryId : 0L) + .tags(tags != null ? Map.of("tags", tags) : null) + .status("PROCESSING") + .chunkCount(0) + .build(); + documentMapper.insert(docRecord); + + try { + // 2. 分块处理 + List splitDocuments = myTokenTextSplitter.splitDocuments(documents); + + // 3. 为每个分块设置 documentId 等元数据 + for (int i = 0; i < splitDocuments.size(); i++) { + Document doc = splitDocuments.get(i); + Map meta = new HashMap<>(doc.getMetadata()); + meta.put("documentId", String.valueOf(docRecord.getId())); + meta.put("chunkIndex", i); + meta.put("sourceName", sourceName); + meta.put("title", title != null ? title : sourceName); + if (categoryId != null) { + meta.put("categoryId", String.valueOf(categoryId)); + } + if (tags != null && !tags.isEmpty()) { + meta.put("tags", tags); + } + splitDocuments.set(i, new Document(doc.getId(), doc.getText(), meta)); + } + + // 4. 关键词提取 + List enrichedDocuments = myKeywordEnricher.enrichDocuments(splitDocuments); + + // 5. 向量化存储 + pgVectorVectorStore.add(enrichedDocuments); + + // 6. 
更新文档状态为 READY + docRecord.setStatus("READY"); + docRecord.setChunkCount(enrichedDocuments.size()); + documentMapper.updateById(docRecord); + + log.info("文档上传成功: id={}, title={}, chunks={}", docRecord.getId(), docRecord.getTitle(), enrichedDocuments.size()); + + } catch (Exception e) { + // 标记为失败 + docRecord.setStatus("FAILED"); + docRecord.setErrorMessage(e.getMessage()); + documentMapper.updateById(docRecord); + log.error("文档上传失败: id={}, title={}", docRecord.getId(), docRecord.getTitle(), e); + throw new RuntimeException("文档处理失败: " + e.getMessage(), e); + } + + return docRecord; + } + + /** + * 解析文件并上传 + */ + public KnowledgeDocument uploadFile(MultipartFile file, String title, Long categoryId, List tags) { + List documents = tikaDocumentReader.read(file); + String fileType = getFileExtension(file.getOriginalFilename()); + return uploadDocument(documents, + title != null ? title : file.getOriginalFilename(), + file.getOriginalFilename(), + fileType, + file.getSize(), + documents.get(0).getText(), + categoryId, + tags); + } + + /** + * 解析字符串并上传 + */ + public KnowledgeDocument uploadString(String content, String title, Long categoryId, List tags) { + List documents = simpleStringDocumentReader.read(content); + return uploadDocument(documents, title, title, "txt", + (long) content.length(), content, categoryId, tags); + } + + /** + * 解析 Markdown 文件并上传 + */ + public KnowledgeDocument uploadMarkdown(MultipartFile file, String title, Long categoryId, List tags) { + List documents = markdownDocumentLoader.loadMarkdownFromFile(file); + String content = documents.stream().map(Document::getText).collect(Collectors.joining("\n")); + return uploadDocument(documents, + title != null ? 
title : file.getOriginalFilename(), + file.getOriginalFilename(), + "md", + file.getSize(), + content, + categoryId, + tags); + } + + /** + * 解析 JSON 文件(基本方式)并上传 + */ + public KnowledgeDocument uploadJsonBasic(MultipartFile file, String title, Long categoryId, List tags) { + List documents = jsonDocumentLoader.loadBasicJson(file); + String content = documents.stream().map(Document::getText).collect(Collectors.joining("\n")); + return uploadDocument(documents, + title != null ? title : file.getOriginalFilename(), + file.getOriginalFilename(), + "json", + file.getSize(), + content, + categoryId, + tags); + } + + /** + * 解析 JSON 文件(按字段)并上传 + */ + public KnowledgeDocument uploadJsonFields(MultipartFile file, List fields, String title, Long categoryId, List tags) { + List documents = jsonDocumentLoader.loadJsonByFields(file, fields.toArray(new String[0])); + String content = documents.stream().map(Document::getText).collect(Collectors.joining("\n")); + return uploadDocument(documents, + title != null ? title : file.getOriginalFilename(), + file.getOriginalFilename(), + "json", + file.getSize(), + content, + categoryId, + tags); + } + + /** + * 解析 JSON 文件(按指针)并上传 + */ + public KnowledgeDocument uploadJsonPointer(MultipartFile file, String pointer, String title, Long categoryId, List tags) { + List documents = jsonDocumentLoader.loadJsonByPointer(file, pointer); + String content = documents.stream().map(Document::getText).collect(Collectors.joining("\n")); + return uploadDocument(documents, + title != null ? 
title : file.getOriginalFilename(), + file.getOriginalFilename(), + "json", + file.getSize(), + content, + categoryId, + tags); + } + + // ==================== 文档管理 ==================== + + /** + * 分页查询文档列表(手动分页) + */ + public Map listDocuments(int page, int size, Long categoryId, String status) { + // 构建基础条件(用于 count 和 list) + QueryWrapper countWrapper = new QueryWrapper<>(); + if (categoryId != null && categoryId > 0) { + countWrapper.eq("category_id", categoryId); + } + if (status != null && !status.isEmpty()) { + countWrapper.eq("status", status); + } + + // 先查询总数(不加 ORDER BY) + Long total = documentMapper.selectCount(countWrapper); + + // 构建列表查询条件 + QueryWrapper listWrapper = new QueryWrapper<>(); + if (categoryId != null && categoryId > 0) { + listWrapper.eq("category_id", categoryId); + } + if (status != null && !status.isEmpty()) { + listWrapper.eq("status", status); + } + listWrapper.orderByDesc("create_time"); + listWrapper.last("LIMIT " + size + " OFFSET " + (page - 1) * size); + List records = documentMapper.selectList(listWrapper); + + Map result = new HashMap<>(); + result.put("records", records); + result.put("total", total); + result.put("page", page); + result.put("size", size); + result.put("pages", (total + size - 1) / size); + return result; + } + + /** + * 获取文档详情 + */ + public KnowledgeDocument getDocumentDetail(Long id) { + return documentMapper.selectById(id); + } + + /** + * 获取文档的所有分块 + */ + public List> getDocumentChunks(Long id) { + String sql = "SELECT id::text as id, content, metadata, create_time FROM vector_store " + + "WHERE metadata->>'documentId' = ? 
ORDER BY (metadata->>'chunkIndex')::int"; + return jdbcTemplate.queryForList(sql, String.valueOf(id)); + } + + /** + * 删除文档(逻辑删除 + 级联删除向量) + */ + @Transactional(rollbackFor = Exception.class) + public int deleteDocument(Long id) { + KnowledgeDocument doc = documentMapper.selectById(id); + if (doc == null) { + throw new RuntimeException("文档不存在"); + } + // 删除关联的向量 + int vectorCount = deleteVectorsByDocumentId(String.valueOf(id)); + // 逻辑删除文档记录 + documentMapper.deleteById(id); + log.info("删除文档: id={}, title={}, 删除向量数={}", id, doc.getTitle(), vectorCount); + return vectorCount; + } + + /** + * 重新处理文档(重新分块 + 向量化) + */ + @Transactional(rollbackFor = Exception.class) + public KnowledgeDocument reprocessDocument(Long id) { + KnowledgeDocument doc = documentMapper.selectById(id); + if (doc == null) { + throw new RuntimeException("文档不存在"); + } + if (doc.getContent() == null || doc.getContent().isEmpty()) { + throw new RuntimeException("文档无内容,无法重新处理"); + } + + // 删除旧向量 + deleteVectorsByDocumentId(String.valueOf(id)); + + // 重新解析并处理 + List documents = simpleStringDocumentReader.read(doc.getContent()); + + doc.setStatus("PROCESSING"); + doc.setChunkCount(0); + doc.setErrorMessage(null); + documentMapper.updateById(doc); + + try { + List splitDocuments = myTokenTextSplitter.splitDocuments(documents); + + for (int i = 0; i < splitDocuments.size(); i++) { + Document d = splitDocuments.get(i); + Map meta = new HashMap<>(d.getMetadata()); + meta.put("documentId", String.valueOf(doc.getId())); + meta.put("chunkIndex", i); + meta.put("sourceName", doc.getSourceName()); + meta.put("title", doc.getTitle()); + if (doc.getCategoryId() != null && doc.getCategoryId() > 0) { + meta.put("categoryId", String.valueOf(doc.getCategoryId())); + } + if (doc.getTags() != null && doc.getTags().containsKey("tags")) { + meta.put("tags", doc.getTags().get("tags")); + } + splitDocuments.set(i, new Document(d.getId(), d.getText(), meta)); + } + + List enrichedDocuments = 
myKeywordEnricher.enrichDocuments(splitDocuments); + pgVectorVectorStore.add(enrichedDocuments); + + doc.setStatus("READY"); + doc.setChunkCount(enrichedDocuments.size()); + documentMapper.updateById(doc); + + log.info("重新处理文档成功: id={}, title={}, chunks={}", doc.getId(), doc.getTitle(), enrichedDocuments.size()); + + } catch (Exception e) { + doc.setStatus("FAILED"); + doc.setErrorMessage(e.getMessage()); + documentMapper.updateById(doc); + log.error("重新处理文档失败: id={}, title={}", doc.getId(), doc.getTitle(), e); + throw new RuntimeException("重新处理失败: " + e.getMessage(), e); + } + + return doc; + } + + /** + * 更新文档元信息 + */ + public void updateDocumentMetadata(Long id, String title, Long categoryId, List tags) { + KnowledgeDocument doc = documentMapper.selectById(id); + if (doc == null) { + throw new RuntimeException("文档不存在"); + } + if (title != null && !title.isEmpty()) { + doc.setTitle(title); + } + if (categoryId != null) { + doc.setCategoryId(categoryId); + } + if (tags != null) { + doc.setTags(Map.of("tags", tags)); + } + documentMapper.updateById(doc); + + // 同步更新 vector_store 中对应的 metadata + // 注意:Spring AI 当前没有直接更新 metadata 的 API + // 这里我们先更新文档记录,metadata 的同步留到后续优化 + log.info("更新文档元信息: id={}, title={}", id, doc.getTitle()); + } + + // ==================== 语义搜索 ==================== + + /** + * 语义搜索 + */ + public List searchDocuments(String query, int topK, double similarityThreshold, Long categoryId) { + SearchRequest.Builder searchBuilder = SearchRequest.builder() + .query(query) + .topK(topK) + .similarityThreshold(similarityThreshold); + + // 如果指定了分类,添加过滤条件(当前 Spring AI 1.0.0-M6 的 filter 支持有限) + // 这里先不做分类过滤,后续升级 Spring AI 版本后再完善 + + List results = pgVectorVectorStore.similaritySearch(searchBuilder.build()); + + List searchResults = new ArrayList<>(); + for (Document doc : results) { + Map metadata = doc.getMetadata(); + SearchResult result = SearchResult.builder() + .id(doc.getId()) + .content(doc.getText()) + .score(metadata.containsKey("distance") ? 
((Number) metadata.get("distance")).doubleValue() : null) + .sourceName(getStringFromMetadata(metadata, "sourceName")) + .title(getStringFromMetadata(metadata, "title")) + .chunkIndex(getIntegerFromMetadata(metadata, "chunkIndex")) + .documentId(getStringFromMetadata(metadata, "documentId")) + .metadata(metadata) + .build(); + searchResults.add(result); + } + + return searchResults; + } + + // ==================== 统计 ==================== + + /** + * 获取知识库统计信息 + */ + public Map getStats() { + // 文档统计 + Long totalDocuments = documentMapper.selectCount(null); + + // 按文件类型统计 + String typeSql = "SELECT file_type, COUNT(*) as count FROM knowledge_document WHERE is_delete = false GROUP BY file_type"; + List> typeStats = jdbcTemplate.queryForList(typeSql); + Map byFileType = typeStats.stream() + .collect(Collectors.toMap( + r -> (String) r.get("file_type"), + r -> ((Number) r.get("count")).longValue() + )); + + // 按分类统计 + String catSql = "SELECT c.name, COUNT(d.id) as count FROM knowledge_document d " + + "LEFT JOIN knowledge_category c ON d.category_id = c.id " + + "WHERE d.is_delete = false GROUP BY c.name"; + List> catStats; + try { + catStats = jdbcTemplate.queryForList(catSql); + } catch (Exception e) { + catStats = new ArrayList<>(); + } + + // 向量总数 + String vectorSql = "SELECT COUNT(*) FROM vector_store"; + Long totalVectors; + try { + totalVectors = jdbcTemplate.queryForObject(vectorSql, Long.class); + } catch (Exception e) { + totalVectors = 0L; + } + + // 最近上传时间 + String lastUploadSql = "SELECT MAX(create_time) FROM knowledge_document WHERE is_delete = false"; + Date lastUploadTime = jdbcTemplate.queryForObject(lastUploadSql, Date.class); + + Map stats = new LinkedHashMap<>(); + stats.put("totalDocuments", totalDocuments); + stats.put("totalVectors", totalVectors); + stats.put("lastUploadTime", lastUploadTime); + stats.put("byFileType", byFileType); + stats.put("byCategory", catStats); + + return stats; + } + + // ==================== 分类管理 ==================== + 
+ /** + * 获取分类树 + */ + public List getCategoryTree() { + List categories = categoryMapper.selectList( + new QueryWrapper().orderByAsc("sort_order")); + + Map nodeMap = new LinkedHashMap<>(); + List rootNodes = new ArrayList<>(); + + for (KnowledgeCategory cat : categories) { + CategoryNode node = CategoryNode.builder() + .id(cat.getId()) + .name(cat.getName()) + .description(cat.getDescription()) + .parentId(cat.getParentId()) + .sortOrder(cat.getSortOrder()) + .documentCount(cat.getDocumentCount()) + .children(new ArrayList<>()) + .build(); + nodeMap.put(cat.getId(), node); + } + + for (CategoryNode node : nodeMap.values()) { + if (node.getParentId() == null || node.getParentId() == 0) { + rootNodes.add(node); + } else { + CategoryNode parent = nodeMap.get(node.getParentId()); + if (parent != null) { + parent.getChildren().add(node); + } else { + rootNodes.add(node); + } + } + } + + return rootNodes; + } + + /** + * 获取分类列表 + */ + public List listCategories() { + return categoryMapper.selectList( + new QueryWrapper().orderByAsc("sort_order")); + } + + /** + * 创建分类 + */ + public KnowledgeCategory createCategory(String name, String description, Long parentId, Integer sortOrder) { + KnowledgeCategory category = KnowledgeCategory.builder() + .name(name) + .description(description) + .parentId(parentId != null ? parentId : 0L) + .sortOrder(sortOrder != null ? 
sortOrder : 0) + .documentCount(0) + .build(); + categoryMapper.insert(category); + return category; + } + + /** + * 更新分类 + */ + public void updateCategory(Long id, String name, String description, Integer sortOrder) { + KnowledgeCategory category = categoryMapper.selectById(id); + if (category == null) { + throw new RuntimeException("分类不存在"); + } + if (name != null && !name.isEmpty()) { + category.setName(name); + } + if (description != null) { + category.setDescription(description); + } + if (sortOrder != null) { + category.setSortOrder(sortOrder); + } + categoryMapper.updateById(category); + } + + /** + * 删除分类(不删除文档,仅清空关联) + */ + @Transactional(rollbackFor = Exception.class) + public void deleteCategory(Long id) { + // 将关联的文档 category_id 设为 0 + KnowledgeDocument updateDoc = new KnowledgeDocument(); + updateDoc.setCategoryId(0L); + documentMapper.update(updateDoc, new QueryWrapper().eq("category_id", id)); + + // 逻辑删除分类 + categoryMapper.deleteById(id); + } + + // ==================== 内部方法 ==================== + + /** + * 根据文档ID删除 vector_store 中关联的所有向量 + */ + private int deleteVectorsByDocumentId(String documentId) { + String sql = "SELECT id::text FROM vector_store WHERE metadata->>'documentId' = ?"; + List ids = jdbcTemplate.queryForList(sql, String.class, documentId); + + if (!ids.isEmpty()) { + pgVectorVectorStore.delete(ids); + log.debug("删除向量: documentId={}, count={}", documentId, ids.size()); + } + return ids.size(); + } + + /** + * 获取文件扩展名 + */ + private String getFileExtension(String filename) { + if (filename == null || !filename.contains(".")) { + return "unknown"; + } + return filename.substring(filename.lastIndexOf(".") + 1).toLowerCase(); + } + + /** + * 从 metadata 中安全获取字符串值 + */ + private String getStringFromMetadata(Map metadata, String key) { + Object value = metadata.get(key); + return value != null ? 
value.toString() : null; + } + + /** + * 从 metadata 中安全获取整数值 + */ + private Integer getIntegerFromMetadata(Map metadata, String key) { + Object value = metadata.get(key); + if (value == null) return null; + if (value instanceof Number) { + return ((Number) value).intValue(); + } + try { + return Integer.parseInt(value.toString()); + } catch (NumberFormatException e) { + return null; + } + } +} diff --git a/src/main/resources/add-comments.sql b/src/main/resources/add-comments.sql new file mode 100644 index 0000000..eb534ab --- /dev/null +++ b/src/main/resources/add-comments.sql @@ -0,0 +1,21 @@ +-- 重新设置 chat_message 表注释 +COMMENT ON TABLE chat_message IS '聊天消息表(存储用户与AI助手的对话历史)'; + +COMMENT ON COLUMN chat_message.id IS '主键ID(自增长整型)'; +COMMENT ON COLUMN chat_message.conversation_id IS '会话ID(标识同一次对话的唯一标识符)'; +COMMENT ON COLUMN chat_message.message_type IS '消息类型:USER(用户消息) / ASSISTANT(AI回复) / SYSTEM(系统消息)'; +COMMENT ON COLUMN chat_message.content IS '消息内容(实际的对话文本)'; +COMMENT ON COLUMN chat_message.metadata IS '元数据(JSON格式,存储消息的额外信息)'; +COMMENT ON COLUMN chat_message.create_time IS '创建时间(消息创建的时间戳)'; +COMMENT ON COLUMN chat_message.update_time IS '更新时间(消息最后更新的时间戳)'; +COMMENT ON COLUMN chat_message.is_delete IS '删除标志:false-未删除 / true-已删除(逻辑删除)'; + +-- 重新设置 vector_store 表注释 +COMMENT ON TABLE vector_store IS '向量存储表(存储文档的向量表示用于语义搜索)'; + +COMMENT ON COLUMN vector_store.id IS '主键ID(UUID格式的唯一标识符)'; +COMMENT ON COLUMN vector_store.content IS '文档内容(原始的文本内容)'; +COMMENT ON COLUMN vector_store.metadata IS '元数据(JSON格式,存储文档来源、标题、标签等附加信息)'; +COMMENT ON COLUMN vector_store.embedding IS '向量嵌入(1536维向量表示,适配OpenAI embedding模型)'; +COMMENT ON COLUMN vector_store.create_time IS '创建时间'; +COMMENT ON COLUMN vector_store.update_time IS '更新时间'; diff --git a/src/main/resources/knowledge-base.sql b/src/main/resources/knowledge-base.sql new file mode 100644 index 0000000..fc7d6cd --- /dev/null +++ b/src/main/resources/knowledge-base.sql @@ -0,0 +1,78 @@ +-- 
================================================================
+-- Knowledge base management enhancement - database change script
+-- Purpose: adds the category table and the document table used by knowledge base management
+-- ================================================================
+
+-- ================================================================
+-- knowledge_category - knowledge base categories; parent_id allows a tree structure
+-- ================================================================
+CREATE TABLE IF NOT EXISTS knowledge_category (
+    id BIGSERIAL PRIMARY KEY,
+    name VARCHAR(100) NOT NULL,
+    description TEXT,
+    parent_id BIGINT DEFAULT 0 NOT NULL,
+    sort_order INTEGER DEFAULT 0 NOT NULL,
+    document_count INTEGER DEFAULT 0 NOT NULL,
+    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
+    is_delete BOOLEAN DEFAULT FALSE NOT NULL
+);
+
+-- Table comment
+COMMENT ON TABLE knowledge_category IS '知识库分类表 - 支持树形结构的知识库分类';
+
+-- Column comments. NOTE(review): id is BIGSERIAL but its comment says snowflake algorithm - confirm which ID strategy is intended
+COMMENT ON COLUMN knowledge_category.id IS '主键ID - 雪花算法';
+COMMENT ON COLUMN knowledge_category.name IS '分类名称';
+COMMENT ON COLUMN knowledge_category.description IS '分类描述';
+COMMENT ON COLUMN knowledge_category.parent_id IS '父分类ID - 0表示顶级分类';
+COMMENT ON COLUMN knowledge_category.sort_order IS '排序权重 - 数值越大越靠前';
+COMMENT ON COLUMN knowledge_category.document_count IS '关联文档数量 - 冗余字段,定期更新';
+COMMENT ON COLUMN knowledge_category.create_time IS '创建时间';
+COMMENT ON COLUMN knowledge_category.is_delete IS '删除标志 - false:未删除, true:已删除(逻辑删除)';
+
+-- Index on parent_id
+CREATE INDEX IF NOT EXISTS idx_knowledge_category_parent ON knowledge_category (parent_id);
+
+-- ================================================================
+-- knowledge_document - metadata of uploaded documents
+-- ================================================================
+CREATE TABLE IF NOT EXISTS knowledge_document (
+    id BIGSERIAL PRIMARY KEY,
+    title VARCHAR(500) NOT NULL,
+    source_name VARCHAR(500),
+    file_type VARCHAR(20) NOT NULL,
+    file_size BIGINT DEFAULT 0 NOT NULL,
+    content TEXT,
+    category_id BIGINT DEFAULT 0 NOT NULL,
+    tags JSONB DEFAULT '{}' NOT NULL,
+    chunk_count INTEGER DEFAULT 0 NOT NULL,
+    status VARCHAR(20) DEFAULT 'PROCESSING' NOT NULL,
+    error_message TEXT,
+    create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
+    update_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
+    is_delete BOOLEAN DEFAULT FALSE NOT NULL
+);
+
+-- Table comment
+COMMENT ON TABLE knowledge_document IS '知识文档表 - 记录上传的文档元信息';
+
+-- Column comments. NOTE(review): tags defaults to '{}' (a JSON object) while its comment calls it a JSON array - confirm the intended default
+COMMENT ON COLUMN knowledge_document.id IS '主键ID - 雪花算法';
+COMMENT ON COLUMN knowledge_document.title IS '文档标题';
+COMMENT ON COLUMN knowledge_document.source_name IS '原始文件名';
+COMMENT ON COLUMN knowledge_document.file_type IS '文件类型 - pdf/md/json/txt/word/excel 等';
+COMMENT ON COLUMN knowledge_document.file_size IS '文件大小(字节)';
+COMMENT ON COLUMN knowledge_document.content IS '原文内容(截断预览)';
+COMMENT ON COLUMN knowledge_document.category_id IS '所属分类ID - 0表示未分类';
+COMMENT ON COLUMN knowledge_document.tags IS '标签列表(JSON数组)';
+COMMENT ON COLUMN knowledge_document.chunk_count IS '分块数量';
+COMMENT ON COLUMN knowledge_document.status IS '处理状态 - PROCESSING/READY/FAILED';
+COMMENT ON COLUMN knowledge_document.error_message IS '处理失败时的错误信息';
+COMMENT ON COLUMN knowledge_document.create_time IS '创建时间';
+COMMENT ON COLUMN knowledge_document.update_time IS '更新时间';
+COMMENT ON COLUMN knowledge_document.is_delete IS '删除标志 - false:未删除, true:已删除(逻辑删除)';
+
+-- Indexes on category_id, status and create_time (descending)
+CREATE INDEX IF NOT EXISTS idx_knowledge_document_category ON knowledge_document (category_id);
+CREATE INDEX IF NOT EXISTS idx_knowledge_document_status ON knowledge_document (status);
+CREATE INDEX IF NOT EXISTS idx_knowledge_document_create_time ON knowledge_document (create_time DESC);
diff --git a/src/main/resources/static/chat.html b/src/main/resources/static/chat.html
new file mode 100644
index 0000000..8163662
--- /dev/null
+++ b/src/main/resources/static/chat.html
@@ -0,0 +1,178 @@
+
+
+
+
+
+ AI 智能客服 - 对话窗口
+
+
+
+
+
+ 🤖 AI 智能客服 基于通义千问 · 支持多轮对话 +
+
+ 会话ID: --- + 📖 API文档 +
+
+
+
🤖
+
+ 您好!我是电商智能客服助手,可以帮您解答关于商品、订单、支付、物流和售后等问题。
请问有什么可以帮您的? +
+
+
+
+ + +
+
+ + + + diff --git a/src/main/resources/static/frontend.html b/src/main/resources/static/frontend.html new file mode 100644 index 0000000..c4755a1 --- /dev/null +++ b/src/main/resources/static/frontend.html @@ -0,0 +1,770 @@ + + + + + +AI 智能客服系统 - Support Bot + + + +
+ AI 智能客服系统 + + 📖 API 文档 + +
+ +
+ + + +
+ +
+ + +
+
+

💬 智能客服对话

+

基于通义千问 · 电商客服场景 · 支持多轮对话上下文记忆

+ +
+ + + + +
+ +
+ + +
+ + + +
+
+
🤖
+
您好!我是电商智能客服助手。
可以帮您解答商品、订单、支付、物流和售后问题。

💡 提示:右侧下拉可切换对话模式,切换新会话开始全新对话。
📚 如需启用知识库检索,请勾选上方的"启用 RAG 知识库检索"选项。
+
+
+ +
+ + +
+
+
+ + +
+
+
+ GET + /ai/product_info_app/chat/sync +
+

🏷️ 商品信息结构化提取

+

输入商品描述文本,AI 自动提取:标题、描述、价格、评分、评论数、品牌、分类

+ + + +
+ + + +
+ + +
+
+ + +
+
+

📄 知识库文档管理

+

上传文档到 RAG 知识库,自动分词 → 向量化 → 存入 PGVector,即可用于 AI 检索问答

+ + +
+ + + + + + +
+ + +
+
POST /document/upload/file(Tika 多格式解析)
+
+
📎

点击或拖拽上传,支持多文件(PDF / Word / Excel / PPT / TXT 等)

+ +
+
+ +
+
+ + + + + + + + + + + + + + + +
+ + +
+

📋 最近上传记录

+
暂无上传记录
+
+
+ +
+ +
+ + + +