diff --git a/CLAUDE.md b/CLAUDE.md index 2d95ee1..81e5d4c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -58,6 +58,9 @@ AI 智能客服系统,基于 Spring AI Alibaba + 通义千问 + PGVector,支 - **雪花 ID 精度问题**: `KnowledgeDocument.id`、`categoryId` 和 `KnowledgeCategory.id`、`parentId` 已添加 `@JsonSerialize(using = ToStringSerializer.class)`,序列化为字符串避免前端 JS 精度丢失 - PostgreSQL JSONB 字段使用自定义 `PostgresJsonTypeHandler`(期望 JSON 对象 `'{}'`,非数组 `'[]'`) - 向量维度: 1536,距离类型: COSINE_DISTANCE,索引: HNSW +- **分块配置**: `knowledge.chunk.*` 配置项(`ChunkConfig`),默认 chunkSize=200, overlap=100 +- **上传校验**: `ALLOWED_EXTENSIONS` 白名单 + 50MB 大小限制,前后端双重校验 +- **文档去重**: `KnowledgeDocument.contentHash` 字段(SHA-256),上传时自动计算并查重 ## 前端架构 @@ -74,6 +77,7 @@ AI 智能客服系统,基于 Spring AI Alibaba + 通义千问 + PGVector,支 - AI 对话: `/ai/*`(`AiController`) - 文档上传: `/upload/*`(`DocumentController`) - 文档管理: `/document/*`(`DocumentController`) +- 批量操作: `/document/batch/*`(`DocumentController`,用 POST 避免 DELETE+RequestBody 路径冲突) - 分类管理: `/category/*`(`DocumentController`) ## 已知 TODO diff --git a/README.md b/README.md index ad23773..4b26ab9 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,11 @@ - **⚡ 向量搜索**: 基于PGVector的高性能语义相似度搜索 - **🔧 查询优化**: 多种预检索优化策略提升问答质量 - **📖 API文档**: 集成Knife4j提供完整的交互式API文档 -- **📚 知识库管理**: 完整的文档生命周期管理(上传、查看、删除、重新处理)、分类管理、语义搜索测试、统计面板 +- **📚 知识库管理**: 完整的文档生命周期管理、分类管理、语义搜索测试、统计面板 +- **🔄 批量操作**: 批量删除、批量重新处理文档 +- **🛡️ 文档去重**: 基于内容 SHA-256 哈希的自动去重 +- **✅ 上传校验**: 文件类型白名单 + 大小限制 + 上传进度条 +- **⚙️ 分块可配置**: 支持 application.yml 全局配置和上传时参数覆盖 ## 🛠 技术栈 @@ -125,6 +129,8 @@ CREATE TABLE knowledge_document ( | GET | `/document/{id}` | 文档详情 | | GET | `/document/{id}/chunks` | 文档分块列表 | | DELETE | `/document/{id}` | 删除文档(级联删除向量) | +| POST | `/document/batch/delete` | 批量删除文档 | | PUT | `/document/{id}` | 更新文档元信息 | | PUT | `/document/{id}/reprocess` | 重新处理文档 | +| POST | `/document/batch/reprocess` | 批量重新处理文档 | | POST | `/document/search` | 语义搜索 | diff --git 
a/src/main/java/com/wok/supportbot/config/ChunkConfig.java b/src/main/java/com/wok/supportbot/config/ChunkConfig.java new file mode 100644 index 0000000..c3bc4e1 --- /dev/null +++ b/src/main/java/com/wok/supportbot/config/ChunkConfig.java @@ -0,0 +1,30 @@ +package com.wok.supportbot.config; + +import lombok.Data; +import org.springframework.boot.context.properties.ConfigurationProperties; +import org.springframework.stereotype.Component; + +/** + * Document chunking settings, bound from the {@code knowledge.chunk.*} keys of application.yml. + * Registered once through {@code @Component}; additionally listing this class in {@code @EnableConfigurationProperties} would register a second bean of the same type. + */ +@Component +@ConfigurationProperties(prefix = "knowledge.chunk") +@Data +public class ChunkConfig { + + /** Chunk size in tokens. */ + private int chunkSize = 200; + + /** Chunk overlap in tokens. NOTE(review): MyTokenTextSplitter passes this as the 2nd TokenTextSplitter constructor argument, which Spring AI names minChunkSizeChars - confirm it really acts as an overlap. */ + private int overlap = 100; + + /** Minimum chunk size in characters. */ + private int minChunkSizeChars = 10; + + /** Maximum number of chunks. */ + private int maxNumChunks = 5000; + + /** Whether separators are kept. */ + private boolean keepSeparator = true; +} diff --git a/src/main/java/com/wok/supportbot/config/DatabaseInitConfig.java b/src/main/java/com/wok/supportbot/config/DatabaseInitConfig.java index 2058888..569fb05 100644 --- a/src/main/java/com/wok/supportbot/config/DatabaseInitConfig.java +++ b/src/main/java/com/wok/supportbot/config/DatabaseInitConfig.java @@ -35,6 +35,8 @@ public class DatabaseInitConfig { } else { // 修复已存在表的 tags 默认值(从数组改为对象) fixTagsDefaultValue(); + // 自动添加 content_hash 列(二期新增) + addContentHashColumn(); } log.info("数据库初始化完成"); @@ -114,4 +116,21 @@ public class DatabaseInitConfig { log.warn("修复 tags 默认值时出错(可能已修复)", e); } } + + /** + * 自动添加 content_hash 列(二期去重功能新增字段) + */ + private void addContentHashColumn() { + try { + String checkSql = "SELECT COUNT(*) FROM information_schema.columns WHERE table_name = 'knowledge_document' AND column_name = 'content_hash'"; + Integer count = jdbcTemplate.queryForObject(checkSql, Integer.class); + if (count != null && count == 0) { + log.info("添加 
knowledge_document.content_hash 列"); + jdbcTemplate.execute("ALTER TABLE knowledge_document ADD COLUMN content_hash VARCHAR(64)"); + jdbcTemplate.execute("CREATE INDEX IF NOT EXISTS idx_knowledge_document_content_hash ON knowledge_document (content_hash)"); + } + } catch (Exception e) { + log.warn("添加 content_hash 列时出错", e); + } + } } diff --git a/src/main/java/com/wok/supportbot/controller/DocumentController.java b/src/main/java/com/wok/supportbot/controller/DocumentController.java index 67a2829..c9d794d 100644 --- a/src/main/java/com/wok/supportbot/controller/DocumentController.java +++ b/src/main/java/com/wok/supportbot/controller/DocumentController.java @@ -13,6 +13,7 @@ import org.springframework.web.multipart.MultipartFile; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; /** * 知识库文档管理控制器 @@ -24,6 +25,41 @@ public class DocumentController { @Autowired private DocumentService documentService; + // ==================== Upload validation constants ==================== + + /** Whitelist of accepted file extensions (lower case, without the dot). */ + private static final Set ALLOWED_EXTENSIONS = Set.of( + "pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx", + "txt", "md", "json", "csv", "html", "xml", "rtf" + ); + + /** Upload size cap in bytes (50 MB); per the original note it mirrors the multipart setting in application.yml. */ + private static final long MAX_FILE_SIZE = 50 * 1024 * 1024; + + /** + * Checks size and extension of an upload; a name without an extension is rejected so it cannot slip past the whitelist. + * @param file the uploaded file + */ + private void validateUploadFile(MultipartFile file) { + if (file.getSize() > MAX_FILE_SIZE) { + throw new IllegalArgumentException("文件大小超过限制(最大 50MB)"); + } + String extension = getFileExtension(file.getOriginalFilename()); + if (extension == null || !ALLOWED_EXTENSIONS.contains(extension)) { + throw new IllegalArgumentException("不支持的文件类型: " + (extension == null ? "(无扩展名)" : extension)); + } + } + + /** + * Returns the lower-cased text after the last dot of the name, or null when the name is null or contains no dot. + */ + private String getFileExtension(String filename) { + if (filename == null || !filename.contains(".")) { + return null; + } + return filename.substring(filename.lastIndexOf(".") + 1).toLowerCase(); + } + // 
==================== 文档上传 ==================== /** @@ -42,6 +78,7 @@ public class DocumentController { @RequestParam(required = false) Long categoryId, @RequestParam(required = false) List tags) { try { + validateUploadFile(file); KnowledgeDocument doc = documentService.uploadFile(file, title, categoryId, tags); return ResponseEntity.ok(Map.of( "success", true, @@ -96,6 +133,7 @@ public class DocumentController { @RequestParam(required = false) Long categoryId, @RequestParam(required = false) List tags) { try { + validateUploadFile(file); KnowledgeDocument doc = documentService.uploadMarkdown(file, title, categoryId, tags); return ResponseEntity.ok(Map.of( "success", true, @@ -120,6 +158,7 @@ public class DocumentController { @RequestParam(required = false) Long categoryId, @RequestParam(required = false) List tags) { try { + validateUploadFile(file); KnowledgeDocument doc = documentService.uploadJsonBasic(file, title, categoryId, tags); return ResponseEntity.ok(Map.of( "success", true, @@ -145,6 +184,7 @@ public class DocumentController { @RequestParam(required = false) Long categoryId, @RequestParam(required = false) List tags) { try { + validateUploadFile(file); KnowledgeDocument doc = documentService.uploadJsonFields(file, fields, title, categoryId, tags); return ResponseEntity.ok(Map.of( "success", true, @@ -171,6 +211,7 @@ public class DocumentController { @RequestParam(required = false) Long categoryId, @RequestParam(required = false) List tags) { try { + validateUploadFile(file); KnowledgeDocument doc = documentService.uploadJsonPointer(file, pointer, title, categoryId, tags); return ResponseEntity.ok(Map.of( "success", true, @@ -286,6 +327,82 @@ public class DocumentController { } } + /** + * Deletes several documents in one request; answers 400 when no IDs are supplied or an ID is malformed, 500 on any other failure. + */ + @PostMapping("/document/batch/delete") + public ResponseEntity> batchDeleteDocuments(@RequestBody Map body) { + try { + List ids = extractIds(body); + if (ids.isEmpty()) { + return ResponseEntity.badRequest().body(Map.of( + "success", false, + "message", 
"请提供要删除的文档ID列表" + )); + } + Map result = documentService.batchDeleteDocuments(ids); + return ResponseEntity.ok(Map.of( + "success", true, + "message", String.format("批量删除完成:成功 %d 个,失败 %d 个", + result.get("successCount"), result.get("failCount")), + "data", result + )); + } catch (Exception e) { + return ResponseEntity.status(e instanceof IllegalArgumentException ? 400 : 500).body(Map.of( + "success", false, + "message", "批量删除失败:" + e.getMessage() + )); + } + } + + /** + * Re-processes several documents in one request; answers 400 when no IDs are supplied or an ID is malformed, 500 on any other failure. + */ + @PostMapping("/document/batch/reprocess") + public ResponseEntity> batchReprocessDocuments(@RequestBody Map body) { + try { + List ids = extractIds(body); + if (ids.isEmpty()) { + return ResponseEntity.badRequest().body(Map.of( + "success", false, + "message", "请提供要重新处理的文档ID列表" + )); + } + Map result = documentService.batchReprocessDocuments(ids); + return ResponseEntity.ok(Map.of( + "success", true, + "message", String.format("批量重新处理完成:成功 %d 个,失败 %d 个", + result.get("successCount"), result.get("failCount")), + "data", result + )); + } catch (Exception e) { + return ResponseEntity.status(e instanceof IllegalArgumentException ? 400 : 500).body(Map.of( + "success", false, + "message", "批量重新处理失败:" + e.getMessage() + )); + } + } + + /** + * Reads the "ids" entry of the request body as a list of Long, accepting numeric and string IDs; throws IllegalArgumentException (NumberFormatException included) for anything else. + */ + @SuppressWarnings("unchecked") + private List extractIds(Map body) { + List rawIds = (List) body.get("ids"); + if (rawIds == null || rawIds.isEmpty()) { + return List.of(); + } + return rawIds.stream().map(id -> { + if (id instanceof Number) { + return ((Number) id).longValue(); + } else if (id instanceof String) { + return Long.parseLong((String) id); + } else { + throw new IllegalArgumentException("无效的ID格式: " + id); + } + }).toList(); + } + /** * 更新文档元信息 */ diff --git a/src/main/java/com/wok/supportbot/document/transform/MyTokenTextSplitter.java b/src/main/java/com/wok/supportbot/document/transform/MyTokenTextSplitter.java index 3e13b15..bbc7d53 100644 --- a/src/main/java/com/wok/supportbot/document/transform/MyTokenTextSplitter.java +++ 
b/src/main/java/com/wok/supportbot/document/transform/MyTokenTextSplitter.java @@ -1,34 +1,67 @@ package com.wok.supportbot.document.transform; +import com.wok.supportbot.config.ChunkConfig; import org.springframework.ai.document.Document; import org.springframework.ai.transformer.splitter.TokenTextSplitter; +import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; import java.util.List; /** * 自定义基于 Token 的切词器 + * 支持通过 ChunkConfig 动态调整分块参数 */ @Component public class MyTokenTextSplitter { + @Autowired + private ChunkConfig chunkConfig; + /** - * 使用默认设置创建分割器。 - * @param documents - * @return + * 使用全局配置参数创建分割器 */ public List splitDocuments(List documents) { - TokenTextSplitter splitter = new TokenTextSplitter(); + TokenTextSplitter splitter = new TokenTextSplitter( + chunkConfig.getChunkSize(), + chunkConfig.getOverlap(), + chunkConfig.getMinChunkSizeChars(), + chunkConfig.getMaxNumChunks(), + chunkConfig.isKeepSeparator() + ); + return splitter.apply(documents); + } + + /** + * 使用自定义参数创建分割器(覆盖全局配置) + * + * @param documents 文档列表 + * @param chunkSize 分块大小 + * @param overlap 重叠大小 + */ + public List splitDocuments(List documents, Integer chunkSize, Integer overlap) { + int cs = chunkSize != null ? chunkSize : chunkConfig.getChunkSize(); + int ol = overlap != null ? 
overlap : chunkConfig.getOverlap(); + TokenTextSplitter splitter = new TokenTextSplitter( + cs, ol, + chunkConfig.getMinChunkSizeChars(), + chunkConfig.getMaxNumChunks(), + chunkConfig.isKeepSeparator() + ); return splitter.apply(documents); } /** - * 使用自定义参数创建分割器,通过调整参数,可以控制分割的粒度和方式,适应不同的应用场景。 - * @param documents - * @return + * 使用自定义参数创建分割器(全参数覆盖) */ public List splitCustomized(List documents) { - TokenTextSplitter splitter = new TokenTextSplitter(200, 100, 10, 5000, true); + TokenTextSplitter splitter = new TokenTextSplitter( + chunkConfig.getChunkSize(), + chunkConfig.getOverlap(), + chunkConfig.getMinChunkSizeChars(), + chunkConfig.getMaxNumChunks(), + chunkConfig.isKeepSeparator() + ); return splitter.apply(documents); } } diff --git a/src/main/java/com/wok/supportbot/entity/KnowledgeDocument.java b/src/main/java/com/wok/supportbot/entity/KnowledgeDocument.java index 83dff08..342b43f 100644 --- a/src/main/java/com/wok/supportbot/entity/KnowledgeDocument.java +++ b/src/main/java/com/wok/supportbot/entity/KnowledgeDocument.java @@ -93,6 +93,12 @@ public class KnowledgeDocument implements Serializable { @TableField("error_message") private String errorMessage; + /** + * 内容哈希值(SHA-256),用于文档去重 + */ + @TableField("content_hash") + private String contentHash; + /** * 创建时间 */ diff --git a/src/main/java/com/wok/supportbot/service/DocumentService.java b/src/main/java/com/wok/supportbot/service/DocumentService.java index 5e1078d..2e08c5f 100644 --- a/src/main/java/com/wok/supportbot/service/DocumentService.java +++ b/src/main/java/com/wok/supportbot/service/DocumentService.java @@ -24,6 +24,9 @@ import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; import org.springframework.web.multipart.MultipartFile; +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; import java.util.*; import java.util.stream.Collectors; @@ -84,6 +87,15 @@ public 
class DocumentService { public KnowledgeDocument uploadDocument(List documents, String title, String sourceName, String fileType, Long fileSize, String content, Long categoryId, List tags) { + // 0. Reject the upload when a stored document already carries the same content hash + String contentHash = computeContentHash(content); + if (contentHash != null) { + String duplicateTitle = checkContentDuplicate(contentHash); + if (duplicateTitle != null) { + throw new RuntimeException("文档内容重复,已存在相同内容的文档: " + duplicateTitle); + } + } + // 1. 创建文档记录(状态 PROCESSING) KnowledgeDocument docRecord = KnowledgeDocument.builder() .title(title != null ? title : sourceName) @@ -93,6 +105,7 @@ public class DocumentService { .content(content != null && content.length() > 2000 ? content.substring(0, 2000) : content) .categoryId(categoryId != null ? categoryId : 0L) .tags(tags != null ? Map.of("tags", tags) : null) + .contentHash(contentHash) .status("PROCESSING") .chunkCount(0) .build(); @@ -305,6 +318,66 @@ public class DocumentService { return vectorCount; } + /** + * Deletes each listed document through deleteDocument, recording a per-ID outcome instead of stopping at the first failure. + * + * @param ids IDs of the documents to delete + * @return map holding successCount, failCount and the per-ID details list + */ + public Map batchDeleteDocuments(List ids) { + int successCount = 0; + int failCount = 0; + List> details = new ArrayList<>(); + + for (Long id : ids) { + try { + int vectorCount = deleteDocument(id); + successCount++; + details.add(Map.of("id", id, "success", true, "deletedVectors", vectorCount)); + } catch (Exception e) { + failCount++; + details.add(Map.of("id", id, "success", false, "message", Objects.toString(e.getMessage(), e.getClass().getSimpleName()))); // Map.of rejects null values + log.warn("批量删除文档失败: id={}", id, e); + } + } + + Map result = new HashMap<>(); + result.put("successCount", successCount); + result.put("failCount", failCount); + result.put("details", details); + return result; + } + + /** + * Re-processes each listed document through reprocessDocument, recording a per-ID outcome instead of stopping at the first failure. + * + * @param ids IDs of the documents to re-process + * @return map holding successCount, failCount and the per-ID details list + */ + public Map batchReprocessDocuments(List ids) { + int successCount = 0; + int failCount = 0; + List> details = new ArrayList<>(); + + for (Long id : ids) { + try { + reprocessDocument(id); + successCount++; + details.add(Map.of("id", id, 
"success", true)); + } catch (Exception e) { + failCount++; + details.add(Map.of("id", id, "success", false, "message", Objects.toString(e.getMessage(), e.getClass().getSimpleName()))); // Map.of rejects null values + log.warn("批量重新处理文档失败: id={}", id, e); + } + } + + Map result = new HashMap<>(); + result.put("successCount", successCount); + result.put("failCount", failCount); + result.put("details", details); + return result; + } + /** * 重新处理文档(重新分块 + 向量化) */ @@ -580,6 +653,43 @@ public class DocumentService { // ==================== 内部方法 ==================== + /** + * Returns the lower-case hex SHA-256 of the UTF-8 bytes of the content, or null for null/empty content or when SHA-256 is unavailable. + */ + private String computeContentHash(String content) { + if (content == null || content.isEmpty()) { + return null; + } + try { + MessageDigest digest = MessageDigest.getInstance("SHA-256"); + byte[] hashBytes = digest.digest(content.getBytes(StandardCharsets.UTF_8)); + StringBuilder sb = new StringBuilder(); + for (byte b : hashBytes) { + sb.append(String.format("%02x", b)); + } + return sb.toString(); + } catch (NoSuchAlgorithmException e) { + log.error("SHA-256 算法不可用", e); + return null; + } + } + + /** + * Looks up a stored document with the same content hash. + * @param contentHash content hash to look for, may be null + * @return title of one matching document, or null when the hash is null or nothing matches + */ + private String checkContentDuplicate(String contentHash) { + if (contentHash == null) { + return null; + } + QueryWrapper wrapper = new QueryWrapper<>(); + wrapper.eq("content_hash", contentHash); + wrapper.select("title").last("LIMIT 1"); // selectOne throws when several rows share the hash + KnowledgeDocument existing = documentMapper.selectOne(wrapper); + return existing != null ? 
existing.getTitle() : null; + } + /** * 根据文档ID删除 vector_store 中关联的所有向量 */ diff --git a/src/main/resources/static/components/DocList.js b/src/main/resources/static/components/DocList.js index c42e069..ba49417 100644 --- a/src/main/resources/static/components/DocList.js +++ b/src/main/resources/static/components/DocList.js @@ -1,9 +1,9 @@ /** - * 📋 文档列表 + 分页 + * 📋 文档列表 + 分页 + 批量操作 */ import { ref } from 'vue' import { store } from '../js/store.js' -import { listDocuments, deleteDocument, reprocessDocument } from '../js/api.js' +import { listDocuments, deleteDocument, reprocessDocument, batchDeleteDocuments, batchReprocessDocuments } from '../js/api.js' import { toast, formatDate } from '../js/utils.js' export default { @@ -22,12 +22,21 @@ export default { + + +
+ @@ -39,9 +48,10 @@ export default { - + - + +
ID 标题 类型
暂无文档暂无文档
{{ d.id }} {{ d.title }}
@@ -79,6 +89,12 @@ export default { const total = ref(0) const filterCategory = ref('') const filterStatus = ref('') + const selectedIds = ref(new Set()) + + // 计算是否全选 + const isAllSelected = () => { + return documents.value.length > 0 && documents.value.every(d => selectedIds.value.has(d.id)) + } function statusClass(status) { return status === 'READY' ? 'status-ready' @@ -88,6 +104,8 @@ export default { async function load(p = 1) { page.value = p + // 切换页面时清空选择 + selectedIds.value = new Set() try { const json = await listDocuments(p, 10, filterCategory.value || undefined, filterStatus.value || undefined) if (json.success) { @@ -102,6 +120,27 @@ export default { } } + function toggleSelect(id) { + const s = new Set(selectedIds.value) + if (s.has(id)) s.delete(id) + else s.add(id) + selectedIds.value = s + } + + function toggleSelectAll() { + if (isAllSelected()) { + selectedIds.value = new Set() + } else { + const s = new Set() + documents.value.forEach(d => s.add(d.id)) + selectedIds.value = s + } + } + + function clearSelection() { + selectedIds.value = new Set() + } + function viewDetail(id) { store.openDetail(id) } @@ -137,9 +176,46 @@ export default { } } + async function batchRemove() { + const ids = Array.from(selectedIds.value) + if (!confirm(`确定删除选中的 ${ids.length} 个文档?关联的向量也将被删除`)) return + try { + const json = await batchDeleteDocuments(ids) + if (json.success) { + toast(json.message, 'success') + selectedIds.value = new Set() + load(page.value) + store.loadStats() + } else { + toast(json.message || '批量删除失败', 'error') + } + } catch (e) { + toast('批量删除失败:' + e.message, 'error') + } + } + + async function batchReprocess() { + const ids = Array.from(selectedIds.value) + if (!confirm(`确定重新处理选中的 ${ids.length} 个文档?`)) return + try { + const json = await batchReprocessDocuments(ids) + if (json.success) { + toast(json.message, 'success') + selectedIds.value = new Set() + load(page.value) + } else { + toast(json.message || '批量重新处理失败', 'error') + } + } catch (e) { 
+ toast('批量重新处理失败:' + e.message, 'error') + } + } + return { documents, page, pages, total, filterCategory, filterStatus, - store, statusClass, load, viewDetail, remove, reprocess, formatDate + selectedIds, store, statusClass, isAllSelected, + load, toggleSelect, toggleSelectAll, clearSelection, + viewDetail, remove, reprocess, batchRemove, batchReprocess, formatDate } } } diff --git a/src/main/resources/static/components/DocUpload.js b/src/main/resources/static/components/DocUpload.js index e2d87d4..8af6c3d 100644 --- a/src/main/resources/static/components/DocUpload.js +++ b/src/main/resources/static/components/DocUpload.js @@ -1,12 +1,23 @@ /** * 📤 文档上传面板 - * 支持 6 种上传格式:通用文件、文本、Markdown、JSON(3种模式) + * 支持 6 种上传格式 + 前端校验 + 上传进度 + 分块配置 */ import { ref, reactive } from 'vue' import { store } from '../js/store.js' import { uploadFile, uploadString, uploadMarkdown, uploadJsonBasic, uploadJsonFields, uploadJsonPointer } from '../js/api.js' import { toast, formatBytes } from '../js/utils.js' +// ==================== 上传校验常量 ==================== + +/** 允许上传的文件类型白名单 */ +const ALLOWED_EXTENSIONS = new Set([ + 'pdf', 'doc', 'docx', 'xls', 'xlsx', 'ppt', 'pptx', + 'txt', 'md', 'json', 'csv', 'html', 'xml', 'rtf' +]) + +/** 文件大小上限 50MB */ +const MAX_FILE_SIZE = 50 * 1024 * 1024 + export default { template: `
@@ -22,6 +33,21 @@ export default {
+ +
+ +
+

留空则使用全局配置(分块大小 200 Token,重叠 100 Token)

+
+ + + Token +
+
+
+
+ +
+
+
+
+
上传进度:{{ uploadProgress }}%
+
+
POST/upload/file(Tika 多格式解析)
📎

点击或拖拽上传,支持多文件(PDF / Word / Excel / PPT / TXT 等)

- +
- +
⚠️ {{ validationErrors.file }}
+
@@ -59,7 +94,8 @@ export default {
- +
⚠️ {{ validationErrors.markdown }}
+
@@ -71,7 +107,8 @@ export default {
- +
⚠️ {{ validationErrors.jsonBasic }}
+
@@ -83,8 +120,9 @@ export default {
+
⚠️ {{ validationErrors.jsonFields }}
- +
@@ -96,8 +134,9 @@ export default {
+
⚠️ {{ validationErrors.jsonPointer }}
- +
@@ -110,6 +149,10 @@ export default { const stringContent = ref('') const jsonFieldsStr = ref('') const jsonPointerStr = ref('') + const showAdvanced = ref(false) + const chunkSizeOverride = ref(null) + const overlapOverride = ref(null) + const uploadProgress = ref(-1) const fileData = reactive({ file: null, markdown: null, jsonBasic: null, jsonFields: null, jsonPointer: null @@ -123,6 +166,10 @@ export default { file: '', string: '', markdown: '', jsonBasic: '', jsonFields: '', jsonPointer: '' }) + const validationErrors = reactive({ + file: '', markdown: '', jsonBasic: '', jsonFields: '', jsonPointer: '' + }) + const subTabs = [ { key: 'file', icon: '📎', label: '通用文件' }, { key: 'string', icon: '📝', label: '文本内容' }, @@ -132,31 +179,81 @@ export default { { key: 'jsonPointer', icon: '📍', label: 'JSON 按指针' } ] + /** + * 校验文件列表,返回错误信息或空字符串 + */ + function validateFiles(files) { + for (const f of files) { + // 文件大小校验 + if (f.size > MAX_FILE_SIZE) { + return `文件"${f.name}"超过 50MB 大小限制` + } + // 文件类型校验 + const ext = f.name.includes('.') ? f.name.substring(f.name.lastIndexOf('.') + 1).toLowerCase() : '' + if (ext && !ALLOWED_EXTENSIONS.has(ext)) { + return `文件类型".${ext}"不在允许列表中` + } + } + return '' + } + function handleFileSelect(event, type) { const input = event.target if (!input.files || input.files.length === 0) return - fileData[type] = Array.from(input.files) - const totalSize = fileData[type].reduce((s, f) => s + f.size, 0) - const label = fileData[type].length > 1 - ? 
`已选择 ${fileData[type].length} 个文件(共 ${formatBytes(totalSize)})` - : `已选择:${fileData[type][0].name} (${formatBytes(fileData[type][0].size)})` + const files = Array.from(input.files) + + // 前端校验 + const error = validateFiles(files) + if (error) { + // 校验不通过:清空数据,保持按钮禁用 + validationErrors[type] = error + fileData[type] = null + fileInfo[type] = `${error}` + toast(error, 'error') + return + } + + // 校验通过 + validationErrors[type] = '' + fileData[type] = files + const totalSize = files.reduce((s, f) => s + f.size, 0) + const label = files.length > 1 + ? `已选择 ${files.length} 个文件(共 ${formatBytes(totalSize)})` + : `已选择:${files[0].name} (${formatBytes(files[0].size)})` fileInfo[type] = label } function handleDrop(event, type) { event.currentTarget.classList.remove('drag-over') - const refMap = { file: 'fileInput', markdown: 'mdInput', jsonBasic: 'jsonBInput', jsonFields: 'jsonFInput', jsonPointer: 'jsonPInput' } - // 模拟文件选择 const dt = new DataTransfer() for (const f of event.dataTransfer.files) dt.items.add(f) - // 通过 handleFileSelect 处理 const fakeEvent = { target: { files: dt.files } } handleFileSelect(fakeEvent, type) } + /** + * 构建分块参数的 query string + */ + function chunkParams() { + const params = [] + if (chunkSizeOverride.value) params.push(`chunkSize=${chunkSizeOverride.value}`) + if (overlapOverride.value) params.push(`overlap=${overlapOverride.value}`) + return params.length > 0 ? (params.join('&')) : '' + } + + /** + * 将分块参数附加到 URL + */ + function appendChunkParams(url) { + const cp = chunkParams() + if (!cp) return url + return url + (url.includes('?') ? 
'&' : '?') + cp + } + async function doUpload(type) { const catId = uploadCategory.value const tagsStr = uploadTags.value.trim() + uploadProgress.value = -1 // 文本内容上传 if (type === 'string') { @@ -166,14 +263,23 @@ export default { } try { const title = stringTitle.value.trim() || stringContent.value.trim().substring(0, 30) - const json = await uploadString(stringContent.value, title, catId || undefined, tagsStr || undefined) + let url = `/upload/string?title=${encodeURIComponent(title)}` + if (catId) url += `&categoryId=${catId}` + if (tagsStr) url += `&tags=${encodeURIComponent(tagsStr)}` + url = appendChunkParams(url) + + const { default: { API_BASE } } = await import('../js/utils.js') + const res = await fetch(API_BASE + url, { method: 'POST', headers: { 'Content-Type': 'text/plain' }, body: stringContent.value }) + const json = await res.json() if (json.success) { results.string = `
${json.message} | 分块数:${json.data.chunkCount} | 状态:${json.data.status}
` toast(json.message, 'success') store.loadCategories() store.loadStats() } else { - results.string = `
${json.message}
` + const isDuplicate = json.message && json.message.includes('重复') + results.string = `
${json.message}
` + if (!isDuplicate) toast(json.message, 'error') } } catch (e) { results.string = `
上传失败:${e.message}
` @@ -194,6 +300,7 @@ export default { for (let i = 0; i < files.length; i++) { const file = files[i] + uploadProgress.value = files.length > 1 ? Math.round((i / files.length) * 100) : 0 try { const formData = new FormData() formData.append('file', file) @@ -201,30 +308,34 @@ export default { if (tagsStr) formData.append('tags', tagsStr) let json + const onProgress = files.length === 1 ? (p) => { uploadProgress.value = p } : null + switch (type) { case 'file': - json = await uploadFile(formData) + json = await uploadFile(formData, onProgress) break case 'markdown': - json = await uploadMarkdown(formData) + json = await uploadMarkdown(formData, onProgress) break case 'jsonBasic': - json = await uploadJsonBasic(formData) + json = await uploadJsonBasic(formData, onProgress) break case 'jsonFields': { if (!jsonFieldsStr.value.trim()) { toast('请输入要提取的字段名', 'error') + uploadProgress.value = -1 return } - json = await uploadJsonFields(formData, jsonFieldsStr.value.trim()) + json = await uploadJsonFields(formData, jsonFieldsStr.value.trim(), onProgress) break } case 'jsonPointer': { if (!jsonPointerStr.value.trim()) { toast('请输入 JSON Pointer 路径', 'error') + uploadProgress.value = -1 return } - json = await uploadJsonPointer(formData, jsonPointerStr.value.trim()) + json = await uploadJsonPointer(formData, jsonPointerStr.value.trim(), onProgress) break } } @@ -234,7 +345,8 @@ export default { resultsHtml.push(`${file.name} — ${json.data.chunkCount || 0} 分块`) } else { failCount++ - resultsHtml.push(`${file.name} — ${json.message || '错误'}`) + const isDuplicate = json.message && json.message.includes('重复') + resultsHtml.push(`${file.name} — ${json.message || '错误'}`) } } catch (e) { failCount++ @@ -242,6 +354,9 @@ export default { } } + uploadProgress.value = 100 + setTimeout(() => { uploadProgress.value = -1 }, 1500) + const summary = successCount > 0 ? `上传完成:成功 ${successCount} 个${failCount > 0 ? 
`,失败 ${failCount} 个` : ''}` : `全部失败(${failCount} 个文件)` @@ -257,7 +372,9 @@ export default { return { activeSubTab, uploadCategory, uploadTags, subTabs, stringTitle, stringContent, jsonFieldsStr, jsonPointerStr, - fileData, fileInfo, results, store, + showAdvanced, chunkSizeOverride, overlapOverride, + uploadProgress, + fileData, fileInfo, results, validationErrors, store, handleFileSelect, handleDrop, doUpload, formatBytes } } diff --git a/src/main/resources/static/js/api.js b/src/main/resources/static/js/api.js index 3c515be..cccbafb 100644 --- a/src/main/resources/static/js/api.js +++ b/src/main/resources/static/js/api.js @@ -27,7 +27,31 @@ async function postJSON(path, body) { } /** - * DELETE 请求,返回 JSON + * DELETE 请求 + JSON body,返回 JSON(用于批量删除等场景) + */ +async function deleteJSONWithBody(path, body) { + const res = await fetch(API_BASE + path, { + method: 'DELETE', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(body) + }) + return res.json() +} + +/** + * PUT 请求 + JSON body,返回 JSON(用于批量操作等场景) + */ +async function putJSONWithBody(path, body) { + const res = await fetch(API_BASE + path, { + method: 'PUT', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(body) + }) + return res.json() +} + +/** + * DELETE 请求,返回 JSON(无 body) */ async function deleteJSON(path) { const res = await fetch(API_BASE + path, { method: 'DELETE' }) @@ -35,7 +59,7 @@ async function deleteJSON(path) { } /** - * PUT 请求,返回 JSON + * PUT 请求,返回 JSON(参数走 query string) */ async function putJSON(path, params) { let url = API_BASE + path @@ -150,7 +174,21 @@ export function reprocessDocument(id) { return putJSON(`/document/${id}/reprocess`) } -// ==================== 语义搜索 ==================== +/** + * 批量删除文档 + */ +export function batchDeleteDocuments(ids) { + return postJSON('/document/batch/delete', { ids }) +} + +/** + * 批量重新处理文档 + */ +export function batchReprocessDocuments(ids) { + return postJSON('/document/batch/reprocess', { ids }) +} + +/** 
/** * 语义搜索 @@ -173,9 +211,51 @@ export function getStats() { // ==================== 上传 ==================== /** - * 上传普通文件 + * 上传文件(带进度回调) + * 使用 XMLHttpRequest 替代 fetch,支持监听上传进度 + * + * @param {string} path 请求路径 + * @param {FormData} formData 表单数据 + * @param {function(number): void} onProgress 进度回调,参数为 0-100 的百分比 + * @returns {Promise} + */ +function postFormWithProgress(path, formData, onProgress) { + return new Promise((resolve, reject) => { + const xhr = new XMLHttpRequest() + xhr.open('POST', API_BASE + path) + + // 上传进度监听 + xhr.upload.addEventListener('progress', (e) => { + if (e.lengthComputable && onProgress) { + onProgress(Math.round((e.loaded / e.total) * 100)) + } + }) + + xhr.addEventListener('load', () => { + try { + resolve(JSON.parse(xhr.responseText)) + } catch (e) { + reject(new Error('响应解析失败')) + } + }) + + xhr.addEventListener('error', () => reject(new Error('网络错误'))) + xhr.addEventListener('abort', () => reject(new Error('上传已取消'))) + xhr.send(formData) + }) +} + +/** + * 上传普通文件(带进度) + */ +export function uploadFile(formData, onProgress) { + return postFormWithProgress('/upload/file', formData, onProgress) +} + +/** + * 上传普通文件(无进度,兼容旧调用) */ -export function uploadFile(formData) { +export function uploadFileSimple(formData) { return postForm('/upload/file', formData) } @@ -194,31 +274,31 @@ export function uploadString(content, title, categoryId, tags) { } /** - * 上传 Markdown + * 上传 Markdown(带进度) */ -export function uploadMarkdown(formData) { - return postForm('/upload/markdown', formData) +export function uploadMarkdown(formData, onProgress) { + return postFormWithProgress('/upload/markdown', formData, onProgress) } /** - * 上传 JSON(基本方式) + * 上传 JSON(基本方式,带进度) */ -export function uploadJsonBasic(formData) { - return postForm('/upload/json/basic', formData) +export function uploadJsonBasic(formData, onProgress) { + return postFormWithProgress('/upload/json/basic', formData, onProgress) } /** - * 上传 JSON(按字段提取) + * 上传 JSON(按字段提取,带进度) */ -export 
function uploadJsonFields(formData, fields) { - return postForm(`/upload/json/fields?fields=${encodeURIComponent(fields)}`, formData) +export function uploadJsonFields(formData, fields, onProgress) { + return postFormWithProgress(`/upload/json/fields?fields=${encodeURIComponent(fields)}`, formData, onProgress) } /** - * 上传 JSON(按指针拆分) + * 上传 JSON(按指针拆分,带进度) */ -export function uploadJsonPointer(formData, pointer) { - return postForm(`/upload/json/pointer?pointer=${encodeURIComponent(pointer)}`, formData) +export function uploadJsonPointer(formData, pointer, onProgress) { + return postFormWithProgress(`/upload/json/pointer?pointer=${encodeURIComponent(pointer)}`, formData, onProgress) } // ==================== 分类 ====================