You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
630 lines
24 KiB
630 lines
24 KiB
package com.wok.supportbot.service;
|
|
|
|
import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
|
|
import com.wok.supportbot.dao.KnowledgeCategoryMapper;
|
|
import com.wok.supportbot.dao.KnowledgeDocumentMapper;
|
|
import com.wok.supportbot.document.extract.JsonDocumentLoader;
|
|
import com.wok.supportbot.document.extract.MarkdownDocumentLoader;
|
|
import com.wok.supportbot.document.extract.SimpleStringDocumentReader;
|
|
import com.wok.supportbot.document.extract.TikaDocumentReader;
|
|
import com.wok.supportbot.document.transform.MyKeywordEnricher;
|
|
import com.wok.supportbot.document.transform.MyTokenTextSplitter;
|
|
import com.wok.supportbot.entity.CategoryNode;
|
|
import com.wok.supportbot.entity.KnowledgeCategory;
|
|
import com.wok.supportbot.entity.KnowledgeDocument;
|
|
import com.wok.supportbot.entity.SearchResult;
|
|
import lombok.extern.slf4j.Slf4j;
|
|
import org.springframework.ai.document.Document;
|
|
import org.springframework.ai.vectorstore.SearchRequest;
|
|
import org.springframework.ai.vectorstore.VectorStore;
|
|
import org.springframework.ai.vectorstore.filter.Filter;
|
|
import org.springframework.beans.factory.annotation.Autowired;
|
|
import org.springframework.jdbc.core.JdbcTemplate;
|
|
import org.springframework.stereotype.Service;
|
|
import org.springframework.transaction.annotation.Transactional;
|
|
import org.springframework.web.multipart.MultipartFile;
|
|
|
|
import java.util.*;
|
|
import java.util.stream.Collectors;
|
|
|
|
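/**
 * Knowledge-base document management service.
 * Single entry point for uploading, deleting, searching, collecting statistics on,
 * and categorising documents.
 */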
|
|
@Service
|
|
@Slf4j
|
|
public class DocumentService {
|
|
|
|
@Autowired
|
|
private KnowledgeDocumentMapper documentMapper;
|
|
|
|
@Autowired
|
|
private KnowledgeCategoryMapper categoryMapper;
|
|
|
|
@Autowired
|
|
private JdbcTemplate jdbcTemplate;
|
|
|
|
@Autowired
|
|
private VectorStore pgVectorVectorStore;
|
|
|
|
@Autowired
|
|
private MyTokenTextSplitter myTokenTextSplitter;
|
|
|
|
@Autowired
|
|
private MyKeywordEnricher myKeywordEnricher;
|
|
|
|
@Autowired
|
|
private TikaDocumentReader tikaDocumentReader;
|
|
|
|
@Autowired
|
|
private SimpleStringDocumentReader simpleStringDocumentReader;
|
|
|
|
@Autowired
|
|
private MarkdownDocumentLoader markdownDocumentLoader;
|
|
|
|
@Autowired
|
|
private JsonDocumentLoader jsonDocumentLoader;
|
|
|
|
// ==================== 文档上传 ====================
|
|
|
|
/**
|
|
* 统一文档上传流程:创建记录 -> 分块 -> 关键词 -> 向量化 -> 更新状态
|
|
*
|
|
* @param documents 解析后的文档列表
|
|
* @param title 文档标题
|
|
* @param sourceName 源文件名
|
|
* @param fileType 文件类型
|
|
* @param fileSize 文件大小
|
|
* @param content 原文内容(截断预览)
|
|
* @param categoryId 分类ID
|
|
* @param tags 标签列表
|
|
* @return 创建完成的文档记录
|
|
*/
|
|
@Transactional(rollbackFor = Exception.class)
|
|
public KnowledgeDocument uploadDocument(List<Document> documents, String title, String sourceName,
|
|
String fileType, Long fileSize, String content,
|
|
Long categoryId, List<String> tags) {
|
|
// 1. 创建文档记录(状态 PROCESSING)
|
|
KnowledgeDocument docRecord = KnowledgeDocument.builder()
|
|
.title(title != null ? title : sourceName)
|
|
.sourceName(sourceName)
|
|
.fileType(fileType)
|
|
.fileSize(fileSize != null ? fileSize : 0L)
|
|
.content(content != null && content.length() > 2000 ? content.substring(0, 2000) : content)
|
|
.categoryId(categoryId != null ? categoryId : 0L)
|
|
.tags(tags != null ? Map.of("tags", tags) : null)
|
|
.status("PROCESSING")
|
|
.chunkCount(0)
|
|
.build();
|
|
documentMapper.insert(docRecord);
|
|
|
|
try {
|
|
// 2. 分块处理
|
|
List<Document> splitDocuments = myTokenTextSplitter.splitDocuments(documents);
|
|
|
|
// 3. 为每个分块设置 documentId 等元数据
|
|
for (int i = 0; i < splitDocuments.size(); i++) {
|
|
Document doc = splitDocuments.get(i);
|
|
Map<String, Object> meta = new HashMap<>(doc.getMetadata());
|
|
meta.put("documentId", String.valueOf(docRecord.getId()));
|
|
meta.put("chunkIndex", i);
|
|
meta.put("sourceName", sourceName);
|
|
meta.put("title", title != null ? title : sourceName);
|
|
if (categoryId != null) {
|
|
meta.put("categoryId", String.valueOf(categoryId));
|
|
}
|
|
if (tags != null && !tags.isEmpty()) {
|
|
meta.put("tags", tags);
|
|
}
|
|
splitDocuments.set(i, new Document(doc.getId(), doc.getText(), meta));
|
|
}
|
|
|
|
// 4. 关键词提取
|
|
List<Document> enrichedDocuments = myKeywordEnricher.enrichDocuments(splitDocuments);
|
|
|
|
// 5. 向量化存储
|
|
pgVectorVectorStore.add(enrichedDocuments);
|
|
|
|
// 6. 更新文档状态为 READY
|
|
docRecord.setStatus("READY");
|
|
docRecord.setChunkCount(enrichedDocuments.size());
|
|
documentMapper.updateById(docRecord);
|
|
|
|
log.info("文档上传成功: id={}, title={}, chunks={}", docRecord.getId(), docRecord.getTitle(), enrichedDocuments.size());
|
|
|
|
} catch (Exception e) {
|
|
// 标记为失败
|
|
docRecord.setStatus("FAILED");
|
|
docRecord.setErrorMessage(e.getMessage());
|
|
documentMapper.updateById(docRecord);
|
|
log.error("文档上传失败: id={}, title={}", docRecord.getId(), docRecord.getTitle(), e);
|
|
throw new RuntimeException("文档处理失败: " + e.getMessage(), e);
|
|
}
|
|
|
|
return docRecord;
|
|
}
|
|
|
|
/**
|
|
* 解析文件并上传
|
|
*/
|
|
public KnowledgeDocument uploadFile(MultipartFile file, String title, Long categoryId, List<String> tags) {
|
|
List<Document> documents = tikaDocumentReader.read(file);
|
|
String fileType = getFileExtension(file.getOriginalFilename());
|
|
return uploadDocument(documents,
|
|
title != null ? title : file.getOriginalFilename(),
|
|
file.getOriginalFilename(),
|
|
fileType,
|
|
file.getSize(),
|
|
documents.get(0).getText(),
|
|
categoryId,
|
|
tags);
|
|
}
|
|
|
|
/**
|
|
* 解析字符串并上传
|
|
*/
|
|
public KnowledgeDocument uploadString(String content, String title, Long categoryId, List<String> tags) {
|
|
List<Document> documents = simpleStringDocumentReader.read(content);
|
|
return uploadDocument(documents, title, title, "txt",
|
|
(long) content.length(), content, categoryId, tags);
|
|
}
|
|
|
|
/**
|
|
* 解析 Markdown 文件并上传
|
|
*/
|
|
public KnowledgeDocument uploadMarkdown(MultipartFile file, String title, Long categoryId, List<String> tags) {
|
|
List<Document> documents = markdownDocumentLoader.loadMarkdownFromFile(file);
|
|
String content = documents.stream().map(Document::getText).collect(Collectors.joining("\n"));
|
|
return uploadDocument(documents,
|
|
title != null ? title : file.getOriginalFilename(),
|
|
file.getOriginalFilename(),
|
|
"md",
|
|
file.getSize(),
|
|
content,
|
|
categoryId,
|
|
tags);
|
|
}
|
|
|
|
/**
|
|
* 解析 JSON 文件(基本方式)并上传
|
|
*/
|
|
public KnowledgeDocument uploadJsonBasic(MultipartFile file, String title, Long categoryId, List<String> tags) {
|
|
List<Document> documents = jsonDocumentLoader.loadBasicJson(file);
|
|
String content = documents.stream().map(Document::getText).collect(Collectors.joining("\n"));
|
|
return uploadDocument(documents,
|
|
title != null ? title : file.getOriginalFilename(),
|
|
file.getOriginalFilename(),
|
|
"json",
|
|
file.getSize(),
|
|
content,
|
|
categoryId,
|
|
tags);
|
|
}
|
|
|
|
/**
|
|
* 解析 JSON 文件(按字段)并上传
|
|
*/
|
|
public KnowledgeDocument uploadJsonFields(MultipartFile file, List<String> fields, String title, Long categoryId, List<String> tags) {
|
|
List<Document> documents = jsonDocumentLoader.loadJsonByFields(file, fields.toArray(new String[0]));
|
|
String content = documents.stream().map(Document::getText).collect(Collectors.joining("\n"));
|
|
return uploadDocument(documents,
|
|
title != null ? title : file.getOriginalFilename(),
|
|
file.getOriginalFilename(),
|
|
"json",
|
|
file.getSize(),
|
|
content,
|
|
categoryId,
|
|
tags);
|
|
}
|
|
|
|
/**
|
|
* 解析 JSON 文件(按指针)并上传
|
|
*/
|
|
public KnowledgeDocument uploadJsonPointer(MultipartFile file, String pointer, String title, Long categoryId, List<String> tags) {
|
|
List<Document> documents = jsonDocumentLoader.loadJsonByPointer(file, pointer);
|
|
String content = documents.stream().map(Document::getText).collect(Collectors.joining("\n"));
|
|
return uploadDocument(documents,
|
|
title != null ? title : file.getOriginalFilename(),
|
|
file.getOriginalFilename(),
|
|
"json",
|
|
file.getSize(),
|
|
content,
|
|
categoryId,
|
|
tags);
|
|
}
|
|
|
|
// ==================== 文档管理 ====================
|
|
|
|
/**
|
|
* 分页查询文档列表(手动分页)
|
|
*/
|
|
public Map<String, Object> listDocuments(int page, int size, Long categoryId, String status) {
|
|
// 构建基础条件(用于 count 和 list)
|
|
QueryWrapper<KnowledgeDocument> countWrapper = new QueryWrapper<>();
|
|
if (categoryId != null && categoryId > 0) {
|
|
countWrapper.eq("category_id", categoryId);
|
|
}
|
|
if (status != null && !status.isEmpty()) {
|
|
countWrapper.eq("status", status);
|
|
}
|
|
|
|
// 先查询总数(不加 ORDER BY)
|
|
Long total = documentMapper.selectCount(countWrapper);
|
|
|
|
// 构建列表查询条件
|
|
QueryWrapper<KnowledgeDocument> listWrapper = new QueryWrapper<>();
|
|
if (categoryId != null && categoryId > 0) {
|
|
listWrapper.eq("category_id", categoryId);
|
|
}
|
|
if (status != null && !status.isEmpty()) {
|
|
listWrapper.eq("status", status);
|
|
}
|
|
listWrapper.orderByDesc("create_time");
|
|
listWrapper.last("LIMIT " + size + " OFFSET " + (page - 1) * size);
|
|
List<KnowledgeDocument> records = documentMapper.selectList(listWrapper);
|
|
|
|
Map<String, Object> result = new HashMap<>();
|
|
result.put("records", records);
|
|
result.put("total", total);
|
|
result.put("page", page);
|
|
result.put("size", size);
|
|
result.put("pages", (total + size - 1) / size);
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* 获取文档详情
|
|
*/
|
|
public KnowledgeDocument getDocumentDetail(Long id) {
|
|
return documentMapper.selectById(id);
|
|
}
|
|
|
|
/**
|
|
* 获取文档的所有分块
|
|
*/
|
|
public List<Map<String, Object>> getDocumentChunks(Long id) {
|
|
String sql = "SELECT id::text as id, content, metadata, create_time FROM vector_store " +
|
|
"WHERE metadata->>'documentId' = ? ORDER BY (metadata->>'chunkIndex')::int";
|
|
return jdbcTemplate.queryForList(sql, String.valueOf(id));
|
|
}
|
|
|
|
/**
|
|
* 删除文档(逻辑删除 + 级联删除向量)
|
|
*/
|
|
@Transactional(rollbackFor = Exception.class)
|
|
public int deleteDocument(Long id) {
|
|
KnowledgeDocument doc = documentMapper.selectById(id);
|
|
if (doc == null) {
|
|
throw new RuntimeException("文档不存在");
|
|
}
|
|
// 删除关联的向量
|
|
int vectorCount = deleteVectorsByDocumentId(String.valueOf(id));
|
|
// 逻辑删除文档记录
|
|
documentMapper.deleteById(id);
|
|
log.info("删除文档: id={}, title={}, 删除向量数={}", id, doc.getTitle(), vectorCount);
|
|
return vectorCount;
|
|
}
|
|
|
|
/**
|
|
* 重新处理文档(重新分块 + 向量化)
|
|
*/
|
|
@Transactional(rollbackFor = Exception.class)
|
|
public KnowledgeDocument reprocessDocument(Long id) {
|
|
KnowledgeDocument doc = documentMapper.selectById(id);
|
|
if (doc == null) {
|
|
throw new RuntimeException("文档不存在");
|
|
}
|
|
if (doc.getContent() == null || doc.getContent().isEmpty()) {
|
|
throw new RuntimeException("文档无内容,无法重新处理");
|
|
}
|
|
|
|
// 删除旧向量
|
|
deleteVectorsByDocumentId(String.valueOf(id));
|
|
|
|
// 重新解析并处理
|
|
List<Document> documents = simpleStringDocumentReader.read(doc.getContent());
|
|
|
|
doc.setStatus("PROCESSING");
|
|
doc.setChunkCount(0);
|
|
doc.setErrorMessage(null);
|
|
documentMapper.updateById(doc);
|
|
|
|
try {
|
|
List<Document> splitDocuments = myTokenTextSplitter.splitDocuments(documents);
|
|
|
|
for (int i = 0; i < splitDocuments.size(); i++) {
|
|
Document d = splitDocuments.get(i);
|
|
Map<String, Object> meta = new HashMap<>(d.getMetadata());
|
|
meta.put("documentId", String.valueOf(doc.getId()));
|
|
meta.put("chunkIndex", i);
|
|
meta.put("sourceName", doc.getSourceName());
|
|
meta.put("title", doc.getTitle());
|
|
if (doc.getCategoryId() != null && doc.getCategoryId() > 0) {
|
|
meta.put("categoryId", String.valueOf(doc.getCategoryId()));
|
|
}
|
|
if (doc.getTags() != null && doc.getTags().containsKey("tags")) {
|
|
meta.put("tags", doc.getTags().get("tags"));
|
|
}
|
|
splitDocuments.set(i, new Document(d.getId(), d.getText(), meta));
|
|
}
|
|
|
|
List<Document> enrichedDocuments = myKeywordEnricher.enrichDocuments(splitDocuments);
|
|
pgVectorVectorStore.add(enrichedDocuments);
|
|
|
|
doc.setStatus("READY");
|
|
doc.setChunkCount(enrichedDocuments.size());
|
|
documentMapper.updateById(doc);
|
|
|
|
log.info("重新处理文档成功: id={}, title={}, chunks={}", doc.getId(), doc.getTitle(), enrichedDocuments.size());
|
|
|
|
} catch (Exception e) {
|
|
doc.setStatus("FAILED");
|
|
doc.setErrorMessage(e.getMessage());
|
|
documentMapper.updateById(doc);
|
|
log.error("重新处理文档失败: id={}, title={}", doc.getId(), doc.getTitle(), e);
|
|
throw new RuntimeException("重新处理失败: " + e.getMessage(), e);
|
|
}
|
|
|
|
return doc;
|
|
}
|
|
|
|
/**
|
|
* 更新文档元信息
|
|
*/
|
|
public void updateDocumentMetadata(Long id, String title, Long categoryId, List<String> tags) {
|
|
KnowledgeDocument doc = documentMapper.selectById(id);
|
|
if (doc == null) {
|
|
throw new RuntimeException("文档不存在");
|
|
}
|
|
if (title != null && !title.isEmpty()) {
|
|
doc.setTitle(title);
|
|
}
|
|
if (categoryId != null) {
|
|
doc.setCategoryId(categoryId);
|
|
}
|
|
if (tags != null) {
|
|
doc.setTags(Map.of("tags", tags));
|
|
}
|
|
documentMapper.updateById(doc);
|
|
|
|
// 同步更新 vector_store 中对应的 metadata
|
|
// 注意:Spring AI 当前没有直接更新 metadata 的 API
|
|
// 这里我们先更新文档记录,metadata 的同步留到后续优化
|
|
log.info("更新文档元信息: id={}, title={}", id, doc.getTitle());
|
|
}
|
|
|
|
// ==================== 语义搜索 ====================
|
|
|
|
/**
|
|
* 语义搜索
|
|
*/
|
|
public List<SearchResult> searchDocuments(String query, int topK, double similarityThreshold, Long categoryId) {
|
|
SearchRequest.Builder searchBuilder = SearchRequest.builder()
|
|
.query(query)
|
|
.topK(topK)
|
|
.similarityThreshold(similarityThreshold);
|
|
|
|
// 如果指定了分类,添加过滤条件(当前 Spring AI 1.0.0-M6 的 filter 支持有限)
|
|
// 这里先不做分类过滤,后续升级 Spring AI 版本后再完善
|
|
|
|
List<Document> results = pgVectorVectorStore.similaritySearch(searchBuilder.build());
|
|
|
|
List<SearchResult> searchResults = new ArrayList<>();
|
|
for (Document doc : results) {
|
|
Map<String, Object> metadata = doc.getMetadata();
|
|
SearchResult result = SearchResult.builder()
|
|
.id(doc.getId())
|
|
.content(doc.getText())
|
|
.score(metadata.containsKey("distance") ? ((Number) metadata.get("distance")).doubleValue() : null)
|
|
.sourceName(getStringFromMetadata(metadata, "sourceName"))
|
|
.title(getStringFromMetadata(metadata, "title"))
|
|
.chunkIndex(getIntegerFromMetadata(metadata, "chunkIndex"))
|
|
.documentId(getStringFromMetadata(metadata, "documentId"))
|
|
.metadata(metadata)
|
|
.build();
|
|
searchResults.add(result);
|
|
}
|
|
|
|
return searchResults;
|
|
}
|
|
|
|
// ==================== 统计 ====================
|
|
|
|
/**
|
|
* 获取知识库统计信息
|
|
*/
|
|
public Map<String, Object> getStats() {
|
|
// 文档统计
|
|
Long totalDocuments = documentMapper.selectCount(null);
|
|
|
|
// 按文件类型统计
|
|
String typeSql = "SELECT file_type, COUNT(*) as count FROM knowledge_document WHERE is_delete = false GROUP BY file_type";
|
|
List<Map<String, Object>> typeStats = jdbcTemplate.queryForList(typeSql);
|
|
Map<String, Long> byFileType = typeStats.stream()
|
|
.collect(Collectors.toMap(
|
|
r -> (String) r.get("file_type"),
|
|
r -> ((Number) r.get("count")).longValue()
|
|
));
|
|
|
|
// 按分类统计
|
|
String catSql = "SELECT c.name, COUNT(d.id) as count FROM knowledge_document d " +
|
|
"LEFT JOIN knowledge_category c ON d.category_id = c.id " +
|
|
"WHERE d.is_delete = false GROUP BY c.name";
|
|
List<Map<String, Object>> catStats;
|
|
try {
|
|
catStats = jdbcTemplate.queryForList(catSql);
|
|
} catch (Exception e) {
|
|
catStats = new ArrayList<>();
|
|
}
|
|
|
|
// 向量总数
|
|
String vectorSql = "SELECT COUNT(*) FROM vector_store";
|
|
Long totalVectors;
|
|
try {
|
|
totalVectors = jdbcTemplate.queryForObject(vectorSql, Long.class);
|
|
} catch (Exception e) {
|
|
totalVectors = 0L;
|
|
}
|
|
|
|
// 最近上传时间
|
|
String lastUploadSql = "SELECT MAX(create_time) FROM knowledge_document WHERE is_delete = false";
|
|
Date lastUploadTime = jdbcTemplate.queryForObject(lastUploadSql, Date.class);
|
|
|
|
Map<String, Object> stats = new LinkedHashMap<>();
|
|
stats.put("totalDocuments", totalDocuments);
|
|
stats.put("totalVectors", totalVectors);
|
|
stats.put("lastUploadTime", lastUploadTime);
|
|
stats.put("byFileType", byFileType);
|
|
stats.put("byCategory", catStats);
|
|
|
|
return stats;
|
|
}
|
|
|
|
// ==================== 分类管理 ====================
|
|
|
|
/**
|
|
* 获取分类树
|
|
*/
|
|
public List<CategoryNode> getCategoryTree() {
|
|
List<KnowledgeCategory> categories = categoryMapper.selectList(
|
|
new QueryWrapper<KnowledgeCategory>().orderByAsc("sort_order"));
|
|
|
|
Map<Long, CategoryNode> nodeMap = new LinkedHashMap<>();
|
|
List<CategoryNode> rootNodes = new ArrayList<>();
|
|
|
|
for (KnowledgeCategory cat : categories) {
|
|
CategoryNode node = CategoryNode.builder()
|
|
.id(cat.getId())
|
|
.name(cat.getName())
|
|
.description(cat.getDescription())
|
|
.parentId(cat.getParentId())
|
|
.sortOrder(cat.getSortOrder())
|
|
.documentCount(cat.getDocumentCount())
|
|
.children(new ArrayList<>())
|
|
.build();
|
|
nodeMap.put(cat.getId(), node);
|
|
}
|
|
|
|
for (CategoryNode node : nodeMap.values()) {
|
|
if (node.getParentId() == null || node.getParentId() == 0) {
|
|
rootNodes.add(node);
|
|
} else {
|
|
CategoryNode parent = nodeMap.get(node.getParentId());
|
|
if (parent != null) {
|
|
parent.getChildren().add(node);
|
|
} else {
|
|
rootNodes.add(node);
|
|
}
|
|
}
|
|
}
|
|
|
|
return rootNodes;
|
|
}
|
|
|
|
/**
|
|
* 获取分类列表
|
|
*/
|
|
public List<KnowledgeCategory> listCategories() {
|
|
return categoryMapper.selectList(
|
|
new QueryWrapper<KnowledgeCategory>().orderByAsc("sort_order"));
|
|
}
|
|
|
|
/**
|
|
* 创建分类
|
|
*/
|
|
public KnowledgeCategory createCategory(String name, String description, Long parentId, Integer sortOrder) {
|
|
KnowledgeCategory category = KnowledgeCategory.builder()
|
|
.name(name)
|
|
.description(description)
|
|
.parentId(parentId != null ? parentId : 0L)
|
|
.sortOrder(sortOrder != null ? sortOrder : 0)
|
|
.documentCount(0)
|
|
.build();
|
|
categoryMapper.insert(category);
|
|
return category;
|
|
}
|
|
|
|
/**
|
|
* 更新分类
|
|
*/
|
|
public void updateCategory(Long id, String name, String description, Integer sortOrder) {
|
|
KnowledgeCategory category = categoryMapper.selectById(id);
|
|
if (category == null) {
|
|
throw new RuntimeException("分类不存在");
|
|
}
|
|
if (name != null && !name.isEmpty()) {
|
|
category.setName(name);
|
|
}
|
|
if (description != null) {
|
|
category.setDescription(description);
|
|
}
|
|
if (sortOrder != null) {
|
|
category.setSortOrder(sortOrder);
|
|
}
|
|
categoryMapper.updateById(category);
|
|
}
|
|
|
|
/**
|
|
* 删除分类(不删除文档,仅清空关联)
|
|
*/
|
|
@Transactional(rollbackFor = Exception.class)
|
|
public void deleteCategory(Long id) {
|
|
// 将关联的文档 category_id 设为 0
|
|
KnowledgeDocument updateDoc = new KnowledgeDocument();
|
|
updateDoc.setCategoryId(0L);
|
|
documentMapper.update(updateDoc, new QueryWrapper<KnowledgeDocument>().eq("category_id", id));
|
|
|
|
// 逻辑删除分类
|
|
categoryMapper.deleteById(id);
|
|
}
|
|
|
|
// ==================== 内部方法 ====================
|
|
|
|
/**
|
|
* 根据文档ID删除 vector_store 中关联的所有向量
|
|
*/
|
|
private int deleteVectorsByDocumentId(String documentId) {
|
|
String sql = "SELECT id::text FROM vector_store WHERE metadata->>'documentId' = ?";
|
|
List<String> ids = jdbcTemplate.queryForList(sql, String.class, documentId);
|
|
|
|
if (!ids.isEmpty()) {
|
|
pgVectorVectorStore.delete(ids);
|
|
log.debug("删除向量: documentId={}, count={}", documentId, ids.size());
|
|
}
|
|
return ids.size();
|
|
}
|
|
|
|
/**
|
|
* 获取文件扩展名
|
|
*/
|
|
private String getFileExtension(String filename) {
|
|
if (filename == null || !filename.contains(".")) {
|
|
return "unknown";
|
|
}
|
|
return filename.substring(filename.lastIndexOf(".") + 1).toLowerCase();
|
|
}
|
|
|
|
/**
|
|
* 从 metadata 中安全获取字符串值
|
|
*/
|
|
private String getStringFromMetadata(Map<String, Object> metadata, String key) {
|
|
Object value = metadata.get(key);
|
|
return value != null ? value.toString() : null;
|
|
}
|
|
|
|
/**
|
|
* 从 metadata 中安全获取整数值
|
|
*/
|
|
private Integer getIntegerFromMetadata(Map<String, Object> metadata, String key) {
|
|
Object value = metadata.get(key);
|
|
if (value == null) return null;
|
|
if (value instanceof Number) {
|
|
return ((Number) value).intValue();
|
|
}
|
|
try {
|
|
return Integer.parseInt(value.toString());
|
|
} catch (NumberFormatException e) {
|
|
return null;
|
|
}
|
|
}
|
|
}
|