From f49e186f8a594a199a448c8db8d23a3bd0e0309c Mon Sep 17 00:00:00 2001
From: hygl <3154803225@qq.com>
Date: Mon, 30 Jun 2025 18:25:58 +0800
Subject: [PATCH] =?UTF-8?q?=E6=94=AF=E6=8C=81=E5=A4=9A=E7=A7=8D=E6=96=87?=
=?UTF-8?q?=E4=BB=B6=E6=A0=BC=E5=BC=8F=E7=9A=84=E4=B8=8A=E4=BC=A0=E5=92=8C?=
=?UTF-8?q?=E8=A7=A3=E6=9E=90,=20=E5=AE=9E=E7=8E=B0=E8=87=AA=E5=AE=9A?=
=?UTF-8?q?=E4=B9=89=E7=9F=A5=E8=AF=86=E5=BA=93=E7=9A=84=E5=8A=9F=E8=83=BD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
pom.xml | 6 +
.../supportbot/controller/AiController.java | 3 +-
.../controller/DocumentController.java | 232 ++++++++++++++++++
.../document/extract/JsonDocumentLoader.java | 59 +++++
.../extract/MarkdownDocumentLoader.java | 52 ++--
.../document/extract/MyJsonReader.java | 37 ---
.../extract/SimpleStringDocumentReader.java | 20 ++
.../document/extract/TikaDocumentReader.java | 44 ++++
.../transform/MyTokenTextSplitter.java | 2 +-
9 files changed, 387 insertions(+), 68 deletions(-)
create mode 100644 src/main/java/com/wok/supportbot/controller/DocumentController.java
create mode 100644 src/main/java/com/wok/supportbot/document/extract/JsonDocumentLoader.java
delete mode 100644 src/main/java/com/wok/supportbot/document/extract/MyJsonReader.java
create mode 100644 src/main/java/com/wok/supportbot/document/extract/SimpleStringDocumentReader.java
create mode 100644 src/main/java/com/wok/supportbot/document/extract/TikaDocumentReader.java
diff --git a/pom.xml b/pom.xml
index a5c01d4..d79948a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -101,6 +101,12 @@
spring-ai-starter-vector-store-pgvector
1.0.0-M7
-->
+
+
+ org.springframework.ai
+ spring-ai-tika-document-reader
+ 1.0.0
+
com.baomidou
mybatis-plus-spring-boot3-starter
diff --git a/src/main/java/com/wok/supportbot/controller/AiController.java b/src/main/java/com/wok/supportbot/controller/AiController.java
index ef6c15b..1bd6f7e 100644
--- a/src/main/java/com/wok/supportbot/controller/AiController.java
+++ b/src/main/java/com/wok/supportbot/controller/AiController.java
@@ -18,7 +18,8 @@ import reactor.core.publisher.Flux;
import java.io.IOException;
-
+@RestController
+@RequestMapping("/ai")
public class AiController {
@Resource
diff --git a/src/main/java/com/wok/supportbot/controller/DocumentController.java b/src/main/java/com/wok/supportbot/controller/DocumentController.java
new file mode 100644
index 0000000..bcc7d34
--- /dev/null
+++ b/src/main/java/com/wok/supportbot/controller/DocumentController.java
@@ -0,0 +1,232 @@
+package com.wok.supportbot.controller;
+
+import com.wok.supportbot.document.extract.JsonDocumentLoader;
+import com.wok.supportbot.document.extract.MarkdownDocumentLoader;
+import com.wok.supportbot.document.extract.SimpleStringDocumentReader;
+import com.wok.supportbot.document.extract.TikaDocumentReader;
+import com.wok.supportbot.document.transform.MyKeywordEnricher;
+import com.wok.supportbot.document.transform.MyTokenTextSplitter;
+import org.springframework.ai.document.Document;
+import org.springframework.ai.vectorstore.VectorStore;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.http.ResponseEntity;
+import org.springframework.web.bind.annotation.*;
+import org.springframework.web.multipart.MultipartFile;
+
+import java.util.List;
+import java.util.Map;
+
+@RestController
+@RequestMapping("/document")
+public class DocumentController {
+
+ @Autowired
+ private TikaDocumentReader tikaDocumentReader;
+
+ @Autowired
+ private SimpleStringDocumentReader simpleStringDocumentReader;
+
+ @Autowired
+ private MarkdownDocumentLoader markdownDocumentLoader;
+
+ @Autowired
+ private JsonDocumentLoader jsonDocumentLoader;
+
+ @Autowired
+ private MyTokenTextSplitter myTokenTextSplitter;
+
+ @Autowired
+ private MyKeywordEnricher myKeywordEnricher;
+
+ @Autowired
+ private VectorStore pgVectorVectorStore;
+
+ /**
+ * Upload a regular file (multiple formats are supported) and parse it with Tika.
+ */
+ @PostMapping("/upload/file")
+ public ResponseEntity