Browse Source

整合向量数据库,实现对文档的提取,转换,加载

master
hygl 1 year ago
parent
commit
dcb3eaf7fe
  1. 26
      pom.xml
  2. 6
      src/main/java/com/wok/supportbot/SupportBotApplication.java
  3. 34
      src/main/java/com/wok/supportbot/app/AssistantApp.java
  4. 4
      src/main/java/com/wok/supportbot/app/ProductInfoApp.java
  5. 26
      src/main/java/com/wok/supportbot/demo/invoke/SpringAiAiInvoke.java
  6. 2
      src/main/java/com/wok/supportbot/entity/ProductInfo.java
  7. 53
      src/main/java/com/wok/supportbot/extract/MarkdownDocumentLoader.java
  8. 37
      src/main/java/com/wok/supportbot/extract/MyJsonReader.java
  9. 25
      src/main/java/com/wok/supportbot/load/InMemoryVectorStoreConfig.java
  10. 38
      src/main/java/com/wok/supportbot/load/PgVectorStoreConfig.java
  11. 14
      src/main/java/com/wok/supportbot/record/AssistantReport.java
  12. 29
      src/main/java/com/wok/supportbot/transform/MyKeywordEnricher.java
  13. 34
      src/main/java/com/wok/supportbot/transform/MyTokenTextSplitter.java
  14. 32
      src/test/java/com/wok/supportbot/PgVectorVectorStoreConfigTest.java
  15. 11
      src/test/java/com/wok/supportbot/SupportBotApplicationTests.java

26
pom.xml

@ -64,6 +64,11 @@
<artifactId>kryo</artifactId> <artifactId>kryo</artifactId>
<version>5.6.2</version> <version>5.6.2</version>
</dependency> </dependency>
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-markdown-document-reader</artifactId>
<version>1.0.0-M6</version>
</dependency>
<dependency> <dependency>
<groupId>org.projectlombok</groupId> <groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId> <artifactId>lombok</artifactId>
@ -75,6 +80,27 @@
<artifactId>spring-boot-starter-test</artifactId> <artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope> <scope>test</scope>
</dependency> </dependency>
<!-- 手动整合 PGVector 向量存储 -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-jdbc</artifactId>
</dependency>
<dependency>
<groupId>org.postgresql</groupId>
<artifactId>postgresql</artifactId>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-pgvector-store</artifactId>
<version>1.0.0-M6</version>
</dependency>
<!-- 自动整合 PGVector 向量存储 -->
<!--<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-starter-vector-store-pgvector</artifactId>
<version>1.0.0-M7</version>
</dependency>-->
</dependencies> </dependencies>
<build> <build>

6
src/main/java/com/wok/supportbot/SupportBotApplication.java

@ -1,10 +1,12 @@
package com.wok.supportbot; package com.wok.supportbot;
import org.springframework.ai.autoconfigure.vectorstore.pgvector.PgVectorStoreAutoConfiguration;
import org.springframework.boot.SpringApplication; import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication; import org.springframework.boot.autoconfigure.SpringBootApplication;
@SpringBootApplication
public class SupportBotApplication {
@SpringBootApplication(exclude = PgVectorStoreAutoConfiguration.class)
public class
SupportBotApplication {
public static void main(String[] args) { public static void main(String[] args) {
SpringApplication.run(SupportBotApplication.class, args); SpringApplication.run(SupportBotApplication.class, args);

34
src/main/java/com/wok/supportbot/app/AssistantApp.java

@ -1,19 +1,18 @@
package com.wok.supportbot.app; package com.wok.supportbot.app;
import com.wok.supportbot.advisor.MyLoggerAdvisor; import com.wok.supportbot.advisor.MyLoggerAdvisor;
import com.wok.supportbot.advisor.ReReadingAdvisor;
import com.wok.supportbot.record.AssistantReport;
import jakarta.annotation.Resource;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.springframework.ai.chat.client.ChatClient; import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.chat.client.advisor.MessageChatMemoryAdvisor; import org.springframework.ai.chat.client.advisor.MessageChatMemoryAdvisor;
import org.springframework.ai.chat.client.advisor.QuestionAnswerAdvisor;
import org.springframework.ai.chat.memory.ChatMemory; import org.springframework.ai.chat.memory.ChatMemory;
import org.springframework.ai.chat.memory.InMemoryChatMemory; import org.springframework.ai.chat.memory.InMemoryChatMemory;
import org.springframework.ai.chat.model.ChatModel; import org.springframework.ai.chat.model.ChatModel;
import org.springframework.ai.chat.model.ChatResponse; import org.springframework.ai.chat.model.ChatResponse;
import org.springframework.ai.vectorstore.VectorStore;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
import java.util.List;
import static org.springframework.ai.chat.client.advisor.AbstractChatMemoryAdvisor.CHAT_MEMORY_CONVERSATION_ID_KEY; import static org.springframework.ai.chat.client.advisor.AbstractChatMemoryAdvisor.CHAT_MEMORY_CONVERSATION_ID_KEY;
import static org.springframework.ai.chat.client.advisor.AbstractChatMemoryAdvisor.CHAT_MEMORY_RETRIEVE_SIZE_KEY; import static org.springframework.ai.chat.client.advisor.AbstractChatMemoryAdvisor.CHAT_MEMORY_RETRIEVE_SIZE_KEY;
@ -28,6 +27,9 @@ import static org.springframework.ai.chat.client.advisor.AbstractChatMemoryAdvis
@Slf4j @Slf4j
public class AssistantApp { public class AssistantApp {
@Resource
private VectorStore vectorStore;
private final ChatClient chatClient; private final ChatClient chatClient;
private static final String SYSTEM_PROMPT = "你是一名电商平台的智能客服助手,负责解答用户关于商品、订单、支付、物流和售后等问题。" + private static final String SYSTEM_PROMPT = "你是一名电商平台的智能客服助手,负责解答用户关于商品、订单、支付、物流和售后等问题。" +
@ -73,4 +75,28 @@ public class AssistantApp {
//log.info("content: {}", content); //log.info("content: {}", content);
return content; return content;
} }
/**
 * Chats with the model while consulting the RAG knowledge base.
 *
 * @param message the user message sent to the model
 * @param chatId  value passed as the chat-memory conversation id
 * @return the text of the model's reply
 */
public String doChatWithRag(String message, String chatId) {
    ChatResponse ragResponse = chatClient
            .prompt()
            .user(message)
            .advisors(advisorSpec -> advisorSpec
                    .param(CHAT_MEMORY_CONVERSATION_ID_KEY, chatId)
                    .param(CHAT_MEMORY_RETRIEVE_SIZE_KEY, 10))
            // Logging advisor makes the effect of retrieval easy to observe.
            .advisors(new MyLoggerAdvisor())
            // Question answering backed by the injected vector store (RAG).
            .advisors(new QuestionAnswerAdvisor(vectorStore))
            .call()
            .chatResponse();

    String reply = ragResponse.getResult().getOutput().getText();
    log.info("content: {}", reply);
    return reply;
}
} }

4
src/main/java/com/wok/supportbot/app/ProductInfoApp.java

@ -1,14 +1,13 @@
package com.wok.supportbot.app; package com.wok.supportbot.app;
import com.wok.supportbot.advisor.MyLoggerAdvisor; import com.wok.supportbot.advisor.MyLoggerAdvisor;
import com.wok.supportbot.record.ProductInfo;
import com.wok.supportbot.entity.ProductInfo;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.springframework.ai.chat.client.ChatClient; import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.chat.client.advisor.MessageChatMemoryAdvisor; import org.springframework.ai.chat.client.advisor.MessageChatMemoryAdvisor;
import org.springframework.ai.chat.memory.ChatMemory; import org.springframework.ai.chat.memory.ChatMemory;
import org.springframework.ai.chat.memory.InMemoryChatMemory; import org.springframework.ai.chat.memory.InMemoryChatMemory;
import org.springframework.ai.chat.model.ChatModel; import org.springframework.ai.chat.model.ChatModel;
import org.springframework.ai.chat.model.ChatResponse;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
import static org.springframework.ai.chat.client.advisor.AbstractChatMemoryAdvisor.CHAT_MEMORY_CONVERSATION_ID_KEY; import static org.springframework.ai.chat.client.advisor.AbstractChatMemoryAdvisor.CHAT_MEMORY_CONVERSATION_ID_KEY;
@ -59,6 +58,7 @@ public class ProductInfoApp {
.call() .call()
.entity(ProductInfo.class); .entity(ProductInfo.class);
log.info("Extracted product info: {}", productInfo); log.info("Extracted product info: {}", productInfo);
// todo 保存到数据库
return productInfo; return productInfo;
} }
} }

26
src/main/java/com/wok/supportbot/demo/invoke/SpringAiAiInvoke.java

@ -1,26 +0,0 @@
package com.wok.supportbot.demo.invoke;
import jakarta.annotation.Resource;
import org.springframework.ai.chat.messages.AssistantMessage;
import org.springframework.ai.chat.model.ChatModel;
import org.springframework.ai.chat.prompt.Prompt;
import org.springframework.boot.CommandLineRunner;
import org.springframework.stereotype.Component;
/**
 * Invokes the Alibaba AI large model through the Spring AI framework.
 *
 * <p>Implements {@link CommandLineRunner}: {@code run} sends a fixed
 * "hello" prompt and prints the reply text to standard output.
 */
@Component
public class SpringAiAiInvoke implements CommandLineRunner {

    @Resource
    private ChatModel dashscopeChatModel;

    @Override
    public void run(String... args) throws Exception {
        Prompt greeting = new Prompt("hello");
        AssistantMessage reply = dashscopeChatModel.call(greeting)
                .getResult()
                .getOutput();
        String replyText = reply.getText();
        System.out.println(replyText);
    }
}

2
src/main/java/com/wok/supportbot/record/ProductInfo.java → src/main/java/com/wok/supportbot/entity/ProductInfo.java

@ -1,4 +1,4 @@
package com.wok.supportbot.record;
package com.wok.supportbot.entity;
import lombok.Data; import lombok.Data;

53
src/main/java/com/wok/supportbot/extract/MarkdownDocumentLoader.java

@ -0,0 +1,53 @@
package com.wok.supportbot.extract;
import lombok.extern.slf4j.Slf4j;
import org.springframework.ai.document.Document;
import org.springframework.ai.reader.markdown.MarkdownDocumentReader;
import org.springframework.ai.reader.markdown.config.MarkdownDocumentReaderConfig;
import org.springframework.core.io.Resource;
import org.springframework.core.io.support.ResourcePatternResolver;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Document loader: reads Markdown files from the classpath into Spring AI documents.
 */
@Component
@Slf4j
public class MarkdownDocumentLoader {

    private final ResourcePatternResolver resourcePatternResolver;

    public MarkdownDocumentLoader(ResourcePatternResolver resourcePatternResolver) {
        this.resourcePatternResolver = resourcePatternResolver;
    }

    /**
     * Loads every Markdown file matching {@code classpath:document/*.md}.
     *
     * <p>Each file is read with its own reader configuration so that the file
     * name can be attached to the resulting documents as {@code filename}
     * metadata. If resolving the resources fails with an {@link IOException},
     * the error is logged and whatever has been collected so far is returned.
     *
     * @return the documents read from all matching files; possibly empty
     */
    public List<Document> loadMarkdowns() {
        List<Document> loaded = new ArrayList<>();
        try {
            Resource[] markdownFiles = resourcePatternResolver.getResources("classpath:document/*.md");
            for (Resource markdownFile : markdownFiles) {
                // The file name goes into the metadata, so the config is built per file.
                MarkdownDocumentReaderConfig readerConfig = MarkdownDocumentReaderConfig.builder()
                        .withHorizontalRuleCreateDocument(true)
                        .withIncludeCodeBlock(false)
                        .withIncludeBlockquote(false)
                        .withAdditionalMetadata("filename", markdownFile.getFilename())
                        .build();
                List<Document> fromFile = new MarkdownDocumentReader(markdownFile, readerConfig).get();
                loaded.addAll(fromFile);
            }
        } catch (IOException e) {
            log.error("Markdown 文档加载失败", e);
        }
        return loaded;
    }
}

37
src/main/java/com/wok/supportbot/extract/MyJsonReader.java

@ -0,0 +1,37 @@
package com.wok.supportbot.extract;
import org.springframework.ai.document.Document;
import org.springframework.ai.reader.JsonReader;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.core.io.Resource;
import org.springframework.stereotype.Component;
import java.util.List;
/**
 * Reads documents from a JSON file on the classpath ({@code products.json}).
 */
@Component
class MyJsonReader {

    private final Resource resource;

    MyJsonReader(@Value("classpath:products.json") Resource resource) {
        this.resource = resource;
    }

    /**
     * Basic usage: reads the JSON resource with a default reader.
     *
     * @return the documents produced by the reader
     */
    List<Document> loadBasicJsonDocuments() {
        return new JsonReader(this.resource).get();
    }

    /**
     * Restricts the document content to the {@code description} and
     * {@code features} JSON fields.
     *
     * @return the documents produced by the reader
     */
    List<Document> loadJsonWithSpecificFields() {
        return new JsonReader(this.resource, "description", "features").get();
    }

    /**
     * Uses a JSON pointer to extract the content under {@code /items}.
     *
     * @return the documents produced for the pointed-to content
     */
    List<Document> loadJsonWithPointer() {
        return new JsonReader(this.resource).get("/items");
    }
}

25
src/main/java/com/wok/supportbot/load/InMemoryVectorStoreConfig.java

@ -0,0 +1,25 @@
package com.wok.supportbot.load;
import com.wok.supportbot.extract.MarkdownDocumentLoader;
import jakarta.annotation.Resource;
import org.springframework.ai.document.Document;
import org.springframework.ai.embedding.EmbeddingModel;
import org.springframework.ai.vectorstore.SimpleVectorStore;
import org.springframework.ai.vectorstore.VectorStore;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import java.util.List;
/**
 * Vector store configuration: registers the in-memory vector store bean.
 */
@Configuration
public class InMemoryVectorStoreConfig {

    /**
     * Builds a {@link SimpleVectorStore} on top of the given embedding model.
     *
     * @param dashscopeEmbeddingModel embedding model handed to the store builder
     * @return the newly built vector store
     */
    @Bean
    VectorStore inMemoryVectorStore(EmbeddingModel dashscopeEmbeddingModel) {
        // Returned directly; the original had a stray empty statement (";;") here.
        return SimpleVectorStore.builder(dashscopeEmbeddingModel).build();
    }
}

38
src/main/java/com/wok/supportbot/load/PgVectorStoreConfig.java

@ -0,0 +1,38 @@
package com.wok.supportbot.load;
import com.wok.supportbot.extract.MarkdownDocumentLoader;
import jakarta.annotation.Resource;
import org.springframework.ai.document.Document;
import org.springframework.ai.embedding.EmbeddingModel;
import org.springframework.ai.vectorstore.VectorStore;
import org.springframework.ai.vectorstore.pgvector.PgVectorStore;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Primary;
import org.springframework.jdbc.core.JdbcTemplate;
import java.util.List;
import static org.springframework.ai.vectorstore.pgvector.PgVectorStore.PgDistanceType.COSINE_DISTANCE;
import static org.springframework.ai.vectorstore.pgvector.PgVectorStore.PgIndexType.HNSW;
/**
 * Vector store configuration: registers the PostgreSQL (pgvector) backed
 * vector store bean and marks it as the primary {@link VectorStore}.
 */
@Configuration
public class PgVectorStoreConfig {

    /**
     * Builds the pgvector-backed store from the JDBC template and embedding model.
     *
     * @param jdbcTemplate            JDBC template handed to the store builder
     * @param dashscopeEmbeddingModel embedding model handed to the store builder
     * @return the configured vector store
     */
    @Bean
    @Primary
    public VectorStore pgVectorVectorStore(JdbcTemplate jdbcTemplate, EmbeddingModel dashscopeEmbeddingModel) {
        return PgVectorStore.builder(jdbcTemplate, dashscopeEmbeddingModel)
                // Optional: defaults to model dimensions or 1536
                .dimensions(1536)
                // Optional: defaults to COSINE_DISTANCE
                .distanceType(COSINE_DISTANCE)
                // Optional: defaults to HNSW
                .indexType(HNSW)
                // Optional: defaults to false
                .initializeSchema(true)
                // Optional: defaults to "public"
                .schemaName("public")
                // Optional: defaults to "vector_store"
                .vectorTableName("vector_store")
                // Optional: defaults to 10000
                .maxDocumentBatchSize(10000)
                .build();
    }
}

14
src/main/java/com/wok/supportbot/record/AssistantReport.java

@ -1,14 +0,0 @@
package com.wok.supportbot.record;
import java.util.List;
/**
 * Report made up of a title and a list of suggestions.
 *
 * @param title       the report title
 * @param suggestions the suggestions carried by the report
 * @Classname AssistantReport
 * @Version 1.0.0
 * @Date 2025/06/27 14:21
 * @Author lyx
 */
public record AssistantReport(String title, List<String> suggestions) {
}

29
src/main/java/com/wok/supportbot/transform/MyKeywordEnricher.java

@ -0,0 +1,29 @@
package com.wok.supportbot.transform;
import jakarta.annotation.Resource;
import org.springframework.ai.chat.model.ChatModel;
import org.springframework.ai.document.Document;
import org.springframework.ai.transformer.KeywordMetadataEnricher;
import org.springframework.stereotype.Component;
import java.util.List;
/**
 * AI-based metadata enricher: adds extra metadata to documents.
 */
@Component
public class MyKeywordEnricher {

    @Resource
    private ChatModel dashscopeChatModel;

    /**
     * Runs the documents through a {@link KeywordMetadataEnricher} built from
     * the injected chat model, so that AI-extracted keywords end up in the
     * document metadata.
     *
     * @param documents the documents to enrich
     * @return the result of applying the enricher to {@code documents}
     */
    public List<Document> enrichDocuments(List<Document> documents) {
        // Second constructor argument is 5 — presumably the keyword count; confirm against the library.
        return new KeywordMetadataEnricher(dashscopeChatModel, 5).apply(documents);
    }
}

34
src/main/java/com/wok/supportbot/transform/MyTokenTextSplitter.java

@ -0,0 +1,34 @@
package com.wok.supportbot.transform;
import org.springframework.ai.document.Document;
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
import org.springframework.stereotype.Component;
import java.util.List;
/**
 * Custom token-based text splitter.
 */
@Component
class MyTokenTextSplitter {

    /**
     * Splits the documents with a splitter created from its default settings.
     *
     * @param documents the documents to split
     * @return the result of applying the splitter to {@code documents}
     */
    public List<Document> splitDocuments(List<Document> documents) {
        return new TokenTextSplitter().apply(documents);
    }

    /**
     * Splits the documents with a splitter created from custom parameters;
     * tuning them controls the granularity and manner of splitting for
     * different use cases.
     *
     * @param documents the documents to split
     * @return the result of applying the splitter to {@code documents}
     */
    public List<Document> splitCustomized(List<Document> documents) {
        // NOTE(review): argument order is presumably chunk size, min chunk size in chars,
        // min chunk length to embed, max number of chunks, keep separator — confirm
        // against the TokenTextSplitter constructor of the Spring AI version in use.
        TokenTextSplitter customSplitter = new TokenTextSplitter(200, 100, 10, 5000, true);
        List<Document> chunks = customSplitter.apply(documents);
        return chunks;
    }
}

32
src/test/java/com/wok/supportbot/PgVectorVectorStoreConfigTest.java

@ -0,0 +1,32 @@
package com.wok.supportbot;
import jakarta.annotation.Resource;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.springframework.ai.document.Document;
import org.springframework.ai.vectorstore.SearchRequest;
import org.springframework.ai.vectorstore.VectorStore;
import org.springframework.boot.test.context.SpringBootTest;
import java.util.List;
import java.util.Map;
@SpringBootTest
public class PgVectorVectorStoreConfigTest {

    @Resource
    VectorStore pgVectorVectorStore;

    /**
     * Adds three sample documents to the store and checks that a similarity
     * search for "Spring" returns a non-null result list.
     */
    @Test
    void test() {
        Document withMeta1 = new Document(
                "Spring AI rocks!! Spring AI rocks!! Spring AI rocks!! Spring AI rocks!! Spring AI rocks!!",
                Map.of("meta1", "meta1"));
        Document withoutMeta = new Document("The World is Big and Salvation Lurks Around the Corner");
        Document withMeta2 = new Document(
                "You walk forward facing the past and you turn back toward the future.",
                Map.of("meta2", "meta2"));

        // Add the documents.
        pgVectorVectorStore.add(List.of(withMeta1, withoutMeta, withMeta2));

        // Similarity search.
        SearchRequest request = SearchRequest.builder().query("Spring").topK(5).build();
        List<Document> matches = pgVectorVectorStore.similaritySearch(request);
        Assertions.assertNotNull(matches);
    }
}

11
src/test/java/com/wok/supportbot/SupportBotApplicationTests.java

@ -2,8 +2,7 @@ package com.wok.supportbot;
import com.wok.supportbot.app.AssistantApp; import com.wok.supportbot.app.AssistantApp;
import com.wok.supportbot.app.ProductInfoApp; import com.wok.supportbot.app.ProductInfoApp;
import com.wok.supportbot.record.AssistantReport;
import com.wok.supportbot.record.ProductInfo;
import com.wok.supportbot.entity.ProductInfo;
import jakarta.annotation.Resource; import jakarta.annotation.Resource;
import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
@ -69,5 +68,13 @@ class SupportBotApplicationTests {
System.out.println("提取的商品信息: " + productInfo); System.out.println("提取的商品信息: " + productInfo);
} }
/**
 * Sends one question through the RAG chat path under a fresh random
 * conversation id and checks that an answer comes back.
 */
@Test
void doChatWithRag() {
    String conversationId = UUID.randomUUID().toString();
    String reply = assistantApp.doChatWithRag("T恤怎么搭配?", conversationId);
    Assertions.assertNotNull(reply);
}
} }
Loading…
Cancel
Save