4 changed files with 235 additions and 26 deletions
-
28 CLAUDE.md
-
38 src/main/java/com/wok/supportbot/config/EmbeddingModelFactory.java
-
189 src/main/java/com/wok/supportbot/config/VolcengineMultimodalEmbeddingModel.java
-
6 src/main/resources/static/components/ModelConfigManager.js
@@ -0,0 +1,189 @@
|||
package com.wok.supportbot.config; |
|||
|
|||
import com.fasterxml.jackson.core.type.TypeReference; |
|||
import com.fasterxml.jackson.databind.ObjectMapper; |
|||
import lombok.extern.slf4j.Slf4j; |
|||
import org.springframework.ai.document.Document; |
|||
import org.springframework.ai.embedding.Embedding; |
|||
import org.springframework.ai.embedding.EmbeddingModel; |
|||
import org.springframework.ai.embedding.EmbeddingRequest; |
|||
import org.springframework.ai.embedding.EmbeddingResponse; |
|||
import org.springframework.ai.embedding.EmbeddingResponseMetadata; |
|||
import org.springframework.ai.chat.metadata.DefaultUsage; |
|||
import org.springframework.http.HttpHeaders; |
|||
import org.springframework.http.MediaType; |
|||
import org.springframework.retry.support.RetryTemplate; |
|||
import org.springframework.web.client.RestClient; |
|||
|
|||
import java.util.ArrayList; |
|||
import java.util.HashMap; |
|||
import java.util.List; |
|||
import java.util.Map; |
|||
|
|||
/** |
|||
* 豆包多模态向量化模型(Volcengine Multimodal Embedding Model) |
|||
* <p> |
|||
* 直接调用豆包 /embeddings/multimodal 端点,适配其特有的请求/响应格式。 |
|||
* <p> |
|||
* 与 OpenAI Embedding API 的核心差异: |
|||
* - 端点: /embeddings/multimodal(非 /embeddings) |
|||
* - 请求 input: [{type, text}] 对象数组(非纯字符串数组) |
|||
* - 响应 data: 单个对象 {"embedding": [...]}(非数组 [{...}]) |
|||
* - 语义: 整个 input 数组被视为一个文档,只返回一个 embedding 向量 |
|||
* <p> |
|||
* 因此必须逐条调用:每条文本单独发一次请求,不能批量。 |
|||
* 当前阶段仅处理纯文本(type=text),未来可扩展图片/视频。 |
|||
*/ |
|||
@Slf4j |
|||
public class VolcengineMultimodalEmbeddingModel implements EmbeddingModel { |
|||
|
|||
private static final String EMBEDDINGS_PATH = "/embeddings/multimodal"; |
|||
|
|||
private final String apiKey; |
|||
private final String baseUrl; |
|||
private final String modelName; |
|||
private final int dimensions; |
|||
private final RetryTemplate retryTemplate; |
|||
private final RestClient restClient; |
|||
private final ObjectMapper objectMapper = new ObjectMapper(); |
|||
|
|||
public VolcengineMultimodalEmbeddingModel(String apiKey, String baseUrl, String modelName, |
|||
int dimensions, RetryTemplate retryTemplate) { |
|||
this.apiKey = apiKey; |
|||
this.baseUrl = baseUrl; |
|||
this.modelName = modelName; |
|||
this.dimensions = dimensions; |
|||
this.retryTemplate = retryTemplate; |
|||
this.restClient = RestClient.builder() |
|||
.baseUrl(baseUrl) |
|||
.defaultHeader(HttpHeaders.AUTHORIZATION, "Bearer " + apiKey) |
|||
.defaultHeader(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_JSON_VALUE) |
|||
.build(); |
|||
} |
|||
|
|||
@Override |
|||
public float[] embed(Document document) { |
|||
if (document.isText()) { |
|||
return embedSingleText(document.getText()); |
|||
} |
|||
// 未来扩展:检查 document.getMedia() 处理图片/视频 |
|||
throw new IllegalArgumentException( |
|||
"豆包多模态 Embedding 暂不支持非文本内容,Document 需为纯文本"); |
|||
} |
|||
|
|||
@Override |
|||
public EmbeddingResponse call(EmbeddingRequest request) { |
|||
List<String> texts = request.getInstructions(); |
|||
|
|||
// 豆包 /embeddings/multimodal 把整个 input 数组视为一个文档,只返回一个 embedding。 |
|||
// 因此必须逐条调用,每条文本单独发一次请求。 |
|||
List<Embedding> embeddings = new ArrayList<>(); |
|||
int totalPromptTokens = 0; |
|||
int totalTotalTokens = 0; |
|||
|
|||
for (int i = 0; i < texts.size(); i++) { |
|||
final int index = i; |
|||
final String text = texts.get(i); |
|||
|
|||
// 构造单条文本的多模态 input |
|||
Map<String, Object> inputItem = new HashMap<>(); |
|||
inputItem.put("type", "text"); |
|||
inputItem.put("text", text); |
|||
|
|||
Map<String, Object> requestBody = new HashMap<>(); |
|||
requestBody.put("model", modelName); |
|||
requestBody.put("input", List.of(inputItem)); |
|||
// 豆包 vision 模型默认输出 2048 维,超过 PGVector HNSW 索引上限 2000。 |
|||
// 传入 dimensions 参数让 API 端降维(支持 Matryoshka 降维到 1024 等) |
|||
requestBody.put("dimensions", dimensions); |
|||
|
|||
log.debug("豆包多模态 Embedding 请求: model={}, index={}/{}", modelName, index + 1, texts.size()); |
|||
|
|||
EmbeddingResult result = retryTemplate.execute(ctx -> { |
|||
String responseBody = restClient.post() |
|||
.uri(EMBEDDINGS_PATH) |
|||
.body(requestBody) |
|||
.retrieve() |
|||
.body(String.class); |
|||
|
|||
return parseResponse(responseBody); |
|||
}); |
|||
|
|||
embeddings.add(new Embedding(result.embedding, index)); |
|||
|
|||
// 累计 usage |
|||
if (result.promptTokens > 0) { |
|||
totalPromptTokens += result.promptTokens; |
|||
totalTotalTokens += result.totalTokens; |
|||
} |
|||
} |
|||
|
|||
EmbeddingResponseMetadata metadata = new EmbeddingResponseMetadata(modelName, |
|||
new DefaultUsage(totalPromptTokens, 0, totalTotalTokens, null)); |
|||
|
|||
return new EmbeddingResponse(embeddings, metadata); |
|||
} |
|||
|
|||
/** |
|||
* 单条文本向量化(供 embed(Document) 使用) |
|||
*/ |
|||
private float[] embedSingleText(String text) { |
|||
EmbeddingResponse response = call(new EmbeddingRequest(List.of(text), null)); |
|||
List<Embedding> results = response.getResults(); |
|||
return results.isEmpty() ? new float[0] : results.get(0).getOutput(); |
|||
} |
|||
|
|||
/** |
|||
* 解析豆包 /embeddings/multimodal 响应。 |
|||
* 响应格式:{"created":..., "data":{"embedding":[0.1,0.2,...]}, "id":"...", "model":"...", "object":"...", "usage":{...}} |
|||
* 注意:data 是单个对象(非数组),embedding 值可能是 Integer 或 Double。 |
|||
*/ |
|||
private EmbeddingResult parseResponse(String responseBody) { |
|||
try { |
|||
Map<String, Object> body = objectMapper.readValue(responseBody, |
|||
new TypeReference<Map<String, Object>>() {}); |
|||
|
|||
// 解析 data.embedding — data 是单个对象 {"embedding": [...]} |
|||
@SuppressWarnings("unchecked") |
|||
Map<String, Object> data = (Map<String, Object>) body.get("data"); |
|||
@SuppressWarnings("unchecked") |
|||
List<Number> rawEmbedding = (List<Number>) data.get("embedding"); |
|||
|
|||
float[] vec = new float[rawEmbedding.size()]; |
|||
for (int i = 0; i < rawEmbedding.size(); i++) { |
|||
vec[i] = rawEmbedding.get(i).floatValue(); |
|||
} |
|||
|
|||
// 解析 usage |
|||
int promptTokens = 0; |
|||
int totalTokens = 0; |
|||
@SuppressWarnings("unchecked") |
|||
Map<String, Object> usage = (Map<String, Object>) body.get("usage"); |
|||
if (usage != null) { |
|||
promptTokens = ((Number) usage.getOrDefault("prompt_tokens", 0)).intValue(); |
|||
totalTokens = ((Number) usage.getOrDefault("total_tokens", 0)).intValue(); |
|||
} |
|||
|
|||
return new EmbeddingResult(vec, promptTokens, totalTokens); |
|||
} catch (com.fasterxml.jackson.core.JsonProcessingException e) { |
|||
throw new RuntimeException("解析豆包多模态 Embedding 响应失败: " + e.getMessage() |
|||
+ ", 响应体前500字符: " + responseBody.substring(0, Math.min(500, responseBody.length())), e); |
|||
} |
|||
} |
|||
|
|||
@Override |
|||
public EmbeddingResponse embedForResponse(List<String> texts) { |
|||
return call(new EmbeddingRequest(texts, null)); |
|||
} |
|||
|
|||
@Override |
|||
public int dimensions() { |
|||
return dimensions; |
|||
} |
|||
|
|||
/** |
|||
* 内部解析结果 |
|||
*/ |
|||
private record EmbeddingResult(float[] embedding, int promptTokens, int totalTokens) { |
|||
} |
|||
} |
|||
Write
Preview
Loading…
Cancel
Save
Reference in new issue