// embedding.service.js
  // Embedding helpers provided by the `ai` package.
  const { embed, embedMany } = require('ai');
  // NOTE(review): `model` is not referenced in the visible part of this file; it is kept in case other code relies on it.
  const { model, embeddingModel } = require('./openai.service');
  // The database query module has to be implemented separately.
  const { findSimilarContent } = require('./db.service');
  /**
   * Splits raw text into chunks on the literal `-------split line-------` marker.
   *
   * Every chunk is trimmed, and chunks that end up empty (including
   * whitespace-only ones) are dropped, so the surrounding newlines of a
   * separator line never leak into the chunk text.
   *
   * @param {string} input - Raw text containing zero or more separator markers.
   * @returns {string[]} Trimmed, non-empty chunks in their original order.
   * @throws {TypeError} If `input` is not a string.
   */
  function generateChunks(input) {
    if (typeof input !== 'string') {
      throw new TypeError('generateChunks expects a string input');
    }
    return input
      .split('-------split line-------')
      .map((chunk) => chunk.trim())
      .filter((chunk) => chunk.length > 0);
  }
  /**
   * Generates embeddings for every chunk of the given text in one batch call.
   *
   * The text is split with `generateChunks`, and the chunks are passed to
   * `embedMany` together with `embeddingModel`.
   *
   * @param {string} value - Raw text to split into chunks and embed.
   * @returns {Promise<Array<{content: string, embedding: *}>>} One entry per
   *   chunk, pairing the chunk text with the embedding returned at the same index.
   */
  async function generateEmbeddings(value) {
    const chunks = generateChunks(value);
    // Nothing to embed: skip the batch call instead of sending an empty `values` list.
    if (chunks.length === 0) {
      return [];
    }
    const { embeddings } = await embedMany({
      model: embeddingModel,
      values: chunks,
    });
    return embeddings.map((embedding, index) => ({
      content: chunks[index],
      embedding,
    }));
  }
  /**
   * Generates a single embedding for the given text.
   *
   * Line breaks are replaced with spaces before the text is passed to `embed`.
   * Both real line breaks (`\n`, `\r\n`, `\r`) and the literal two-character
   * sequence backslash + "n" are replaced; the latter is the only form the
   * previous `replaceAll('\\n', ' ')` call matched.
   *
   * @param {string} value - Text to embed.
   * @returns {Promise<*>} The `embedding` field of the `embed` result.
   */
  async function generateEmbedding(value) {
    const input = value.replaceAll(/\\n|\r?\n|\r/g, ' ');
    const { embedding } = await embed({
      model: embeddingModel,
      value: input,
      // NOTE(review): generateEmbeddings does not pass `dimensions`; confirm that
      // `embed` honours this option and that both paths yield same-sized vectors.
      dimensions: 1536,
    });
    return embedding;
  }
  /**
   * Core query function: embeds the user's query and looks up similar content.
   *
   * The query is embedded with `generateEmbedding`, and the resulting vector is
   * handed to `findSimilarContent`, which must be provided by the database
   * service.
   *
   * @param {string} userQuery - The user's query text.
   * @returns {Promise<*>} Whatever `findSimilarContent` returns for the query embedding.
   */
  async function findRelevantContent(userQuery) {
    const queryEmbedding = await generateEmbedding(userQuery);
    return findSimilarContent(queryEmbedding);
  }
  38. module.exports = {
  39. generateEmbeddings,
  40. generateEmbedding,
  41. findRelevantContent,
  42. };