From 9609b3b1f620ab309e78f33b960e6fcf9e275843 Mon Sep 17 00:00:00 2001 From: balex Date: Wed, 18 Feb 2026 19:26:28 +0100 Subject: [PATCH] GPU --- .gitignore | 3 ++ rag-service/pom.xml | 13 +++++++-- .../java/com/balex/rag/RagApplication.java | 9 +++--- .../expansion/ExpansionQueryAdvisor.java | 9 +++--- .../rag/controller/ChatEntryController.java | 5 ++-- .../balex/rag/model/dto/UserEntryRequest.java | 3 +- .../balex/rag/service/ChatEntryService.java | 2 +- .../service/impl/ChatEntryServiceImpl.java | 7 ++--- .../src/main/resources/application.properties | 29 +++++++++++-------- 9 files changed, 46 insertions(+), 34 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..608b396 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.idea/ +*.iml +wireguard.txt \ No newline at end of file diff --git a/rag-service/pom.xml b/rag-service/pom.xml index 64aaf33..fe29688 100644 --- a/rag-service/pom.xml +++ b/rag-service/pom.xml @@ -1,6 +1,6 @@ + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd"> 4.0.0 org.springframework.boot @@ -64,9 +64,16 @@ org.springframework.boot spring-boot-starter-web + org.springframework.ai - spring-ai-starter-model-ollama + spring-ai-starter-model-openai + + + + + org.springframework.ai + spring-ai-starter-model-transformers org.springframework.ai @@ -196,4 +203,4 @@ - + \ No newline at end of file diff --git a/rag-service/src/main/java/com/balex/rag/RagApplication.java b/rag-service/src/main/java/com/balex/rag/RagApplication.java index a28cd0f..799c89b 100644 --- a/rag-service/src/main/java/com/balex/rag/RagApplication.java +++ b/rag-service/src/main/java/com/balex/rag/RagApplication.java @@ -14,7 +14,7 @@ import org.springframework.ai.chat.client.advisor.api.Advisor; import org.springframework.ai.chat.memory.ChatMemory; import org.springframework.ai.chat.model.ChatModel; import org.springframework.ai.chat.prompt.PromptTemplate; -import 
org.springframework.ai.ollama.api.OllamaOptions; +import org.springframework.ai.openai.OpenAiChatOptions; import org.springframework.ai.vectorstore.VectorStore; import org.springframework.beans.factory.annotation.Value; import org.springframework.boot.SpringApplication; @@ -49,9 +49,10 @@ public class RagApplication { .order(3).build(), SimpleLoggerAdvisor.builder().order(4).build() ) - .defaultOptions(OllamaOptions.builder() + .defaultOptions(OpenAiChatOptions.builder() .temperature(ragDefaults.temperature()) - .repeatPenalty(ragDefaults.repeatPenalty()) + .topP(ragDefaults.topP()) + .frequencyPenalty(ragDefaults.repeatPenalty() - 1.0) // Ollama repeatPenalty 1.1 -> frequencyPenalty 0.1 .build()) .build(); } @@ -71,4 +72,4 @@ public class RagApplication { SpringApplication.run(RagApplication.class, args); } -} +} \ No newline at end of file diff --git a/rag-service/src/main/java/com/balex/rag/advisors/expansion/ExpansionQueryAdvisor.java b/rag-service/src/main/java/com/balex/rag/advisors/expansion/ExpansionQueryAdvisor.java index 0c179ce..d51a4ff 100644 --- a/rag-service/src/main/java/com/balex/rag/advisors/expansion/ExpansionQueryAdvisor.java +++ b/rag-service/src/main/java/com/balex/rag/advisors/expansion/ExpansionQueryAdvisor.java @@ -10,7 +10,7 @@ import org.springframework.ai.chat.client.advisor.api.AdvisorChain; import org.springframework.ai.chat.client.advisor.api.BaseAdvisor; import org.springframework.ai.chat.model.ChatModel; import org.springframework.ai.chat.prompt.PromptTemplate; -import org.springframework.ai.ollama.api.OllamaOptions; +import org.springframework.ai.openai.OpenAiChatOptions; import java.util.Map; @@ -48,11 +48,10 @@ public class ExpansionQueryAdvisor implements BaseAdvisor { public static ExpansionQueryAdvisorBuilder builder(ChatModel chatModel, RagExpansionProperties props) { return new ExpansionQueryAdvisorBuilder().chatClient(ChatClient.builder(chatModel) - .defaultOptions(OllamaOptions.builder() + 
.defaultOptions(OpenAiChatOptions.builder() .temperature(props.temperature()) - .topK(props.topK()) .topP(props.topP()) - .repeatPenalty(props.repeatPenalty()) + .frequencyPenalty(props.repeatPenalty() - 1.0) // Ollama repeatPenalty 1.0 -> frequencyPenalty 0.0 .build()) .build()); } @@ -86,4 +85,4 @@ public class ExpansionQueryAdvisor implements BaseAdvisor { return chatClientResponse; } -} +} \ No newline at end of file diff --git a/rag-service/src/main/java/com/balex/rag/controller/ChatEntryController.java b/rag-service/src/main/java/com/balex/rag/controller/ChatEntryController.java index de16b63..3e8a003 100644 --- a/rag-service/src/main/java/com/balex/rag/controller/ChatEntryController.java +++ b/rag-service/src/main/java/com/balex/rag/controller/ChatEntryController.java @@ -36,16 +36,15 @@ public class ChatEntryController { log.trace(ApiLogMessage.NAME_OF_CURRENT_METHOD.getValue(), ApiUtils.getMethodName()); boolean onlyContext = request.onlyContext() != null ? request.onlyContext() : ragDefaults.onlyContext(); - int topK = request.topK() != null ? request.topK() : ragDefaults.topK(); double topP = request.topP() != null ? 
request.topP() : ragDefaults.topP(); - ChatEntry entry = chatEntryService.addUserEntry(chatId, request.content(), onlyContext, topK, topP); + ChatEntry entry = chatEntryService.addUserEntry(chatId, request.content(), onlyContext, topP); Chat chat = chatService.getChat(chatId); eventPublisher.publishQuerySent( chat.getIdOwner().toString(), chatId.toString(), - 0); // TODO: add tokensUsed when Ollama response provides it + 0); // TODO: add tokensUsed when usage info is available from Groq response return ResponseEntity.ok(entry); } diff --git a/rag-service/src/main/java/com/balex/rag/model/dto/UserEntryRequest.java b/rag-service/src/main/java/com/balex/rag/model/dto/UserEntryRequest.java index da5c8a4..0a5387d 100644 --- a/rag-service/src/main/java/com/balex/rag/model/dto/UserEntryRequest.java +++ b/rag-service/src/main/java/com/balex/rag/model/dto/UserEntryRequest.java @@ -3,6 +3,5 @@ package com.balex.rag.model.dto; public record UserEntryRequest( String content, Boolean onlyContext, - Integer topK, Double topP -) {} +) {} \ No newline at end of file diff --git a/rag-service/src/main/java/com/balex/rag/service/ChatEntryService.java b/rag-service/src/main/java/com/balex/rag/service/ChatEntryService.java index 4d77213..7c58f49 100644 --- a/rag-service/src/main/java/com/balex/rag/service/ChatEntryService.java +++ b/rag-service/src/main/java/com/balex/rag/service/ChatEntryService.java @@ -8,5 +8,5 @@ public interface ChatEntryService { List getEntriesByChatId(Long chatId); - ChatEntry addUserEntry(Long chatId, String content, boolean onlyContext, int topK, double topP); + ChatEntry addUserEntry(Long chatId, String content, boolean onlyContext, double topP); } \ No newline at end of file diff --git a/rag-service/src/main/java/com/balex/rag/service/impl/ChatEntryServiceImpl.java b/rag-service/src/main/java/com/balex/rag/service/impl/ChatEntryServiceImpl.java index 4f2ebaf..d8be7d1 100644 --- a/rag-service/src/main/java/com/balex/rag/service/impl/ChatEntryServiceImpl.java 
+++ b/rag-service/src/main/java/com/balex/rag/service/impl/ChatEntryServiceImpl.java @@ -11,7 +11,7 @@ import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.springframework.ai.chat.client.ChatClient; import org.springframework.ai.chat.memory.ChatMemory; -import org.springframework.ai.ollama.api.OllamaOptions; +import org.springframework.ai.openai.OpenAiChatOptions; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; @@ -33,7 +33,7 @@ public class ChatEntryServiceImpl implements ChatEntryService { @Override @Transactional - public ChatEntry addUserEntry(Long chatId, String content, boolean onlyContext, int topK, double topP) { + public ChatEntry addUserEntry(Long chatId, String content, boolean onlyContext, double topP) { Chat chat = chatRepository.findById(chatId) .orElseThrow(() -> new EntityNotFoundException("Chat not found with id: " + chatId)); @@ -63,8 +63,7 @@ public class ChatEntryServiceImpl implements ChatEntryService { .system(systemPrompt) .user(content) .advisors(a -> a.param(ChatMemory.CONVERSATION_ID, String.valueOf(chatId))) - .options(OllamaOptions.builder() - .topK(topK) + .options(OpenAiChatOptions.builder() .topP(topP) .build()) .call() diff --git a/rag-service/src/main/resources/application.properties b/rag-service/src/main/resources/application.properties index 504c04e..14e3da0 100644 --- a/rag-service/src/main/resources/application.properties +++ b/rag-service/src/main/resources/application.properties @@ -1,24 +1,29 @@ -spring.application.name=rag -#spring.ai.ollama.base-url=http://localhost:11431 -spring.ai.ollama.base-url=http://localhost:11434 -#spring.ai.ollama.chat.model=gemma3:4b-it-q4_K_M -spring.ai.ollama.chat.model=llama3.1:8b-instruct-q4_K_M -jwt.secret=ywfI6dBznYmHbokihB/OBzZz6E0Fj+6PiqrM8dQ5c3t0HeYarblCbOGM8vQtOt472AtQ+MsCH7OVIKHOzjrPsQ== +spring.application.name=rag-service + +# --- LLM Provider: Groq (OpenAI-compatible API) --- 
+spring.ai.openai.base-url=${SPRING_AI_OPENAI_BASE_URL:https://api.groq.com/openai/v1} +spring.ai.openai.api-key=${SPRING_AI_OPENAI_API_KEY:} +spring.ai.openai.chat.model=${SPRING_AI_OPENAI_CHAT_MODEL:llama-3.3-70b-versatile} + +# --- Embedding model: lightweight local model; vectors are stored in pgvector --- +# Groq does not provide an embedding endpoint, so we disable OpenAI embedding auto-config +# and rely on the locally configured embedding model that feeds the pgvector store +spring.ai.openai.embedding.enabled=false + +jwt.secret=${JWT_SECRET:ywfI6dBznYmHbokihB/OBzZz6E0Fj+6PiqrM8dQ5c3t0HeYarblCbOGM8vQtOt472AtQ+MsCH7OVIKHOzjrPsQ==} jwt.expiration=103600000 -spring.datasource.url=jdbc:postgresql://localhost:5432/ragdb -spring.datasource.username=postgres -spring.datasource.password=postgres +spring.datasource.url=${SPRING_DATASOURCE_URL:jdbc:postgresql://localhost:5432/ragdb} +spring.datasource.username=${SPRING_DATASOURCE_USERNAME:postgres} +spring.datasource.password=${SPRING_DATASOURCE_PASSWORD:postgres} logging.level.org.springframework.ai.chat.client.advisor=DEBUG logging.level.org.springframework.web.servlet.mvc.method.annotation.RequestMappingHandlerMapping=DEBUG logging.level.org.springframework.web=DEBUG logging.level.org.flywaydb=DEBUG logging.level.com.balex.rag.controller=DEBUG app.document.chunk-size=200 -#spring.main.allow-circular-references=true server.compression.enabled=false server.tomcat.connection-timeout=60000 spring.mvc.async.request-timeout=60000 -#spring.main.web-application-type=reactive end.points.users=/users end.points.id=/{id} end.points.all=/all @@ -31,7 +36,7 @@ end.points.register=/register end.points.chat=/chat end.points.entry=/entry end.points.document=/documents -rag.rerank-fetch-multiplier = 2 +rag.rerank-fetch-multiplier=2 #Swagger swagger.servers.first=http://localhost:8080 springdoc.swagger-ui.path=/swagger-ui.html