This commit is contained in:
2026-02-18 19:26:28 +01:00
parent 3f61a38afa
commit 9609b3b1f6
9 changed files with 46 additions and 34 deletions

3
.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
.idea/
*.iml
wireguard.txt

View File

@@ -64,9 +64,16 @@
<groupId>org.springframework.boot</groupId> <groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId> <artifactId>spring-boot-starter-web</artifactId>
</dependency> </dependency>
<!-- Changed: spring-ai-starter-model-ollama -> spring-ai-starter-model-openai (Groq-compatible) -->
<dependency> <dependency>
<groupId>org.springframework.ai</groupId> <groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-starter-model-ollama</artifactId> <artifactId>spring-ai-starter-model-openai</artifactId>
</dependency>
<!-- Local ONNX embedding model (Groq does not provide an embedding API) -->
<!-- Default model: all-MiniLM-L6-v2 (384 dimensions) -->
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-starter-model-transformers</artifactId>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.springframework.ai</groupId> <groupId>org.springframework.ai</groupId>

View File

@@ -14,7 +14,7 @@ import org.springframework.ai.chat.client.advisor.api.Advisor;
import org.springframework.ai.chat.memory.ChatMemory; import org.springframework.ai.chat.memory.ChatMemory;
import org.springframework.ai.chat.model.ChatModel; import org.springframework.ai.chat.model.ChatModel;
import org.springframework.ai.chat.prompt.PromptTemplate; import org.springframework.ai.chat.prompt.PromptTemplate;
import org.springframework.ai.ollama.api.OllamaOptions; import org.springframework.ai.openai.OpenAiChatOptions;
import org.springframework.ai.vectorstore.VectorStore; import org.springframework.ai.vectorstore.VectorStore;
import org.springframework.beans.factory.annotation.Value; import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.SpringApplication; import org.springframework.boot.SpringApplication;
@@ -49,9 +49,10 @@ public class RagApplication {
.order(3).build(), .order(3).build(),
SimpleLoggerAdvisor.builder().order(4).build() SimpleLoggerAdvisor.builder().order(4).build()
) )
.defaultOptions(OllamaOptions.builder() .defaultOptions(OpenAiChatOptions.builder()
.temperature(ragDefaults.temperature()) .temperature(ragDefaults.temperature())
.repeatPenalty(ragDefaults.repeatPenalty()) .topP(ragDefaults.topP())
.frequencyPenalty(ragDefaults.repeatPenalty() - 1.0) // Ollama repeatPenalty 1.1 -> frequencyPenalty 0.1
.build()) .build())
.build(); .build();
} }

View File

@@ -10,7 +10,7 @@ import org.springframework.ai.chat.client.advisor.api.AdvisorChain;
import org.springframework.ai.chat.client.advisor.api.BaseAdvisor; import org.springframework.ai.chat.client.advisor.api.BaseAdvisor;
import org.springframework.ai.chat.model.ChatModel; import org.springframework.ai.chat.model.ChatModel;
import org.springframework.ai.chat.prompt.PromptTemplate; import org.springframework.ai.chat.prompt.PromptTemplate;
import org.springframework.ai.ollama.api.OllamaOptions; import org.springframework.ai.openai.OpenAiChatOptions;
import java.util.Map; import java.util.Map;
@@ -48,11 +48,10 @@ public class ExpansionQueryAdvisor implements BaseAdvisor {
public static ExpansionQueryAdvisorBuilder builder(ChatModel chatModel, RagExpansionProperties props) { public static ExpansionQueryAdvisorBuilder builder(ChatModel chatModel, RagExpansionProperties props) {
return new ExpansionQueryAdvisorBuilder().chatClient(ChatClient.builder(chatModel) return new ExpansionQueryAdvisorBuilder().chatClient(ChatClient.builder(chatModel)
.defaultOptions(OllamaOptions.builder() .defaultOptions(OpenAiChatOptions.builder()
.temperature(props.temperature()) .temperature(props.temperature())
.topK(props.topK())
.topP(props.topP()) .topP(props.topP())
.repeatPenalty(props.repeatPenalty()) .frequencyPenalty(props.repeatPenalty() - 1.0) // Ollama repeatPenalty 1.0 -> frequencyPenalty 0.0
.build()) .build())
.build()); .build());
} }

View File

@@ -36,16 +36,15 @@ public class ChatEntryController {
log.trace(ApiLogMessage.NAME_OF_CURRENT_METHOD.getValue(), ApiUtils.getMethodName()); log.trace(ApiLogMessage.NAME_OF_CURRENT_METHOD.getValue(), ApiUtils.getMethodName());
boolean onlyContext = request.onlyContext() != null ? request.onlyContext() : ragDefaults.onlyContext(); boolean onlyContext = request.onlyContext() != null ? request.onlyContext() : ragDefaults.onlyContext();
int topK = request.topK() != null ? request.topK() : ragDefaults.topK();
double topP = request.topP() != null ? request.topP() : ragDefaults.topP(); double topP = request.topP() != null ? request.topP() : ragDefaults.topP();
ChatEntry entry = chatEntryService.addUserEntry(chatId, request.content(), onlyContext, topK, topP); ChatEntry entry = chatEntryService.addUserEntry(chatId, request.content(), onlyContext, topP);
Chat chat = chatService.getChat(chatId); Chat chat = chatService.getChat(chatId);
eventPublisher.publishQuerySent( eventPublisher.publishQuerySent(
chat.getIdOwner().toString(), chat.getIdOwner().toString(),
chatId.toString(), chatId.toString(),
0); // TODO: add tokensUsed when Ollama response provides it 0); // TODO: add tokensUsed when usage info is available from Groq response
return ResponseEntity.ok(entry); return ResponseEntity.ok(entry);
} }

View File

@@ -3,6 +3,5 @@ package com.balex.rag.model.dto;
public record UserEntryRequest( public record UserEntryRequest(
String content, String content,
Boolean onlyContext, Boolean onlyContext,
Integer topK,
Double topP Double topP
) {} ) {}

View File

@@ -8,5 +8,5 @@ public interface ChatEntryService {
List<ChatEntry> getEntriesByChatId(Long chatId); List<ChatEntry> getEntriesByChatId(Long chatId);
ChatEntry addUserEntry(Long chatId, String content, boolean onlyContext, int topK, double topP); ChatEntry addUserEntry(Long chatId, String content, boolean onlyContext, double topP);
} }

View File

@@ -11,7 +11,7 @@ import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.springframework.ai.chat.client.ChatClient; import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.chat.memory.ChatMemory; import org.springframework.ai.chat.memory.ChatMemory;
import org.springframework.ai.ollama.api.OllamaOptions; import org.springframework.ai.openai.OpenAiChatOptions;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional; import org.springframework.transaction.annotation.Transactional;
@@ -33,7 +33,7 @@ public class ChatEntryServiceImpl implements ChatEntryService {
@Override @Override
@Transactional @Transactional
public ChatEntry addUserEntry(Long chatId, String content, boolean onlyContext, int topK, double topP) { public ChatEntry addUserEntry(Long chatId, String content, boolean onlyContext, double topP) {
Chat chat = chatRepository.findById(chatId) Chat chat = chatRepository.findById(chatId)
.orElseThrow(() -> new EntityNotFoundException("Chat not found with id: " + chatId)); .orElseThrow(() -> new EntityNotFoundException("Chat not found with id: " + chatId));
@@ -63,8 +63,7 @@ public class ChatEntryServiceImpl implements ChatEntryService {
.system(systemPrompt) .system(systemPrompt)
.user(content) .user(content)
.advisors(a -> a.param(ChatMemory.CONVERSATION_ID, String.valueOf(chatId))) .advisors(a -> a.param(ChatMemory.CONVERSATION_ID, String.valueOf(chatId)))
.options(OllamaOptions.builder() .options(OpenAiChatOptions.builder()
.topK(topK)
.topP(topP) .topP(topP)
.build()) .build())
.call() .call()

View File

@@ -1,24 +1,29 @@
spring.application.name=rag spring.application.name=rag-service
#spring.ai.ollama.base-url=http://localhost:11431
spring.ai.ollama.base-url=http://localhost:11434 # --- LLM Provider: Groq (OpenAI-compatible API) ---
#spring.ai.ollama.chat.model=gemma3:4b-it-q4_K_M spring.ai.openai.base-url=${SPRING_AI_OPENAI_BASE_URL:https://api.groq.com/openai/v1}
spring.ai.ollama.chat.model=llama3.1:8b-instruct-q4_K_M spring.ai.openai.api-key=${SPRING_AI_OPENAI_API_KEY:}
jwt.secret=ywfI6dBznYmHbokihB/OBzZz6E0Fj+6PiqrM8dQ5c3t0HeYarblCbOGM8vQtOt472AtQ+MsCH7OVIKHOzjrPsQ== spring.ai.openai.chat.model=${SPRING_AI_OPENAI_CHAT_MODEL:llama-3.3-70b-versatile}
# --- Embedding model: local lightweight ONNX model (embeddings are stored in the pgvector store) ---
# Groq does not provide an embedding endpoint, so we disable OpenAI embedding auto-config
# and rely on the local embedding model supplied by spring-ai-starter-model-transformers
spring.ai.openai.embedding.enabled=false
jwt.secret=${JWT_SECRET:ywfI6dBznYmHbokihB/OBzZz6E0Fj+6PiqrM8dQ5c3t0HeYarblCbOGM8vQtOt472AtQ+MsCH7OVIKHOzjrPsQ==}
jwt.expiration=103600000 jwt.expiration=103600000
spring.datasource.url=jdbc:postgresql://localhost:5432/ragdb spring.datasource.url=${SPRING_DATASOURCE_URL:jdbc:postgresql://localhost:5432/ragdb}
spring.datasource.username=postgres spring.datasource.username=${SPRING_DATASOURCE_USERNAME:postgres}
spring.datasource.password=postgres spring.datasource.password=${SPRING_DATASOURCE_PASSWORD:postgres}
logging.level.org.springframework.ai.chat.client.advisor=DEBUG logging.level.org.springframework.ai.chat.client.advisor=DEBUG
logging.level.org.springframework.web.servlet.mvc.method.annotation.RequestMappingHandlerMapping=DEBUG logging.level.org.springframework.web.servlet.mvc.method.annotation.RequestMappingHandlerMapping=DEBUG
logging.level.org.springframework.web=DEBUG logging.level.org.springframework.web=DEBUG
logging.level.org.flywaydb=DEBUG logging.level.org.flywaydb=DEBUG
logging.level.com.balex.rag.controller=DEBUG logging.level.com.balex.rag.controller=DEBUG
app.document.chunk-size=200 app.document.chunk-size=200
#spring.main.allow-circular-references=true
server.compression.enabled=false server.compression.enabled=false
server.tomcat.connection-timeout=60000 server.tomcat.connection-timeout=60000
spring.mvc.async.request-timeout=60000 spring.mvc.async.request-timeout=60000
#spring.main.web-application-type=reactive
end.points.users=/users end.points.users=/users
end.points.id=/{id} end.points.id=/{id}
end.points.all=/all end.points.all=/all