diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..608b396
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+.idea/
+*.iml
+wireguard.txt
\ No newline at end of file
diff --git a/rag-service/pom.xml b/rag-service/pom.xml
index 64aaf33..fe29688 100644
--- a/rag-service/pom.xml
+++ b/rag-service/pom.xml
@@ -1,6 +1,6 @@
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
4.0.0
org.springframework.boot
@@ -64,9 +64,16 @@
org.springframework.boot
spring-boot-starter-web
+
org.springframework.ai
- spring-ai-starter-model-ollama
+ spring-ai-starter-model-openai
+
+
+
+
+ org.springframework.ai
+ spring-ai-starter-model-transformers
org.springframework.ai
@@ -196,4 +203,4 @@
-
+
\ No newline at end of file
diff --git a/rag-service/src/main/java/com/balex/rag/RagApplication.java b/rag-service/src/main/java/com/balex/rag/RagApplication.java
index a28cd0f..799c89b 100644
--- a/rag-service/src/main/java/com/balex/rag/RagApplication.java
+++ b/rag-service/src/main/java/com/balex/rag/RagApplication.java
@@ -14,7 +14,7 @@ import org.springframework.ai.chat.client.advisor.api.Advisor;
import org.springframework.ai.chat.memory.ChatMemory;
import org.springframework.ai.chat.model.ChatModel;
import org.springframework.ai.chat.prompt.PromptTemplate;
-import org.springframework.ai.ollama.api.OllamaOptions;
+import org.springframework.ai.openai.OpenAiChatOptions;
import org.springframework.ai.vectorstore.VectorStore;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.SpringApplication;
@@ -49,9 +49,10 @@ public class RagApplication {
.order(3).build(),
SimpleLoggerAdvisor.builder().order(4).build()
)
- .defaultOptions(OllamaOptions.builder()
+ .defaultOptions(OpenAiChatOptions.builder()
.temperature(ragDefaults.temperature())
- .repeatPenalty(ragDefaults.repeatPenalty())
+ .topP(ragDefaults.topP())
+ .frequencyPenalty(ragDefaults.repeatPenalty() - 1.0) // Heuristic, not an exact conversion: Ollama repeatPenalty (1.0 = off) shifted onto OpenAI frequencyPenalty (0.0 = off), e.g. 1.1 -> 0.1
.build())
.build();
}
@@ -71,4 +72,4 @@ public class RagApplication {
SpringApplication.run(RagApplication.class, args);
}
-}
+}
\ No newline at end of file
diff --git a/rag-service/src/main/java/com/balex/rag/advisors/expansion/ExpansionQueryAdvisor.java b/rag-service/src/main/java/com/balex/rag/advisors/expansion/ExpansionQueryAdvisor.java
index 0c179ce..d51a4ff 100644
--- a/rag-service/src/main/java/com/balex/rag/advisors/expansion/ExpansionQueryAdvisor.java
+++ b/rag-service/src/main/java/com/balex/rag/advisors/expansion/ExpansionQueryAdvisor.java
@@ -10,7 +10,7 @@ import org.springframework.ai.chat.client.advisor.api.AdvisorChain;
import org.springframework.ai.chat.client.advisor.api.BaseAdvisor;
import org.springframework.ai.chat.model.ChatModel;
import org.springframework.ai.chat.prompt.PromptTemplate;
-import org.springframework.ai.ollama.api.OllamaOptions;
+import org.springframework.ai.openai.OpenAiChatOptions;
import java.util.Map;
@@ -48,11 +48,10 @@ public class ExpansionQueryAdvisor implements BaseAdvisor {
public static ExpansionQueryAdvisorBuilder builder(ChatModel chatModel, RagExpansionProperties props) {
return new ExpansionQueryAdvisorBuilder().chatClient(ChatClient.builder(chatModel)
- .defaultOptions(OllamaOptions.builder()
+ .defaultOptions(OpenAiChatOptions.builder()
.temperature(props.temperature())
- .topK(props.topK())
.topP(props.topP())
- .repeatPenalty(props.repeatPenalty())
+ .frequencyPenalty(props.repeatPenalty() - 1.0) // Heuristic, not an exact conversion: Ollama repeatPenalty (1.0 = off) shifted onto OpenAI frequencyPenalty (0.0 = off), e.g. 1.0 -> 0.0
.build())
.build());
}
@@ -86,4 +85,4 @@ public class ExpansionQueryAdvisor implements BaseAdvisor {
return chatClientResponse;
}
-}
+}
\ No newline at end of file
diff --git a/rag-service/src/main/java/com/balex/rag/controller/ChatEntryController.java b/rag-service/src/main/java/com/balex/rag/controller/ChatEntryController.java
index de16b63..3e8a003 100644
--- a/rag-service/src/main/java/com/balex/rag/controller/ChatEntryController.java
+++ b/rag-service/src/main/java/com/balex/rag/controller/ChatEntryController.java
@@ -36,16 +36,15 @@ public class ChatEntryController {
log.trace(ApiLogMessage.NAME_OF_CURRENT_METHOD.getValue(), ApiUtils.getMethodName());
boolean onlyContext = request.onlyContext() != null ? request.onlyContext() : ragDefaults.onlyContext();
- int topK = request.topK() != null ? request.topK() : ragDefaults.topK();
double topP = request.topP() != null ? request.topP() : ragDefaults.topP();
- ChatEntry entry = chatEntryService.addUserEntry(chatId, request.content(), onlyContext, topK, topP);
+ ChatEntry entry = chatEntryService.addUserEntry(chatId, request.content(), onlyContext, topP);
Chat chat = chatService.getChat(chatId);
eventPublisher.publishQuerySent(
chat.getIdOwner().toString(),
chatId.toString(),
- 0); // TODO: add tokensUsed when Ollama response provides it
+ 0); // TODO: add tokensUsed when usage info is available from Groq response
return ResponseEntity.ok(entry);
}
diff --git a/rag-service/src/main/java/com/balex/rag/model/dto/UserEntryRequest.java b/rag-service/src/main/java/com/balex/rag/model/dto/UserEntryRequest.java
index da5c8a4..0a5387d 100644
--- a/rag-service/src/main/java/com/balex/rag/model/dto/UserEntryRequest.java
+++ b/rag-service/src/main/java/com/balex/rag/model/dto/UserEntryRequest.java
@@ -3,6 +3,5 @@ package com.balex.rag.model.dto;
public record UserEntryRequest(
String content,
Boolean onlyContext,
- Integer topK,
Double topP
-) {}
+) {}
\ No newline at end of file
diff --git a/rag-service/src/main/java/com/balex/rag/service/ChatEntryService.java b/rag-service/src/main/java/com/balex/rag/service/ChatEntryService.java
index 4d77213..7c58f49 100644
--- a/rag-service/src/main/java/com/balex/rag/service/ChatEntryService.java
+++ b/rag-service/src/main/java/com/balex/rag/service/ChatEntryService.java
@@ -8,5 +8,5 @@ public interface ChatEntryService {
List getEntriesByChatId(Long chatId);
- ChatEntry addUserEntry(Long chatId, String content, boolean onlyContext, int topK, double topP);
+ ChatEntry addUserEntry(Long chatId, String content, boolean onlyContext, double topP);
}
\ No newline at end of file
diff --git a/rag-service/src/main/java/com/balex/rag/service/impl/ChatEntryServiceImpl.java b/rag-service/src/main/java/com/balex/rag/service/impl/ChatEntryServiceImpl.java
index 4f2ebaf..d8be7d1 100644
--- a/rag-service/src/main/java/com/balex/rag/service/impl/ChatEntryServiceImpl.java
+++ b/rag-service/src/main/java/com/balex/rag/service/impl/ChatEntryServiceImpl.java
@@ -11,7 +11,7 @@ import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.chat.memory.ChatMemory;
-import org.springframework.ai.ollama.api.OllamaOptions;
+import org.springframework.ai.openai.OpenAiChatOptions;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
@@ -33,7 +33,7 @@ public class ChatEntryServiceImpl implements ChatEntryService {
@Override
@Transactional
- public ChatEntry addUserEntry(Long chatId, String content, boolean onlyContext, int topK, double topP) {
+ public ChatEntry addUserEntry(Long chatId, String content, boolean onlyContext, double topP) {
Chat chat = chatRepository.findById(chatId)
.orElseThrow(() -> new EntityNotFoundException("Chat not found with id: " + chatId));
@@ -63,8 +63,7 @@ public class ChatEntryServiceImpl implements ChatEntryService {
.system(systemPrompt)
.user(content)
.advisors(a -> a.param(ChatMemory.CONVERSATION_ID, String.valueOf(chatId)))
- .options(OllamaOptions.builder()
- .topK(topK)
+ .options(OpenAiChatOptions.builder()
.topP(topP)
.build())
.call()
diff --git a/rag-service/src/main/resources/application.properties b/rag-service/src/main/resources/application.properties
index 504c04e..14e3da0 100644
--- a/rag-service/src/main/resources/application.properties
+++ b/rag-service/src/main/resources/application.properties
@@ -1,24 +1,29 @@
-spring.application.name=rag
-#spring.ai.ollama.base-url=http://localhost:11431
-spring.ai.ollama.base-url=http://localhost:11434
-#spring.ai.ollama.chat.model=gemma3:4b-it-q4_K_M
-spring.ai.ollama.chat.model=llama3.1:8b-instruct-q4_K_M
-jwt.secret=ywfI6dBznYmHbokihB/OBzZz6E0Fj+6PiqrM8dQ5c3t0HeYarblCbOGM8vQtOt472AtQ+MsCH7OVIKHOzjrPsQ==
+spring.application.name=rag-service
+
+# --- LLM Provider: Groq (OpenAI-compatible API) ---
+spring.ai.openai.base-url=${SPRING_AI_OPENAI_BASE_URL:https://api.groq.com/openai/v1}
+spring.ai.openai.api-key=${SPRING_AI_OPENAI_API_KEY:}
+spring.ai.openai.chat.model=${SPRING_AI_OPENAI_CHAT_MODEL:llama-3.3-70b-versatile}
+
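+# --- Embedding model: local model from spring-ai-starter-model-transformers; pgvector only stores the vectors ---
+# Groq does not provide an embedding endpoint, so the OpenAI embedding auto-config is disabled below.
+# NOTE(review): newer Spring AI releases select models via spring.ai.model.embedding (e.g. transformers) - confirm this flag still takes effect.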
+spring.ai.openai.embedding.enabled=false
+
+jwt.secret=${JWT_SECRET:ywfI6dBznYmHbokihB/OBzZz6E0Fj+6PiqrM8dQ5c3t0HeYarblCbOGM8vQtOt472AtQ+MsCH7OVIKHOzjrPsQ==}
jwt.expiration=103600000
-spring.datasource.url=jdbc:postgresql://localhost:5432/ragdb
-spring.datasource.username=postgres
-spring.datasource.password=postgres
+spring.datasource.url=${SPRING_DATASOURCE_URL:jdbc:postgresql://localhost:5432/ragdb}
+spring.datasource.username=${SPRING_DATASOURCE_USERNAME:postgres}
+spring.datasource.password=${SPRING_DATASOURCE_PASSWORD:postgres}
logging.level.org.springframework.ai.chat.client.advisor=DEBUG
logging.level.org.springframework.web.servlet.mvc.method.annotation.RequestMappingHandlerMapping=DEBUG
logging.level.org.springframework.web=DEBUG
logging.level.org.flywaydb=DEBUG
logging.level.com.balex.rag.controller=DEBUG
app.document.chunk-size=200
-#spring.main.allow-circular-references=true
server.compression.enabled=false
server.tomcat.connection-timeout=60000
spring.mvc.async.request-timeout=60000
-#spring.main.web-application-type=reactive
end.points.users=/users
end.points.id=/{id}
end.points.all=/all
@@ -31,7 +36,7 @@ end.points.register=/register
end.points.chat=/chat
end.points.entry=/entry
end.points.document=/documents
-rag.rerank-fetch-multiplier = 2
+rag.rerank-fetch-multiplier=2
#Swagger
swagger.servers.first=http://localhost:8080
springdoc.swagger-ui.path=/swagger-ui.html