GPU
This commit is contained in:
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
.idea/
|
||||||
|
*.iml
|
||||||
|
wireguard.txt
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>org.springframework.boot</groupId>
|
<groupId>org.springframework.boot</groupId>
|
||||||
@@ -64,9 +64,16 @@
|
|||||||
<groupId>org.springframework.boot</groupId>
|
<groupId>org.springframework.boot</groupId>
|
||||||
<artifactId>spring-boot-starter-web</artifactId>
|
<artifactId>spring-boot-starter-web</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<!-- Changed: spring-ai-starter-model-ollama -> spring-ai-starter-model-openai (Groq-compatible) -->
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.springframework.ai</groupId>
|
<groupId>org.springframework.ai</groupId>
|
||||||
<artifactId>spring-ai-starter-model-ollama</artifactId>
|
<artifactId>spring-ai-starter-model-openai</artifactId>
|
||||||
|
</dependency>
|
||||||
|
<!-- Local ONNX embedding model (Groq does not provide an embedding API) -->
|
||||||
|
<!-- Default model: all-MiniLM-L6-v2 (384 dimensions) -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.springframework.ai</groupId>
|
||||||
|
<artifactId>spring-ai-starter-model-transformers</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.springframework.ai</groupId>
|
<groupId>org.springframework.ai</groupId>
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ import org.springframework.ai.chat.client.advisor.api.Advisor;
|
|||||||
import org.springframework.ai.chat.memory.ChatMemory;
|
import org.springframework.ai.chat.memory.ChatMemory;
|
||||||
import org.springframework.ai.chat.model.ChatModel;
|
import org.springframework.ai.chat.model.ChatModel;
|
||||||
import org.springframework.ai.chat.prompt.PromptTemplate;
|
import org.springframework.ai.chat.prompt.PromptTemplate;
|
||||||
import org.springframework.ai.ollama.api.OllamaOptions;
|
import org.springframework.ai.openai.OpenAiChatOptions;
|
||||||
import org.springframework.ai.vectorstore.VectorStore;
|
import org.springframework.ai.vectorstore.VectorStore;
|
||||||
import org.springframework.beans.factory.annotation.Value;
|
import org.springframework.beans.factory.annotation.Value;
|
||||||
import org.springframework.boot.SpringApplication;
|
import org.springframework.boot.SpringApplication;
|
||||||
@@ -49,9 +49,10 @@ public class RagApplication {
|
|||||||
.order(3).build(),
|
.order(3).build(),
|
||||||
SimpleLoggerAdvisor.builder().order(4).build()
|
SimpleLoggerAdvisor.builder().order(4).build()
|
||||||
)
|
)
|
||||||
.defaultOptions(OllamaOptions.builder()
|
.defaultOptions(OpenAiChatOptions.builder()
|
||||||
.temperature(ragDefaults.temperature())
|
.temperature(ragDefaults.temperature())
|
||||||
.repeatPenalty(ragDefaults.repeatPenalty())
|
.topP(ragDefaults.topP())
|
||||||
|
.frequencyPenalty(ragDefaults.repeatPenalty() - 1.0) // Ollama repeatPenalty 1.1 -> frequencyPenalty 0.1
|
||||||
.build())
|
.build())
|
||||||
.build();
|
.build();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ import org.springframework.ai.chat.client.advisor.api.AdvisorChain;
|
|||||||
import org.springframework.ai.chat.client.advisor.api.BaseAdvisor;
|
import org.springframework.ai.chat.client.advisor.api.BaseAdvisor;
|
||||||
import org.springframework.ai.chat.model.ChatModel;
|
import org.springframework.ai.chat.model.ChatModel;
|
||||||
import org.springframework.ai.chat.prompt.PromptTemplate;
|
import org.springframework.ai.chat.prompt.PromptTemplate;
|
||||||
import org.springframework.ai.ollama.api.OllamaOptions;
|
import org.springframework.ai.openai.OpenAiChatOptions;
|
||||||
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
@@ -48,11 +48,10 @@ public class ExpansionQueryAdvisor implements BaseAdvisor {
|
|||||||
|
|
||||||
public static ExpansionQueryAdvisorBuilder builder(ChatModel chatModel, RagExpansionProperties props) {
|
public static ExpansionQueryAdvisorBuilder builder(ChatModel chatModel, RagExpansionProperties props) {
|
||||||
return new ExpansionQueryAdvisorBuilder().chatClient(ChatClient.builder(chatModel)
|
return new ExpansionQueryAdvisorBuilder().chatClient(ChatClient.builder(chatModel)
|
||||||
.defaultOptions(OllamaOptions.builder()
|
.defaultOptions(OpenAiChatOptions.builder()
|
||||||
.temperature(props.temperature())
|
.temperature(props.temperature())
|
||||||
.topK(props.topK())
|
|
||||||
.topP(props.topP())
|
.topP(props.topP())
|
||||||
.repeatPenalty(props.repeatPenalty())
|
.frequencyPenalty(props.repeatPenalty() - 1.0) // Ollama repeatPenalty 1.0 -> frequencyPenalty 0.0
|
||||||
.build())
|
.build())
|
||||||
.build());
|
.build());
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -36,16 +36,15 @@ public class ChatEntryController {
|
|||||||
log.trace(ApiLogMessage.NAME_OF_CURRENT_METHOD.getValue(), ApiUtils.getMethodName());
|
log.trace(ApiLogMessage.NAME_OF_CURRENT_METHOD.getValue(), ApiUtils.getMethodName());
|
||||||
|
|
||||||
boolean onlyContext = request.onlyContext() != null ? request.onlyContext() : ragDefaults.onlyContext();
|
boolean onlyContext = request.onlyContext() != null ? request.onlyContext() : ragDefaults.onlyContext();
|
||||||
int topK = request.topK() != null ? request.topK() : ragDefaults.topK();
|
|
||||||
double topP = request.topP() != null ? request.topP() : ragDefaults.topP();
|
double topP = request.topP() != null ? request.topP() : ragDefaults.topP();
|
||||||
|
|
||||||
ChatEntry entry = chatEntryService.addUserEntry(chatId, request.content(), onlyContext, topK, topP);
|
ChatEntry entry = chatEntryService.addUserEntry(chatId, request.content(), onlyContext, topP);
|
||||||
|
|
||||||
Chat chat = chatService.getChat(chatId);
|
Chat chat = chatService.getChat(chatId);
|
||||||
eventPublisher.publishQuerySent(
|
eventPublisher.publishQuerySent(
|
||||||
chat.getIdOwner().toString(),
|
chat.getIdOwner().toString(),
|
||||||
chatId.toString(),
|
chatId.toString(),
|
||||||
0); // TODO: add tokensUsed when Ollama response provides it
|
0); // TODO: add tokensUsed when usage info is available from Groq response
|
||||||
|
|
||||||
return ResponseEntity.ok(entry);
|
return ResponseEntity.ok(entry);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,6 +3,5 @@ package com.balex.rag.model.dto;
|
|||||||
public record UserEntryRequest(
|
public record UserEntryRequest(
|
||||||
String content,
|
String content,
|
||||||
Boolean onlyContext,
|
Boolean onlyContext,
|
||||||
Integer topK,
|
|
||||||
Double topP
|
Double topP
|
||||||
) {}
|
) {}
|
||||||
@@ -8,5 +8,5 @@ public interface ChatEntryService {
|
|||||||
|
|
||||||
List<ChatEntry> getEntriesByChatId(Long chatId);
|
List<ChatEntry> getEntriesByChatId(Long chatId);
|
||||||
|
|
||||||
ChatEntry addUserEntry(Long chatId, String content, boolean onlyContext, int topK, double topP);
|
ChatEntry addUserEntry(Long chatId, String content, boolean onlyContext, double topP);
|
||||||
}
|
}
|
||||||
@@ -11,7 +11,7 @@ import lombok.RequiredArgsConstructor;
|
|||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import org.springframework.ai.chat.client.ChatClient;
|
import org.springframework.ai.chat.client.ChatClient;
|
||||||
import org.springframework.ai.chat.memory.ChatMemory;
|
import org.springframework.ai.chat.memory.ChatMemory;
|
||||||
import org.springframework.ai.ollama.api.OllamaOptions;
|
import org.springframework.ai.openai.OpenAiChatOptions;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
import org.springframework.transaction.annotation.Transactional;
|
import org.springframework.transaction.annotation.Transactional;
|
||||||
|
|
||||||
@@ -33,7 +33,7 @@ public class ChatEntryServiceImpl implements ChatEntryService {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
@Transactional
|
@Transactional
|
||||||
public ChatEntry addUserEntry(Long chatId, String content, boolean onlyContext, int topK, double topP) {
|
public ChatEntry addUserEntry(Long chatId, String content, boolean onlyContext, double topP) {
|
||||||
Chat chat = chatRepository.findById(chatId)
|
Chat chat = chatRepository.findById(chatId)
|
||||||
.orElseThrow(() -> new EntityNotFoundException("Chat not found with id: " + chatId));
|
.orElseThrow(() -> new EntityNotFoundException("Chat not found with id: " + chatId));
|
||||||
|
|
||||||
@@ -63,8 +63,7 @@ public class ChatEntryServiceImpl implements ChatEntryService {
|
|||||||
.system(systemPrompt)
|
.system(systemPrompt)
|
||||||
.user(content)
|
.user(content)
|
||||||
.advisors(a -> a.param(ChatMemory.CONVERSATION_ID, String.valueOf(chatId)))
|
.advisors(a -> a.param(ChatMemory.CONVERSATION_ID, String.valueOf(chatId)))
|
||||||
.options(OllamaOptions.builder()
|
.options(OpenAiChatOptions.builder()
|
||||||
.topK(topK)
|
|
||||||
.topP(topP)
|
.topP(topP)
|
||||||
.build())
|
.build())
|
||||||
.call()
|
.call()
|
||||||
|
|||||||
@@ -1,24 +1,29 @@
|
|||||||
spring.application.name=rag
|
spring.application.name=rag-service
|
||||||
#spring.ai.ollama.base-url=http://localhost:11431
|
|
||||||
spring.ai.ollama.base-url=http://localhost:11434
|
# --- LLM Provider: Groq (OpenAI-compatible API) ---
|
||||||
#spring.ai.ollama.chat.model=gemma3:4b-it-q4_K_M
|
spring.ai.openai.base-url=${SPRING_AI_OPENAI_BASE_URL:https://api.groq.com/openai/v1}
|
||||||
spring.ai.ollama.chat.model=llama3.1:8b-instruct-q4_K_M
|
spring.ai.openai.api-key=${SPRING_AI_OPENAI_API_KEY:}
|
||||||
jwt.secret=ywfI6dBznYmHbokihB/OBzZz6E0Fj+6PiqrM8dQ5c3t0HeYarblCbOGM8vQtOt472AtQ+MsCH7OVIKHOzjrPsQ==
|
spring.ai.openai.chat.model=${SPRING_AI_OPENAI_CHAT_MODEL:llama-3.3-70b-versatile}
|
||||||
|
|
||||||
|
# --- Embedding model: local ONNX transformers model; vectors are stored in pgvector ---
|
||||||
|
# Groq does not provide an embedding endpoint, so we disable OpenAI embedding auto-config
|
||||||
|
# and rely on the local embedding model from spring-ai-starter-model-transformers (default: all-MiniLM-L6-v2, 384 dimensions)
|
||||||
|
spring.ai.openai.embedding.enabled=false
|
||||||
|
|
||||||
|
jwt.secret=${JWT_SECRET:ywfI6dBznYmHbokihB/OBzZz6E0Fj+6PiqrM8dQ5c3t0HeYarblCbOGM8vQtOt472AtQ+MsCH7OVIKHOzjrPsQ==}
|
||||||
jwt.expiration=103600000
|
jwt.expiration=103600000
|
||||||
spring.datasource.url=jdbc:postgresql://localhost:5432/ragdb
|
spring.datasource.url=${SPRING_DATASOURCE_URL:jdbc:postgresql://localhost:5432/ragdb}
|
||||||
spring.datasource.username=postgres
|
spring.datasource.username=${SPRING_DATASOURCE_USERNAME:postgres}
|
||||||
spring.datasource.password=postgres
|
spring.datasource.password=${SPRING_DATASOURCE_PASSWORD:postgres}
|
||||||
logging.level.org.springframework.ai.chat.client.advisor=DEBUG
|
logging.level.org.springframework.ai.chat.client.advisor=DEBUG
|
||||||
logging.level.org.springframework.web.servlet.mvc.method.annotation.RequestMappingHandlerMapping=DEBUG
|
logging.level.org.springframework.web.servlet.mvc.method.annotation.RequestMappingHandlerMapping=DEBUG
|
||||||
logging.level.org.springframework.web=DEBUG
|
logging.level.org.springframework.web=DEBUG
|
||||||
logging.level.org.flywaydb=DEBUG
|
logging.level.org.flywaydb=DEBUG
|
||||||
logging.level.com.balex.rag.controller=DEBUG
|
logging.level.com.balex.rag.controller=DEBUG
|
||||||
app.document.chunk-size=200
|
app.document.chunk-size=200
|
||||||
#spring.main.allow-circular-references=true
|
|
||||||
server.compression.enabled=false
|
server.compression.enabled=false
|
||||||
server.tomcat.connection-timeout=60000
|
server.tomcat.connection-timeout=60000
|
||||||
spring.mvc.async.request-timeout=60000
|
spring.mvc.async.request-timeout=60000
|
||||||
#spring.main.web-application-type=reactive
|
|
||||||
end.points.users=/users
|
end.points.users=/users
|
||||||
end.points.id=/{id}
|
end.points.id=/{id}
|
||||||
end.points.all=/all
|
end.points.all=/all
|
||||||
@@ -31,7 +36,7 @@ end.points.register=/register
|
|||||||
end.points.chat=/chat
|
end.points.chat=/chat
|
||||||
end.points.entry=/entry
|
end.points.entry=/entry
|
||||||
end.points.document=/documents
|
end.points.document=/documents
|
||||||
rag.rerank-fetch-multiplier = 2
|
rag.rerank-fetch-multiplier=2
|
||||||
#Swagger
|
#Swagger
|
||||||
swagger.servers.first=http://localhost:8080
|
swagger.servers.first=http://localhost:8080
|
||||||
springdoc.swagger-ui.path=/swagger-ui.html
|
springdoc.swagger-ui.path=/swagger-ui.html
|
||||||
|
|||||||
Reference in New Issue
Block a user