RAG 시스템 실전 구축 (v42)

실제로 구축할 수 있는 RAG 시스템 구현 가이드

1. RAG 시스템 기본 구조

RAG(Retrieval-Augmented Generation, 검색 증강 생성) 시스템은 다음 세 가지 단계로 구성됩니다:

검색(Retrieval): 사용자 질문과 유사한 문서를 벡터 데이터베이스에서 찾음
보완(Augmentation): 검색된 문서를 프롬프트에 추가하여 컨텍스트 제공
생성(Generation): LLM이 답변 생성

# 기본 RAG 흐름
class BasicRAG:
    """Minimal retrieve-then-generate pipeline.

    Args:
        embedding_model: Object whose ``encode(query)`` returns the query
            vector.
        vector_db: Object whose ``search(query_vector, k=...)`` returns the
            retrieved documents.
        llm: Object whose ``generate(prompt)`` returns the answer. Optional so
            that two-argument construction keeps working, but it must be set
            (here or by assigning ``self.llm``) before calling ``generate``.
    """

    def __init__(self, embedding_model, vector_db, llm=None):
        self.embedding_model = embedding_model
        self.vector_db = vector_db
        # Previously this attribute was never assigned, so generate() always
        # failed with AttributeError.
        self.llm = llm

    def retrieve(self, query, k=5):
        """Encode ``query`` and return the ``k`` nearest documents."""
        query_vector = self.embedding_model.encode(query)
        return self.vector_db.search(query_vector, k=k)

    def generate(self, query, retrieved_docs):
        """Build the prompt from the question and documents and ask the LLM.

        Raises:
            RuntimeError: If no LLM has been configured.
        """
        if self.llm is None:
            raise RuntimeError(
                "BasicRAG.generate() requires an llm; pass llm=... to BasicRAG()"
            )
        prompt = f"질문: {query}\n참고 문서: {retrieved_docs}"
        return self.llm.generate(prompt)

2. 청킹 전략 (Chunking Strategies)

2.1 의미 기반 청킹 (Semantic Chunking)

문장 경계를 기준으로 문서를 분할하여 의미적 일관성 유지 (아래 예시는 임베딩 유사도 대신 문장 단위와 길이 제한만 사용하는 단순화된 구현):

from sentence_transformers import SentenceTransformer
import numpy as np

class SemanticChunker:
    """Group whole sentences into chunks bounded by a character budget.

    The constructor loads a SentenceTransformer and stores it on
    ``self.model``; the chunking below splits on sentence boundaries and
    length only and does not consult the model.
    """

    def __init__(self, model_name='all-MiniLM-L6-v2'):
        self.model = SentenceTransformer(model_name)

    def chunk_semantically(self, text, min_chunk_size=100, max_chunk_size=500):
        """Split ``text`` on ``'. '`` and pack sentences into chunks.

        Args:
            text: Input text.
            min_chunk_size: Kept for backward compatibility. Chunks shorter
                than this used to be discarded when the next sentence
                overflowed, which silently lost text; they are now emitted.
            max_chunk_size: Maximum chunk length in characters. A single
                sentence longer than this becomes its own oversized chunk.

        Returns:
            List of chunk strings, in document order. Empty or
            whitespace-only input yields an empty list.
        """
        pieces = text.split('. ')
        last_index = len(pieces) - 1

        sentences = []
        for index, piece in enumerate(pieces):
            piece = piece.strip()
            if not piece:
                continue
            # The split consumed the period of every piece except the last,
            # which still carries its own terminator (if it had one).
            sentences.append(piece if index == last_index else piece + '.')

        chunks = []
        current = []
        current_len = 0
        for sentence in sentences:
            added = len(sentence) + (1 if current else 0)  # +1 for the joining space
            if current and current_len + added > max_chunk_size:
                chunks.append(' '.join(current))
                current = []
                current_len = 0
                added = len(sentence)
            current.append(sentence)
            current_len += added

        if current:
            chunks.append(' '.join(current))
        return chunks

# Usage example
# Constructing SemanticChunker with no arguments loads the
# 'all-MiniLM-L6-v2' SentenceTransformer model.
chunker = SemanticChunker()
text = "AI 기술은 빠르게 발전하고 있습니다. 특히 자연어 처리(NLP) 분야에서는 많은 혁신이 일어나고 있습니다."
# Uses the default min_chunk_size=100 and max_chunk_size=500.
chunks = chunker.chunk_semantically(text)
print(chunks)

2.2 재귀적 청킹 (Recursive Chunking)

고정 크기 윈도우를 일정 길이만큼 겹치게(overlap) 이동하며 문서를 분할하여 청크 경계의 문맥을 유지 (아래 예시는 재귀 호출 없이 슬라이딩 윈도우로 단순화한 구현):

class RecursiveChunker:
    """Split text into fixed-size character windows that overlap.

    Args:
        chunk_size: Window length in characters; must be positive.
        overlap: Number of characters shared by consecutive windows; must
            satisfy ``0 <= overlap < chunk_size`` so each window advances.

    Raises:
        ValueError: If the sizes cannot produce forward progress.
    """

    def __init__(self, chunk_size=500, overlap=50):
        self.chunk_size = chunk_size
        self.overlap = overlap
        self._check_config()

    def _check_config(self):
        """Reject settings under which the window would never advance."""
        if self.chunk_size <= 0:
            raise ValueError("chunk_size must be positive")
        if not 0 <= self.overlap < self.chunk_size:
            raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    def recursive_chunk(self, text):
        """Return the overlapping windows of ``text`` in order.

        The final window ends at the end of the text and may be shorter than
        ``chunk_size``. Empty input yields an empty list.
        """
        # Re-check here because the attributes may have been changed after
        # construction.
        self._check_config()
        step = self.chunk_size - self.overlap

        chunks = []
        start = 0
        while start < len(text):
            end = start + self.chunk_size
            chunks.append(text[start:end])
            # Stop once the window reaches the end. Stepping back by
            # `overlap` from the end would stay inside the text and loop
            # forever.
            if end >= len(text):
                break
            start += step
        return chunks

# Example
# chunk_size is set to 200; overlap keeps its default of 50.
recursive_chunker = RecursiveChunker(chunk_size=200)
text = "대규모 언어 모델은 자연어 이해와 생성 능력을 갖추고 있습니다. 이러한 모델은 다양한 응용 프로그램에서 활용됩니다."
chunks = recursive_chunker.recursive_chunk(text)
print(chunks)

2.3 에이전트 기반 청킹 (Agentic Chunking)

문서의 주제와 키워드를 기반으로 청킹:

class AgenticChunker:
    """Label paragraphs with the first topic whose keyword they contain.

    Args:
        topic_keywords: Optional mapping of topic name to a list of keywords.
            Topics are tried in the mapping's iteration order. Defaults to
            the built-in 'AI' and 'Data' keyword lists.
    """

    def __init__(self, topic_keywords=None):
        if topic_keywords is None:
            topic_keywords = {
                'AI': ['artificial intelligence', 'machine learning', 'deep learning'],
                'Data': ['database', 'data science', 'analytics'],
            }
        # Copy so later changes to the caller's mapping do not leak in.
        self.topic_keywords = dict(topic_keywords)

    def chunk_by_topic(self, text):
        """Split ``text`` on blank lines and tag each matching paragraph.

        Matching is a case-insensitive substring test. Paragraphs that match
        no topic are omitted from the result.

        Returns:
            List of ``(topic, paragraph)`` tuples in document order.
        """
        chunks = []
        for para in text.split('\n\n'):
            lowered = para.lower()  # computed once per paragraph
            # Walk every configured topic instead of hard-coding two keys, so
            # topics added to topic_keywords are honoured.
            for topic, keywords in self.topic_keywords.items():
                if any(keyword.lower() in lowered for keyword in keywords):
                    chunks.append((topic, para))
                    break
        return chunks

3. 임베딩 모델 선택과 비교

3.1 다양한 임베딩 모델 비교

import torch
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModel
import numpy as np

class EmbeddingBenchmark:
    """Load several SentenceTransformer models and compare their encodings.

    Args:
        model_names: Optional iterable of model names to load. Defaults to
            ``DEFAULT_MODEL_NAMES`` so that no-argument construction loads the
            same four models as before.
    """

    DEFAULT_MODEL_NAMES = (
        'all-MiniLM-L6-v2',
        'all-mpnet-base-v2',
        'sentence-t5-3b',
        'bert-base-nli-mean-tokens',
    )

    def __init__(self, model_names=None):
        names = self.DEFAULT_MODEL_NAMES if model_names is None else model_names
        # Every listed model is loaded eagerly.
        self.models = {name: SentenceTransformer(name) for name in names}

    def benchmark_models(self, texts):
        """Encode ``texts`` with every model and collect basic statistics.

        Returns:
            Dict keyed by model name; each value holds ``shape`` (of the
            embeddings array), ``memory_usage`` in MB and ``avg_time`` in
            seconds.
        """
        results = {}
        for name, model in self.models.items():
            # This encode happens before the timed call below.
            embeddings = model.encode(texts)
            results[name] = {
                'shape': embeddings.shape,
                'memory_usage': embeddings.nbytes / 1024 / 1024,  # MB
                'avg_time': self._time_encoding(model, texts),
            }
        return results

    def _time_encoding(self, model, texts, repeats=1):
        """Return the mean seconds per ``model.encode(texts)`` call.

        Args:
            repeats: Number of timed calls to average over; must be >= 1.

        Raises:
            ValueError: If ``repeats`` is less than 1.
        """
        import time

        if repeats < 1:
            raise ValueError("repeats must be >= 1")
        # perf_counter is monotonic; time.time() can jump when the system
        # clock is adjusted.
        start = time.perf_counter()
        for _ in range(repeats):
            model.encode(texts)
        return (time.perf_counter() - start) / repeats

# Performance comparison
# Constructing EmbeddingBenchmark loads every model it benchmarks.
benchmark = EmbeddingBenchmark()
texts = ["이 문장은 테스트 문장입니다.", "다른 문장입니다."]
results = benchmark.benchmark_models(texts)
# Print embedding memory (MB) and encode time (seconds) per model.
for model_name, stats in results.items():
    print(f"{model_name}: {stats['memory_usage']:.2f}MB, {stats['avg_time']:.4f}s")

3.2 최적의 모델 선택

class OptimalEmbeddingSelector:
    """Skeleton for scoring embedding models on a retrieval task."""

    def __init__(self):
        # Starts empty; nothing in this class writes to it.
        self.benchmark_results = dict()

    def evaluate_model(self, model_name, texts, eval_dataset):
        """Evaluate model performance.

        Loads the named SentenceTransformer, encodes ``texts`` and returns
        the score from ``_evaluate_retrieval_quality``.
        """
        encoder = SentenceTransformer(model_name)
        vectors = encoder.encode(texts)
        # Example: evaluate retrieval accuracy.
        return self._evaluate_retrieval_quality(vectors, eval_dataset)

    def _evaluate_retrieval_quality(self, embeddings, eval_dataset):
        """Return a fixed placeholder score; both arguments are ignored."""
        # A real implementation would compare against actual labels.
        placeholder_score = 0.85
        return placeholder_score

# Usage example
# The selector is only constructed here; evaluate_model is not called.
selector = OptimalEmbeddingSelector()
best_model = 'all-MiniLM-L6-v2'  # decide based on actual experiment results

4. 벡터 데이터베이스 비교

4.1 Chroma 비교

import chromadb
from chromadb.config import Settings

class ChromaVectorDB:
    """Thin wrapper around a Chroma collection that embeds text itself.

    Args:
        collection_name: Name of the collection to get or create.
        model_name: SentenceTransformer model used for embedding.
    """

    def __init__(self, collection_name='rag_collection', model_name='all-MiniLM-L6-v2'):
        self.client = chromadb.Client(Settings(allow_reset=True))
        self.collection = self.client.get_or_create_collection(collection_name)
        self.model_name = model_name
        # Loaded lazily on first use and then reused.
        self._model = None

    def add_documents(self, documents, ids):
        """Embed ``documents`` and add them to the collection under ``ids``."""
        embeddings = self._get_embeddings(documents)
        self.collection.add(
            embeddings=embeddings,
            documents=documents,
            ids=ids
        )

    def search(self, query, k=5):
        """Return the documents of the ``k`` nearest results for text ``query``."""
        query_embedding = self._get_embeddings([query])[0]
        results = self.collection.query(
            query_embeddings=[query_embedding],
            n_results=k
        )
        # One query was sent, so take the first (only) result list.
        return results['documents'][0]

    def _get_embeddings(self, texts):
        """Encode ``texts`` and return one vector per text as plain lists."""
        if self._model is None:
            # Build the model once; constructing it on every call (as before)
            # repeats the model load for each add/search.
            from sentence_transformers import SentenceTransformer
            self._model = SentenceTransformer(self.model_name)
        vectors = self._model.encode(texts)
        # Plain lists are accepted by Chroma regardless of NumPy support.
        return vectors.tolist() if hasattr(vectors, 'tolist') else list(vectors)

4.2 Qdrant 비교


# Qdrant 벡터 DB 래퍼
from qdrant_client import QdrantClient
from qdrant_client.models import Filter, FieldCondition, MatchValue

class QdrantVectorDB:
    def __init__(self, host='localhost', port=6333, collection_name='rag_collection',
                 vector_size=384, distance='Cosine', recreate=True):
        """Connect to Qdrant and prepare the collection.

        Args:
            host: Qdrant server host.
            port: Qdrant server port.
            collection_name: Collection used by this wrapper.
            vector_size: Dimensionality of stored vectors. Must match the
                embedding model's output size; 384 is kept as the default.
            distance: Distance metric name passed to Qdrant.
            recreate: When True (default, the original behavior) the
                collection is recreated via ``recreate_collection``. Pass
                False to keep an existing collection and create it only when
                it is missing.
        """
        self.client = QdrantClient(host=host, port=port)
        self.collection_name = collection_name

        vectors_config = {"size": vector_size, "distance": distance}
        if recreate:
            self.client.recreate_collection(
                collection_name=self.collection_name,
                vectors_config=vectors_config
            )
        elif not self.client.collection_exists(self.collection_name):
            self.client.create_collection(
                collection_name=self.collection_name,
                vectors_config=vectors_config
            )

    def add_documents(self, documents, ids):
        points = [
            {
                "id": i,
                "vector": self._get_embeddings([doc])[0],
                "payload": {"text": doc}
            }
            for i, doc in enumerate(documents)
        ]
        self.client.upsert(
            collection_name=self.collection_name,
            points=points
        )

    def search(self, query, k=5):
        query_vector = self._get_embeddings([query])[0]
        results = self.client.search(
            collection_name=self.collection_name,
            query_vector=query_vector,
            limit=k
        )
        return [hit.payload['text'] for hit in results]

    def _get_embeddings(self, texts):
        from sentence_transformers import SentenceTransformer

---

📥 **Get the full guide on Gumroad**: https://gumroad.com/l/auto ($7)

推荐订阅源

DEV Community

RAG 시스템 실전 구축 (v42)

1. RAG 시스템 기본 구조

2. 청킹 전략 (Chunking Strategies)

2.1 의미 기반 청킹 (Semantic Chunking)

2.2 재귀적 청킹 (Recursive Chunking)

2.3 에이전트 기반 청킹 (Agentic Chunking)

3. 임베딩 모델 선택과 비교

3.1 다양한 임베딩 모델 비교

3.2 최적의 모델 선택

4. 벡터 데이터베이스 비교

4.1 Chroma 비교

4.2 Qdrant 비교