diff --git a/.idea/agent_proj.iml b/.idea/agent_proj.iml
index 5e376b1..b9ed519 100644
--- a/.idea/agent_proj.iml
+++ b/.idea/agent_proj.iml
@@ -2,7 +2,7 @@
-
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
index f86cb7f..1511473 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,5 +3,5 @@
-
+
\ No newline at end of file
diff --git a/logs/Agent_20260228.log b/logs/Agent_20260228.log
index 3209a45..acb2335 100644
--- a/logs/Agent_20260228.log
+++ b/logs/Agent_20260228.log
@@ -81,3 +81,162 @@ pypdf.errors.PdfReadError: Not an encrypted file
2026-02-28 03:36:40,073 - Agent - INFO - vector_store.py:70 - [加载知识库]: /Users/czzhangheng/edu/agent_proj/data/扫拖一体机器人100问.txt 的 md5已存在, 跳过
2026-02-28 03:36:40,074 - Agent - INFO - vector_store.py:70 - [加载知识库]: /Users/czzhangheng/edu/agent_proj/data/维护保养.txt 的 md5已存在, 跳过
2026-02-28 03:36:51,201 - Agent - INFO - vector_store.py:88 - [加载知识库]/Users/czzhangheng/edu/agent_proj/data/扫地机器人100问.pdf 加载成功
+2026-02-28 13:09:38,673 - Agent - INFO - logger_handler.py:47 - info
+2026-02-28 13:09:38,673 - Agent - ERROR - logger_handler.py:48 - Error
+2026-02-28 13:09:52,924 - Agent - ERROR - vector_store.py:90 - [加载知识库]C:\Users\cz189\agent_proj\data\扫地机器人100问.pdf 失败,错误 `pypdf` package not found, please install it with `pip install pypdf`
+Traceback (most recent call last):
+ File "D:\Users\cz189\miniconda3\envs\RAG\Lib\site-packages\langchain_community\document_loaders\parsers\pdf.py", line 359, in lazy_parse
+ import pypdf
+ModuleNotFoundError: No module named 'pypdf'
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+ File "C:\Users\cz189\agent_proj\rag\vector_store.py", line 74, in load_document
+ document: list[Documents] = get_file_document(path)
+ ^^^^^^^^^^^^^^^^^^^^^^^
+ File "C:\Users\cz189\agent_proj\rag\vector_store.py", line 58, in get_file_document
+ return pdf_loader(read_path)
+ ^^^^^^^^^^^^^^^^^^^^^
+ File "C:\Users\cz189\agent_proj\utils\file_handler.py", line 51, in pdf_loader
+ return PyPDFLoader(file_path, pwd).load()
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ File "D:\Users\cz189\miniconda3\envs\RAG\Lib\site-packages\langchain_core\document_loaders\base.py", line 43, in load
+ return list(self.lazy_load())
+ ^^^^^^^^^^^^^^^^^^^^^^
+ File "D:\Users\cz189\miniconda3\envs\RAG\Lib\site-packages\langchain_community\document_loaders\pdf.py", line 305, in lazy_load
+ yield from self.parser.lazy_parse(blob)
+ File "D:\Users\cz189\miniconda3\envs\RAG\Lib\site-packages\langchain_community\document_loaders\parsers\pdf.py", line 361, in lazy_parse
+ raise ImportError(
+ImportError: `pypdf` package not found, please install it with `pip install pypdf`
+2026-02-28 13:09:52,928 - Agent - ERROR - vector_store.py:90 - [加载知识库]C:\Users\cz189\agent_proj\data\扫地机器人100问2.txt 失败,错误 Error loading C:\Users\cz189\agent_proj\data\扫地机器人100问2.txt
+Traceback (most recent call last):
+ File "D:\Users\cz189\miniconda3\envs\RAG\Lib\site-packages\langchain_community\document_loaders\text.py", line 43, in lazy_load
+ text = f.read()
+ ^^^^^^^^
+UnicodeDecodeError: 'gbk' codec can't decode byte 0xab in position 4: illegal multibyte sequence
+
+The above exception was the direct cause of the following exception:
+
+Traceback (most recent call last):
+ File "C:\Users\cz189\agent_proj\rag\vector_store.py", line 74, in load_document
+ document: list[Documents] = get_file_document(path)
+ ^^^^^^^^^^^^^^^^^^^^^^^
+ File "C:\Users\cz189\agent_proj\rag\vector_store.py", line 55, in get_file_document
+ return txt_loader(read_path)
+ ^^^^^^^^^^^^^^^^^^^^^
+ File "C:\Users\cz189\agent_proj\utils\file_handler.py", line 56, in txt_loader
+ return TextLoader(file_path).load()
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ File "D:\Users\cz189\miniconda3\envs\RAG\Lib\site-packages\langchain_core\document_loaders\base.py", line 43, in load
+ return list(self.lazy_load())
+ ^^^^^^^^^^^^^^^^^^^^^^
+ File "D:\Users\cz189\miniconda3\envs\RAG\Lib\site-packages\langchain_community\document_loaders\text.py", line 56, in lazy_load
+ raise RuntimeError(f"Error loading {self.file_path}") from e
+RuntimeError: Error loading C:\Users\cz189\agent_proj\data\扫地机器人100问2.txt
+2026-02-28 13:09:52,930 - Agent - ERROR - vector_store.py:90 - [加载知识库]C:\Users\cz189\agent_proj\data\扫拖一体机器人100问.txt 失败,错误 Error loading C:\Users\cz189\agent_proj\data\扫拖一体机器人100问.txt
+Traceback (most recent call last):
+ File "D:\Users\cz189\miniconda3\envs\RAG\Lib\site-packages\langchain_community\document_loaders\text.py", line 43, in lazy_load
+ text = f.read()
+ ^^^^^^^^
+UnicodeDecodeError: 'gbk' codec can't decode byte 0xab in position 4: illegal multibyte sequence
+
+The above exception was the direct cause of the following exception:
+
+Traceback (most recent call last):
+ File "C:\Users\cz189\agent_proj\rag\vector_store.py", line 74, in load_document
+ document: list[Documents] = get_file_document(path)
+ ^^^^^^^^^^^^^^^^^^^^^^^
+ File "C:\Users\cz189\agent_proj\rag\vector_store.py", line 55, in get_file_document
+ return txt_loader(read_path)
+ ^^^^^^^^^^^^^^^^^^^^^
+ File "C:\Users\cz189\agent_proj\utils\file_handler.py", line 56, in txt_loader
+ return TextLoader(file_path).load()
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ File "D:\Users\cz189\miniconda3\envs\RAG\Lib\site-packages\langchain_core\document_loaders\base.py", line 43, in load
+ return list(self.lazy_load())
+ ^^^^^^^^^^^^^^^^^^^^^^
+ File "D:\Users\cz189\miniconda3\envs\RAG\Lib\site-packages\langchain_community\document_loaders\text.py", line 56, in lazy_load
+ raise RuntimeError(f"Error loading {self.file_path}") from e
+RuntimeError: Error loading C:\Users\cz189\agent_proj\data\扫拖一体机器人100问.txt
+2026-02-28 13:09:52,933 - Agent - ERROR - vector_store.py:90 - [加载知识库]C:\Users\cz189\agent_proj\data\故障排除.txt 失败,错误 Error loading C:\Users\cz189\agent_proj\data\故障排除.txt
+Traceback (most recent call last):
+ File "D:\Users\cz189\miniconda3\envs\RAG\Lib\site-packages\langchain_community\document_loaders\text.py", line 43, in lazy_load
+ text = f.read()
+ ^^^^^^^^
+UnicodeDecodeError: 'gbk' codec can't decode byte 0xab in position 2: illegal multibyte sequence
+
+The above exception was the direct cause of the following exception:
+
+Traceback (most recent call last):
+ File "C:\Users\cz189\agent_proj\rag\vector_store.py", line 74, in load_document
+ document: list[Documents] = get_file_document(path)
+ ^^^^^^^^^^^^^^^^^^^^^^^
+ File "C:\Users\cz189\agent_proj\rag\vector_store.py", line 55, in get_file_document
+ return txt_loader(read_path)
+ ^^^^^^^^^^^^^^^^^^^^^
+ File "C:\Users\cz189\agent_proj\utils\file_handler.py", line 56, in txt_loader
+ return TextLoader(file_path).load()
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ File "D:\Users\cz189\miniconda3\envs\RAG\Lib\site-packages\langchain_core\document_loaders\base.py", line 43, in load
+ return list(self.lazy_load())
+ ^^^^^^^^^^^^^^^^^^^^^^
+ File "D:\Users\cz189\miniconda3\envs\RAG\Lib\site-packages\langchain_community\document_loaders\text.py", line 56, in lazy_load
+ raise RuntimeError(f"Error loading {self.file_path}") from e
+RuntimeError: Error loading C:\Users\cz189\agent_proj\data\故障排除.txt
+2026-02-28 13:09:52,934 - Agent - ERROR - vector_store.py:90 - [加载知识库]C:\Users\cz189\agent_proj\data\维护保养.txt 失败,错误 Error loading C:\Users\cz189\agent_proj\data\维护保养.txt
+Traceback (most recent call last):
+ File "D:\Users\cz189\miniconda3\envs\RAG\Lib\site-packages\langchain_community\document_loaders\text.py", line 43, in lazy_load
+ text = f.read()
+ ^^^^^^^^
+UnicodeDecodeError: 'gbk' codec can't decode byte 0xab in position 4: illegal multibyte sequence
+
+The above exception was the direct cause of the following exception:
+
+Traceback (most recent call last):
+ File "C:\Users\cz189\agent_proj\rag\vector_store.py", line 74, in load_document
+ document: list[Documents] = get_file_document(path)
+ ^^^^^^^^^^^^^^^^^^^^^^^
+ File "C:\Users\cz189\agent_proj\rag\vector_store.py", line 55, in get_file_document
+ return txt_loader(read_path)
+ ^^^^^^^^^^^^^^^^^^^^^
+ File "C:\Users\cz189\agent_proj\utils\file_handler.py", line 56, in txt_loader
+ return TextLoader(file_path).load()
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ File "D:\Users\cz189\miniconda3\envs\RAG\Lib\site-packages\langchain_core\document_loaders\base.py", line 43, in load
+ return list(self.lazy_load())
+ ^^^^^^^^^^^^^^^^^^^^^^
+ File "D:\Users\cz189\miniconda3\envs\RAG\Lib\site-packages\langchain_community\document_loaders\text.py", line 56, in lazy_load
+ raise RuntimeError(f"Error loading {self.file_path}") from e
+RuntimeError: Error loading C:\Users\cz189\agent_proj\data\维护保养.txt
+2026-02-28 13:09:52,936 - Agent - ERROR - vector_store.py:90 - [加载知识库]C:\Users\cz189\agent_proj\data\选购指南.txt 失败,错误 Error loading C:\Users\cz189\agent_proj\data\选购指南.txt
+Traceback (most recent call last):
+ File "D:\Users\cz189\miniconda3\envs\RAG\Lib\site-packages\langchain_community\document_loaders\text.py", line 43, in lazy_load
+ text = f.read()
+ ^^^^^^^^
+UnicodeDecodeError: 'gbk' codec can't decode byte 0xab in position 2: illegal multibyte sequence
+
+The above exception was the direct cause of the following exception:
+
+Traceback (most recent call last):
+ File "C:\Users\cz189\agent_proj\rag\vector_store.py", line 74, in load_document
+ document: list[Documents] = get_file_document(path)
+ ^^^^^^^^^^^^^^^^^^^^^^^
+ File "C:\Users\cz189\agent_proj\rag\vector_store.py", line 55, in get_file_document
+ return txt_loader(read_path)
+ ^^^^^^^^^^^^^^^^^^^^^
+ File "C:\Users\cz189\agent_proj\utils\file_handler.py", line 56, in txt_loader
+ return TextLoader(file_path).load()
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ File "D:\Users\cz189\miniconda3\envs\RAG\Lib\site-packages\langchain_core\document_loaders\base.py", line 43, in load
+ return list(self.lazy_load())
+ ^^^^^^^^^^^^^^^^^^^^^^
+ File "D:\Users\cz189\miniconda3\envs\RAG\Lib\site-packages\langchain_community\document_loaders\text.py", line 56, in lazy_load
+ raise RuntimeError(f"Error loading {self.file_path}") from e
+RuntimeError: Error loading C:\Users\cz189\agent_proj\data\选购指南.txt
+2026-02-28 13:12:19,618 - Agent - INFO - vector_store.py:88 - [加载知识库]C:\Users\cz189\agent_proj\data\扫地机器人100问.pdf 加载成功
+2026-02-28 13:12:22,955 - Agent - INFO - vector_store.py:88 - [加载知识库]C:\Users\cz189\agent_proj\data\扫地机器人100问2.txt 加载成功
+2026-02-28 13:12:27,114 - Agent - INFO - vector_store.py:88 - [加载知识库]C:\Users\cz189\agent_proj\data\扫拖一体机器人100问.txt 加载成功
+2026-02-28 13:12:34,245 - Agent - INFO - vector_store.py:88 - [加载知识库]C:\Users\cz189\agent_proj\data\故障排除.txt 加载成功
+2026-02-28 13:12:38,453 - Agent - INFO - vector_store.py:88 - [加载知识库]C:\Users\cz189\agent_proj\data\维护保养.txt 加载成功
+2026-02-28 13:12:42,384 - Agent - INFO - vector_store.py:88 - [加载知识库]C:\Users\cz189\agent_proj\data\选购指南.txt 加载成功
diff --git a/md5.txt b/md5.txt
index 1258035..71cda04 100644
--- a/md5.txt
+++ b/md5.txt
@@ -1,5 +1,6 @@
-94bd6c0b4283b8524f4e8dfc5b72f8ab
-a29a4ba5a539a2a462def8f91eb98ccc
-928c24dbdeeb1c0a109f5620c8bac429
-d294281a5769e0a3d1455fbf637ee572
-fc539588792a89ff842cf241cf822de16d726f4604d9e7ebcf59a932ae9e9c00
+6d726f4604d9e7ebcf59a932ae9e9c00
+4ef62dc4189785055436124361296d05
+76d81647e3901472e01ad9859425506f
+85fd3b64ff87fe08fdc5c4a34998b555
+96a099807d1ad7c26e37b53d130f26d2
+1ce3fc50860e15d1c106900b63ef42a2
diff --git a/model/__pycache__/factory.cpython-311.pyc b/model/__pycache__/factory.cpython-311.pyc
new file mode 100644
index 0000000..d958fac
Binary files /dev/null and b/model/__pycache__/factory.cpython-311.pyc differ
diff --git a/rag/__pycache__/vector_store.cpython-311.pyc b/rag/__pycache__/vector_store.cpython-311.pyc
new file mode 100644
index 0000000..f978c01
Binary files /dev/null and b/rag/__pycache__/vector_store.cpython-311.pyc differ
diff --git a/rag/chroma_db/chroma.sqlite3 b/rag/chroma_db/chroma.sqlite3
index ee90f19..0c9fd39 100644
Binary files a/rag/chroma_db/chroma.sqlite3 and b/rag/chroma_db/chroma.sqlite3 differ
diff --git a/rag/rag_service.py b/rag/rag_service.py
new file mode 100644
index 0000000..049da9b
--- /dev/null
+++ b/rag/rag_service.py
@@ -0,0 +1,47 @@
+"""rag总结服务:用户提问,搜索参考资料,提问+参考资料提供模型,让模型总结回复"""
+from langchain_core.output_parsers import StrOutputParser
+
+from rag.vector_store import VectorStoreSerivce
+from utils.prompt_loader import load_rag_prompts
+from langchain_core.prompts import PromptTemplate
+from model.factory import chat_model
+from langchain_community.docstore.document import Document
+from typing import List
+
+class RagSummarizeService:
+ def __init__(self):
+ self.vector_store = VectorStoreSerivce()
+ self.retriever = self.vector_store.get_retriever()
+ self.prompt_template = load_rag_prompts()
+ self.prompt_text = PromptTemplate.from_template(self.prompt_template)
+ self.model = chat_model
+ self.chain = self._init_chain()
+
+
+ def _init_chain(self):
+ chain = self.prompt_text | self.model | StrOutputParser()
+ return chain
+
+ def retriever_docs(self, query: str) -> List[Document]:
+ return self.retriever.invoke(query)
+
+ def rag_summarize(self, query: str) -> str:
+ context_docs = self.retriever_docs(query)
+ context = ""
+ counter = 0
+ for doc in context_docs:
+ counter += 1
+ context += f"[参考资料{counter}]: 参考资料:{doc.page_content} | 参考源: {doc.metadata} \n"
+
+ return self.chain.invoke(
+ {
+ "input": query,
+ "context": context
+ }
+ )
+
+
+if __name__ == '__main__':
+ rag = RagSummarizeService()
+ response = rag.rag_summarize("查询小户型适合那些扫地机器人")
+ print(response)
diff --git a/utils/__pycache__/config_handler.cpython-311.pyc b/utils/__pycache__/config_handler.cpython-311.pyc
new file mode 100644
index 0000000..5201b7a
Binary files /dev/null and b/utils/__pycache__/config_handler.cpython-311.pyc differ
diff --git a/utils/__pycache__/file_handler.cpython-311.pyc b/utils/__pycache__/file_handler.cpython-311.pyc
new file mode 100644
index 0000000..a1a0c41
Binary files /dev/null and b/utils/__pycache__/file_handler.cpython-311.pyc differ
diff --git a/utils/__pycache__/logger_handler.cpython-311.pyc b/utils/__pycache__/logger_handler.cpython-311.pyc
new file mode 100644
index 0000000..08fa5dd
Binary files /dev/null and b/utils/__pycache__/logger_handler.cpython-311.pyc differ
diff --git a/utils/__pycache__/path_tool.cpython-311.pyc b/utils/__pycache__/path_tool.cpython-311.pyc
new file mode 100644
index 0000000..c362490
Binary files /dev/null and b/utils/__pycache__/path_tool.cpython-311.pyc differ
diff --git a/utils/__pycache__/prompt_loader.cpython-311.pyc b/utils/__pycache__/prompt_loader.cpython-311.pyc
new file mode 100644
index 0000000..8518485
Binary files /dev/null and b/utils/__pycache__/prompt_loader.cpython-311.pyc differ
diff --git a/utils/file_handler.py b/utils/file_handler.py
index 9629a43..cc423dc 100644
--- a/utils/file_handler.py
+++ b/utils/file_handler.py
@@ -53,4 +53,4 @@ def pdf_loader(file_path: str, pwd: str = None) -> List[Document]:
def txt_loader(file_path: str) -> List[Document]:
"""加载TXT文件"""
- return TextLoader(file_path).load()
+ return TextLoader(file_path, encoding='utf-8').load()
diff --git a/utils/logger_handler.py b/utils/logger_handler.py
index 65f34b3..62b521b 100644
--- a/utils/logger_handler.py
+++ b/utils/logger_handler.py
@@ -31,7 +31,7 @@ def get_logger(
logger.addHandler(console_handler)
if not log_file:
- log_file = os.path.join(LOG_ROOT, f"{name}_{datetime.now().strftime("%Y%m%d")}.log")
+ log_file = os.path.join(LOG_ROOT, f"{name}_{datetime.now().strftime('%Y%m%d')}.log")
file_handler = logging.FileHandler(log_file, encoding='utf-8')
file_handler.setLevel(file_level)