rag_service
This commit is contained in:
parent
1b791ca1b7
commit
56b276eb3f
|
|
@ -2,7 +2,7 @@
|
|||
<module type="PYTHON_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$" />
|
||||
<orderEntry type="jdk" jdkName="edu" jdkType="Python SDK" />
|
||||
<orderEntry type="jdk" jdkName="RAG" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
||||
|
|
@ -3,5 +3,5 @@
|
|||
<component name="Black">
|
||||
<option name="sdkName" value="Python 3.12 (Eula)" />
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="edu" project-jdk-type="Python SDK" />
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="RAG" project-jdk-type="Python SDK" />
|
||||
</project>
|
||||
|
|
@ -81,3 +81,162 @@ pypdf.errors.PdfReadError: Not an encrypted file
|
|||
2026-02-28 03:36:40,073 - Agent - INFO - vector_store.py:70 - [加载知识库]: /Users/czzhangheng/edu/agent_proj/data/扫拖一体机器人100问.txt 的 md5已存在, 跳过
|
||||
2026-02-28 03:36:40,074 - Agent - INFO - vector_store.py:70 - [加载知识库]: /Users/czzhangheng/edu/agent_proj/data/维护保养.txt 的 md5已存在, 跳过
|
||||
2026-02-28 03:36:51,201 - Agent - INFO - vector_store.py:88 - [加载知识库]/Users/czzhangheng/edu/agent_proj/data/扫地机器人100问.pdf 加载成功
|
||||
2026-02-28 13:09:38,673 - Agent - INFO - logger_handler.py:47 - info
|
||||
2026-02-28 13:09:38,673 - Agent - ERROR - logger_handler.py:48 - Error
|
||||
2026-02-28 13:09:52,924 - Agent - ERROR - vector_store.py:90 - [加载知识库]C:\Users\cz189\agent_proj\data\扫地机器人100问.pdf 失败,错误 `pypdf` package not found, please install it with `pip install pypdf`
|
||||
Traceback (most recent call last):
|
||||
File "D:\Users\cz189\miniconda3\envs\RAG\Lib\site-packages\langchain_community\document_loaders\parsers\pdf.py", line 359, in lazy_parse
|
||||
import pypdf
|
||||
ModuleNotFoundError: No module named 'pypdf'
|
||||
|
||||
During handling of the above exception, another exception occurred:
|
||||
|
||||
Traceback (most recent call last):
|
||||
File "C:\Users\cz189\agent_proj\rag\vector_store.py", line 74, in load_document
|
||||
document: list[Documents] = get_file_document(path)
|
||||
^^^^^^^^^^^^^^^^^^^^^^^
|
||||
File "C:\Users\cz189\agent_proj\rag\vector_store.py", line 58, in get_file_document
|
||||
return pdf_loader(read_path)
|
||||
^^^^^^^^^^^^^^^^^^^^^
|
||||
File "C:\Users\cz189\agent_proj\utils\file_handler.py", line 51, in pdf_loader
|
||||
return PyPDFLoader(file_path, pwd).load()
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
File "D:\Users\cz189\miniconda3\envs\RAG\Lib\site-packages\langchain_core\document_loaders\base.py", line 43, in load
|
||||
return list(self.lazy_load())
|
||||
^^^^^^^^^^^^^^^^^^^^^^
|
||||
File "D:\Users\cz189\miniconda3\envs\RAG\Lib\site-packages\langchain_community\document_loaders\pdf.py", line 305, in lazy_load
|
||||
yield from self.parser.lazy_parse(blob)
|
||||
File "D:\Users\cz189\miniconda3\envs\RAG\Lib\site-packages\langchain_community\document_loaders\parsers\pdf.py", line 361, in lazy_parse
|
||||
raise ImportError(
|
||||
ImportError: `pypdf` package not found, please install it with `pip install pypdf`
|
||||
2026-02-28 13:09:52,928 - Agent - ERROR - vector_store.py:90 - [加载知识库]C:\Users\cz189\agent_proj\data\扫地机器人100问2.txt 失败,错误 Error loading C:\Users\cz189\agent_proj\data\扫地机器人100问2.txt
|
||||
Traceback (most recent call last):
|
||||
File "D:\Users\cz189\miniconda3\envs\RAG\Lib\site-packages\langchain_community\document_loaders\text.py", line 43, in lazy_load
|
||||
text = f.read()
|
||||
^^^^^^^^
|
||||
UnicodeDecodeError: 'gbk' codec can't decode byte 0xab in position 4: illegal multibyte sequence
|
||||
|
||||
The above exception was the direct cause of the following exception:
|
||||
|
||||
Traceback (most recent call last):
|
||||
File "C:\Users\cz189\agent_proj\rag\vector_store.py", line 74, in load_document
|
||||
document: list[Documents] = get_file_document(path)
|
||||
^^^^^^^^^^^^^^^^^^^^^^^
|
||||
File "C:\Users\cz189\agent_proj\rag\vector_store.py", line 55, in get_file_document
|
||||
return txt_loader(read_path)
|
||||
^^^^^^^^^^^^^^^^^^^^^
|
||||
File "C:\Users\cz189\agent_proj\utils\file_handler.py", line 56, in txt_loader
|
||||
return TextLoader(file_path).load()
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
File "D:\Users\cz189\miniconda3\envs\RAG\Lib\site-packages\langchain_core\document_loaders\base.py", line 43, in load
|
||||
return list(self.lazy_load())
|
||||
^^^^^^^^^^^^^^^^^^^^^^
|
||||
File "D:\Users\cz189\miniconda3\envs\RAG\Lib\site-packages\langchain_community\document_loaders\text.py", line 56, in lazy_load
|
||||
raise RuntimeError(f"Error loading {self.file_path}") from e
|
||||
RuntimeError: Error loading C:\Users\cz189\agent_proj\data\扫地机器人100问2.txt
|
||||
2026-02-28 13:09:52,930 - Agent - ERROR - vector_store.py:90 - [加载知识库]C:\Users\cz189\agent_proj\data\扫拖一体机器人100问.txt 失败,错误 Error loading C:\Users\cz189\agent_proj\data\扫拖一体机器人100问.txt
|
||||
Traceback (most recent call last):
|
||||
File "D:\Users\cz189\miniconda3\envs\RAG\Lib\site-packages\langchain_community\document_loaders\text.py", line 43, in lazy_load
|
||||
text = f.read()
|
||||
^^^^^^^^
|
||||
UnicodeDecodeError: 'gbk' codec can't decode byte 0xab in position 4: illegal multibyte sequence
|
||||
|
||||
The above exception was the direct cause of the following exception:
|
||||
|
||||
Traceback (most recent call last):
|
||||
File "C:\Users\cz189\agent_proj\rag\vector_store.py", line 74, in load_document
|
||||
document: list[Documents] = get_file_document(path)
|
||||
^^^^^^^^^^^^^^^^^^^^^^^
|
||||
File "C:\Users\cz189\agent_proj\rag\vector_store.py", line 55, in get_file_document
|
||||
return txt_loader(read_path)
|
||||
^^^^^^^^^^^^^^^^^^^^^
|
||||
File "C:\Users\cz189\agent_proj\utils\file_handler.py", line 56, in txt_loader
|
||||
return TextLoader(file_path).load()
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
File "D:\Users\cz189\miniconda3\envs\RAG\Lib\site-packages\langchain_core\document_loaders\base.py", line 43, in load
|
||||
return list(self.lazy_load())
|
||||
^^^^^^^^^^^^^^^^^^^^^^
|
||||
File "D:\Users\cz189\miniconda3\envs\RAG\Lib\site-packages\langchain_community\document_loaders\text.py", line 56, in lazy_load
|
||||
raise RuntimeError(f"Error loading {self.file_path}") from e
|
||||
RuntimeError: Error loading C:\Users\cz189\agent_proj\data\扫拖一体机器人100问.txt
|
||||
2026-02-28 13:09:52,933 - Agent - ERROR - vector_store.py:90 - [加载知识库]C:\Users\cz189\agent_proj\data\故障排除.txt 失败,错误 Error loading C:\Users\cz189\agent_proj\data\故障排除.txt
|
||||
Traceback (most recent call last):
|
||||
File "D:\Users\cz189\miniconda3\envs\RAG\Lib\site-packages\langchain_community\document_loaders\text.py", line 43, in lazy_load
|
||||
text = f.read()
|
||||
^^^^^^^^
|
||||
UnicodeDecodeError: 'gbk' codec can't decode byte 0xab in position 2: illegal multibyte sequence
|
||||
|
||||
The above exception was the direct cause of the following exception:
|
||||
|
||||
Traceback (most recent call last):
|
||||
File "C:\Users\cz189\agent_proj\rag\vector_store.py", line 74, in load_document
|
||||
document: list[Documents] = get_file_document(path)
|
||||
^^^^^^^^^^^^^^^^^^^^^^^
|
||||
File "C:\Users\cz189\agent_proj\rag\vector_store.py", line 55, in get_file_document
|
||||
return txt_loader(read_path)
|
||||
^^^^^^^^^^^^^^^^^^^^^
|
||||
File "C:\Users\cz189\agent_proj\utils\file_handler.py", line 56, in txt_loader
|
||||
return TextLoader(file_path).load()
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
File "D:\Users\cz189\miniconda3\envs\RAG\Lib\site-packages\langchain_core\document_loaders\base.py", line 43, in load
|
||||
return list(self.lazy_load())
|
||||
^^^^^^^^^^^^^^^^^^^^^^
|
||||
File "D:\Users\cz189\miniconda3\envs\RAG\Lib\site-packages\langchain_community\document_loaders\text.py", line 56, in lazy_load
|
||||
raise RuntimeError(f"Error loading {self.file_path}") from e
|
||||
RuntimeError: Error loading C:\Users\cz189\agent_proj\data\故障排除.txt
|
||||
2026-02-28 13:09:52,934 - Agent - ERROR - vector_store.py:90 - [加载知识库]C:\Users\cz189\agent_proj\data\维护保养.txt 失败,错误 Error loading C:\Users\cz189\agent_proj\data\维护保养.txt
|
||||
Traceback (most recent call last):
|
||||
File "D:\Users\cz189\miniconda3\envs\RAG\Lib\site-packages\langchain_community\document_loaders\text.py", line 43, in lazy_load
|
||||
text = f.read()
|
||||
^^^^^^^^
|
||||
UnicodeDecodeError: 'gbk' codec can't decode byte 0xab in position 4: illegal multibyte sequence
|
||||
|
||||
The above exception was the direct cause of the following exception:
|
||||
|
||||
Traceback (most recent call last):
|
||||
File "C:\Users\cz189\agent_proj\rag\vector_store.py", line 74, in load_document
|
||||
document: list[Documents] = get_file_document(path)
|
||||
^^^^^^^^^^^^^^^^^^^^^^^
|
||||
File "C:\Users\cz189\agent_proj\rag\vector_store.py", line 55, in get_file_document
|
||||
return txt_loader(read_path)
|
||||
^^^^^^^^^^^^^^^^^^^^^
|
||||
File "C:\Users\cz189\agent_proj\utils\file_handler.py", line 56, in txt_loader
|
||||
return TextLoader(file_path).load()
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
File "D:\Users\cz189\miniconda3\envs\RAG\Lib\site-packages\langchain_core\document_loaders\base.py", line 43, in load
|
||||
return list(self.lazy_load())
|
||||
^^^^^^^^^^^^^^^^^^^^^^
|
||||
File "D:\Users\cz189\miniconda3\envs\RAG\Lib\site-packages\langchain_community\document_loaders\text.py", line 56, in lazy_load
|
||||
raise RuntimeError(f"Error loading {self.file_path}") from e
|
||||
RuntimeError: Error loading C:\Users\cz189\agent_proj\data\维护保养.txt
|
||||
2026-02-28 13:09:52,936 - Agent - ERROR - vector_store.py:90 - [加载知识库]C:\Users\cz189\agent_proj\data\选购指南.txt 失败,错误 Error loading C:\Users\cz189\agent_proj\data\选购指南.txt
|
||||
Traceback (most recent call last):
|
||||
File "D:\Users\cz189\miniconda3\envs\RAG\Lib\site-packages\langchain_community\document_loaders\text.py", line 43, in lazy_load
|
||||
text = f.read()
|
||||
^^^^^^^^
|
||||
UnicodeDecodeError: 'gbk' codec can't decode byte 0xab in position 2: illegal multibyte sequence
|
||||
|
||||
The above exception was the direct cause of the following exception:
|
||||
|
||||
Traceback (most recent call last):
|
||||
File "C:\Users\cz189\agent_proj\rag\vector_store.py", line 74, in load_document
|
||||
document: list[Documents] = get_file_document(path)
|
||||
^^^^^^^^^^^^^^^^^^^^^^^
|
||||
File "C:\Users\cz189\agent_proj\rag\vector_store.py", line 55, in get_file_document
|
||||
return txt_loader(read_path)
|
||||
^^^^^^^^^^^^^^^^^^^^^
|
||||
File "C:\Users\cz189\agent_proj\utils\file_handler.py", line 56, in txt_loader
|
||||
return TextLoader(file_path).load()
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
File "D:\Users\cz189\miniconda3\envs\RAG\Lib\site-packages\langchain_core\document_loaders\base.py", line 43, in load
|
||||
return list(self.lazy_load())
|
||||
^^^^^^^^^^^^^^^^^^^^^^
|
||||
File "D:\Users\cz189\miniconda3\envs\RAG\Lib\site-packages\langchain_community\document_loaders\text.py", line 56, in lazy_load
|
||||
raise RuntimeError(f"Error loading {self.file_path}") from e
|
||||
RuntimeError: Error loading C:\Users\cz189\agent_proj\data\选购指南.txt
|
||||
2026-02-28 13:12:19,618 - Agent - INFO - vector_store.py:88 - [加载知识库]C:\Users\cz189\agent_proj\data\扫地机器人100问.pdf 加载成功
|
||||
2026-02-28 13:12:22,955 - Agent - INFO - vector_store.py:88 - [加载知识库]C:\Users\cz189\agent_proj\data\扫地机器人100问2.txt 加载成功
|
||||
2026-02-28 13:12:27,114 - Agent - INFO - vector_store.py:88 - [加载知识库]C:\Users\cz189\agent_proj\data\扫拖一体机器人100问.txt 加载成功
|
||||
2026-02-28 13:12:34,245 - Agent - INFO - vector_store.py:88 - [加载知识库]C:\Users\cz189\agent_proj\data\故障排除.txt 加载成功
|
||||
2026-02-28 13:12:38,453 - Agent - INFO - vector_store.py:88 - [加载知识库]C:\Users\cz189\agent_proj\data\维护保养.txt 加载成功
|
||||
2026-02-28 13:12:42,384 - Agent - INFO - vector_store.py:88 - [加载知识库]C:\Users\cz189\agent_proj\data\选购指南.txt 加载成功
|
||||
|
|
|
|||
11
md5.txt
11
md5.txt
|
|
@ -1,5 +1,6 @@
|
|||
94bd6c0b4283b8524f4e8dfc5b72f8ab
|
||||
a29a4ba5a539a2a462def8f91eb98ccc
|
||||
928c24dbdeeb1c0a109f5620c8bac429
|
||||
d294281a5769e0a3d1455fbf637ee572
|
||||
fc539588792a89ff842cf241cf822de16d726f4604d9e7ebcf59a932ae9e9c00
|
||||
6d726f4604d9e7ebcf59a932ae9e9c00
|
||||
4ef62dc4189785055436124361296d05
|
||||
76d81647e3901472e01ad9859425506f
|
||||
85fd3b64ff87fe08fdc5c4a34998b555
|
||||
96a099807d1ad7c26e37b53d130f26d2
|
||||
1ce3fc50860e15d1c106900b63ef42a2
|
||||
|
|
|
|||
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,47 @@
|
|||
"""rag总结服务:用户提问,搜索参考资料,提问+参考资料提供模型,让模型总结回复"""
|
||||
from langchain_core.output_parsers import StrOutputParser
|
||||
|
||||
from rag.vector_store import VectorStoreSerivce
|
||||
from utils.prompt_loader import load_rag_prompts
|
||||
from langchain_core.prompts import PromptTemplate
|
||||
from model.factory import chat_model
|
||||
from langchain_community.docstore.document import Document
|
||||
from typing import List
|
||||
|
||||
class RagSummarizeService:
    """Answer a user question by summarizing retrieved reference material.

    The service looks up documents for the query through the vector store's
    retriever, renders them into a single context string, and feeds the query
    plus that context through a prompt -> chat model -> string parser chain.
    """

    def __init__(self):
        """Build the retriever, the prompt template, the model and the chain."""
        self.vector_store = VectorStoreSerivce()
        self.retriever = self.vector_store.get_retriever()
        self.prompt_template = load_rag_prompts()
        self.prompt_text = PromptTemplate.from_template(self.prompt_template)
        self.model = chat_model
        # Built last: the chain is composed from the prompt and model above.
        self.chain = self._init_chain()

    def _init_chain(self):
        """Return the ``prompt | model | StrOutputParser`` pipeline."""
        return self.prompt_text | self.model | StrOutputParser()

    def retriever_docs(self, query: str) -> List[Document]:
        """Return the documents the retriever produces for ``query``."""
        return self.retriever.invoke(query)

    def rag_summarize(self, query: str) -> str:
        """Retrieve references for ``query`` and return the chain's answer.

        Each retrieved document becomes one numbered line (starting at 1)
        holding its page content and metadata; the lines are concatenated
        into the ``context`` value handed to the chain alongside ``input``.
        """
        references = "".join(
            f"[参考资料{index}]: 参考资料:{doc.page_content} | 参考源: {doc.metadata} \n"
            for index, doc in enumerate(self.retriever_docs(query), start=1)
        )
        payload = {"input": query, "context": references}
        return self.chain.invoke(payload)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test: build the service, ask one sample question, and
    # print whatever the chain returns.
    service = RagSummarizeService()
    answer = service.rag_summarize("查询小户型适合那些扫地机器人")
    print(answer)
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -53,4 +53,4 @@ def pdf_loader(file_path: str, pwd: str = None) -> List[Document]:
|
|||
|
||||
def txt_loader(file_path: str, encoding: str = "utf-8") -> List[Document]:
    """Load a plain-text file into a list of documents.

    Args:
        file_path: Path of the text file to load.
        encoding: Text encoding used to decode the file. Defaults to UTF-8
            so the result does not depend on the platform's locale encoding
            (on a GBK-locale Windows host the implicit default raised
            ``UnicodeDecodeError`` for the UTF-8 knowledge-base files).

    Returns:
        The documents produced by ``TextLoader.load()``.
    """
    # Always pass the encoding explicitly; never fall back to the locale default.
    return TextLoader(file_path, encoding=encoding).load()
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ def get_logger(
|
|||
logger.addHandler(console_handler)
|
||||
|
||||
if not log_file:
|
||||
log_file = os.path.join(LOG_ROOT, f"{name}_{datetime.now().strftime("%Y%m%d")}.log")
|
||||
log_file = os.path.join(LOG_ROOT, f"{name}_{datetime.now().strftime('%Y%m%d')}.log")
|
||||
|
||||
file_handler = logging.FileHandler(log_file, encoding='utf-8')
|
||||
file_handler.setLevel(file_level)
|
||||
|
|
|
|||
Loading…
Reference in New Issue