RAG Proj
This commit is contained in:
parent
a4abc7fee4
commit
18057fd98d
|
|
@ -160,3 +160,5 @@ cython_debug/
|
|||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
#.idea/
|
||||
|
||||
chroma_db/
|
||||
chat_history/
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
# 默认忽略的文件
|
||||
/shelf/
|
||||
/workspace.xml
|
||||
# 基于编辑器的 HTTP 客户端请求
|
||||
/httpRequests/
|
||||
# Datasource local storage ignored files
|
||||
/dataSources/
|
||||
/dataSources.local.xml
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="PYTHON_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$" />
|
||||
<orderEntry type="jdk" jdkName="RAG" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="DataSourceManagerImpl" format="xml" multifile-model="true">
|
||||
<data-source source="LOCAL" name="chroma" uuid="af88f5a8-29a4-4ea3-b006-f6bf52a7a680">
|
||||
<driver-ref>sqlite.xerial</driver-ref>
|
||||
<synchronize>true</synchronize>
|
||||
<jdbc-driver>org.sqlite.JDBC</jdbc-driver>
|
||||
<jdbc-url>jdbc:sqlite:$PROJECT_DIR$/chroma_db/chroma.sqlite3</jdbc-url>
|
||||
<working-dir>$ProjectFileDir$</working-dir>
|
||||
</data-source>
|
||||
</component>
|
||||
</project>
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
<component name="InspectionProjectProfileManager">
|
||||
<settings>
|
||||
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||
<version value="1.0" />
|
||||
</settings>
|
||||
</component>
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="Black">
|
||||
<option name="sdkName" value="RAG" />
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="RAG" project-jdk-type="Python SDK" />
|
||||
</project>
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/RAG_proj.iml" filepath="$PROJECT_DIR$/.idea/RAG_proj.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||
</component>
|
||||
</project>
|
||||
|
|
@ -0,0 +1,35 @@
|
|||
"""Streamlit page for uploading documents into the knowledge base."""
import time

import streamlit as st

from knowledge_base import KnowledgeBaseService

# Page header.
st.title("知识库更新服务")

# Single .txt file picker.
uploaded = st.file_uploader(
    "请上传txt文件",
    type="txt",
    accept_multiple_files=False,
)

# Keep exactly one service instance per browser session.
if "service" not in st.session_state:
    st.session_state["service"] = KnowledgeBaseService()

if uploaded is not None:
    file_name = uploaded.name
    file_type = uploaded.type
    file_size = uploaded.size / 1024  # bytes -> KB

    st.subheader(f"文件名:{file_name}")
    st.write(f"格式: {file_type}, 大小: {file_size:.2f}KB")
    # Raw upload bytes decoded as UTF-8 text.
    text = uploaded.getvalue().decode('utf-8')

    with st.spinner("载入知识库中..."):
        time.sleep(1)
        result = st.session_state["service"].upload_by_str(text, file_name)
    st.write(result)
|
||||
|
|
@ -0,0 +1,55 @@
|
|||
"""Streamlit chat front-end for the RAG customer-service bot."""
import time

import streamlit as st

import config_data as config
from rag import RAGService

# Page header.
st.title("智能客服")
st.divider()

# Initialise the per-session transcript with a greeting on first load.
if "message" not in st.session_state:
    st.session_state["message"] = [
        {"role": "assistant", "content": "你好有什么可以帮您"}
    ]
msg_list = st.session_state["message"]


def add_history(role, content):
    """Record one chat turn in the session transcript."""
    msg_list.append({"role": role, "content": content})


def show_history():
    """Replay the whole transcript into the chat widget."""
    for entry in msg_list:
        st.chat_message(entry["role"]).write(entry["content"])


show_history()

# One RAGService per browser session (constructed lazily once).
if "RAGService" not in st.session_state:
    st.session_state["RAGService"] = RAGService()
rag_service = st.session_state["RAGService"]


def get_answer(prompt: str):
    """Stream the model answer for *prompt*; returns a chunk generator."""
    return rag_service.chain.stream({"input": prompt}, config.session_config)


prompt = st.chat_input()

if prompt:
    st.chat_message("user").write(prompt)
    add_history("user", prompt)

    with st.spinner("AI thinking..."):
        time.sleep(0.5)
        answer_stream = get_answer(prompt)

    captured = []

    def capture(generator, cache_list):
        """Yield chunks unchanged while keeping a copy for the history."""
        for chunk in generator:
            cache_list.append(chunk)
            yield chunk

    st.chat_message("assistant").write_stream(capture(answer_stream, captured))
    add_history("assistant", "".join(captured))
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
# Path of the file recording MD5 digests of already-ingested documents.
md5_path = "./md5.text"

# Chroma vector-store settings.
collection_name = "rag"
persist_directory = "./chroma_db"

# Text-splitter settings.
chunk_size = 1000
chunk_overlap = 100
separators = ["\n\n", "\n", ".", "!", "?", "。", " ", ",", ","]
# Texts at or below this length are stored as a single chunk (no splitting).
max_split_char_number = 100

# Passed as the retriever's search_kwargs "k" (number of documents returned).
similarity_threshold = 1

# DashScope model names.
embedding_model_name = "text-embedding-v4"
chat_model_name = "qwen3-max"

# Session routing config for RunnableWithMessageHistory.
session_config = {
    "configurable": {
        "session_id": "user_001",
    }
}
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
身高:155-165cm, 体重:75-95斤,建议尺码S。
|
||||
身高:160-170cm, 体重:90-115斤,建议尺码M。
|
||||
身高:165-175cm, 体重:115-135斤,建议尺码L。
|
||||
身高:170-178cm, 体重:130-150斤,建议尺码XL。
|
||||
身高:175-182cm, 体重:145-165斤,建议尺码2XL。
|
||||
身高:178-185cm, 体重:160-180斤,建议尺码3XL。
|
||||
身高:180-190cm, 体重:180-210斤,建议尺码4XL。
|
||||
身高:190cm+,体重:210斤+,建议尺码5XL。
|
||||
|
|
@ -0,0 +1,113 @@
|
|||
一、春季服装(纯棉、薄牛仔、针织棉、轻薄化纤)
|
||||
|
||||
1. 纯棉材质(春季衬衫、T恤、休闲裤)
|
||||
|
||||
洗涤:可机洗或手洗,水温≤30℃,中性洗涤剂;浅色与深色分开洗,首次洗加少许盐固色;机洗用洗衣袋+轻柔模式,避免摩擦起球。
|
||||
|
||||
养护:阴凉通风处阴干,避免暴晒褪色;收纳前完全干燥,折叠或宽肩悬挂;潮湿天放干燥剂防发霉。
|
||||
|
||||
2. 薄牛仔材质(春季牛仔裤、牛仔外套)
|
||||
|
||||
洗涤:水温≤30℃,中性洗涤剂;翻面清洗减少褪色,机洗选轻柔模式;避免频繁清洗,1-2周一次即可。
|
||||
|
||||
养护:翻面阴干,避免阳光直射;收纳时折叠平放或悬挂,宽肩衣架防止裤腰变形;裤兜内放防潮纸保持版型。
|
||||
|
||||
3. 针织棉材质(春季针织衫、薄开衫)
|
||||
|
||||
洗涤:手洗优先,水温≤25℃,中性洗涤剂轻轻按压;机洗需用洗衣袋,选针织专用模式;禁止用力搓揉、拧绞。
|
||||
|
||||
养护:平铺阴干,避免悬挂拉伸领口;收纳时折叠,可放樟脑丸防蛀;轻微起球用毛球修剪器处理。
|
||||
|
||||
4. 轻薄化纤材质(春季风衣、防晒衣)
|
||||
|
||||
洗涤:可机洗,水温30-40℃,中性或碱性洗涤剂;轻柔揉搓,顽固污渍轻轻刷洗;清洗时加柔顺剂减少静电。
|
||||
|
||||
养护:可阳光下晾晒,及时翻面确保干燥;收纳折叠或悬挂均可,避免重压产生永久性褶皱。
|
||||
|
||||
二、夏季服装(真丝、棉麻、冰丝、短袖纯棉、雪纺)
|
||||
|
||||
1. 真丝材质(夏季连衣裙、衬衫)
|
||||
|
||||
洗涤:建议干洗;手洗用真丝专用中性洗涤剂,水温≤25℃,浸泡≤15分钟,轻轻按压清洗;禁止搓揉、拧绞、漂白。
|
||||
|
||||
养护:阴凉通风处阴干,避免暴晒;悬挂用光滑衣架防勾丝,折叠收纳避免重压;熨烫用低温蒸汽,距离面料1-2cm。
|
||||
|
||||
2. 棉麻材质(夏季T恤、阔腿裤、衬衫)
|
||||
|
||||
洗涤:可机洗或手洗,水温≤30℃,中性洗涤剂;棉麻易皱,无需用力搓揉;浅色与深色分开洗。
|
||||
|
||||
养护:可阳光下晾晒,晒干后及时收纳;收纳前熨烫平整,折叠或悬挂均可;避免潮湿环境,防止发霉。
|
||||
|
||||
3. 冰丝材质(夏季T恤、短裤、连衣裙)
|
||||
|
||||
洗涤:手洗或机洗,水温≤30℃,中性洗涤剂;机洗用洗衣袋,选轻柔模式;禁止长时间浸泡(≤10分钟)。
|
||||
|
||||
养护:阴凉阴干,避免暴晒和高温烘烤;收纳折叠平放,避免尖锐物体勾划;穿着时避免粗糙物体摩擦。
|
||||
|
||||
4. 雪纺材质(夏季连衣裙、防晒衫)
|
||||
|
||||
洗涤:手洗优先,水温≤30℃,中性洗涤剂轻轻漂洗;机洗用洗衣袋,选轻柔模式;禁止用力拧绞。
|
||||
|
||||
养护:阴凉阴干,悬挂时用细衣架避免勾丝;收纳折叠时垫一层薄纸,防止粘连;轻微褶皱用低温蒸汽熨烫。
|
||||
|
||||
三、秋季服装(羊毛、羊绒、厚牛仔、灯芯绒、麂皮绒)
|
||||
|
||||
1. 羊毛/羊绒材质(秋季羊毛衫、薄羊绒大衣)
|
||||
|
||||
洗涤:优先干洗;手洗用羊毛专用洗涤剂,水温≤20℃,浸泡≤15分钟,轻轻按压;禁止机洗、搓揉、拧绞。
|
||||
|
||||
养护:平铺阴干,避免悬挂拉伸;收纳时放防虫蛀剂(樟脑丸、薰衣草香包),透气布袋包裹;宽肩悬挂或折叠收纳,避免重压。
|
||||
|
||||
2. 厚牛仔材质(秋季牛仔外套、厚牛仔裤)
|
||||
|
||||
洗涤:水温≤30℃,中性洗涤剂;翻面清洗减少褪色和磨损,机洗选标准模式;首次洗盐水浸泡30分钟固色。
|
||||
|
||||
养护:翻面阴干,避免暴晒;收纳时折叠平放,或悬挂用宽肩衣架;长期存放前完全干燥,放防潮剂。
|
||||
|
||||
3. 灯芯绒材质(秋季灯芯绒外套、裤子)
|
||||
|
||||
洗涤:手洗或机洗,水温≤30℃,中性洗涤剂;机洗用洗衣袋,选轻柔模式;禁止用力搓揉,避免绒面起毛。
|
||||
|
||||
养护:阴凉阴干,晾晒时反面朝上;收纳时折叠,避免重压破坏绒面;熨烫用低温蒸汽,熨斗垫薄布,顺绒方向熨烫。
|
||||
|
||||
4. 麂皮绒材质(秋季麂皮绒外套、夹克)
|
||||
|
||||
洗涤:建议干洗;人造麂皮可手洗,水温≤30℃,中性洗涤剂轻轻按压;禁止机洗、漂白、用力拧绞。
|
||||
|
||||
养护:阴凉阴干,避免暴晒和高温;收纳时悬挂,避免折叠产生折痕;表面灰尘用软毛刷轻轻刷除。
|
||||
|
||||
四、冬季服装(羽绒服、厚羊毛大衣、加绒牛仔、保暖内衣)
|
||||
|
||||
1. 羽绒服材质(冬季羽绒服、羽绒马甲)
|
||||
|
||||
洗涤:优先干洗;可水洗款用羽绒服专用洗涤剂,水温≤30℃,浸泡≤20分钟,轻轻按压;机洗选羽绒服专用模式,放洗衣袋。
|
||||
|
||||
养护:通风阴凉处阴干,晾晒时轻轻拍打恢复蓬松;完全干燥后收纳,压缩袋勿过度压缩;定期取出拍打通风。
|
||||
|
||||
2. 厚羊毛大衣材质(冬季羊毛大衣、毛呢外套)
|
||||
|
||||
洗涤:必须干洗,干洗能保护羊毛纤维弹性和柔软度;禁止水洗、机洗,避免纤维毡化、缩水。
|
||||
|
||||
养护:悬挂收纳用宽肩无痕衣架,远离潮湿和高温;收纳前拍打去除灰尘,放防虫蛀剂;避免尖锐物体勾划。
|
||||
|
||||
3. 加绒牛仔材质(冬季加绒牛仔裤、加绒牛仔外套)
|
||||
|
||||
洗涤:水温≤30℃,中性洗涤剂;翻面清洗,机洗选轻柔模式;避免长时间浸泡,减少绒层脱落。
|
||||
|
||||
养护:翻面阴干,避免暴晒;收纳时折叠平放,避免重压破坏绒层;清洗后及时晾干,防止发霉产生异味。
|
||||
|
||||
4. 保暖内衣材质(纯棉保暖内衣、德绒保暖内衣)
|
||||
|
||||
洗涤:可机洗或手洗,水温≤30℃,中性洗涤剂;机洗选轻柔模式,避免强力旋转;禁止使用漂白剂。
|
||||
|
||||
养护:阴凉阴干或阳光下短时间晾晒;收纳时折叠平整,放干燥处;德绒材质避免高温熨烫,防止破坏保暖纤维。
|
||||
|
||||
五、通用养护小贴士
|
||||
|
||||
1. 不同材质衣物分开清洗,避免染色、磨损;洗涤剂充分溶解后再放衣物,避免局部变色。
|
||||
|
||||
2. 污渍及时处理,时间越久越难去除;不同污渍针对性处理(油渍用洗洁精原液,血渍用冷水浸泡)。
|
||||
|
||||
3. 熨烫前查看衣物洗标,按材质调整温度;首次熨烫先在衣物内侧测试,避免烫伤。
|
||||
|
||||
4. 长期存放的衣物,收纳前务必完全干燥,定期检查是否发霉、虫蛀。
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
1. 肤色与服装颜色搭配原则
|
||||
冷白皮:适合冷色调和暖色调,亮色系(如宝蓝、正红、薄荷绿)更显白皙透亮;深色系(如黑色、深灰)可提升气场,避免过于苍白。
|
||||
黄皮/暖黄皮:优先选暖色调(如焦糖色、姜黄色、豆沙色),避免冷调荧光色(如荧光绿、冷粉),易显肤色暗沉;浅米色、燕麦色可柔和肤色,提升气色。
|
||||
黑皮:适合高饱和度亮色(如明黄、橙色、湖蓝),突出健康肤色;避免暗沉的土黄色、灰褐色,易显肤色暗沉无光。
|
||||
|
||||
2. 场合与服装颜色选择
|
||||
日常通勤:以基础色为主(黑白灰、米色、藏蓝),简约大气;可搭配低饱和度亮色(如雾霾蓝、浅紫)作为点缀,增加活力。
|
||||
正式场合(商务会议/面试):首选深色系(黑色、藏蓝、深灰),稳重专业;避免大面积亮色和花哨图案,保持简洁得体。
|
||||
休闲场合(逛街/出游):可选择高饱和度颜色或撞色搭配(如黄+白、蓝+白),清新活泼;条纹、格纹等基础图案也适合休闲场景。
|
||||
宴会/派对:可选择亮色(正红、酒红、宝蓝)或金属色(金色、银色),凸显气质;避免过于朴素的颜色,降低存在感。
|
||||
|
||||
3. 体型与服装颜色修饰技巧
|
||||
显瘦搭配:优先选深色系(黑色、深灰、藏蓝),视觉上收缩身形;避免大面积亮色和横条纹,易显臃肿。
|
||||
显高搭配:上浅下深(如浅粉上衣+黑色裤子),形成视觉落差,拉长比例;同色系搭配(如全身米色、全身黑色),显线条流畅。
|
||||
丰满体型:避免紧身+亮色组合,选择哑光面料的深色服装,搭配小面积亮色配饰(如丝巾、项链)提亮造型。
|
||||
瘦小体型:适合浅色系和暖色系(如白色、米色、浅黄),视觉上更显饱满;避免过于宽大的深色服装,易显单薄。
|
||||
|
||||
4. 季节与服装颜色选择
|
||||
春季:适合清新柔和的颜色(如樱花粉、薄荷绿、浅蓝色),契合万物复苏的氛围。
|
||||
夏季:适合冷色调和浅色系(如白色、天蓝色、浅绿色),清爽降温;避免深色系,吸热且显沉闷。
|
||||
秋季:适合暖色调和大地色系(如焦糖色、卡其色、棕色),呼应落叶和丰收的季节感。
|
||||
冬季:适合深色系和高饱和度亮色(如黑色、酒红、墨绿色),保暖且显气场;白色羽绒服也是冬季经典选择,干净利落。
|
||||
|
||||
5. 颜色搭配禁忌
|
||||
避免大面积高饱和色撞色(如红+绿、蓝+橙),易显杂乱刺眼,可通过中性色(黑白灰)过渡。
|
||||
黄皮避免冷调粉色和紫色,易显肤色蜡黄;黑皮避免荧光色,易显肤色暗沉。
|
||||
正式场合避免超过三种颜色搭配,保持简洁协调;休闲场合可适当增加颜色层次,但需有主次之分。
|
||||
|
||||
6. 配饰颜色搭配技巧
|
||||
同色系配饰:与服装颜色一致(如黑色上衣+黑色包包),简约高级,适合通勤。
|
||||
对比色配饰:与服装颜色形成反差(如白色连衣裙+红色项链),提亮造型,适合休闲和派对。
|
||||
中性色配饰:黑白灰、金属色配饰百搭,可搭配任何颜色服装,是搭配的“安全牌”。
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
import json
|
||||
import os
|
||||
from langchain_core.chat_history import BaseChatMessageHistory
|
||||
from langchain_core.chat_history import BaseMessage
|
||||
from langchain_core.messages import message_to_dict, messages_from_dict
|
||||
from typing import Sequence
|
||||
|
||||
def get_history(session_id):
    """Factory for RunnableWithMessageHistory: one file-backed store per session."""
    storage_dir = "./chat_history"
    return FileChatMessageHistory(session_id, storage_dir)
|
||||
|
||||
class FileChatMessageHistory(BaseChatMessageHistory):
    """Chat-message history persisted as one JSON file per session.

    The file lives at ``<storage_path>/<session_id>`` and always holds the
    complete message list serialized via ``message_to_dict``.
    """

    def __init__(self, session_id, storage_path):
        self.session_id = session_id
        self.storage_path = storage_path
        self.file_path = os.path.join(self.storage_path, self.session_id)
        # Ensure the directory containing the session file exists before I/O.
        os.makedirs(os.path.dirname(self.file_path), exist_ok=True)

    def add_messages(self, messages: Sequence[BaseMessage]) -> None:
        """Append *messages* and rewrite the whole session file.

        NOTE(review): rewriting the full history on every turn is O(n) per
        append; acceptable for short chat sessions.
        """
        all_messages = list(self.messages)
        all_messages.extend(messages)

        serialized = [message_to_dict(msg) for msg in all_messages]
        with open(self.file_path, 'w', encoding='utf-8') as f:
            json.dump(serialized, f, ensure_ascii=False, indent=4)

    @property
    def messages(self) -> list[BaseMessage]:
        """All stored messages; empty when the file is missing or unreadable."""
        try:
            with open(self.file_path, 'r', encoding='utf-8') as f:
                message_data = json.load(f)
        except (FileNotFoundError, json.JSONDecodeError):
            # First access (no file yet) or a truncated/corrupted write:
            # treat both as "no history" instead of crashing the chat.
            return []
        return messages_from_dict(message_data)

    def clear(self):
        """Reset the session by writing an empty message list."""
        with open(self.file_path, 'w', encoding='utf-8') as f:
            json.dump([], f)
|
||||
|
|
@ -0,0 +1,91 @@
|
|||
"""
|
||||
知识库
|
||||
"""
|
||||
|
||||
import os
|
||||
import config_data as config
|
||||
import hashlib
|
||||
from datetime import datetime
|
||||
|
||||
from langchain_chroma import Chroma
|
||||
from langchain_community.embeddings import DashScopeEmbeddings
|
||||
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
||||
|
||||
def check_md5(md5_str: str, md5_path=None):
    """Return True when *md5_str* is already recorded in the digest file.

    :param md5_str: hex digest to look up (one digest per line in the file).
    :param md5_path: digest-file path; defaults to ``config.md5_path``.
    :return: False when unseen; an empty digest file is created if missing.
    """
    path = config.md5_path if md5_path is None else md5_path
    if not os.path.exists(path):
        # First run: create the (empty) digest file so later appends work.
        open(path, 'w', encoding='utf-8').close()
        return False
    with open(path, 'r', encoding='utf-8') as f:
        return any(line.strip() == md5_str for line in f)
|
||||
|
||||
def save_md5(md5_str: str, md5_path=None):
    """Append *md5_str* as one line to the digest file.

    :param md5_str: hex digest to record.
    :param md5_path: digest-file path; defaults to ``config.md5_path``.
    """
    path = config.md5_path if md5_path is None else md5_path
    with open(path, 'a', encoding='utf-8') as f:
        f.write(md5_str + '\n')
|
||||
|
||||
def get_md5(input_str: str, encoding='utf-8'):
    """Return the hex MD5 digest of *input_str* encoded with *encoding*."""
    digest = hashlib.md5(input_str.encode(encoding))
    return digest.hexdigest()
|
||||
|
||||
class KnowledgeBaseService(object):
    """Ingests raw text into the Chroma vector store, deduplicated by MD5."""

    def __init__(self):
        # Make sure the persistence directory exists before Chroma opens it.
        os.makedirs(config.persist_directory, exist_ok=True)
        # Vector store backed by DashScope embeddings. Use the configured
        # model name so ingestion stays in sync with the retrieval side
        # (vector_stores / rag build their embedder from the same setting).
        self.chroma = Chroma(
            collection_name=config.collection_name,
            embedding_function=DashScopeEmbeddings(model=config.embedding_model_name),
            persist_directory=config.persist_directory,
        )

        # Recursive splitter: prefers natural boundaries (paragraph, line,
        # sentence punctuation) before falling back to hard cuts.
        self.spliter = RecursiveCharacterTextSplitter(
            chunk_size=config.chunk_size,
            chunk_overlap=config.chunk_overlap,  # overlap between consecutive chunks
            separators=config.separators,        # try these boundaries in order
            length_function=len,                 # chunk length measured in characters
        )

    def upload_by_str(self, data: str, filename: str):
        """Vectorize *data* and store it; skip when this exact text was seen.

        :param data: raw document text.
        :param filename: recorded as the ``source`` metadata of every chunk.
        :return: human-readable status message.
        """
        md5_hex = get_md5(data)
        if check_md5(md5_hex):
            return "[跳过]内容已存在知识库中"

        # Only split texts longer than the configured threshold.
        if len(data) > config.max_split_char_number:
            knowledge_chunks: list[str] = self.spliter.split_text(data)
        else:
            knowledge_chunks = [data]

        metadata = {
            "source": filename,
            "create_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "operator": "admin",
        }

        self.chroma.add_texts(
            texts=knowledge_chunks,
            # Same metadata for every chunk of this document.
            metadatas=[metadata for _ in knowledge_chunks],
        )

        # Record the digest only after a successful store.
        save_md5(md5_hex)
        return "成功存取"
|
||||
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test: ingest a tiny string and print the status message.
    kb_service = KnowledgeBaseService()
    outcome = kb_service.upload_by_str("周杰伦2", "testfile")
    print(outcome)
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
27ac4134a42757595b727c5717c7572f
|
||||
1f341f680c13cbf4f480772c56bf1a44
|
||||
962c4e1dc3d7aeca6717351f333754cb
|
||||
|
|
@ -0,0 +1,83 @@
|
|||
"""
|
||||
rag service类
|
||||
带历史记忆
|
||||
"""
|
||||
from langchain_community.chat_models import ChatTongyi
|
||||
from langchain_community.embeddings import DashScopeEmbeddings
|
||||
from langchain_core.documents import Document
|
||||
from langchain_core.output_parsers import StrOutputParser
|
||||
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
|
||||
from langchain_core.runnables import RunnablePassthrough, RunnableWithMessageHistory, RunnableLambda
|
||||
|
||||
from file_hisroty_store import get_history
|
||||
import config_data as config
|
||||
from vector_stores import VectorStoreService
|
||||
|
||||
class RAGService(object):
    """History-aware RAG pipeline: retrieve context, then prompt the chat model."""

    def __init__(self):
        # Retriever side, backed by the shared Chroma collection.
        self.vector_service = VectorStoreService(
            embedding=DashScopeEmbeddings(model=config.embedding_model_name)
        )
        # Prompt: reference material + prior turns + the current question.
        self.prompt_template = ChatPromptTemplate.from_messages(
            [
                ("system", "以我提供的已知参考资料为主,简介专业回答用户问题,参考资料: {context}"),
                ("system", "用户的对话历史记录,如下"),
                MessagesPlaceholder("history"),
                ("user", "请回答用户提问: {input}"),
            ]
        )
        self.chat_model = ChatTongyi(model=config.chat_model_name)
        self.chain = self.__get_chain()

    def format_document(self, docs: list[Document]):
        """Render retrieved *docs* into a single context string for the prompt."""
        if not docs:
            return "无相关参考资料"
        return "".join(
            f"文档片段:{doc.page_content} \n元数据 {doc.metadata} \n\n"
            for doc in docs
        )

    def __get_chain(self):
        """Build the runnable chain and wrap it with per-session message history."""
        retriever = self.vector_service.get_retriever()

        def pick_question(value):
            # The retriever needs the bare question string.
            return value["input"]

        def to_prompt_vars(value):
            # Flatten {"input": {...}, "context": ...} into prompt variables.
            return {
                "input": value["input"]["input"],
                "context": value["context"],
                "history": value["input"]["history"],
            }

        base_chain = (
            {
                "input": RunnablePassthrough(),
                "context": RunnableLambda(pick_question) | retriever | self.format_document,
            }
            | RunnableLambda(to_prompt_vars)
            | self.prompt_template
            | self.chat_model
            | StrOutputParser()
        )

        # Attach file-backed chat history keyed by session_id.
        return RunnableWithMessageHistory(
            base_chain,
            get_history,
            input_messages_key="input",
            history_messages_key="history",
        )
|
||||
|
||||
if __name__ == '__main__':
    # Session routing config required by RunnableWithMessageHistory.
    demo_config = {
        "configurable": {
            "session_id": "user_001",
        }
    }
    # The chain expects a dict payload with the question under "input".
    stream = RAGService().chain.stream({"input": "春天穿什么颜色的衣服"}, demo_config)
    for piece in stream:
        print(piece, end="", flush=True)
|
|
@ -0,0 +1,26 @@
|
|||
"""向量存储服务"""
|
||||
from langchain_chroma import Chroma
|
||||
import config_data as config
|
||||
|
||||
class VectorStoreService(object):
    """Thin wrapper exposing the shared Chroma collection as a retriever."""

    def __init__(self, embedding):
        """
        :param embedding: embedding model used to vectorize queries/documents.
        """
        self.embedding = embedding
        # Same collection and persist dir as the ingestion side.
        self.vector_store = Chroma(
            collection_name=config.collection_name,
            embedding_function=self.embedding,
            persist_directory=config.persist_directory,
        )

    def get_retriever(self):
        """Return a retriever yielding the top matches for a query."""
        search_kwargs = {"k": config.similarity_threshold}
        return self.vector_store.as_retriever(search_kwargs=search_kwargs)
|
||||
|
||||
if __name__ == '__main__':
    from langchain_community.embeddings import DashScopeEmbeddings

    # Manual smoke test against the persisted collection.
    embedder = DashScopeEmbeddings(model="text-embedding-v4")
    retriever = VectorStoreService(embedder).get_retriever()
    doc = retriever.invoke("我的体重180斤,尺码推荐?")
|
||||
|
||||
Loading…
Reference in New Issue