From 18057fd98dd0e5f0bd243420b3002878ec0f1c98 Mon Sep 17 00:00:00 2001
From: czzhangheng1314 <cz189@qq.com>
Date: Mon, 23 Feb 2026 18:35:26 +0800
Subject: [PATCH] RAG Proj

---
 .gitignore                                    |   2 +
 .idea/.gitignore                              |   8 ++
 .idea/RAG_proj.iml                            |   8 ++
 .idea/dataSources.xml                         |  12 ++
 .../inspectionProfiles/profiles_settings.xml  |   6 +
 .idea/misc.xml                                |   7 ++
 .idea/modules.xml                             |   8 ++
 .idea/vcs.xml                                 |   6 +
 app_file_uploader.py                          |  35 ++++++
 app_qa.py                                     |  55 +++++++++
 config_data.py                                |  23 ++++
 data/尺码推荐.txt                             |   8 ++
 data/洗涤养护.txt                             | 113 ++++++++++++++++++
 data/颜色选择.txt                             |  32 +++++
 file_hisroty_store.py                         |  37 ++++++
 knowledge_base.py                             |  91 ++++++++++++++
 md5.text                                      |   3 +
 rag.py                                        |  83 +++++++++++++
 vector_stores.py                              |  26 ++++
 19 files changed, 563 insertions(+)
 create mode 100644 .idea/.gitignore
 create mode 100644 .idea/RAG_proj.iml
 create mode 100644 .idea/dataSources.xml
 create mode 100644 .idea/inspectionProfiles/profiles_settings.xml
 create mode 100644 .idea/misc.xml
 create mode 100644 .idea/modules.xml
 create mode 100644 .idea/vcs.xml
 create mode 100644 app_file_uploader.py
 create mode 100644 app_qa.py
 create mode 100644 config_data.py
 create mode 100644 data/尺码推荐.txt
 create mode 100644 data/洗涤养护.txt
 create mode 100644 data/颜色选择.txt
 create mode 100644 file_hisroty_store.py
 create mode 100644 knowledge_base.py
 create mode 100644 md5.text
 create mode 100644 rag.py
 create mode 100644 vector_stores.py
diff --git a/.gitignore b/.gitignore
index 5d381cc..6b3621a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -160,3 +160,5 @@ cython_debug/
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
 
+chroma_db/
+chat_history/
\ No newline at end of file
diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..35410ca
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# 默认忽略的文件
+/shelf/
+/workspace.xml
+# 基于编辑器的 HTTP 客户端请求
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/.idea/RAG_proj.iml b/.idea/RAG_proj.iml
new file mode 100644
index 0000000..b9ed519
--- /dev/null
+++ b/.idea/RAG_proj.iml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="jdk" jdkName="RAG" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>
\ No newline at end of file
diff --git a/.idea/dataSources.xml b/.idea/dataSources.xml
new file mode 100644
index 0000000..258b442
--- /dev/null
+++ b/.idea/dataSources.xml
@@ -0,0 +1,12 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="DataSourceManagerImpl" format="xml" multifile-model="true">
+    <data-source source="LOCAL" name="chroma" uuid="af88f5a8-29a4-4ea3-b006-f6bf52a7a680">
+      <driver-ref>sqlite.xerial</driver-ref>
+      <synchronize>true</synchronize>
+      <jdbc-driver>org.sqlite.JDBC</jdbc-driver>
+      <jdbc-url>jdbc:sqlite:$PROJECT_DIR$/chroma_db/chroma.sqlite3</jdbc-url>
+      <working-dir>$ProjectFileDir$</working-dir>
+    </data-source>
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..03c5b59
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="Black">
+    <option name="sdkName" value="RAG" />
+  </component>
+  <component name="ProjectRootManager" version="2" project-jdk-name="RAG" project-jdk-type="Python SDK" />
+</project>
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..a816d69
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/RAG_proj.iml" filepath="$PROJECT_DIR$/.idea/RAG_proj.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/app_file_uploader.py b/app_file_uploader.py
new file mode 100644
index 0000000..c4eaa8a
--- /dev/null
+++ b/app_file_uploader.py
@@ -0,0 +1,35 @@
+"""
+streamlit web 网页上传
+"""
+import time
+
+import streamlit as st
+from knowledge_base import KnowledgeBaseService
+
+# Page title.
+st.title("知识库更新服务")
+
+# Single-file uploader restricted to .txt files.
+uploader_file = st.file_uploader(
+    "请上传txt文件",
+    type="txt",
+    accept_multiple_files=False
+)
+
+if "service" not in st.session_state:  # build the service once per session, reuse on reruns
+    st.session_state["service"] = KnowledgeBaseService()
+
+if uploader_file is not None:
+    file_name = uploader_file.name
+    file_type = uploader_file.type
+    file_size = uploader_file.size / 1024  # displayed below with a KB suffix
+
+    st.subheader(f"文件名：{file_name}")
+    st.write(f"格式: {file_type}, 大小: {file_size:.2f}KB")
+    # "utf-8-sig" reads plain UTF-8 and also drops a leading BOM, so the marker never reaches the chunks or the MD5.
+    text = uploader_file.getvalue().decode('utf-8-sig')
+
+    with st.spinner("载入知识库中..."):
+        time.sleep(1)
+        result = st.session_state["service"].upload_by_str(text, file_name)
+        st.write(result)
\ No newline at end of file
diff --git a/app_qa.py b/app_qa.py
new file mode 100644
index 0000000..790f997
--- /dev/null
+++ b/app_qa.py
@@ -0,0 +1,55 @@
+import streamlit as st
+import time
+from rag import RAGService
+import config_data as config
+
+# Page header.
+st.title("智能客服")
+st.divider()
+
+# The transcript lives in session_state so it survives reruns; seed the greeting once.
+if "message" not in st.session_state:
+    st.session_state["message"] = [{"role": "assistant", "content": "你好有什么可以帮您"}]
+msg_list = st.session_state["message"]
+
+
+def add_history(role, content):
+    # Append one chat turn to the session transcript.
+    msg_list.append(dict(role=role, content=content))
+
+def show_history():
+    # Replay every stored turn, oldest first.
+    for entry in msg_list:
+        st.chat_message(entry["role"]).write(entry["content"])
+
+show_history()
+
+# Construct the RAG service on the first run only; later reruns reuse the stored instance.
+if "RAGService" not in st.session_state:
+    st.session_state["RAGService"] = RAGService()
+rag_service = st.session_state["RAGService"]
+
+def get_answer(prompt : str):
+    # Returns whatever the chain's stream() call yields for this question.
+    return rag_service.chain.stream({"input": prompt}, config.session_config)
+
+
+prompt = st.chat_input()
+
+if prompt:
+    st.chat_message("user").write(prompt)
+    add_history("user", prompt)
+
+    with st.spinner("AI thinking..."):
+        time.sleep(0.5)
+        ans = get_answer(prompt)
+
+        ai_list = []
+        def capture(generator, cache_list):
+            # Pass chunks through untouched while keeping a copy for the transcript.
+            for chunk in generator:
+                cache_list.append(chunk)
+                yield chunk
+
+        st.chat_message("assistant").write_stream(capture(ans, ai_list))
+        add_history("assistant", "".join(ai_list))
diff --git a/config_data.py b/config_data.py
new file mode 100644
index 0000000..51b4a15
--- /dev/null
+++ b/config_data.py
@@ -0,0 +1,23 @@
+md5_path = "./md5.text"  # one MD5 digest per line; read and appended to by knowledge_base.py
+
+# Chroma: collection name and on-disk directory used by knowledge_base.py and vector_stores.py
+collection_name = "rag"
+persist_directory = "./chroma_db"
+
+# Text splitter settings passed to RecursiveCharacterTextSplitter in knowledge_base.py
+chunk_size = 1000
+chunk_overlap = 100
+separators = ["\n\n", "\n", ".", "!", "?", "。", " ", ",", "，"]
+max_split_char_number = 100  # texts longer than this many characters are split before storing
+
+# NOTE: despite its name, this value is passed to the retriever as "k" in search_kwargs (vector_stores.py)
+similarity_threshold = 1
+# Model names: embedding model for DashScopeEmbeddings, chat model for ChatTongyi (see rag.py)
+embedding_model_name = "text-embedding-v4"
+chat_model_name  = "qwen3-max"
+# Run config passed to chain.stream() in app_qa.py; session_id identifies the chat history to use
+session_config = {
+    "configurable": {
+        "session_id": "user_001",
+    }
+}
diff --git a/data/尺码推荐.txt b/data/尺码推荐.txt
new file mode 100644
index 0000000..beeb0dc
--- /dev/null
+++ b/data/尺码推荐.txt
@@ -0,0 +1,8 @@
+身高：155-165cm， 体重：75-95 斤，建议尺码S。
+身高：160-170cm， 体重：90-115斤，建议尺码M。
+身高：165-175cm， 体重：115-135斤，建议尺码L。
+身高：170-178cm， 体重：130-150斤，建议尺码XL。
+身高：175-182cm， 体重：145-165斤，建议尺码2XL。
+身高：178-185cm， 体重：160-180斤，建议尺码3XL。
+身高：180-190cm， 体重：180-210斤，建议尺码4XL。
+身高：190cm+，体重：210斤+，建议尺码5XL。
\ No newline at end of file
diff --git a/data/洗涤养护.txt b/data/洗涤养护.txt
new file mode 100644
index 0000000..42fa520
--- /dev/null
+++ b/data/洗涤养护.txt
@@ -0,0 +1,113 @@
+一、春季服装（纯棉、薄牛仔、针织棉、轻薄化纤）
+
+1. 纯棉材质（春季衬衫、T恤、休闲裤）
+
+洗涤：可机洗或手洗，水温≤30℃，中性洗涤剂；浅色与深色分开洗，首次洗加少许盐固色；机洗用洗衣袋+轻柔模式，避免摩擦起球。
+
+养护：阴凉通风处阴干，避免暴晒褪色；收纳前完全干燥，折叠或宽肩悬挂；潮湿天放干燥剂防发霉。
+
+2. 薄牛仔材质（春季牛仔裤、牛仔外套）
+
+洗涤：水温≤30℃，中性洗涤剂；翻面清洗减少褪色，机洗选轻柔模式；避免频繁清洗，1-2周一次即可。
+
+养护：翻面阴干，避免阳光直射；收纳时折叠平放或悬挂，宽肩衣架防止裤腰变形；裤兜内放防潮纸保持版型。
+
+3. 针织棉材质（春季针织衫、薄开衫）
+
+洗涤：手洗优先，水温≤25℃，中性洗涤剂轻轻按压；机洗需用洗衣袋，选针织专用模式；禁止用力搓揉、拧绞。
+
+养护：平铺阴干，避免悬挂拉伸领口；收纳时折叠，可放樟脑丸防蛀；轻微起球用毛球修剪器处理。
+
+4. 轻薄化纤材质（春季风衣、防晒衣）
+
+洗涤：可机洗，水温30-40℃，中性或碱性洗涤剂；轻柔揉搓，顽固污渍轻轻刷洗；清洗时加柔顺剂减少静电。
+
+养护：可阳光下晾晒，及时翻面确保干燥；收纳折叠或悬挂均可，避免重压产生永久性褶皱。
+
+二、夏季服装（真丝、棉麻、冰丝、短袖纯棉、雪纺）
+
+1. 真丝材质（夏季连衣裙、衬衫）
+
+洗涤：建议干洗；手洗用真丝专用中性洗涤剂，水温≤25℃，浸泡≤15分钟，轻轻按压清洗；禁止搓揉、拧绞、漂白。
+
+养护：阴凉通风处阴干，避免暴晒；悬挂用光滑衣架防勾丝，折叠收纳避免重压；熨烫用低温蒸汽，距离面料1-2cm。
+
+2. 棉麻材质（夏季T恤、阔腿裤、衬衫）
+
+洗涤：可机洗或手洗，水温≤30℃，中性洗涤剂；棉麻易皱，无需用力搓揉；浅色与深色分开洗。
+
+养护：可阳光下晾晒，晒干后及时收纳；收纳前熨烫平整，折叠或悬挂均可；避免潮湿环境，防止发霉。
+
+3. 冰丝材质（夏季T恤、短裤、连衣裙）
+
+洗涤：手洗或机洗，水温≤30℃，中性洗涤剂；机洗用洗衣袋，选轻柔模式；禁止长时间浸泡（≤10分钟）。
+
+养护：阴凉阴干，避免暴晒和高温烘烤；收纳折叠平放，避免尖锐物体勾划；穿着时避免粗糙物体摩擦。
+
+4. 雪纺材质（夏季连衣裙、防晒衫）
+
+洗涤：手洗优先，水温≤30℃，中性洗涤剂轻轻漂洗；机洗用洗衣袋，选轻柔模式；禁止用力拧绞。
+
+养护：阴凉阴干，悬挂时用细衣架避免勾丝；收纳折叠时垫一层薄纸，防止粘连；轻微褶皱用低温蒸汽熨烫。
+
+三、秋季服装（羊毛、羊绒、厚牛仔、灯芯绒、麂皮绒）
+
+1. 羊毛/羊绒材质（秋季羊毛衫、薄羊绒大衣）
+
+洗涤：优先干洗；手洗用羊毛专用洗涤剂，水温≤20℃，浸泡≤15分钟，轻轻按压；禁止机洗、搓揉、拧绞。
+
+养护：平铺阴干，避免悬挂拉伸；收纳时放防虫蛀剂（樟脑丸、薰衣草香包），透气布袋包裹；宽肩悬挂或折叠收纳，避免重压。
+
+2. 厚牛仔材质（秋季牛仔外套、厚牛仔裤）
+
+洗涤：水温≤30℃，中性洗涤剂；翻面清洗减少褪色和磨损，机洗选标准模式；首次洗盐水浸泡30分钟固色。
+
+养护：翻面阴干，避免暴晒；收纳时折叠平放，或悬挂用宽肩衣架；长期存放前完全干燥，放防潮剂。
+
+3. 灯芯绒材质（秋季灯芯绒外套、裤子）
+
+洗涤：手洗或机洗，水温≤30℃，中性洗涤剂；机洗用洗衣袋，选轻柔模式；禁止用力搓揉，避免绒面起毛。
+
+养护：阴凉阴干，晾晒时反面朝上；收纳时折叠，避免重压破坏绒面；熨烫用低温蒸汽，熨斗垫薄布，顺绒方向熨烫。
+
+4. 麂皮绒材质（秋季麂皮绒外套、夹克）
+
+洗涤：建议干洗；人造麂皮可手洗，水温≤30℃，中性洗涤剂轻轻按压；禁止机洗、漂白、用力拧绞。
+
+养护：阴凉阴干，避免暴晒和高温；收纳时悬挂，避免折叠产生折痕；表面灰尘用软毛刷轻轻刷除。
+
+四、冬季服装（羽绒服、厚羊毛大衣、加绒牛仔、保暖内衣）
+
+1. 羽绒服材质（冬季羽绒服、羽绒马甲）
+
+洗涤：优先干洗；可水洗款用羽绒服专用洗涤剂，水温≤30℃，浸泡≤20分钟，轻轻按压；机洗选羽绒服专用模式，放洗衣袋。
+
+养护：通风阴凉处阴干，晾晒时轻轻拍打恢复蓬松；完全干燥后收纳，压缩袋勿过度压缩；定期取出拍打通风。
+
+2. 厚羊毛大衣材质（冬季羊毛大衣、毛呢外套）
+
+洗涤：必须干洗，干洗能保护羊毛纤维弹性和柔软度；禁止水洗、机洗，避免纤维毡化、缩水。
+
+养护：悬挂收纳用宽肩无痕衣架，远离潮湿和高温；收纳前拍打去除灰尘，放防虫蛀剂；避免尖锐物体勾划。
+
+3. 加绒牛仔材质（冬季加绒牛仔裤、加绒牛仔外套）
+
+洗涤：水温≤30℃，中性洗涤剂；翻面清洗，机洗选轻柔模式；避免长时间浸泡，减少绒层脱落。
+
+养护：翻面阴干，避免暴晒；收纳时折叠平放，避免重压破坏绒层；清洗后及时晾干，防止发霉产生异味。
+
+4. 保暖内衣材质（纯棉保暖内衣、德绒保暖内衣）
+
+洗涤：可机洗或手洗，水温≤30℃，中性洗涤剂；机洗选轻柔模式，避免强力旋转；禁止使用漂白剂。
+
+养护：阴凉阴干或阳光下短时间晾晒；收纳时折叠平整，放干燥处；德绒材质避免高温熨烫，防止破坏保暖纤维。
+
+五、通用养护小贴士
+
+1. 不同材质衣物分开清洗，避免染色、磨损；洗涤剂充分溶解后再放衣物，避免局部变色。
+
+2. 污渍及时处理，时间越久越难去除；不同污渍针对性处理（油渍用洗洁精原液，血渍用冷水浸泡）。
+
+3. 熨烫前查看衣物洗标，按材质调整温度；首次熨烫先在衣物内侧测试，避免烫伤。
+
+4. 长期存放的衣物，收纳前务必完全干燥，定期检查是否发霉、虫蛀。
diff --git a/data/颜色选择.txt b/data/颜色选择.txt
new file mode 100644
index 0000000..b18226d
--- /dev/null
+++ b/data/颜色选择.txt
@@ -0,0 +1,32 @@
+1.  肤色与服装颜色搭配原则
+    冷白皮：适合冷色调和暖色调，亮色系（如宝蓝、正红、薄荷绿）更显白皙透亮；深色系（如黑色、深灰）可提升气场，避免过于苍白。
+    黄皮/暖黄皮：优先选暖色调（如焦糖色、姜黄色、豆沙色），避免冷调荧光色（如荧光绿、冷粉），易显肤色暗沉；浅米色、燕麦色可柔和肤色，提升气色。
+    黑皮：适合高饱和度亮色（如明黄、橙色、湖蓝），突出健康肤色；避免暗沉的土黄色、灰褐色，易显肤色暗沉无光。
+
+2.  场合与服装颜色选择
+    日常通勤：以基础色为主（黑白灰、米色、藏蓝），简约大气；可搭配低饱和度亮色（如雾霾蓝、浅紫）作为点缀，增加活力。
+    正式场合（商务会议/面试）：首选深色系（黑色、藏蓝、深灰），稳重专业；避免大面积亮色和花哨图案，保持简洁得体。
+    休闲场合（逛街/出游）：可选择高饱和度颜色或撞色搭配（如黄+白、蓝+白），清新活泼；条纹、格纹等基础图案也适合休闲场景。
+    宴会/派对：可选择亮色（正红、酒红、宝蓝）或金属色（金色、银色），凸显气质；避免过于朴素的颜色，降低存在感。
+
+3.  体型与服装颜色修饰技巧
+    显瘦搭配：优先选深色系（黑色、深灰、藏蓝），视觉上收缩身形；避免大面积亮色和横条纹，易显臃肿。
+    显高搭配：上浅下深（如浅粉上衣+黑色裤子），形成视觉落差，拉长比例；同色系搭配（如全身米色、全身黑色），显线条流畅。
+    丰满体型：避免紧身+亮色组合，选择哑光面料的深色服装，搭配小面积亮色配饰（如丝巾、项链）提亮造型。
+    瘦小体型：适合浅色系和暖色系（如白色、米色、浅黄），视觉上更显饱满；避免过于宽大的深色服装，易显单薄。
+
+4.  季节与服装颜色选择
+    春季：适合清新柔和的颜色（如樱花粉、薄荷绿、浅蓝色），契合万物复苏的氛围。
+    夏季：适合冷色调和浅色系（如白色、天蓝色、浅绿色），清爽降温；避免深色系，吸热且显沉闷。
+    秋季：适合暖色调和大地色系（如焦糖色、卡其色、棕色），呼应落叶和丰收的季节感。
+    冬季：适合深色系和高饱和度亮色（如黑色、酒红、墨绿色），保暖且显气场；白色羽绒服也是冬季经典选择，干净利落。
+
+5.  颜色搭配禁忌
+    避免大面积高饱和色撞色（如红+绿、蓝+橙），易显杂乱刺眼，可通过中性色（黑白灰）过渡。
+    黄皮避免冷调粉色和紫色，易显肤色蜡黄；黑皮避免荧光色，易显肤色暗沉。
+    正式场合避免超过三种颜色搭配，保持简洁协调；休闲场合可适当增加颜色层次，但需有主次之分。
+
+6.  配饰颜色搭配技巧
+    同色系配饰：与服装颜色一致（如黑色上衣+黑色包包），简约高级，适合通勤。
+    对比色配饰：与服装颜色形成反差（如白色连衣裙+红色项链），提亮造型，适合休闲和派对。
+    中性色配饰：黑白灰、金属色配饰百搭，可搭配任何颜色服装，是搭配的“安全牌”。
\ No newline at end of file
diff --git a/file_hisroty_store.py b/file_hisroty_store.py
new file mode 100644
index 0000000..2e3fbff
--- /dev/null
+++ b/file_hisroty_store.py
@@ -0,0 +1,37 @@
+import json
+import os
+from langchain_core.chat_history import BaseChatMessageHistory
+from langchain_core.chat_history import BaseMessage
+from langchain_core.messages import message_to_dict, messages_from_dict
+from typing import Sequence
+
+def get_history(session_id):  # history factory: one file per session under ./chat_history
+    return FileChatMessageHistory(session_id=session_id, storage_path="./chat_history")
+
+class FileChatMessageHistory(BaseChatMessageHistory):
+    """Chat history persisted as a JSON list in the file ``<storage_path>/<session_id>``."""
+    def __init__(self, session_id, storage_path):
+        self.session_id, self.storage_path = session_id, storage_path
+        self.file_path = os.path.join(storage_path, session_id)
+        os.makedirs(os.path.dirname(self.file_path), exist_ok=True)
+
+    def add_messages(self, messages : Sequence[BaseMessage]):
+        """Append ``messages`` to the stored history by rewriting the whole file."""
+        combined = [*self.messages, *messages]
+        serialized = [message_to_dict(item) for item in combined]
+
+        with open(self.file_path, 'w', encoding='utf-8') as fh:
+            json.dump(serialized, fh, ensure_ascii=False, indent=4)
+
+    @property
+    def messages(self) -> list[BaseMessage]:
+        """Messages loaded from the session file; an empty list when the file does not exist."""
+        try:
+            with open(self.file_path, 'r', encoding='utf-8') as fh:
+                return messages_from_dict(json.load(fh))
+        except FileNotFoundError:
+            return []
+
+    def clear(self):  # reset the session file to an empty JSON list
+        with open(self.file_path, 'w', encoding='utf-8') as fh:
+            fh.write(json.dumps([]))
\ No newline at end of file
diff --git a/knowledge_base.py b/knowledge_base.py
new file mode 100644
index 0000000..083679f
--- /dev/null
+++ b/knowledge_base.py
@@ -0,0 +1,91 @@
+"""
+知识库
+"""
+
+import os
+import config_data as config
+import hashlib
+from datetime import datetime
+
+from langchain_chroma import Chroma
+from langchain_community.embeddings import DashScopeEmbeddings
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+
+def check_md5(md5_str : str):
+    """Report whether ``md5_str`` is already recorded in the md5 file.
+
+    The file at ``config.md5_path`` holds one digest per line. When it does
+    not exist yet, it is created empty as a side effect.
+
+    Returns:
+        True if some line equals ``md5_str`` after stripping, otherwise False.
+    """
+    if not os.path.exists(config.md5_path):
+        open(config.md5_path, 'w', encoding='utf-8').close()
+        return False
+
+    with open(config.md5_path, 'r', encoding='utf-8') as md5_file:
+        return any(entry.strip() == md5_str for entry in md5_file)
+
+def save_md5(md5_str : str):
+    """Append ``md5_str`` as a new line to the file at ``config.md5_path``."""
+    with open(config.md5_path, 'a', encoding='utf-8') as md5_file:
+        md5_file.write(f"{md5_str}\n")
+
+def get_md5(input_str: str, encoding='utf-8'):
+    """Return the hexadecimal MD5 digest of ``input_str``.
+
+    The text is encoded with ``encoding`` (UTF-8 by default) and hashed in
+    one call via ``hashlib.md5``.
+    """
+    return hashlib.md5(input_str.encode(encoding)).hexdigest()
+
+class KnowledgeBaseService(object):
+    """Splits raw text into chunks and stores them in the persisted Chroma collection."""
+    def __init__(self):
+        os.makedirs(config.persist_directory, exist_ok=True)
+        # Use the configured embedding model, the same setting rag.py reads, so stored and query vectors match.
+        self.chroma = Chroma(
+            collection_name = config.collection_name,
+            embedding_function = DashScopeEmbeddings(model = config.embedding_model_name),
+            persist_directory = config.persist_directory
+        ) # vector store instance
+        self.spliter = RecursiveCharacterTextSplitter(
+            chunk_size=config.chunk_size,
+            chunk_overlap=config.chunk_overlap,  # overlap between consecutive chunks
+            separators=config.separators,  # separators the splitter may cut at
+            length_function=len,  # lengths are measured with len()
+        ) # text splitter
+
+    def upload_by_str(self, data : str, filename : str):
+        """Embed ``data`` and store it, skipping content whose MD5 is already recorded; returns a status string."""
+        md5_hex = get_md5(data)
+        if check_md5(md5_hex):
+            return "[跳过]内容已存在知识库中"
+
+        if len(data) > config.max_split_char_number:
+            knowledge_chunks: list[str] = self.spliter.split_text(data)
+        else:
+            knowledge_chunks = [data]
+
+        metadata = {
+            "source": filename,
+            "create_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+            "operator": "admin"
+        }
+
+        self.chroma.add_texts(
+            texts = knowledge_chunks,
+            metadatas = [metadata for _ in knowledge_chunks],
+        )
+        # Record the digest only after the vectors were stored, so a failed upload can be retried.
+        save_md5(md5_hex)
+        return "成功存取"
+
+
+
+
+if __name__ == '__main__':
+    # Manual smoke test: ingest a short sample string and print the returned status message.
+    demo_service = KnowledgeBaseService()
+    print(demo_service.upload_by_str("周杰伦2", "testfile"))
\ No newline at end of file
diff --git a/md5.text b/md5.text
new file mode 100644
index 0000000..448a19b
--- /dev/null
+++ b/md5.text
@@ -0,0 +1,3 @@
+27ac4134a42757595b727c5717c7572f
+1f341f680c13cbf4f480772c56bf1a44
+962c4e1dc3d7aeca6717351f333754cb
diff --git a/rag.py b/rag.py
new file mode 100644
index 0000000..ed11d05
--- /dev/null
+++ b/rag.py
@@ -0,0 +1,83 @@
+"""
+rag service类
+带历史记忆
+"""
+from langchain_community.chat_models import ChatTongyi
+from langchain_community.embeddings import DashScopeEmbeddings
+from langchain_core.documents import Document
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
+from langchain_core.runnables import RunnablePassthrough, RunnableWithMessageHistory, RunnableLambda
+
+from file_hisroty_store import get_history
+import config_data as config
+from vector_stores import VectorStoreService
+
+class RAGService(object):
+    """Retrieval-augmented chat chain with per-session message history."""
+    def __init__(self):
+        self.vector_service = VectorStoreService(
+            embedding=DashScopeEmbeddings(model = config.embedding_model_name)
+        )
+        self.prompt_template = ChatPromptTemplate.from_messages([
+            ("system", "以我提供的已知参考资料为主，简洁专业回答用户问题，参考资料: {context}"),
+            ("system", "用户的对话历史记录，如下"),
+            MessagesPlaceholder("history"),
+            ("user", "请回答用户提问: {input}"),
+        ])
+        self.chat_model = ChatTongyi(model = config.chat_model_name)
+        # Built last: the chain wires together the three attributes above.
+        self.chain = self.__get_chain()
+
+    def format_document(self, docs : list[Document]):
+        """Render retrieved documents as prompt context text, or a fixed notice when there are none."""
+        if not docs:
+            return "无相关参考资料"
+        return "".join(
+            f"文档片段:{doc.page_content} \n元数据 {doc.metadata} \n\n" for doc in docs
+        )
+
+    def __get_chain(self):
+        """Build the executable chain: retrieve context, fill the prompt, call the model, track history."""
+        retriever = self.vector_service.get_retriever()
+
+        # The inner chain is given a dict that carries the question under "input" and the messages under "history".
+        def format_for_retriever(value):
+            return value["input"]
+
+        def format_for_prompt_template(value):
+            # value["input"] is the untouched input dict; value["context"] is the formatted document text.
+            original = value["input"]
+            return {"input": original["input"], "context": value["context"], "history": original["history"]}
+
+        chain = (
+            {
+                "input": RunnablePassthrough(),
+                "context": RunnableLambda(format_for_retriever) | retriever | self.format_document
+            }
+            | RunnableLambda(format_for_prompt_template)
+            | self.prompt_template | self.chat_model | StrOutputParser()
+        )
+
+        # History wrapper: get_history provides the per-session store, whose messages
+        # are supplied under "history"; "input" is the key holding the user's message.
+        return RunnableWithMessageHistory(
+            chain,
+            get_history,
+            input_messages_key="input",
+            history_messages_key="history",
+        )
+
+if __name__ == '__main__':
+    # Manual smoke test: stream one answer to stdout.
+    # Reuse the shared session settings instead of duplicating the literal
+    # here, so this entry point and the Streamlit app cannot drift apart.
+    session_config = config.session_config
+
+    # The chain expects a dict carrying the question under "input"; the
+    # session id inside the config selects which stored history is used.
+    answer_stream = RAGService().chain.stream({"input": "春天穿什么颜色的衣服"}, session_config)
+    for piece in answer_stream:
+        print(piece, end="", flush=True)
+
+
diff --git a/vector_stores.py b/vector_stores.py
new file mode 100644
index 0000000..e927f46
--- /dev/null
+++ b/vector_stores.py
@@ -0,0 +1,26 @@
+"""向量存储服务"""
+from langchain_chroma import Chroma
+import config_data as config
+
+class VectorStoreService(object):
+    """Wraps the persisted Chroma collection and exposes it as a retriever."""
+    def __init__(self, embedding):
+        """:param embedding: embedding function handed to Chroma."""
+        self.embedding = embedding
+        store_options = {
+            "collection_name": config.collection_name,
+            "embedding_function": embedding,
+            "persist_directory": config.persist_directory,
+        }
+        self.vector_store = Chroma(**store_options)
+
+    def get_retriever(self):
+        """Return a retriever whose ``k`` search argument is ``config.similarity_threshold``."""
+        return self.vector_store.as_retriever(search_kwargs=dict(k=config.similarity_threshold))
+
+if __name__ == '__main__':
+    # Manual smoke test: run one retrieval against the persisted store and print the hits.
+    from langchain_community.embeddings import DashScopeEmbeddings
+    embedding = DashScopeEmbeddings(model = config.embedding_model_name)
+    docs = VectorStoreService(embedding).get_retriever().invoke("我的体重180斤，尺码推荐？")
+    print(docs)
\ No newline at end of file