合并分支

* Merge branch 'main' into cluster-gemini
* FIX 修复json
* Merge branch 'main' into cluster-gemini
* FIX 修复返回值问题
* PERF 适配system instruction

---------

Merge request URL: https://g-ldyi2063.coding.net/p/dev/d/modalDeploy/git/merge/4882
Co-authored-by: 康宇佳
This commit is contained in:
康宇佳 2025-06-27 10:32:34 +08:00 committed by Coding
parent e323073b5f
commit 40df14e2c9
1 changed files with 42 additions and 2 deletions

View File

@ -72,13 +72,14 @@ with (downloader_image.imports()):
调用模型转换非标准json为json
"""
try:
logger.info("🐬尝试模型修复JSON字符串")
resp, resp_code = client.generate_content(model_id="gemini-2.5-flash",
contents=[types.Content(role='user',
parts=[
types.Part.from_text(
text="<prompt>"
"<instruction>"
"请格式化以下一段非标准json格式字符串为json标准格式 \n{0}"
"请格式化以下一段非标准json格式字符串为json标准格式字段值部分缺失请关联上下文补全特殊字符可能破坏json结构的请转义 \n{0}"
"</instruction>"
"</prompt>".format(
json_like_str
@ -101,7 +102,10 @@ with (downloader_image.imports()):
"").replace("\\",
"")
# 解析识别结果
result_json = json.loads(result_text)
try:
result_json = json.loads(result_text)
except:
result_json = fix_json(result_text)
return result_json
else:
logger.error(f"😭格式化json推理失败, Reason {reason}")
@ -113,6 +117,42 @@ with (downloader_image.imports()):
return None
def fix_json(malformed_json_str: str) -> Any:
    """Repair unescaped double quotes inside JSON string values, then parse.

    A regular expression locates every ``"key": "value"`` pair whose value is
    a string, and each double quote found *inside* the value is escaped so
    that the text becomes valid JSON. Quotes that are already escaped (and any
    other backslash escape sequence) are left untouched, so a value that was
    valid before the repair stays valid after it.

    Limitations of the heuristic: a value is considered finished at the first
    double quote that is followed (after optional whitespace) by ``,``, ``}``
    or ``]``, so a value containing such a sequence is cut short; values that
    span several lines are not matched because ``.`` does not match newlines.

    :param malformed_json_str: JSON-like text whose string values may contain
        unescaped double quotes.
    :return: The Python object obtained by parsing the repaired text (not the
        repaired text itself).
    :raises json.JSONDecodeError: If the text is still not valid JSON after
        the repair.
    """
    logger.info("🐬尝试正则修复JSON字符串")

    def escape_bare_quote(token):
        """Escape a lone double quote; pass backslash escape pairs through."""
        text = token.group(0)
        return '\\"' if text == '"' else text

    def fix_value_callback(match):
        """Rebuild one key/value pair with the value's inner quotes escaped.

        - match.group(1): the key part including the colon, e.g. '"product": '
        - match.group(2): the value part including its surrounding quotes
        """
        key_part = match.group(1)
        value_part = match.group(2)
        # Strip the surrounding quotes to get the raw value content.
        content = value_part[1:-1]
        # Consume backslash escape pairs first so that an existing \" (or \\)
        # is kept as-is; only quotes that are not already escaped get a
        # backslash. A plain str.replace would double-escape valid input.
        fixed_content = re.sub(r'\\.|"', escape_bare_quote, content)
        # Only the matched span is replaced, so return the full key/value pair.
        return f'{key_part}"{fixed_content}"'

    # Group 1: a quoted key followed by a colon.
    # Group 2: a quoted value, matched lazily up to the first quote that is
    #          followed by a JSON delimiter (comma, closing brace or bracket).
    regex_pattern_final = r'(\"[^\"]+\"\s*:\s*)(\"(?:.*?)\"(?=\s*[,}\]]))'
    fixed_json_str = re.sub(regex_pattern_final, fix_value_callback, malformed_json_str)
    return json.loads(fixed_json_str)
def parse_stage1_result(client: GoogleAuthUtils.GoogleGenaiClient,
result_text: str,
correct_config) -> List[Dict[str, Any]]: