FIX 修复json
This commit is contained in:
parent
d9c222e4cb
commit
a0cbf5a678
|
|
@ -72,13 +72,14 @@ with (downloader_image.imports()):
|
|||
调用模型转换非标准json为json
|
||||
"""
|
||||
try:
|
||||
logger.info("🐬尝试模型修复JSON字符串")
|
||||
resp, resp_code = client.generate_content(model_id="gemini-2.5-flash",
|
||||
contents=[types.Content(role='user',
|
||||
parts=[
|
||||
types.Part.from_text(
|
||||
text="<prompt>"
|
||||
"<instruction>"
|
||||
"请格式化以下一段非标准json格式字符串为json标准格式 \n{0}"
|
||||
"请格式化以下一段非标准json格式字符串为json标准格式,字段值部分缺失请关联上下文补全,特殊字符可能破坏json结构的请转义 \n{0}"
|
||||
"</instruction>"
|
||||
"</prompt>".format(
|
||||
json_like_str
|
||||
|
|
@ -101,7 +102,10 @@ with (downloader_image.imports()):
|
|||
"").replace("\\",
|
||||
"")
|
||||
# 解析识别结果
|
||||
result_json = json.loads(result_text)
|
||||
try:
|
||||
result_json = json.loads(result_text)
|
||||
except:
|
||||
result_json = fix_json(result_text)
|
||||
return result_json
|
||||
else:
|
||||
logger.error(f"😭格式化json推理失败, Reason {reason}")
|
||||
|
|
@ -113,6 +117,42 @@ with (downloader_image.imports()):
|
|||
return None
|
||||
|
||||
|
||||
def fix_json(malformed_json_str: str) -> Any:
    """Repair unescaped double quotes inside JSON string values and parse the result.

    Every ``"key": "value"`` pair is located with a regular expression and the
    bare double quotes inside the value are backslash-escaped. Quotes that are
    already escaped are left untouched, so well-formed values sitting next to
    broken ones are not corrupted. Works on pretty-printed and single-line input.

    Limitations: a value is considered to end at the first ``"`` that is
    followed (after optional whitespace) by ``,``, ``}`` or ``]``, and ``.``
    does not cross newlines, so values containing such a sequence or spanning
    several lines are not repaired.

    :param malformed_json_str: JSON-like text whose string values may contain
        unescaped double quotes.
    :return: The parsed Python object (the output of ``json.loads``), not a string.
    :raises json.JSONDecodeError: If the text still cannot be parsed after the repair.
    """
    logger.info("🐬尝试正则修复JSON字符串")

    def escape_bare_quote(quote_match):
        """Escape a quote only when it is not already escaped.

        - quote_match.group(1): the run of backslashes directly before the quote.
        """
        backslashes = quote_match.group(1)
        # An odd number of preceding backslashes means the last one already
        # escapes this quote; adding another would un-escape it.
        if len(backslashes) % 2 == 1:
            return quote_match.group(0)
        return backslashes + '\\"'

    def fix_value_callback(match):
        """re.sub callback that rebuilds one key/value pair with a repaired value.

        - match.group(1): the key part, e.g. '"product": '
        - match.group(2): the value part including its outer quotes,
          e.g. '"Reloading ""3D "..."'
        """
        key_part = match.group(1)
        value_part = match.group(2)

        # Strip the outer quotes: "Reloading ""3D "..." -> Reloading ""3D "...
        content = value_part[1:-1]

        # Core repair: escape every bare double quote inside the value.
        fixed_content = re.sub(r'(\\*)"', escape_bare_quote, content)

        # Only the matched span is replaced, so return the whole rebuilt pair.
        return f'{key_part}"{fixed_content}"'

    # group(1): quoted key followed by a colon; group(2): quoted value that
    # lazily extends to the first quote followed by ',', '}' or ']'.
    regex_pattern_final = r'(\"[^\"]+\"\s*:\s*)(\"(?:.*?)\"(?=\s*[,}\]]))'
    fixed_json_str = re.sub(regex_pattern_final, fix_value_callback, malformed_json_str)

    return json.loads(fixed_json_str)
|
||||
|
||||
|
||||
def parse_stage1_result(client: GoogleAuthUtils.GoogleGenaiClient,
|
||||
result_text: str,
|
||||
correct_config) -> List[Dict[str, Any]]:
|
||||
|
|
|
|||
Loading…
Reference in New Issue