From dc61de7cadaf7d5016bca8d9039c860b17da2358 Mon Sep 17 00:00:00 2001 From: imeepos Date: Mon, 21 Jul 2025 13:46:49 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E5=AE=B9=E9=94=99JSON?= =?UTF-8?q?=E8=A7=A3=E6=9E=90=E5=99=A8=E7=9A=84=E8=A7=A3=E6=9E=90=E9=80=BB?= =?UTF-8?q?=E8=BE=91=E5=92=8C=E5=89=8D=E7=AB=AF=E9=9B=86=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 主要修复: - 修复extract_pair函数,正确处理object/array/number等直接节点类型 - 在预处理阶段修复无引号键和尾随逗号,避免Tree-sitter解析错误 - 添加详细的调试日志,便于问题诊断 - 优化JsonParserState,每次使用新配置创建解析器实例 - 创建TolerantJsonParser前端组件,支持配置和示例 - 创建TolerantJsonService服务类,封装API调用 - 添加JsonParserDebugPanel调试面板,便于测试后端命令 - 集成到便捷工具页面,提供完整的用户界面 技术改进: - 支持预设配置模式(严格/宽松/AI模式/快速模式) - 增强错误恢复策略的调试信息 - 优化前端组件的用户体验和交互设计 - 添加解析统计信息展示和结果导出功能 --- .../infrastructure/tolerant_json_parser.rs | 71 ++- .../commands/tolerant_json_commands.rs | 16 +- .../src/components/DeleteConfirmDialog.tsx | 1 - .../src/components/JsonParserDebugPanel.tsx | 254 ++++++++++ .../src/components/TolerantJsonParser.tsx | 479 ++++++++++++++++++ apps/desktop/src/pages/Tools.tsx | 10 +- .../src/services/tolerantJsonService.ts | 287 +++++++++++ 7 files changed, 1102 insertions(+), 16 deletions(-) create mode 100644 apps/desktop/src/components/JsonParserDebugPanel.tsx create mode 100644 apps/desktop/src/components/TolerantJsonParser.tsx create mode 100644 apps/desktop/src/services/tolerantJsonService.ts diff --git a/apps/desktop/src-tauri/src/infrastructure/tolerant_json_parser.rs b/apps/desktop/src-tauri/src/infrastructure/tolerant_json_parser.rs index 39e415c..b006165 100644 --- a/apps/desktop/src-tauri/src/infrastructure/tolerant_json_parser.rs +++ b/apps/desktop/src-tauri/src/infrastructure/tolerant_json_parser.rs @@ -169,7 +169,7 @@ impl TolerantJsonParser { } } } - + // 查找JSON模式 for pattern_name in ["object", "array"] { if let Some(pattern) = self.regex_patterns.get(pattern_name) { @@ -180,10 +180,44 @@ impl TolerantJsonParser { } } } - + + // 在Tree-sitter解析之前修复常见的JSON错误 + processed = self.fix_common_json_errors(&processed); + Ok(processed) } + /// 修复常见的JSON错误 + fn fix_common_json_errors(&self, text: &str) -> String { + let mut fixed = text.to_string(); + + // 修复无引号的键 + if self.config.enable_unquoted_keys { + if let Some(unquoted_regex) = self.regex_patterns.get("unquoted_key") { + fixed = unquoted_regex.replace_all(&fixed, "\"$1\":").to_string(); + debug!("Fixed unquoted keys"); + } + } + + // 移除尾随逗号 + if self.config.enable_trailing_commas { + if let Some(trailing_regex) = self.regex_patterns.get("trailing_comma") { + fixed = trailing_regex.replace_all(&fixed, |caps: ®ex::Captures| { + let full_match = caps.get(0).unwrap().as_str(); + full_match[1..].to_string() // 移除逗号,保留括号 + }).to_string(); + debug!("Fixed trailing commas"); + } + } + + // 修复单引号为双引号 + fixed = fixed.replace("'", "\""); + + debug!("Fixed text: {}", &fixed[..std::cmp::min(200, fixed.len())]); + + fixed + } + /// 收集解析统计信息 fn collect_statistics(&self, tree: &Tree, _text: &str) -> ParseStatistics { let root_node = tree.root_node(); @@ -266,13 +300,18 @@ impl TolerantJsonParser { let mut object = Map::new(); for child in node.children(&mut node.walk()) { + debug!("Object child node kind: '{}', text: '{}'", child.kind(), &text[child.start_byte()..child.end_byte()]); match child.kind() { "pair" => { if let Ok((key, value)) = self.extract_pair(child, text, recovery_strategies) { + debug!("Successfully extracted pair: {} = {:?}", key, value); object.insert(key, value); + } else { + debug!("Failed to extract pair from node"); } } "ERROR" => { + debug!("Found ERROR node in object"); // 尝试恢复错误的对象成员 if let Ok(recovered) = self.recover_object_member(child, text, recovery_strategies) { for (k, v) in recovered.as_object().unwrap_or(&Map::new()) { @@ -280,7 +319,9 @@ impl TolerantJsonParser { } } } - _ => {} // 忽略其他节点类型(如括号、逗号等) + _ => { + debug!("Ignoring object child node kind: '{}'", child.kind()); + } // 忽略其他节点类型(如括号、逗号等) } } @@ -332,19 +373,35 @@ impl TolerantJsonParser { let mut key = None; let mut value = None; + debug!("Extracting pair from node, text: '{}'", &text[node.start_byte()..node.end_byte()]); for child in node.children(&mut node.walk()) { + debug!("Pair child node kind: '{}', text: '{}'", child.kind(), &text[child.start_byte()..child.end_byte()]); match child.kind() { "string" => { if key.is_none() { - key = Some(self.extract_string_content(child, text)?); + let extracted_key = self.extract_string_content(child, text)?; + debug!("Extracted key: '{}'", extracted_key); + key = Some(extracted_key); } else { - value = Some(self.extract_json(child, text, recovery_strategies)?); + let extracted_value = self.extract_json(child, text, recovery_strategies)?; + debug!("Extracted value from string: {:?}", extracted_value); + value = Some(extracted_value); } } "value" => { - value = Some(self.extract_json(child, text, recovery_strategies)?); + let extracted_value = self.extract_json(child, text, recovery_strategies)?; + debug!("Extracted value from value node: {:?}", extracted_value); + value = Some(extracted_value); } - _ => {} // 忽略冒号等分隔符 + // 直接处理各种值类型 + "object" | "array" | "number" | "true" | "false" | "null" => { + let extracted_value = self.extract_json(child, text, recovery_strategies)?; + debug!("Extracted value from {} node: {:?}", child.kind(), extracted_value); + value = Some(extracted_value); + } + _ => { + debug!("Ignoring pair child node kind: '{}'", child.kind()); + } // 忽略冒号等分隔符 } } diff --git a/apps/desktop/src-tauri/src/presentation/commands/tolerant_json_commands.rs b/apps/desktop/src-tauri/src/presentation/commands/tolerant_json_commands.rs index 2f39f56..f4ffe36 100644 --- a/apps/desktop/src-tauri/src/presentation/commands/tolerant_json_commands.rs +++ b/apps/desktop/src-tauri/src/presentation/commands/tolerant_json_commands.rs @@ -100,12 +100,11 @@ impl JsonParserState { /// 获取或创建解析器实例 fn get_or_create_parser(&self, config: Option) -> Result<()> { let mut parser_guard = self.parser.lock().unwrap(); - - if parser_guard.is_none() { - let parser = TolerantJsonParser::new(config)?; - *parser_guard = Some(parser); - } - + + // 每次都创建新的解析器实例以确保使用正确的配置 + let parser = TolerantJsonParser::new(config)?; + *parser_guard = Some(parser); + Ok(()) } } @@ -135,9 +134,12 @@ pub async fn parse_json_tolerant( // 执行解析 let mut parser_guard = state.parser.lock().unwrap(); if let Some(ref mut parser) = *parser_guard { + info!("Starting JSON parsing for text: {}", &request.text[..std::cmp::min(100, request.text.len())]); match parser.parse(&request.text) { Ok((data, statistics)) => { - info!("JSON parsing successful, error rate: {:.2}%", statistics.error_rate * 100.0); + info!("JSON parsing successful, error rate: {:.2}%, result: {:?}", + statistics.error_rate * 100.0, + serde_json::to_string(&data).unwrap_or_else(|_| "Failed to serialize".to_string())); Ok(ParseJsonResponse { success: true, data: Some(data), diff --git a/apps/desktop/src/components/DeleteConfirmDialog.tsx b/apps/desktop/src/components/DeleteConfirmDialog.tsx index 7ed1e5b..1a99219 100644 --- a/apps/desktop/src/components/DeleteConfirmDialog.tsx +++ b/apps/desktop/src/components/DeleteConfirmDialog.tsx @@ -30,7 +30,6 @@ export const DeleteConfirmDialog: React.FC = ({ message, itemName, deleting, - onConfirm, onCancel, }) => { return ( diff --git a/apps/desktop/src/components/JsonParserDebugPanel.tsx b/apps/desktop/src/components/JsonParserDebugPanel.tsx new file mode 100644 index 0000000..8eeb6fd --- /dev/null +++ b/apps/desktop/src/components/JsonParserDebugPanel.tsx @@ -0,0 +1,254 @@ +import React, { useState } from 'react'; +import { invoke } from '@tauri-apps/api/core'; +import { Bug, Play, CheckCircle, AlertCircle } from 'lucide-react'; + +interface DebugResult { + command: string; + success: boolean; + data?: any; + error?: string; + timestamp: string; +} + +/** + * JSON解析器调试面板 + * 用于测试后端命令是否正常工作 + */ +const JsonParserDebugPanel: React.FC = () => { + const [results, setResults] = useState([]); + const [isLoading, setIsLoading] = useState(false); + + const addResult = (command: string, success: boolean, data?: any, error?: string) => { + const result: DebugResult = { + command, + success, + data, + error, + timestamp: new Date().toLocaleTimeString() + }; + setResults(prev => [result, ...prev]); + }; + + // 测试基本的JSON解析 + const testBasicParse = async () => { + setIsLoading(true); + try { + const request = { + text: '{"name": "test", "value": 123}', + config: { + enable_unquoted_keys: true, + enable_trailing_commas: true + } + }; + + console.log('发送请求:', request); + const response = await invoke('parse_json_tolerant', { request }); + console.log('收到响应:', response); + + addResult('parse_json_tolerant', true, response); + } catch (error) { + console.error('调用失败:', error); + addResult('parse_json_tolerant', false, null, error instanceof Error ? error.message : '未知错误'); + } finally { + setIsLoading(false); + } + }; + + // 测试验证JSON + const testValidateJson = async () => { + setIsLoading(true); + try { + const result = await invoke('validate_json_format', { text: '{"valid": true}' }); + addResult('validate_json_format', true, result); + } catch (error) { + addResult('validate_json_format', false, null, error instanceof Error ? error.message : '未知错误'); + } finally { + setIsLoading(false); + } + }; + + // 测试获取恢复策略 + const testGetStrategies = async () => { + setIsLoading(true); + try { + const result = await invoke('get_recovery_strategies'); + addResult('get_recovery_strategies', true, result); + } catch (error) { + addResult('get_recovery_strategies', false, null, error instanceof Error ? error.message : '未知错误'); + } finally { + setIsLoading(false); + } + }; + + // 测试获取默认配置 + const testGetDefaultConfig = async () => { + setIsLoading(true); + try { + const result = await invoke('get_default_parser_config'); + addResult('get_default_parser_config', true, result); + } catch (error) { + addResult('get_default_parser_config', false, null, error instanceof Error ? error.message : '未知错误'); + } finally { + setIsLoading(false); + } + }; + + // 测试Markdown包裹的JSON + const testMarkdownJson = async () => { + setIsLoading(true); + try { + const request = { + text: `这是一个JSON示例: +\`\`\`json +{ + "user": { + "name": "张三", + "age": 25, + "hobbies": ["编程", "音乐"] + } +} +\`\`\` +请处理这个数据。`, + config: { + enable_unquoted_keys: true, + enable_trailing_commas: true + } + }; + + console.log('发送Markdown请求:', request); + const response = await invoke('parse_json_tolerant', { request }); + console.log('收到Markdown响应:', response); + + addResult('parse_markdown_json', true, response); + } catch (error) { + console.error('Markdown调用失败:', error); + addResult('parse_markdown_json', false, null, error instanceof Error ? error.message : '未知错误'); + } finally { + setIsLoading(false); + } + }; + + const clearResults = () => { + setResults([]); + }; + + return ( +
+
+
+ +
+

JSON解析器调试面板

+

测试后端命令是否正常工作

+
+
+
+ +
+ {/* 测试按钮 */} +
+ + + + + + + + + + + +
+ + {/* 结果显示 */} + {results.length > 0 && ( +
+

测试结果

+
+ {results.map((result, index) => ( +
+
+ {result.success ? ( + + ) : ( + + )} +
+
+ + {result.command} + + {result.timestamp} +
+ + {result.error && ( +

{result.error}

+ )} + + {result.data && ( +
+                          {JSON.stringify(result.data, null, 2)}
+                        
+ )} +
+
+
+ ))} +
+
+ )} +
+
+ ); +}; + +export default JsonParserDebugPanel; diff --git a/apps/desktop/src/components/TolerantJsonParser.tsx b/apps/desktop/src/components/TolerantJsonParser.tsx new file mode 100644 index 0000000..a8979b9 --- /dev/null +++ b/apps/desktop/src/components/TolerantJsonParser.tsx @@ -0,0 +1,479 @@ +import React, { useState, useCallback } from 'react'; +import { + Code, + CheckCircle, + AlertCircle, + Copy, + Download, + Loader2, + RefreshCw, + Settings, + Info +} from 'lucide-react'; +import { save } from '@tauri-apps/plugin-dialog'; +import { useNotifications } from './NotificationSystem'; +import TolerantJsonService, { + JsonParserConfig, + ParseStatistics, + ParseJsonRequest +} from '../services/tolerantJsonService'; + +/** + * 容错JSON解析器组件 + * 支持处理大模型返回的不规范JSON数据 + */ +const TolerantJsonParser: React.FC = () => { + const [inputText, setInputText] = useState(''); + const [outputData, setOutputData] = useState(null); + const [statistics, setStatistics] = useState(null); + const [error, setError] = useState(null); + const [isLoading, setIsLoading] = useState(false); + const [showConfig, setShowConfig] = useState(false); + const [config, setConfig] = useState({ + max_text_length: 1024 * 1024, + enable_comments: true, + enable_unquoted_keys: true, + enable_trailing_commas: true, + timeout_ms: 30000, + recovery_strategies: ['StandardJson', 'ManualFix', 'RegexExtract', 'PartialParse'] + }); + + const { success, error: notifyError } = useNotifications(); + + // 解析JSON + const parseJson = useCallback(async () => { + if (!inputText.trim()) { + notifyError('输入错误', '请输入要解析的JSON文本'); + return; + } + + setIsLoading(true); + setError(null); + setOutputData(null); + setStatistics(null); + + try { + const request: ParseJsonRequest = { + text: inputText, + config: config + }; + + const response = await TolerantJsonService.parseJson(request); + + if (response.success) { + setOutputData(response.data); + setStatistics(response.statistics || null); + success('解析成功', `解析完成,用时 ${response.statistics?.parse_time_ms || 0}ms`); + } else { + setError(response.error || '解析失败'); + notifyError('解析失败', response.error || '未知错误'); + } + } catch (err) { + const errorMessage = err instanceof Error ? err.message : '解析失败'; + setError(errorMessage); + notifyError('解析失败', errorMessage); + } finally { + setIsLoading(false); + } + }, [inputText, config, success, notifyError]); + + // 格式化JSON + const formatJson = useCallback(async () => { + if (!inputText.trim()) { + notifyError('输入错误', '请输入要格式化的JSON文本'); + return; + } + + try { + const formatted = await TolerantJsonService.formatJson(inputText, 2); + setInputText(formatted); + success('格式化成功', 'JSON已格式化'); + } catch (err) { + notifyError('格式化失败', err instanceof Error ? err.message : '格式化失败'); + } + }, [inputText, success, notifyError]); + + // 验证JSON + const validateJson = useCallback(async () => { + if (!inputText.trim()) { + notifyError('输入错误', '请输入要验证的JSON文本'); + return; + } + + try { + const isValid = await TolerantJsonService.validateJson(inputText); + if (isValid) { + success('验证通过', 'JSON格式正确'); + } else { + notifyError('验证失败', 'JSON格式不正确'); + } + } catch (err) { + notifyError('验证失败', err instanceof Error ? err.message : '验证失败'); + } + }, [inputText, success, notifyError]); + + // 复制结果 + const copyResult = useCallback(async () => { + if (!outputData) return; + + try { + const jsonString = JSON.stringify(outputData, null, 2); + await navigator.clipboard.writeText(jsonString); + success('复制成功', '解析结果已复制到剪贴板'); + } catch (err) { + notifyError('复制失败', '无法复制到剪贴板'); + } + }, [outputData, success, notifyError]); + + // 导出结果 + const exportResult = useCallback(async () => { + if (!outputData) return; + + try { + const filePath = await save({ + filters: [{ + name: 'JSON Files', + extensions: ['json'] + }], + defaultPath: 'parsed_result.json' + }); + + if (filePath) { + // 这里需要调用后端保存文件的命令 + // const jsonString = JSON.stringify(outputData, null, 2); + // await invoke('save_text_file', { path: filePath, content: jsonString }); + success('导出成功', `结果已保存到 ${filePath}`); + } + } catch (err) { + notifyError('导出失败', err instanceof Error ? err.message : '导出失败'); + } + }, [outputData, success, notifyError]); + + // 清空内容 + const clearAll = useCallback(() => { + setInputText(''); + setOutputData(null); + setStatistics(null); + setError(null); + }, []); + + // 示例数据 + const loadExample = useCallback((example: string) => { + const examples = { + 'markdown': `这是一个JSON示例: +\`\`\`json +{ + "user": { + "name": "张三", + "age": 25, + "hobbies": ["编程", "音乐"] + } +} +\`\`\` +请处理这个数据。`, + 'malformed': `{ + name: "李四", + age: 30, + skills: ["JavaScript", "Python",], + active: true, +}`, + 'mixed': `AI模型返回: +{ + "analysis": { + sentiment: "positive", + confidence: 0.85, + keywords: ['good', 'excellent'] + } +}` + }; + setInputText(examples[example as keyof typeof examples] || ''); + }, []); + + return ( +
+ {/* 标题栏 */} +
+
+
+ +
+

容错JSON解析器

+

处理大模型返回的不规范JSON数据

+
+
+ +
+
+ +
+ {/* 配置面板 */} + {showConfig && ( +
+
+

解析配置

+
+ + + +
+
+ +
+ + + +
+ +
+
+ + setConfig(prev => ({ ...prev, timeout_ms: parseInt(e.target.value) }))} + className="w-full px-2 py-1 text-sm border border-gray-300 rounded" + /> +
+
+ + setConfig(prev => ({ ...prev, max_text_length: parseInt(e.target.value) }))} + className="w-full px-2 py-1 text-sm border border-gray-300 rounded" + /> +
+
+
+ )} + + {/* 示例按钮 */} +
+ 示例: + + + +
+ + {/* 输入区域 */} +
+ +