fix: markdown解析bug

2025-07-22 16:09:31 +08:00 · 2025-07-22 16:09:31 +08:00 · dde679fd6e
parent 725514af2d
commit dde679fd6e
3 changed files with 59 additions and 13 deletions
--- a/apps/desktop/src-tauri/src/infrastructure/markdown_parser.rs
+++ b/apps/desktop/src-tauri/src/infrastructure/markdown_parser.rs
@ -63,6 +63,8 @@ pub struct Position {
    pub column: usize,
    /// 字符偏移量（从0开始）
    pub offset: usize,
    /// 字节偏移量（从0开始）
    pub byte_offset: usize,
 }
 /// 范围信息
@ -154,13 +156,14 @@ impl MarkdownParser {
        })
    }
-    /// 计算文本中的位置信息
+    /// 根据字节偏移计算位置信息
-    fn calculate_position(&self, text: &str, offset: usize) -> Position {
+    fn calculate_position_from_byte_offset(&self, text: &str, byte_offset: usize) -> Position {
        let mut line = 0;
        let mut column = 0;
        let mut char_offset = 0;
-        for (i, ch) in text.char_indices() {
+        for (byte_idx, ch) in text.char_indices() {
-            if i >= offset {
+            if byte_idx >= byte_offset {
                break;
            }
            if ch == '\n' {
@ -169,15 +172,52 @@ impl MarkdownParser {
            } else {
                column += 1;
            }
            char_offset += 1;
        }
        Position {
            line,
            column,
-            offset,
+            offset: char_offset,
            byte_offset,
        }
    }
    /// Computes a [`Position`] from a character offset into `text`.
    ///
    /// Walks at most `char_offset` characters from the start of `text`,
    /// tracking the zero-based line and column along the way. A `'\n'`
    /// starts a new line and resets the column to 0; every other character
    /// advances the column by one.
    ///
    /// The returned `offset` is `char_offset` exactly as passed in (it is not
    /// clamped to the length of `text`), while `byte_offset` is the byte index
    /// just past the last character that was actually visited, or 0 when no
    /// character was visited.
    fn calculate_position_from_char_offset(&self, text: &str, char_offset: usize) -> Position {
        let (mut line, mut column, mut byte_offset) = (0, 0, 0);

        // `take` stops after `char_offset` characters, or earlier if the text ends.
        for (start, ch) in text.char_indices().take(char_offset) {
            match ch {
                '\n' => {
                    line += 1;
                    column = 0;
                }
                _ => column += 1,
            }
            // Byte index immediately after the character just consumed.
            byte_offset = start + ch.len_utf8();
        }

        Position {
            line,
            column,
            offset: char_offset,
            byte_offset,
        }
    }
    /// Computes position information within `text` (kept for backward compatibility).
    ///
    /// Delegates to `calculate_position_from_byte_offset`, so `offset` is
    /// interpreted as a byte offset into `text`.
    fn calculate_position(&self, text: &str, offset: usize) -> Position {
        // For backward compatibility, assume the caller passed a byte offset.
        self.calculate_position_from_byte_offset(text, offset)
    }
    /// 解析Markdown文本
    pub fn parse(&mut self, text: &str) -> Result<MarkdownParseResult> {
        let start_time = std::time::Instant::now();
@ -222,8 +262,8 @@ impl MarkdownParser {
            node_type: MarkdownNodeType::Document,
            content: source_text.to_string(),
            range: Range {
-                start: Position { line: 0, column: 0, offset: 0 },
+                start: self.calculate_position_from_byte_offset(source_text, 0),
-                end: self.calculate_position(source_text, source_text.len()),
+                end: self.calculate_position_from_byte_offset(source_text, source_text.len()),
            },
            children: Vec::new(),
            attributes: HashMap::new(),
--- a/apps/desktop/src/components/EnhancedMarkdownRenderer.tsx
+++ b/apps/desktop/src/components/EnhancedMarkdownRenderer.tsx
@ -99,13 +99,13 @@ export const EnhancedMarkdownRenderer: React.FC<EnhancedMarkdownRendererProps> =
      return null;
    }
-    // 计算节点在原始文本中的字符偏移位置 
+    // 计算节点在原始文本中的字节偏移位置（与grounding数据的字节偏移匹配）
-    const nodeStartOffset = node.range?.start?.offset || 0;
+    const nodeStartOffset = node.range?.start?.byte_offset || 0;
-    const nodeEndOffset = node.range?.end?.offset || 0;
+    const nodeEndOffset = node.range?.end?.byte_offset || 0;
    // 查找与当前节点位置重叠的grounding支持信息
    const relatedSupports = groundingMetadata.grounding_supports.filter(support => {
-      // 这里是字符串二进制的offset
+      // grounding数据使用字节偏移
      const segmentStart = support.segment.startIndex;
      const segmentEnd = support.segment.endIndex;
@ -113,7 +113,11 @@ export const EnhancedMarkdownRenderer: React.FC<EnhancedMarkdownRendererProps> =
      // 检查节点范围与grounding片段是否有重叠
      return hasOverlap;
    });
-
+    console.log({
      relatedSupports,
      nodeStartOffset,
      nodeEndOffset
    })
    if (relatedSupports.length > 0) {
      // 获取相关的来源信息
      const relatedSources = relatedSupports.flatMap(support =>
--- a/apps/desktop/src/types/markdown.ts
+++ b/apps/desktop/src/types/markdown.ts
@ -37,6 +37,8 @@ export interface Position {
  column: number;
  /** 字符偏移量（从0开始） */
  offset: number;
  /** 字节偏移量（从0开始） */
  byte_offset: number;
 }
 /**