diff --git a/apps/desktop/src-tauri/src/infrastructure/markdown_parser.rs b/apps/desktop/src-tauri/src/infrastructure/markdown_parser.rs index 0a19205..7079767 100644 --- a/apps/desktop/src-tauri/src/infrastructure/markdown_parser.rs +++ b/apps/desktop/src-tauri/src/infrastructure/markdown_parser.rs @@ -63,6 +63,8 @@ pub struct Position { pub column: usize, /// 字符偏移量(从0开始) pub offset: usize, + /// 字节偏移量(从0开始) + pub byte_offset: usize, } /// 范围信息 @@ -154,13 +156,14 @@ impl MarkdownParser { }) } - /// 计算文本中的位置信息 - fn calculate_position(&self, text: &str, offset: usize) -> Position { + /// 根据字节偏移计算位置信息 + fn calculate_position_from_byte_offset(&self, text: &str, byte_offset: usize) -> Position { let mut line = 0; let mut column = 0; + let mut char_offset = 0; - for (i, ch) in text.char_indices() { - if i >= offset { + for (byte_idx, ch) in text.char_indices() { + if byte_idx >= byte_offset { break; } if ch == '\n' { @@ -169,15 +172,52 @@ impl MarkdownParser { } else { column += 1; } + char_offset += 1; } Position { line, column, - offset, + offset: char_offset, + byte_offset, } } + /// 根据字符偏移计算位置信息 + fn calculate_position_from_char_offset(&self, text: &str, char_offset: usize) -> Position { + let mut line = 0; + let mut column = 0; + let mut current_char_offset = 0; + let mut byte_offset = 0; + + for (byte_idx, ch) in text.char_indices() { + if current_char_offset >= char_offset { + break; + } + if ch == '\n' { + line += 1; + column = 0; + } else { + column += 1; + } + current_char_offset += 1; + byte_offset = byte_idx + ch.len_utf8(); + } + + Position { + line, + column, + offset: char_offset, + byte_offset, + } + } + + /// 计算文本中的位置信息(保持向后兼容) + fn calculate_position(&self, text: &str, offset: usize) -> Position { + // 为了向后兼容,假设传入的是字节偏移 + self.calculate_position_from_byte_offset(text, offset) + } + /// 解析Markdown文本 pub fn parse(&mut self, text: &str) -> Result { let start_time = std::time::Instant::now(); @@ -222,8 +262,8 @@ impl MarkdownParser { node_type: MarkdownNodeType::Document, content: source_text.to_string(), range: Range { - start: Position { line: 0, column: 0, offset: 0 }, - end: self.calculate_position(source_text, source_text.len()), + start: self.calculate_position_from_byte_offset(source_text, 0), + end: self.calculate_position_from_byte_offset(source_text, source_text.len()), }, children: Vec::new(), attributes: HashMap::new(), diff --git a/apps/desktop/src/components/EnhancedMarkdownRenderer.tsx b/apps/desktop/src/components/EnhancedMarkdownRenderer.tsx index c922aa0..fccd8ab 100644 --- a/apps/desktop/src/components/EnhancedMarkdownRenderer.tsx +++ b/apps/desktop/src/components/EnhancedMarkdownRenderer.tsx @@ -99,13 +99,13 @@ export const EnhancedMarkdownRenderer: React.FC = return null; } - // 计算节点在原始文本中的字符偏移位置 - const nodeStartOffset = node.range?.start?.offset || 0; - const nodeEndOffset = node.range?.end?.offset || 0; + // 计算节点在原始文本中的字节偏移位置(与grounding数据的字节偏移匹配) + const nodeStartOffset = node.range?.start?.byte_offset || 0; + const nodeEndOffset = node.range?.end?.byte_offset || 0; + // 查找与当前节点位置重叠的grounding支持信息 - const relatedSupports = groundingMetadata.grounding_supports.filter(support => { - // 这里是字符串二进制的offset + // grounding数据使用字节偏移 const segmentStart = support.segment.startIndex; const segmentEnd = support.segment.endIndex; @@ -113,7 +113,11 @@ export const EnhancedMarkdownRenderer: React.FC = // 检查节点范围与grounding片段是否有重叠 return hasOverlap; }); - + console.log({ + relatedSupports, + nodeStartOffset, + nodeEndOffset + }) if (relatedSupports.length > 0) { // 获取相关的来源信息 const relatedSources = relatedSupports.flatMap(support => diff --git a/apps/desktop/src/types/markdown.ts b/apps/desktop/src/types/markdown.ts index 00fd600..be97383 100644 --- a/apps/desktop/src/types/markdown.ts +++ b/apps/desktop/src/types/markdown.ts @@ -37,6 +37,8 @@ export interface Position { column: number; /** 字符偏移量(从0开始) */ offset: number; + /** 字节偏移量(从0开始) */ + byte_offset: number; } /**