fix: markdown解析bug
This commit is contained in:
parent
725514af2d
commit
dde679fd6e
|
|
@ -63,6 +63,8 @@ pub struct Position {
|
|||
pub column: usize,
|
||||
/// 字符偏移量(从0开始)
|
||||
pub offset: usize,
|
||||
/// 字节偏移量(从0开始)
|
||||
pub byte_offset: usize,
|
||||
}
|
||||
|
||||
/// 范围信息
|
||||
|
|
@ -154,13 +156,14 @@ impl MarkdownParser {
|
|||
})
|
||||
}
|
||||
|
||||
/// 计算文本中的位置信息
|
||||
fn calculate_position(&self, text: &str, offset: usize) -> Position {
|
||||
/// 根据字节偏移计算位置信息
|
||||
fn calculate_position_from_byte_offset(&self, text: &str, byte_offset: usize) -> Position {
|
||||
let mut line = 0;
|
||||
let mut column = 0;
|
||||
let mut char_offset = 0;
|
||||
|
||||
for (i, ch) in text.char_indices() {
|
||||
if i >= offset {
|
||||
for (byte_idx, ch) in text.char_indices() {
|
||||
if byte_idx >= byte_offset {
|
||||
break;
|
||||
}
|
||||
if ch == '\n' {
|
||||
|
|
@ -169,15 +172,52 @@ impl MarkdownParser {
|
|||
} else {
|
||||
column += 1;
|
||||
}
|
||||
char_offset += 1;
|
||||
}
|
||||
|
||||
Position {
|
||||
line,
|
||||
column,
|
||||
offset,
|
||||
offset: char_offset,
|
||||
byte_offset,
|
||||
}
|
||||
}
|
||||
|
||||
/// 根据字符偏移计算位置信息
|
||||
fn calculate_position_from_char_offset(&self, text: &str, char_offset: usize) -> Position {
|
||||
let mut line = 0;
|
||||
let mut column = 0;
|
||||
let mut current_char_offset = 0;
|
||||
let mut byte_offset = 0;
|
||||
|
||||
for (byte_idx, ch) in text.char_indices() {
|
||||
if current_char_offset >= char_offset {
|
||||
break;
|
||||
}
|
||||
if ch == '\n' {
|
||||
line += 1;
|
||||
column = 0;
|
||||
} else {
|
||||
column += 1;
|
||||
}
|
||||
current_char_offset += 1;
|
||||
byte_offset = byte_idx + ch.len_utf8();
|
||||
}
|
||||
|
||||
Position {
|
||||
line,
|
||||
column,
|
||||
offset: char_offset,
|
||||
byte_offset,
|
||||
}
|
||||
}
|
||||
|
||||
/// 计算文本中的位置信息(保持向后兼容)
|
||||
fn calculate_position(&self, text: &str, offset: usize) -> Position {
|
||||
// 为了向后兼容,假设传入的是字节偏移
|
||||
self.calculate_position_from_byte_offset(text, offset)
|
||||
}
|
||||
|
||||
/// 解析Markdown文本
|
||||
pub fn parse(&mut self, text: &str) -> Result<MarkdownParseResult> {
|
||||
let start_time = std::time::Instant::now();
|
||||
|
|
@ -222,8 +262,8 @@ impl MarkdownParser {
|
|||
node_type: MarkdownNodeType::Document,
|
||||
content: source_text.to_string(),
|
||||
range: Range {
|
||||
start: Position { line: 0, column: 0, offset: 0 },
|
||||
end: self.calculate_position(source_text, source_text.len()),
|
||||
start: self.calculate_position_from_byte_offset(source_text, 0),
|
||||
end: self.calculate_position_from_byte_offset(source_text, source_text.len()),
|
||||
},
|
||||
children: Vec::new(),
|
||||
attributes: HashMap::new(),
|
||||
|
|
|
|||
|
|
@ -99,13 +99,13 @@ export const EnhancedMarkdownRenderer: React.FC<EnhancedMarkdownRendererProps> =
|
|||
return null;
|
||||
}
|
||||
|
||||
// 计算节点在原始文本中的字符偏移位置
|
||||
const nodeStartOffset = node.range?.start?.offset || 0;
|
||||
const nodeEndOffset = node.range?.end?.offset || 0;
|
||||
// 计算节点在原始文本中的字节偏移位置(与grounding数据的字节偏移匹配)
|
||||
const nodeStartOffset = node.range?.start?.byte_offset || 0;
|
||||
const nodeEndOffset = node.range?.end?.byte_offset || 0;
|
||||
|
||||
// 查找与当前节点位置重叠的grounding支持信息
|
||||
|
||||
const relatedSupports = groundingMetadata.grounding_supports.filter(support => {
|
||||
// 这里是字符串二进制的offset
|
||||
// grounding数据使用字节偏移
|
||||
const segmentStart = support.segment.startIndex;
|
||||
const segmentEnd = support.segment.endIndex;
|
||||
|
||||
|
|
@ -113,7 +113,11 @@ export const EnhancedMarkdownRenderer: React.FC<EnhancedMarkdownRendererProps> =
|
|||
// 检查节点范围与grounding片段是否有重叠
|
||||
return hasOverlap;
|
||||
});
|
||||
|
||||
console.log({
|
||||
relatedSupports,
|
||||
nodeStartOffset,
|
||||
nodeEndOffset
|
||||
})
|
||||
if (relatedSupports.length > 0) {
|
||||
// 获取相关的来源信息
|
||||
const relatedSources = relatedSupports.flatMap(support =>
|
||||
|
|
|
|||
|
|
@ -37,6 +37,8 @@ export interface Position {
|
|||
column: number;
|
||||
/** 字符偏移量(从0开始) */
|
||||
offset: number;
|
||||
/** 字节偏移量(从0开始) */
|
||||
byte_offset: number;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
Loading…
Reference in New Issue