mixvideo-v2/apps/desktop/src/pages/tools/DataCleaningTool.tsx

371 lines
13 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import React, { useState } from 'react';
import {
FileText,
Upload,
Download,
Loader2,
CheckCircle,
AlertCircle,
Info,
Trash2,
ArrowLeft
} from 'lucide-react';
import { useNavigate } from 'react-router-dom';
import { invoke } from '@tauri-apps/api/core';
import { open, save } from '@tauri-apps/plugin-dialog';
import { listen } from '@tauri-apps/api/event';
import { useNotifications } from '../../components/NotificationSystem';
/**
 * Payload of the `data-cleaning-progress` event this page listens to while a
 * cleaning run is in flight.
 */
interface DataCleaningProgress {
// Rendered as the left-hand side of the "current / total" progress label.
current: number;
// Rendered as the right-hand side of the "current / total" progress label.
total: number;
// Used directly as the progress bar's CSS width in percent and printed with one decimal.
percentage: number;
// Free-form status text shown beneath the progress heading.
status: string;
}
/**
 * Value the `clean_jsonl_data` command resolves with; drives both the
 * notification and the result panel.
 */
interface DataCleaningResult {
// Selects between the success and failure notification / panel styling.
success: boolean;
// Shown in the result panel, and used as the failure notification body.
message: string;
// Record count reported for the original data; shown in the success summary.
original_count: number;
// Record count reported as removed; shown in the success summary.
removed_count: number;
// Record count reported for the final output; shown in the success summary.
final_count: number;
// Output path reported back by the command; shown in the result panel on success.
output_file: string;
}
/** Dialog filter shared by every file picker on this page: only `.jsonl` files. */
const JSONL_FILTERS = [{ name: 'JSONL Files', extensions: ['jsonl'] }];

/**
 * Converts a rejected `invoke` value into text for the failure notification.
 *
 * Tauri command failures commonly reject with a plain string rather than an
 * `Error` instance, so both shapes are accepted before falling back to the
 * generic "unknown error" text.
 *
 * @param err - The value caught from the failed call.
 * @returns The error's message, the string itself, or the generic fallback.
 */
const describeCleaningError = (err: unknown): string => {
  if (err instanceof Error) {
    return err.message;
  }
  if (typeof err === 'string' && err.length > 0) {
    return err;
  }
  return '未知错误';
};

/**
 * AI image retrieval / data cleaning tool detail page.
 *
 * Lets the user pick a full-data JSONL file, a JSONL file of records to
 * remove, and an output path, then runs the `clean_jsonl_data` backend
 * command while displaying progress received on the
 * `data-cleaning-progress` event.
 */
const DataCleaningTool: React.FC = () => {
  const navigate = useNavigate();
  const [allDataFile, setAllDataFile] = useState<string>('');
  const [removeDataFile, setRemoveDataFile] = useState<string>('');
  const [outputFile, setOutputFile] = useState<string>('');
  const [isProcessing, setIsProcessing] = useState(false);
  const [progress, setProgress] = useState<DataCleaningProgress | null>(null);
  const [result, setResult] = useState<DataCleaningResult | null>(null);
  const { success, error } = useNotifications();

  // Opens a single-file JSONL picker and hands the chosen path to `onPicked`.
  // A cancelled dialog (no string returned) leaves the current state untouched.
  const pickJsonlFile = async (
    onPicked: (path: string) => void,
    failureDetail: string
  ): Promise<void> => {
    try {
      const selected = await open({
        multiple: false,
        filters: JSONL_FILTERS
      });
      if (selected && typeof selected === 'string') {
        onPicked(selected);
      }
    } catch {
      error('文件选择失败', failureDetail);
    }
  };

  // Choose the file containing the full data set.
  const selectAllDataFile = () => pickJsonlFile(setAllDataFile, '无法选择全部数据文件');

  // Choose the file containing the records to remove.
  const selectRemoveDataFile = () =>
    pickJsonlFile(setRemoveDataFile, '无法选择要去除的数据文件');

  // Choose where the cleaned output is written.
  const selectOutputFile = async () => {
    try {
      const selected = await save({
        filters: JSONL_FILTERS,
        defaultPath: 'cleaned_data.jsonl'
      });
      if (selected) {
        setOutputFile(selected);
      }
    } catch {
      error('文件选择失败', '无法选择输出文件');
    }
  };

  // Run the cleaning command, mirroring progress events into component state.
  const startDataCleaning = async () => {
    if (!allDataFile || !removeDataFile || !outputFile) {
      error('参数错误', '请选择所有必需的文件');
      return;
    }

    setIsProcessing(true);
    setProgress(null);
    setResult(null);

    // Declared outside `try` so `finally` can remove the listener even when
    // the command rejects; otherwise each failed run would leave one behind.
    let unlisten: (() => void) | undefined;
    try {
      // Subscribe to progress events before starting the command.
      unlisten = await listen<DataCleaningProgress>('data-cleaning-progress', (event) => {
        setProgress(event.payload);
      });

      // Invoke the backend command.
      const cleaningResult = await invoke<DataCleaningResult>('clean_jsonl_data', {
        allDataFile,
        removeDataFile,
        outputFile
      });
      setResult(cleaningResult);

      if (cleaningResult.success) {
        success('数据清洗完成',
          `原始数据: ${cleaningResult.original_count} 条,` +
          `去除重复: ${cleaningResult.removed_count} 条,` +
          `最终结果: ${cleaningResult.final_count}`
        );
      } else {
        error('数据清洗失败', cleaningResult.message);
      }
    } catch (err) {
      error('数据清洗失败', describeCleaningError(err));
    } finally {
      // `unlisten` is still undefined if `listen` itself failed.
      unlisten?.();
      setIsProcessing(false);
    }
  };

  // Reset every selection and clear any displayed progress/result.
  const resetForm = () => {
    setAllDataFile('');
    setRemoveDataFile('');
    setOutputFile('');
    setProgress(null);
    setResult(null);
  };

  return (
    <div className="space-y-6">
      {/* Page title and back button */}
      <div className="flex items-center gap-4">
        <button
          onClick={() => navigate('/tools')}
          className="flex items-center gap-2 px-3 py-2 text-gray-600 hover:text-gray-900 hover:bg-gray-100 rounded-lg transition-colors"
        >
          <ArrowLeft className="w-4 h-4" />
        </button>
        <div className="flex items-center gap-3">
          <div className="w-10 h-10 bg-gradient-to-br from-purple-500 to-purple-600 rounded-lg flex items-center justify-center shadow-sm">
            <FileText className="w-5 h-5 text-white" />
          </div>
          <div>
            <h1 className="text-2xl font-bold text-gray-900">AI检索图片/</h1>
            <p className="text-gray-600">JSONL格式数据去重处理工具</p>
          </div>
        </div>
      </div>
      {/* Main tool content */}
      <div className="bg-white rounded-xl shadow-sm border border-gray-200 overflow-hidden">
        <div className="p-6 border-b border-gray-200">
          <div className="flex items-center gap-3">
            <FileText className="w-6 h-6 text-purple-600" />
            <div>
              <h2 className="text-lg font-semibold text-gray-900"></h2>
              <p className="text-sm text-gray-600">JSONL格式数据</p>
            </div>
          </div>
        </div>
        <div className="p-6 space-y-6">
          {/* Usage instructions */}
          <div className="bg-blue-50 border border-blue-200 rounded-lg p-4">
            <div className="flex items-start gap-3">
              <Info className="w-5 h-5 text-blue-600 mt-0.5 flex-shrink-0" />
              <div className="text-sm text-blue-800">
                <p className="font-medium mb-2">使</p>
                <ul className="space-y-1 list-disc list-inside">
                  <li>JSONL文件</li>
                  <li>JSONL文件</li>
                  <li>URI字段进行匹配</li>
                  <li></li>
                </ul>
              </div>
            </div>
          </div>
          {/* File selection area */}
          <div className="grid grid-cols-1 md:grid-cols-2 gap-6">
            {/* Full data file */}
            <div className="space-y-2">
              <label className="block text-sm font-medium text-gray-700">
                <span className="text-red-500">*</span>
              </label>
              <div className="flex gap-2">
                <button
                  onClick={selectAllDataFile}
                  disabled={isProcessing}
                  className="flex items-center gap-2 px-4 py-2 bg-gray-100 hover:bg-gray-200 disabled:bg-gray-50 disabled:text-gray-400 text-gray-700 rounded-lg border border-gray-300 transition-colors"
                >
                  <Upload className="w-4 h-4" />
                </button>
              </div>
              {allDataFile && (
                <p className="text-xs text-gray-600 break-all bg-gray-50 p-2 rounded">
                  {allDataFile}
                </p>
              )}
            </div>
            {/* File of records to remove */}
            <div className="space-y-2">
              <label className="block text-sm font-medium text-gray-700">
                <span className="text-red-500">*</span>
              </label>
              <div className="flex gap-2">
                <button
                  onClick={selectRemoveDataFile}
                  disabled={isProcessing}
                  className="flex items-center gap-2 px-4 py-2 bg-gray-100 hover:bg-gray-200 disabled:bg-gray-50 disabled:text-gray-400 text-gray-700 rounded-lg border border-gray-300 transition-colors"
                >
                  <Upload className="w-4 h-4" />
                </button>
              </div>
              {removeDataFile && (
                <p className="text-xs text-gray-600 break-all bg-gray-50 p-2 rounded">
                  {removeDataFile}
                </p>
              )}
            </div>
          </div>
          {/* Output file selection */}
          <div className="space-y-2">
            <label className="block text-sm font-medium text-gray-700">
              <span className="text-red-500">*</span>
            </label>
            <div className="flex gap-2">
              <button
                onClick={selectOutputFile}
                disabled={isProcessing}
                className="flex items-center gap-2 px-4 py-2 bg-gray-100 hover:bg-gray-200 disabled:bg-gray-50 disabled:text-gray-400 text-gray-700 rounded-lg border border-gray-300 transition-colors"
              >
                <Download className="w-4 h-4" />
              </button>
            </div>
            {outputFile && (
              <p className="text-xs text-gray-600 break-all bg-gray-50 p-2 rounded">
                {outputFile}
              </p>
            )}
          </div>
          {/* Progress display */}
          {progress && (
            <div className="bg-gray-50 border border-gray-200 rounded-lg p-4">
              <div className="flex items-center gap-3 mb-3">
                <Loader2 className="w-5 h-5 text-purple-600 animate-spin" />
                <div>
                  <p className="text-sm font-medium text-gray-900">...</p>
                  <p className="text-xs text-gray-600">{progress.status}</p>
                </div>
              </div>
              <div className="w-full bg-gray-200 rounded-full h-2">
                <div
                  className="bg-purple-600 h-2 rounded-full transition-all duration-300"
                  style={{ width: `${progress.percentage}%` }}
                ></div>
              </div>
              <p className="text-xs text-gray-600 mt-2">
                {progress.current} / {progress.total} ({progress.percentage.toFixed(1)}%)
              </p>
            </div>
          )}
          {/* Result display */}
          {result && (
            <div className={`border rounded-lg p-4 ${
              result.success
                ? 'bg-green-50 border-green-200'
                : 'bg-red-50 border-red-200'
            }`}>
              <div className="flex items-start gap-3">
                {result.success ? (
                  <CheckCircle className="w-5 h-5 text-green-600 mt-0.5 flex-shrink-0" />
                ) : (
                  <AlertCircle className="w-5 h-5 text-red-600 mt-0.5 flex-shrink-0" />
                )}
                <div className="flex-1">
                  <p className={`text-sm font-medium ${
                    result.success ? 'text-green-800' : 'text-red-800'
                  }`}>
                    {result.success ? '数据清洗完成' : '数据清洗失败'}
                  </p>
                  <p className={`text-sm mt-1 ${
                    result.success ? 'text-green-700' : 'text-red-700'
                  }`}>
                    {result.message}
                  </p>
                  {result.success && (
                    <div className="mt-2 text-xs text-green-700 space-y-1">
                      <p>: {result.original_count} </p>
                      <p>: {result.removed_count} </p>
                      <p>: {result.final_count} </p>
                      <p>: {result.output_file}</p>
                    </div>
                  )}
                </div>
              </div>
            </div>
          )}
          {/* Action buttons */}
          <div className="flex gap-3 pt-4 border-t border-gray-200">
            <button
              onClick={startDataCleaning}
              disabled={!allDataFile || !removeDataFile || !outputFile || isProcessing}
              className="flex items-center gap-2 px-6 py-2 bg-purple-600 hover:bg-purple-700 disabled:bg-gray-300 disabled:cursor-not-allowed text-white rounded-lg font-medium transition-colors"
            >
              {isProcessing ? (
                <Loader2 className="w-4 h-4 animate-spin" />
              ) : (
                <FileText className="w-4 h-4" />
              )}
              {isProcessing ? '处理中...' : '开始处理'}
            </button>
            <button
              onClick={resetForm}
              disabled={isProcessing}
              className="flex items-center gap-2 px-6 py-2 bg-gray-100 hover:bg-gray-200 disabled:bg-gray-50 disabled:text-gray-400 text-gray-700 rounded-lg font-medium transition-colors"
            >
              <Trash2 className="w-4 h-4" />
            </button>
          </div>
        </div>
      </div>
    </div>
  );
};
export default DataCleaningTool;