#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
YOLO Annotation Utility Tool
YOLO format annotation file manipulation utility

Usage examples:
    # Interactive mode (recommended)
    python yolo_annotation_modify.py

    # Remove classes
    python yolo_annotation_modify.py remove --dataset dataset/yolo --classes "0,1"

    # Merge classes
    python yolo_annotation_modify.py merge --dataset dataset/yolo --source "0,1,2" --target "vehicle"

    # Rename classes
    python yolo_annotation_modify.py rename --dataset dataset/yolo --mapping "0:vehicle,1:person"

    # Show info
    python yolo_annotation_modify.py info --dataset dataset/yolo
"""

import yaml
import argparse
from pathlib import Path
from typing import Dict, List, Optional, Set, Union
import shutil
from datetime import datetime
import sys
import os
from collections import defaultdict


class YOLOAnnotationEditor:
    """Editor for a YOLO dataset's class list and label files.

    Class names are read from ``<dataset>/data.yaml`` (key ``names``) and
    annotations from every ``*.txt`` file below ``<dataset>/labels``.  The
    first whitespace-separated token of each non-blank label line is treated
    as the integer class ID.

    ``remove_classes``, ``merge_classes`` and ``reindex_classes`` rewrite the
    label files immediately; ``data.yaml`` is only written by ``save_yaml``.
    Call ``backup_labels`` *before* any of those operations if a backup of the
    untouched labels is wanted.
    """

    def __init__(self, dataset_path: str):
        """
        Args:
            dataset_path: Path to YOLO dataset directory (containing data.yaml)

        Raises:
            FileNotFoundError: If ``data.yaml`` does not exist in the directory.
            ValueError: If ``data.yaml`` does not contain a mapping at its root.
        """
        self.dataset_path = Path(dataset_path)
        self.yaml_path = self.dataset_path / "data.yaml"

        if not self.yaml_path.exists():
            raise FileNotFoundError(f"data.yaml not found in {dataset_path}")

        self.config = self._load_yaml()
        self.labels_path = self.dataset_path / "labels"

        # Find all label files
        self.label_files = list(self.labels_path.rglob("*.txt"))
        print(f"[+] Found {len(self.label_files)} label files")

    def _load_yaml(self) -> dict:
        """Load data.yaml and normalise ``names`` to an ``{id: name}`` dict.

        A list-style ``names`` entry is converted to a dict keyed by list
        index so the rest of the class can rely on ``names.items()``.  An
        empty file is treated as an empty configuration.
        """
        with open(self.yaml_path, 'r', encoding='utf-8') as f:
            config = yaml.safe_load(f)

        if config is None:
            config = {}
        if not isinstance(config, dict):
            raise ValueError(f"{self.yaml_path} must contain a YAML mapping")

        names = config.get('names')
        if isinstance(names, (list, tuple)):
            config['names'] = dict(enumerate(names))
        return config

    def _set_names(self, new_names: Dict[int, str]) -> None:
        """Store a new class table and keep ``nc`` in sync when it is present."""
        self.config['names'] = new_names
        if 'nc' in self.config:
            self.config['nc'] = len(new_names)

    def _resolve_class_ids(self, items: List[Union[int, str]], action: str) -> Set[int]:
        """Translate a mixed list of class IDs / names into a set of known IDs.

        Each item is tried as an integer ID first and as a class name second.
        Every match is reported with ``action`` as the label; unknown items
        are reported and skipped.
        """
        names = self.config.get('names', {})
        name_to_id = {name: cid for cid, name in names.items()}

        resolved: Set[int] = set()
        for item in items:
            try:
                class_id = int(item)
            except ValueError:
                # Not numeric: fall back to a lookup by name
                if item in name_to_id:
                    class_id = name_to_id[item]
                    resolved.add(class_id)
                    print(f" - {action}: {item} (ID: {class_id})")
                else:
                    print(f"[!] 이름 '{item}'을 찾을 수 없습니다")
                continue

            if class_id in names:
                resolved.add(class_id)
                print(f" - {action}: {names[class_id]} (ID: {class_id})")
            else:
                print(f"[!] ID {class_id}를 찾을 수 없습니다")
        return resolved

    def _rewrite_labels(self, remap: Dict[int, Optional[int]]) -> int:
        """Apply a class-ID remapping to every label file.

        Args:
            remap: ``{old_id: new_id}``.  A value of ``None`` drops the
                annotation.  Lines whose class ID is not a key are kept
                verbatim.  Blank lines are always dropped.

        Returns:
            Number of annotation lines whose class ID was a key of ``remap``
            (counted only for files that were processed without error).
        """
        affected_total = 0
        for label_file in self.label_files:
            try:
                with open(label_file, 'r') as f:
                    lines = f.readlines()

                new_lines = []
                affected = 0
                for line in lines:
                    parts = line.split()
                    if not parts:
                        continue
                    class_id = int(parts[0])
                    if class_id not in remap:
                        new_lines.append(line)
                        continue
                    affected += 1
                    new_id = remap[class_id]
                    if new_id is not None:
                        parts[0] = str(new_id)
                        new_lines.append(' '.join(parts) + '\n')

                # Skip the write when nothing changed so untouched files keep
                # their modification time.
                if new_lines != lines:
                    with open(label_file, 'w') as f:
                        f.writelines(new_lines)
                affected_total += affected
            except (OSError, ValueError) as e:
                # A malformed or unreadable file is left as it was on disk
                print(f"[!] Error processing {label_file}: {e}")
        return affected_total

    def backup_labels(self) -> Path:
        """
        Backup entire labels folder

        The copy is created next to the labels folder as
        ``labels_backup_<YYYYmmdd_HHMMSS>``.

        Returns:
            Path to backup folder
        """
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        backup_path = self.dataset_path / f"labels_backup_{timestamp}"

        print("[*] labels 폴더 백업 중...")
        shutil.copytree(self.labels_path, backup_path)
        print(f"[+] 백업 생성됨: {backup_path}")

        return backup_path

    def save_yaml(self, backup: bool = True):
        """
        Save modified data.yaml

        Args:
            backup: Whether to backup original file
        """
        if backup and self.yaml_path.exists():
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            backup_path = self.yaml_path.parent / f"data_backup_{timestamp}.yaml"
            shutil.copy2(self.yaml_path, backup_path)
            print(f"[+] data.yaml 백업 생성됨: {backup_path}")

        with open(self.yaml_path, 'w', encoding='utf-8') as f:
            yaml.dump(self.config, f, allow_unicode=True, sort_keys=False)

        print("[+] data.yaml 저장 완료")

    def get_class_info(self) -> Dict:
        """Get class information and count annotations

        Returns:
            ``{class_id: {'id', 'name', 'annotation_count'}}`` for every class
            listed in ``names``.  Annotations whose class ID is not listed in
            ``names`` are not reported.
        """
        names = self.config.get('names', {})

        # Count annotations per class
        class_counts = defaultdict(int)
        for label_file in self.label_files:
            try:
                with open(label_file, 'r') as f:
                    for line in f:
                        line = line.strip()
                        if line:
                            class_counts[int(line.split()[0])] += 1
            except (OSError, ValueError) as e:
                print(f"[!] Error reading {label_file}: {e}")

        return {
            class_id: {
                'id': class_id,
                'name': class_name,
                'annotation_count': class_counts.get(class_id, 0),
            }
            for class_id, class_name in names.items()
        }

    def print_info(self):
        """Print annotation information"""
        print("\n" + "="*60)
        print(f"Dataset: {self.dataset_path}")
        print("="*60)

        names = self.config.get('names', {})

        print("\n전체 통계:")
        print(f" - 라벨 파일 수: {len(self.label_files)}")
        print(f" - 클래스 수: {len(names)}")

        print("\n클래스 상세:")
        print(f"{'ID':<6} {'클래스명':<35} {'어노테이션 수':<15}")
        print("-" * 60)

        class_info = self.get_class_info()
        total_annotations = 0
        for class_id in sorted(class_info):
            info = class_info[class_id]
            ann_count = info['annotation_count']
            total_annotations += ann_count
            print(f"{info['id']:<6} {info['name']:<35} {ann_count:<15}")

        print("-" * 60)
        print(f"{'합계':<42} {total_annotations:<15}")
        print("="*60 + "\n")

    def remove_classes(self, class_ids: List[Union[int, str]]) -> 'YOLOAnnotationEditor':
        """
        Remove specific classes

        The classes are dropped from ``names`` and every annotation that uses
        one of them is deleted from the label files.  Remaining class IDs are
        not renumbered; use ``reindex_classes`` for that.

        Args:
            class_ids: List of class IDs or names to remove

        Returns:
            ``self``, to allow chaining.
        """
        names = self.config.get('names', {})

        ids_to_remove = self._resolve_class_ids(class_ids, "제거")
        if not ids_to_remove:
            print("[!] 제거할 클래스를 찾을 수 없습니다")
            return self

        # Remove from data.yaml
        self._set_names({cid: name for cid, name in names.items()
                         if cid not in ids_to_remove})

        # Remove annotations from label files
        total_removed = self._rewrite_labels({cid: None for cid in ids_to_remove})

        print(f"[+] {total_removed}개의 어노테이션이 제거되었습니다")
        return self

    def merge_classes(self, source_ids: List[Union[int, str]], target_name: str) -> 'YOLOAnnotationEditor':
        """
        Merge multiple classes into one

        The merged class takes the smallest ID among the source classes.

        Args:
            source_ids: List of source class IDs or names to merge
            target_name: Target class name after merge

        Returns:
            ``self``, to allow chaining.
        """
        names = self.config.get('names', {})

        source_class_ids = self._resolve_class_ids(source_ids, "통합 대상")
        if not source_class_ids:
            print("[!] 통합할 클래스를 찾을 수 없습니다")
            return self

        # Use the smallest ID as the new class ID
        new_class_id = min(source_class_ids)

        # Update data.yaml (kept in ascending ID order so the file stays readable)
        new_names = {cid: name for cid, name in names.items()
                     if cid not in source_class_ids}
        new_names[new_class_id] = target_name
        self._set_names(dict(sorted(new_names.items())))

        # Update label files
        total_merged = self._rewrite_labels(
            {cid: new_class_id for cid in source_class_ids})

        print(f"[+] '{target_name}' (ID: {new_class_id})로 통합되었습니다")
        print(f"[+] {total_merged}개의 어노테이션이 변경되었습니다")
        return self

    def rename_classes(self, mapping: Dict[Union[int, str], str]) -> 'YOLOAnnotationEditor':
        """
        Rename classes

        Only ``names`` is changed; label files are not touched.

        Args:
            mapping: Dictionary of {old_id_or_name: new_name}

        Returns:
            ``self``, to allow chaining.
        """
        names = self.config.get('names', {})
        # Built once up front: name lookups refer to the names as they were
        # before this call started renaming.
        name_to_id = {name: cid for cid, name in names.items()}

        renamed_count = 0
        for old_key, new_name in mapping.items():
            try:
                # Try as ID first
                class_id = int(old_key)
            except ValueError:
                # Try as name
                if old_key in name_to_id:
                    class_id = name_to_id[old_key]
                    names[class_id] = new_name
                    print(f" - 변경: '{old_key}' → '{new_name}' (ID: {class_id})")
                    renamed_count += 1
                else:
                    print(f"[!] 이름 '{old_key}'를 찾을 수 없습니다")
                continue

            if class_id in names:
                old_name = names[class_id]
                names[class_id] = new_name
                print(f" - 변경: '{old_name}' → '{new_name}' (ID: {class_id})")
                renamed_count += 1
            else:
                print(f"[!] ID {class_id}를 찾을 수 없습니다")

        self.config['names'] = names
        print(f"[+] {renamed_count}개 클래스 이름이 변경되었습니다")
        return self

    def reindex_classes(self, start_id: int = 0) -> 'YOLOAnnotationEditor':
        """
        Reindex class IDs sequentially

        Classes are renumbered in ascending order of their current ID.
        Annotations whose class ID is not listed in ``names`` are left
        unchanged.

        Args:
            start_id: Starting ID (default: 0 for YOLO)

        Returns:
            ``self``, to allow chaining.
        """
        names = self.config.get('names', {})

        # Create ID mapping: old ID -> new ID
        id_mapping = {
            old_id: new_id
            for new_id, old_id in enumerate(sorted(names), start=start_id)
        }
        for old_id, new_id in id_mapping.items():
            print(f" - ID 변경: {old_id} → {new_id} ({names[old_id]})")

        # Update data.yaml
        self._set_names({new_id: names[old_id]
                         for old_id, new_id in id_mapping.items()})

        # Update label files
        total_updated = self._rewrite_labels(id_mapping)

        print(f"[+] 클래스 ID가 재할당되었습니다 (시작: {start_id})")
        print(f"[+] {total_updated}개의 어노테이션이 업데이트되었습니다")
        return self


def _prompt_dataset_path() -> str:
    """Ask the user for a dataset directory until one containing data.yaml is given."""
    from glob import glob

    yaml_files = glob("**/data.yaml", recursive=True)

    while True:
        if yaml_files:
            print("\n발견된 YOLO 데이터셋:")
            for idx, f in enumerate(yaml_files, 1):
                print(f" {idx}. {Path(f).parent}")
            print(" 0. 직접 경로 입력")

            choice = input("\n데이터셋 번호 선택 (직접 입력은 0): ").strip()
            if choice == '0':
                dataset_path = input("데이터셋 디렉토리 경로 입력: ").strip()
            else:
                try:
                    index = int(choice)
                except ValueError:
                    index = 0
                # Explicit range check: a negative number must not wrap
                # around to the end of the list.
                if not 1 <= index <= len(yaml_files):
                    print("[!] 잘못된 선택입니다. 다시 선택해주세요.")
                    continue
                dataset_path = str(Path(yaml_files[index - 1]).parent)
        else:
            dataset_path = input("데이터셋 디렉토리 경로 입력: ").strip()

        # An empty answer means the current directory
        dataset_path = dataset_path or "."

        yaml_path = Path(dataset_path) / "data.yaml"
        if yaml_path.exists():
            return dataset_path

        print(f"[!] data.yaml을 찾을 수 없습니다: {yaml_path}")
        print("[!] 다시 입력해주세요.")


def _print_current_classes(editor: YOLOAnnotationEditor) -> None:
    """Print the editor's current ``id: name`` table in ascending ID order."""
    print("\n현재 클래스:")
    for cid, name in sorted(editor.config.get('names', {}).items()):
        print(f" {cid}: {name}")


def interactive_mode():
    """Interactive mode for user-friendly editing

    Walks the user through dataset selection, a loop of edit operations and
    finally saving data.yaml.  Label files are rewritten as each operation
    runs, so the labels backup is offered once, right before the first
    operation that touches them.
    """
    print("\n" + "="*60)
    print(" YOLO 어노테이션 편집기 - 대화형 모드")
    print("="*60)

    # 1. Select dataset
    print("\n[1/3] 데이터셋 선택")
    print("-" * 60)

    dataset_path = _prompt_dataset_path()

    # Load and show info
    print(f"\n[+] 로딩 중: {dataset_path}")
    editor = YOLOAnnotationEditor(dataset_path)
    editor.print_info()

    labels_backup_offered = False

    def offer_labels_backup():
        """Offer the labels backup once, while the label files are still untouched."""
        nonlocal labels_backup_offered
        if labels_backup_offered:
            return
        labels_backup_offered = True
        print("\n[!] 주의: labels 폴더의 모든 .txt 파일이 수정됩니다.")
        answer = input("labels 폴더 백업 생성? (Y/n): ").strip().lower()
        if answer != 'n':
            editor.backup_labels()

    # 2. Select operations
    print("\n[2/3] 작업 선택")
    print("-" * 60)

    operations = []
    while True:
        print("\n사용 가능한 작업:")
        print(" 1. 클래스 제거")
        print(" 2. 클래스 통합 (병합)")
        print(" 3. 클래스 이름 변경")
        print(" 4. 클래스 ID 재할당")
        print(" 5. 현재 정보 표시")
        print(" 0. 완료 (저장 단계로)")

        choice = input("\n작업 선택 (0-5): ").strip()

        if choice == '0':
            if not operations:
                print("[!] 선택된 작업이 없습니다. 종료합니다...")
                return
            break

        elif choice == '1':  # Remove
            _print_current_classes(editor)
            classes = input("\n제거할 클래스 입력 (ID 또는 이름, 쉼표로 구분): ").strip()
            if classes:
                class_list = [c.strip() for c in classes.split(',')]
                offer_labels_backup()
                print(f"\n[*] 클래스 제거 중: {class_list}")
                editor.remove_classes(class_list)
                operations.append(f"제거: {', '.join(class_list)}")

        elif choice == '2':  # Merge
            _print_current_classes(editor)
            source = input("\n통합할 클래스 입력 (ID 또는 이름, 쉼표로 구분): ").strip()
            if source:
                target = input("통합 후 클래스 이름 입력: ").strip()
                if not target:
                    # An empty name would create an unnamed class
                    print("[!] 통합 후 클래스 이름이 비어 있습니다. 통합을 건너뜁니다.")
                    continue
                source_list = [c.strip() for c in source.split(',')]
                offer_labels_backup()
                print(f"\n[*] '{target}'로 통합 중: {source_list}")
                editor.merge_classes(source_list, target)
                operations.append(f"통합: {', '.join(source_list)} → {target}")

        elif choice == '3':  # Rename (data.yaml only, labels untouched)
            _print_current_classes(editor)
            print("\n매핑 입력 (형식: 기존:새이름,기존:새이름 - 쉼표로 구분)")
            print("예시: 0:vehicle,1:person 또는 truck:vehicle,person:human")
            mapping_input = input("\n입력: ").strip()
            if mapping_input:
                mapping = {}
                for pair in mapping_input.split(','):
                    if ':' in pair:
                        old, new = pair.split(':', 1)
                        mapping[old.strip()] = new.strip()
                if mapping:
                    print("\n[*] 클래스 이름 변경 중")
                    editor.rename_classes(mapping)
                    operations.append(f"이름 변경: {len(mapping)}개 클래스")

        elif choice == '4':  # Reindex
            while True:
                start_id = input("\n시작 ID 입력 (기본값: 0): ").strip()
                if not start_id:
                    start_id = 0
                    break
                try:
                    start_id = int(start_id)
                    break
                except ValueError:
                    print("[!] 숫자를 입력해주세요.")

            offer_labels_backup()
            print(f"\n[*] 클래스 ID 재할당 중 (시작: {start_id})")
            editor.reindex_classes(start_id=start_id)
            operations.append(f"ID 재할당: {start_id}부터")

        elif choice == '5':  # Show info
            editor.print_info()

        else:
            print("[!] 잘못된 선택입니다")

    # 3. Save
    print("\n[3/3] 결과 저장")
    print("-" * 60)

    print("\n수행된 작업:")
    for idx, op in enumerate(operations, 1):
        print(f" {idx}. {op}")

    create_yaml_backup = input("data.yaml 백업 생성? (Y/n): ").strip().lower()
    yaml_backup = create_yaml_backup != 'n'

    print("\n[*] 저장 중...")
    editor.save_yaml(backup=yaml_backup)

    print("\n" + "="*60)
    print(" 작업이 성공적으로 완료되었습니다!")
    print("="*60)
    print(f"\n데이터셋: {dataset_path}")
    print(f"설정 파일: {editor.yaml_path}")
    print(f"라벨 파일: {len(editor.label_files)}개 업데이트됨")
    print()


def _add_backup_flags(subparser: argparse.ArgumentParser) -> None:
    """Add the backup opt-out flags shared by all modifying sub-commands."""
    subparser.add_argument('--no-backup', action='store_true',
                           help='Do not create backup for labels and yaml')
    subparser.add_argument('--no-labels-backup', action='store_true',
                           help='Do not create backup for labels folder')


def _parse_rename_mapping(spec: str) -> Dict[str, str]:
    """Parse ``old:new,old2:new2`` into a dict.

    Only the first ``:`` of an entry separates old from new.  Empty entries
    (e.g. from a trailing comma) are ignored.

    Raises:
        ValueError: If an entry has no ``:`` or an empty side.
    """
    mapping = {}
    for pair in spec.split(','):
        if not pair.strip():
            continue
        old, sep, new = pair.partition(':')
        old, new = old.strip(), new.strip()
        if not sep or not old or not new:
            raise ValueError(
                f"invalid mapping entry {pair.strip()!r} (expected old:new)")
        mapping[old] = new
    return mapping


def main():
    """Command-line entry point; with no arguments, starts interactive mode."""
    parser = argparse.ArgumentParser(
        description='YOLO Annotation Editing Utility',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__
    )

    subparsers = parser.add_subparsers(dest='command', help='Available commands')

    # info command
    info_parser = subparsers.add_parser('info', help='Show annotation info')
    info_parser.add_argument('-d', '--dataset', required=True, help='Dataset directory')

    # remove command
    remove_parser = subparsers.add_parser('remove', help='Remove classes')
    remove_parser.add_argument('-d', '--dataset', required=True, help='Dataset directory')
    remove_parser.add_argument('-c', '--classes', required=True,
                               help='Classes to remove (comma-separated IDs or names)')
    _add_backup_flags(remove_parser)

    # merge command
    merge_parser = subparsers.add_parser('merge', help='Merge classes')
    merge_parser.add_argument('-d', '--dataset', required=True, help='Dataset directory')
    merge_parser.add_argument('-s', '--source', required=True,
                              help='Source classes to merge (comma-separated)')
    merge_parser.add_argument('-t', '--target', required=True,
                              help='Target class name after merge')
    _add_backup_flags(merge_parser)

    # rename command
    rename_parser = subparsers.add_parser('rename', help='Rename classes')
    rename_parser.add_argument('-d', '--dataset', required=True, help='Dataset directory')
    rename_parser.add_argument('-m', '--mapping', required=True,
                               help='Name mapping (old:new,old2:new2 format)')
    _add_backup_flags(rename_parser)

    # reindex command
    reindex_parser = subparsers.add_parser('reindex', help='Reindex class IDs')
    reindex_parser.add_argument('-d', '--dataset', required=True, help='Dataset directory')
    reindex_parser.add_argument('--start', type=int, default=0,
                                help='Starting ID (default: 0)')
    _add_backup_flags(reindex_parser)

    # If no arguments provided, enter interactive mode
    if len(sys.argv) == 1:
        interactive_mode()
        return

    args = parser.parse_args()

    if not args.command:
        parser.print_help()
        return

    # Validate user input before anything on disk is touched
    mapping = None
    if args.command == 'rename':
        try:
            mapping = _parse_rename_mapping(args.mapping)
        except ValueError as e:
            parser.error(str(e))

    editor = YOLOAnnotationEditor(args.dataset)

    if args.command == 'info':
        editor.print_info()
        return

    # Backup labels if requested (must happen before the operation below,
    # because operations rewrite label files in place)
    if not args.no_backup and not args.no_labels_backup:
        editor.backup_labels()

    if args.command == 'remove':
        classes = [c.strip() for c in args.classes.split(',')]
        print("\n[*] 클래스 제거 중...")
        editor.remove_classes(classes)

    elif args.command == 'merge':
        source_classes = [c.strip() for c in args.source.split(',')]
        print("\n[*] 클래스 통합 중...")
        editor.merge_classes(source_classes, args.target)

    elif args.command == 'rename':
        print("\n[*] 클래스 이름 변경 중...")
        editor.rename_classes(mapping)

    elif args.command == 'reindex':
        print("\n[*] 클래스 ID 재할당 중...")
        editor.reindex_classes(start_id=args.start)

    editor.save_yaml(backup=not args.no_backup)


if __name__ == '__main__':
    main()