UTILITY_AI_ANNOTATION_TOOL/mmdet_annotation_modify.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
COCO Annotation Utility Tool
COCO format annotation file manipulation utility

Usage examples:
    # Interactive mode (recommended)
    python Utility_lableing_tool.py

    # Remove classes
    python Utility_lableing_tool.py remove --input annotations.json --output new.json --classes "person,car"

    # Merge classes
    python Utility_lableing_tool.py merge --input annotations.json --output new.json --source "car,truck,bus" --target "vehicle"

    # Rename classes
    python Utility_lableing_tool.py rename --input annotations.json --output new.json --mapping "old_name:new_name,another_old:another_new"

    # Show info
    python Utility_lableing_tool.py info --input annotations.json
"""

import json
import argparse
from pathlib import Path
from typing import Dict, List, Set, Union
import shutil
from datetime import datetime
import sys
import os


class COCOAnnotationEditor:
    """Load, edit and save a COCO format annotation file.

    The parsed JSON document is kept in ``self.data``.  Editing methods
    mutate it in place and return ``self`` so calls can be chained; nothing
    is written to disk until :meth:`save` is called.
    """

    def __init__(self, annotation_path: str):
        """
        Args:
            annotation_path: Path to COCO format annotation JSON file
        """
        self.annotation_path = Path(annotation_path)
        self.data = self._load_annotation()

    def _load_annotation(self) -> dict:
        """Read ``self.annotation_path`` as UTF-8 JSON and return the parsed object."""
        with open(self.annotation_path, 'r', encoding='utf-8') as f:
            return json.load(f)

    def _category_indexes(self) -> tuple:
        """Return ``(by_id, by_name)`` lookup dicts over the current categories."""
        categories = self.data.get('categories', [])
        by_id = {cat['id']: cat for cat in categories}
        by_name = {cat['name']: cat for cat in categories}
        return by_id, by_name

    @staticmethod
    def _resolve_category(key, by_id: dict, by_name: dict,
                          name_particle: str = '을') -> Union[dict, None]:
        """
        Resolve a user supplied key (category ID or name) to a category dict.

        An integer-like key is tried as an ID first.  If no category has that
        ID the key is still tried as a name, so a category whose name is
        purely numeric can be addressed.  A warning is printed when nothing
        matches.

        Args:
            key: Category ID (int or numeric string) or category name
            by_id: Mapping of category ID to category dict
            by_name: Mapping of category name to category dict
            name_particle: Korean object particle used in the "name not
                found" warning, so each caller keeps its own wording

        Returns:
            The matching category dict, or None when nothing matches
        """
        try:
            cat_id = int(key)
        except ValueError:
            cat_id = None

        if cat_id is not None and cat_id in by_id:
            return by_id[cat_id]
        if key in by_name:
            return by_name[key]

        if cat_id is not None:
            print(f"[!] ID {cat_id}를 찾을 수 없습니다")
        else:
            print(f"[!] 이름 '{key}'{name_particle} 찾을 수 없습니다")
        return None

    def save(self, output_path: str, backup: bool = True):
        """
        Save modified annotation

        Args:
            output_path: Output file path
            backup: Whether to copy an already existing output file to a
                timestamped ``*_backup_*`` sibling before overwriting it
        """
        output_path = Path(output_path)

        # Keep a timestamped copy of whatever is about to be overwritten
        if backup and output_path.exists():
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            backup_path = output_path.parent / f"{output_path.stem}_backup_{timestamp}{output_path.suffix}"
            shutil.copy2(output_path, backup_path)
            print(f"[+] 백업 생성됨: {backup_path}")

        # Create the destination directory so saving into a new folder works
        output_path.parent.mkdir(parents=True, exist_ok=True)

        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(self.data, f, ensure_ascii=False, indent=2)
        print(f"[+] 저장 완료: {output_path}")

    def get_category_info(self) -> Dict:
        """
        Get category information

        Returns:
            Dict keyed by category ID; each value holds the category's
            ``id``, ``name``, ``supercategory`` ('' when absent) and
            ``annotation_count``
        """
        # Count annotations per category
        category_counts = {}
        for ann in self.data.get('annotations', []):
            cat_id = ann['category_id']
            category_counts[cat_id] = category_counts.get(cat_id, 0) + 1

        return {
            cat['id']: {
                'id': cat['id'],
                'name': cat['name'],
                'supercategory': cat.get('supercategory', ''),
                'annotation_count': category_counts.get(cat['id'], 0),
            }
            for cat in self.data.get('categories', [])
        }

    def print_info(self):
        """Print file-level totals and a per-category annotation count table"""
        print("\n" + "="*60)
        print(f"파일: {self.annotation_path}")
        print("="*60)

        print(f"\n전체 통계:")
        print(f"  - 이미지 수: {len(self.data.get('images', []))}")
        print(f"  - 어노테이션 수: {len(self.data.get('annotations', []))}")
        print(f"  - 클래스 수: {len(self.data.get('categories', []))}")

        print(f"\n클래스 상세:")
        print(f"{'ID':<6} {'클래스명':<30} {'어노테이션 수':<15}")
        print("-" * 55)

        category_info = self.get_category_info()
        # The total only covers annotations whose category is listed
        total_annotations = 0
        for cat_id in sorted(category_info):
            info = category_info[cat_id]
            ann_count = info['annotation_count']
            total_annotations += ann_count
            print(f"{info['id']:<6} {info['name']:<30} {ann_count:<15}")

        print("-" * 55)
        print(f"{'합계':<37} {total_annotations:<15}")
        print("="*60 + "\n")

    def remove_categories(self, category_names: List[str]) -> 'COCOAnnotationEditor':
        """
        Remove specific categories together with their annotations

        Args:
            category_names: List of category names or IDs to remove

        Returns:
            self, to allow chaining
        """
        by_id, by_name = self._category_indexes()

        # Find category IDs to remove
        categories_to_remove = set()
        for item in category_names:
            cat = self._resolve_category(item, by_id, by_name)
            if cat is None:
                continue
            categories_to_remove.add(cat['id'])
            print(f"  - 제거: {cat['name']} (ID: {cat['id']})")

        # Update categories
        self.data['categories'] = [
            cat for cat in self.data.get('categories', [])
            if cat['id'] not in categories_to_remove
        ]

        # Remove annotations of removed categories
        original_count = len(self.data.get('annotations', []))
        self.data['annotations'] = [
            ann for ann in self.data.get('annotations', [])
            if ann['category_id'] not in categories_to_remove
        ]
        removed_count = original_count - len(self.data['annotations'])

        print(f"[+] {removed_count}개의 어노테이션이 제거되었습니다")
        return self

    def merge_categories(self, source_names: List[str], target_name: str,
                        target_supercategory: str = '') -> 'COCOAnnotationEditor':
        """
        Merge multiple categories into one

        The merged category takes the lowest ID among the sources and is
        appended to the end of the category list.

        Args:
            source_names: List of source category names or IDs to merge
            target_name: Target category name after merge
            target_supercategory: Target supercategory (optional)

        Returns:
            self, to allow chaining; the data is left untouched when none of
            the sources can be found
        """
        by_id, by_name = self._category_indexes()

        # Find source category IDs
        source_ids = set()
        for item in source_names:
            cat = self._resolve_category(item, by_id, by_name)
            if cat is None:
                continue
            source_ids.add(cat['id'])
            print(f"  - 통합 대상: {cat['name']} (ID: {cat['id']})")

        if not source_ids:
            print(f"[!] 통합할 클래스를 찾을 수 없습니다")
            return self

        # New category ID (lowest of the source IDs)
        new_category_id = min(source_ids)

        new_category = {
            'id': new_category_id,
            'name': target_name,
            'supercategory': target_supercategory
        }

        # Remove source categories and add new category
        self.data['categories'] = [
            cat for cat in self.data.get('categories', [])
            if cat['id'] not in source_ids
        ]
        self.data['categories'].append(new_category)

        # Update annotations (change all source IDs to new ID)
        for ann in self.data.get('annotations', []):
            if ann['category_id'] in source_ids:
                ann['category_id'] = new_category_id

        print(f"[+] '{target_name}' (ID: {new_category_id})로 통합되었습니다")
        return self

    def rename_categories(self, mapping: Dict[str, str]) -> 'COCOAnnotationEditor':
        """
        Rename categories

        Lookups use the names as they were before this call, so one mapping
        can swap two names.

        Args:
            mapping: Dictionary of {old_name_or_id: new_name}

        Returns:
            self, to allow chaining
        """
        by_id, by_name = self._category_indexes()

        renamed_count = 0
        for old_key, new_name in mapping.items():
            cat = self._resolve_category(old_key, by_id, by_name, name_particle='를')
            if cat is None:
                continue
            old_name = cat['name']
            cat['name'] = new_name
            print(f"  - 변경: '{old_name}' → '{new_name}' (ID: {cat['id']})")
            renamed_count += 1

        print(f"[+] {renamed_count}개 클래스 이름이 변경되었습니다")
        return self

    def reindex_categories(self, start_id: int = 1) -> 'COCOAnnotationEditor':
        """
        Reindex category IDs sequentially

        Categories are renumbered in ascending order of their current ID and
        every annotation's ``category_id`` is rewritten to match.

        Args:
            start_id: Starting ID (default: 1)

        Returns:
            self, to allow chaining

        Raises:
            KeyError: If an annotation refers to a category ID that is not in
                the category list.  The check runs before anything is
                modified, so the data is left intact.
        """
        categories = self.data.get('categories', [])
        annotations = self.data.get('annotations', [])

        # Validate first: failing half-way would leave IDs partly rewritten
        known_ids = {cat['id'] for cat in categories}
        unknown_ids = {ann['category_id'] for ann in annotations} - known_ids
        if unknown_ids:
            raise KeyError(
                f"categories에 없는 category_id가 어노테이션에 있습니다: {sorted(unknown_ids, key=str)}"
            )

        # Create mapping: old ID -> new ID
        id_mapping = {}
        ordered = sorted(categories, key=lambda x: x['id'])
        for new_id, cat in enumerate(ordered, start=start_id):
            id_mapping[cat['id']] = new_id
            print(f"  - ID 변경: {cat['id']} → {new_id} ({cat['name']})")

        # Update category IDs
        for cat in categories:
            cat['id'] = id_mapping[cat['id']]

        # Update annotation category IDs
        for ann in annotations:
            ann['category_id'] = id_mapping[ann['category_id']]

        print(f"[+] 클래스 ID가 재할당되었습니다 (시작: {start_id})")
        return self


def interactive_mode():
    """
    Interactive mode for user-friendly editing

    Walks the user through three steps on the console: choosing an
    annotation file, applying any number of edit operations (remove, merge,
    rename, reindex) and saving the result.  Returns without saving when the
    user finishes without having performed any operation.
    """
    print("\n" + "="*60)
    print("  COCO 어노테이션 편집기 - 대화형 모드")
    print("="*60)

    # 1. Select annotation file
    print("\n[1/3] 어노테이션 파일 선택")
    print("-" * 60)

    # Find annotation files in common locations
    common_paths = [
        "dataset/*/annotations/*.json",
        "data/*/annotations/*.json",
        "annotations/*.json",
        "*.json"
    ]

    from glob import glob
    found_files = []
    for pattern in common_paths:
        # No pattern contains "**", so a recursive glob is not needed
        found_files.extend(glob(pattern))

    input_file = None
    while not input_file:
        if found_files:
            print("\n발견된 어노테이션 파일:")
            for idx, f in enumerate(found_files[:10], 1):
                print(f"  {idx}. {f}")
            if len(found_files) > 10:
                print(f"  ... 외 {len(found_files) - 10}개 더")
            print(f"  0. 직접 경로 입력")

            choice = input("\n파일 번호 선택 (직접 입력은 0): ").strip()
            if choice == '0':
                input_file = input("어노테이션 파일 경로 입력: ").strip()
            else:
                try:
                    index = int(choice)
                except ValueError:
                    index = 0
                # Explicit range check: a negative number would otherwise
                # silently pick a file through negative list indexing
                if not 1 <= index <= len(found_files):
                    print("[!] 잘못된 선택입니다. 다시 선택해주세요.")
                    continue
                input_file = found_files[index - 1]
        else:
            input_file = input("어노테이션 파일 경로 입력: ").strip()

        # is_file() also rejects directories and an empty answer (which
        # Path() would resolve to the current directory)
        if not Path(input_file).is_file():
            print(f"[!] 파일을 찾을 수 없습니다: {input_file}")
            print("[!] 다시 입력해주세요.")
            input_file = None

    # Load and show info
    print(f"\n[+] 로딩 중: {input_file}")
    editor = COCOAnnotationEditor(input_file)
    editor.print_info()

    # 2. Select operations
    print("\n[2/3] 작업 선택")
    print("-" * 60)
    operations = []  # human readable log of what was done, shown before saving

    while True:
        print("\n사용 가능한 작업:")
        print("  1. 클래스 제거")
        print("  2. 클래스 통합 (병합)")
        print("  3. 클래스 이름 변경")
        print("  4. 클래스 ID 재할당")
        print("  5. 현재 정보 표시")
        print("  0. 완료 (저장 단계로)")

        choice = input("\n작업 선택 (0-5): ").strip()

        if choice == '0':
            if not operations:
                print("[!] 선택된 작업이 없습니다. 종료합니다...")
                return
            break

        elif choice == '1':  # Remove
            print("\n현재 클래스:")
            for cat in editor.data.get('categories', []):
                print(f"  - {cat['name']} (ID: {cat['id']})")

            classes = input("\n제거할 클래스 입력 (이름 또는 ID, 쉼표로 구분): ").strip()
            if classes:
                class_list = [c.strip() for c in classes.split(',')]
                print(f"\n[*] 클래스 제거 중: {class_list}")
                editor.remove_categories(class_list)
                operations.append(f"제거: {', '.join(class_list)}")

        elif choice == '2':  # Merge
            print("\n현재 클래스:")
            for cat in editor.data.get('categories', []):
                print(f"  - {cat['name']} (ID: {cat['id']})")

            source = input("\n통합할 클래스 입력 (이름 또는 ID, 쉼표로 구분): ").strip()
            if source:
                target = input("통합 후 클래스 이름 입력: ").strip()
                # An empty target would create a category without a name
                if not target:
                    print("[!] 통합 후 클래스 이름이 비어 있습니다. 통합을 취소합니다.")
                    continue

                source_list = [c.strip() for c in source.split(',')]
                print(f"\n[*] '{target}'로 통합 중: {source_list}")
                editor.merge_categories(source_list, target, '')
                operations.append(f"통합: {', '.join(source_list)} → {target}")

        elif choice == '3':  # Rename
            print("\n현재 클래스:")
            for cat in editor.data.get('categories', []):
                print(f"  - {cat['name']} (ID: {cat['id']})")

            print("\n매핑 입력 (형식: 기존:새이름,기존:새이름 - 쉼표로 구분)")
            print("예시: tt:transformer,ss:substation 또는 1:transformer,2:substation")
            mapping_input = input("\n입력: ").strip()

            if mapping_input:
                mapping = {}
                for pair in mapping_input.split(','):
                    # Entries without a colon are skipped
                    if ':' in pair:
                        old, new = pair.split(':', 1)
                        mapping[old.strip()] = new.strip()

                if mapping:
                    print(f"\n[*] 클래스 이름 변경 중")
                    editor.rename_categories(mapping)
                    operations.append(f"이름 변경: {len(mapping)}개 클래스")

        elif choice == '4':  # Reindex
            # Ask until the answer is empty (use the default) or an integer
            while True:
                start_id = input("\n시작 ID 입력 (기본값: 1): ").strip()
                if not start_id:
                    start_id = 1
                    break
                try:
                    start_id = int(start_id)
                    break
                except ValueError:
                    print("[!] 숫자를 입력해주세요.")

            print(f"\n[*] 클래스 ID 재할당 중 (시작: {start_id})")
            editor.reindex_categories(start_id=start_id)
            operations.append(f"ID 재할당: {start_id}부터")

        elif choice == '5':  # Show info
            editor.print_info()

        else:
            print("[!] 잘못된 선택입니다")

    # 3. Save
    print("\n[3/3] 결과 저장")
    print("-" * 60)
    print("\n수행된 작업:")
    for idx, op in enumerate(operations, 1):
        print(f"  {idx}. {op}")

    print(f"\n입력 파일: {input_file}")
    source_path = Path(input_file)
    default_output = str(source_path.parent / f"{source_path.stem}_edited{source_path.suffix}")
    output_file = input(f"\n출력 파일 경로 입력 (기본값: {default_output}): ").strip()
    if not output_file:
        output_file = default_output

    create_backup = input("출력 파일이 존재하면 백업 생성? (Y/n): ").strip().lower()
    backup = create_backup != 'n'

    # Check before saving: afterwards the output file always exists
    output_existed = Path(output_file).exists()

    print(f"\n[*] 저장 중: {output_file}...")
    editor.save(output_file, backup=backup)

    print("\n" + "="*60)
    print("  작업이 성공적으로 완료되었습니다!")
    print("="*60)
    print(f"\n입력:  {input_file}")
    print(f"출력: {output_file}")
    if backup and output_existed:
        print(f"(파일이 존재했다면 백업이 생성되었습니다)")
    print()


def main():
    """
    Command line entry point

    Without arguments the interactive mode is started.  Otherwise one of the
    sub-commands ``info``, ``remove``, ``merge``, ``rename`` or ``reindex``
    is run on the ``--input`` file; every command except ``info`` writes the
    result to ``--output``.  Malformed ``--mapping`` or empty class lists
    end the program through ``parser.error``.
    """
    parser = argparse.ArgumentParser(
        description='COCO Annotation Editing Utility',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__
    )

    subparsers = parser.add_subparsers(dest='command', help='Available commands')

    # info command
    info_parser = subparsers.add_parser('info', help='Show annotation info')
    info_parser.add_argument('-i', '--input', required=True, help='Input annotation file')

    # remove command
    remove_parser = subparsers.add_parser('remove', help='Remove categories')
    remove_parser.add_argument('-i', '--input', required=True, help='Input annotation file')
    remove_parser.add_argument('-o', '--output', required=True, help='Output annotation file')
    remove_parser.add_argument('-c', '--classes', required=True, help='Classes to remove (comma-separated)')
    remove_parser.add_argument('--no-backup', action='store_true', help='Do not create backup')

    # merge command
    merge_parser = subparsers.add_parser('merge', help='Merge categories')
    merge_parser.add_argument('-i', '--input', required=True, help='Input annotation file')
    merge_parser.add_argument('-o', '--output', required=True, help='Output annotation file')
    merge_parser.add_argument('-s', '--source', required=True, help='Source classes to merge (comma-separated)')
    merge_parser.add_argument('-t', '--target', required=True, help='Target class name after merge')
    merge_parser.add_argument('--supercategory', default='', help='Supercategory for merged class')
    merge_parser.add_argument('--no-backup', action='store_true', help='Do not create backup')

    # rename command
    rename_parser = subparsers.add_parser('rename', help='Rename categories')
    rename_parser.add_argument('-i', '--input', required=True, help='Input annotation file')
    rename_parser.add_argument('-o', '--output', required=True, help='Output annotation file')
    rename_parser.add_argument('-m', '--mapping', required=True,
                              help='Name mapping (old:new,old2:new2 format)')
    rename_parser.add_argument('--no-backup', action='store_true', help='Do not create backup')

    # reindex command
    reindex_parser = subparsers.add_parser('reindex', help='Reindex category IDs')
    reindex_parser.add_argument('-i', '--input', required=True, help='Input annotation file')
    reindex_parser.add_argument('-o', '--output', required=True, help='Output annotation file')
    reindex_parser.add_argument('--start', type=int, default=1, help='Starting ID (default: 1)')
    reindex_parser.add_argument('--no-backup', action='store_true', help='Do not create backup')

    # If no arguments provided, enter interactive mode
    if len(sys.argv) == 1:
        interactive_mode()
        return

    args = parser.parse_args()

    if not args.command:
        parser.print_help()
        return

    # Execute command
    if args.command == 'info':
        editor = COCOAnnotationEditor(args.input)
        editor.print_info()

    elif args.command == 'remove':
        # Drop empty tokens produced by stray or trailing commas
        classes = [c.strip() for c in args.classes.split(',') if c.strip()]
        if not classes:
            parser.error("--classes must name at least one class")
        print(f"\n[*] 클래스 제거 중...")
        editor = COCOAnnotationEditor(args.input)
        editor.remove_categories(classes)
        editor.save(args.output, backup=not args.no_backup)

    elif args.command == 'merge':
        source_classes = [c.strip() for c in args.source.split(',') if c.strip()]
        if not source_classes:
            parser.error("--source must name at least one class")
        print(f"\n[*] 클래스 통합 중...")
        editor = COCOAnnotationEditor(args.input)
        # Forward --supercategory instead of discarding it
        editor.merge_categories(source_classes, args.target, args.supercategory)
        editor.save(args.output, backup=not args.no_backup)

    elif args.command == 'rename':
        mapping = {}
        for pair in args.mapping.split(','):
            if not pair.strip():
                continue
            # Split on the first colon only, as the interactive mode does,
            # so a new name may itself contain a colon
            old, sep, new = pair.partition(':')
            old, new = old.strip(), new.strip()
            if not sep or not old or not new:
                parser.error(f"invalid mapping entry '{pair.strip()}' (expected old:new)")
            mapping[old] = new
        if not mapping:
            parser.error("--mapping must contain at least one old:new pair")
        print(f"\n[*] 클래스 이름 변경 중...")
        editor = COCOAnnotationEditor(args.input)
        editor.rename_categories(mapping)
        editor.save(args.output, backup=not args.no_backup)

    elif args.command == 'reindex':
        print(f"\n[*] 클래스 ID 재할당 중...")
        editor = COCOAnnotationEditor(args.input)
        editor.reindex_categories(start_id=args.start)
        editor.save(args.output, backup=not args.no_backup)


# Run the command line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()