init commit

2025-11-08 19:15:39 +01:00
parent ecffcb08e8
commit c7adacf53b
470 changed files with 73751 additions and 0 deletions

ultralytics/models/yolo/obb/__init__.py

@@ -0,0 +1,7 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

from .predict import OBBPredictor
from .train import OBBTrainer
from .val import OBBValidator

__all__ = "OBBPredictor", "OBBTrainer", "OBBValidator"
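The package `__init__` simply re-exports the three OBB classes. A minimal sketch of the resulting package-level import path (names taken from `__all__` above, nothing else assumed):

from ultralytics.models.yolo.obb import OBBPredictor, OBBTrainer, OBBValidator

print(OBBPredictor.__name__, OBBTrainer.__name__, OBBValidator.__name__)  # OBBPredictor OBBTrainer OBBValidator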

ultralytics/models/yolo/obb/predict.py

@@ -0,0 +1,65 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

import torch

from ultralytics.engine.results import Results
from ultralytics.models.yolo.detect.predict import DetectionPredictor
from ultralytics.utils import DEFAULT_CFG, ops


class OBBPredictor(DetectionPredictor):
"""
A class extending the DetectionPredictor class for prediction based on an Oriented Bounding Box (OBB) model.
This predictor handles oriented bounding box detection tasks, processing images and returning results with rotated
bounding boxes.
Attributes:
args (namespace): Configuration arguments for the predictor.
model (torch.nn.Module): The loaded YOLO OBB model.
Examples:
>>> from ultralytics.utils import ASSETS
>>> from ultralytics.models.yolo.obb import OBBPredictor
>>> args = dict(model="yolo11n-obb.pt", source=ASSETS)
>>> predictor = OBBPredictor(overrides=args)
>>> predictor.predict_cli()
"""
def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
"""
Initialize OBBPredictor with optional model and data configuration overrides.
Args:
cfg (dict, optional): Default configuration for the predictor.
overrides (dict, optional): Configuration overrides that take precedence over the default config.
_callbacks (list, optional): List of callback functions to be invoked during prediction.
Examples:
>>> from ultralytics.utils import ASSETS
>>> from ultralytics.models.yolo.obb import OBBPredictor
>>> args = dict(model="yolo11n-obb.pt", source=ASSETS)
>>> predictor = OBBPredictor(overrides=args)
"""
super().__init__(cfg, overrides, _callbacks)
self.args.task = "obb"
def construct_result(self, pred, img, orig_img, img_path):
"""
Construct the result object from the prediction.
Args:
pred (torch.Tensor): The predicted bounding boxes, scores, and rotation angles with shape (N, 7) where
the last dimension contains [x, y, w, h, confidence, class_id, angle].
img (torch.Tensor): The image after preprocessing with shape (B, C, H, W).
orig_img (np.ndarray): The original image before preprocessing.
img_path (str): The path to the original image.
Returns:
(Results): The result object containing the original image, image path, class names, and oriented bounding
boxes.
"""
rboxes = ops.regularize_rboxes(torch.cat([pred[:, :4], pred[:, -1:]], dim=-1))
rboxes[:, :4] = ops.scale_boxes(img.shape[2:], rboxes[:, :4], orig_img.shape, xywh=True)
obb = torch.cat([rboxes, pred[:, 4:6]], dim=-1)
return Results(orig_img, path=img_path, names=self.model.names, obb=obb)
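For clarity on the tensor layout that `construct_result` consumes: each prediction row is `[x, y, w, h, conf, cls, angle]`, and the method reassembles it into a rotated box `(x, y, w, h, angle)` followed by `(conf, cls)`. A minimal sketch with a fabricated prediction tensor, skipping the `regularize_rboxes` and `scale_boxes` steps (no model or image involved):

import torch

# One fabricated detection row: [x, y, w, h, conf, cls, angle]
pred = torch.tensor([[320.0, 240.0, 100.0, 40.0, 0.88, 3.0, 0.52]])

rboxes = torch.cat([pred[:, :4], pred[:, -1:]], dim=-1)  # (N, 5): x, y, w, h, angle
obb = torch.cat([rboxes, pred[:, 4:6]], dim=-1)          # (N, 7): x, y, w, h, angle, conf, cls
print(obb)  # one row in x, y, w, h, angle, conf, cls order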

ultralytics/models/yolo/obb/train.py

@@ -0,0 +1,82 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

from __future__ import annotations

from copy import copy
from pathlib import Path
from typing import Any

from ultralytics.models import yolo
from ultralytics.nn.tasks import OBBModel
from ultralytics.utils import DEFAULT_CFG, RANK


class OBBTrainer(yolo.detect.DetectionTrainer):
"""
A class extending the DetectionTrainer class for training based on an Oriented Bounding Box (OBB) model.
This trainer specializes in training YOLO models that detect oriented bounding boxes, which are useful for
detecting objects at arbitrary angles rather than just axis-aligned rectangles.
Attributes:
loss_names (tuple): Names of the loss components used during training including box_loss, cls_loss,
and dfl_loss.
Methods:
get_model: Return OBBModel initialized with specified config and weights.
        get_validator: Return an instance of OBBValidator for validation of the YOLO model.
Examples:
>>> from ultralytics.models.yolo.obb import OBBTrainer
>>> args = dict(model="yolo11n-obb.pt", data="dota8.yaml", epochs=3)
>>> trainer = OBBTrainer(overrides=args)
>>> trainer.train()
"""
def __init__(self, cfg=DEFAULT_CFG, overrides: dict | None = None, _callbacks: list[Any] | None = None):
"""
Initialize an OBBTrainer object for training Oriented Bounding Box (OBB) models.
Args:
cfg (dict, optional): Configuration dictionary for the trainer. Contains training parameters and
model configuration.
overrides (dict, optional): Dictionary of parameter overrides for the configuration. Any values here
will take precedence over those in cfg.
_callbacks (list[Any], optional): List of callback functions to be invoked during training.
"""
if overrides is None:
overrides = {}
overrides["task"] = "obb"
super().__init__(cfg, overrides, _callbacks)
def get_model(
self, cfg: str | dict | None = None, weights: str | Path | None = None, verbose: bool = True
) -> OBBModel:
"""
Return OBBModel initialized with specified config and weights.
Args:
cfg (str | dict, optional): Model configuration. Can be a path to a YAML config file, a dictionary
containing configuration parameters, or None to use default configuration.
weights (str | Path, optional): Path to pretrained weights file. If None, random initialization is used.
verbose (bool): Whether to display model information during initialization.
Returns:
(OBBModel): Initialized OBBModel with the specified configuration and weights.
Examples:
>>> trainer = OBBTrainer()
>>> model = trainer.get_model(cfg="yolo11n-obb.yaml", weights="yolo11n-obb.pt")
"""
model = OBBModel(cfg, nc=self.data["nc"], ch=self.data["channels"], verbose=verbose and RANK == -1)
if weights:
model.load(weights)
return model
def get_validator(self):
"""Return an instance of OBBValidator for validation of YOLO model."""
self.loss_names = "box_loss", "cls_loss", "dfl_loss"
return yolo.obb.OBBValidator(
self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
)
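A usage sketch combining the pieces above, lifted from the class docstring (it assumes the `yolo11n-obb.pt` weights and `dota8.yaml` dataset referenced there resolve in your environment):

from ultralytics.models.yolo.obb import OBBTrainer

args = dict(model="yolo11n-obb.pt", data="dota8.yaml", epochs=3)  # overrides; task is forced to "obb" in __init__
trainer = OBBTrainer(overrides=args)
trainer.train()  # builds an OBBModel via get_model() and validates with the OBBValidator returned by get_validator()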

ultralytics/models/yolo/obb/val.py

@@ -0,0 +1,299 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

from __future__ import annotations

from pathlib import Path
from typing import Any

import numpy as np
import torch

from ultralytics.models.yolo.detect import DetectionValidator
from ultralytics.utils import LOGGER, ops
from ultralytics.utils.metrics import OBBMetrics, batch_probiou
from ultralytics.utils.nms import TorchNMS


class OBBValidator(DetectionValidator):
"""
A class extending the DetectionValidator class for validation based on an Oriented Bounding Box (OBB) model.
This validator specializes in evaluating models that predict rotated bounding boxes, commonly used for aerial and
satellite imagery where objects can appear at various orientations.
Attributes:
args (dict): Configuration arguments for the validator.
metrics (OBBMetrics): Metrics object for evaluating OBB model performance.
is_dota (bool): Flag indicating whether the validation dataset is in DOTA format.
Methods:
init_metrics: Initialize evaluation metrics for YOLO.
_process_batch: Process batch of detections and ground truth boxes to compute IoU matrix.
_prepare_batch: Prepare batch data for OBB validation.
_prepare_pred: Prepare predictions with scaled and padded bounding boxes.
plot_predictions: Plot predicted bounding boxes on input images.
pred_to_json: Serialize YOLO predictions to COCO json format.
save_one_txt: Save YOLO detections to a txt file in normalized coordinates.
eval_json: Evaluate YOLO output in JSON format and return performance statistics.
Examples:
>>> from ultralytics.models.yolo.obb import OBBValidator
>>> args = dict(model="yolo11n-obb.pt", data="dota8.yaml")
>>> validator = OBBValidator(args=args)
>>> validator(model=args["model"])
"""
def __init__(self, dataloader=None, save_dir=None, args=None, _callbacks=None) -> None:
"""
Initialize OBBValidator and set task to 'obb', metrics to OBBMetrics.
This constructor initializes an OBBValidator instance for validating Oriented Bounding Box (OBB) models.
It extends the DetectionValidator class and configures it specifically for the OBB task.
Args:
dataloader (torch.utils.data.DataLoader, optional): Dataloader to be used for validation.
save_dir (str | Path, optional): Directory to save results.
args (dict | SimpleNamespace, optional): Arguments containing validation parameters.
_callbacks (list, optional): List of callback functions to be called during validation.
"""
super().__init__(dataloader, save_dir, args, _callbacks)
self.args.task = "obb"
self.metrics = OBBMetrics()
def init_metrics(self, model: torch.nn.Module) -> None:
"""
Initialize evaluation metrics for YOLO obb validation.
Args:
model (torch.nn.Module): Model to validate.
"""
super().init_metrics(model)
val = self.data.get(self.args.split, "") # validation path
self.is_dota = isinstance(val, str) and "DOTA" in val # check if dataset is DOTA format
self.confusion_matrix.task = "obb" # set confusion matrix task to 'obb'
def _process_batch(self, preds: dict[str, torch.Tensor], batch: dict[str, torch.Tensor]) -> dict[str, np.ndarray]:
"""
Compute the correct prediction matrix for a batch of detections and ground truth bounding boxes.
Args:
preds (dict[str, torch.Tensor]): Prediction dictionary containing 'cls' and 'bboxes' keys with detected
class labels and bounding boxes.
batch (dict[str, torch.Tensor]): Batch dictionary containing 'cls' and 'bboxes' keys with ground truth
class labels and bounding boxes.
Returns:
(dict[str, np.ndarray]): Dictionary containing 'tp' key with the correct prediction matrix as a numpy
array with shape (N, 10), which includes 10 IoU levels for each detection, indicating the accuracy
of predictions compared to the ground truth.
Examples:
            >>> preds = {"cls": torch.randint(0, 5, (100,)), "bboxes": torch.rand(100, 5)}  # 100 sample detections
            >>> batch = {"cls": torch.randint(0, 5, (50,)), "bboxes": torch.rand(50, 5)}  # 50 ground truth boxes
            >>> correct_matrix = validator._process_batch(preds, batch)
"""
if batch["cls"].shape[0] == 0 or preds["cls"].shape[0] == 0:
return {"tp": np.zeros((preds["cls"].shape[0], self.niou), dtype=bool)}
iou = batch_probiou(batch["bboxes"], preds["bboxes"])
return {"tp": self.match_predictions(preds["cls"], batch["cls"], iou).cpu().numpy()}
def postprocess(self, preds: torch.Tensor) -> list[dict[str, torch.Tensor]]:
"""
Args:
preds (torch.Tensor): Raw predictions from the model.
Returns:
(list[dict[str, torch.Tensor]]): Processed predictions with angle information concatenated to bboxes.
"""
preds = super().postprocess(preds)
for pred in preds:
pred["bboxes"] = torch.cat([pred["bboxes"], pred.pop("extra")], dim=-1) # concatenate angle
return preds
def _prepare_batch(self, si: int, batch: dict[str, Any]) -> dict[str, Any]:
"""
Prepare batch data for OBB validation with proper scaling and formatting.
Args:
si (int): Batch index to process.
batch (dict[str, Any]): Dictionary containing batch data with keys:
- batch_idx: Tensor of batch indices
- cls: Tensor of class labels
- bboxes: Tensor of bounding boxes
- ori_shape: Original image shapes
- img: Batch of images
- ratio_pad: Ratio and padding information
Returns:
(dict[str, Any]): Prepared batch data with scaled bounding boxes and metadata.
"""
idx = batch["batch_idx"] == si
cls = batch["cls"][idx].squeeze(-1)
bbox = batch["bboxes"][idx]
ori_shape = batch["ori_shape"][si]
imgsz = batch["img"].shape[2:]
ratio_pad = batch["ratio_pad"][si]
if cls.shape[0]:
bbox[..., :4].mul_(torch.tensor(imgsz, device=self.device)[[1, 0, 1, 0]]) # target boxes
return {
"cls": cls,
"bboxes": bbox,
"ori_shape": ori_shape,
"imgsz": imgsz,
"ratio_pad": ratio_pad,
"im_file": batch["im_file"][si],
}
def plot_predictions(self, batch: dict[str, Any], preds: list[torch.Tensor], ni: int) -> None:
"""
Plot predicted bounding boxes on input images and save the result.
Args:
batch (dict[str, Any]): Batch data containing images, file paths, and other metadata.
preds (list[torch.Tensor]): List of prediction tensors for each image in the batch.
ni (int): Batch index used for naming the output file.
Examples:
>>> validator = OBBValidator()
>>> batch = {"img": images, "im_file": paths}
>>> preds = [torch.rand(10, 7)] # Example predictions for one image
>>> validator.plot_predictions(batch, preds, 0)
"""
for p in preds:
# TODO: fix this duplicated `xywh2xyxy`
p["bboxes"][:, :4] = ops.xywh2xyxy(p["bboxes"][:, :4]) # convert to xyxy format for plotting
super().plot_predictions(batch, preds, ni) # plot bboxes
def pred_to_json(self, predn: dict[str, torch.Tensor], pbatch: dict[str, Any]) -> None:
"""
Convert YOLO predictions to COCO JSON format with rotated bounding box information.
Args:
predn (dict[str, torch.Tensor]): Prediction dictionary containing 'bboxes', 'conf', and 'cls' keys
with bounding box coordinates, confidence scores, and class predictions.
pbatch (dict[str, Any]): Batch dictionary containing 'imgsz', 'ori_shape', 'ratio_pad', and 'im_file'.
Notes:
This method processes rotated bounding box predictions and converts them to both rbox format
(x, y, w, h, angle) and polygon format (x1, y1, x2, y2, x3, y3, x4, y4) before adding them
to the JSON dictionary.
"""
path = Path(pbatch["im_file"])
stem = path.stem
image_id = int(stem) if stem.isnumeric() else stem
rbox = predn["bboxes"]
poly = ops.xywhr2xyxyxyxy(rbox).view(-1, 8)
for r, b, s, c in zip(rbox.tolist(), poly.tolist(), predn["conf"].tolist(), predn["cls"].tolist()):
self.jdict.append(
{
"image_id": image_id,
"file_name": path.name,
"category_id": self.class_map[int(c)],
"score": round(s, 5),
"rbox": [round(x, 3) for x in r],
"poly": [round(x, 3) for x in b],
}
)
def save_one_txt(self, predn: dict[str, torch.Tensor], save_conf: bool, shape: tuple[int, int], file: Path) -> None:
"""
Save YOLO OBB detections to a text file in normalized coordinates.
Args:
            predn (dict[str, torch.Tensor]): Prediction dictionary containing 'bboxes' with shape (N, 5) in
                (x, y, w, h, angle) format, plus 'conf' and 'cls' tensors for each detection.
save_conf (bool): Whether to save confidence scores in the text file.
shape (tuple[int, int]): Original image shape in format (height, width).
file (Path): Output file path to save detections.
Examples:
>>> validator = OBBValidator()
            >>> predn = {"bboxes": torch.tensor([[100, 100, 50, 30, 0.5]]), "conf": torch.tensor([0.9]), "cls": torch.tensor([0])}
>>> validator.save_one_txt(predn, True, (640, 480), "detection.txt")
"""
        from ultralytics.engine.results import Results

Results(
np.zeros((shape[0], shape[1]), dtype=np.uint8),
path=None,
names=self.names,
obb=torch.cat([predn["bboxes"], predn["conf"].unsqueeze(-1), predn["cls"].unsqueeze(-1)], dim=1),
).save_txt(file, save_conf=save_conf)
def scale_preds(self, predn: dict[str, torch.Tensor], pbatch: dict[str, Any]) -> dict[str, torch.Tensor]:
"""Scales predictions to the original image size."""
return {
**predn,
"bboxes": ops.scale_boxes(
pbatch["imgsz"], predn["bboxes"].clone(), pbatch["ori_shape"], ratio_pad=pbatch["ratio_pad"], xywh=True
),
}
def eval_json(self, stats: dict[str, Any]) -> dict[str, Any]:
"""
Evaluate YOLO output in JSON format and save predictions in DOTA format.
Args:
stats (dict[str, Any]): Performance statistics dictionary.
Returns:
(dict[str, Any]): Updated performance statistics.
"""
if self.args.save_json and self.is_dota and len(self.jdict):
import json
import re
from collections import defaultdict
pred_json = self.save_dir / "predictions.json" # predictions
pred_txt = self.save_dir / "predictions_txt" # predictions
pred_txt.mkdir(parents=True, exist_ok=True)
            with open(pred_json, encoding="utf-8") as f:
                data = json.load(f)
# Save split results
LOGGER.info(f"Saving predictions with DOTA format to {pred_txt}...")
for d in data:
image_id = d["image_id"]
score = d["score"]
classname = self.names[d["category_id"] - 1].replace(" ", "-")
p = d["poly"]
with open(f"{pred_txt / f'Task1_{classname}'}.txt", "a", encoding="utf-8") as f:
f.writelines(f"{image_id} {score} {p[0]} {p[1]} {p[2]} {p[3]} {p[4]} {p[5]} {p[6]} {p[7]}\n")
            # Save merged results. This may yield a slightly lower mAP than the official merging script
            # because of the probiou calculation.
pred_merged_txt = self.save_dir / "predictions_merged_txt" # predictions
pred_merged_txt.mkdir(parents=True, exist_ok=True)
merged_results = defaultdict(list)
LOGGER.info(f"Saving merged predictions with DOTA format to {pred_merged_txt}...")
for d in data:
image_id = d["image_id"].split("__", 1)[0]
pattern = re.compile(r"\d+___\d+")
x, y = (int(c) for c in re.findall(pattern, d["image_id"])[0].split("___"))
bbox, score, cls = d["rbox"], d["score"], d["category_id"] - 1
bbox[0] += x
bbox[1] += y
bbox.extend([score, cls])
merged_results[image_id].append(bbox)
for image_id, bbox in merged_results.items():
bbox = torch.tensor(bbox)
max_wh = torch.max(bbox[:, :2]).item() * 2
c = bbox[:, 6:7] * max_wh # classes
scores = bbox[:, 5] # scores
b = bbox[:, :5].clone()
b[:, :2] += c
                # An IoU threshold of 0.3 gives results close to, and sometimes slightly better than, the official merging script.
i = TorchNMS.fast_nms(b, scores, 0.3, iou_func=batch_probiou)
bbox = bbox[i]
b = ops.xywhr2xyxyxyxy(bbox[:, :5]).view(-1, 8)
for x in torch.cat([b, bbox[:, 5:7]], dim=-1).tolist():
classname = self.names[int(x[-1])].replace(" ", "-")
p = [round(i, 3) for i in x[:-2]] # poly
score = round(x[-2], 3)
with open(f"{pred_merged_txt / f'Task1_{classname}'}.txt", "a", encoding="utf-8") as f:
f.writelines(f"{image_id} {score} {p[0]} {p[1]} {p[2]} {p[3]} {p[4]} {p[5]} {p[6]} {p[7]}\n")
return stats
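The merging loop in `eval_json` relies on the DOTA split naming scheme, in which a patch id encodes the original image id plus the patch's top-left offset, so each rotated box can be shifted back into original-image coordinates before NMS. A small sketch of that parsing and offsetting, using a hypothetical patch id and made-up box values:

import re

image_id = "P0006__1024__0___1536"  # hypothetical patch id: <original>__<patch size>__<x>___<y>
orig_id = image_id.split("__", 1)[0]  # "P0006", the original (unsplit) image
x, y = (int(c) for c in re.findall(r"\d+___\d+", image_id)[0].split("___"))  # patch offset (0, 1536)

rbox = [100.0, 200.0, 50.0, 30.0, 0.3]  # x, y, w, h, angle predicted inside the patch
rbox[0] += x  # shift the box center back into original-image coordinates
rbox[1] += y
print(orig_id, rbox)  # P0006 [100.0, 1736.0, 50.0, 30.0, 0.3]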