init commit

2025-11-08 19:15:39 +01:00
parent ecffcb08e8
commit c7adacf53b
470 changed files with 73751 additions and 0 deletions


@@ -0,0 +1,41 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from .ai_gym import AIGym
from .analytics import Analytics
from .distance_calculation import DistanceCalculation
from .heatmap import Heatmap
from .instance_segmentation import InstanceSegmentation
from .object_blurrer import ObjectBlurrer
from .object_counter import ObjectCounter
from .object_cropper import ObjectCropper
from .parking_management import ParkingManagement, ParkingPtsSelection
from .queue_management import QueueManager
from .region_counter import RegionCounter
from .security_alarm import SecurityAlarm
from .similarity_search import SearchApp, VisualAISearch
from .speed_estimation import SpeedEstimator
from .streamlit_inference import Inference
from .trackzone import TrackZone
from .vision_eye import VisionEye
__all__ = (
"ObjectCounter",
"ObjectCropper",
"ObjectBlurrer",
"AIGym",
"RegionCounter",
"SecurityAlarm",
"Heatmap",
"InstanceSegmentation",
"VisionEye",
"SpeedEstimator",
"DistanceCalculation",
"QueueManager",
"ParkingManagement",
"ParkingPtsSelection",
"Analytics",
"Inference",
"TrackZone",
"SearchApp",
"VisualAISearch",
)
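
For reference, the classes exported above all share the same pattern: construct with keyword arguments (validated against SolutionConfig in config.py below) and call process() on each frame to obtain a SolutionResults object. A minimal, illustrative sketch of that pattern; the model names and region points are assumptions, not part of this commit:

from ultralytics.solutions import AIGym, Heatmap, ObjectCounter

# Each solution is configured via keyword arguments and applied per frame with .process(im0),
# which returns a SolutionResults object carrying plot_im plus solution-specific fields.
gym = AIGym(model="yolo11n-pose.pt", show=False)
heatmap = Heatmap(model="yolo11n.pt", show=False)
counter = ObjectCounter(model="yolo11n.pt", region=[(20, 400), (1260, 400)], show=False)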


@@ -0,0 +1,114 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from collections import defaultdict
from typing import Any
from ultralytics.solutions.solutions import BaseSolution, SolutionAnnotator, SolutionResults
class AIGym(BaseSolution):
"""
A class to monitor workouts of people in a real-time video stream based on their poses.
This class extends BaseSolution to monitor workouts using YOLO pose estimation models. It tracks and counts
repetitions of exercises based on predefined angle thresholds for up and down positions.
Attributes:
states (dict[int, dict[str, Any]]): Per-track dictionary storing the current angle, repetition count, and stage for workout monitoring.
up_angle (float): Angle threshold for considering the 'up' position of an exercise.
down_angle (float): Angle threshold for considering the 'down' position of an exercise.
kpts (list[int]): Indices of keypoints used for angle calculation.
Methods:
process: Process a frame to detect poses, calculate angles, and count repetitions.
Examples:
>>> gym = AIGym(model="yolo11n-pose.pt")
>>> image = cv2.imread("gym_scene.jpg")
>>> results = gym.process(image)
>>> processed_image = results.plot_im
>>> cv2.imshow("Processed Image", processed_image)
>>> cv2.waitKey(0)
"""
def __init__(self, **kwargs: Any) -> None:
"""
Initialize AIGym for workout monitoring using pose estimation and predefined angles.
Args:
**kwargs (Any): Keyword arguments passed to the parent class constructor.
model (str): Model name or path, defaults to "yolo11n-pose.pt".
"""
kwargs["model"] = kwargs.get("model", "yolo11n-pose.pt")
super().__init__(**kwargs)
self.states = defaultdict(lambda: {"angle": 0, "count": 0, "stage": "-"}) # Dict for count, angle and stage
# Extract details from CFG single time for usage later
self.up_angle = float(self.CFG["up_angle"]) # Angle threshold above which the pose is considered 'up'
self.down_angle = float(self.CFG["down_angle"]) # Angle threshold below which the pose is considered 'down'
self.kpts = self.CFG["kpts"] # User-selected keypoint indices used for angle estimation
def process(self, im0) -> SolutionResults:
"""
Monitor workouts using Ultralytics YOLO Pose Model.
This function processes an input image to track and analyze human poses for workout monitoring. It uses
the YOLO Pose model to detect keypoints, estimate angles, and count repetitions based on predefined
angle thresholds.
Args:
im0 (np.ndarray): Input image for processing.
Returns:
(SolutionResults): Contains processed image `plot_im`,
'workout_count' (list of completed reps),
'workout_stage' (list of current stages),
'workout_angle' (list of angles), and
'total_tracks' (total number of tracked individuals).
Examples:
>>> gym = AIGym()
>>> image = cv2.imread("workout.jpg")
>>> results = gym.process(image)
>>> processed_image = results.plot_im
"""
annotator = SolutionAnnotator(im0, line_width=self.line_width) # Initialize annotator
self.extract_tracks(im0) # Extract tracks (bounding boxes, classes, and masks)
if len(self.boxes):
kpt_data = self.tracks.keypoints.data
for i, k in enumerate(kpt_data):
state = self.states[self.track_ids[i]] # get state details
# Get keypoints and estimate the angle
state["angle"] = annotator.estimate_pose_angle(*[k[int(idx)] for idx in self.kpts])
annotator.draw_specific_kpts(k, self.kpts, radius=self.line_width * 3)
# Determine stage and count logic based on angle thresholds
if state["angle"] < self.down_angle:
if state["stage"] == "up":
state["count"] += 1
state["stage"] = "down"
elif state["angle"] > self.up_angle:
state["stage"] = "up"
# Display angle, count, and stage text
if self.show_labels:
annotator.plot_angle_and_count_and_stage(
angle_text=state["angle"], # angle text for display
count_text=state["count"], # count text for workouts
stage_text=state["stage"], # stage position text
center_kpt=k[int(self.kpts[1])], # center keypoint for display
)
plot_im = annotator.result()
self.display_output(plot_im) # Display output image, if environment support display
# Return SolutionResults
return SolutionResults(
plot_im=plot_im,
workout_count=[v["count"] for v in self.states.values()],
workout_stage=[v["stage"] for v in self.states.values()],
workout_angle=[v["angle"] for v in self.states.values()],
total_tracks=len(self.track_ids),
)
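
For reference, a minimal usage sketch of AIGym on a video stream; the input path "workouts.mp4" is an assumed placeholder and the keypoint indices shown are the defaults from SolutionConfig:

import cv2

from ultralytics.solutions import AIGym

gym = AIGym(model="yolo11n-pose.pt", kpts=[6, 8, 10], show=False)  # right shoulder, elbow, wrist
cap = cv2.VideoCapture("workouts.mp4")  # hypothetical input video
while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    results = gym.process(frame)  # SolutionResults with plot_im, workout_count, workout_stage, ...
    print(results.workout_count, results.workout_stage)
    cv2.imshow("AIGym", results.plot_im)
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break
cap.release()
cv2.destroyAllWindows()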


@@ -0,0 +1,265 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from __future__ import annotations
from itertools import cycle
from typing import Any
import cv2
import numpy as np
from ultralytics.solutions.solutions import BaseSolution, SolutionResults # Import a parent class
class Analytics(BaseSolution):
"""
A class for creating and updating various types of charts for visual analytics.
This class extends BaseSolution to provide functionality for generating line, bar, pie, and area charts
based on object detection and tracking data.
Attributes:
type (str): The type of analytics chart to generate ('line', 'bar', 'pie', or 'area').
x_label (str): Label for the x-axis.
y_label (str): Label for the y-axis.
bg_color (str): Background color of the chart frame.
fg_color (str): Foreground color of the chart frame.
title (str): Title of the chart window.
max_points (int): Maximum number of data points to display on the chart.
fontsize (int): Font size for text display.
color_cycle (cycle): Cyclic iterator for chart colors.
total_counts (int): Total count of detected objects (used for line charts).
clswise_count (dict[str, int]): Dictionary for class-wise object counts.
fig (Figure): Matplotlib figure object for the chart.
ax (Axes): Matplotlib axes object for the chart.
canvas (FigureCanvasAgg): Canvas for rendering the chart.
lines (dict): Dictionary to store line objects for area charts.
color_mapping (dict[str, str]): Dictionary mapping class labels to colors for consistent visualization.
Methods:
process: Process image data and update the chart.
update_graph: Update the chart with new data points.
Examples:
>>> analytics = Analytics(analytics_type="line")
>>> frame = cv2.imread("image.jpg")
>>> results = analytics.process(frame, frame_number=1)
>>> cv2.imshow("Analytics", results.plot_im)
"""
def __init__(self, **kwargs: Any) -> None:
"""Initialize Analytics class with various chart types for visual data representation."""
super().__init__(**kwargs)
import matplotlib.pyplot as plt # scope for faster 'import ultralytics'
from matplotlib.backends.backend_agg import FigureCanvasAgg
from matplotlib.figure import Figure
self.type = self.CFG["analytics_type"] # type of analytics, i.e. "line", "pie", "bar" or "area" charts.
self.x_label = "Classes" if self.type in {"bar", "pie"} else "Frame#"
self.y_label = "Total Counts"
# Predefined data
self.bg_color = "#F3F3F3" # background color of frame
self.fg_color = "#111E68" # foreground color of frame
self.title = "Ultralytics Solutions" # window name
self.max_points = 45 # maximum points to be drawn on window
self.fontsize = 25 # text font size for display
figsize = self.CFG["figsize"] # set output image size, i.e. (12.8, 7.2) -> w = 1280, h = 720
self.color_cycle = cycle(["#DD00BA", "#042AFF", "#FF4447", "#7D24FF", "#BD00FF"])
self.total_counts = 0 # running total of detections, used for line charts
self.clswise_count = {} # dictionary for class-wise counts
self.update_every = kwargs.get("update_every", 30) # Only update graph every 30 frames by default
self.last_plot_im = None # Cache of the last rendered chart
# Initialize figure and axes for line or area charts
if self.type in {"line", "area"}:
self.lines = {}
self.fig = Figure(facecolor=self.bg_color, figsize=figsize)
self.canvas = FigureCanvasAgg(self.fig) # Agg canvas for off-screen rendering
self.ax = self.fig.add_subplot(111, facecolor=self.bg_color)
if self.type == "line":
(self.line,) = self.ax.plot([], [], color="cyan", linewidth=self.line_width)
elif self.type in {"bar", "pie"}:
# Initialize bar or pie plot
self.fig, self.ax = plt.subplots(figsize=figsize, facecolor=self.bg_color)
self.canvas = FigureCanvasAgg(self.fig) # Agg canvas for off-screen rendering
self.ax.set_facecolor(self.bg_color)
self.color_mapping = {}
if self.type == "pie": # Ensure pie chart is circular
self.ax.axis("equal")
def process(self, im0: np.ndarray, frame_number: int) -> SolutionResults:
"""
Process image data and run object tracking to update analytics charts.
Args:
im0 (np.ndarray): Input image for processing.
frame_number (int): Video frame number for plotting the data.
Returns:
(SolutionResults): Contains processed image `plot_im`, 'total_tracks' (int, total number of tracked objects)
and 'classwise_count' (dict, per-class object count).
Raises:
ModuleNotFoundError: If an unsupported chart type is specified.
Examples:
>>> analytics = Analytics(analytics_type="line")
>>> frame = np.zeros((480, 640, 3), dtype=np.uint8)
>>> results = analytics.process(frame, frame_number=1)
"""
self.extract_tracks(im0) # Extract tracks
if self.type == "line":
for _ in self.boxes:
self.total_counts += 1
update_required = frame_number % self.update_every == 0 or self.last_plot_im is None
if update_required:
self.last_plot_im = self.update_graph(frame_number=frame_number)
plot_im = self.last_plot_im
self.total_counts = 0
elif self.type in {"pie", "bar", "area"}:
from collections import Counter
self.clswise_count = Counter(self.names[int(cls)] for cls in self.clss)
update_required = frame_number % self.update_every == 0 or self.last_plot_im is None
if update_required:
self.last_plot_im = self.update_graph(
frame_number=frame_number, count_dict=self.clswise_count, plot=self.type
)
plot_im = self.last_plot_im
else:
raise ModuleNotFoundError(f"{self.type} chart is not supported ❌")
# Return results with the processed image and summary metrics
return SolutionResults(plot_im=plot_im, total_tracks=len(self.track_ids), classwise_count=self.clswise_count)
def update_graph(
self, frame_number: int, count_dict: dict[str, int] | None = None, plot: str = "line"
) -> np.ndarray:
"""
Update the graph with new data for single or multiple classes.
Args:
frame_number (int): The current frame number.
count_dict (dict[str, int], optional): Dictionary with class names as keys and counts as values for
multiple classes. If None, updates a single line graph.
plot (str): Type of the plot. Options are 'line', 'bar', 'pie', or 'area'.
Returns:
(np.ndarray): Updated image containing the graph.
Examples:
>>> analytics = Analytics(analytics_type="bar")
>>> frame_num = 10
>>> results_dict = {"person": 5, "car": 3}
>>> updated_image = analytics.update_graph(frame_num, results_dict, plot="bar")
"""
if count_dict is None:
# Single line update
x_data = np.append(self.line.get_xdata(), float(frame_number))
y_data = np.append(self.line.get_ydata(), float(self.total_counts))
if len(x_data) > self.max_points:
x_data, y_data = x_data[-self.max_points :], y_data[-self.max_points :]
self.line.set_data(x_data, y_data)
self.line.set_label("Counts")
self.line.set_color("#7b0068") # Pink color
self.line.set_marker("*")
self.line.set_markersize(self.line_width * 5)
else:
labels = list(count_dict.keys())
counts = list(count_dict.values())
if plot == "area":
color_cycle = cycle(["#DD00BA", "#042AFF", "#FF4447", "#7D24FF", "#BD00FF"])
# Multiple lines or area update
x_data = self.ax.lines[0].get_xdata() if self.ax.lines else np.array([])
y_data_dict = {key: np.array([]) for key in count_dict.keys()}
if self.ax.lines:
for line, key in zip(self.ax.lines, count_dict.keys()):
y_data_dict[key] = line.get_ydata()
x_data = np.append(x_data, float(frame_number))
max_length = len(x_data)
for key in count_dict.keys():
y_data_dict[key] = np.append(y_data_dict[key], float(count_dict[key]))
if len(y_data_dict[key]) < max_length:
y_data_dict[key] = np.pad(y_data_dict[key], (0, max_length - len(y_data_dict[key])))
if len(x_data) > self.max_points:
x_data = x_data[1:]
for key in count_dict.keys():
y_data_dict[key] = y_data_dict[key][1:]
self.ax.clear()
for key, y_data in y_data_dict.items():
color = next(color_cycle)
self.ax.fill_between(x_data, y_data, color=color, alpha=0.55)
self.ax.plot(
x_data,
y_data,
color=color,
linewidth=self.line_width,
marker="o",
markersize=self.line_width * 5,
label=f"{key} Data Points",
)
elif plot == "bar":
self.ax.clear() # clear bar data
for label in labels: # Map labels to colors
if label not in self.color_mapping:
self.color_mapping[label] = next(self.color_cycle)
colors = [self.color_mapping[label] for label in labels]
bars = self.ax.bar(labels, counts, color=colors)
for bar, count in zip(bars, counts):
self.ax.text(
bar.get_x() + bar.get_width() / 2,
bar.get_height(),
str(count),
ha="center",
va="bottom",
color=self.fg_color,
)
# Create the legend using labels from the bars
for bar, label in zip(bars, labels):
bar.set_label(label) # Assign label to each bar
self.ax.legend(loc="upper left", fontsize=13, facecolor=self.fg_color, edgecolor=self.fg_color)
elif plot == "pie":
total = sum(counts)
percentages = [size / total * 100 for size in counts]
self.ax.clear()
start_angle = 90
# Create pie chart and create legend labels with percentages
wedges, _ = self.ax.pie(
counts, labels=labels, startangle=start_angle, textprops={"color": self.fg_color}, autopct=None
)
legend_labels = [f"{label} ({percentage:.1f}%)" for label, percentage in zip(labels, percentages)]
# Assign the legend using the wedges and manually created labels
self.ax.legend(wedges, legend_labels, title="Classes", loc="center left", bbox_to_anchor=(1, 0, 0.5, 1))
self.fig.subplots_adjust(left=0.1, right=0.75) # Adjust layout to fit the legend
# Common plot settings
self.ax.set_facecolor("#f0f0f0") # Light gray background for the plot area
self.ax.grid(True, linestyle="--", linewidth=0.5, alpha=0.5) # Display grid for more data insights
self.ax.set_title(self.title, color=self.fg_color, fontsize=self.fontsize)
self.ax.set_xlabel(self.x_label, color=self.fg_color, fontsize=self.fontsize - 3)
self.ax.set_ylabel(self.y_label, color=self.fg_color, fontsize=self.fontsize - 3)
# Add and format legend
legend = self.ax.legend(loc="upper left", fontsize=13, facecolor=self.bg_color, edgecolor=self.bg_color)
for text in legend.get_texts():
text.set_color(self.fg_color)
# Redraw graph, update view, capture, and display the updated plot
self.ax.relim()
self.ax.autoscale_view()
self.canvas.draw()
im0 = np.array(self.canvas.renderer.buffer_rgba())
im0 = cv2.cvtColor(im0[:, :, :3], cv2.COLOR_RGBA2BGR)
self.display_output(im0)
return im0 # Return the image
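
For reference, a minimal usage sketch of Analytics on a video; process() requires the frame number, and per the caching above the chart is only re-rendered every `update_every` frames (default 30). The input path "traffic.mp4" is an assumed placeholder:

import cv2

from ultralytics.solutions import Analytics

analytics = Analytics(model="yolo11n.pt", analytics_type="area", show=False)
cap = cv2.VideoCapture("traffic.mp4")  # hypothetical input video
frame_number = 0
while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    frame_number += 1
    results = analytics.process(frame, frame_number)  # chart re-rendered every update_every frames
    cv2.imshow("Analytics", results.plot_im)
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break
cap.release()
cv2.destroyAllWindows()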


@@ -0,0 +1,108 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any
import cv2
@dataclass
class SolutionConfig:
"""
Manages configuration parameters for Ultralytics Vision AI solutions.
The SolutionConfig class serves as a centralized configuration container for all the
Ultralytics solution modules: https://docs.ultralytics.com/solutions/#solutions.
It leverages Python `dataclass` for clear, type-safe, and maintainable parameter definitions.
Attributes:
source (str, optional): Path to the input source (video, RTSP, etc.). Only usable with Solutions CLI.
model (str, optional): Path to the Ultralytics YOLO model to be used for inference.
classes (list[int], optional): List of class indices to filter detections.
show_conf (bool): Whether to show confidence scores on the visual output.
show_labels (bool): Whether to display class labels on visual output.
region (list[tuple[int, int]], optional): Polygonal region or line for object counting.
colormap (int, optional): OpenCV colormap constant for visual overlays (e.g., cv2.COLORMAP_JET).
show_in (bool): Whether to display count number for objects entering the region.
show_out (bool): Whether to display count number for objects leaving the region.
up_angle (float): Upper angle threshold used in pose-based workout monitoring.
down_angle (int): Lower angle threshold used in pose-based workout monitoring.
kpts (list[int]): Keypoint indices to monitor, e.g., for pose analytics.
analytics_type (str): Type of analytics to perform ("line", "area", "bar", "pie", etc.).
figsize (tuple[float, float], optional): Size of the matplotlib figure used for analytical plots (width, height).
blur_ratio (float): Ratio used to blur objects in the video frames (0.0 to 1.0).
vision_point (tuple[int, int]): Reference point for directional tracking or perspective drawing.
crop_dir (str): Directory path to save cropped detection images.
json_file (str): Path to a JSON file containing data for parking areas.
line_width (int): Line width for visual display elements such as bounding boxes, keypoints, and counts.
records (int): Number of detections required before an email alert is sent.
fps (float): Frame rate (Frames Per Second) for speed estimation calculation.
max_hist (int): Maximum number of historical points or states stored per tracked object for speed estimation.
meter_per_pixel (float): Scale for real-world measurement, used in speed or distance calculations.
max_speed (int): Maximum speed limit (e.g., km/h or mph) used in visual alerts or constraints.
show (bool): Whether to display the visual output on screen.
iou (float): Intersection-over-Union threshold for detection filtering.
conf (float): Confidence threshold for keeping predictions.
device (str, optional): Device to run inference on (e.g., 'cpu', '0' for CUDA GPU).
max_det (int): Maximum number of detections allowed per video frame.
half (bool): Whether to use FP16 precision (requires a supported CUDA device).
tracker (str): Path to tracking configuration YAML file (e.g., 'botsort.yaml').
verbose (bool): Enable verbose logging output for debugging or diagnostics.
data (str): Path to image directory used for similarity search.
Methods:
update: Update the configuration with user-defined keyword arguments and raise error on invalid keys.
Examples:
>>> from ultralytics.solutions.config import SolutionConfig
>>> cfg = SolutionConfig(model="yolo11n.pt", region=[(0, 0), (100, 0), (100, 100), (0, 100)])
>>> cfg.update(show=False, conf=0.3)
>>> print(cfg.model)
"""
source: str | None = None
model: str | None = None
classes: list[int] | None = None
show_conf: bool = True
show_labels: bool = True
region: list[tuple[int, int]] | None = None
colormap: int | None = cv2.COLORMAP_DEEPGREEN
show_in: bool = True
show_out: bool = True
up_angle: float = 145.0
down_angle: int = 90
kpts: list[int] = field(default_factory=lambda: [6, 8, 10])
analytics_type: str = "line"
figsize: tuple[float, float] | None = (12.8, 7.2)
blur_ratio: float = 0.5
vision_point: tuple[int, int] = (20, 20)
crop_dir: str = "cropped-detections"
json_file: str | None = None
line_width: int = 2
records: int = 5
fps: float = 30.0
max_hist: int = 5
meter_per_pixel: float = 0.05
max_speed: int = 120
show: bool = False
iou: float = 0.7
conf: float = 0.25
device: str | None = None
max_det: int = 300
half: bool = False
tracker: str = "botsort.yaml"
verbose: bool = True
data: str = "images"
def update(self, **kwargs: Any):
"""Update configuration parameters with new values provided as keyword arguments."""
for key, value in kwargs.items():
if hasattr(self, key):
setattr(self, key, value)
else:
url = "https://docs.ultralytics.com/solutions/#solutions-arguments"
raise ValueError(f"{key} is not a valid solution argument, see {url}")
return self
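
For reference, a short sketch of how update() behaves: known keys are set in place and the instance is returned, while an unknown key raises ValueError pointing at the solutions-arguments docs. The misspelled key below is intentional:

from ultralytics.solutions.config import SolutionConfig

cfg = SolutionConfig(model="yolo11n.pt", region=[(0, 0), (100, 0), (100, 100), (0, 100)])
cfg.update(conf=0.3, show=False)  # valid keys are updated in place; cfg is returned
try:
    cfg.update(confidence=0.3)  # unknown key
except ValueError as err:
    print(err)  # "confidence is not a valid solution argument, see https://docs.ultralytics.com/solutions/#solutions-arguments"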


@@ -0,0 +1,126 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import math
from typing import Any
import cv2
from ultralytics.solutions.solutions import BaseSolution, SolutionAnnotator, SolutionResults
from ultralytics.utils.plotting import colors
class DistanceCalculation(BaseSolution):
"""
A class to calculate distance between two objects in a real-time video stream based on their tracks.
This class extends BaseSolution to provide functionality for selecting objects and calculating the distance
between them in a video stream using YOLO object detection and tracking.
Attributes:
left_mouse_count (int): Counter for left mouse button clicks.
selected_boxes (dict[int, list[float]]): Dictionary to store selected bounding boxes and their track IDs.
centroids (list[list[int]]): List to store centroids of selected bounding boxes.
Methods:
mouse_event_for_distance: Handle mouse events for selecting objects in the video stream.
process: Process video frames and calculate the distance between selected objects.
Examples:
>>> distance_calc = DistanceCalculation()
>>> frame = cv2.imread("frame.jpg")
>>> results = distance_calc.process(frame)
>>> cv2.imshow("Distance Calculation", results.plot_im)
>>> cv2.waitKey(0)
"""
def __init__(self, **kwargs: Any) -> None:
"""Initialize the DistanceCalculation class for measuring object distances in video streams."""
super().__init__(**kwargs)
# Mouse event information
self.left_mouse_count = 0
self.selected_boxes: dict[int, list[float]] = {}
self.centroids: list[list[int]] = [] # Store centroids of selected objects
def mouse_event_for_distance(self, event: int, x: int, y: int, flags: int, param: Any) -> None:
"""
Handle mouse events to select regions in a real-time video stream for distance calculation.
Args:
event (int): Type of mouse event (e.g., cv2.EVENT_MOUSEMOVE, cv2.EVENT_LBUTTONDOWN).
x (int): X-coordinate of the mouse pointer.
y (int): Y-coordinate of the mouse pointer.
flags (int): Flags associated with the event (e.g., cv2.EVENT_FLAG_CTRLKEY, cv2.EVENT_FLAG_SHIFTKEY).
param (Any): Additional parameters passed to the function.
Examples:
>>> # Assuming 'dc' is an instance of DistanceCalculation
>>> cv2.setMouseCallback("window_name", dc.mouse_event_for_distance)
"""
if event == cv2.EVENT_LBUTTONDOWN:
self.left_mouse_count += 1
if self.left_mouse_count <= 2:
for box, track_id in zip(self.boxes, self.track_ids):
if box[0] < x < box[2] and box[1] < y < box[3] and track_id not in self.selected_boxes:
self.selected_boxes[track_id] = box
elif event == cv2.EVENT_RBUTTONDOWN:
self.selected_boxes = {}
self.left_mouse_count = 0
def process(self, im0) -> SolutionResults:
"""
Process a video frame and calculate the distance between two selected bounding boxes.
This method extracts tracks from the input frame, annotates bounding boxes, and calculates the distance
between two user-selected objects if they have been chosen.
Args:
im0 (np.ndarray): The input image frame to process.
Returns:
(SolutionResults): Contains processed image `plot_im`, `total_tracks` (int) representing the total number
of tracked objects, and `pixels_distance` (float) representing the distance between selected objects
in pixels.
Examples:
>>> import numpy as np
>>> from ultralytics.solutions import DistanceCalculation
>>> dc = DistanceCalculation()
>>> frame = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
>>> results = dc.process(frame)
>>> print(f"Distance: {results.pixels_distance:.2f} pixels")
"""
self.extract_tracks(im0) # Extract tracks
annotator = SolutionAnnotator(im0, line_width=self.line_width) # Initialize annotator
pixels_distance = 0
# Iterate over bounding boxes, track ids and classes index
for box, track_id, cls, conf in zip(self.boxes, self.track_ids, self.clss, self.confs):
annotator.box_label(box, color=colors(int(cls), True), label=self.adjust_box_label(cls, conf, track_id))
# Update selected boxes if they're being tracked
if len(self.selected_boxes) == 2:
for trk_id in self.selected_boxes.keys():
if trk_id == track_id:
self.selected_boxes[track_id] = box
if len(self.selected_boxes) == 2:
# Calculate centroids of selected boxes
self.centroids.extend(
[[int((box[0] + box[2]) // 2), int((box[1] + box[3]) // 2)] for box in self.selected_boxes.values()]
)
# Calculate Euclidean distance between centroids
pixels_distance = math.sqrt(
(self.centroids[0][0] - self.centroids[1][0]) ** 2 + (self.centroids[0][1] - self.centroids[1][1]) ** 2
)
annotator.plot_distance_and_line(pixels_distance, self.centroids)
self.centroids = [] # Reset centroids for next frame
plot_im = annotator.result()
self.display_output(plot_im) # Display output with base class function
if self.CFG.get("show") and self.env_check:
cv2.setMouseCallback("Ultralytics Solutions", self.mouse_event_for_distance)
# Return SolutionResults with processed image and calculated metrics
return SolutionResults(plot_im=plot_im, pixels_distance=pixels_distance, total_tracks=len(self.track_ids))
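
For reference, a minimal usage sketch of DistanceCalculation; show=True is needed so the mouse callback is registered (left-click two boxes to select them, right-click to reset). The input path "street.mp4" is an assumed placeholder:

import cv2

from ultralytics.solutions import DistanceCalculation

dc = DistanceCalculation(model="yolo11n.pt", show=True)  # show=True registers the mouse callback
cap = cv2.VideoCapture("street.mp4")  # hypothetical input video
while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    results = dc.process(frame)  # distance is 0 until two boxes have been selected
    if results.pixels_distance:
        print(f"Selected objects are {results.pixels_distance:.1f} px apart")
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break
cap.release()
cv2.destroyAllWindows()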


@@ -0,0 +1,131 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from __future__ import annotations
from typing import Any
import cv2
import numpy as np
from ultralytics.solutions.object_counter import ObjectCounter
from ultralytics.solutions.solutions import SolutionAnnotator, SolutionResults
class Heatmap(ObjectCounter):
"""
A class to draw heatmaps in real-time video streams based on object tracks.
This class extends the ObjectCounter class to generate and visualize heatmaps of object movements in video
streams. It uses tracked object positions to create a cumulative heatmap effect over time.
Attributes:
initialized (bool): Flag indicating whether the heatmap has been initialized.
colormap (int): OpenCV colormap used for heatmap visualization.
heatmap (np.ndarray): Array storing the cumulative heatmap data.
annotator (SolutionAnnotator): Object for drawing annotations on the image.
Methods:
heatmap_effect: Calculate and update the heatmap effect for a given bounding box.
process: Generate and apply the heatmap effect to each frame.
Examples:
>>> from ultralytics.solutions import Heatmap
>>> heatmap = Heatmap(model="yolo11n.pt", colormap=cv2.COLORMAP_JET)
>>> frame = cv2.imread("frame.jpg")
>>> processed_frame = heatmap.process(frame)
"""
def __init__(self, **kwargs: Any) -> None:
"""
Initialize the Heatmap class for real-time video stream heatmap generation based on object tracks.
Args:
**kwargs (Any): Keyword arguments passed to the parent ObjectCounter class.
"""
super().__init__(**kwargs)
self.initialized = False # Flag for heatmap initialization
if self.region is not None: # Check if user provided the region coordinates
self.initialize_region()
# Store colormap
self.colormap = self.CFG["colormap"]
self.heatmap = None
def heatmap_effect(self, box: list[float]) -> None:
"""
Efficiently calculate heatmap area and effect location for applying colormap.
Args:
box (list[float]): Bounding box coordinates [x0, y0, x1, y1].
"""
x0, y0, x1, y1 = map(int, box)
radius_squared = (min(x1 - x0, y1 - y0) // 2) ** 2
# Create a meshgrid with region of interest (ROI) for vectorized distance calculations
xv, yv = np.meshgrid(np.arange(x0, x1), np.arange(y0, y1))
# Calculate squared distances from the center
dist_squared = (xv - ((x0 + x1) // 2)) ** 2 + (yv - ((y0 + y1) // 2)) ** 2
# Create a mask of points within the radius
within_radius = dist_squared <= radius_squared
# Update only the values within the bounding box in a single vectorized operation
self.heatmap[y0:y1, x0:x1][within_radius] += 2
def process(self, im0: np.ndarray) -> SolutionResults:
"""
Generate heatmap for each frame using Ultralytics tracking.
Args:
im0 (np.ndarray): Input image array for processing.
Returns:
(SolutionResults): Contains processed image `plot_im`,
'in_count' (int, count of objects entering the region),
'out_count' (int, count of objects exiting the region),
'classwise_count' (dict, per-class object count), and
'total_tracks' (int, total number of tracked objects).
"""
if not self.initialized:
self.heatmap = np.zeros_like(im0, dtype=np.float32) * 0.99
self.initialized = True # Initialize heatmap only once
self.extract_tracks(im0) # Extract tracks
self.annotator = SolutionAnnotator(im0, line_width=self.line_width) # Initialize annotator
# Iterate over bounding boxes, track ids and classes index
for box, track_id, cls in zip(self.boxes, self.track_ids, self.clss):
# Apply heatmap effect for the bounding box
self.heatmap_effect(box)
if self.region is not None:
self.annotator.draw_region(reg_pts=self.region, color=(104, 0, 123), thickness=self.line_width * 2)
self.store_tracking_history(track_id, box) # Store track history
# Get previous position if available
prev_position = None
if len(self.track_history[track_id]) > 1:
prev_position = self.track_history[track_id][-2]
self.count_objects(self.track_history[track_id][-1], track_id, prev_position, cls) # object counting
plot_im = self.annotator.result()
if self.region is not None:
self.display_counts(plot_im) # Display the counts on the frame
# Normalize, apply colormap to heatmap and combine with original image
if self.track_data.is_track:
normalized_heatmap = cv2.normalize(self.heatmap, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
colored_heatmap = cv2.applyColorMap(normalized_heatmap, self.colormap)
plot_im = cv2.addWeighted(plot_im, 0.5, colored_heatmap, 0.5, 0)
self.display_output(plot_im) # Display output with base class function
# Return SolutionResults
return SolutionResults(
plot_im=plot_im,
in_count=self.in_count,
out_count=self.out_count,
classwise_count=dict(self.classwise_count),
total_tracks=len(self.track_ids),
)
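
For reference, a minimal sketch of writing the blended heatmap frames to disk; the input path "traffic.mp4", the output name, and the fixed 30 FPS are assumptions:

import cv2

from ultralytics.solutions import Heatmap

heatmap = Heatmap(model="yolo11n.pt", colormap=cv2.COLORMAP_JET, show=False)
cap = cv2.VideoCapture("traffic.mp4")  # hypothetical input video
w, h = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
writer = cv2.VideoWriter("heatmap.avi", cv2.VideoWriter_fourcc(*"MJPG"), 30, (w, h))
while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    results = heatmap.process(frame)  # cumulative heatmap blended into plot_im
    writer.write(results.plot_im)
cap.release()
writer.release()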


@@ -0,0 +1,89 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from typing import Any
from ultralytics.engine.results import Results
from ultralytics.solutions.solutions import BaseSolution, SolutionResults
class InstanceSegmentation(BaseSolution):
"""
A class to manage instance segmentation in images or video streams.
This class extends the BaseSolution class and provides functionality for performing instance segmentation, including
drawing segmented masks with bounding boxes and labels.
Attributes:
model (str): The segmentation model to use for inference.
line_width (int): Width of the bounding box and text lines.
names (dict[int, str]): Dictionary mapping class indices to class names.
clss (list[int]): List of detected class indices.
track_ids (list[int]): List of track IDs for detected instances.
masks (list[np.ndarray]): List of segmentation masks for detected instances.
show_conf (bool): Whether to display confidence scores.
show_labels (bool): Whether to display class labels.
show_boxes (bool): Whether to display bounding boxes.
Methods:
process: Process the input image to perform instance segmentation and annotate results.
extract_tracks: Extract tracks including bounding boxes, classes, and masks from model predictions.
Examples:
>>> segmenter = InstanceSegmentation()
>>> frame = cv2.imread("frame.jpg")
>>> results = segmenter.process(frame)
>>> print(f"Total segmented instances: {results.total_tracks}")
"""
def __init__(self, **kwargs: Any) -> None:
"""
Initialize the InstanceSegmentation class for detecting and annotating segmented instances.
Args:
**kwargs (Any): Keyword arguments passed to the BaseSolution parent class.
model (str): Model name or path, defaults to "yolo11n-seg.pt".
"""
kwargs["model"] = kwargs.get("model", "yolo11n-seg.pt")
super().__init__(**kwargs)
self.show_conf = self.CFG.get("show_conf", True)
self.show_labels = self.CFG.get("show_labels", True)
self.show_boxes = self.CFG.get("show_boxes", True)
def process(self, im0) -> SolutionResults:
"""
Perform instance segmentation on the input image and annotate the results.
Args:
im0 (np.ndarray): The input image for segmentation.
Returns:
(SolutionResults): Object containing the annotated image and total number of tracked instances.
Examples:
>>> segmenter = InstanceSegmentation()
>>> frame = cv2.imread("image.jpg")
>>> summary = segmenter.process(frame)
>>> print(summary)
"""
self.extract_tracks(im0) # Extract tracks (bounding boxes, classes, and masks)
self.masks = getattr(self.tracks, "masks", None)
# Iterate over detected classes, track IDs, and segmentation masks
if self.masks is None:
self.LOGGER.warning("No masks detected! Ensure you're using a supported Ultralytics segmentation model.")
plot_im = im0
else:
results = Results(im0, path=None, names=self.names, boxes=self.track_data.data, masks=self.masks.data)
plot_im = results.plot(
line_width=self.line_width,
boxes=self.show_boxes,
conf=self.show_conf,
labels=self.show_labels,
color_mode="instance",
)
self.display_output(plot_im) # Display the annotated output using the base class function
# Return SolutionResults
return SolutionResults(plot_im=plot_im, total_tracks=len(self.track_ids))
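
For reference, a short single-image sketch of InstanceSegmentation; the file names "frame.jpg" and "segmented.jpg" are assumed placeholders:

import cv2

from ultralytics.solutions import InstanceSegmentation

segmenter = InstanceSegmentation(model="yolo11n-seg.pt", show=False)
frame = cv2.imread("frame.jpg")  # hypothetical image
results = segmenter.process(frame)
print(f"Segmented instances: {results.total_tracks}")
cv2.imwrite("segmented.jpg", results.plot_im)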


@@ -0,0 +1,92 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from typing import Any
import cv2
from ultralytics.solutions.solutions import BaseSolution, SolutionAnnotator, SolutionResults
from ultralytics.utils import LOGGER
from ultralytics.utils.plotting import colors
class ObjectBlurrer(BaseSolution):
"""
A class to manage the blurring of detected objects in a real-time video stream.
This class extends the BaseSolution class and provides functionality for blurring objects based on detected bounding
boxes. The blurred areas are updated directly in the input image, allowing for privacy preservation or other effects.
Attributes:
blur_ratio (int): The intensity of the blur effect applied to detected objects (higher values create more blur).
iou (float): Intersection over Union threshold for object detection.
conf (float): Confidence threshold for object detection.
Methods:
process: Apply a blurring effect to detected objects in the input image.
extract_tracks: Extract tracking information from detected objects.
display_output: Display the processed output image.
Examples:
>>> blurrer = ObjectBlurrer()
>>> frame = cv2.imread("frame.jpg")
>>> processed_results = blurrer.process(frame)
>>> print(f"Total blurred objects: {processed_results.total_tracks}")
"""
def __init__(self, **kwargs: Any) -> None:
"""
Initialize the ObjectBlurrer class for applying a blur effect to objects detected in video streams or images.
Args:
**kwargs (Any): Keyword arguments passed to the parent class and for configuration.
blur_ratio (float): Intensity of the blur effect (0.1-1.0, default=0.5).
"""
super().__init__(**kwargs)
blur_ratio = self.CFG["blur_ratio"]
if blur_ratio < 0.1:
LOGGER.warning("blur ratio cannot be less than 0.1, updating it to default value 0.5")
blur_ratio = 0.5
self.blur_ratio = int(blur_ratio * 100)
def process(self, im0) -> SolutionResults:
"""
Apply a blurring effect to detected objects in the input image.
This method extracts tracking information, applies blur to regions corresponding to detected objects,
and annotates the image with bounding boxes.
Args:
im0 (np.ndarray): The input image containing detected objects.
Returns:
(SolutionResults): Object containing the processed image and number of tracked objects.
- plot_im (np.ndarray): The annotated output image with blurred objects.
- total_tracks (int): The total number of tracked objects in the frame.
Examples:
>>> blurrer = ObjectBlurrer()
>>> frame = cv2.imread("image.jpg")
>>> results = blurrer.process(frame)
>>> print(f"Blurred {results.total_tracks} objects")
"""
self.extract_tracks(im0) # Extract tracks
annotator = SolutionAnnotator(im0, self.line_width)
# Iterate over bounding boxes and classes
for box, cls, conf in zip(self.boxes, self.clss, self.confs):
# Crop and blur the detected object
blur_obj = cv2.blur(
im0[int(box[1]) : int(box[3]), int(box[0]) : int(box[2])],
(self.blur_ratio, self.blur_ratio),
)
# Update the blurred area in the original image
im0[int(box[1]) : int(box[3]), int(box[0]) : int(box[2])] = blur_obj
annotator.box_label(
box, label=self.adjust_box_label(cls, conf), color=colors(cls, True)
) # Annotate bounding box
plot_im = annotator.result()
self.display_output(plot_im) # Display the output using the base class function
# Return a SolutionResults
return SolutionResults(plot_im=plot_im, total_tracks=len(self.track_ids))
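
For reference, a short sketch of ObjectBlurrer restricted to a single class; the file names and the choice of class 0 (person in COCO) are assumptions:

import cv2

from ultralytics.solutions import ObjectBlurrer

blurrer = ObjectBlurrer(model="yolo11n.pt", blur_ratio=0.6, classes=[0], show=False)  # blur persons only
frame = cv2.imread("street.jpg")  # hypothetical image
results = blurrer.process(frame)
cv2.imwrite("blurred.jpg", results.plot_im)
print(f"Blurred {results.total_tracks} objects")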


@@ -0,0 +1,197 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from __future__ import annotations
from collections import defaultdict
from typing import Any
from ultralytics.solutions.solutions import BaseSolution, SolutionAnnotator, SolutionResults
from ultralytics.utils.plotting import colors
class ObjectCounter(BaseSolution):
"""
A class to manage the counting of objects in a real-time video stream based on their tracks.
This class extends the BaseSolution class and provides functionality for counting objects moving in and out of a
specified region in a video stream. It supports both polygonal and linear regions for counting.
Attributes:
in_count (int): Counter for objects moving inward.
out_count (int): Counter for objects moving outward.
counted_ids (list[int]): List of IDs of objects that have been counted.
classwise_count (dict[str, dict[str, int]]): Dictionary for counts, categorized by object class.
region_initialized (bool): Flag indicating whether the counting region has been initialized.
show_in (bool): Flag to control display of inward count.
show_out (bool): Flag to control display of outward count.
margin (int): Margin for background rectangle size to display counts properly.
Methods:
count_objects: Count objects within a polygonal or linear region based on their tracks.
display_counts: Display object counts on the frame.
process: Process input data and update counts.
Examples:
>>> counter = ObjectCounter()
>>> frame = cv2.imread("frame.jpg")
>>> results = counter.process(frame)
>>> print(f"Inward count: {counter.in_count}, Outward count: {counter.out_count}")
"""
def __init__(self, **kwargs: Any) -> None:
"""Initialize the ObjectCounter class for real-time object counting in video streams."""
super().__init__(**kwargs)
self.in_count = 0 # Counter for objects moving inward
self.out_count = 0 # Counter for objects moving outward
self.counted_ids = [] # List of IDs of objects that have been counted
self.classwise_count = defaultdict(lambda: {"IN": 0, "OUT": 0}) # Dictionary for counts, categorized by class
self.region_initialized = False # Flag indicating whether the region has been initialized
self.show_in = self.CFG["show_in"]
self.show_out = self.CFG["show_out"]
self.margin = self.line_width * 2 # Scales the background rectangle size to display counts properly
def count_objects(
self,
current_centroid: tuple[float, float],
track_id: int,
prev_position: tuple[float, float] | None,
cls: int,
) -> None:
"""
Count objects within a polygonal or linear region based on their tracks.
Args:
current_centroid (tuple[float, float]): Current centroid coordinates (x, y) in the current frame.
track_id (int): Unique identifier for the tracked object.
prev_position (tuple[float, float], optional): Last frame position coordinates (x, y) of the track.
cls (int): Class index for classwise count updates.
Examples:
>>> counter = ObjectCounter()
>>> track_line = {1: [100, 200], 2: [110, 210], 3: [120, 220]}
>>> box = [130, 230, 150, 250]
>>> track_id_num = 1
>>> previous_position = (120, 220)
>>> class_to_count = 0 # In COCO model, class 0 = person
>>> counter.count_objects((140, 240), track_id_num, previous_position, class_to_count)
"""
if prev_position is None or track_id in self.counted_ids:
return
if len(self.region) == 2: # Linear region (defined as a line segment)
if self.r_s.intersects(self.LineString([prev_position, current_centroid])):
# Determine orientation of the region (vertical or horizontal)
if abs(self.region[0][0] - self.region[1][0]) < abs(self.region[0][1] - self.region[1][1]):
# Vertical region: Compare x-coordinates to determine direction
if current_centroid[0] > prev_position[0]: # Moving right
self.in_count += 1
self.classwise_count[self.names[cls]]["IN"] += 1
else: # Moving left
self.out_count += 1
self.classwise_count[self.names[cls]]["OUT"] += 1
# Horizontal region: Compare y-coordinates to determine direction
elif current_centroid[1] > prev_position[1]: # Moving downward
self.in_count += 1
self.classwise_count[self.names[cls]]["IN"] += 1
else: # Moving upward
self.out_count += 1
self.classwise_count[self.names[cls]]["OUT"] += 1
self.counted_ids.append(track_id)
elif len(self.region) > 2: # Polygonal region
if self.r_s.contains(self.Point(current_centroid)):
# Determine motion direction for vertical or horizontal polygons
region_width = max(p[0] for p in self.region) - min(p[0] for p in self.region)
region_height = max(p[1] for p in self.region) - min(p[1] for p in self.region)
if (
region_width < region_height
and current_centroid[0] > prev_position[0]
or region_width >= region_height
and current_centroid[1] > prev_position[1]
): # Moving right or downward
self.in_count += 1
self.classwise_count[self.names[cls]]["IN"] += 1
else: # Moving left or upward
self.out_count += 1
self.classwise_count[self.names[cls]]["OUT"] += 1
self.counted_ids.append(track_id)
def display_counts(self, plot_im) -> None:
"""
Display object counts on the input image or frame.
Args:
plot_im (np.ndarray): The image or frame to display counts on.
Examples:
>>> counter = ObjectCounter()
>>> frame = cv2.imread("image.jpg")
>>> counter.display_counts(frame)
"""
labels_dict = {
str.capitalize(key): f"{'IN ' + str(value['IN']) if self.show_in else ''} "
f"{'OUT ' + str(value['OUT']) if self.show_out else ''}".strip()
for key, value in self.classwise_count.items()
if value["IN"] != 0 or value["OUT"] != 0 and (self.show_in or self.show_out)
}
if labels_dict:
self.annotator.display_analytics(plot_im, labels_dict, (104, 31, 17), (255, 255, 255), self.margin)
def process(self, im0) -> SolutionResults:
"""
Process input data (frames or object tracks) and update object counts.
This method initializes the counting region, extracts tracks, draws bounding boxes and regions, updates
object counts, and displays the results on the input image.
Args:
im0 (np.ndarray): The input image or frame to be processed.
Returns:
(SolutionResults): Contains processed image `plot_im`, 'in_count' (int, count of objects entering the region),
'out_count' (int, count of objects exiting the region), 'classwise_count' (dict, per-class object count),
and 'total_tracks' (int, total number of tracked objects).
Examples:
>>> counter = ObjectCounter()
>>> frame = cv2.imread("path/to/image.jpg")
>>> results = counter.process(frame)
"""
if not self.region_initialized:
self.initialize_region()
self.region_initialized = True
self.extract_tracks(im0) # Extract tracks
self.annotator = SolutionAnnotator(im0, line_width=self.line_width) # Initialize annotator
self.annotator.draw_region(
reg_pts=self.region, color=(104, 0, 123), thickness=self.line_width * 2
) # Draw region
# Iterate over bounding boxes, track ids and classes index
for box, track_id, cls, conf in zip(self.boxes, self.track_ids, self.clss, self.confs):
# Draw bounding box and counting region
self.annotator.box_label(box, label=self.adjust_box_label(cls, conf, track_id), color=colors(cls, True))
self.store_tracking_history(track_id, box) # Store track history
# Store previous position of track for object counting
prev_position = None
if len(self.track_history[track_id]) > 1:
prev_position = self.track_history[track_id][-2]
self.count_objects(self.track_history[track_id][-1], track_id, prev_position, cls) # object counting
plot_im = self.annotator.result()
self.display_counts(plot_im) # Display the counts on the frame
self.display_output(plot_im) # Display output with base class function
# Return SolutionResults
return SolutionResults(
plot_im=plot_im,
in_count=self.in_count,
out_count=self.out_count,
classwise_count=dict(self.classwise_count),
total_tracks=len(self.track_ids),
)
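
The linear-region branch of count_objects above reduces to a segment-intersection test between the counting line and the track's last displacement, with direction decided along the region's shorter axis. A standalone sketch of that check, using Shapely directly under the assumption that the base class wraps the same geometry objects via self.LineString and self.r_s:

from shapely.geometry import LineString

region_line = LineString([(20, 400), (1260, 400)])  # horizontal counting line
prev_centroid, curr_centroid = (640, 390), (640, 410)  # centroid moved downward across the line

crossed = region_line.intersects(LineString([prev_centroid, curr_centroid]))
# The region spans more in x than in y, so direction is decided on the y-axis:
moving_in = crossed and curr_centroid[1] > prev_centroid[1]  # downward => counted as "IN"
print(crossed, moving_in)  # True True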


@@ -0,0 +1,93 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import os
from pathlib import Path
from typing import Any
from ultralytics.solutions.solutions import BaseSolution, SolutionResults
from ultralytics.utils.plotting import save_one_box
class ObjectCropper(BaseSolution):
"""
A class to manage the cropping of detected objects in a real-time video stream or images.
This class extends the BaseSolution class and provides functionality for cropping objects based on detected bounding
boxes. The cropped images are saved to a specified directory for further analysis or usage.
Attributes:
crop_dir (str): Directory where cropped object images are stored.
crop_idx (int): Counter for the total number of cropped objects.
iou (float): IoU (Intersection over Union) threshold for non-maximum suppression.
conf (float): Confidence threshold for filtering detections.
Methods:
process: Crop detected objects from the input image and save them to the output directory.
Examples:
>>> cropper = ObjectCropper()
>>> frame = cv2.imread("frame.jpg")
>>> processed_results = cropper.process(frame)
>>> print(f"Total cropped objects: {cropper.crop_idx}")
"""
def __init__(self, **kwargs: Any) -> None:
"""
Initialize the ObjectCropper class for cropping objects from detected bounding boxes.
Args:
**kwargs (Any): Keyword arguments passed to the parent class and used for configuration.
crop_dir (str): Path to the directory for saving cropped object images.
"""
super().__init__(**kwargs)
self.crop_dir = self.CFG["crop_dir"] # Directory for storing cropped detections
if not os.path.exists(self.crop_dir):
os.mkdir(self.crop_dir) # Create directory if it does not exist
if self.CFG["show"]:
self.LOGGER.warning(
f"show=True disabled for crop solution, results will be saved in the directory named: {self.crop_dir}"
)
self.crop_idx = 0 # Initialize counter for total cropped objects
self.iou = self.CFG["iou"]
self.conf = self.CFG["conf"]
def process(self, im0) -> SolutionResults:
"""
Crop detected objects from the input image and save them as separate images.
Args:
im0 (np.ndarray): The input image containing detected objects.
Returns:
(SolutionResults): A SolutionResults object containing the total number of cropped objects and processed
image.
Examples:
>>> cropper = ObjectCropper()
>>> frame = cv2.imread("image.jpg")
>>> results = cropper.process(frame)
>>> print(f"Total cropped objects: {results.total_crop_objects}")
"""
with self.profilers[0]:
results = self.model.predict(
im0,
classes=self.classes,
conf=self.conf,
iou=self.iou,
device=self.CFG["device"],
verbose=False,
)[0]
self.clss = results.boxes.cls.tolist() # required for logging only.
for box in results.boxes:
self.crop_idx += 1
save_one_box(
box.xyxy,
im0,
file=Path(self.crop_dir) / f"crop_{self.crop_idx}.jpg",
BGR=True,
)
# Return SolutionResults
return SolutionResults(plot_im=im0, total_crop_objects=self.crop_idx)
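
For reference, a short sketch of ObjectCropper on a single image; the input file name is an assumed placeholder and the crop directory shown is the default from SolutionConfig:

import cv2

from ultralytics.solutions import ObjectCropper

cropper = ObjectCropper(model="yolo11n.pt", crop_dir="cropped-detections", conf=0.25)
frame = cv2.imread("frame.jpg")  # hypothetical image
results = cropper.process(frame)  # writes crop_1.jpg, crop_2.jpg, ... into crop_dir
print(f"Total cropped objects: {results.total_crop_objects}")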


@@ -0,0 +1,278 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from __future__ import annotations
import json
from typing import Any
import cv2
import numpy as np
from ultralytics.solutions.solutions import BaseSolution, SolutionAnnotator, SolutionResults
from ultralytics.utils import LOGGER
from ultralytics.utils.checks import check_imshow
class ParkingPtsSelection:
"""
A class for selecting and managing parking zone points on images using a Tkinter-based UI.
This class provides functionality to upload an image, select points to define parking zones, and save the
selected points to a JSON file. It uses Tkinter for the graphical user interface.
Attributes:
tk (module): The Tkinter module for GUI operations.
filedialog (module): Tkinter's filedialog module for file selection operations.
messagebox (module): Tkinter's messagebox module for displaying message boxes.
master (tk.Tk): The main Tkinter window.
canvas (tk.Canvas): The canvas widget for displaying the image and drawing bounding boxes.
image (PIL.Image.Image): The uploaded image.
canvas_image (ImageTk.PhotoImage): The image displayed on the canvas.
rg_data (list[list[tuple[int, int]]]): List of bounding boxes, each defined by 4 points.
current_box (list[tuple[int, int]]): Temporary storage for the points of the current bounding box.
imgw (int): Original width of the uploaded image.
imgh (int): Original height of the uploaded image.
canvas_max_width (int): Maximum width of the canvas.
canvas_max_height (int): Maximum height of the canvas.
Methods:
initialize_properties: Initialize properties for image, canvas, bounding boxes, and dimensions.
upload_image: Upload and display an image on the canvas, resizing it to fit within specified dimensions.
on_canvas_click: Handle mouse clicks to add points for bounding boxes on the canvas.
draw_box: Draw a bounding box on the canvas using the provided coordinates.
remove_last_bounding_box: Remove the last bounding box from the list and redraw the canvas.
redraw_canvas: Redraw the canvas with the image and all bounding boxes.
save_to_json: Save the selected parking zone points to a JSON file with scaled coordinates.
Examples:
>>> parking_selector = ParkingPtsSelection()
>>> # Use the GUI to upload an image, select parking zones, and save the data
"""
def __init__(self) -> None:
"""Initialize the ParkingPtsSelection class, setting up UI and properties for parking zone point selection."""
try: # Check if tkinter is installed
import tkinter as tk
from tkinter import filedialog, messagebox
except ImportError: # Display error with recommendations
import platform
install_cmd = {
"Linux": "sudo apt install python3-tk (Debian/Ubuntu) | sudo dnf install python3-tkinter (Fedora) | "
"sudo pacman -S tk (Arch)",
"Windows": "reinstall Python and enable the checkbox `tcl/tk and IDLE` on **Optional Features** during installation",
"Darwin": "reinstall Python from https://www.python.org/downloads/macos/ or `brew install python-tk`",
}.get(platform.system(), "Unknown OS. Check your Python installation.")
LOGGER.warning(f" Tkinter is not configured or supported. Potential fix: {install_cmd}")
return
if not check_imshow(warn=True):
return
self.tk, self.filedialog, self.messagebox = tk, filedialog, messagebox
self.master = self.tk.Tk() # Reference to the main application window
self.master.title("Ultralytics Parking Zones Points Selector")
self.master.resizable(False, False)
self.canvas = self.tk.Canvas(self.master, bg="white") # Canvas widget for displaying images
self.canvas.pack(side=self.tk.BOTTOM)
self.image = None # Variable to store the loaded image
self.canvas_image = None # Reference to the image displayed on the canvas
self.canvas_max_width = None # Maximum allowed width for the canvas
self.canvas_max_height = None # Maximum allowed height for the canvas
self.rg_data = None # Data for region annotation management
self.current_box = None # Stores the currently selected bounding box
self.imgh = None # Height of the current image
self.imgw = None # Width of the current image
# Button frame with buttons
button_frame = self.tk.Frame(self.master)
button_frame.pack(side=self.tk.TOP)
for text, cmd in [
("Upload Image", self.upload_image),
("Remove Last BBox", self.remove_last_bounding_box),
("Save", self.save_to_json),
]:
self.tk.Button(button_frame, text=text, command=cmd).pack(side=self.tk.LEFT)
self.initialize_properties()
self.master.mainloop()
def initialize_properties(self) -> None:
"""Initialize properties for image, canvas, bounding boxes, and dimensions."""
self.image = self.canvas_image = None
self.rg_data, self.current_box = [], []
self.imgw = self.imgh = 0
self.canvas_max_width, self.canvas_max_height = 1280, 720
def upload_image(self) -> None:
"""Upload and display an image on the canvas, resizing it to fit within specified dimensions."""
from PIL import Image, ImageTk # Scoped import because ImageTk requires tkinter package
file = self.filedialog.askopenfilename(filetypes=[("Image Files", "*.png *.jpg *.jpeg")])
if not file:
LOGGER.info("No image selected.")
return
self.image = Image.open(file)
self.imgw, self.imgh = self.image.size
aspect_ratio = self.imgw / self.imgh
canvas_width = (
min(self.canvas_max_width, self.imgw) if aspect_ratio > 1 else int(self.canvas_max_height * aspect_ratio)
)
canvas_height = (
min(self.canvas_max_height, self.imgh) if aspect_ratio <= 1 else int(canvas_width / aspect_ratio)
)
self.canvas.config(width=canvas_width, height=canvas_height)
self.canvas_image = ImageTk.PhotoImage(self.image.resize((canvas_width, canvas_height)))
self.canvas.create_image(0, 0, anchor=self.tk.NW, image=self.canvas_image)
self.canvas.bind("<Button-1>", self.on_canvas_click)
self.rg_data.clear()
self.current_box.clear()
def on_canvas_click(self, event) -> None:
"""Handle mouse clicks to add points for bounding boxes on the canvas."""
self.current_box.append((event.x, event.y))
self.canvas.create_oval(event.x - 3, event.y - 3, event.x + 3, event.y + 3, fill="red")
if len(self.current_box) == 4:
self.rg_data.append(self.current_box.copy())
self.draw_box(self.current_box)
self.current_box.clear()
def draw_box(self, box: list[tuple[int, int]]) -> None:
"""Draw a bounding box on the canvas using the provided coordinates."""
for i in range(4):
self.canvas.create_line(box[i], box[(i + 1) % 4], fill="blue", width=2)
def remove_last_bounding_box(self) -> None:
"""Remove the last bounding box from the list and redraw the canvas."""
if not self.rg_data:
self.messagebox.showwarning("Warning", "No bounding boxes to remove.")
return
self.rg_data.pop()
self.redraw_canvas()
def redraw_canvas(self) -> None:
"""Redraw the canvas with the image and all bounding boxes."""
self.canvas.delete("all")
self.canvas.create_image(0, 0, anchor=self.tk.NW, image=self.canvas_image)
for box in self.rg_data:
self.draw_box(box)
def save_to_json(self) -> None:
"""Save the selected parking zone points to a JSON file with scaled coordinates."""
scale_w, scale_h = self.imgw / self.canvas.winfo_width(), self.imgh / self.canvas.winfo_height()
data = [{"points": [(int(x * scale_w), int(y * scale_h)) for x, y in box]} for box in self.rg_data]
from io import StringIO # Function level import, as it's only required to store coordinates
write_buffer = StringIO()
json.dump(data, write_buffer, indent=4)
with open("bounding_boxes.json", "w", encoding="utf-8") as f:
f.write(write_buffer.getvalue())
self.messagebox.showinfo("Success", "Bounding boxes saved to bounding_boxes.json")
class ParkingManagement(BaseSolution):
"""
Manages parking occupancy and availability using YOLO model for real-time monitoring and visualization.
This class extends BaseSolution to provide functionality for parking lot management, including detection of
occupied spaces, visualization of parking regions, and display of occupancy statistics.
Attributes:
json_file (str): Path to the JSON file containing parking region details.
json (list[dict]): Loaded JSON data containing parking region information.
pr_info (dict[str, int]): Dictionary storing parking information (Occupancy and Available spaces).
arc (tuple[int, int, int]): RGB color tuple for available region visualization.
occ (tuple[int, int, int]): RGB color tuple for occupied region visualization.
dc (tuple[int, int, int]): RGB color tuple for centroid visualization of detected objects.
Methods:
process: Process the input image for parking lot management and visualization.
Examples:
>>> from ultralytics.solutions import ParkingManagement
>>> parking_manager = ParkingManagement(model="yolo11n.pt", json_file="parking_regions.json")
>>> print(f"Occupied spaces: {parking_manager.pr_info['Occupancy']}")
>>> print(f"Available spaces: {parking_manager.pr_info['Available']}")
"""
def __init__(self, **kwargs: Any) -> None:
"""Initialize the parking management system with a YOLO model and visualization settings."""
super().__init__(**kwargs)
self.json_file = self.CFG["json_file"] # Load parking regions JSON data
if self.json_file is None:
LOGGER.warning("json_file argument missing. Parking region details required.")
raise ValueError("❌ JSON file path cannot be empty")
with open(self.json_file) as f:
self.json = json.load(f)
self.pr_info = {"Occupancy": 0, "Available": 0} # Dictionary for parking information
self.arc = (0, 0, 255) # Available region color
self.occ = (0, 255, 0) # Occupied region color
self.dc = (255, 0, 189) # Centroid color for each box
def process(self, im0: np.ndarray) -> SolutionResults:
"""
Process the input image for parking lot management and visualization.
This function analyzes the input image, extracts tracks, and determines the occupancy status of parking
regions defined in the JSON file. It annotates the image with occupied and available parking spots,
and updates the parking information.
Args:
im0 (np.ndarray): The input inference image.
Returns:
(SolutionResults): Contains processed image `plot_im`, 'filled_slots' (number of occupied parking slots),
'available_slots' (number of available parking slots), and 'total_tracks' (total number of tracked objects).
Examples:
>>> parking_manager = ParkingManagement(json_file="parking_regions.json")
>>> image = cv2.imread("parking_lot.jpg")
>>> results = parking_manager.process(image)
"""
self.extract_tracks(im0) # Extract tracks from im0
es, fs = len(self.json), 0 # Empty slots, filled slots
annotator = SolutionAnnotator(im0, self.line_width) # Initialize annotator
for region in self.json:
# Convert points to a NumPy array with the correct dtype and reshape properly
pts_array = np.array(region["points"], dtype=np.int32).reshape((-1, 1, 2))
rg_occupied = False # Occupied region initialization
for box, cls in zip(self.boxes, self.clss):
xc, yc = int((box[0] + box[2]) / 2), int((box[1] + box[3]) / 2)
dist = cv2.pointPolygonTest(pts_array, (xc, yc), False)
if dist >= 0:
# cv2.circle(im0, (xc, yc), radius=self.line_width * 4, color=self.dc, thickness=-1)
annotator.display_objects_labels(
im0, self.model.names[int(cls)], (104, 31, 17), (255, 255, 255), xc, yc, 10
)
rg_occupied = True
break
fs, es = (fs + 1, es - 1) if rg_occupied else (fs, es)
# Plot regions
cv2.polylines(im0, [pts_array], isClosed=True, color=self.occ if rg_occupied else self.arc, thickness=2)
self.pr_info["Occupancy"], self.pr_info["Available"] = fs, es
annotator.display_analytics(im0, self.pr_info, (104, 31, 17), (255, 255, 255), 10)
plot_im = annotator.result()
self.display_output(plot_im) # Display output with base class function
# Return SolutionResults
return SolutionResults(
plot_im=plot_im,
filled_slots=self.pr_info["Occupancy"],
available_slots=self.pr_info["Available"],
total_tracks=len(self.track_ids),
)
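A minimal end-to-end sketch for the two parking classes above (the video path, weights file, and JSON filename are placeholders; ParkingPtsSelection writes bounding_boxes.json by default, as shown in save_to_json):

import cv2

from ultralytics.solutions import ParkingManagement, ParkingPtsSelection

# Step 1 (one-time): draw the parking regions in the Tkinter tool; it saves bounding_boxes.json
# ParkingPtsSelection()

# Step 2: monitor occupancy on a video stream
cap = cv2.VideoCapture("parking_lot.mp4")  # Placeholder source
manager = ParkingManagement(model="yolo11n.pt", json_file="bounding_boxes.json", show=True)

while cap.isOpened():
    success, frame = cap.read()
    if not success:
        break
    results = manager(frame)  # __call__ runs process() and attaches timing info
    print(f"Occupied: {results.filled_slots}, Available: {results.available_slots}")

cap.release()
cv2.destroyAllWindows()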

View File

@@ -0,0 +1,95 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from typing import Any
from ultralytics.solutions.solutions import BaseSolution, SolutionAnnotator, SolutionResults
from ultralytics.utils.plotting import colors
class QueueManager(BaseSolution):
"""
Manages queue counting in real-time video streams based on object tracks.
This class extends BaseSolution to provide functionality for tracking and counting objects within a specified
region in video frames.
Attributes:
counts (int): The current count of objects in the queue.
rect_color (tuple[int, int, int]): RGB color tuple for drawing the queue region rectangle.
region_length (int): The number of points defining the queue region.
track_line (list[tuple[int, int]]): List of track line coordinates.
track_history (dict[int, list[tuple[int, int]]]): Dictionary storing tracking history for each object.
Methods:
initialize_region: Initialize the queue region.
process: Process a single frame for queue management.
extract_tracks: Extract object tracks from the current frame.
store_tracking_history: Store the tracking history for an object.
display_output: Display the processed output.
Examples:
>>> cap = cv2.VideoCapture("path/to/video.mp4")
>>> queue_manager = QueueManager(region=[(100, 100), (200, 100), (200, 200), (100, 200)])
>>> while cap.isOpened():
>>> success, im0 = cap.read()
>>> if not success:
>>> break
>>> results = queue_manager.process(im0)
"""
def __init__(self, **kwargs: Any) -> None:
"""Initialize the QueueManager with parameters for tracking and counting objects in a video stream."""
super().__init__(**kwargs)
self.initialize_region()
self.counts = 0 # Queue counts information
self.rect_color = (255, 255, 255) # Rectangle color for visualization
self.region_length = len(self.region) # Store region length for further usage
def process(self, im0) -> SolutionResults:
"""
Process queue management for a single frame of video.
Args:
im0 (np.ndarray): Input image for processing, typically a frame from a video stream.
Returns:
(SolutionResults): Contains processed image `plot_im`, 'queue_count' (int, number of objects in the queue) and
'total_tracks' (int, total number of tracked objects).
Examples:
>>> queue_manager = QueueManager()
>>> frame = cv2.imread("frame.jpg")
>>> results = queue_manager.process(frame)
"""
self.counts = 0 # Reset counts every frame
self.extract_tracks(im0) # Extract tracks from the current frame
annotator = SolutionAnnotator(im0, line_width=self.line_width) # Initialize annotator
annotator.draw_region(reg_pts=self.region, color=self.rect_color, thickness=self.line_width * 2) # Draw region
for box, track_id, cls, conf in zip(self.boxes, self.track_ids, self.clss, self.confs):
# Draw bounding box and counting region
annotator.box_label(box, label=self.adjust_box_label(cls, conf, track_id), color=colors(track_id, True))
self.store_tracking_history(track_id, box) # Store track history
# Cache frequently accessed attributes
track_history = self.track_history.get(track_id, [])
# Store previous position of track and check if the object is inside the counting region
prev_position = None
if len(track_history) > 1:
prev_position = track_history[-2]
if self.region_length >= 3 and prev_position and self.r_s.contains(self.Point(self.track_line[-1])):
self.counts += 1
# Display queue counts
annotator.queue_counts_display(
f"Queue Counts : {str(self.counts)}",
points=self.region,
region_color=self.rect_color,
txt_color=(104, 31, 17),
)
plot_im = annotator.result()
self.display_output(plot_im) # Display output with base class function
# Return a SolutionResults object with processed data
return SolutionResults(plot_im=plot_im, queue_count=self.counts, total_tracks=len(self.track_ids))
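A usage sketch for the queue counter above; the region polygon and video path below are illustrative values only:

import cv2

from ultralytics.solutions import QueueManager

queue_region = [(20, 400), (1080, 400), (1080, 360), (20, 360)]  # Example polygon, adjust to your scene

cap = cv2.VideoCapture("queue_area.mp4")  # Placeholder source
queue_manager = QueueManager(model="yolo11n.pt", region=queue_region, show=True)

while cap.isOpened():
    success, frame = cap.read()
    if not success:
        break
    results = queue_manager(frame)
    print(f"Objects currently in queue: {results.queue_count}")

cap.release()
cv2.destroyAllWindows()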

View File

@@ -0,0 +1,136 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from __future__ import annotations
from typing import Any
import numpy as np
from ultralytics.solutions.solutions import BaseSolution, SolutionAnnotator, SolutionResults
from ultralytics.utils.plotting import colors
class RegionCounter(BaseSolution):
"""
A class for real-time counting of objects within user-defined regions in a video stream.
This class inherits from `BaseSolution` and provides functionality to define polygonal regions in a video frame,
track objects, and count those objects that pass through each defined region. Useful for applications requiring
counting in specified areas, such as monitoring zones or segmented sections.
Attributes:
region_template (dict): Template for creating new counting regions with default attributes including name,
polygon coordinates, and display colors.
counting_regions (list): List storing all defined regions, where each entry is based on `region_template`
and includes specific region settings like name, coordinates, and color.
region_counts (dict): Dictionary storing the count of objects for each named region.
Methods:
add_region: Add a new counting region with specified attributes.
process: Process video frames to count objects in each region.
initialize_regions: Initialize one or more counting zones; multiple named regions are supported.
Examples:
Initialize a RegionCounter and add a counting region
>>> counter = RegionCounter()
>>> counter.add_region("Zone1", [(100, 100), (200, 100), (200, 200), (100, 200)], (255, 0, 0), (255, 255, 255))
>>> results = counter.process(frame)
>>> print(f"Total tracks: {results.total_tracks}")
"""
def __init__(self, **kwargs: Any) -> None:
"""Initialize the RegionCounter for real-time object counting in user-defined regions."""
super().__init__(**kwargs)
self.region_template = {
"name": "Default Region",
"polygon": None,
"counts": 0,
"region_color": (255, 255, 255),
"text_color": (0, 0, 0),
}
self.region_counts = {}
self.counting_regions = []
self.initialize_regions()
def add_region(
self,
name: str,
polygon_points: list[tuple],
region_color: tuple[int, int, int],
text_color: tuple[int, int, int],
) -> dict[str, Any]:
"""
Add a new region to the counting list based on the provided template with specific attributes.
Args:
name (str): Name assigned to the new region.
polygon_points (list[tuple]): List of (x, y) coordinates defining the region's polygon.
region_color (tuple[int, int, int]): BGR color for region visualization.
text_color (tuple[int, int, int]): BGR color for the text within the region.
Returns:
(dict[str, Any]): The newly added region entry, including its name, polygon, and display colors.
"""
region = self.region_template.copy()
region.update(
{
"name": name,
"polygon": self.Polygon(polygon_points),
"region_color": region_color,
"text_color": text_color,
}
)
self.counting_regions.append(region)
return region
def initialize_regions(self):
"""Initialize regions only once."""
if self.region is None:
self.initialize_region()
if not isinstance(self.region, dict): # Ensure self.region is initialized and structured as a dictionary
self.region = {"Region#01": self.region}
for i, (name, pts) in enumerate(self.region.items()):
region = self.add_region(name, pts, colors(i, True), (255, 255, 255))
region["prepared_polygon"] = self.prep(region["polygon"])
def process(self, im0: np.ndarray) -> SolutionResults:
"""
Process the input frame to detect and count objects within each defined region.
Args:
im0 (np.ndarray): Input image frame where objects and regions are annotated.
Returns:
(SolutionResults): Contains processed image `plot_im`, 'total_tracks' (int, total number of tracked objects),
and 'region_counts' (dict, counts of objects per region).
"""
self.extract_tracks(im0)
annotator = SolutionAnnotator(im0, line_width=self.line_width)
for box, cls, track_id, conf in zip(self.boxes, self.clss, self.track_ids, self.confs):
annotator.box_label(box, label=self.adjust_box_label(cls, conf, track_id), color=colors(track_id, True))
center = self.Point(((box[0] + box[2]) / 2, (box[1] + box[3]) / 2))
for region in self.counting_regions:
if region["prepared_polygon"].contains(center):
region["counts"] += 1
self.region_counts[region["name"]] = region["counts"]
# Display region counts
for region in self.counting_regions:
poly = region["polygon"]
pts = list(map(tuple, np.array(poly.exterior.coords, dtype=np.int32)))
(x1, y1), (x2, y2) = [(int(poly.centroid.x), int(poly.centroid.y))] * 2
annotator.draw_region(pts, region["region_color"], self.line_width * 2)
annotator.adaptive_label(
[x1, y1, x2, y2],
label=str(region["counts"]),
color=region["region_color"],
txt_color=region["text_color"],
margin=self.line_width * 4,
shape="rect",
)
region["counts"] = 0 # Reset for next frame
plot_im = annotator.result()
self.display_output(plot_im)
return SolutionResults(plot_im=plot_im, total_tracks=len(self.track_ids), region_counts=self.region_counts)
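A sketch of multi-region counting with the class above; passing a dict maps region names to polygons (a plain point list is wrapped as "Region#01" by initialize_regions), and all paths and coordinates below are placeholders:

import cv2

from ultralytics.solutions import RegionCounter

regions = {
    "Entrance": [(50, 50), (250, 50), (250, 250), (50, 250)],
    "Exit": [(300, 50), (500, 50), (500, 250), (300, 250)],
}

cap = cv2.VideoCapture("mall.mp4")  # Placeholder source
counter = RegionCounter(model="yolo11n.pt", region=regions, show=True)

while cap.isOpened():
    success, frame = cap.read()
    if not success:
        break
    results = counter(frame)
    print(results.region_counts)  # e.g. {"Entrance": 3, "Exit": 1}

cap.release()
cv2.destroyAllWindows()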

View File

@@ -0,0 +1,156 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from typing import Any
from ultralytics.solutions.solutions import BaseSolution, SolutionAnnotator, SolutionResults
from ultralytics.utils import LOGGER
from ultralytics.utils.plotting import colors
class SecurityAlarm(BaseSolution):
"""
A class to manage security alarm functionalities for real-time monitoring.
This class extends the BaseSolution class and provides features to monitor objects in a frame, send email
notifications when specific thresholds are exceeded for total detections, and annotate the output frame for
visualization.
Attributes:
email_sent (bool): Flag to track if an email has already been sent for the current event.
records (int): Threshold for the number of detected objects to trigger an alert.
server (smtplib.SMTP): SMTP server connection for sending email alerts.
to_email (str): Recipient's email address for alerts.
from_email (str): Sender's email address for alerts.
Methods:
authenticate: Set up email server authentication for sending alerts.
send_email: Send an email notification with details and an image attachment.
process: Monitor the frame, process detections, and trigger alerts if thresholds are crossed.
Examples:
>>> security = SecurityAlarm()
>>> security.authenticate("abc@gmail.com", "1111222233334444", "xyz@gmail.com")
>>> frame = cv2.imread("frame.jpg")
>>> results = security.process(frame)
"""
def __init__(self, **kwargs: Any) -> None:
"""
Initialize the SecurityAlarm class with parameters for real-time object monitoring.
Args:
**kwargs (Any): Additional keyword arguments passed to the parent class.
"""
super().__init__(**kwargs)
self.email_sent = False
self.records = self.CFG["records"]
self.server = None
self.to_email = ""
self.from_email = ""
def authenticate(self, from_email: str, password: str, to_email: str) -> None:
"""
Authenticate the email server for sending alert notifications.
Args:
from_email (str): Sender's email address.
password (str): Password for the sender's email account.
to_email (str): Recipient's email address.
This method initializes a secure connection with the SMTP server and logs in using the provided credentials.
Examples:
>>> alarm = SecurityAlarm()
>>> alarm.authenticate("sender@example.com", "password123", "recipient@example.com")
"""
import smtplib
self.server = smtplib.SMTP("smtp.gmail.com", 587)
self.server.starttls()
self.server.login(from_email, password)
self.to_email = to_email
self.from_email = from_email
def send_email(self, im0, records: int = 5) -> None:
"""
Send an email notification with an image attachment indicating the number of objects detected.
Args:
im0 (np.ndarray): The input image or frame to be attached to the email.
records (int, optional): The number of detected objects to be included in the email message.
This method encodes the input image, composes the email message with details about the detection, and sends it
to the specified recipient.
Examples:
>>> alarm = SecurityAlarm()
>>> frame = cv2.imread("path/to/image.jpg")
>>> alarm.send_email(frame, records=10)
"""
from email.mime.image import MIMEImage
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
import cv2
img_bytes = cv2.imencode(".jpg", im0)[1].tobytes() # Encode the image as JPEG
# Create the email
message = MIMEMultipart()
message["From"] = self.from_email
message["To"] = self.to_email
message["Subject"] = "Security Alert"
# Add the text message body
message_body = f"Ultralytics ALERT!!! {records} objects have been detected!!"
message.attach(MIMEText(message_body))
# Attach the image
image_attachment = MIMEImage(img_bytes, name="ultralytics.jpg")
message.attach(image_attachment)
# Send the email
try:
self.server.send_message(message)
LOGGER.info("Email sent successfully!")
except Exception as e:
LOGGER.error(f"Failed to send email: {e}")
def process(self, im0) -> SolutionResults:
"""
Monitor the frame, process object detections, and trigger alerts if thresholds are exceeded.
Args:
im0 (np.ndarray): The input image or frame to be processed and annotated.
Returns:
(SolutionResults): Contains processed image `plot_im`, 'total_tracks' (total number of tracked objects) and
'email_sent' (whether an email alert was triggered).
This method processes the input frame, extracts detections, annotates the frame with bounding boxes, and sends
an email notification if the number of detected objects surpasses the specified threshold and an alert has not
already been sent.
Examples:
>>> alarm = SecurityAlarm()
>>> frame = cv2.imread("path/to/image.jpg")
>>> results = alarm.process(frame)
"""
self.extract_tracks(im0) # Extract tracks
annotator = SolutionAnnotator(im0, line_width=self.line_width) # Initialize annotator
# Iterate over bounding boxes and classes index
for box, cls in zip(self.boxes, self.clss):
# Draw bounding box
annotator.box_label(box, label=self.names[cls], color=colors(cls, True))
total_det = len(self.clss)
if total_det >= self.records and not self.email_sent: # Only send email if not sent before
self.send_email(im0, total_det)
self.email_sent = True
plot_im = annotator.result()
self.display_output(plot_im) # Display output with base class function
# Return a SolutionResults
return SolutionResults(plot_im=plot_im, total_tracks=len(self.track_ids), email_sent=self.email_sent)
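A sketch of the alert flow above; the addresses and app password are placeholders, and records=5 means an email fires once at least five objects are detected in a frame:

import cv2

from ultralytics.solutions import SecurityAlarm

alarm = SecurityAlarm(model="yolo11n.pt", records=5, show=True)
# Gmail expects an app password here, not the regular account password
alarm.authenticate(from_email="sender@example.com", password="app-password", to_email="receiver@example.com")

cap = cv2.VideoCapture("entrance_cam.mp4")  # Placeholder source
while cap.isOpened():
    success, frame = cap.read()
    if not success:
        break
    results = alarm(frame)
    if results.email_sent:
        print("Alert email dispatched")

cap.release()
cv2.destroyAllWindows()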

View File

@@ -0,0 +1,224 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from __future__ import annotations
import os
from pathlib import Path
from typing import Any
import numpy as np
from PIL import Image
from ultralytics.data.utils import IMG_FORMATS
from ultralytics.utils import LOGGER, TORCH_VERSION
from ultralytics.utils.checks import check_requirements
from ultralytics.utils.torch_utils import TORCH_2_4, select_device
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" # Avoid OpenMP conflict on some systems
class VisualAISearch:
"""
A semantic image search system that leverages OpenCLIP for generating high-quality image and text embeddings and
FAISS for fast similarity-based retrieval.
This class aligns image and text embeddings in a shared semantic space, enabling users to search large collections
of images using natural language queries with high accuracy and speed.
Attributes:
data (str): Directory containing images.
device (str): Computation device, e.g., 'cpu' or 'cuda'.
faiss_index (str): Path to the FAISS index file.
data_path_npy (str): Path to the numpy file storing image paths.
data_dir (Path): Path object for the data directory.
model: Loaded CLIP model.
index: FAISS index for similarity search.
image_paths (list[str]): List of image file paths.
Methods:
extract_image_feature: Extract CLIP embedding from an image.
extract_text_feature: Extract CLIP embedding from text.
load_or_build_index: Load existing FAISS index or build new one.
search: Perform semantic search for similar images.
Examples:
Initialize and search for images
>>> searcher = VisualAISearch(data="path/to/images", device="cuda")
>>> results = searcher.search("a cat sitting on a chair", k=10)
"""
def __init__(self, **kwargs: Any) -> None:
"""Initialize the VisualAISearch class with FAISS index and CLIP model."""
assert TORCH_2_4, f"VisualAISearch requires torch>=2.4 (found torch=={TORCH_VERSION})"
from ultralytics.nn.text_model import build_text_model
check_requirements("faiss-cpu")
self.faiss = __import__("faiss")
self.faiss_index = "faiss.index"
self.data_path_npy = "paths.npy"
self.data_dir = Path(kwargs.get("data", "images"))
self.device = select_device(kwargs.get("device", "cpu"))
if not self.data_dir.exists():
from ultralytics.utils import ASSETS_URL
LOGGER.warning(f"{self.data_dir} not found. Downloading images.zip from {ASSETS_URL}/images.zip")
from ultralytics.utils.downloads import safe_download
safe_download(url=f"{ASSETS_URL}/images.zip", unzip=True, retry=3)
self.data_dir = Path("images")
self.model = build_text_model("clip:ViT-B/32", device=self.device)
self.index = None
self.image_paths = []
self.load_or_build_index()
def extract_image_feature(self, path: Path) -> np.ndarray:
"""Extract CLIP image embedding from the given image path."""
return self.model.encode_image(Image.open(path)).cpu().numpy()
def extract_text_feature(self, text: str) -> np.ndarray:
"""Extract CLIP text embedding from the given text query."""
return self.model.encode_text(self.model.tokenize([text])).cpu().numpy()
def load_or_build_index(self) -> None:
"""
Load existing FAISS index or build a new one from image features.
Checks if FAISS index and image paths exist on disk. If found, loads them directly. Otherwise, builds a new
index by extracting features from all images in the data directory, normalizes the features, and saves both the
index and image paths for future use.
"""
# Check if the FAISS index and corresponding image paths already exist
if Path(self.faiss_index).exists() and Path(self.data_path_npy).exists():
LOGGER.info("Loading existing FAISS index...")
self.index = self.faiss.read_index(self.faiss_index) # Load the FAISS index from disk
self.image_paths = np.load(self.data_path_npy) # Load the saved image path list
return # Exit the function as the index is successfully loaded
# If the index doesn't exist, start building it from scratch
LOGGER.info("Building FAISS index from images...")
vectors = [] # List to store feature vectors of images
# Iterate over all image files in the data directory
for file in self.data_dir.iterdir():
# Skip files that are not valid image formats
if file.suffix.lower().lstrip(".") not in IMG_FORMATS:
continue
try:
# Extract feature vector for the image and add to the list
vectors.append(self.extract_image_feature(file))
self.image_paths.append(file.name) # Store the corresponding image name
except Exception as e:
LOGGER.warning(f"Skipping {file.name}: {e}")
# If no vectors were successfully created, raise an error
if not vectors:
raise RuntimeError("No image embeddings could be generated.")
vectors = np.vstack(vectors).astype("float32") # Stack all vectors into a NumPy array and convert to float32
self.faiss.normalize_L2(vectors) # Normalize vectors to unit length for cosine similarity
self.index = self.faiss.IndexFlatIP(vectors.shape[1]) # Create a new FAISS index using inner product
self.index.add(vectors) # Add the normalized vectors to the FAISS index
self.faiss.write_index(self.index, self.faiss_index) # Save the newly built FAISS index to disk
np.save(self.data_path_npy, np.array(self.image_paths)) # Save the list of image paths to disk
LOGGER.info(f"Indexed {len(self.image_paths)} images.")
def search(self, query: str, k: int = 30, similarity_thresh: float = 0.1) -> list[str]:
"""
Return top-k semantically similar images to the given query.
Args:
query (str): Natural language text query to search for.
k (int, optional): Maximum number of results to return.
similarity_thresh (float, optional): Minimum similarity threshold for filtering results.
Returns:
(list[str]): List of image filenames ranked by similarity score.
Examples:
Search for images matching a query
>>> searcher = VisualAISearch(data="images")
>>> results = searcher.search("red car", k=5, similarity_thresh=0.2)
"""
text_feat = self.extract_text_feature(query).astype("float32")
self.faiss.normalize_L2(text_feat)
D, index = self.index.search(text_feat, k)
results = [
(self.image_paths[i], float(D[0][idx])) for idx, i in enumerate(index[0]) if D[0][idx] >= similarity_thresh
]
results.sort(key=lambda x: x[1], reverse=True)
LOGGER.info("\nRanked Results:")
for name, score in results:
LOGGER.info(f" - {name} | Similarity: {score:.4f}")
return [r[0] for r in results]
def __call__(self, query: str) -> list[str]:
"""Direct call interface for the search function."""
return self.search(query)
class SearchApp:
"""
A Flask-based web interface for semantic image search with natural language queries.
This class provides a clean, responsive frontend that enables users to input natural language queries and
instantly view the most relevant images retrieved from the indexed database.
Attributes:
render_template: Flask template rendering function.
request: Flask request object.
searcher (VisualAISearch): Instance of the VisualAISearch class.
app (Flask): Flask application instance.
Methods:
index: Process user queries and display search results.
run: Start the Flask web application.
Examples:
Start a search application
>>> app = SearchApp(data="path/to/images", device="cuda")
>>> app.run(debug=True)
"""
def __init__(self, data: str = "images", device: str | None = None) -> None:
"""
Initialize the SearchApp with VisualAISearch backend.
Args:
data (str, optional): Path to directory containing images to index and search.
device (str, optional): Device to run inference on (e.g. 'cpu', 'cuda').
"""
check_requirements("flask>=3.0.1")
from flask import Flask, render_template, request
self.render_template = render_template
self.request = request
self.searcher = VisualAISearch(data=data, device=device)
self.app = Flask(
__name__,
template_folder="templates",
static_folder=Path(data).resolve(), # Absolute path to serve images
static_url_path="/images", # URL prefix for images
)
self.app.add_url_rule("/", view_func=self.index, methods=["GET", "POST"])
def index(self) -> str:
"""Process user query and display search results in the web interface."""
results = []
if self.request.method == "POST":
query = self.request.form.get("query", "").strip()
results = self.searcher(query)
return self.render_template("similarity-search.html", results=results)
def run(self, debug: bool = False) -> None:
"""Start the Flask web application server."""
self.app.run(debug=debug)
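A sketch covering both entry points above; the "images" directory is a placeholder (it is downloaded automatically when missing), and the Flask app binds to Flask's default host and port:

from ultralytics.solutions import SearchApp, VisualAISearch

# Programmatic search: build or load the FAISS index, then query it
searcher = VisualAISearch(data="images", device="cpu")
top_matches = searcher("a dog playing in the park")  # __call__ forwards to search()
print(top_matches[:5])

# Browser-based search: starts the Flask UI defined by SearchApp
app = SearchApp(data="images", device="cpu")
app.run(debug=True)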

View File

@@ -0,0 +1,827 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from __future__ import annotations
import math
from collections import Counter, defaultdict
from functools import lru_cache
from typing import Any
import cv2
import numpy as np
from ultralytics import YOLO
from ultralytics.solutions.config import SolutionConfig
from ultralytics.utils import ASSETS_URL, LOGGER, ops
from ultralytics.utils.checks import check_imshow, check_requirements
from ultralytics.utils.plotting import Annotator
class BaseSolution:
"""
A base class for managing Ultralytics Solutions.
This class provides core functionality for various Ultralytics Solutions, including model loading, object tracking,
and region initialization. It serves as the foundation for implementing specific computer vision solutions such as
object counting, pose estimation, and analytics.
Attributes:
LineString: Class for creating line string geometries from shapely.
Polygon: Class for creating polygon geometries from shapely.
Point: Class for creating point geometries from shapely.
prep: Prepared geometry function from shapely for optimized spatial operations.
CFG (dict[str, Any]): Configuration dictionary loaded from YAML file and updated with kwargs.
LOGGER: Logger instance for solution-specific logging.
annotator: Annotator instance for drawing on images.
tracks: YOLO tracking results from the latest inference.
track_data: Extracted tracking data (boxes or OBB) from tracks.
boxes (list): Bounding box coordinates from tracking results.
clss (list[int]): Class indices from tracking results.
track_ids (list[int]): Track IDs from tracking results.
confs (list[float]): Confidence scores from tracking results.
track_line: Current track line for storing tracking history.
masks: Segmentation masks from tracking results.
r_s: Region or line geometry object for spatial operations.
frame_no (int): Current frame number for logging purposes.
region (list[tuple[int, int]]): List of coordinate tuples defining region of interest.
line_width (int): Width of lines used in visualizations.
model (YOLO): Loaded YOLO model instance.
names (dict[int, str]): Dictionary mapping class indices to class names.
classes (list[int]): List of class indices to track.
show_conf (bool): Flag to show confidence scores in annotations.
show_labels (bool): Flag to show class labels in annotations.
device (str): Device for model inference.
track_add_args (dict[str, Any]): Additional arguments for tracking configuration.
env_check (bool): Flag indicating whether environment supports image display.
track_history (defaultdict): Dictionary storing tracking history for each object.
profilers (tuple): Profiler instances for performance monitoring.
Methods:
adjust_box_label: Generate formatted label for bounding box.
extract_tracks: Apply object tracking and extract tracks from input image.
store_tracking_history: Store object tracking history for given track ID and bounding box.
initialize_region: Initialize counting region and line segment based on configuration.
display_output: Display processing results including frames or saved results.
process: Process method to be implemented by each Solution subclass.
Examples:
>>> solution = BaseSolution(model="yolo11n.pt", region=[(0, 0), (100, 0), (100, 100), (0, 100)])
>>> solution.initialize_region()
>>> image = cv2.imread("image.jpg")
>>> solution.extract_tracks(image)
>>> solution.display_output(image)
"""
def __init__(self, is_cli: bool = False, **kwargs: Any) -> None:
"""
Initialize the BaseSolution class with configuration settings and YOLO model.
Args:
is_cli (bool): Enable CLI mode if set to True.
**kwargs (Any): Additional configuration parameters that override defaults.
"""
self.CFG = vars(SolutionConfig().update(**kwargs))
self.LOGGER = LOGGER # Store logger object to be used in multiple solution classes
check_requirements("shapely>=2.0.0")
from shapely.geometry import LineString, Point, Polygon
from shapely.prepared import prep
self.LineString = LineString
self.Polygon = Polygon
self.Point = Point
self.prep = prep
self.annotator = None # Initialize annotator
self.tracks = None
self.track_data = None
self.boxes = []
self.clss = []
self.track_ids = []
self.track_line = None
self.masks = None
self.r_s = None
self.frame_no = -1 # Only for logging
self.LOGGER.info(f"Ultralytics Solutions: ✅ {self.CFG}")
self.region = self.CFG["region"] # Store region data for other classes usage
self.line_width = self.CFG["line_width"]
# Load Model and store additional information (classes, show_conf, show_label)
if self.CFG["model"] is None:
self.CFG["model"] = "yolo11n.pt"
self.model = YOLO(self.CFG["model"])
self.names = self.model.names
self.classes = self.CFG["classes"]
self.show_conf = self.CFG["show_conf"]
self.show_labels = self.CFG["show_labels"]
self.device = self.CFG["device"]
self.track_add_args = { # Tracker additional arguments for advance configuration
k: self.CFG[k] for k in {"iou", "conf", "device", "max_det", "half", "tracker"}
} # verbose must be passed to track method; setting it False in YOLO still logs the track information.
if is_cli and self.CFG["source"] is None:
d_s = "solutions_ci_demo.mp4" if "-pose" not in self.CFG["model"] else "solution_ci_pose_demo.mp4"
self.LOGGER.warning(f"source not provided. using default source {ASSETS_URL}/{d_s}")
from ultralytics.utils.downloads import safe_download
safe_download(f"{ASSETS_URL}/{d_s}") # download source from ultralytics assets
self.CFG["source"] = d_s # set default source
# Initialize environment and region setup
self.env_check = check_imshow(warn=True)
self.track_history = defaultdict(list)
self.profilers = (
ops.Profile(device=self.device), # track
ops.Profile(device=self.device), # solution
)
def adjust_box_label(self, cls: int, conf: float, track_id: int | None = None) -> str | None:
"""
Generate a formatted label for a bounding box.
This method constructs a label string for a bounding box using the class index and confidence score.
Optionally includes the track ID if provided. The label format adapts based on the display settings
defined in `self.show_conf` and `self.show_labels`.
Args:
cls (int): The class index of the detected object.
conf (float): The confidence score of the detection.
track_id (int, optional): The unique identifier for the tracked object.
Returns:
(str | None): The formatted label string if `self.show_labels` is True; otherwise, None.
"""
name = ("" if track_id is None else f"{track_id} ") + self.names[cls]
return (f"{name} {conf:.2f}" if self.show_conf else name) if self.show_labels else None
def extract_tracks(self, im0: np.ndarray) -> None:
"""
Apply object tracking and extract tracks from an input image or frame.
Args:
im0 (np.ndarray): The input image or frame.
Examples:
>>> solution = BaseSolution()
>>> frame = cv2.imread("path/to/image.jpg")
>>> solution.extract_tracks(frame)
"""
with self.profilers[0]:
self.tracks = self.model.track(
source=im0, persist=True, classes=self.classes, verbose=False, **self.track_add_args
)[0]
is_obb = self.tracks.obb is not None
self.track_data = self.tracks.obb if is_obb else self.tracks.boxes # Extract tracks for OBB or object detection
if self.track_data and self.track_data.is_track:
self.boxes = (self.track_data.xyxyxyxy if is_obb else self.track_data.xyxy).cpu()
self.clss = self.track_data.cls.cpu().tolist()
self.track_ids = self.track_data.id.int().cpu().tolist()
self.confs = self.track_data.conf.cpu().tolist()
else:
self.LOGGER.warning("no tracks found!")
self.boxes, self.clss, self.track_ids, self.confs = [], [], [], []
def store_tracking_history(self, track_id: int, box) -> None:
"""
Store the tracking history of an object.
This method updates the tracking history for a given object by appending the center point of its
bounding box to the track line. It maintains a maximum of 30 points in the tracking history.
Args:
track_id (int): The unique identifier for the tracked object.
box (list[float]): The bounding box coordinates of the object in the format [x1, y1, x2, y2].
Examples:
>>> solution = BaseSolution()
>>> solution.store_tracking_history(1, [100, 200, 300, 400])
"""
# Store tracking history
self.track_line = self.track_history[track_id]
self.track_line.append(tuple(box.mean(dim=0)) if box.numel() > 4 else (box[:4:2].mean(), box[1:4:2].mean()))
if len(self.track_line) > 30:
self.track_line.pop(0)
def initialize_region(self) -> None:
"""Initialize the counting region and line segment based on configuration settings."""
if self.region is None:
self.region = [(10, 200), (540, 200), (540, 180), (10, 180)]
self.r_s = (
self.Polygon(self.region) if len(self.region) >= 3 else self.LineString(self.region)
) # region or line
def display_output(self, plot_im: np.ndarray) -> None:
"""
Display the results of the processing, which could involve showing frames, printing counts, or saving results.
This method is responsible for visualizing the output of the object detection and tracking process. It displays
the processed frame with annotations, and allows for user interaction to close the display.
Args:
plot_im (np.ndarray): The image or frame that has been processed and annotated.
Examples:
>>> solution = BaseSolution()
>>> frame = cv2.imread("path/to/image.jpg")
>>> solution.display_output(frame)
Notes:
- This method will only display output if the 'show' configuration is set to True and the environment
supports image display.
- The display can be closed by pressing the 'q' key.
"""
if self.CFG.get("show") and self.env_check:
cv2.imshow("Ultralytics Solutions", plot_im)
if cv2.waitKey(1) & 0xFF == ord("q"):
cv2.destroyAllWindows() # Closes current frame window
return
def process(self, *args: Any, **kwargs: Any):
"""Process method should be implemented by each Solution subclass."""
def __call__(self, *args: Any, **kwargs: Any):
"""Allow instances to be called like a function with flexible arguments."""
with self.profilers[1]:
result = self.process(*args, **kwargs) # Call the subclass-specific process method
track_or_predict = "predict" if type(self).__name__ == "ObjectCropper" else "track"
track_or_predict_speed = self.profilers[0].dt * 1e3
solution_speed = (self.profilers[1].dt - self.profilers[0].dt) * 1e3 # solution time = process - track
result.speed = {track_or_predict: track_or_predict_speed, "solution": solution_speed}
if self.CFG["verbose"]:
self.frame_no += 1
counts = Counter(self.clss) # Only for logging.
LOGGER.info(
f"{self.frame_no}: {result.plot_im.shape[0]}x{result.plot_im.shape[1]} {solution_speed:.1f}ms,"
f" {', '.join([f'{v} {self.names[k]}' for k, v in counts.items()])}\n"
f"Speed: {track_or_predict_speed:.1f}ms {track_or_predict}, "
f"{solution_speed:.1f}ms solution per image at shape "
f"(1, {getattr(self.model, 'ch', 3)}, {result.plot_im.shape[0]}, {result.plot_im.shape[1]})\n"
)
return result
class SolutionAnnotator(Annotator):
"""
A specialized annotator class for visualizing and analyzing computer vision tasks.
This class extends the base Annotator class, providing additional methods for drawing regions, centroids, tracking
trails, and visual annotations for Ultralytics Solutions. It offers comprehensive visualization capabilities for
various computer vision applications including object detection, tracking, pose estimation, and analytics.
Attributes:
im (np.ndarray): The image being annotated.
line_width (int): Thickness of lines used in annotations.
font_size (int): Size of the font used for text annotations.
font (str): Path to the font file used for text rendering.
pil (bool): Whether to use PIL for text rendering.
example (str): An example attribute for demonstration purposes.
Methods:
draw_region: Draw a region using specified points, colors, and thickness.
queue_counts_display: Display queue counts in the specified region.
display_analytics: Display overall statistics for parking lot management.
estimate_pose_angle: Calculate the angle between three points in an object pose.
draw_specific_kpts: Draw specific keypoints on the image.
plot_workout_information: Draw a labeled text box on the image.
plot_angle_and_count_and_stage: Visualize angle, step count, and stage for workout monitoring.
plot_distance_and_line: Display the distance between centroids and connect them with a line.
display_objects_labels: Annotate bounding boxes with object class labels.
sweep_annotator: Visualize a vertical sweep line and optional label.
visioneye: Map and connect object centroids to a visual "eye" point.
adaptive_label: Draw a label with a circular or rectangular background centered within a bounding box.
Examples:
>>> annotator = SolutionAnnotator(image)
>>> annotator.draw_region([(0, 0), (100, 100)], color=(0, 255, 0), thickness=5)
>>> annotator.display_analytics(
... image, text={"Available Spots": 5}, txt_color=(0, 0, 0), bg_color=(255, 255, 255), margin=10
... )
"""
def __init__(
self,
im: np.ndarray,
line_width: int | None = None,
font_size: int | None = None,
font: str = "Arial.ttf",
pil: bool = False,
example: str = "abc",
):
"""
Initialize the SolutionAnnotator class with an image for annotation.
Args:
im (np.ndarray): The image to be annotated.
line_width (int, optional): Line thickness for drawing on the image.
font_size (int, optional): Font size for text annotations.
font (str): Path to the font file.
pil (bool): Indicates whether to use PIL for rendering text.
example (str): An example parameter for demonstration purposes.
"""
super().__init__(im, line_width, font_size, font, pil, example)
def draw_region(
self,
reg_pts: list[tuple[int, int]] | None = None,
color: tuple[int, int, int] = (0, 255, 0),
thickness: int = 5,
):
"""
Draw a region or line on the image.
Args:
reg_pts (list[tuple[int, int]], optional): Region points (2 points for a line, 4+ points for a polygon region).
color (tuple[int, int, int]): RGB color value for the region.
thickness (int): Line thickness for drawing the region.
"""
cv2.polylines(self.im, [np.array(reg_pts, dtype=np.int32)], isClosed=True, color=color, thickness=thickness)
# Draw small circles at the corner points
for point in reg_pts:
cv2.circle(self.im, (point[0], point[1]), thickness * 2, color, -1) # -1 fills the circle
def queue_counts_display(
self,
label: str,
points: list[tuple[int, int]] | None = None,
region_color: tuple[int, int, int] = (255, 255, 255),
txt_color: tuple[int, int, int] = (0, 0, 0),
):
"""
Display queue counts on an image centered at the points with customizable font size and colors.
Args:
label (str): Queue counts label.
points (list[tuple[int, int]], optional): Region points for center point calculation to display text.
region_color (tuple[int, int, int]): RGB queue region color.
txt_color (tuple[int, int, int]): RGB text display color.
"""
x_values = [point[0] for point in points]
y_values = [point[1] for point in points]
center_x = sum(x_values) // len(points)
center_y = sum(y_values) // len(points)
text_size = cv2.getTextSize(label, 0, fontScale=self.sf, thickness=self.tf)[0]
text_width = text_size[0]
text_height = text_size[1]
rect_width = text_width + 20
rect_height = text_height + 20
rect_top_left = (center_x - rect_width // 2, center_y - rect_height // 2)
rect_bottom_right = (center_x + rect_width // 2, center_y + rect_height // 2)
cv2.rectangle(self.im, rect_top_left, rect_bottom_right, region_color, -1)
text_x = center_x - text_width // 2
text_y = center_y + text_height // 2
# Draw text
cv2.putText(
self.im,
label,
(text_x, text_y),
0,
fontScale=self.sf,
color=txt_color,
thickness=self.tf,
lineType=cv2.LINE_AA,
)
def display_analytics(
self,
im0: np.ndarray,
text: dict[str, Any],
txt_color: tuple[int, int, int],
bg_color: tuple[int, int, int],
margin: int,
):
"""
Display the overall statistics for parking lots, object counters, etc.
Args:
im0 (np.ndarray): Inference image.
text (dict[str, Any]): Labels dictionary.
txt_color (tuple[int, int, int]): Display color for text foreground.
bg_color (tuple[int, int, int]): Display color for text background.
margin (int): Gap between text and rectangle for better display.
"""
horizontal_gap = int(im0.shape[1] * 0.02)
vertical_gap = int(im0.shape[0] * 0.01)
text_y_offset = 0
for label, value in text.items():
txt = f"{label}: {value}"
text_size = cv2.getTextSize(txt, 0, self.sf, self.tf)[0]
if text_size[0] < 5 or text_size[1] < 5:
text_size = (5, 5)
text_x = im0.shape[1] - text_size[0] - margin * 2 - horizontal_gap
text_y = text_y_offset + text_size[1] + margin * 2 + vertical_gap
rect_x1 = text_x - margin * 2
rect_y1 = text_y - text_size[1] - margin * 2
rect_x2 = text_x + text_size[0] + margin * 2
rect_y2 = text_y + margin * 2
cv2.rectangle(im0, (rect_x1, rect_y1), (rect_x2, rect_y2), bg_color, -1)
cv2.putText(im0, txt, (text_x, text_y), 0, self.sf, txt_color, self.tf, lineType=cv2.LINE_AA)
text_y_offset = rect_y2
@staticmethod
@lru_cache(maxsize=256)
def estimate_pose_angle(a: list[float], b: list[float], c: list[float]) -> float:
"""
Calculate the angle between three points for workout monitoring.
Args:
a (list[float]): The coordinates of the first point.
b (list[float]): The coordinates of the second point (vertex).
c (list[float]): The coordinates of the third point.
Returns:
(float): The angle in degrees between the three points.
"""
radians = math.atan2(c[1] - b[1], c[0] - b[0]) - math.atan2(a[1] - b[1], a[0] - b[0])
angle = abs(radians * 180.0 / math.pi)
return angle if angle <= 180.0 else (360 - angle)
def draw_specific_kpts(
self,
keypoints: list[list[float]],
indices: list[int] | None = None,
radius: int = 2,
conf_thresh: float = 0.25,
) -> np.ndarray:
"""
Draw specific keypoints for gym step counting.
Args:
keypoints (list[list[float]]): Keypoints data to be plotted, each in format [x, y, confidence].
indices (list[int], optional): Keypoint indices to be plotted.
radius (int): Keypoint radius.
conf_thresh (float): Confidence threshold for keypoints.
Returns:
(np.ndarray): Image with drawn keypoints.
Notes:
Keypoint format: [x, y, confidence]; the confidence value is required for thresholding.
Modifies self.im in-place.
"""
indices = indices or [2, 5, 7]
points = [(int(k[0]), int(k[1])) for i, k in enumerate(keypoints) if i in indices and k[2] >= conf_thresh]
# Draw lines between consecutive points
for start, end in zip(points[:-1], points[1:]):
cv2.line(self.im, start, end, (0, 255, 0), 2, lineType=cv2.LINE_AA)
# Draw circles for keypoints
for pt in points:
cv2.circle(self.im, pt, radius, (0, 0, 255), -1, lineType=cv2.LINE_AA)
return self.im
def plot_workout_information(
self,
display_text: str,
position: tuple[int, int],
color: tuple[int, int, int] = (104, 31, 17),
txt_color: tuple[int, int, int] = (255, 255, 255),
) -> int:
"""
Draw workout text with a background on the image.
Args:
display_text (str): The text to be displayed.
position (tuple[int, int]): Coordinates (x, y) on the image where the text will be placed.
color (tuple[int, int, int]): Text background color.
txt_color (tuple[int, int, int]): Text foreground color.
Returns:
(int): The height of the text.
"""
(text_width, text_height), _ = cv2.getTextSize(display_text, 0, fontScale=self.sf, thickness=self.tf)
# Draw background rectangle
cv2.rectangle(
self.im,
(position[0], position[1] - text_height - 5),
(position[0] + text_width + 10, position[1] - text_height - 5 + text_height + 10 + self.tf),
color,
-1,
)
# Draw text
cv2.putText(self.im, display_text, position, 0, self.sf, txt_color, self.tf)
return text_height
def plot_angle_and_count_and_stage(
self,
angle_text: float,
count_text: str,
stage_text: str,
center_kpt: list[int],
color: tuple[int, int, int] = (104, 31, 17),
txt_color: tuple[int, int, int] = (255, 255, 255),
):
"""
Plot the pose angle, count value, and step stage for workout monitoring.
Args:
angle_text (float): Angle value for workout monitoring.
count_text (str): Counts value for workout monitoring.
stage_text (str): Stage decision for workout monitoring.
center_kpt (list[int]): Centroid pose index for workout monitoring.
color (tuple[int, int, int]): Text background color.
txt_color (tuple[int, int, int]): Text foreground color.
"""
# Format text
angle_text, count_text, stage_text = f" {angle_text:.2f}", f"Steps : {count_text}", f" {stage_text}"
# Draw angle, count and stage text
angle_height = self.plot_workout_information(
angle_text, (int(center_kpt[0]), int(center_kpt[1])), color, txt_color
)
count_height = self.plot_workout_information(
count_text, (int(center_kpt[0]), int(center_kpt[1]) + angle_height + 20), color, txt_color
)
self.plot_workout_information(
stage_text, (int(center_kpt[0]), int(center_kpt[1]) + angle_height + count_height + 40), color, txt_color
)
def plot_distance_and_line(
self,
pixels_distance: float,
centroids: list[tuple[int, int]],
line_color: tuple[int, int, int] = (104, 31, 17),
centroid_color: tuple[int, int, int] = (255, 0, 255),
):
"""
Plot the distance and line between two centroids on the frame.
Args:
pixels_distance (float): Pixels distance between two bbox centroids.
centroids (list[tuple[int, int]]): Bounding box centroids data.
line_color (tuple[int, int, int]): Distance line color.
centroid_color (tuple[int, int, int]): Bounding box centroid color.
"""
# Get the text size
text = f"Pixels Distance: {pixels_distance:.2f}"
(text_width_m, text_height_m), _ = cv2.getTextSize(text, 0, self.sf, self.tf)
# Define corners with 10-pixel margin and draw rectangle
cv2.rectangle(self.im, (15, 25), (15 + text_width_m + 20, 25 + text_height_m + 20), line_color, -1)
# Calculate the position for the text with a 10-pixel margin and draw text
text_position = (25, 25 + text_height_m + 10)
cv2.putText(
self.im,
text,
text_position,
0,
self.sf,
(255, 255, 255),
self.tf,
cv2.LINE_AA,
)
cv2.line(self.im, centroids[0], centroids[1], line_color, 3)
cv2.circle(self.im, centroids[0], 6, centroid_color, -1)
cv2.circle(self.im, centroids[1], 6, centroid_color, -1)
def display_objects_labels(
self,
im0: np.ndarray,
text: str,
txt_color: tuple[int, int, int],
bg_color: tuple[int, int, int],
x_center: float,
y_center: float,
margin: int,
):
"""
Display the bounding box labels in the parking management app.
Args:
im0 (np.ndarray): Inference image.
text (str): Object/class name.
txt_color (tuple[int, int, int]): Display color for text foreground.
bg_color (tuple[int, int, int]): Display color for text background.
x_center (float): The x position center point for bounding box.
y_center (float): The y position center point for bounding box.
margin (int): The gap between text and rectangle for better display.
"""
text_size = cv2.getTextSize(text, 0, fontScale=self.sf, thickness=self.tf)[0]
text_x = x_center - text_size[0] // 2
text_y = y_center + text_size[1] // 2
rect_x1 = text_x - margin
rect_y1 = text_y - text_size[1] - margin
rect_x2 = text_x + text_size[0] + margin
rect_y2 = text_y + margin
cv2.rectangle(
im0,
(int(rect_x1), int(rect_y1)),
(int(rect_x2), int(rect_y2)),
tuple(map(int, bg_color)), # Ensure color values are int
-1,
)
cv2.putText(
im0,
text,
(int(text_x), int(text_y)),
0,
self.sf,
tuple(map(int, txt_color)), # Ensure color values are int
self.tf,
lineType=cv2.LINE_AA,
)
def sweep_annotator(
self,
line_x: int = 0,
line_y: int = 0,
label: str | None = None,
color: tuple[int, int, int] = (221, 0, 186),
txt_color: tuple[int, int, int] = (255, 255, 255),
):
"""
Draw a sweep annotation line and an optional label.
Args:
line_x (int): The x-coordinate of the sweep line.
line_y (int): The y-coordinate limit of the sweep line.
label (str, optional): Text label to be drawn in center of sweep line. If None, no label is drawn.
color (tuple[int, int, int]): RGB color for the line and label background.
txt_color (tuple[int, int, int]): RGB color for the label text.
"""
# Draw the sweep line
cv2.line(self.im, (line_x, 0), (line_x, line_y), color, self.tf * 2)
# Draw label, if provided
if label:
(text_width, text_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, self.sf, self.tf)
cv2.rectangle(
self.im,
(line_x - text_width // 2 - 10, line_y // 2 - text_height // 2 - 10),
(line_x + text_width // 2 + 10, line_y // 2 + text_height // 2 + 10),
color,
-1,
)
cv2.putText(
self.im,
label,
(line_x - text_width // 2, line_y // 2 + text_height // 2),
cv2.FONT_HERSHEY_SIMPLEX,
self.sf,
txt_color,
self.tf,
)
def visioneye(
self,
box: list[float],
center_point: tuple[int, int],
color: tuple[int, int, int] = (235, 219, 11),
pin_color: tuple[int, int, int] = (255, 0, 255),
):
"""
Map an object's centroid to a fixed "vision eye" point and draw the connecting line.
Args:
box (list[float]): Bounding box coordinates in format [x1, y1, x2, y2].
center_point (tuple[int, int]): Center point for vision eye view.
color (tuple[int, int, int]): Object centroid and line color.
pin_color (tuple[int, int, int]): Visioneye point color.
"""
center_bbox = int((box[0] + box[2]) / 2), int((box[1] + box[3]) / 2)
cv2.circle(self.im, center_point, self.tf * 2, pin_color, -1)
cv2.circle(self.im, center_bbox, self.tf * 2, color, -1)
cv2.line(self.im, center_point, center_bbox, color, self.tf)
def adaptive_label(
self,
box: tuple[float, float, float, float],
label: str = "",
color: tuple[int, int, int] = (128, 128, 128),
txt_color: tuple[int, int, int] = (255, 255, 255),
shape: str = "rect",
margin: int = 5,
):
"""
Draw a label with a background rectangle or circle centered within a given bounding box.
Args:
box (tuple[float, float, float, float]): The bounding box coordinates (x1, y1, x2, y2).
label (str): The text label to be displayed.
color (tuple[int, int, int]): The background color of the rectangle (B, G, R).
txt_color (tuple[int, int, int]): The color of the text (R, G, B).
shape (str): The shape of the label background, either "circle" or "rect".
margin (int): The margin between the text and the rectangle border.
"""
if shape == "circle" and len(label) > 3:
LOGGER.warning(f"Length of label is {len(label)}, only first 3 letters will be used for circle annotation.")
label = label[:3]
x_center, y_center = int((box[0] + box[2]) / 2), int((box[1] + box[3]) / 2) # Calculate center of the bbox
text_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, self.sf - 0.15, self.tf)[0] # Get size of the text
text_x, text_y = x_center - text_size[0] // 2, y_center + text_size[1] // 2 # Calculate top-left corner of text
if shape == "circle":
cv2.circle(
self.im,
(x_center, y_center),
int(((text_size[0] ** 2 + text_size[1] ** 2) ** 0.5) / 2) + margin, # Calculate the radius
color,
-1,
)
else:
cv2.rectangle(
self.im,
(text_x - margin, text_y - text_size[1] - margin), # Calculate coordinates of the rectangle
(text_x + text_size[0] + margin, text_y + margin), # Calculate coordinates of the rectangle
color,
-1,
)
# Draw the text on top of the rectangle
cv2.putText(
self.im,
label,
(text_x, text_y), # Calculate top-left corner of the text
cv2.FONT_HERSHEY_SIMPLEX,
self.sf - 0.15,
self.get_txt_color(color, txt_color),
self.tf,
lineType=cv2.LINE_AA,
)
class SolutionResults:
"""
A class to encapsulate the results of Ultralytics Solutions.
This class is designed to store and manage various outputs generated by the solution pipeline, including counts,
angles, workout stages, and other analytics data. It provides a structured way to access and manipulate results
from different computer vision solutions such as object counting, pose estimation, and tracking analytics.
Attributes:
plot_im (np.ndarray): Processed image with counts, blurred, or other effects from solutions.
in_count (int): The total number of "in" counts in a video stream.
out_count (int): The total number of "out" counts in a video stream.
classwise_count (dict[str, int]): A dictionary containing counts of objects categorized by class.
queue_count (int): The count of objects in a queue or waiting area.
workout_count (int): The count of workout repetitions.
workout_angle (float): The angle calculated during a workout exercise.
workout_stage (str): The current stage of the workout.
pixels_distance (float): The calculated distance in pixels between two points or objects.
available_slots (int): The number of available slots in a monitored area.
filled_slots (int): The number of filled slots in a monitored area.
email_sent (bool): A flag indicating whether an email notification was sent.
total_tracks (int): The total number of tracked objects.
region_counts (dict[str, int]): The count of objects within a specific region.
speed_dict (dict[str, float]): A dictionary containing speed information for tracked objects.
total_crop_objects (int): Total number of cropped objects using ObjectCropper class.
speed (dict[str, float]): Performance timing information for tracking and solution processing.
"""
def __init__(self, **kwargs):
"""
Initialize a SolutionResults object with default or user-specified values.
Args:
**kwargs (Any): Optional arguments to override default attribute values.
"""
self.plot_im = None
self.in_count = 0
self.out_count = 0
self.classwise_count = {}
self.queue_count = 0
self.workout_count = 0
self.workout_angle = 0.0
self.workout_stage = None
self.pixels_distance = 0.0
self.available_slots = 0
self.filled_slots = 0
self.email_sent = False
self.total_tracks = 0
self.region_counts = {}
self.speed_dict = {} # for speed estimation
self.total_crop_objects = 0
self.speed = {}
# Override with user-defined values
self.__dict__.update(kwargs)
def __str__(self) -> str:
"""
Return a formatted string representation of the SolutionResults object.
Returns:
(str): A string representation listing non-null attributes.
"""
attrs = {
k: v
for k, v in self.__dict__.items()
if k != "plot_im" and v not in [None, {}, 0, 0.0, False] # Exclude `plot_im` explicitly
}
return ", ".join(f"{k}={v}" for k, v in attrs.items())

View File

@@ -0,0 +1,117 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from collections import deque
from math import sqrt
from typing import Any
from ultralytics.solutions.solutions import BaseSolution, SolutionAnnotator, SolutionResults
from ultralytics.utils.plotting import colors
class SpeedEstimator(BaseSolution):
"""
A class to estimate the speed of objects in a real-time video stream based on their tracks.
This class extends the BaseSolution class and provides functionality for estimating object speeds using
tracking data in video streams. Speed is calculated based on pixel displacement over time and converted
to real-world units using a configurable meters-per-pixel scale factor.
Attributes:
fps (float): Video frame rate for time calculations.
frame_count (int): Global frame counter for tracking temporal information.
trk_frame_ids (dict): Maps track IDs to their first frame index.
spd (dict): Final speed per object in km/h once locked.
trk_hist (dict): Maps track IDs to deque of position history.
locked_ids (set): Track IDs whose speed has been finalized.
max_hist (int): Required frame history before computing speed.
meter_per_pixel (float): Real-world meters represented by one pixel for scene scale conversion.
max_speed (int): Maximum allowed object speed; values above this will be capped.
Methods:
process: Process input frames to estimate object speeds based on tracking data.
store_tracking_history: Store the tracking history for an object.
extract_tracks: Extract tracks from the current frame.
display_output: Display the output with annotations.
Examples:
Initialize speed estimator and process a frame
>>> estimator = SpeedEstimator(meter_per_pixel=0.04, max_speed=120)
>>> frame = cv2.imread("frame.jpg")
>>> results = estimator.process(frame)
>>> cv2.imshow("Speed Estimation", results.plot_im)
"""
def __init__(self, **kwargs: Any) -> None:
"""
Initialize the SpeedEstimator object with speed estimation parameters and data structures.
Args:
**kwargs (Any): Additional keyword arguments passed to the parent class.
"""
super().__init__(**kwargs)
self.fps = self.CFG["fps"] # Video frame rate for time calculations
self.frame_count = 0 # Global frame counter
self.trk_frame_ids = {} # Track ID → first frame index
self.spd = {} # Final speed per object (km/h), once locked
self.trk_hist = {}  # Track ID → deque of recent positions
self.locked_ids = set() # Track IDs whose speed has been finalized
self.max_hist = self.CFG["max_hist"] # Required frame history before computing speed
self.meter_per_pixel = self.CFG["meter_per_pixel"] # Scene scale, depends on camera details
self.max_speed = self.CFG["max_speed"] # Maximum speed adjustment
def process(self, im0) -> SolutionResults:
"""
Process an input frame to estimate object speeds based on tracking data.
Args:
im0 (np.ndarray): Input image for processing with shape (H, W, C) for RGB images.
Returns:
(SolutionResults): Contains processed image `plot_im` and `total_tracks` (number of tracked objects).
Examples:
Process a frame for speed estimation
>>> estimator = SpeedEstimator()
>>> image = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
>>> results = estimator.process(image)
"""
self.frame_count += 1
self.extract_tracks(im0)
annotator = SolutionAnnotator(im0, line_width=self.line_width)
for box, track_id, _, _ in zip(self.boxes, self.track_ids, self.clss, self.confs):
self.store_tracking_history(track_id, box)
if track_id not in self.trk_hist: # Initialize history if new track found
self.trk_hist[track_id] = deque(maxlen=self.max_hist)
self.trk_frame_ids[track_id] = self.frame_count
if track_id not in self.locked_ids: # Update history until speed is locked
trk_hist = self.trk_hist[track_id]
trk_hist.append(self.track_line[-1])
# Compute and lock speed once enough history is collected
if len(trk_hist) == self.max_hist:
p0, p1 = trk_hist[0], trk_hist[-1] # First and last points of track
dt = (self.frame_count - self.trk_frame_ids[track_id]) / self.fps # Time in seconds
if dt > 0:
dx, dy = p1[0] - p0[0], p1[1] - p0[1] # Pixel displacement
pixel_distance = sqrt(dx * dx + dy * dy) # Calculate pixel distance
meters = pixel_distance * self.meter_per_pixel # Convert to meters
self.spd[track_id] = int(
min((meters / dt) * 3.6, self.max_speed)
) # Convert to km/h and store final speed
self.locked_ids.add(track_id) # Prevent further updates
self.trk_hist.pop(track_id, None) # Free memory
self.trk_frame_ids.pop(track_id, None) # Remove frame start reference
if track_id in self.spd:
speed_label = f"{self.spd[track_id]} km/h"
annotator.box_label(box, label=speed_label, color=colors(track_id, True)) # Draw bounding box
plot_im = annotator.result()
self.display_output(plot_im) # Display output with base class function
# Return results with processed image and tracking summary
return SolutionResults(plot_im=plot_im, total_tracks=len(self.track_ids))
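A worked example of the conversion used in process() above, with illustrative numbers (meter_per_pixel=0.04, fps=30, and max_speed=120 are assumptions, not defaults from this file):

pixel_distance = 150.0                     # displacement between the first and last stored points, in pixels
meters = pixel_distance * 0.04             # 6.0 m at an assumed scale of 1 px ≈ 4 cm
dt = 30 / 30                               # 30 frames at 30 FPS -> 1.0 s
speed_kmh = min((meters / dt) * 3.6, 120)  # m/s converted to km/h, capped at the assumed max_speed
print(int(speed_kmh))                      # 21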

View File

@@ -0,0 +1,262 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
import io
import os
from typing import Any
import cv2
import torch
from ultralytics import YOLO
from ultralytics.utils import LOGGER
from ultralytics.utils.checks import check_requirements
from ultralytics.utils.downloads import GITHUB_ASSETS_STEMS
torch.classes.__path__ = [] # Torch module __path__._path issue: https://github.com/datalab-to/marker/issues/442
class Inference:
"""
A class to perform object detection, image classification, image segmentation and pose estimation inference.
This class provides functionalities for loading models, configuring settings, uploading video files, and performing
real-time inference using Streamlit and Ultralytics YOLO models.
Attributes:
st (module): Streamlit module for UI creation.
temp_dict (dict): Temporary dictionary to store the model path and other configuration.
model_path (str): Path to the loaded model.
model (YOLO): The YOLO model instance.
source (str): Selected video source (webcam or video file).
enable_trk (bool): Enable tracking option.
conf (float): Confidence threshold for detection.
iou (float): IoU threshold for non-maximum suppression.
org_frame (Any): Container for the original frame to be displayed.
ann_frame (Any): Container for the annotated frame to be displayed.
vid_file_name (str | int): Name of the uploaded video file or webcam index.
selected_ind (list[int]): List of selected class indices for detection.
img_file_names (list[dict]): Uploaded image files stored as dicts with a temporary "path" and the original "name".
Methods:
web_ui: Set up the Streamlit web interface with custom HTML elements.
sidebar: Configure the Streamlit sidebar for model and inference settings.
source_upload: Handle video/image uploads or webcam selection through the Streamlit interface.
configure: Configure the model and load selected classes for inference.
image_inference: Run inference on uploaded image files and display the results.
inference: Perform real-time inference on video, webcam, or image sources.
Examples:
Create an Inference instance with a custom model
>>> inf = Inference(model="path/to/model.pt")
>>> inf.inference()
Create an Inference instance with default settings
>>> inf = Inference()
>>> inf.inference()
"""
def __init__(self, **kwargs: Any) -> None:
"""
Initialize the Inference class, checking Streamlit requirements and setting up the model path.
Args:
**kwargs (Any): Additional keyword arguments for model configuration.
"""
check_requirements("streamlit>=1.29.0") # scope imports for faster ultralytics package load speeds
import streamlit as st
self.st = st # Reference to the Streamlit module
self.source = None # Video source selection (webcam or video file)
self.img_file_names = [] # List of image file names
self.enable_trk = False # Flag to toggle object tracking
self.conf = 0.25 # Confidence threshold for detection
self.iou = 0.45 # Intersection-over-Union (IoU) threshold for non-maximum suppression
self.org_frame = None # Container for the original frame display
self.ann_frame = None # Container for the annotated frame display
self.vid_file_name = None # Video file name or webcam index
self.selected_ind: list[int] = [] # List of selected class indices for detection
self.model = None # YOLO model instance
self.temp_dict = {"model": None, **kwargs}
self.model_path = None # Model file path
if self.temp_dict["model"] is not None:
self.model_path = self.temp_dict["model"]
LOGGER.info(f"Ultralytics Solutions: ✅ {self.temp_dict}")
def web_ui(self) -> None:
"""Set up the Streamlit web interface with custom HTML elements."""
menu_style_cfg = """<style>#MainMenu {visibility: hidden;}</style>"""  # Hide Streamlit's default main menu
# Main title of streamlit application
main_title_cfg = """<div><h1 style="color:#111F68; text-align:center; font-size:40px; margin-top:-50px;
font-family: 'Archivo', sans-serif; margin-bottom:20px;">Ultralytics YOLO Streamlit Application</h1></div>"""
# Subtitle of streamlit application
sub_title_cfg = """<div><h5 style="color:#042AFF; text-align:center; font-family: 'Archivo', sans-serif;
margin-top:-15px; margin-bottom:50px;">Experience real-time object detection on your webcam, videos, and images
with the power of Ultralytics YOLO! 🚀</h5></div>"""
# Set html page configuration and append custom HTML
self.st.set_page_config(page_title="Ultralytics Streamlit App", layout="wide")
self.st.markdown(menu_style_cfg, unsafe_allow_html=True)
self.st.markdown(main_title_cfg, unsafe_allow_html=True)
self.st.markdown(sub_title_cfg, unsafe_allow_html=True)
def sidebar(self) -> None:
"""Configure the Streamlit sidebar for model and inference settings."""
with self.st.sidebar: # Add Ultralytics LOGO
logo = "https://raw.githubusercontent.com/ultralytics/assets/main/logo/Ultralytics_Logotype_Original.svg"
self.st.image(logo, width=250)
self.st.sidebar.title("User Configuration") # Add elements to vertical setting menu
self.source = self.st.sidebar.selectbox(
"Source",
("webcam", "video", "image"),
) # Add source selection dropdown
if self.source in ["webcam", "video"]:
self.enable_trk = self.st.sidebar.radio("Enable Tracking", ("Yes", "No")) == "Yes" # Enable object tracking
self.conf = float(
self.st.sidebar.slider("Confidence Threshold", 0.0, 1.0, self.conf, 0.01)
) # Slider for confidence
self.iou = float(self.st.sidebar.slider("IoU Threshold", 0.0, 1.0, self.iou, 0.01)) # Slider for NMS threshold
if self.source != "image": # Only create columns for video/webcam
col1, col2 = self.st.columns(2) # Create two columns for displaying frames
self.org_frame = col1.empty() # Container for original frame
self.ann_frame = col2.empty() # Container for annotated frame
def source_upload(self) -> None:
"""Handle video file uploads through the Streamlit interface."""
from ultralytics.data.utils import IMG_FORMATS, VID_FORMATS # scope import
self.vid_file_name = ""
if self.source == "video":
vid_file = self.st.sidebar.file_uploader("Upload Video File", type=VID_FORMATS)
if vid_file is not None:
g = io.BytesIO(vid_file.read()) # BytesIO Object
with open("ultralytics.mp4", "wb") as out: # Open temporary file as bytes
out.write(g.read()) # Read bytes into file
self.vid_file_name = "ultralytics.mp4"
elif self.source == "webcam":
self.vid_file_name = 0 # Use webcam index 0
elif self.source == "image":
import tempfile # scope import
if imgfiles := self.st.sidebar.file_uploader(
"Upload Image Files", type=IMG_FORMATS, accept_multiple_files=True
):
for imgfile in imgfiles: # Save each uploaded image to a temporary file
with tempfile.NamedTemporaryFile(delete=False, suffix=f".{imgfile.name.split('.')[-1]}") as tf:
tf.write(imgfile.read())
self.img_file_names.append({"path": tf.name, "name": imgfile.name})
def configure(self) -> None:
"""Configure the model and load selected classes for inference."""
# Add dropdown menu for model selection
M_ORD, T_ORD = ["yolo11n", "yolo11s", "yolo11m", "yolo11l", "yolo11x"], ["", "-seg", "-pose", "-obb", "-cls"]
available_models = sorted(
[
x.replace("yolo", "YOLO")
for x in GITHUB_ASSETS_STEMS
if any(x.startswith(b) for b in M_ORD) and "grayscale" not in x
],
key=lambda x: (M_ORD.index(x[:7].lower()), T_ORD.index(x[7:].lower() or "")),
)
if self.model_path: # Insert user provided custom model in available_models
available_models.insert(0, self.model_path)
selected_model = self.st.sidebar.selectbox("Model", available_models)
with self.st.spinner("Model is downloading..."):
if selected_model.endswith((".pt", ".onnx", ".torchscript", ".mlpackage", ".engine")) or any(
fmt in selected_model for fmt in ("openvino_model", "rknn_model")
):
model_path = selected_model
else:
model_path = f"{selected_model.lower()}.pt" # Default to .pt if no model provided during function call.
self.model = YOLO(model_path) # Load the YOLO model
class_names = list(self.model.names.values()) # Convert dictionary to list of class names
self.st.success("Model loaded successfully!")
# Multiselect box with class names and get indices of selected classes
selected_classes = self.st.sidebar.multiselect("Classes", class_names, default=class_names[:3])
self.selected_ind = [class_names.index(option) for option in selected_classes]
if not isinstance(self.selected_ind, list):  # Ensure selected_ind is a list
self.selected_ind = list(self.selected_ind)
def image_inference(self) -> None:
"""Perform inference on uploaded images."""
for img_info in self.img_file_names:
img_path = img_info["path"]
image = cv2.imread(img_path) # Load and display the original image
if image is not None:
self.st.markdown(f"#### Processed: {img_info['name']}")
col1, col2 = self.st.columns(2)
with col1:
self.st.image(image, channels="BGR", caption="Original Image")
results = self.model(image, conf=self.conf, iou=self.iou, classes=self.selected_ind)
annotated_image = results[0].plot()
with col2:
self.st.image(annotated_image, channels="BGR", caption="Predicted Image")
try: # Clean up temporary file
os.unlink(img_path)
except FileNotFoundError:
pass # File doesn't exist, ignore
else:
self.st.error("Could not load the uploaded image.")
def inference(self) -> None:
"""Perform real-time object detection inference on video or webcam feed."""
self.web_ui() # Initialize the web interface
self.sidebar() # Create the sidebar
self.source_upload() # Upload the video source
self.configure() # Configure the app
if self.st.sidebar.button("Start"):
if self.source == "image":
if self.img_file_names:
self.image_inference()
else:
self.st.info("Please upload an image file to perform inference.")
return
stop_button = self.st.sidebar.button("Stop") # Button to stop the inference
cap = cv2.VideoCapture(self.vid_file_name) # Capture the video
if not cap.isOpened():
self.st.error("Could not open webcam or video source.")
return
while cap.isOpened():
success, frame = cap.read()
if not success:
self.st.warning("Failed to read frame from webcam. Please verify the webcam is connected properly.")
break
# Process frame with model
if self.enable_trk:
results = self.model.track(
frame, conf=self.conf, iou=self.iou, classes=self.selected_ind, persist=True
)
else:
results = self.model(frame, conf=self.conf, iou=self.iou, classes=self.selected_ind)
annotated_frame = results[0].plot() # Add annotations on frame
if stop_button:
cap.release() # Release the capture
self.st.stop() # Stop streamlit app
self.org_frame.image(frame, channels="BGR", caption="Original Frame") # Display original frame
self.ann_frame.image(annotated_frame, channels="BGR", caption="Predicted Frame")  # Display annotated frame
cap.release() # Release the capture
cv2.destroyAllWindows() # Destroy all OpenCV windows
if __name__ == "__main__":
import sys # Import the sys module for accessing command-line arguments
# Check if a model name is provided as a command-line argument
args = len(sys.argv)
model = sys.argv[1] if args > 1 else None # Assign first argument as the model name if provided
# Create an instance of the Inference class and run inference
Inference(model=model).inference()
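One way to launch this app (a sketch; it assumes the file is saved as streamlit_inference.py and that Streamlit is installed; arguments after "--" are forwarded to the script, so sys.argv[1] becomes the model path):

streamlit run streamlit_inference.py -- yolo11n.pt

The class can also be driven directly from inside another Streamlit script, optionally with a custom model:

Inference(model="yolo11n.pt").inference()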

View File

@@ -0,0 +1,167 @@
<!-- Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license -->
<!-- Similarity search webpage -->
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Semantic Image Search</title>
<link
href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600&display=swap"
rel="stylesheet"
/>
<style>
body {
background: linear-gradient(135deg, #f0f4ff, #f9fbff);
font-family: "Inter", sans-serif;
color: #111e68;
padding: 2rem;
margin: 0;
min-height: 100vh;
}
h1 {
text-align: center;
margin-bottom: 2rem;
font-size: 2.5rem;
font-weight: 600;
}
form {
display: flex;
flex-wrap: wrap;
justify-content: center;
align-items: center;
gap: 1rem;
margin-bottom: 3rem;
}
input[type="text"] {
width: 300px;
padding: 0.75rem 1rem;
font-size: 1rem;
border-radius: 10px;
border: 1px solid #ccc;
box-shadow: 0 2px 6px rgba(0, 0, 0, 0.05);
transition: box-shadow 0.3s ease;
}
input[type="text"]:focus {
outline: none;
box-shadow: 0 0 0 3px rgba(17, 30, 104, 0.2);
}
button {
background-color: #111e68;
color: white;
font-weight: 600;
font-size: 1rem;
padding: 0.75rem 1.5rem;
border-radius: 10px;
border: none;
cursor: pointer;
transition:
background-color 0.3s ease,
transform 0.2s ease;
}
button:hover {
background-color: #1f2e9f;
transform: translateY(-2px);
}
.grid {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(260px, 1fr));
gap: 1.5rem;
max-width: 1600px;
margin: auto;
}
.card {
background: white;
border-radius: 16px;
overflow: hidden;
box-shadow: 0 6px 14px rgba(0, 0, 0, 0.08);
transition:
transform 0.3s ease,
box-shadow 0.3s ease;
}
.card:hover {
transform: translateY(-6px);
box-shadow: 0 10px 20px rgba(0, 0, 0, 0.1);
}
.card img {
width: 100%;
height: 100%;
object-fit: cover;
display: block;
}
</style>
<script>
function filterResults(k) {
  const cards = document.querySelectorAll(".grid .card");
  cards.forEach((card, idx) => {
    card.style.display = idx < k ? "block" : "none";
  });
  // Update the highlighted button only for real clicks; the initial call on page load
  // keeps the default "active" state already set in the markup (Top 10).
  if (typeof event !== "undefined" && event.target?.classList?.contains("topk-btn")) {
    document.querySelectorAll(".topk-btn").forEach((btn) => btn.classList.remove("active"));
    event.target.classList.add("active");
  }
}
document.addEventListener("DOMContentLoaded", () => {
filterResults(10);
});
</script>
</head>
<body>
<div style="text-align: center; margin-bottom: 1rem">
<img
src="https://raw.githubusercontent.com/ultralytics/assets/main/logo/favicon.png"
alt="Ultralytics Logo"
style="height: 40px"
/>
</div>
<h1>Semantic Image Search with AI</h1>
<!-- Search box -->
<form method="POST">
<input
type="text"
name="query"
placeholder="Describe the scene (e.g., man walking)"
value="{{ request.form['query'] }}"
required
/>
<button type="submit">Search</button>
{% if results %}
<div class="top-k-buttons">
<button type="button" class="topk-btn" onclick="filterResults(5)">
Top 5
</button>
<button
type="button"
class="topk-btn active"
onclick="filterResults(10)"
>
Top 10
</button>
<button type="button" class="topk-btn" onclick="filterResults(30)">
Top 30
</button>
</div>
{% endif %}
</form>
<!-- Search results grid -->
<div class="grid">
{% for img in results %}
<div class="card">
<img src="{{ url_for('static', filename=img) }}" alt="Result Image" />
</div>
{% endfor %}
</div>
</body>
</html>
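A minimal Flask sketch of the kind of route this template expects (the file name similarity-search.html, the route, and the commented search call are assumptions; the actual wiring lives in SearchApp within similarity_search.py and may differ):

from flask import Flask, render_template, request

app = Flask(__name__)  # assumes the template above is saved as templates/similarity-search.html

@app.route("/", methods=["GET", "POST"])
def index():
    results = []  # image paths relative to the Flask static/ folder, rendered by the grid above
    if request.method == "POST":
        query = request.form.get("query", "")
        # results = searcher(query)  # hypothetical: a VisualAISearch-style callable returning matching filenames
    return render_template("similarity-search.html", results=results)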

View File

@@ -0,0 +1,91 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from typing import Any
import cv2
import numpy as np
from ultralytics.solutions.solutions import BaseSolution, SolutionAnnotator, SolutionResults
from ultralytics.utils.plotting import colors
class TrackZone(BaseSolution):
"""
A class to manage region-based object tracking in a video stream.
This class extends the BaseSolution class and provides functionality for tracking objects within a specific region
defined by a polygonal area. Objects outside the region are excluded from tracking.
Attributes:
region (np.ndarray): The polygonal region for tracking, represented as a convex hull of points.
line_width (int): Width of the lines used for drawing bounding boxes and region boundaries.
names (list[str]): List of class names that the model can detect.
boxes (list[np.ndarray]): Bounding boxes of tracked objects.
track_ids (list[int]): Unique identifiers for each tracked object.
clss (list[int]): Class indices of tracked objects.
Methods:
process: Process each frame of the video, applying region-based tracking.
extract_tracks: Extract tracking information from the input frame.
display_output: Display the processed output.
Examples:
>>> tracker = TrackZone()
>>> frame = cv2.imread("frame.jpg")
>>> results = tracker.process(frame)
>>> cv2.imshow("Tracked Frame", results.plot_im)
"""
def __init__(self, **kwargs: Any) -> None:
"""
Initialize the TrackZone class for tracking objects within a defined region in video streams.
Args:
**kwargs (Any): Additional keyword arguments passed to the parent class.
"""
super().__init__(**kwargs)
default_region = [(75, 75), (565, 75), (565, 285), (75, 285)]
self.region = cv2.convexHull(np.array(self.region or default_region, dtype=np.int32))
self.mask = None
def process(self, im0: np.ndarray) -> SolutionResults:
"""
Process the input frame to track objects within a defined region.
This method initializes the annotator, creates a mask for the specified region, extracts tracks
only from the masked area, and updates tracking information. Objects outside the region are ignored.
Args:
im0 (np.ndarray): The input image or frame to be processed.
Returns:
(SolutionResults): Contains processed image `plot_im` and `total_tracks` (int) representing the
total number of tracked objects within the defined region.
Examples:
>>> tracker = TrackZone()
>>> frame = cv2.imread("path/to/image.jpg")
>>> results = tracker.process(frame)
"""
annotator = SolutionAnnotator(im0, line_width=self.line_width) # Initialize annotator
if self.mask is None: # Create a mask for the region
self.mask = np.zeros_like(im0[:, :, 0])
cv2.fillPoly(self.mask, [self.region], 255)
masked_frame = cv2.bitwise_and(im0, im0, mask=self.mask)
self.extract_tracks(masked_frame)
# Draw the region boundary
cv2.polylines(im0, [self.region], isClosed=True, color=(255, 255, 255), thickness=self.line_width * 2)
# Iterate over boxes, track ids, classes indexes list and draw bounding boxes
for box, track_id, cls, conf in zip(self.boxes, self.track_ids, self.clss, self.confs):
annotator.box_label(
box, label=self.adjust_box_label(cls, conf, track_id=track_id), color=colors(track_id, True)
)
plot_im = annotator.result()
self.display_output(plot_im) # Display output with base class function
# Return a SolutionResults
return SolutionResults(plot_im=plot_im, total_tracks=len(self.track_ids))
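Usage sketch (illustrative model path and region coordinates; any pixel-space polygon works because the points are passed through cv2.convexHull in __init__ above):

import cv2
from ultralytics.solutions import TrackZone

zone = TrackZone(model="yolo11n.pt", region=[(100, 100), (500, 100), (500, 400), (100, 400)])
frame = cv2.imread("frame.jpg")
results = zone.process(frame)
print(results.total_tracks)  # number of objects tracked inside the region only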

View File

@@ -0,0 +1,70 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
from typing import Any
from ultralytics.solutions.solutions import BaseSolution, SolutionAnnotator, SolutionResults
from ultralytics.utils.plotting import colors
class VisionEye(BaseSolution):
"""
A class to manage object detection and vision mapping in images or video streams.
This class extends the BaseSolution class and provides functionality for detecting objects,
mapping vision points, and annotating results with bounding boxes and labels.
Attributes:
vision_point (tuple[int, int]): Coordinates (x, y) where vision will view objects and draw tracks.
Methods:
process: Process the input image to detect objects, annotate them, and apply vision mapping.
Examples:
>>> vision_eye = VisionEye()
>>> frame = cv2.imread("frame.jpg")
>>> results = vision_eye.process(frame)
>>> print(f"Total detected instances: {results.total_tracks}")
"""
def __init__(self, **kwargs: Any) -> None:
"""
Initialize the VisionEye class for detecting objects and applying vision mapping.
Args:
**kwargs (Any): Keyword arguments passed to the parent class and for configuring vision_point.
"""
super().__init__(**kwargs)
# Set the vision point where the system will view objects and draw tracks
self.vision_point = self.CFG["vision_point"]
def process(self, im0) -> SolutionResults:
"""
Perform object detection, vision mapping, and annotation on the input image.
Args:
im0 (np.ndarray): The input image for detection and annotation.
Returns:
(SolutionResults): Object containing the annotated image and tracking statistics.
- plot_im: Annotated output image with bounding boxes and vision mapping
- total_tracks: Number of tracked objects in the frame
Examples:
>>> vision_eye = VisionEye()
>>> frame = cv2.imread("image.jpg")
>>> results = vision_eye.process(frame)
>>> print(f"Detected {results.total_tracks} objects")
"""
self.extract_tracks(im0) # Extract tracks (bounding boxes, classes, and masks)
annotator = SolutionAnnotator(im0, self.line_width)
for cls, t_id, box, conf in zip(self.clss, self.track_ids, self.boxes, self.confs):
# Annotate the image with bounding boxes, labels, and vision mapping
annotator.box_label(box, label=self.adjust_box_label(cls, conf, t_id), color=colors(int(t_id), True))
annotator.visioneye(box, self.vision_point)
plot_im = annotator.result()
self.display_output(plot_im) # Display the annotated output using the base class function
# Return a SolutionResults object with the annotated image and tracking statistics
return SolutionResults(plot_im=plot_im, total_tracks=len(self.track_ids))
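Usage sketch (illustrative file path and vision_point; the point is any pixel coordinate from which the mapping lines are drawn):

import cv2
from ultralytics.solutions import VisionEye

eye = VisionEye(model="yolo11n.pt", vision_point=(50, 50))
results = eye.process(cv2.imread("image.jpg"))
print(results.total_tracks)  # number of tracked objects in the frame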