
Reference for ultralytics/utils/ops.py

Note

This file is available at https://github.com/ultralytics/ultralytics/blob/main/ultralytics/utils/ops.py. If you spot a problem, please help fix it by contributing a Pull Request 🛠️. Thank you 🙏!


ultralytics.utils.ops.Profile

Profile(t: float = 0.0, device: Optional[device] = None)

Bases: ContextDecorator

Ultralytics Profile class for timing code execution.

Use as a decorator with @Profile() or as a context manager with 'with Profile():'. Provides accurate timing measurements with CUDA synchronization support for GPU operations.

Attributes:

| Name | Type | Description |
| --- | --- | --- |
| `t` | `float` | Accumulated time in seconds. |
| `device` | `device` | Device used for model inference. |
| `cuda` | `bool` | Whether CUDA is being used for timing synchronization. |

Examples:

Use as a context manager to time code execution

>>> with Profile(device=device) as dt:
...     pass  # slow operation here
>>> print(dt)  # prints "Elapsed time is 9.5367431640625e-07 s"

Use as a decorator to time function execution

>>> @Profile()
... def slow_function():
...     time.sleep(0.1)

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `t` | `float` | Initial accumulated time in seconds. | `0.0` |
| `device` | `device` | Device used for model inference to enable CUDA synchronization. | `None` |
Source code in ultralytics/utils/ops.py
def __init__(self, t: float = 0.0, device: Optional[torch.device] = None):
    """
    Initialize the Profile class.

    Args:
        t (float): Initial accumulated time in seconds.
        device (torch.device, optional): Device used for model inference to enable CUDA synchronization.
    """
    self.t = t
    self.device = device
    self.cuda = bool(device and str(device).startswith("cuda"))

__enter__

__enter__()

Start timing.

Source code in ultralytics/utils/ops.py
def __enter__(self):
    """Start timing."""
    self.start = self.time()
    return self

__exit__

__exit__(type, value, traceback)

Stop timing.

Source code in ultralytics/utils/ops.py
def __exit__(self, type, value, traceback):  # noqa
    """Stop timing."""
    self.dt = self.time() - self.start  # delta-time
    self.t += self.dt  # accumulate dt

__str__

__str__()

Return a human-readable string representing the accumulated elapsed time.

Source code in ultralytics/utils/ops.py
def __str__(self):
    """Return a human-readable string representing the accumulated elapsed time."""
    return f"Elapsed time is {self.t} s"

time

time()

Get current time with CUDA synchronization if applicable.

Source code in ultralytics/utils/ops.py
def time(self):
    """Get current time with CUDA synchronization if applicable."""
    if self.cuda:
        torch.cuda.synchronize(self.device)
    return time.perf_counter()





ultralytics.utils.ops.segment2box

segment2box(segment, width: int = 640, height: int = 640)

Convert segment coordinates to bounding box coordinates.

Converts a single segment label to a box label by finding the minimum and maximum x and y coordinates. Applies inside-image constraint and clips coordinates when necessary.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `segment` | `Tensor` | Segment coordinates in format (N, 2) where N is number of points. | required |
| `width` | `int` | Width of the image in pixels. | `640` |
| `height` | `int` | Height of the image in pixels. | `640` |

Returns:

| Type | Description |
| --- | --- |
| `ndarray` | Bounding box coordinates in xyxy format [x1, y1, x2, y2]. |

Source code in ultralytics/utils/ops.py
def segment2box(segment, width: int = 640, height: int = 640):
    """
    Convert segment coordinates to bounding box coordinates.

    Converts a single segment label to a box label by finding the minimum and maximum x and y coordinates.
    Applies inside-image constraint and clips coordinates when necessary.

    Args:
        segment (torch.Tensor): Segment coordinates in format (N, 2) where N is number of points.
        width (int): Width of the image in pixels.
        height (int): Height of the image in pixels.

    Returns:
        (np.ndarray): Bounding box coordinates in xyxy format [x1, y1, x2, y2].
    """
    x, y = segment.T  # segment xy
    # Clip coordinates if 3 out of 4 sides are outside the image
    if np.array([x.min() < 0, y.min() < 0, x.max() > width, y.max() > height]).sum() >= 3:
        x = x.clip(0, width)
        y = y.clip(0, height)
    inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height)
    x = x[inside]
    y = y[inside]
    return (
        np.array([x.min(), y.min(), x.max(), y.max()], dtype=segment.dtype)
        if any(x)
        else np.zeros(4, dtype=segment.dtype)
    )  # xyxy
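
A minimal usage sketch (assuming the `ultralytics` package is installed; the polygon values are illustrative). Points falling outside the image are discarded before the min/max box is taken:

```python
import numpy as np

from ultralytics.utils.ops import segment2box

# A 3-point polygon; the first and last points fall outside a 640x640 image
segment = np.array([[-10.0, 20.0], [300.0, 400.0], [650.0, 100.0]], dtype=np.float32)

box = segment2box(segment, width=640, height=640)
print(box)  # xyxy box over the in-bounds points only: [300. 400. 300. 400.]
```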





ultralytics.utils.ops.scale_boxes

scale_boxes(
    img1_shape,
    boxes,
    img0_shape,
    ratio_pad=None,
    padding: bool = True,
    xywh: bool = False,
)

Rescale bounding boxes from one image shape to another.

Rescales bounding boxes from img1_shape to img0_shape, accounting for padding and aspect ratio changes. Supports both xyxy and xywh box formats.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `img1_shape` | `tuple` | Shape of the source image (height, width). | required |
| `boxes` | `Tensor` | Bounding boxes to rescale in format (N, 4). | required |
| `img0_shape` | `tuple` | Shape of the target image (height, width). | required |
| `ratio_pad` | `tuple` | Tuple of (ratio, pad) for scaling. If None, calculated from image shapes. | `None` |
| `padding` | `bool` | Whether boxes are based on YOLO-style augmented images with padding. | `True` |
| `xywh` | `bool` | Whether box format is xywh (True) or xyxy (False). | `False` |

Returns:

| Type | Description |
| --- | --- |
| `Tensor` | Rescaled bounding boxes in the same format as input. |

Source code in ultralytics/utils/ops.py
def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None, padding: bool = True, xywh: bool = False):
    """
    Rescale bounding boxes from one image shape to another.

    Rescales bounding boxes from img1_shape to img0_shape, accounting for padding and aspect ratio changes.
    Supports both xyxy and xywh box formats.

    Args:
        img1_shape (tuple): Shape of the source image (height, width).
        boxes (torch.Tensor): Bounding boxes to rescale in format (N, 4).
        img0_shape (tuple): Shape of the target image (height, width).
        ratio_pad (tuple, optional): Tuple of (ratio, pad) for scaling. If None, calculated from image shapes.
        padding (bool): Whether boxes are based on YOLO-style augmented images with padding.
        xywh (bool): Whether box format is xywh (True) or xyxy (False).

    Returns:
        (torch.Tensor): Rescaled bounding boxes in the same format as input.
    """
    if ratio_pad is None:  # calculate from img0_shape
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain  = old / new
        pad = (
            round((img1_shape[1] - img0_shape[1] * gain) / 2 - 0.1),
            round((img1_shape[0] - img0_shape[0] * gain) / 2 - 0.1),
        )  # wh padding
    else:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]

    if padding:
        boxes[..., 0] -= pad[0]  # x padding
        boxes[..., 1] -= pad[1]  # y padding
        if not xywh:
            boxes[..., 2] -= pad[0]  # x padding
            boxes[..., 3] -= pad[1]  # y padding
    boxes[..., :4] /= gain
    return clip_boxes(boxes, img0_shape)
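A minimal sketch of mapping letterboxed predictions back to the original frame (shapes and box values are illustrative; note the function modifies a tensor input in place):

```python
import torch

from ultralytics.utils.ops import scale_boxes

img1_shape = (640, 640)  # letterboxed inference shape (h, w)
img0_shape = (480, 800)  # original image shape (h, w)

# One xyxy box predicted in the 640x640 letterbox space
boxes = torch.tensor([[160.0, 200.0, 480.0, 440.0]])

restored = scale_boxes(img1_shape, boxes, img0_shape)
print(restored)  # tensor([[200.,  90., 600., 390.]]) in original pixel coordinates
```

Pass `ratio_pad` explicitly when the preprocessing ratio and padding are already known, to skip the recomputation from the two shapes.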





ultralytics.utils.ops.make_divisible

make_divisible(x: int, divisor)

Return the smallest number that is greater than or equal to x and divisible by the given divisor.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `x` | `int` | The number to make divisible. | required |
| `divisor` | `int \| Tensor` | The divisor. | required |

Returns:

| Type | Description |
| --- | --- |
| `int` | The smallest number >= x that is divisible by the divisor. |

Source code in ultralytics/utils/ops.py
def make_divisible(x: int, divisor):
    """
    Return the smallest number that is greater than or equal to x and divisible by the given divisor.

    Args:
        x (int): The number to make divisible.
        divisor (int | torch.Tensor): The divisor.

    Returns:
        (int): The smallest number >= x that is divisible by the divisor.
    """
    if isinstance(divisor, torch.Tensor):
        divisor = int(divisor.max())  # to int
    return math.ceil(x / divisor) * divisor
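
For example, channel counts are commonly rounded up to a hardware-friendly multiple:

```python
from ultralytics.utils.ops import make_divisible

print(make_divisible(97, 8))  # 104: rounded up to the next multiple of 8
print(make_divisible(64, 8))  # 64: already divisible, returned unchanged
```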





ultralytics.utils.ops.nms_rotated

nms_rotated(boxes, scores, threshold: float = 0.45, use_triu: bool = True)

Perform NMS on oriented bounding boxes using probiou and fast-nms.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `boxes` | `Tensor` | Rotated bounding boxes with shape (N, 5) in xywhr format. | required |
| `scores` | `Tensor` | Confidence scores with shape (N,). | required |
| `threshold` | `float` | IoU threshold for NMS. | `0.45` |
| `use_triu` | `bool` | Whether to use torch.triu operator for upper triangular matrix operations. | `True` |

Returns:

| Type | Description |
| --- | --- |
| `Tensor` | Indices of boxes to keep after NMS. |

Source code in ultralytics/utils/ops.py
def nms_rotated(boxes, scores, threshold: float = 0.45, use_triu: bool = True):
    """
    Perform NMS on oriented bounding boxes using probiou and fast-nms.

    Args:
        boxes (torch.Tensor): Rotated bounding boxes with shape (N, 5) in xywhr format.
        scores (torch.Tensor): Confidence scores with shape (N,).
        threshold (float): IoU threshold for NMS.
        use_triu (bool): Whether to use torch.triu operator for upper triangular matrix operations.

    Returns:
        (torch.Tensor): Indices of boxes to keep after NMS.
    """
    sorted_idx = torch.argsort(scores, descending=True)
    boxes = boxes[sorted_idx]
    ious = batch_probiou(boxes, boxes)
    if use_triu:
        ious = ious.triu_(diagonal=1)
        # NOTE: this path also handles the len(boxes) == 0 case, keeping the op exportable by avoiding an if-else branch
        pick = torch.nonzero((ious >= threshold).sum(0) <= 0).squeeze_(-1)
    else:
        n = boxes.shape[0]
        row_idx = torch.arange(n, device=boxes.device).view(-1, 1).expand(-1, n)
        col_idx = torch.arange(n, device=boxes.device).view(1, -1).expand(n, -1)
        upper_mask = row_idx < col_idx
        ious = ious * upper_mask
        # Zeroing these scores ensures the additional indices would not affect the final results
        scores[~((ious >= threshold).sum(0) <= 0)] = 0
        # NOTE: return indices with fixed length to avoid TFLite reshape error
        pick = torch.topk(scores, scores.shape[0]).indices
    return sorted_idx[pick]
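
A minimal sketch with three hand-made rotated boxes, the first two overlapping heavily (all values are illustrative):

```python
import torch

from ultralytics.utils.ops import nms_rotated

boxes = torch.tensor(
    [
        [100.0, 100.0, 50.0, 30.0, 0.10],  # xywhr
        [102.0, 101.0, 50.0, 30.0, 0.12],  # nearly identical to the first box
        [300.0, 300.0, 40.0, 40.0, 0.00],  # far away, kept
    ]
)
scores = torch.tensor([0.9, 0.8, 0.7])

keep = nms_rotated(boxes, scores, threshold=0.45)
print(keep)  # indices of surviving boxes, expected tensor([0, 2])
```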





ultralytics.utils.ops.non_max_suppression

non_max_suppression(
    prediction,
    conf_thres: float = 0.25,
    iou_thres: float = 0.45,
    classes=None,
    agnostic: bool = False,
    multi_label: bool = False,
    labels=(),
    max_det: int = 300,
    nc: int = 0,
    max_time_img: float = 0.05,
    max_nms: int = 30000,
    max_wh: int = 7680,
    in_place: bool = True,
    rotated: bool = False,
    end2end: bool = False,
    return_idxs: bool = False,
)

Perform non-maximum suppression (NMS) on prediction results.

Applies NMS to filter overlapping bounding boxes based on confidence and IoU thresholds. Supports multiple detection formats including standard boxes, rotated boxes, and masks.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `prediction` | `Tensor` | Predictions with shape (batch_size, num_classes + 4 + num_masks, num_boxes) containing boxes, classes, and optional masks. | required |
| `conf_thres` | `float` | Confidence threshold for filtering detections. Valid values are between 0.0 and 1.0. | `0.25` |
| `iou_thres` | `float` | IoU threshold for NMS filtering. Valid values are between 0.0 and 1.0. | `0.45` |
| `classes` | `List[int]` | List of class indices to consider. If None, all classes are considered. | `None` |
| `agnostic` | `bool` | Whether to perform class-agnostic NMS. | `False` |
| `multi_label` | `bool` | Whether each box can have multiple labels. | `False` |
| `labels` | `List[List[Union[int, float, Tensor]]]` | A priori labels for each image. | `()` |
| `max_det` | `int` | Maximum number of detections to keep per image. | `300` |
| `nc` | `int` | Number of classes. Indices after this are considered masks. | `0` |
| `max_time_img` | `float` | Maximum time in seconds for processing one image. | `0.05` |
| `max_nms` | `int` | Maximum number of boxes for torchvision.ops.nms(). | `30000` |
| `max_wh` | `int` | Maximum box width and height in pixels. | `7680` |
| `in_place` | `bool` | Whether to modify the input prediction tensor in place. | `True` |
| `rotated` | `bool` | Whether to handle Oriented Bounding Boxes (OBB). | `False` |
| `end2end` | `bool` | Whether the model is end-to-end and doesn't require NMS. | `False` |
| `return_idxs` | `bool` | Whether to return the indices of kept detections. | `False` |

Returns:

| Name | Type | Description |
| --- | --- | --- |
| `output` | `List[Tensor]` | List of detections per image with shape (num_boxes, 6 + num_masks) containing (x1, y1, x2, y2, confidence, class, mask1, mask2, ...). |
| `keepi` | `List[Tensor]` | Indices of kept detections if return_idxs=True. |

Source code in ultralytics/utils/ops.py
def non_max_suppression(
    prediction,
    conf_thres: float = 0.25,
    iou_thres: float = 0.45,
    classes=None,
    agnostic: bool = False,
    multi_label: bool = False,
    labels=(),
    max_det: int = 300,
    nc: int = 0,  # number of classes (optional)
    max_time_img: float = 0.05,
    max_nms: int = 30000,
    max_wh: int = 7680,
    in_place: bool = True,
    rotated: bool = False,
    end2end: bool = False,
    return_idxs: bool = False,
):
    """
    Perform non-maximum suppression (NMS) on prediction results.

    Applies NMS to filter overlapping bounding boxes based on confidence and IoU thresholds. Supports multiple
    detection formats including standard boxes, rotated boxes, and masks.

    Args:
        prediction (torch.Tensor): Predictions with shape (batch_size, num_classes + 4 + num_masks, num_boxes)
            containing boxes, classes, and optional masks.
        conf_thres (float): Confidence threshold for filtering detections. Valid values are between 0.0 and 1.0.
        iou_thres (float): IoU threshold for NMS filtering. Valid values are between 0.0 and 1.0.
        classes (List[int], optional): List of class indices to consider. If None, all classes are considered.
        agnostic (bool): Whether to perform class-agnostic NMS.
        multi_label (bool): Whether each box can have multiple labels.
        labels (List[List[Union[int, float, torch.Tensor]]]): A priori labels for each image.
        max_det (int): Maximum number of detections to keep per image.
        nc (int): Number of classes. Indices after this are considered masks.
        max_time_img (float): Maximum time in seconds for processing one image.
        max_nms (int): Maximum number of boxes for torchvision.ops.nms().
        max_wh (int): Maximum box width and height in pixels.
        in_place (bool): Whether to modify the input prediction tensor in place.
        rotated (bool): Whether to handle Oriented Bounding Boxes (OBB).
        end2end (bool): Whether the model is end-to-end and doesn't require NMS.
        return_idxs (bool): Whether to return the indices of kept detections.

    Returns:
        output (List[torch.Tensor]): List of detections per image with shape (num_boxes, 6 + num_masks)
            containing (x1, y1, x2, y2, confidence, class, mask1, mask2, ...).
        keepi (List[torch.Tensor]): Indices of kept detections if return_idxs=True.
    """
    import torchvision  # scope for faster 'import ultralytics'

    # Checks
    assert 0 <= conf_thres <= 1, f"Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0"
    assert 0 <= iou_thres <= 1, f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0"
    if isinstance(prediction, (list, tuple)):  # YOLOv8 model in validation mode, output = (inference_out, loss_out)
        prediction = prediction[0]  # select only inference output
    if classes is not None:
        classes = torch.tensor(classes, device=prediction.device)

    if prediction.shape[-1] == 6 or end2end:  # end-to-end model (BNC, i.e. 1,300,6)
        output = [pred[pred[:, 4] > conf_thres][:max_det] for pred in prediction]
        if classes is not None:
            output = [pred[(pred[:, 5:6] == classes).any(1)] for pred in output]
        return output

    bs = prediction.shape[0]  # batch size (BCN, i.e. 1,84,6300)
    nc = nc or (prediction.shape[1] - 4)  # number of classes
    nm = prediction.shape[1] - nc - 4  # number of masks
    mi = 4 + nc  # mask start index
    xc = prediction[:, 4:mi].amax(1) > conf_thres  # candidates
    xinds = torch.stack([torch.arange(len(i), device=prediction.device) for i in xc])[..., None]  # to track idxs

    # Settings
    # min_wh = 2  # (pixels) minimum box width and height
    time_limit = 2.0 + max_time_img * bs  # seconds to quit after
    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)

    prediction = prediction.transpose(-1, -2)  # shape(1,84,6300) to shape(1,6300,84)
    if not rotated:
        if in_place:
            prediction[..., :4] = xywh2xyxy(prediction[..., :4])  # xywh to xyxy
        else:
            prediction = torch.cat((xywh2xyxy(prediction[..., :4]), prediction[..., 4:]), dim=-1)  # xywh to xyxy

    t = time.time()
    output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs
    keepi = [torch.zeros((0, 1), device=prediction.device)] * bs  # to store the kept idxs
    for xi, (x, xk) in enumerate(zip(prediction, xinds)):  # image index, (preds, preds indices)
        # Apply constraints
        # x[((x[:, 2:4] < min_wh) | (x[:, 2:4] > max_wh)).any(1), 4] = 0  # width-height
        filt = xc[xi]  # confidence
        x, xk = x[filt], xk[filt]

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]) and not rotated:
            lb = labels[xi]
            v = torch.zeros((len(lb), nc + nm + 4), device=x.device)
            v[:, :4] = xywh2xyxy(lb[:, 1:5])  # box
            v[range(len(lb)), lb[:, 0].long() + 4] = 1.0  # cls
            x = torch.cat((x, v), 0)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Detections matrix nx6 (xyxy, conf, cls)
        box, cls, mask = x.split((4, nc, nm), 1)

        if multi_label:
            i, j = torch.where(cls > conf_thres)
            x = torch.cat((box[i], x[i, 4 + j, None], j[:, None].float(), mask[i]), 1)
            xk = xk[i]
        else:  # best class only
            conf, j = cls.max(1, keepdim=True)
            filt = conf.view(-1) > conf_thres
            x = torch.cat((box, conf, j.float(), mask), 1)[filt]
            xk = xk[filt]

        # Filter by class
        if classes is not None:
            filt = (x[:, 5:6] == classes).any(1)
            x, xk = x[filt], xk[filt]

        # Check shape
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
        if n > max_nms:  # excess boxes
            filt = x[:, 4].argsort(descending=True)[:max_nms]  # sort by confidence and remove excess boxes
            x, xk = x[filt], xk[filt]

        # Batched NMS
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        scores = x[:, 4]  # scores
        if rotated:
            boxes = torch.cat((x[:, :2] + c, x[:, 2:4], x[:, -1:]), dim=-1)  # xywhr
            i = nms_rotated(boxes, scores, iou_thres)
        else:
            boxes = x[:, :4] + c  # boxes (offset by class)
            i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
        i = i[:max_det]  # limit detections

        output[xi], keepi[xi] = x[i], xk[i].reshape(-1)
        if (time.time() - t) > time_limit:
            LOGGER.warning(f"NMS time limit {time_limit:.3f}s exceeded")
            break  # time limit exceeded

    return (output, keepi) if return_idxs else output
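
A minimal sketch on a random raw output standing in for an 80-class detector head; real usage would pass the model's prediction tensor instead:

```python
import torch

from ultralytics.utils.ops import non_max_suppression

# Fake raw head output: (batch, 4 box coords + 80 class scores, num_anchors)
prediction = torch.rand(1, 84, 6300)

results = non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, max_det=300)
print(results[0].shape)  # (num_kept, 6): x1, y1, x2, y2, confidence, class
```

With `return_idxs=True`, the same call also returns the indices of the surviving candidates per image.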





ultralytics.utils.ops.clip_boxes

clip_boxes(boxes, shape)

Clip bounding boxes to image boundaries.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `boxes` | `Tensor \| ndarray` | Bounding boxes to clip. | required |
| `shape` | `tuple` | Image shape as (height, width). | required |

Returns:

| Type | Description |
| --- | --- |
| `Tensor \| ndarray` | Clipped bounding boxes. |

Source code in ultralytics/utils/ops.py
def clip_boxes(boxes, shape):
    """
    Clip bounding boxes to image boundaries.

    Args:
        boxes (torch.Tensor | numpy.ndarray): Bounding boxes to clip.
        shape (tuple): Image shape as (height, width).

    Returns:
        (torch.Tensor | numpy.ndarray): Clipped bounding boxes.
    """
    if isinstance(boxes, torch.Tensor):  # faster individually (WARNING: inplace .clamp_() Apple MPS bug)
        boxes[..., 0] = boxes[..., 0].clamp(0, shape[1])  # x1
        boxes[..., 1] = boxes[..., 1].clamp(0, shape[0])  # y1
        boxes[..., 2] = boxes[..., 2].clamp(0, shape[1])  # x2
        boxes[..., 3] = boxes[..., 3].clamp(0, shape[0])  # y2
    else:  # np.array (faster grouped)
        boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1])  # x1, x2
        boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0])  # y1, y2
    return boxes
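
For example (shape is (height, width), so x is clamped to 640 and y to 480 here):

```python
import torch

from ultralytics.utils.ops import clip_boxes

boxes = torch.tensor([[-5.0, 10.0, 700.0, 500.0]])
print(clip_boxes(boxes, (480, 640)))  # tensor([[  0.,  10., 640., 480.]])
```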





ultralytics.utils.ops.clip_coords

clip_coords(coords, shape)

Clip line coordinates to image boundaries.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `coords` | `Tensor \| ndarray` | Line coordinates to clip. | required |
| `shape` | `tuple` | Image shape as (height, width). | required |

Returns:

| Type | Description |
| --- | --- |
| `Tensor \| ndarray` | Clipped coordinates. |

Source code in ultralytics/utils/ops.py
def clip_coords(coords, shape):
    """
    Clip line coordinates to image boundaries.

    Args:
        coords (torch.Tensor | numpy.ndarray): Line coordinates to clip.
        shape (tuple): Image shape as (height, width).

    Returns:
        (torch.Tensor | numpy.ndarray): Clipped coordinates.
    """
    if isinstance(coords, torch.Tensor):  # faster individually (WARNING: inplace .clamp_() Apple MPS bug)
        coords[..., 0] = coords[..., 0].clamp(0, shape[1])  # x
        coords[..., 1] = coords[..., 1].clamp(0, shape[0])  # y
    else:  # np.array (faster grouped)
        coords[..., 0] = coords[..., 0].clip(0, shape[1])  # x
        coords[..., 1] = coords[..., 1].clip(0, shape[0])  # y
    return coords





ultralytics.utils.ops.scale_image

scale_image(masks, im0_shape, ratio_pad=None)

Rescale masks to original image size.

Takes resized and padded masks and rescales them back to the original image dimensions, removing any padding that was applied during preprocessing.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `masks` | `ndarray` | Resized and padded masks with shape [H, W, N] or [H, W, 3]. | required |
| `im0_shape` | `tuple` | Original image shape as (height, width). | required |
| `ratio_pad` | `tuple` | Ratio and padding values as ((ratio_h, ratio_w), (pad_h, pad_w)). | `None` |

Returns:

| Type | Description |
| --- | --- |
| `ndarray` | Rescaled masks with shape [H, W, N] matching original image dimensions. |

Source code in ultralytics/utils/ops.py
def scale_image(masks, im0_shape, ratio_pad=None):
    """
    Rescale masks to original image size.

    Takes resized and padded masks and rescales them back to the original image dimensions, removing any padding
    that was applied during preprocessing.

    Args:
        masks (np.ndarray): Resized and padded masks with shape [H, W, N] or [H, W, 3].
        im0_shape (tuple): Original image shape as (height, width).
        ratio_pad (tuple, optional): Ratio and padding values as ((ratio_h, ratio_w), (pad_h, pad_w)).

    Returns:
        (np.ndarray): Rescaled masks with shape [H, W, N] matching original image dimensions.
    """
    # Rescale coordinates (xyxy) from im1_shape to im0_shape
    im1_shape = masks.shape
    if im1_shape[:2] == im0_shape[:2]:
        return masks
    if ratio_pad is None:  # calculate from im0_shape
        gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1])  # gain  = old / new
        pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2  # wh padding
    else:
        pad = ratio_pad[1]
    top, left = int(pad[1]), int(pad[0])  # y, x
    bottom, right = int(im1_shape[0] - pad[1]), int(im1_shape[1] - pad[0])

    if len(masks.shape) < 2:
        raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}')
    masks = masks[top:bottom, left:right]
    masks = cv2.resize(masks, (im0_shape[1], im0_shape[0]))
    if len(masks.shape) == 2:
        masks = masks[:, :, None]

    return masks
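
A minimal sketch: a mask produced at the 640x640 letterbox resolution is cropped and resized back to an illustrative 480x640 original:

```python
import numpy as np

from ultralytics.utils.ops import scale_image

masks = np.zeros((640, 640, 1), dtype=np.float32)  # letterboxed masks [H, W, N]
rescaled = scale_image(masks, (480, 640))
print(rescaled.shape)  # (480, 640, 1): padding removed, original size restored
```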





ultralytics.utils.ops.xyxy2xywh

xyxy2xywh(x)

Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height) format where (x1, y1) is the top-left corner and (x2, y2) is the bottom-right corner.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `x` | `ndarray \| Tensor` | Input bounding box coordinates in (x1, y1, x2, y2) format. | required |

Returns:

| Type | Description |
| --- | --- |
| `ndarray \| Tensor` | Bounding box coordinates in (x, y, width, height) format. |

Source code in ultralytics/utils/ops.py
def xyxy2xywh(x):
    """
    Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height) format where (x1, y1) is the
    top-left corner and (x2, y2) is the bottom-right corner.

    Args:
        x (np.ndarray | torch.Tensor): Input bounding box coordinates in (x1, y1, x2, y2) format.

    Returns:
        (np.ndarray | torch.Tensor): Bounding box coordinates in (x, y, width, height) format.
    """
    assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
    y = empty_like(x)  # faster than clone/copy
    y[..., 0] = (x[..., 0] + x[..., 2]) / 2  # x center
    y[..., 1] = (x[..., 1] + x[..., 3]) / 2  # y center
    y[..., 2] = x[..., 2] - x[..., 0]  # width
    y[..., 3] = x[..., 3] - x[..., 1]  # height
    return y





ultralytics.utils.ops.xywh2xyxy

xywh2xyxy(x)

Convert bounding box coordinates from (x, y, width, height) format to (x1, y1, x2, y2) format where (x1, y1) is the top-left corner and (x2, y2) is the bottom-right corner. Note: operating on two channels at a time is faster than operating per channel.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `x` | `ndarray \| Tensor` | Input bounding box coordinates in (x, y, width, height) format. | required |

Returns:

| Type | Description |
| --- | --- |
| `ndarray \| Tensor` | Bounding box coordinates in (x1, y1, x2, y2) format. |

Source code in ultralytics/utils/ops.py
def xywh2xyxy(x):
    """
    Convert bounding box coordinates from (x, y, width, height) format to (x1, y1, x2, y2) format where (x1, y1) is the
    top-left corner and (x2, y2) is the bottom-right corner. Note: operating on two channels at a time is faster than per channel.

    Args:
        x (np.ndarray | torch.Tensor): Input bounding box coordinates in (x, y, width, height) format.

    Returns:
        (np.ndarray | torch.Tensor): Bounding box coordinates in (x1, y1, x2, y2) format.
    """
    assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
    y = empty_like(x)  # faster than clone/copy
    xy = x[..., :2]  # centers
    wh = x[..., 2:] / 2  # half width-height
    y[..., :2] = xy - wh  # top left xy
    y[..., 2:] = xy + wh  # bottom right xy
    return y
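
The two conversions are exact inverses, as a quick round trip shows:

```python
import torch

from ultralytics.utils.ops import xywh2xyxy, xyxy2xywh

xywh = torch.tensor([[320.0, 240.0, 100.0, 50.0]])  # center x, center y, w, h
xyxy = xywh2xyxy(xywh)
print(xyxy)             # tensor([[270., 215., 370., 265.]])
print(xyxy2xywh(xyxy))  # round-trips back to the original box
```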





ultralytics.utils.ops.xywhn2xyxy

xywhn2xyxy(x, w: int = 640, h: int = 640, padw: int = 0, padh: int = 0)

Convert normalized bounding box coordinates to pixel coordinates.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `x` | `ndarray \| Tensor` | Normalized bounding box coordinates in (x, y, w, h) format. | required |
| `w` | `int` | Image width in pixels. | `640` |
| `h` | `int` | Image height in pixels. | `640` |
| `padw` | `int` | Padding width in pixels. | `0` |
| `padh` | `int` | Padding height in pixels. | `0` |

Returns:

| Name | Type | Description |
| --- | --- | --- |
| `y` | `ndarray \| Tensor` | The coordinates of the bounding box in the format [x1, y1, x2, y2] where x1,y1 is the top-left corner, x2,y2 is the bottom-right corner of the bounding box. |

Source code in ultralytics/utils/ops.py
def xywhn2xyxy(x, w: int = 640, h: int = 640, padw: int = 0, padh: int = 0):
    """
    Convert normalized bounding box coordinates to pixel coordinates.

    Args:
        x (np.ndarray | torch.Tensor): Normalized bounding box coordinates in (x, y, w, h) format.
        w (int): Image width in pixels.
        h (int): Image height in pixels.
        padw (int): Padding width in pixels.
        padh (int): Padding height in pixels.

    Returns:
        y (np.ndarray | torch.Tensor): The coordinates of the bounding box in the format [x1, y1, x2, y2] where
            x1,y1 is the top-left corner, x2,y2 is the bottom-right corner of the bounding box.
    """
    assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
    y = empty_like(x)  # faster than clone/copy
    y[..., 0] = w * (x[..., 0] - x[..., 2] / 2) + padw  # top left x
    y[..., 1] = h * (x[..., 1] - x[..., 3] / 2) + padh  # top left y
    y[..., 2] = w * (x[..., 0] + x[..., 2] / 2) + padw  # bottom right x
    y[..., 3] = h * (x[..., 1] + x[..., 3] / 2) + padh  # bottom right y
    return y





ultralytics.utils.ops.xyxy2xywhn

xyxy2xywhn(x, w: int = 640, h: int = 640, clip: bool = False, eps: float = 0.0)

Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height, normalized) format. x, y, width and height are normalized to image dimensions.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `x` | `ndarray \| Tensor` | Input bounding box coordinates in (x1, y1, x2, y2) format. | required |
| `w` | `int` | Image width in pixels. | `640` |
| `h` | `int` | Image height in pixels. | `640` |
| `clip` | `bool` | Whether to clip boxes to image boundaries. | `False` |
| `eps` | `float` | Minimum value for box width and height. | `0.0` |

Returns:

| Type | Description |
| --- | --- |
| `ndarray \| Tensor` | Normalized bounding box coordinates in (x, y, width, height) format. |

Source code in ultralytics/utils/ops.py
def xyxy2xywhn(x, w: int = 640, h: int = 640, clip: bool = False, eps: float = 0.0):
    """
    Convert bounding box coordinates from (x1, y1, x2, y2) format to (x, y, width, height, normalized) format. x, y,
    width and height are normalized to image dimensions.

    Args:
        x (np.ndarray | torch.Tensor): Input bounding box coordinates in (x1, y1, x2, y2) format.
        w (int): Image width in pixels.
        h (int): Image height in pixels.
        clip (bool): Whether to clip boxes to image boundaries.
        eps (float): Minimum value for box width and height.

    Returns:
        (np.ndarray | torch.Tensor): Normalized bounding box coordinates in (x, y, width, height) format.
    """
    if clip:
        x = clip_boxes(x, (h - eps, w - eps))
    assert x.shape[-1] == 4, f"input shape last dimension expected 4 but input shape is {x.shape}"
    y = empty_like(x)  # faster than clone/copy
    y[..., 0] = ((x[..., 0] + x[..., 2]) / 2) / w  # x center
    y[..., 1] = ((x[..., 1] + x[..., 3]) / 2) / h  # y center
    y[..., 2] = (x[..., 2] - x[..., 0]) / w  # width
    y[..., 3] = (x[..., 3] - x[..., 1]) / h  # height
    return y
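
A minimal round-trip sketch with xywhn2xyxy (the image size values are illustrative):

```python
import numpy as np

from ultralytics.utils.ops import xywhn2xyxy, xyxy2xywhn

xyxy = np.array([[100.0, 50.0, 300.0, 150.0]])
xywhn = xyxy2xywhn(xyxy, w=640, h=480)
print(xywhn)                            # normalized: [[0.3125, 0.2083..., 0.3125, 0.2083...]]
print(xywhn2xyxy(xywhn, w=640, h=480))  # back to the original pixel coordinates
```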





ultralytics.utils.ops.xywh2ltwh

xywh2ltwh(x)

Convert bounding box format from [x, y, w, h] to [x1, y1, w, h] where x1, y1 are top-left coordinates.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `x` | `ndarray \| Tensor` | Input bounding box coordinates in xywh format. | required |

Returns:

| Type | Description |
| --- | --- |
| `ndarray \| Tensor` | Bounding box coordinates in xyltwh format. |

Source code in ultralytics/utils/ops.py
def xywh2ltwh(x):
    """
    Convert bounding box format from [x, y, w, h] to [x1, y1, w, h] where x1, y1 are top-left coordinates.

    Args:
        x (np.ndarray | torch.Tensor): Input bounding box coordinates in xywh format.

    Returns:
        (np.ndarray | torch.Tensor): Bounding box coordinates in xyltwh format.
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[..., 0] = x[..., 0] - x[..., 2] / 2  # top left x
    y[..., 1] = x[..., 1] - x[..., 3] / 2  # top left y
    return y





ultralytics.utils.ops.xyxy2ltwh

xyxy2ltwh(x)

Convert bounding boxes from [x1, y1, x2, y2] to [x1, y1, w, h] format.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `x` | `ndarray \| Tensor` | Input bounding box coordinates in xyxy format. | required |

Returns:

| Type | Description |
| --- | --- |
| `ndarray \| Tensor` | Bounding box coordinates in xyltwh format. |

Source code in ultralytics/utils/ops.py
def xyxy2ltwh(x):
    """
    Convert bounding boxes from [x1, y1, x2, y2] to [x1, y1, w, h] format.

    Args:
        x (np.ndarray | torch.Tensor): Input bounding box coordinates in xyxy format.

    Returns:
        (np.ndarray | torch.Tensor): Bounding box coordinates in xyltwh format.
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[..., 2] = x[..., 2] - x[..., 0]  # width
    y[..., 3] = x[..., 3] - x[..., 1]  # height
    return y





ultralytics.utils.ops.ltwh2xywh

ltwh2xywh(x)

Convert bounding boxes from [x1, y1, w, h] to [x, y, w, h] where xy1=top-left, xy=center.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `x` | `Tensor` | Input bounding box coordinates. | required |

Returns:

| Type | Description |
| --- | --- |
| `ndarray \| Tensor` | Bounding box coordinates in xywh format. |

Source code in ultralytics/utils/ops.py
def ltwh2xywh(x):
    """
    Convert bounding boxes from [x1, y1, w, h] to [x, y, w, h] where xy1=top-left, xy=center.

    Args:
        x (torch.Tensor): Input bounding box coordinates.

    Returns:
        (np.ndarray | torch.Tensor): Bounding box coordinates in xywh format.
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[..., 0] = x[..., 0] + x[..., 2] / 2  # center x
    y[..., 1] = x[..., 1] + x[..., 3] / 2  # center y
    return y





ultralytics.utils.ops.xyxyxyxy2xywhr

xyxyxyxy2xywhr(x)

Convert batched Oriented Bounding Boxes (OBB) from [xy1, xy2, xy3, xy4] to [xywh, rotation] format.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `x` | `ndarray \| Tensor` | Input box corners with shape (N, 8) in [xy1, xy2, xy3, xy4] format. | required |

Returns:

| Type | Description |
| --- | --- |
| `ndarray \| Tensor` | Converted data in [cx, cy, w, h, rotation] format with shape (N, 5). Rotation values are in radians from 0 to pi/2. |

Source code in ultralytics/utils/ops.py
def xyxyxyxy2xywhr(x):
    """
    Convert batched Oriented Bounding Boxes (OBB) from [xy1, xy2, xy3, xy4] to [xywh, rotation] format.

    Args:
        x (numpy.ndarray | torch.Tensor): Input box corners with shape (N, 8) in [xy1, xy2, xy3, xy4] format.

    Returns:
        (numpy.ndarray | torch.Tensor): Converted data in [cx, cy, w, h, rotation] format with shape (N, 5).
            Rotation values are in radians from 0 to pi/2.
    """
    is_torch = isinstance(x, torch.Tensor)
    points = x.cpu().numpy() if is_torch else x
    points = points.reshape(len(x), -1, 2)
    rboxes = []
    for pts in points:
        # NOTE: Use cv2.minAreaRect to get accurate xywhr,
        # especially some objects are cut off by augmentations in dataloader.
        (cx, cy), (w, h), angle = cv2.minAreaRect(pts)
        rboxes.append([cx, cy, w, h, angle / 180 * np.pi])
    return torch.tensor(rboxes, device=x.device, dtype=x.dtype) if is_torch else np.asarray(rboxes)





ultralytics.utils.ops.xywhr2xyxyxyxy

xywhr2xyxyxyxy(x)

Convert batched Oriented Bounding Boxes (OBB) from [xywh, rotation] to [xy1, xy2, xy3, xy4] format.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `x` | `ndarray \| Tensor` | Boxes in [cx, cy, w, h, rotation] format with shape (N, 5) or (B, N, 5). Rotation values should be in radians from 0 to pi/2. | required |

Returns:

| Type | Description |
| --- | --- |
| `ndarray \| Tensor` | Converted corner points with shape (N, 4, 2) or (B, N, 4, 2). |

Source code in ultralytics/utils/ops.py
def xywhr2xyxyxyxy(x):
    """
    Convert batched Oriented Bounding Boxes (OBB) from [xywh, rotation] to [xy1, xy2, xy3, xy4] format.

    Args:
        x (numpy.ndarray | torch.Tensor): Boxes in [cx, cy, w, h, rotation] format with shape (N, 5) or (B, N, 5).
            Rotation values should be in radians from 0 to pi/2.

    Returns:
        (numpy.ndarray | torch.Tensor): Converted corner points with shape (N, 4, 2) or (B, N, 4, 2).
    """
    cos, sin, cat, stack = (
        (torch.cos, torch.sin, torch.cat, torch.stack)
        if isinstance(x, torch.Tensor)
        else (np.cos, np.sin, np.concatenate, np.stack)
    )

    ctr = x[..., :2]
    w, h, angle = (x[..., i : i + 1] for i in range(2, 5))
    cos_value, sin_value = cos(angle), sin(angle)
    vec1 = [w / 2 * cos_value, w / 2 * sin_value]
    vec2 = [-h / 2 * sin_value, h / 2 * cos_value]
    vec1 = cat(vec1, -1)
    vec2 = cat(vec2, -1)
    pt1 = ctr + vec1 + vec2
    pt2 = ctr + vec1 - vec2
    pt3 = ctr - vec1 - vec2
    pt4 = ctr - vec1 + vec2
    return stack([pt1, pt2, pt3, pt4], -2)
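
A minimal round-trip sketch with xyxyxyxy2xywhr; the recovered values follow cv2.minAreaRect conventions, so w/h and the angle may be expressed differently for the same physical box:

```python
import numpy as np

from ultralytics.utils.ops import xywhr2xyxyxyxy, xyxyxyxy2xywhr

# One rotated box: center (100, 100), 40x20, rotated 0.3 rad
rbox = np.array([[100.0, 100.0, 40.0, 20.0, 0.3]], dtype=np.float32)
corners = xywhr2xyxyxyxy(rbox)
print(corners.shape)                          # (1, 4, 2)
print(xyxyxyxy2xywhr(corners.reshape(1, 8)))  # an equivalent [cx, cy, w, h, r] box
```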





ultralytics.utils.ops.ltwh2xyxy

ltwh2xyxy(x)

Convert bounding box from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `x` | `ndarray \| Tensor` | Input bounding box coordinates. | required |

Returns:

| Type | Description |
| --- | --- |
| `ndarray \| Tensor` | Bounding box coordinates in xyxy format. |

Source code in ultralytics/utils/ops.py
def ltwh2xyxy(x):
    """
    Convert bounding box from [x1, y1, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right.

    Args:
        x (np.ndarray | torch.Tensor): Input bounding box coordinates.

    Returns:
        (np.ndarray | torch.Tensor): Bounding box coordinates in xyxy format.
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[..., 2] = x[..., 2] + x[..., 0]  # bottom-right x
    y[..., 3] = x[..., 3] + x[..., 1]  # bottom-right y
    return y





ultralytics.utils.ops.segments2boxes

segments2boxes(segments)

Convert segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh).

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `segments` | `list` | List of segments where each segment is a list of points, each point is [x, y] coordinates. | required |

Returns:

| Type | Description |
| --- | --- |
| `ndarray` | Bounding box coordinates in xywh format. |

Source code in ultralytics/utils/ops.py
def segments2boxes(segments):
    """
    Convert segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh).

    Args:
        segments (list): List of segments where each segment is a list of points, each point is [x, y] coordinates.

    Returns:
        (np.ndarray): Bounding box coordinates in xywh format.
    """
    boxes = []
    for s in segments:
        x, y = s.T  # segment xy
        boxes.append([x.min(), y.min(), x.max(), y.max()])  # cls, xyxy
    return xyxy2xywh(np.array(boxes))  # cls, xywh





ultralytics.utils.ops.resample_segments

resample_segments(segments, n: int = 1000)

Resample segments to n points each using linear interpolation.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `segments` | `list` | List of (N, 2) arrays where N is the number of points in each segment. | required |
| `n` | `int` | Number of points to resample each segment to. | `1000` |

Returns:

| Type | Description |
| --- | --- |
| `list` | Resampled segments with n points each. |

Source code in ultralytics/utils/ops.py
def resample_segments(segments, n: int = 1000):
    """
    Resample segments to n points each using linear interpolation.

    Args:
        segments (list): List of (N, 2) arrays where N is the number of points in each segment.
        n (int): Number of points to resample each segment to.

    Returns:
        (list): Resampled segments with n points each.
    """
    for i, s in enumerate(segments):
        if len(s) == n:
            continue
        s = np.concatenate((s, s[0:1, :]), axis=0)
        x = np.linspace(0, len(s) - 1, n - len(s) if len(s) < n else n)
        xp = np.arange(len(s))
        x = np.insert(x, np.searchsorted(x, xp), xp) if len(s) < n else x
        segments[i] = (
            np.concatenate([np.interp(x, xp, s[:, i]) for i in range(2)], dtype=np.float32).reshape(2, -1).T
        )  # segment xy
    return segments
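
A minimal sketch upsampling a coarse 4-point outline to 100 evenly spaced points:

```python
import numpy as np

from ultralytics.utils.ops import resample_segments

segment = np.array([[0.0, 0.0], [10.0, 0.0], [10.0, 10.0], [0.0, 10.0]], dtype=np.float32)
resampled = resample_segments([segment], n=100)
print(resampled[0].shape)  # (100, 2)
```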





ultralytics.utils.ops.crop_mask

crop_mask(masks, boxes)

Crop masks to bounding box regions.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `masks` | `Tensor` | Masks with shape (N, H, W). | required |
| `boxes` | `Tensor` | Bounding box coordinates with shape (N, 4) in relative point form. | required |

Returns:

| Type | Description |
| --- | --- |
| `Tensor` | Cropped masks. |

Source code in ultralytics/utils/ops.py
def crop_mask(masks, boxes):
    """
    Crop masks to bounding box regions.

    Args:
        masks (torch.Tensor): Masks with shape (N, H, W).
        boxes (torch.Tensor): Bounding box coordinates with shape (N, 4) in relative point form.

    Returns:
        (torch.Tensor): Cropped masks.
    """
    _, h, w = masks.shape
    x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1)  # x1 shape(n,1,1)
    r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :]  # x indices, shape(1,1,w)
    c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None]  # y indices, shape(1,h,1)

    return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))
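
A minimal sketch: everything outside the box is zeroed, as the sum shows:

```python
import torch

from ultralytics.utils.ops import crop_mask

masks = torch.ones(1, 160, 160)
boxes = torch.tensor([[40.0, 40.0, 120.0, 120.0]])  # xyxy in mask coordinates
cropped = crop_mask(masks, boxes)
print(cropped.sum())  # tensor(6400.): only the 80x80 region inside the box survives
```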





ultralytics.utils.ops.process_mask

process_mask(protos, masks_in, bboxes, shape, upsample: bool = False)

Apply masks to bounding boxes using mask head output.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `protos` | `Tensor` | Mask prototypes with shape (mask_dim, mask_h, mask_w). | required |
| `masks_in` | `Tensor` | Mask coefficients with shape (N, mask_dim) where N is number of masks after NMS. | required |
| `bboxes` | `Tensor` | Bounding boxes with shape (N, 4) where N is number of masks after NMS. | required |
| `shape` | `tuple` | Input image size as (height, width). | required |
| `upsample` | `bool` | Whether to upsample masks to original image size. | `False` |

Returns:

| Type | Description |
| --- | --- |
| `Tensor` | A binary mask tensor of shape [n, h, w], where n is the number of masks after NMS, and h and w are the height and width of the input image. The mask is applied to the bounding boxes. |

Source code in ultralytics/utils/ops.py
def process_mask(protos, masks_in, bboxes, shape, upsample: bool = False):
    """
    Apply masks to bounding boxes using mask head output.

    Args:
        protos (torch.Tensor): Mask prototypes with shape (mask_dim, mask_h, mask_w).
        masks_in (torch.Tensor): Mask coefficients with shape (N, mask_dim) where N is number of masks after NMS.
        bboxes (torch.Tensor): Bounding boxes with shape (N, 4) where N is number of masks after NMS.
        shape (tuple): Input image size as (height, width).
        upsample (bool): Whether to upsample masks to original image size.

    Returns:
        (torch.Tensor): A binary mask tensor of shape [n, h, w], where n is the number of masks after NMS, and h and w
            are the height and width of the input image. The mask is applied to the bounding boxes.
    """
    c, mh, mw = protos.shape  # CHW
    ih, iw = shape
    masks = (masks_in @ protos.float().view(c, -1)).view(-1, mh, mw)  # CHW
    width_ratio = mw / iw
    height_ratio = mh / ih

    downsampled_bboxes = bboxes.clone()
    downsampled_bboxes[:, 0] *= width_ratio
    downsampled_bboxes[:, 2] *= width_ratio
    downsampled_bboxes[:, 3] *= height_ratio
    downsampled_bboxes[:, 1] *= height_ratio

    masks = crop_mask(masks, downsampled_bboxes)  # CHW
    if upsample:
        masks = F.interpolate(masks[None], shape, mode="bilinear", align_corners=False)[0]  # CHW
    return masks.gt_(0.0)
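
A minimal sketch with random prototypes and coefficients standing in for real mask-head output (shapes follow the signature above):

```python
import torch

from ultralytics.utils.ops import process_mask

protos = torch.rand(32, 160, 160)  # (mask_dim, mask_h, mask_w)
masks_in = torch.rand(5, 32)       # coefficients for 5 detections after NMS
bboxes = torch.tensor([[100.0, 100.0, 300.0, 300.0]]).repeat(5, 1)  # xyxy at input scale

masks = process_mask(protos, masks_in, bboxes, shape=(640, 640), upsample=True)
print(masks.shape)  # torch.Size([5, 640, 640]), binary 0/1 values
```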





ultralytics.utils.ops.process_mask_native

process_mask_native(protos, masks_in, bboxes, shape)

Apply masks to bounding boxes using mask head output with native upsampling.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `protos` | `Tensor` | Mask prototypes with shape (mask_dim, mask_h, mask_w). | required |
| `masks_in` | `Tensor` | Mask coefficients with shape (N, mask_dim) where N is number of masks after NMS. | required |
| `bboxes` | `Tensor` | Bounding boxes with shape (N, 4) where N is number of masks after NMS. | required |
| `shape` | `tuple` | Input image size as (height, width). | required |

Returns:

| Type | Description |
| --- | --- |
| `Tensor` | Binary mask tensor with shape (H, W, N). |

Source code in ultralytics/utils/ops.py
def process_mask_native(protos, masks_in, bboxes, shape):
    """
    Apply masks to bounding boxes using mask head output with native upsampling.

    Args:
        protos (torch.Tensor): Mask prototypes with shape (mask_dim, mask_h, mask_w).
        masks_in (torch.Tensor): Mask coefficients with shape (N, mask_dim) where N is number of masks after NMS.
        bboxes (torch.Tensor): Bounding boxes with shape (N, 4) where N is number of masks after NMS.
        shape (tuple): Input image size as (height, width).

    Returns:
        (torch.Tensor): Binary mask tensor with shape (H, W, N).
    """
    c, mh, mw = protos.shape  # CHW
    masks = (masks_in @ protos.float().view(c, -1)).view(-1, mh, mw)
    masks = scale_masks(masks[None], shape)[0]  # CHW
    masks = crop_mask(masks, bboxes)  # CHW
    return masks.gt_(0.0)





ultralytics.utils.ops.scale_masks

scale_masks(masks, shape, padding: bool = True)

Rescale segment masks to target shape.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `masks` | `Tensor` | Masks with shape (N, C, H, W). | required |
| `shape` | `tuple` | Target height and width as (height, width). | required |
| `padding` | `bool` | Whether masks are based on YOLO-style augmented images with padding. | `True` |

Returns:

| Type | Description |
| --- | --- |
| `Tensor` | Rescaled masks. |

Source code in ultralytics/utils/ops.py
def scale_masks(masks, shape, padding: bool = True):
    """
    Rescale segment masks to target shape.

    Args:
        masks (torch.Tensor): Masks with shape (N, C, H, W).
        shape (tuple): Target height and width as (height, width).
        padding (bool): Whether masks are based on YOLO-style augmented images with padding.

    Returns:
        (torch.Tensor): Rescaled masks.
    """
    mh, mw = masks.shape[2:]
    gain = min(mh / shape[0], mw / shape[1])  # gain  = old / new
    pad = [mw - shape[1] * gain, mh - shape[0] * gain]  # wh padding
    if padding:
        pad[0] /= 2
        pad[1] /= 2
    top, left = (int(pad[1]), int(pad[0])) if padding else (0, 0)  # y, x
    bottom, right = (int(mh - pad[1]), int(mw - pad[0]))
    masks = masks[..., top:bottom, left:right]

    masks = F.interpolate(masks, shape, mode="bilinear", align_corners=False)  # NCHW
    return masks





ultralytics.utils.ops.scale_coords

scale_coords(
    img1_shape,
    coords,
    img0_shape,
    ratio_pad=None,
    normalize: bool = False,
    padding: bool = True,
)

Rescale segment coordinates from img1_shape to img0_shape.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `img1_shape` | `tuple` | Shape of the source image. | required |
| `coords` | `Tensor` | Coordinates to scale with shape (N, 2). | required |
| `img0_shape` | `tuple` | Shape of the target image. | required |
| `ratio_pad` | `tuple` | Ratio and padding values as ((ratio_h, ratio_w), (pad_h, pad_w)). | `None` |
| `normalize` | `bool` | Whether to normalize coordinates to range [0, 1]. | `False` |
| `padding` | `bool` | Whether coordinates are based on YOLO-style augmented images with padding. | `True` |

Returns:

| Type | Description |
| --- | --- |
| `Tensor` | Scaled coordinates. |

Source code in ultralytics/utils/ops.py
def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None, normalize: bool = False, padding: bool = True):
    """
    Rescale segment coordinates from img1_shape to img0_shape.

    Args:
        img1_shape (tuple): Shape of the source image.
        coords (torch.Tensor): Coordinates to scale with shape (N, 2).
        img0_shape (tuple): Shape of the target image.
        ratio_pad (tuple, optional): Ratio and padding values as ((ratio_h, ratio_w), (pad_h, pad_w)).
        normalize (bool): Whether to normalize coordinates to range [0, 1].
        padding (bool): Whether coordinates are based on YOLO-style augmented images with padding.

    Returns:
        (torch.Tensor): Scaled coordinates.
    """
    if ratio_pad is None:  # calculate from img0_shape
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain  = old / new
        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
    else:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]

    if padding:
        coords[..., 0] -= pad[0]  # x padding
        coords[..., 1] -= pad[1]  # y padding
    coords[..., 0] /= gain
    coords[..., 1] /= gain
    coords = clip_coords(coords, img0_shape)
    if normalize:
        coords[..., 0] /= img0_shape[1]  # width
        coords[..., 1] /= img0_shape[0]  # height
    return coords





ultralytics.utils.ops.regularize_rboxes

regularize_rboxes(rboxes)

Regularize rotated bounding boxes to range [0, pi/2].

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `rboxes` | `Tensor` | Input rotated boxes with shape (N, 5) in xywhr format. | required |

Returns:

| Type | Description |
| --- | --- |
| `Tensor` | Regularized rotated boxes. |

Source code in ultralytics/utils/ops.py
def regularize_rboxes(rboxes):
    """
    Regularize rotated bounding boxes to range [0, pi/2].

    Args:
        rboxes (torch.Tensor): Input rotated boxes with shape (N, 5) in xywhr format.

    Returns:
        (torch.Tensor): Regularized rotated boxes.
    """
    x, y, w, h, t = rboxes.unbind(dim=-1)
    # Swap edge if t >= pi/2 while not being symmetrically opposite
    swap = t % math.pi >= math.pi / 2
    w_ = torch.where(swap, h, w)
    h_ = torch.where(swap, w, h)
    t = t % (math.pi / 2)
    return torch.stack([x, y, w_, h_, t], dim=-1)  # regularized boxes
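
For example, a rotation past pi/2 is folded back by swapping the edges:

```python
import math

import torch

from ultralytics.utils.ops import regularize_rboxes

rboxes = torch.tensor([[100.0, 100.0, 40.0, 20.0, 0.3 + math.pi / 2]])
print(regularize_rboxes(rboxes))  # w and h swap and the angle folds to ~0.3 rad
```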





ultralytics.utils.ops.masks2segments

masks2segments(masks, strategy: str = 'all')

Convert masks to segments using contour detection.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `masks` | `Tensor` | Binary masks with shape (batch_size, 160, 160). | required |
| `strategy` | `str` | Segmentation strategy, either 'all' or 'largest'. | `'all'` |

Returns:

| Type | Description |
| --- | --- |
| `list` | List of segment masks as float32 arrays. |

Source code in ultralytics/utils/ops.py
def masks2segments(masks, strategy: str = "all"):
    """
    Convert masks to segments using contour detection.

    Args:
        masks (torch.Tensor): Binary masks with shape (batch_size, 160, 160).
        strategy (str): Segmentation strategy, either 'all' or 'largest'.

    Returns:
        (list): List of segment masks as float32 arrays.
    """
    from ultralytics.data.converter import merge_multi_segment

    segments = []
    for x in masks.int().cpu().numpy().astype("uint8"):
        c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
        if c:
            if strategy == "all":  # merge and concatenate all segments
                c = (
                    np.concatenate(merge_multi_segment([x.reshape(-1, 2) for x in c]))
                    if len(c) > 1
                    else c[0].reshape(-1, 2)
                )
            elif strategy == "largest":  # select largest segment
                c = np.array(c[np.array([len(x) for x in c]).argmax()]).reshape(-1, 2)
        else:
            c = np.zeros((0, 2))  # no segments found
        segments.append(c.astype("float32"))
    return segments
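
A minimal sketch tracing the contour of a single square mask:

```python
import torch

from ultralytics.utils.ops import masks2segments

masks = torch.zeros(1, 160, 160)
masks[0, 40:120, 40:120] = 1  # one square mask
segments = masks2segments(masks, strategy="largest")
print(segments[0].shape)  # (num_points, 2) contour of the square
```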





ultralytics.utils.ops.convert_torch2numpy_batch

convert_torch2numpy_batch(batch: Tensor) -> np.ndarray

Convert a batch of FP32 torch tensors to NumPy uint8 arrays, changing from BCHW to BHWC layout.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `batch` | `Tensor` | Input tensor batch with shape (Batch, Channels, Height, Width) and dtype torch.float32. | required |

Returns:

| Type | Description |
| --- | --- |
| `ndarray` | Output NumPy array batch with shape (Batch, Height, Width, Channels) and dtype uint8. |

Source code in ultralytics/utils/ops.py
def convert_torch2numpy_batch(batch: torch.Tensor) -> np.ndarray:
    """
    Convert a batch of FP32 torch tensors to NumPy uint8 arrays, changing from BCHW to BHWC layout.

    Args:
        batch (torch.Tensor): Input tensor batch with shape (Batch, Channels, Height, Width) and dtype torch.float32.

    Returns:
        (np.ndarray): Output NumPy array batch with shape (Batch, Height, Width, Channels) and dtype uint8.
    """
    return (batch.permute(0, 2, 3, 1).contiguous() * 255).clamp(0, 255).to(torch.uint8).cpu().numpy()
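
For example:

```python
import torch

from ultralytics.utils.ops import convert_torch2numpy_batch

batch = torch.rand(2, 3, 64, 64)  # BCHW float32 in [0, 1]
arr = convert_torch2numpy_batch(batch)
print(arr.shape, arr.dtype)  # (2, 64, 64, 3) uint8
```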





ultralytics.utils.ops.clean_str

clean_str(s)

Clean a string by replacing special characters with '_' character.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `s` | `str` | A string needing special characters replaced. | required |

Returns:

| Type | Description |
| --- | --- |
| `str` | A string with special characters replaced by an underscore _. |

Source code in ultralytics/utils/ops.py
def clean_str(s):
    """
    Clean a string by replacing special characters with '_' character.

    Args:
        s (str): A string needing special characters replaced.

    Returns:
        (str): A string with special characters replaced by an underscore _.
    """
    return re.sub(pattern="[|@#!¡·$€%&()=?¿^*;:,¨´><+]", repl="_", string=s)
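
For example, this is handy for turning stream URLs into safe window or file names:

```python
from ultralytics.utils.ops import clean_str

print(clean_str("rtsp://admin@192.168.1.1:554/stream?ch=1"))
# rtsp_//admin_192.168.1.1_554/stream_ch_1
```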





ultralytics.utils.ops.empty_like

empty_like(x)

Create empty torch.Tensor or np.ndarray with same shape as input and float32 dtype.

Source code in ultralytics/utils/ops.py
def empty_like(x):
    """Create empty torch.Tensor or np.ndarray with same shape as input and float32 dtype."""
    return (
        torch.empty_like(x, dtype=torch.float32) if isinstance(x, torch.Tensor) else np.empty_like(x, dtype=np.float32)
    )




