Detecting Images and Videos with YOLOv8

This post shows how to detect objects in images and videos with the YOLOv8 model in Python, covering single-image inference as well as frame-by-frame video prediction using the SAHI framework with CUDA acceleration. The code demonstrates how to load the model, resize input images, draw bounding boxes, and display the results.


1. Detecting Images

```python

import cv2
from ultralytics import YOLO
import torch

model_path = 'object_detection/best.pt'  # Change this to your YOLOv8 model's path
image_path = 'object_detection/32.jpg'  # Change this to your image's path

# Load the trained YOLOv8 model
model = YOLO(model_path)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Using device: %s" % device)
model.to(device)


# Read the image and snap its size to a multiple of 32 (YOLOv8's stride),
# downscaling by roughly 4x; cv2.resize expects (width, height)
image = cv2.imread(image_path)
height, width, _ = image.shape
new_shape = (32 * int(width / 128), 32 * int(height / 128))
image = cv2.resize(image, new_shape)

with torch.no_grad():
    results = model.predict(image)
    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = box.xyxy[0]
            # Draw the bounding box on the BGR frame
            cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
            # Add a label above the box
            cv2.putText(image, result.names[int(box.cls)], (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)

    cv2.imshow('Image', image)
    cv2.waitKey(0)

cv2.destroyAllWindows()
```
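
If you prefer not to draw boxes by hand, Ultralytics results objects also provide a `plot()` helper that returns the annotated image as a BGR array. A minimal sketch, assuming the same weights and test image as above:

```python
import cv2
from ultralytics import YOLO

# Same paths as the script above
model = YOLO('object_detection/best.pt')
results = model.predict('object_detection/32.jpg')

# Results.plot() renders boxes and labels onto a copy of the image (BGR)
annotated = results[0].plot()
cv2.imshow('Image', annotated)
cv2.waitKey(0)
cv2.destroyAllWindows()
```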


2. Detecting Videos

```python

import cv2
from ultralytics import YOLO
import torch

model_path = 'object_detection/best.pt'  # Change this to your YOLOv8 model's path
video_path = 'object_detection/物块.mp4'  # Change this to your video's path

# Load the trained YOLOv8 model
model = YOLO(model_path)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Using device: %s" % device)
model.to(device)
batch_size = 8  # Number of frames to run through the model at once
frames = []
cap = cv2.VideoCapture(video_path)

if not cap.isOpened():
    print("Error: Could not open video.")
    exit()

# Process video frames
while True:
    ret, frame = cap.read()
    if not ret:
        print("Finished processing video.")
        break
    # Snap the frame size to a multiple of 32 (YOLOv8's stride), downscaling
    # by roughly 2x; cv2.resize expects (width, height)
    height, width, _ = frame.shape
    new_shape = (32 * int(width / 64), 32 * int(height / 64))
    frame = cv2.resize(frame, new_shape)
    frames.append(frame)
    # Run inference once a full batch has accumulated; any frames left over
    # at the end of the video (fewer than batch_size) are skipped
    if len(frames) == batch_size:
        with torch.no_grad():
            results = model.predict(frames)

        # Process each detection
        for i, result in enumerate(results):
            for box in result.boxes:
                print(box.conf)
                if float(box.conf) > 0.9:
                    x1, y1, x2, y2 = box.xyxy[0]
                    # Draw the bounding box on the BGR frame
                    cv2.rectangle(frames[i], (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
                    # Add a label above the box
                    cv2.putText(frames[i], result.names[int(box.cls)], (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

            cv2.imshow('Video', frames[i])

            if cv2.waitKey(1) & 0xFF == ord('q'):
                cap.release()
                cv2.destroyAllWindows()
                exit()
        frames.clear()
cap.release()
cv2.destroyAllWindows()
```
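
The script above only displays the annotated frames; to keep them, each frame can be re-encoded to disk with OpenCV's `VideoWriter`. A minimal sketch, assuming a hypothetical output path `output.mp4` and a fixed 30 fps:

```python
import cv2

# `annotated_frames` stands in for the frames[i] arrays produced in the
# detection loop above (hypothetical; all frames must share one size)
annotated_frames = []  # fill with BGR numpy arrays from the loop

if annotated_frames:
    h, w, _ = annotated_frames[0].shape
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')              # MPEG-4 codec
    writer = cv2.VideoWriter('output.mp4', fourcc, 30.0, (w, h))
    for frame in annotated_frames:
        writer.write(frame)                               # expects BGR frames
    writer.release()
```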

3. Video Detection with SAHI

SAHI (Slicing Aided Hyper Inference) slices each frame into overlapping tiles, runs the detector on every tile, and merges the predictions, which helps with small objects in large frames.

```python
import argparse
import sys
import cv2
from sahi import AutoDetectionModel
from sahi.predict import get_sliced_prediction
import imageio
import numpy as np


def run(weights="yolov8n.pt", source="test.mp4", view_img=False):
    """
    Run object detection on a video using YOLOv8 and SAHI.

    Args:
        weights (str): Model weights path.
        source (str): Video file path.
        view_img (bool): Show results.
    """

    yolov8_model_path = weights
    detection_model = AutoDetectionModel.from_pretrained(
        model_type="yolov8", model_path=yolov8_model_path, confidence_threshold=0.3, device="cuda:0"
    )
    videocapture = cv2.VideoCapture(source)

    # Snap the frame size to a multiple of 32 (YOLOv8's stride); cv2.resize expects (width, height)
    frame_w = videocapture.get(cv2.CAP_PROP_FRAME_WIDTH)
    frame_h = videocapture.get(cv2.CAP_PROP_FRAME_HEIGHT)
    new_shape = 32 * int(frame_w / 64), 32 * int(frame_h / 64)
    writer = imageio.get_writer("object_detection/object_detection.mp4", fps=1 / 0.025)  # 40 fps output

    while videocapture.isOpened():
        success, frame = videocapture.read()
        if not success:
            break
        frame = cv2.resize(frame, new_shape)
        image = frame.copy()
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = get_sliced_prediction(
            frame, detection_model, slice_height=512, slice_width=512, overlap_height_ratio=0.2, overlap_width_ratio=0.2
        )
        object_prediction_list = results.object_prediction_list

        boxes_list = []
        clss_list = []

        for ind, _ in enumerate(object_prediction_list):
            print(object_prediction_list[ind].score.value)
            if float(object_prediction_list[ind].score.value) > 0.85:
                boxes = (
                    object_prediction_list[ind].bbox.minx,
                    object_prediction_list[ind].bbox.miny,
                    object_prediction_list[ind].bbox.maxx,
                    object_prediction_list[ind].bbox.maxy,
                )
                clss = object_prediction_list[ind].category.name
                boxes_list.append(boxes)
                clss_list.append(clss)

        for box, cls in zip(boxes_list, clss_list):
            x1, y1, x2, y2 = box
            cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), (56, 56, 255), 2)
            label = str(cls)
            t_size = cv2.getTextSize(label, 0, fontScale=0.6, thickness=1)[0]
            cv2.rectangle(
                image, (int(x1), int(y1) - t_size[1] - 3), (int(x1) + t_size[0], int(y1) + 3), (56, 56, 255), -1
            )
            cv2.putText(
                image, label, (int(x1), int(y1) - 2), 0, 0.6, [255, 255, 255], thickness=1, lineType=cv2.LINE_AA
            )

        # Write the annotated frame regardless of display; imageio expects RGB
        writer.append_data(cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.uint8))

        if view_img:
            cv2.imshow("result", image)

        if cv2.waitKey(1) == ord("q"):
            videocapture.release()
            cv2.destroyAllWindows()
            writer.close()
            sys.exit()
    writer.close()


def parse_opt():
    """Parse command line arguments."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--weights", type=str, default="object_detection/best.pt", help="initial weights path")
    parser.add_argument("--source", type=str, default="object_detection/物块.mp4", help="video file path")
    parser.add_argument("--view-img", type=bool, default=True, help="show results")
    return parser.parse_args()


def main(options):
    """Main function."""
    run(**vars(options))


if __name__ == "__main__":
    opt = parse_opt()
    main(opt)
```
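
Assuming the script is saved as, say, `sahi_video.py` (the filename is arbitrary), it can be run with the defaults from `parse_opt` or with explicit arguments, for example `python sahi_video.py --weights object_detection/best.pt --source object_detection/物块.mp4`; display can be disabled with `--no-view-img`, which the `BooleanOptionalAction` flag provides.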

### Why YOLOv8 Prediction May Produce No Output Images

When YOLOv8 fails to produce images during prediction, several things can be at fault. Typically this comes down to a misconfigured setup, incorrectly loaded model weights, or a problem in the input-data pipeline.

#### Model configuration and environment

Make sure you are on the latest stable YOLOv8 release and that all dependencies are installed at compatible versions. For a PyTorch-based application, it is also important to verify that CUDA/GPU support is working correctly [^1].

#### Input image preprocessing

Check the preprocessing logic used at inference time for defects. For example, if the image size does not match expectations or the color-channel order is wrong, detection may silently return nothing useful. The transform style used for ResNet feature extraction can serve as a reference:

```python
import torchvision.transforms as transforms

transform = transforms.Compose([
    transforms.Resize((640, 640)),  # Resize to a size suitable for YOLOv8 input
    transforms.ToTensor(),
])
image_tensor = transform(image)
```

#### Debugging the inference step

To understand why no visualized result appears, add logging statements to trace the state at each step. Pay particular attention to what happens after the `model.predict()` call: confirm whether an exception is raised or an empty result set is returned (a concrete sketch follows this section).

Also try simplifying the test: run one complete prediction with the official example script, observe its behavior, and only then reintroduce your custom changes step by step.

#### Output path settings

Finally, check that the location for saving predicted images is specified correctly. A failed relative-path resolution can leave the output in an unexpected directory, making it look as though nothing was generated at all:

```python
from ultralytics import YOLO

# Load a pretrained YOLOv8 model
model = YOLO('yolov8n.pt')
results = model.predict(source='data/images', save=True, project='./runs/detect')
print(f"Results saved to {results[0].save_dir}")
```

Working through these angles should help you locate and fix the root cause.
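
As a concrete sketch of the logging suggested above (the weights and image paths are placeholders): `verbose=True` makes Ultralytics print per-image timing and detection summaries, and checking `len(result.boxes)` distinguishes "no detections" from "prediction failed":

```python
import torch
from ultralytics import YOLO

# Environment sanity check before debugging the model itself
print(f"torch {torch.__version__}, CUDA available: {torch.cuda.is_available()}")

model = YOLO('yolov8n.pt')  # placeholder weights

try:
    # verbose=True prints per-image speed and detection summaries
    results = model.predict('data/images/sample.jpg', verbose=True)
except Exception as exc:
    print(f"Prediction raised an exception: {exc}")
else:
    for i, result in enumerate(results):
        n = len(result.boxes)
        print(f"image {i}: {n} detections")
        if n == 0:
            print("  -> model ran but found nothing; check preprocessing and thresholds")
```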