
chore: reenable py313 #3455


Merged · 22 commits · Apr 17, 2025
3 changes: 2 additions & 1 deletion .github/scripts/filter-matrix.py
@@ -3,8 +3,9 @@
 import argparse
 import json
 import sys
+from typing import List

-disabled_python_versions = "3.13"
+disabled_python_versions: List[str] = []


 def main(args: list[str]) -> None:
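With the list emptied, the filter no longer drops any Python version from the generated CI matrix. A minimal sketch of how an entry filter like this typically consumes the list; the body of main() is not visible in this diff, so everything below other than disabled_python_versions is an assumed illustration, not the repository's actual code:

import argparse
import json
import sys
from typing import List

# Emptied by this PR; previously held "3.13", so that version was dropped.
disabled_python_versions: List[str] = []


def main(args: list[str]) -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("--matrix", default='{"include": []}')
    options = parser.parse_args(args)
    matrix = json.loads(options.matrix)
    # With disabled_python_versions empty, every entry passes through.
    matrix["include"] = [
        entry
        for entry in matrix["include"]
        if entry.get("python_version") not in disabled_python_versions
    ]
    print(json.dumps(matrix))


if __name__ == "__main__":
    main(sys.argv[1:])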
8 changes: 8 additions & 0 deletions .github/scripts/generate-tensorrt-test-matrix.py
@@ -28,6 +28,10 @@
 # please update the future tensorRT version you want to test here
 TENSORRT_VERSIONS_DICT = {
     "windows": {
+        "10.3.0": {
+            "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.3.0/zip/TensorRT-10.3.0.26.Windows.win10.cuda-12.5.zip",
+            "strip_prefix": "TensorRT-10.3.0.26",
+        },
         "10.7.0": {
             "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.7.0/zip/TensorRT-10.7.0.23.Windows.win10.cuda-12.6.zip",
             "strip_prefix": "TensorRT-10.7.0.23",
@@ -42,6 +46,10 @@
         },
     },
     "linux": {
+        "10.3.0": {
+            "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.3.0/tars/TensorRT-10.3.0.26.Linux.x86_64-gnu.cuda-12.5.tar.gz",
+            "strip_prefix": "TensorRT-10.3.0.26",
+        },
         "10.7.0": {
             "urls": "https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.7.0/tars/TensorRT-10.7.0.23.Linux.x86_64-gnu.cuda-12.6.tar.gz",
             "strip_prefix": "TensorRT-10.7.0.23",
16 changes: 6 additions & 10 deletions .github/scripts/generate_binary_build_matrix.py
@@ -18,15 +18,16 @@
 import sys
 from typing import Any, Callable, Dict, List, Optional, Tuple

+PYTHON_VERSIONS_FOR_PR_BUILD = ["3.11"]
 PYTHON_ARCHES_DICT = {
-    "nightly": ["3.9", "3.10", "3.11", "3.12"],
-    "test": ["3.9", "3.10", "3.11", "3.12"],
-    "release": ["3.9", "3.10", "3.11", "3.12"],
+    "nightly": ["3.9", "3.10", "3.11", "3.12", "3.13"],
+    "test": ["3.9", "3.10", "3.11", "3.12", "3.13"],
+    "release": ["3.9", "3.10", "3.11", "3.12", "3.13"],
 }
 CUDA_ARCHES_DICT = {
     "nightly": ["11.8", "12.6", "12.8"],
     "test": ["11.8", "12.6", "12.8"],
-    "release": ["11.8", "12.6", "12.8"],
+    "release": ["11.8", "12.4", "12.6"],
 }
 ROCM_ARCHES_DICT = {
     "nightly": ["6.1", "6.2"],

@@ -422,11 +423,6 @@ def generate_wheels_matrix(
     # Define default python version
     python_versions = list(PYTHON_ARCHES)

-    # If the list of python versions is set explicitly by the caller, stick with it instead
-    # of trying to add more versions behind the scene
-    if channel == NIGHTLY and (os in (LINUX, MACOS_ARM64, LINUX_AARCH64)):
-        python_versions += ["3.13"]
-
     if os == LINUX:
         # NOTE: We only build manywheel packages for linux
         package_type = "manywheel"

@@ -456,7 +452,7 @@ def generate_wheels_matrix(
         arches += [XPU]

     if limit_pr_builds:
-        python_versions = [python_versions[0]]
+        python_versions = PYTHON_VERSIONS_FOR_PR_BUILD

     global WHEEL_CONTAINER_IMAGES
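Two things change here: Python 3.13 moves from a nightly-only special case into the base PYTHON_ARCHES_DICT, and PR builds are pinned to an explicit version list rather than whichever version happens to come first. A minimal sketch of the new PR-build gating, with the surrounding generate_wheels_matrix() scaffolding assumed:

# Sketch: PR builds now use a named constant instead of python_versions[0],
# which made the PR-build version an accident of list order.
PYTHON_VERSIONS_FOR_PR_BUILD = ["3.11"]
PYTHON_ARCHES = ["3.9", "3.10", "3.11", "3.12", "3.13"]


def pick_python_versions(limit_pr_builds: bool) -> list[str]:
    python_versions = list(PYTHON_ARCHES)
    if limit_pr_builds:
        # Old behavior: python_versions = [python_versions[0]]  -> ["3.9"]
        python_versions = PYTHON_VERSIONS_FOR_PR_BUILD
    return python_versions


assert pick_python_versions(limit_pr_builds=True) == ["3.11"]
assert pick_python_versions(limit_pr_builds=False)[-1] == "3.13"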
1 change: 0 additions & 1 deletion .github/workflows/build-test-linux.yml
@@ -23,7 +23,6 @@ jobs:
       test-infra-ref: main
       with-rocm: false
       with-cpu: false
-      python-versions: '["3.11", "3.12", "3.10", "3.9"]'

   filter-matrix:
     needs: [generate-matrix]
1 change: 0 additions & 1 deletion .github/workflows/build-test-windows.yml
@@ -23,7 +23,6 @@ jobs:
       test-infra-ref: main
       with-rocm: false
       with-cpu: false
-      python-versions: '["3.11", "3.12", "3.10", "3.9"]'

   substitute-runner:
     needs: generate-matrix
20 changes: 19 additions & 1 deletion py/torch_tensorrt/_features.py
@@ -14,6 +14,7 @@
         "torch_tensorrt_runtime",
         "dynamo_frontend",
         "fx_frontend",
+        "refit",
     ],
 )

@@ -36,9 +37,10 @@
 _TORCHTRT_RT_AVAIL = _TS_FE_AVAIL or os.path.isfile(linked_file_runtime_full_path)
 _DYNAMO_FE_AVAIL = version.parse(sanitized_torch_version()) >= version.parse("2.1.dev")
 _FX_FE_AVAIL = True
+_REFIT_AVAIL = version.parse(sys.version.split()[0]) < version.parse("3.13")

 ENABLED_FEATURES = FeatureSet(
-    _TS_FE_AVAIL, _TORCHTRT_RT_AVAIL, _DYNAMO_FE_AVAIL, _FX_FE_AVAIL
+    _TS_FE_AVAIL, _TORCHTRT_RT_AVAIL, _DYNAMO_FE_AVAIL, _FX_FE_AVAIL, _REFIT_AVAIL
 )

@@ -62,6 +64,22 @@ def not_implemented(*args: List[Any], **kwargs: Dict[str, Any]) -> Any:
     return wrapper


+def needs_refit(f: Callable[..., Any]) -> Callable[..., Any]:

[Collaborator commented on the line above: Maybe we can make this a bit more generic (like for any feature in the FeatureSet)]

+    def wrapper(*args: List[Any], **kwargs: Dict[str, Any]) -> Any:
+        if ENABLED_FEATURES.refit:
+            return f(*args, **kwargs)
+        else:
+
+            def not_implemented(*args: List[Any], **kwargs: Dict[str, Any]) -> Any:
+                raise NotImplementedError(
+                    "Refit feature is currently not available in Python 3.13 or higher"
+                )
+
+            return not_implemented(*args, **kwargs)
+
+    return wrapper
+
+
 T = TypeVar("T")
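Picking up the collaborator's suggestion above: a more generic decorator could gate on any FeatureSet field by name instead of hard-coding refit. The following is only a sketch of that idea, not code from this PR; it substitutes a reduced stand-in for the module's real FeatureSet/ENABLED_FEATURES globals so it runs on its own:

from collections import namedtuple
from functools import wraps
from typing import Any, Callable

# Reduced stand-in; the real FeatureSet namedtuple has more fields.
FeatureSet = namedtuple("FeatureSet", ["refit"])
ENABLED_FEATURES = FeatureSet(refit=False)


def needs_feature(
    feature: str, message: str
) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
    """Gate a function on any named ENABLED_FEATURES field (sketch)."""

    def decorator(f: Callable[..., Any]) -> Callable[..., Any]:
        @wraps(f)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            if getattr(ENABLED_FEATURES, feature):
                return f(*args, **kwargs)
            raise NotImplementedError(message)

        return wrapper

    return decorator


@needs_feature("refit", "Refit feature is currently not available in Python 3.13 or higher")
def do_refit() -> None:
    print("refitting")


try:
    do_refit()
except NotImplementedError as err:
    print(err)  # raised because the stand-in disables refit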
15 changes: 12 additions & 3 deletions py/torch_tensorrt/dynamo/_refit.py
@@ -11,6 +11,7 @@
 from torch.export import ExportedProgram
 from torch.fx.experimental.proxy_tensor import unset_fake_temporarily
 from torch_tensorrt._enums import dtype
+from torch_tensorrt._features import needs_refit
 from torch_tensorrt._Input import Input
 from torch_tensorrt.dynamo import partitioning
 from torch_tensorrt.dynamo._exporter import inline_torch_modules

@@ -47,6 +48,7 @@
 logger = logging.getLogger(__name__)


+@needs_refit
 def construct_refit_mapping(
     module: torch.fx.GraphModule,
     inputs: Sequence[Input],

@@ -108,8 +110,11 @@ def construct_refit_mapping(
     return weight_map


+@needs_refit
 def construct_refit_mapping_from_weight_name_map(
-    weight_name_map: dict[Any, Any], state_dict: dict[Any, Any]
+    weight_name_map: dict[Any, Any],
+    state_dict: dict[Any, Any],
+    settings: CompilationSettings,
 ) -> dict[Any, Any]:
     engine_weight_map = {}
     for engine_weight_name, (sd_weight_name, np_weight_type) in weight_name_map.items():

@@ -120,7 +125,9 @@ def construct_refit_mapping_from_weight_name_map(
             # If weights is not in sd, we can leave it unchanged
             continue
         else:
-            engine_weight_map[engine_weight_name] = state_dict[sd_weight_name]
+            engine_weight_map[engine_weight_name] = state_dict[sd_weight_name].to(
+                to_torch_device(settings.device)
+            )

         engine_weight_map[engine_weight_name] = (
             engine_weight_map[engine_weight_name]

@@ -134,6 +141,7 @@ def construct_refit_mapping_from_weight_name_map(
     return engine_weight_map


+@needs_refit
 def _refit_single_trt_engine_with_gm(
     new_gm: torch.fx.GraphModule,
     old_engine: trt.ICudaEngine,

@@ -163,7 +171,7 @@ def _refit_single_trt_engine_with_gm(
             "constant_mapping", {}
         )  # type: ignore
         mapping = construct_refit_mapping_from_weight_name_map(
-            weight_name_map, new_gm.state_dict()
+            weight_name_map, new_gm.state_dict(), settings
         )
         constant_mapping_with_type = {}

@@ -213,6 +221,7 @@ def _refit_single_trt_engine_with_gm(
         raise AssertionError("Refitting failed.")


+@needs_refit
 def refit_module_weights(
     compiled_module: torch.fx.GraphModule | ExportedProgram,
     new_weight_module: ExportedProgram,
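The signature change above threads CompilationSettings through so that weights pulled from the new state_dict land on the compilation device before TensorRT refit sees them; previously a CPU-resident state_dict could be handed over as-is. A minimal illustration of the device move, with made-up tensor names and a CPU fallback so it runs anywhere:

import torch

# Stand-in for to_torch_device(settings.device) in the real code.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

state_dict = {"linear.weight": torch.randn(4, 4)}  # typically CPU-resident

# New behavior: each weight is moved to the target device while the
# engine_weight_map is built, mirroring state_dict[sd_weight_name].to(...).
engine_weight_map = {name: w.to(device) for name, w in state_dict.items()}

assert engine_weight_map["linear.weight"].device.type == device.type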
21 changes: 10 additions & 11 deletions py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py
@@ -26,6 +26,7 @@
 from torch.fx.passes.shape_prop import TensorMetadata
 from torch.utils._python_dispatch import _disable_current_modes
 from torch_tensorrt._enums import dtype
+from torch_tensorrt._features import needs_refit
 from torch_tensorrt._Input import Input
 from torch_tensorrt.dynamo import _defaults
 from torch_tensorrt.dynamo._engine_cache import BaseEngineCache

@@ -44,7 +45,7 @@
     get_trt_tensor,
     to_torch,
 )
-from torch_tensorrt.dynamo.utils import DYNAMIC_DIM, get_model_device, to_torch_device
+from torch_tensorrt.dynamo.utils import DYNAMIC_DIM, to_torch_device
 from torch_tensorrt.fx.observer import Observer
 from torch_tensorrt.logging import TRT_LOGGER

@@ -434,6 +435,7 @@ def check_weight_equal(
         except Exception:
             return torch.all(sd_weight == network_weight)

+    @needs_refit
     def _save_weight_mapping(self) -> None:
         """
         Construct the weight name mapping from engine weight name to state_dict weight name.

@@ -491,15 +493,10 @@ def _save_weight_mapping(self) -> None:
         _LOGGER.info("Building weight name mapping...")
         # Stage 1: Name mapping
         torch_device = to_torch_device(self.compilation_settings.device)
-        gm_is_on_cuda = get_model_device(self.module).type == "cuda"
-        if not gm_is_on_cuda:
-            # If the model original position is on CPU, move it GPU
-            sd = {
-                k: v.reshape(-1).to(torch_device)
-                for k, v in self.module.state_dict().items()
-            }
-        else:
-            sd = {k: v.reshape(-1) for k, v in self.module.state_dict().items()}
+        sd = {
+            k: v.reshape(-1).to(torch_device)
+            for k, v in self.module.state_dict().items()
+        }
         weight_name_map: dict[str, Any] = {}
         np_map = {}
         constant_mapping = {}

@@ -583,6 +580,7 @@ def _save_weight_mapping(self) -> None:
         gc.collect()
         torch.cuda.empty_cache()

+    @needs_refit
     def _insert_engine_to_cache(self, hash_val: str, serialized_engine: bytes) -> None:
         # TODO: @Evan is waiting for TRT's feature to cache the weight-stripped engine
         # if not self.compilation_settings.strip_engine_weights:

@@ -610,6 +608,7 @@ def _insert_engine_to_cache(self, hash_val: str, serialized_engine: bytes) -> None:
             ),
         )

+    @needs_refit
     def _pull_cached_engine(self, hash_val: str) -> Optional[TRTInterpreterResult]:
         # query the cached TRT engine
         cached_data = self.engine_cache.check(hash_val)  # type: ignore[union-attr]

@@ -720,7 +719,7 @@ def run(
             if self.compilation_settings.reuse_cached_engines:
                 interpreter_result = self._pull_cached_engine(hash_val)
                 if interpreter_result is not None:  # hit the cache
-                    return interpreter_result
+                    return interpreter_result  # type: ignore[no-any-return]

         self._construct_trt_network_def()
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
 [build-system]
 requires = [
-    "setuptools>=68.0.0",
+    "setuptools>=77.0.0",
     "packaging>=23.1",
     "wheel>=0.40.0",
     "ninja>=1.11.0",
2 changes: 1 addition & 1 deletion setup.py
@@ -18,12 +18,12 @@
 import torch
 import yaml
 from setuptools import Extension, find_namespace_packages, setup
+from setuptools.command.bdist_wheel import bdist_wheel
 from setuptools.command.build_ext import build_ext
 from setuptools.command.develop import develop
 from setuptools.command.editable_wheel import editable_wheel
 from setuptools.command.install import install
 from torch.utils.cpp_extension import IS_WINDOWS, BuildExtension, CUDAExtension
-from wheel.bdist_wheel import bdist_wheel

 __version__: str = "0.0.0"
 __cuda_version__: str = "0.0"
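Context for this one-line move: recent setuptools bundles its own bdist_wheel command, and the old wheel.bdist_wheel location is deprecated, which also explains the setuptools>=77.0.0 bump in pyproject.toml above. A hedged sketch of a backward-compatible import that projects straddling old and new setuptools sometimes use; this PR itself does not need the fallback:

try:
    # Modern location, bundled with recent setuptools (what this PR uses).
    from setuptools.command.bdist_wheel import bdist_wheel
except ImportError:
    # Legacy location from the standalone wheel package (deprecated).
    from wheel.bdist_wheel import bdist_wheel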
3 changes: 2 additions & 1 deletion tests/modules/custom_models.py
@@ -3,7 +3,6 @@
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from transformers import BertConfig, BertModel, BertTokenizer


 # Sample Pool Model (for testing plugin serialization)

@@ -165,6 +164,8 @@ def forward(self, z: List[torch.Tensor]):


 def BertModule():
+    from transformers import BertConfig, BertModel, BertTokenizer
+
     enc = BertTokenizer.from_pretrained("google-bert/bert-base-uncased")
     text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
     tokenized_text = enc.tokenize(text)
3 changes: 0 additions & 3 deletions tests/modules/hub.py
@@ -4,10 +4,7 @@
 import custom_models as cm
 import timm
 import torch
-import torch.nn as nn
-import torch.nn.functional as F
 import torchvision.models as models
-from transformers import BertConfig, BertModel, BertTokenizer

 torch.hub._validate_not_a_forked_repo = lambda a, b, c: True
20 changes: 20 additions & 0 deletions tests/py/dynamo/models/test_engine_cache.py
@@ -250,6 +250,10 @@ def remove_timing_cache(path=TIMING_CACHE_PATH):
             msg=f"Engine caching didn't speed up the compilation. Time taken without engine caching: {times[0]} ms, time taken with engine caching: {times[2]} ms",
         )

+    @unittest.skipIf(
+        not torch_trt.ENABLED_FEATURES.refit,
+        "Engine caching requires refit feature that is not supported in Python 3.13 or higher",
+    )
     def test_dynamo_compile_with_custom_engine_cache(self):
         model = models.resnet18(pretrained=True).eval().to("cuda")

@@ -314,6 +318,10 @@ def test_dynamo_compile_with_custom_engine_cache(self):
             for h, count in custom_engine_cache.hashes.items()
         ]

+    @unittest.skipIf(
+        not torch_trt.ENABLED_FEATURES.refit,
+        "Engine caching requires refit feature that is not supported in Python 3.13 or higher",
+    )
     def test_dynamo_compile_change_input_shape(self):
         """Runs compilation 3 times, the cache should miss each time"""
         model = models.resnet18(pretrained=True).eval().to("cuda")

@@ -346,6 +354,10 @@ def test_dynamo_compile_change_input_shape(self):
             for h, count in custom_engine_cache.hashes.items()
         ]

+    @unittest.skipIf(
+        not torch_trt.ENABLED_FEATURES.refit,
+        "Engine caching requires refit feature that is not supported in Python 3.13 or higher",
+    )
     @pytest.mark.xfail
     def test_torch_compile_with_default_disk_engine_cache(self):
         # Custom Engine Cache

@@ -485,6 +497,10 @@ def test_torch_compile_with_custom_engine_cache(self):
             for h, count in custom_engine_cache.hashes.items()
         ]

+    @unittest.skipIf(
+        not torch_trt.ENABLED_FEATURES.refit,
+        "Engine caching requires refit feature that is not supported in Python 3.13 or higher",
+    )
     def test_torch_trt_compile_change_input_shape(self):
         # Custom Engine Cache
         model = models.resnet18(pretrained=True).eval().to("cuda")

@@ -611,6 +627,10 @@ def forward(self, c, d):
         assertions.assertEqual(hash1, hash2)

     # @unittest.skip("benchmark on small models")
+    @unittest.skipIf(
+        not torch_trt.ENABLED_FEATURES.refit,
+        "Engine caching requires refit feature that is not supported in Python 3.13 or higher",
+    )
     def test_caching_small_model(self):
         from torch_tensorrt.dynamo._refit import refit_module_weights
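All five skips follow the same pattern: consult the new refit flag on ENABLED_FEATURES at collection time, so the refit-dependent engine-cache tests are skipped rather than failed on Python 3.13. A minimal self-contained version of the pattern, assuming torch_tensorrt is importable; the test body is a placeholder, not code from this PR:

import unittest

import torch_tensorrt as torch_trt


class TestEngineCacheGated(unittest.TestCase):
    @unittest.skipIf(
        not torch_trt.ENABLED_FEATURES.refit,
        "Engine caching requires refit feature that is not supported in Python 3.13 or higher",
    )
    def test_refit_dependent_path(self) -> None:
        # Placeholder body: the real tests compile a model twice and assert
        # the second compilation hits the weight-refitted cached engine.
        self.assertTrue(torch_trt.ENABLED_FEATURES.refit)


if __name__ == "__main__":
    unittest.main()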