Skip to content

Utilities

Device detection and capability checking utilities.

Overview

These utilities help you write portable code that adapts to available hardware.

from rotalabs_accel import (
    get_device,
    is_cuda_available,
    is_triton_available,
    get_device_properties,
)

# Auto-select best device
device = get_device()  # Returns 'cuda' if available, else 'cpu'

# Check capabilities
print(f"CUDA available: {is_cuda_available()}")
print(f"Triton available: {is_triton_available()}")

# Get detailed GPU info
if is_cuda_available():
    props = get_device_properties()
    print(f"GPU: {props['name']}")
    print(f"VRAM: {props['total_memory'] / 1e9:.1f} GB")

API Reference

Functions

get_device

get_device(device: Optional[str] = None) -> torch.device

Get a torch device, with smart defaults.

Parameters:

| Name | Type | Description | Default |
| ---- | ---- | ----------- | ------- |
| device | Optional[str] | Device string ('cuda', 'cpu', 'cuda:0', etc.). If None, returns CUDA if available, else CPU. | None |

Returns:

| Type | Description |
| ---- | ----------- |
| torch.device | torch.device instance. |

Example

device = get_device()          # Auto-detect
device = get_device('cuda:1')  # Specific GPU

Source code in src/rotalabs_accel/utils/device.py
def get_device(device: Optional[str] = None) -> torch.device:
    """
    Resolve a torch device, falling back to a sensible default.

    Args:
        device: Explicit device string such as 'cuda', 'cpu', or 'cuda:0'.
                When None, CUDA is chosen if available, otherwise CPU.

    Returns:
        The resolved torch.device instance.

    Example:
        >>> device = get_device()  # Auto-detect
        >>> device = get_device('cuda:1')  # Specific GPU
    """
    if device is None:
        # No explicit request: prefer the GPU when one is usable.
        backend = 'cuda' if is_cuda_available() else 'cpu'
        return torch.device(backend)
    return torch.device(device)

is_cuda_available

is_cuda_available() -> bool

Check if CUDA is available.

Source code in src/rotalabs_accel/utils/device.py
def is_cuda_available() -> bool:
    """Report whether PyTorch can see a usable CUDA device."""
    return bool(torch.cuda.is_available())

is_triton_available

is_triton_available() -> bool

Check if Triton is available.

Source code in src/rotalabs_accel/utils/device.py
def is_triton_available() -> bool:
    """Report whether the Triton package can be imported."""
    try:
        import triton  # noqa: F401 -- imported only to probe availability
    except ImportError:
        return False
    return True

get_device_properties

get_device_properties(device: Optional[torch.device] = None) -> Dict[str, Any]

Get device properties and capabilities.

Parameters:

Name Type Description Default
device Optional[device]

Device to query. If None, uses current CUDA device.

None

Returns:

Type: Dict[str, Any]

Dictionary with device properties:

  • name: Device name
  • compute_capability: (major, minor) tuple
  • total_memory: Total memory in bytes
  • supports_fp16: Whether FP16 is supported
  • supports_bf16: Whether BF16 is supported
  • supports_fp8: Whether FP8 is supported (Hopper+)
  • supports_int8_tensor_cores: Whether INT8 tensor cores available
Example

props = get_device_properties()
print(f"GPU: {props['name']}")
if props['supports_fp8']:
    print("FP8 quantization available!")

Source code in src/rotalabs_accel/utils/device.py
def get_device_properties(device: Optional[torch.device] = None) -> Dict[str, Any]:
    """
    Get device properties and capabilities.

    Args:
        device: Device to query. If None, uses current CUDA device.

    Returns:
        Dictionary with device properties:
        - name: Device name
        - compute_capability: (major, minor) tuple
        - total_memory: Total memory in bytes
        - supports_fp16: Whether FP16 is supported
        - supports_bf16: Whether BF16 is supported
        - supports_fp8: Whether FP8 is supported (Ada/Hopper, sm_89+)
        - supports_int8_tensor_cores: Whether INT8 tensor cores available
        - multi_processor_count: Number of streaming multiprocessors (0 on CPU)

    Example:
        >>> props = get_device_properties()
        >>> print(f"GPU: {props['name']}")
        >>> if props['supports_fp8']:
        ...     print("FP8 quantization available!")
    """
    if not torch.cuda.is_available():
        # CPU fallback mirrors the CUDA branch's full schema (including
        # multi_processor_count) so callers can index any key without
        # branching on device type.
        return {
            'name': 'CPU',
            'compute_capability': (0, 0),
            'total_memory': 0,
            'supports_fp16': True,
            'supports_bf16': False,
            'supports_fp8': False,
            'supports_int8_tensor_cores': False,
            'multi_processor_count': 0,
        }

    if device is None:
        device = torch.device('cuda')

    props = torch.cuda.get_device_properties(device)
    cc = (props.major, props.minor)

    # Capability thresholds follow NVIDIA compute-capability generations.
    return {
        'name': props.name,
        'compute_capability': cc,
        'total_memory': props.total_memory,
        'supports_fp16': cc >= (5, 3),  # Maxwell (sm_53)+
        'supports_bf16': cc >= (8, 0),  # Ampere (sm_80)+
        'supports_fp8': cc >= (8, 9),   # Ada (sm_89) / Hopper (sm_90)
        'supports_int8_tensor_cores': cc >= (7, 5),  # Turing (sm_75)+
        'multi_processor_count': props.multi_processor_count,
    }

select_dtype

select_dtype(preferred: dtype = torch.float16, device: Optional[device] = None) -> torch.dtype

Select the best available dtype for the device.

Parameters:

Name Type Description Default
preferred dtype

Preferred dtype if supported.

float16
device Optional[device]

Device to check capabilities for.

None

Returns:

Type Description
dtype

Best supported dtype.

Example

dtype = select_dtype(torch.bfloat16)
model = model.to(dtype)

Source code in src/rotalabs_accel/utils/device.py
def select_dtype(
    preferred: torch.dtype = torch.float16,
    device: Optional[torch.device] = None,
) -> torch.dtype:
    """
    Select the best available dtype for the device.

    Args:
        preferred: Preferred dtype if supported.
        device: Device to check capabilities for.

    Returns:
        Best supported dtype.

    Example:
        >>> dtype = select_dtype(torch.bfloat16)
        >>> model = model.to(dtype)
    """
    caps = get_device_properties(device)

    # Each reduced-precision dtype maps to its capability flag and the
    # dtype to fall back to when the flag is absent.
    downgrades = {
        torch.bfloat16: ('supports_bf16', torch.float16),
        torch.float16: ('supports_fp16', torch.float32),
    }

    entry = downgrades.get(preferred)
    if entry is not None:
        flag, fallback = entry
        if not caps[flag]:
            return fallback

    return preferred

Usage Patterns

Portable Device Selection

from rotalabs_accel import get_device

device = get_device()

# Works on any platform
model = Model().to(device)
x = torch.randn(1, 512, 4096, device=device)
y = model(x)

Conditional Logic Based on Capabilities

from rotalabs_accel import is_triton_available, get_device_properties

if is_triton_available():
    print("Using Triton-optimized kernels")
else:
    print("Falling back to PyTorch")

# Select dtype based on GPU capabilities
if is_cuda_available():
    props = get_device_properties()
    if props.get('supports_bf16', False):
        dtype = torch.bfloat16
        print("Using BF16 (Ampere+)")
    else:
        dtype = torch.float16
        print("Using FP16")
else:
    dtype = torch.float32
    print("Using FP32 on CPU")

Multi-GPU Selection

from rotalabs_accel import get_device

# Select specific GPU
device = get_device("cuda:0")
device = get_device("cuda:1")

# Force CPU even if GPU available
device = get_device("cpu")

Device Properties

The get_device_properties() function returns a dictionary with:

Property Type Description
name str GPU name (e.g., "NVIDIA A100-SXM4-80GB")
compute_capability tuple Compute capability (e.g., (8, 0))
total_memory int Total VRAM in bytes
supports_bf16 bool BF16 tensor core support (Ampere+)
supports_fp8 bool FP8 support (Ada/Hopper, sm_89+)

GPU Generation Detection

props = get_device_properties()
cc = props['compute_capability']

if cc >= (9, 0):
    print("Hopper (H100) - FP8 support")
elif cc >= (8, 0):
    print("Ampere (A100/A10) - BF16 tensor cores")
elif cc >= (7, 0):
    print("Volta/Turing (V100/T4)")
else:
    print("Older GPU")

Triton Availability

Triton requires:

  • Linux operating system
  • NVIDIA GPU with CUDA
  • Python 3.8+

On other platforms, is_triton_available() returns False and all kernels automatically fall back to PyTorch.

from rotalabs_accel import is_triton_available

if not is_triton_available():
    # Could be:
    # - macOS/Windows (Triton only supports Linux)
    # - No NVIDIA GPU
    # - Triton not installed: pip install triton
    print("Triton not available, using PyTorch fallbacks")