utils ¶

A collection of miscellaneous utilities for the TabPFN models.

infer_categorical_features ¶

infer_categorical_features(
    X: ndarray,
    *,
    provided: Sequence[int] | None,
    min_samples_for_inference: int,
    max_unique_for_category: int,
    min_unique_for_numerical: int
) -> list[int]

Infer the categorical features from the given data.

Note

This function may decide that particular columns are not categorical, based on what best suits the model's predictions and its pre-training.

Parameters:

Name	Type	Description	Default
`X`	`ndarray`	The data to infer the categorical features from.	required
`provided`	`Sequence[int] \| None`	Any user provided indices of what is considered categorical.	required
`min_samples_for_inference`	`int`	The minimum number of samples required for automatic inference of features which were not provided as categorical.	required
`max_unique_for_category`	`int`	The maximum number of unique values for a feature to be considered categorical.	required
`min_unique_for_numerical`	`int`	The minimum number of unique values for a feature to be considered numerical.	required

Returns:

Type	Description
`list[int]`	The indices of inferred categorical features.

infer_device_and_type ¶

infer_device_and_type(
    device: str | device | None,
) -> device

Infer the device and data type from the given device specification (a string, a device object, or `None`).

Parameters:

Name	Type	Description	Default
`device`	`str \| device \| None`	The device to infer the type from.	required

Returns:

Type	Description
`device`	The inferred device

infer_fp16_inference_mode ¶

infer_fp16_inference_mode(
    device: device, *, enable: bool | None
) -> bool

Infer whether fp16 inference should be enabled.

Parameters:

Name	Type	Description	Default
`device`	`device`	The device to validate against.	required
`enable`	`bool \| None`	Whether fp16 inference should be enabled. Pass `True` or `False` to set it explicitly; if `None`, detect whether it is possible and use it if so.	required

Returns:

Type	Description
`bool`	Whether to use fp16 inference or not.

Raises:

Type	Description
`ValueError`	If fp16 inference is enabled but the device type does not support it.

infer_random_state ¶

infer_random_state(
    random_state: int | RandomState | Generator | None,
) -> tuple[int, Generator]

Infer the random state from the given input.

Parameters:

Name	Type	Description	Default
`random_state`	`int \| RandomState \| Generator \| None`	The random state to infer.	required

Returns:

Type	Description
`tuple[int, Generator]`	A static integer seed and a random number generator.

is_autocast_available ¶

is_autocast_available(device_type: str) -> bool

Infer whether autocast is available for the given device type.

Parameters:

Name	Type	Description	Default
`device_type`	`str`	The device type to check for autocast availability.	required

Returns:

Type	Description
`bool`	Whether autocast is available for the given device type.

load_model_criterion_config ¶

load_model_criterion_config(
    model_path: None | str | Path,
    *,
    check_bar_distribution_criterion: bool,
    cache_trainset_representation: bool,
    which: Literal["regressor", "classifier"],
    version: Literal["v2"] = "v2",
    download: bool,
    model_seed: int
) -> tuple[
    PerFeatureTransformer,
    BCEWithLogitsLoss
    | CrossEntropyLoss
    | FullSupportBarDistribution,
    InferenceConfig,
]

Load the model, criterion, and config from the given path.

Parameters:

Name	Type	Description	Default
`model_path`	`None \| str \| Path`	The path to the model.	required
`check_bar_distribution_criterion`	`bool`	Whether to check if the criterion is a FullSupportBarDistribution, which is the expected criterion for models trained for regression.	required
`cache_trainset_representation`	`bool`	Whether the model should know to cache the trainset representation.	required
`which`	`Literal['regressor', 'classifier']`	Whether the model is a regressor or classifier.	required
`version`	`Literal['v2']`	The version of the model.	`'v2'`
`download`	`bool`	Whether to download the model if it doesn't exist.	required
`model_seed`	`int`	The seed of the model.	required

Returns:

Type	Description
`tuple[PerFeatureTransformer, BCEWithLogitsLoss \| CrossEntropyLoss \| FullSupportBarDistribution, InferenceConfig]`	The model, criterion, and config.

translate_probs_across_borders ¶

translate_probs_across_borders(
    logits: Tensor, *, frm: Tensor, to: Tensor
) -> Tensor

Translate the probabilities across the borders.

Parameters:

Name	Type	Description	Default
`logits`	`Tensor`	The logits defining the distribution to translate.	required
`frm`	`Tensor`	The borders to translate from.	required
`to`	`Tensor`	The borders to translate to.	required

Returns:

Type	Description
`Tensor`	The translated probabilities.

update_encoder_outlier_params ¶

update_encoder_outlier_params(
    model: Module,
    remove_outliers_std: float | None,
    seed: int | None,
    *,
    inplace: Literal[True]
) -> None

Update the encoder to handle outliers in the model.

Warning

This operation is only performed in place; `inplace` must be `True`.

Parameters:

Name	Type	Description	Default
`model`	`Module`	The model to update.	required
`remove_outliers_std`	`float \| None`	The standard deviation to remove outliers.	required
`seed`	`int \| None`	The seed to use, if any.	required
`inplace`	`Literal[True]`	Whether to do the operation inplace.	required

Raises:

Type	Description
`ValueError`	If `inplace` is not `True`.

validate_X_predict ¶

validate_X_predict(
    X: XType, estimator: TabPFNRegressor | TabPFNClassifier
) -> ndarray

Validate the input data for prediction.

validate_Xy_fit ¶

validate_Xy_fit(
    X: XType,
    y: YType,
    estimator: TabPFNRegressor | TabPFNClassifier,
    *,
    max_num_features: int,
    max_num_samples: int,
    ensure_y_numeric: bool = False,
    ignore_pretraining_limits: bool = False
) -> tuple[ndarray, ndarray, NDArray[Any] | None, int]

Validate the input data for fitting.