Base

Pipeline utility functions and classes.

`get_abundance(omic_df, analyte, use_median=False)`

Get the mean or median abundance of an analyte from an omics dataset.

Parameters:

Name	Type	Description	Default
`omic_df`	`DataFrame`	Polars dataframe containing the omics data, with the first column being the index.	required
`analyte`	`str`	analyte that should have an exact match in omic_df.	required
`use_median`	`bool`	Aggregate samples by median instead of mean. Defaults to False.	`False`

Returns:

Type	Description
`float \| None`	Abundance value, or None if the analyte is not found.

Source code in src/xlranker/util/__init__.py

def get_abundance(
    omic_df: pl.DataFrame, analyte: str, use_median=False
) -> float | None:
    """Look up an analyte and summarise its abundance across the value columns.

    Rows whose first-column entry equals ``analyte`` are selected. Each
    remaining column is averaged over those rows, and the resulting per-column
    values are then reduced to a single number with the mean (default) or the
    median.

    NOTE(review): null entries in the value columns presumably surface as NaN
    after the NumPy conversion and would propagate into the result -- confirm
    whether callers expect that.

    Args:
        omic_df (pl.DataFrame): Polars dataframe containing the omics data, with the first column being the index.
        analyte (str): identifier that must exactly match an entry of the first column.
        use_median (bool): Reduce the per-column values by median instead of mean. Defaults to False.

    Returns:
        float | None: abundance value, or None when no row matches, the
            dataframe has no value columns, or no values are produced.

    """
    # The first column is the lookup key; every other column is a value column.
    id_column = omic_df.columns[0]
    matches = omic_df.filter(pl.col(id_column) == analyte)
    if matches.is_empty():
        return None

    sample_columns = [name for name in omic_df.columns if name != id_column]
    if not sample_columns:
        return None

    # One mean per value column, taken over the matching row(s) only.
    per_column_means = [pl.col(name).mean() for name in sample_columns]
    sample_values = matches.select(per_column_means).to_numpy().flatten()
    if sample_values.size == 0:
        return None

    summary = np.median(sample_values) if use_median else sample_values.mean()
    return float(summary)

`get_pair_id(a, b)`

Get a string representation of the pair. Input order independent.

Order is determined alphabetically.

Parameters:

Name	Type	Description	Default
`a`	`Protein \| Peptide`	entity a	required
`b`	`Protein \| Peptide`	entity b	required

Returns:

Name	Type	Description
`str`	`str`	pair representation with entities separated by '+'.

Source code in src/xlranker/util/__init__.py

def get_pair_id(a: Protein | Peptide, b: Protein | Peptide) -> str:
    """Build an order-independent string identifier for a pair of entities.

    Each entity is reduced to a label: ``name`` for a ``Protein``, ``sequence``
    otherwise. The two labels are joined with '+', with the smaller label
    (by ``<`` comparison) first, so swapping the arguments yields the same
    identifier.

    Args:
        a (Protein | Peptide): entity a
        b (Protein | Peptide): entity b

    Returns:
        str: pair representation with entities separated by '+'.

    """
    # Proteins are labelled by name; anything else is labelled by its sequence.
    label_a = a.name if isinstance(a, Protein) else a.sequence
    label_b = b.name if isinstance(b, Protein) else b.sequence

    # Only a strictly smaller label_a keeps the input order; ties and larger
    # values put label_b first.
    if label_a < label_b:
        first, second = label_a, label_b
    else:
        first, second = label_b, label_a
    return f"{first}+{second}"

`safe_a_greater_or_equal_to_b(a, b)`

Returns True if a is greater than or equal to b, with checks for None.

None is treated as missing value. Any float is greater than None. If both are None, return True.

Parameters:

Name	Type	Description	Default
`a`	`float \| None`	a value	required
`b`	`float \| None`	b value	required

Returns:

Name	Type	Description
`bool`	`bool`	True if a is greater or equal to b. If both are None, return True. Any float is greater than None.

Source code in src/xlranker/util/__init__.py

def safe_a_greater_or_equal_to_b(a: float | None, b: float | None) -> bool:
    """Returns True if a is greater or equal to b, with checks for None.

    None is treated as missing value. Any float is greater than None. If both are None, return True.

    Args:
        a (float | None): a value
        b (float | None): b value

    Returns:
        bool: True if a is greater or equal to b. If both are None, return True. Any float is greater than None.

    """
    if a is None:
        return b is None  # if a is None, then if b is not None, b is greater
    else:
        if b is None:
            return True  # Non-None is always greater than None
        return a >= b  # both are not None, so compare normally

`set_seed(seed)`

Set seed to provide consistent results between runs.

Parameters:

Name	Type	Description	Default
`seed`	`int`	number to initialize random number generators with	required

Source code in src/xlranker/util/__init__.py

def set_seed(seed: int) -> None:
    """Seed the random number generators so runs are reproducible.

    The standard-library ``random`` module is seeded with ``seed``. NumPy's
    global generator is then seeded with an integer derived from the first
    draw of that freshly seeded ``random`` generator, which means one value
    of the ``random`` stream is consumed by this call.

    Args:
        seed (int): number to initialize random number generators with

    """
    random.seed(seed)
    # Scale the first draw from [0, 1) into an integer in [0, 1000000).
    numpy_seed = int(random.random() * 1000000)
    np.random.seed(numpy_seed)