Skip to content

Lib

Main module containing the core of the XLRanker tool.

XLDataSet

XLRanker cross-linking dataset object.

Parameters:

Name Type Description Default
peptide_pairs dict[str, PeptidePair]

Dictionary of peptide pairs, where the key is a unique identifier for the pair.

required
omic_data dict[str, DataFrame]

Dictionary of omic data, where the key is the file name and the value is a Polars DataFrame containing the data.

required

Attributes:

Name Type Description
peptide_pairs dict[str, PeptidePair]

Dictionary of peptide pairs, where the key is a unique identifier for the pair.

omic_data dict[str, DataFrame]

Dictionary of omic data, where the key is the file name and the value is a Polars DataFrame containing the data.

proteins dict[str, Protein]

Dictionary of proteins, where the key is a unique identifier for the protein.

protein_pairs dict[str, ProteinPair]

Dictionary of protein pairs, where the key is a unique identifier for the pair.

Source code in src/xlranker/lib.py
class XLDataSet:
    """XLRanker cross-linking dataset object.

    Args:
        peptide_pairs (dict[str, PeptidePair]): Dictionary of peptide pairs, where the key is a unique identifier for the pair.
        omic_data (dict[str, pl.DataFrame]): Dictionary of omic data, where the key is the file name and the value is a Polars DataFrame containing the data.

    Attributes:
        peptide_pairs (dict[str, PeptidePair]): Dictionary of peptide pairs, where the key is a unique identifier for the pair.
        omic_data (dict[str, pl.DataFrame]): Dictionary of omic data, where the key is the file name and the value is a Polars DataFrame containing the data.
        proteins (dict[str, Protein]): Dictionary of proteins, where the key is a unique identifier for the protein. Empty until build_proteins() is called.
        protein_pairs (dict[str, ProteinPair]): Dictionary of protein pairs, where the key is a unique identifier for the pair. Empty until build_proteins() is called.

    """

    peptide_pairs: dict[str, PeptidePair]
    omic_data: dict[str, pl.DataFrame]
    proteins: dict[str, Protein]
    protein_pairs: dict[str, ProteinPair]

    def __init__(
        self, peptide_pairs: dict[str, PeptidePair], omic_data: dict[str, pl.DataFrame]
    ) -> None:
        """XLRanker cross-linking dataset object.

        Args:
            peptide_pairs (dict[str, PeptidePair]): Dictionary of peptide pairs, where the key is a unique identifier for the pair.
            omic_data (dict[str, pl.DataFrame]): Dictionary of omic data, where the key is the file name and the value is a Polars DataFrame containing the data.

        """
        self.peptide_pairs = peptide_pairs
        self.omic_data = omic_data
        self.protein_pairs = {}
        self.proteins = {}

    def build_proteins(self, remove_intra: bool = False) -> None:
        """Build the proteins and protein pairs of the XLDataSet network.

        A Protein is created for every protein name mapped to either peptide
        of any peptide pair, with one abundance looked up per omic table.
        Each peptide pair is then connected to every protein pair formed by
        combining the proteins mapped to its two peptides.

        Args:
            remove_intra (bool, optional): if true, only creates protein pairs between different proteins. A peptide pair whose two peptides share at least one mapped protein is removed from peptide_pairs and contributes no protein pairs. Defaults to False.

        """
        # Collect every protein name referenced by either side of any pair.
        all_proteins: set[str] = set()
        for peptide_pair in self.peptide_pairs.values():
            all_proteins.update(peptide_pair.a.mapped_proteins)
            all_proteins.update(peptide_pair.b.mapped_proteins)

        for protein in all_proteins:
            abundances = {
                omic_file: get_abundance(omic_table, protein)
                for omic_file, omic_table in self.omic_data.items()
            }
            self.proteins[protein] = Protein(
                protein, protein, abundances, xlr_config.primary_column
            )

        # Removal is deferred: the dict cannot shrink while it is iterated.
        remove_pairs = []
        for peptide_pair_key, peptide_pair in self.peptide_pairs.items():
            peptide_pair_id = get_pair_id(peptide_pair.a, peptide_pair.b)
            names_a = peptide_pair.a.mapped_proteins
            names_b = peptide_pair.b.mapped_proteins
            if remove_intra and any(
                name_a == name_b for name_a in names_a for name_b in names_b
            ):
                remove_pairs.append(peptide_pair_key)
                continue
            for name_a in names_a:
                protein_a = self.proteins[name_a]
                for name_b in names_b:
                    protein_b = self.proteins[name_b]
                    protein_pair_id = get_pair_id(protein_a, protein_b)
                    if protein_pair_id not in self.protein_pairs:
                        self.protein_pairs[protein_pair_id] = ProteinPair(
                            protein_a, protein_b
                        )
                    # Link both directions: protein pair <-> peptide pair.
                    self.protein_pairs[protein_pair_id].add_connection(
                        peptide_pair_id
                    )
                    peptide_pair.add_connection(protein_pair_id)
        for key in remove_pairs:
            self.peptide_pairs.pop(key)

    @classmethod
    def load_from_network(
        cls,
        network_path: str,
        omics_data_folder: str,
        custom_mapper: PeptideMapper | None = None,
        custom_mapping_path: str | None = None,
        is_fasta: bool = True,
        split_by: str | None = "|",
        split_index: int | None = 3,
        fasta_type: str | FastaType = "UNIPROT",
    ) -> "XLDataSet":
        """Create a XLDataSet object from a network file.

        Args:
            network_path (str): path to the peptide pairs
            omics_data_folder (str): folder containing the omic data
            custom_mapper (PeptideMapper | None, optional): PeptideMapper object that should be used for mapping. If None, create peptide mapper using other parameters. Defaults to None.
            custom_mapping_path (str | None, optional): If not using custom_mapper, path to mapping table. Defaults to None.
            is_fasta (bool, optional): True if custom_mapping_path points to FASTA file. Defaults to True.
            split_by (str | None, optional): character to split FASTA description by. None is treated as "|". Defaults to "|".
            split_index (int | None, optional): 0-based index to extract gene symbol from. None is treated as 6. Defaults to 3.
            fasta_type (str | FastaType, optional): FASTA file type. str can be "UNIPROT" or "GENCODE". Defaults to "UNIPROT".

        Returns:
            XLDataSet: XLDataSet with peptide pairs and omics data loaded

        """
        split_by = "|" if split_by is None else split_by
        # NOTE(review): the None fallback (6) differs from the signature
        # default (3) -- confirm this is intentional.
        split_index = 6 if split_index is None else split_index
        network = read_network_file(network_path)
        omic_data: dict[str, pl.DataFrame] = read_data_folder(omics_data_folder)
        # Each distinct sequence is mapped only once.
        peptide_sequences = set()
        for group in network.values():
            peptide_sequences.add(group.a.sequence)
            peptide_sequences.add(group.b.sequence)
        if isinstance(fasta_type, str):
            fasta_type = convert_str_to_fasta_type(fasta_type)
        if custom_mapper is None:
            mapper = PeptideMapper(
                mapping_table_path=custom_mapping_path,
                split_by=split_by,
                split_index=split_index,
                is_fasta=is_fasta,
                fasta_type=fasta_type,
            )
        else:
            mapper = custom_mapper
        mapping_results = mapper.map_sequences(list(peptide_sequences))
        for group in network.values():
            group.a.mapped_proteins = mapping_results.peptide_to_protein[
                group.a.sequence
            ]
            group.b.mapped_proteins = mapping_results.peptide_to_protein[
                group.b.sequence
            ]
        return cls(network, omic_data)

__init__(peptide_pairs, omic_data)

XLRanker cross-linking dataset object.

Parameters:

Name Type Description Default
peptide_pairs dict[str, PeptidePair]

Dictionary of peptide pairs, where the key is a unique identifier for the pair.

required
omic_data dict[str, DataFrame]

Dictionary of omic data, where the key is the file name and the value is a Polars DataFrame containing the data.

required
Source code in src/xlranker/lib.py
def __init__(
    self, peptide_pairs: dict[str, PeptidePair], omic_data: dict[str, pl.DataFrame]
) -> None:
    """Initialize the dataset from peptide pairs and omic tables.

    Args:
        peptide_pairs (dict[str, PeptidePair]): Peptide pairs keyed by a unique identifier for the pair.
        omic_data (dict[str, pl.DataFrame]): Omic data keyed by file name, each value being a Polars DataFrame with the data.

    """
    # The protein-level containers start out empty.
    self.proteins, self.protein_pairs = {}, {}
    self.peptide_pairs, self.omic_data = peptide_pairs, omic_data

build_proteins(remove_intra=False)

Build protein pairs of the XLDataSet network.

Parameters:

Name Type Description Default
remove_intra bool

If true, only creates protein pairs between different proteins. Defaults to False.

False
Source code in src/xlranker/lib.py
def build_proteins(self, remove_intra: bool = False) -> None:
    """Build the proteins and protein pairs of the XLDataSet network.

    A Protein is created for every protein name mapped to either peptide
    of any peptide pair, with one abundance looked up per omic table.
    Each peptide pair is then connected to every protein pair formed by
    combining the proteins mapped to its two peptides.

    Args:
        remove_intra (bool, optional): if true, only creates protein pairs between different proteins. A peptide pair whose two peptides share at least one mapped protein is removed from peptide_pairs and contributes no protein pairs. Defaults to False.

    """
    # Collect every protein name referenced by either side of any pair.
    all_proteins: set[str] = set()
    for peptide_pair in self.peptide_pairs.values():
        all_proteins.update(peptide_pair.a.mapped_proteins)
        all_proteins.update(peptide_pair.b.mapped_proteins)

    for protein in all_proteins:
        abundances = {
            omic_file: get_abundance(omic_table, protein)
            for omic_file, omic_table in self.omic_data.items()
        }
        self.proteins[protein] = Protein(
            protein, protein, abundances, xlr_config.primary_column
        )

    # Removal is deferred: the dict cannot shrink while it is iterated.
    remove_pairs = []
    for peptide_pair_key, peptide_pair in self.peptide_pairs.items():
        peptide_pair_id = get_pair_id(peptide_pair.a, peptide_pair.b)
        names_a = peptide_pair.a.mapped_proteins
        names_b = peptide_pair.b.mapped_proteins
        if remove_intra and any(
            name_a == name_b for name_a in names_a for name_b in names_b
        ):
            remove_pairs.append(peptide_pair_key)
            continue
        for name_a in names_a:
            protein_a = self.proteins[name_a]
            for name_b in names_b:
                protein_b = self.proteins[name_b]
                protein_pair_id = get_pair_id(protein_a, protein_b)
                if protein_pair_id not in self.protein_pairs:
                    self.protein_pairs[protein_pair_id] = ProteinPair(
                        protein_a, protein_b
                    )
                # Link both directions: protein pair <-> peptide pair.
                self.protein_pairs[protein_pair_id].add_connection(
                    peptide_pair_id
                )
                peptide_pair.add_connection(protein_pair_id)
    for key in remove_pairs:
        self.peptide_pairs.pop(key)

load_from_network(network_path, omics_data_folder, custom_mapper=None, custom_mapping_path=None, is_fasta=True, split_by='|', split_index=3, fasta_type='UNIPROT') classmethod

Create a XLDataSet object from a network file.

Parameters:

Name Type Description Default
network_path str

path to the peptide pairs

required
omics_data_folder str

folder containing the omic data

required
custom_mapper PeptideMapper | None

PeptideMapper object that should be used for mapping. If None, create peptide mapper using other parameters. Defaults to None.

None
custom_mapping_path str | None

If not using custom_mapper, path to mapping table. Defaults to None.

None
is_fasta bool

True if custom_mapping_path points to FASTA file. Defaults to True.

True
split_by str | None

character to split FASTA description by. Defaults to "|".

'|'
split_index int | None

0-based index to extract gene symbol from. Defaults to 3.

3
fasta_type str | FastaType

FASTA file type. str can be "UNIPROT" or "GENCODE". Defaults to "UNIPROT".

'UNIPROT'

Returns:

Name Type Description
XLDataSet XLDataSet

XLDataSet with peptide pairs and omics data loaded

Source code in src/xlranker/lib.py
@classmethod
def load_from_network(
    cls,
    network_path: str,
    omics_data_folder: str,
    custom_mapper: PeptideMapper | None = None,
    custom_mapping_path: str | None = None,
    is_fasta: bool = True,
    split_by: str | None = "|",
    split_index: int | None = 3,
    fasta_type: str | FastaType = "UNIPROT",
) -> "XLDataSet":
    """Build an XLDataSet from a peptide-pair network file and an omics folder.

    The network and omic tables are read, every distinct peptide sequence
    in the network is mapped to proteins, and the mapping results are
    stored on each peptide before the dataset is constructed.

    Args:
        network_path (str): path to the peptide pairs
        omics_data_folder (str): folder containing the omic data
        custom_mapper (PeptideMapper | None, optional): mapper to use as-is. When None, a PeptideMapper is built from the remaining parameters. Defaults to None.
        custom_mapping_path (str | None, optional): mapping table path handed to the PeptideMapper built here. Defaults to None.
        is_fasta (bool, optional): True if custom_mapping_path points to FASTA file. Defaults to True.
        split_by (str | None, optional): character to split FASTA description by. None is treated as "|". Defaults to "|".
        split_index (int | None, optional): 0-based index to extract gene symbol from. None is treated as 6. Defaults to 3.
        fasta_type (str | FastaType, optional): FASTA file type. str can be "UNIPROT" or "GENCODE". Defaults to "UNIPROT".

    Returns:
        XLDataSet: XLDataSet with peptide pairs and omics data loaded

    """
    if split_by is None:
        split_by = "|"
    if split_index is None:
        # NOTE(review): fallback differs from the signature default of 3.
        split_index = 6

    network = read_network_file(network_path)
    omic_data: dict[str, pl.DataFrame] = read_data_folder(omics_data_folder)

    # Distinct sequences from both sides of every pair.
    peptide_sequences = {
        peptide.sequence
        for group in network.values()
        for peptide in (group.a, group.b)
    }

    if isinstance(fasta_type, str):
        fasta_type = convert_str_to_fasta_type(fasta_type)

    mapper = custom_mapper
    if mapper is None:
        mapper = PeptideMapper(
            mapping_table_path=custom_mapping_path,
            split_by=split_by,
            split_index=split_index,
            is_fasta=is_fasta,
            fasta_type=fasta_type,
        )

    mapping_results = mapper.map_sequences(list(peptide_sequences))
    for group in network.values():
        for peptide in (group.a, group.b):
            peptide.mapped_proteins = mapping_results.peptide_to_protein[
                peptide.sequence
            ]
    return cls(network, omic_data)

get_final_network(data_set, pair_selector=BestSelector())

DEPRECATED: USE REPORTS MODULE. Get the final network of all selected protein pairs.

Parameters:

Name Type Description Default
data_set XLDataSet

XL data set after prioritization

required
pair_selector PairSelector

What kind of pair selector to use for selecting final pairs. Defaults to BestSelector().

BestSelector()

Returns:

Type Description
list[ProteinPair]

list[ProteinPair]: list of selected protein pairs

Source code in src/xlranker/lib.py
def get_final_network(
    data_set: XLDataSet, pair_selector: PairSelector | None = None
) -> list[ProteinPair]:
    """DEPRECATED: USE REPORTS MODULE. Get the final network of all selected protein pairs.

    The selector is run over every protein pair in the data set, then the
    pairs whose prioritization status is one of the four "selected"
    statuses (ML or parsimony, primary or secondary) are returned.

    Args:
        data_set (XLDataSet): XL data set after prioritization
        pair_selector (PairSelector | None, optional): What kind of pair selector to use for selecting final pairs. When None, a new BestSelector() is created for this call. Defaults to None.

    Returns:
        list[ProteinPair]: list of selected protein pairs

    """
    # Build the default per call; a default instance in the signature would
    # be created once at definition time and shared by every call.
    if pair_selector is None:
        pair_selector = BestSelector()
    pair_selector.process(list(data_set.protein_pairs.values()))
    selected_statuses = (
        PrioritizationStatus.ML_PRIMARY_SELECTED,
        PrioritizationStatus.ML_SECONDARY_SELECTED,
        PrioritizationStatus.PARSIMONY_PRIMARY_SELECTED,
        PrioritizationStatus.PARSIMONY_SECONDARY_SELECTED,
    )
    return [
        pair
        for pair in data_set.protein_pairs.values()
        if pair.prioritization_status in selected_statuses
    ]

setup_logging(verbose=False, log_file=None, silent_all=False)

Set up logging for XLRanker.

Parameters:

Name Type Description Default
verbose bool

Use more verbose logging. Sets logging level to DEBUG. Defaults to False.

False
log_file str | None

Path to log file. If none, no log file is kept. Defaults to None.

None
silent_all bool

Disable all logging. Defaults to False.

False
Source code in src/xlranker/lib.py
def setup_logging(
    verbose: bool = False, log_file: str | None = None, silent_all: bool = False
) -> None:
    """Set up logging for XLRanker.

    Args:
        verbose (bool, optional): Use more verbose logging. Sets logging level to DEBUG. Defaults to False.
        log_file (str | None, optional): Path to log file. If none, no log file is kept. Defaults to None.
        silent_all (bool, optional): Disable all logging. Defaults to False.

    """
    if silent_all:
        # Remove all handlers and disable logging
        logging.getLogger().handlers.clear()
        logging.disable(logging.CRITICAL + 1)
        return
    level = logging.DEBUG if verbose else logging.INFO

    # Create root logger
    logger = logging.getLogger()
    logger.setLevel(level)

    # Console handler (stderr)
    console_handler = logging.StreamHandler(sys.stderr)
    console_handler.setLevel(level)
    console_formatter = logging.Formatter("[%(levelname)s] %(message)s")
    console_handler.setFormatter(console_formatter)
    logger.addHandler(console_handler)

    # Optional file handler
    if log_file:
        file_handler = logging.FileHandler(log_file)
        file_handler.setLevel(logging.DEBUG)
        file_formatter = logging.Formatter(
            "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
        )
        file_handler.setFormatter(file_formatter)
        logger.addHandler(file_handler)

write_pair_to_network(pairs, output_file)

Write list of protein pairs to a TSV file.

Parameters:

Name Type Description Default
pairs list[ProteinPair]

list of protein pairs to save to file.

required
output_file str

path to write TSV file. Full path must be accessible.

required
Source code in src/xlranker/lib.py
def write_pair_to_network(pairs: list[ProteinPair], output_file: str) -> None:
    """Write list of protein pairs to a TSV file.

    One line is written per pair: the name of protein ``a``, a tab, and
    the name of protein ``b``. An empty list produces an empty file.

    Args:
        pairs (list[ProteinPair]): list of protein pairs to save to file.
        output_file (str): path to write TSV file. Full path must be accessible.

    """
    # Format every row before opening the file so that a malformed pair
    # cannot leave a truncated file behind.
    rows = [f"{pair.a.name}\t{pair.b.name}\n" for pair in pairs]
    with open(output_file, "w", encoding="utf-8") as handle:
        handle.writelines(rows)