Source code for cell2location.cluster_averages.select_features

import numpy as np
import scanpy as sc


def select_features(adata, groupName, n_features=10000, use_raw=True, verbose=False, sc_kwargs=None):
    r"""Return the names of features that best distinguish the groups in ``adata.obs[groupName]``.

    The ranking stored in ``adata.uns["rank_genes_groups"]`` is reused when it
    is already present; otherwise it is computed with
    ``scanpy.tl.rank_genes_groups``. Rows of the ranked ``"names"`` table are
    then consumed from the top until at least ``n_features`` distinct names
    have been collected.

    :param adata: annotated data object exposing ``uns``, ``var`` and
        ``adata[:, names].var_names`` (presumably an ``anndata.AnnData`` --
        confirm against callers).
    :param groupName: grouping key forwarded to ``sc.tl.rank_genes_groups``.
        Only used when no ranking exists yet.
    :param n_features: maximum number of feature names to return.
    :param use_raw: forwarded to ``sc.tl.rank_genes_groups``.
    :param verbose: when true, print progress messages.
    :param sc_kwargs: optional dict of extra keyword arguments forwarded to
        ``sc.tl.rank_genes_groups``; ``None`` means no extra arguments.
    :return: ``var_names`` of ``adata`` restricted to the selected features.
        If fewer than ``n_features`` distinct names are ranked, all ranked
        names are returned.

    .. note::
        * An existing ``adata.uns["rank_genes_groups"]`` entry is reused
          without checking that it was computed for ``groupName``.
        * When the last consumed row overshoots ``n_features``, the surplus is
          trimmed from the *sorted* list of names (``np.unique`` sorts), i.e.
          alphabetically rather than by rank.
    """
    if "rank_genes_groups" in adata.uns:
        if verbose:
            print("Using existing ranked genes...")
    else:
        # Rank one tenth of the available variables per group.
        # TODO: explain why one tenth was chosen.
        sc.tl.rank_genes_groups(
            adata,
            groupName,
            use_raw=use_raw,
            n_genes=int(np.round(len(adata.var) / 10)),
            **(sc_kwargs or {}),
        )

    ranked_names = adata.uns["rank_genes_groups"]["names"]
    ranked_features = np.unique([item for sublist in ranked_names for item in sublist])

    # Not enough ranked features to honour the request: return all of them.
    if n_features > len(ranked_features):
        if verbose:
            print(f"Maximum number of features: {len(ranked_features)}")
        return adata[:, ranked_features].var_names

    # Walk the table one row at a time, accumulating distinct names, and stop
    # as soon as enough have been seen. The check happens *before* a row is
    # consumed so that the same rows are used as when re-flattening the first
    # ``i`` rows on every iteration, but in a single pass.
    seen = set()
    for rank_row in ranked_names:
        if len(seen) >= n_features:
            break
        seen.update(rank_row)

    # ``np.unique`` sorts the names; the slice trims any overshoot from the
    # final row (see the note in the docstring).
    return adata[:, np.unique(list(seen))[:n_features]].var_names