Pipeline preprocess yaml
Download this file
# ============================================================
# Preprocess workflow Panpipes (pipeline_preprocess.py)
# ============================================================
# This file contains the parameters for the preprocess workflow.
# For full descriptions of the parameters, see the documentation at https://panpipes-pipelines.readthedocs.io/en/latest/yaml_docs/pipeline_preprocess_yml.html
#--------------------------
# Compute resources options
#--------------------------
resources:
threads_high: 2
threads_medium: 2
threads_low: 1
# Add the path to the conda env; leave blank if running natively or if your cluster automatically inherits the login node environment
condaenv:
#-------------------------------
# General project specifications
#-------------------------------
sample_prefix: teaseq
unfiltered_obj: teaseq_unfilt.h5mu
modalities:
rna: True
prot: True
rep: False
atac: True
# ----------------------------
# Filtering Cells and Features
# ----------------------------
# Filtering is done sequentially for each modality: cells are filtered first, then features.
# In the following, you can specify the filtering parameters for each modality.
filtering:
run: True
keep_barcodes:
#------------------------
# RNA-specific filtering
rna:
# obs, i.e. cell level filtering
obs:
min:
n_genes_by_counts: 100
max:
pct_counts_mt: 40
pct_counts_rp: 100
doublet_scores: 0.25
bool:
# var, i.e. gene (feature) level filtering
var:
min:
n_cells_by_counts: 1
max:
total_counts:
n_cells_by_counts:
#------------------------
# Protein-specific filtering
prot:
# obs, i.e. cell level filtering
obs:
max:
total_counts:
# var, i.e. feature level filtering
var:
max:
min:
#------------------------
# ATAC-specific filtering
atac:
# obs, i.e. cell level filtering
obs:
max:
total_counts: 2500
# var, i.e. feature level filtering
var:
nucleosome_signal:
# ---------------------------
# Intersecting cell barcodes
# ---------------------------
# Subset observations (cells) in-place to the intersection of the cell barcodes of the modalities listed below
intersect_mods: rna,prot,atac
# --------------------------
# Downsampling cell barcodes
# --------------------------
downsample_n:
downsample_col:
downsample_mods:
# ------------------
# Plotting variables
# ------------------
# All metrics in this section should be provided as a comma-separated string without spaces, e.g. a,b,c
# leave blank to avoid plotting
plotqc:
grouping_var: sample_id,orig.ident
rna_metrics: pct_counts_mt,pct_counts_rp,pct_counts_hb,doublet_scores
prot_metrics: total_counts,log1p_total_counts,n_adt_by_counts
atac_metrics: total_counts
rep_metrics:
# -----------------------
# RNA preprocessing steps
# -----------------------
# Currently, only standard preprocessing steps (sc.pp.normalize_total followed by sc.pp.log1p) are offered for the RNA modality.
log1p: True
hvg:
flavor: seurat # Options: seurat, cell_ranger, or seurat_v3
batch_key:
n_top_genes: 2000
min_mean:
max_mean:
min_disp:
exclude_file:
exclude:
filter: False
regress_variables:
#---------
# Scaling
run_scale: True
scale_max_value:
#-----------------------------
# RNA Dimensionality Reduction
pca:
n_pcs: 50
solver: default
color_by: sample_id,total_counts
# ----------------------------------
# Protein (PROT) preprocessing steps
# ----------------------------------
prot:
normalisation_methods: clr
# CLR parameters:
# 0 = normalise row-wise (per cell)
# 1 = normalise column-wise (per feature, recommended)
clr_margin: 1
# DSB parameters:
background_obj:
quantile_clipping: True
store_as_X: clr
save_norm_prot_mtx: False
#---------------------------------
# Protein Dimensionality reduction
pca: True
n_pcs: 10
solver: default
color_by: orig.ident,log1p_total_counts
# ------------------------
# ATAC preprocessing steps
# ------------------------
atac:
binarize: False
normalize: TFIDF #"log1p" or "TFIDF"
TFIDF_flavour: signac #"signac", "logTF" or "logIDF"
feature_selection_flavour: scanpy #"signac" or "scanpy"
# parameters for feature_selection_flavour == "scanpy", leave blank to use defaults
min_mean: #default 0.05
max_mean: #default 1.5
min_disp: #default 0.5
n_top_features: #if specified, overwrites previous defaults for HVF selection
filter_by_hvf: False
# parameter for feature_selection_flavour == "signac"
min_cutoff: q5
#------------------------------
# ATAC Dimensionality reduction
dimred: PCA #PCA or LSI
n_comps: 50
solver: default
color_by: dataset,total_counts
dim_remove: