# Pipeline preprocess yaml

# Download this file

# ============================================================
# Preprocess workflow Panpipes (pipeline_preprocess.py)
# ============================================================
# This file contains the parameters for the preprocess workflow.
# For full descriptions of the parameters, see the documentation at https://panpipes-pipelines.readthedocs.io/en/latest/yaml_docs/pipeline_preprocess_yml.html


#--------------------------
# Compute resources options
#--------------------------
# Thread counts for three tiers of tasks (high, medium, low).
# NOTE(review): which pipeline tasks fall into each tier is not visible in
# this file - check the documentation linked at the top of this file.
resources:
  threads_high: 2
  threads_medium: 2
  threads_low: 1

# Path to the conda environment to use. Leave blank if running natively or if
# your cluster automatically inherits the login node environment.

condaenv: 


#-------------------------------
# General project specifications
#-------------------------------
# Prefix identifying this project (presumably used to name outputs - confirm).
sample_prefix: teaseq
# Name of the unfiltered .h5mu object
# (presumably the input to this workflow - confirm).
unfiltered_obj: teaseq_unfilt.h5mu

# One True/False switch per modality; here rna, prot and atac are switched on
# and rep is switched off.
modalities:
  rna:  True
  prot: True
  rep: False
  atac: True

# ----------------------------
# Filtering Cells and Features
# ----------------------------
# Filtering is done sequentially for all modalities, filtering first cells and then features.
# In the following, you can specify the filtering parameters for each modality.

# Thresholds below are organised per modality as
# obs/var -> min/max/bool -> column name.
filtering:
  # Switch for the filtering step
  # (presumably False skips it entirely - confirm).
  run: True
  # Left blank here.
  # NOTE(review): the expected value is not shown in this file - check the
  # documentation linked at the top of this file.
  keep_barcodes:

  #------------------------
  # RNA-specific filtering
  # RNA thresholds, keyed by column name under min / max / bool.
  # Blank entries presumably leave that filter unset - confirm.
  rna:
    # obs, i.e. cell level filtering
    obs:
      min:
        n_genes_by_counts: 100 
      max:
        pct_counts_mt: 40
        # a max of 100 on a percentage column presumably filters nothing
        pct_counts_rp: 100
        doublet_scores: 0.25
      bool:

    # var, i.e. gene (feature) level filtering
    var:
      min:
        n_cells_by_counts: 1
      max:
        total_counts: 
        n_cells_by_counts:

  #------------------------
  # Protein-specific filtering
  # Protein thresholds; every entry is blank in this configuration, so no
  # protein-specific threshold values are set here.
  prot:
    # obs, i.e. cell level filtering
    obs:  
      max:
        total_counts:

    # var, i.e. gene (feature) level filtering
    var:
      max:
      min:

  #------------------------
  # ATAC-specific filtering
  # ATAC thresholds, keyed by column name under min / max / bool like the
  # RNA and protein sections.
  atac:
    # obs, i.e. cell level filtering
    obs:
      max:
        # Must be nested under `max` (not level with it); otherwise `max` is
        # empty and `total_counts` becomes a sibling key of `max` instead of
        # an upper-bound threshold.
        total_counts: 2500

    # var, i.e. gene (feature) level filtering
    var:
      # NOTE(review): this key has no min/max level above it, unlike the other
      # var sections, and is left blank - confirm this is intended.
      nucleosome_signal:


# ---------------------------
# Intersecting cell barcodes
# ---------------------------
# Subset observations (cells) in-place to the intersection of the modalities
# listed here (comma separated string without spaces).
intersect_mods: rna,prot,atac


# --------------------------
# Downsampling cell barcodes
# --------------------------
# All three downsampling options are left blank in this configuration.
# NOTE(review): presumably blank means no downsampling is performed - confirm
# in the documentation linked at the top of this file.
downsample_n:
downsample_col:
downsample_mods: 


# ------------------
# Plotting variables
# ------------------
# all metrics in this section should be provided as a comma separated string without spaces e.g. a,b,c
# leave blank to avoid plotting
plotqc:
  # columns used to group the plots (presumably obs columns - confirm)
  grouping_var: sample_id,orig.ident
  # metrics to plot, one key per modality
  rna_metrics: pct_counts_mt,pct_counts_rp,pct_counts_hb,doublet_scores
  prot_metrics: total_counts,log1p_total_counts,n_adt_by_counts
  atac_metrics: total_counts
  # blank, i.e. nothing is plotted for rep
  rep_metrics: 


# -----------------------
# RNA preprocessing steps
# -----------------------
# Currently, only the standard preprocessing steps (sc.pp.normalize_total followed by sc.pp.log1p) are offered for the RNA modality.
# Switch for the log1p step.
log1p: True
# Settings for highly variable gene (HVG) selection.
# NOTE(review): key names mirror scanpy's highly_variable_genes arguments -
# confirm that blank values fall back to that function's defaults.
hvg:
  flavor: seurat # Options: seurat, cell_ranger, or seurat_v3
  batch_key:
  n_top_genes: 2000
  min_mean:
  max_mean:
  min_disp:

  # NOTE(review): the semantics of exclude_file / exclude / filter are not
  # visible in this file - check the documentation linked at the top.
  exclude_file: 
  exclude:
  filter: False

# Left blank here, i.e. no variables are listed for regression.
regress_variables:


#---------
# Scaling
# run_scale switches the scaling step on; scale_max_value is left blank here
# (presumably meaning no maximum is applied - confirm).
run_scale: True
scale_max_value:


#-----------------------------
# RNA Dimensionality Reduction
pca:
  # number of principal components
  n_pcs: 50
  # NOTE(review): what "default" resolves to is not visible in this file
  solver: default
  # comma separated columns used to colour the plots
  # (presumably obs columns - confirm)
  color_by: sample_id,total_counts


# ----------------------------------
# Protein (PROT) preprocessing steps
# ----------------------------------
prot:
  # Normalisation to run; clr here. The DSB parameters below suggest dsb is
  # another accepted value - confirm.
  normalisation_methods: clr

  # CLR parameters:
  # 0 = normalise row-wise (per cell)
  # 1 = normalise column-wise (per feature, recommended)
  clr_margin: 1

  # DSB parameters:
  # background_obj is left blank here; only clr is selected above.
  background_obj:
  quantile_clipping: True

  # Normalisation whose result is kept as X
  # (presumably relevant when several methods are run - confirm).
  store_as_X: clr
  save_norm_prot_mtx: False

  #---------------------------------
  # Protein Dimensionality reduction
  # pca switches the PCA step on; n_pcs is the number of components.
  pca: True
  n_pcs: 10
  solver: default
  # comma separated columns used to colour the plots
  color_by: orig.ident,log1p_total_counts


# ------------------------
# ATAC preprocessing steps
# ------------------------
atac:
  # Normalisation settings; TFIDF_flavour presumably only applies when
  # normalize is TFIDF - confirm.
  binarize: False
  normalize: TFIDF  #"log1p" or "TFIDF"
  TFIDF_flavour: signac  #"signac", "logTF" or "logIDF"
  feature_selection_flavour: scanpy  #"signac" or "scanpy"

  # parameters for feature_selection_flavour == "scanpy", leave blank to use defaults
  min_mean:  #default 0.05
  max_mean:  #default 1.5
  min_disp:  #default 0.5
  n_top_features:  #if specified, overwrites previous defaults for HVF selection
  filter_by_hvf: False

  # parameter for feature_selection_flavour == "signac"
  # (scanpy is selected above, so this is presumably unused here - confirm)
  min_cutoff: q5

  #------------------------------
  # ATAC Dimensionality reduction
  dimred: PCA  #PCA or LSI
  n_comps: 50
  solver: default
  # comma separated columns used to colour the plots
  color_by: dataset,total_counts
  # left blank here
  # NOTE(review): expected value is not visible in this file - confirm
  dim_remove: