# Pipeline qc_spatial yaml - merfish
#
# Download this file

# ----------------------- #
# Spatial QC pipeline 
# ----------------------- #
# Written by Fabiola Curion, Sarah Ouologuem

# This pipeline needs a sample metadata file (see resources for an example)
# Will run :
#   scanpy QC
#   summary QC plots

# This pipeline is followed by the preprocess pipeline.
# The qc_spatial pipeline does not perform any filtering of cells or genes.
# Filtering happens as the first step in the preprocess pipeline. See pipeline_preprocess_spatial for details.


# ---------------------------
# 0. Compute resource options
# ---------------------------
# Thread settings (high / medium / low) for the pipeline's parallel jobs.
# NOTE(review): the per-key notes below tie each thread setting to a memory
# requirement - presumably memory is allocated per thread on the cluster; confirm.
resources:
  # Number of threads used for parallel jobs
  threads_high: 1 # Must provide enough memory to load all input files and create MuDatas
  threads_medium: 1  # Must provide enough memory to load your MuData and do computationally light tasks
  threads_low: 1 # Must provide enough memory to load text files and do plotting; requires much less memory

# Path to conda env; leave blank if running natively or if your cluster automatically inherits the login node environment
condaenv: 


# ------------------------------------------------------------------------------------------------
# 1. Loading options
# ------------------------------------------------------------------------------------------------

# NOTE(review): presumably the project name used to label this run - confirm against the pipeline.
project: test
# Submission file: the sample metadata file required by this pipeline; among
# other things it specifies the sample_id of each spatial slide.
# An example is provided at resources/sample_file_qc_spatial.txt
submission_file: sample_file_qc_spatial.txt # Path to submission file



# ------------------------------------------------------------------------------------------------
# 2. QC options
# ------------------------------------------------------------------------------------------------

# -------------------------
# Calculate QC metrics
# -------------------------
# This part of the pipeline generates the QC metrics that will be used to evaluate inclusion/exclusion criteria.
# Filtering of cells/genes happens in the pipeline pipeline_preprocess_spatial.py.
# Basic QC metrics are always calculated using scanpy.pp.calculate_qc_metrics().
# Additional optional scores can be calculated for 1. cell cycle genes and 2. custom genes:

# 1. Cell cycle scores (optional)
# Leave blank to avoid running.
# Accepted values:
#   "default" - uses panpipes/resources/cell_cycle_genes.tsv
#   path to a tsv file with the columns "cc_phase" and "gene_name";
#   "cc_phase" can be either "s" or "g2m"
# The information in the tsv file is used to run scanpy.tl.score_genes_cell_cycle().
ccgenes: # "default" or path to a tsv file (see above)


# 2. Custom genes actions (optional)
# It is often practical to rely on known gene lists for a series of tasks, such as evaluating mitochondrial or ribosomal genes, or excluding IGG genes from HVG selection.
# Useful gene lists are collected in panpipes/resources/qc_genelist_1.0.csv, and "actions" on them are defined as follows:
#   calc_proportions: calculate the proportion of reads mapping to X genes over the total number of reads, per cell
#   score_genes: score the genes of each listed group using the scanpy.tl.score_genes() function

# Leave options blank to avoid running
# Default file: panpipes/resources/qc_genelist_1.0.csv
custom_genes_file: default # "default" (uses panpipes/resources/qc_genelist_1.0.csv) or path to a csv file with the columns "group" and "feature".

calc_proportions: hb,mt,rp # Comma-separated, without spaces: the groups in the custom_genes_file to calculate percentages for
score_genes:  # Comma-separated, without spaces: the groups in the custom_genes_file to run scanpy.tl.score_genes() for


# ---------------
# Plot QC metrics
# ---------------

# All metrics and grouping variables should be inputted as a comma separated string without spaces, e.g. a,b,c

# Settings for the QC plots; list values are comma-separated strings without spaces.
plotqc:
  grouping_var: sample_id # sample_id is the name of each spatial slide (specified in the submission file), and it will always be used for plotting.
  spatial_metrics: total_counts,n_genes_by_counts # If a metric is present in both .obs and .var, both will be plotted