# Pipeline vis yaml
# Download this file
# ----------------------- #
# Visualisation pipeline DendrouLab
# ----------------------- #
# written by Charlotte Rich-Griffin and Fabiola Curion
# WARNING: Do not edit any line with the format `continuous_vars: &continuous_vars` or `continuous_vars: *continuous_vars`
# ------------------------
# compute resource options
# ------------------------
resources:
  # Number of threads used for parallel jobs.
  # The three settings below are children of `resources`; each one also has to
  # secure enough memory for the kind of task described.
  # this must be enough memory to load your mudata and do computationally intensive tasks
  threads_high: 3
  # this must be enough memory to load your mudata and do computationally light tasks
  threads_medium: 3
  # this must be enough memory to load text files and do plotting,
  # requires much less memory than the other two
  threads_low: 3
# path to conda env, leave blank if running native or your cluster automatically inherits the login node environment
condaenv:
# Start
# --------------------------
# NOTE(review): the original note here read "either one that exists already with"
# and is cut off mid-sentence — confirm the intended wording.
# sample_prefix: presumably the prefix used to label this run's outputs — confirm
sample_prefix: teaseq
# mudata_obj: path to the mudata (.h5mu) file that the pipeline loads
mudata_obj: data/teaseq_clustered.h5mu
# one True/False switch per modality, nested under `modalities`
# NOTE(review): presumably selects which modalities are visualised — confirm
modalities:
  rna: True
  prot: True
  atac: True
  rep: False
  multimodal: False
# grouping vars define how other features are grouped on dot plots and bar plots
# (this applies to categorical, continuous and feature plots)
grouping_vars:
  - sample_id
  - rna:leiden_res1
  - atac:leiden_res0.3
  - prot:leiden_res1
#--------------------------------------------------
# Plot markers
#--------------------------------------------------
custom_markers:
  # marker csv files, one list per use
  files:
    # csv files for full and minimal containing three columns:
    # | mod  | feature  | group        |
    # |------|----------|--------------|
    # | prot | prot_CD8 | Tcellmarkers |
    # | rna  | CD8A     | Tcellmarkers |
    # the full list will be plotted in dot plots and matrix plots, one plot per group
    full:
      - custom_markers.csv
    # the shorter list will be plotted on umaps as well as other plot types, one plot per group
    minimal:
      - custom_markers.csv
    # csv files for paired scatter plots containing three columns:
    # | feature_1 | feature_2 | colour         |
    # |-----------|-----------|----------------|
    # | CD8A      | prot_CD8  |                |
    # | CD4       | CD8A      | doublet_scores |
    paired_scatters:
      - paired_scatters_markers.csv
  # where different normalisation exists in a modality, choose which one to use,
  # set X or leave blank to use the mdata[mod].X assay
  layers:
    rna:
      - logged_counts
    prot:
      - clr
    atac:
      - signac_norm
#--------------------------------------------------
# Plot metadata variables
#--------------------------------------------------
categorical_vars: &categorical_vars
  # use mod:variable notation,
  # any metrics that you want to plot on all modality umaps go under "all"
  # these variables will be plotted as categories
  all:
    - sample_id
  rna:
    - rna:leiden_res1
    - rna:predicted_doublets
  prot:
    - prot:leiden_res1
  atac:
    - atac:leiden_res0.3
  # left blank: no categorical variables listed for rep
  rep:
  multimodal:
    - rna:leiden_res1
continuous_vars: &continuous_vars
  # use mod:variable notation,
  # any metrics that you want to plot on all modality umaps go under "all"
  # these variables will be plotted as continuous variables
  # left blank: no continuous variables listed for all modalities
  all:
  rna:
    - rna:total_counts
  prot:
    - prot:total_counts
  atac:
    - atac:total_counts
  multimodal:
    - rna:total_counts
# csv files describing the pairs of variables to scatter, laid out as:
# | feature_1        | feature_2         | colour         |
# |------------------|-------------------|----------------|
# | rna:total_counts | prot:total_counts | doublet_scores |
paired_scatters:
  - paired_scatters.csv
#--------------------------------------------------
# Decide what plots to do
#--------------------------------------------------
do_plots:
  # plot each categorical variable as a bar plot
  # (e.g. categorical variable "cluster" on x axis and n cells on y)
  categorical_barplots: True
  # plot each grouping var as a bar plot, with categorical variables stacked.
  # (e.g. grouping var "sample_id" on x axis and n cells on y and colored by
  # categorical variable "cluster" in a stack)
  categorical_stacked_barplots: True
  # plot each continuous variable as a violin plot.
  # (e.g. grouping var "sample_id" on x axis and the continuous variable "doublet_scores" on y)
  continuous_violin: True
  # marker dotplots as produced by scanpy.pl.dotplot
  marker_dotplots: True
  # marker matrixplot as produced by scanpy.pl.matrixplot
  marker_matrixplots: True
  # scatter plots as defined in the paired_scatters csv file,
  # feature_1 on x, feature_2 on y and coloured by colour
  # values will be taken from the layers specified above
  paired_scatters: True
# embedding plots using the modality and embedding basis specified.
# will plot all of minimal markers csv, and categorical and continuous variables
# each modality carries its own `run` switch and list of `basis` names
embedding:
  rna:
    run: True
    basis:
      - X_umap_mindist_0.5
      - X_pca
  prot:
    run: True
    basis:
      - X_umap_mindist_0.1
  atac:
    run: True
    basis:
      - X_umap_mindist_0.5