# Pipeline integration YAML
# (Download this file)
# ============================================================
# Integration workflow Panpipes (pipeline_integration.py)
# ============================================================
# This file contains the parameters for the integration workflow.
# For full descriptions of the parameters, see the documentation at https://panpipes-pipelines.readthedocs.io/en/latest/yaml_docs/pipeline_integration_yml.html
#--------------------------
# Compute resources options
#--------------------------
# Thread counts for each task tier (high / medium / low / GPU).
resources:
  threads_high: 1
  threads_medium: 1
  threads_low: 1
  threads_gpu: 2
# Path to conda env, leave blank if running native or your cluster automatically inherits the login node environment
condaenv:
# Cluster queue names for long-running and GPU jobs.
# Left blank here (presumably the scheduler default is used — confirm).
queues:
  long:
  gpu:
# --------------------------------
# Loading and merging data options
# --------------------------------
# ----------------------------
# Data format
# Prefix identifying this run's sample set (presumably used to name outputs — confirm against the workflow docs)
sample_prefix: teaseq
# Preprocessed multimodal object (.h5mu); presumably the input of this workflow — confirm
preprocessed_obj: teaseq.h5mu
#-----------------
# Batch correction
# ----------------
# Batch correction is unimodal: each modality is batch-corrected independently.
# ------------
# RNA modality
rna:
  # Set to False to skip batch correction for this modality
  run: True
  # Comma-separated list of batch correction tools to run
  tools: harmony,scvi,bbknn
  # Column holding the batch variable (presumably an obs column — confirm)
  column: dataset
  # Harmony arguments
  harmony:
    sigma: 0.1
    theta: 1.0
    npcs: 30
  # BBKNN args # https://bbknn.readthedocs.io/en/latest/
  bbknn:
    neighbors_within_batch: 20
  # SCVI args
  scvi:
    seed: 1492
    exclude_mt_genes: True
    mt_column: mt
    # Blank values load as null (presumably the tool defaults then apply — confirm)
    model_args:
      n_layers:
      n_latent:
      gene_likelihood: zinb
    training_args:
      max_epochs: 400
      train_size: 0.9
      early_stopping: True
    training_plan:
      lr: 0.001
      n_epochs_kl_warmup: 400
      reduce_lr_on_plateau: True
      lr_scheduler_metric:
      lr_patience: 8
      lr_factor: 0.1
  # Find neighbour parameters
  # The anchor (&rna_neighbors) lets this mapping be reused via the alias *rna_neighbors
  neighbors: &rna_neighbors
    npcs: 30
    k: 30
    metric: euclidean
    method: scanpy
# ----------------
# Protein modality
prot:
  # Set to False to skip batch correction for this modality
  run: True
  # Comma-separated list of batch correction tools to run
  tools: harmony
  # Column holding the batch variable (presumably an obs column — confirm)
  column: orig.ident
  # Harmony args
  harmony:
    sigma: 0.1
    theta: 1.0
    npcs: 30
  # BBKNN args # https://bbknn.readthedocs.io/en/latest/
  bbknn:
    neighbors_within_batch: 20
  # Find neighbour parameters
  # The anchor (&prot_neighbors) lets this mapping be reused via the alias *prot_neighbors
  neighbors: &prot_neighbors
    npcs: 30
    k: 30
    metric: euclidean
    method: scanpy
# -------------
# ATAC modality
atac:
  # Set to False to skip batch correction for this modality
  run: True
  # Dimensionality reduction to use for this modality
  dimred: LSI
  # Comma-separated list of batch correction tools to run
  tools: harmony,bbknn
  # Column holding the batch variable (presumably an obs column — confirm)
  column: dataset
  # Harmony args
  harmony:
    sigma: 0.1
    theta: 1.0
    npcs: 30
  # BBKNN args # https://bbknn.readthedocs.io/en/latest/
  bbknn:
    # Blank loads as null (presumably the tool default then applies — confirm)
    neighbors_within_batch:
  # Find neighbour parameters
  # The anchor (&atac_neighbors) lets this mapping be reused via the alias *atac_neighbors
  neighbors: &atac_neighbors
    npcs: 30
    k: 30
    metric: euclidean
    method: scanpy
#-----------------------
# multimodal integration
# ----------------------
# remember to specify knn graph params in the section "neighbors"
multimodal:
  # Set to False to skip multimodal integration
  run: True
  # Multimodal integration tools to run, one list item per tool
  tools:
    - WNN
    - totalvi
  column_categorical: sample_id
  # TotalVI arguments
  totalvi:
    seed: 1492
    modalities: rna,prot
    exclude_mt_genes: True
    mt_column: mt
    filter_by_hvg: True
    filter_prot_outliers: False
    model_args:
      latent_distribution: "normal"
    training_args:
      max_epochs: 100
      train_size: 0.9
      early_stopping: True
    # NOTE(review): YAML loads a literal None as the string "None", not null —
    # confirm the workflow expects that spelling
    training_plan: None
  # MultiVI arguments
  MultiVI:
    seed: 1492
    lowmem: True
    model_args:
      n_hidden :
      n_latent :
      region_factors : True
      latent_distribution : 'normal'
      deeply_inject_covariates : False
      fully_paired : False
    training_args:
      max_epochs : 500
      lr : 0.0001
      use_gpu :
      train_size : 0.9
      validation_size :
      batch_size : 128
      weight_decay : 0.001
      # Written with a decimal point so YAML 1.1 loaders (e.g. PyYAML) read a
      # float; a bare 1e-08 is loaded as a string
      eps : 1.0e-08
      early_stopping : True
      save_best : True
      check_val_every_n_epoch :
      n_steps_kl_warmup :
      n_epochs_kl_warmup : 50
      adversarial_mixing : True
    training_plan :
  # Mofa arguments
  mofa:
    modalities:
    filter_by_hvg: True
    n_factors: 10
    n_iterations: 1000
    convergence_mode: fast
    save_parameters: False
    outfile:
  # WNN arguments
  WNN:
    modalities: rna,prot,atac
    # NOTE(review): YAML loads a literal None as the string "None", not null —
    # confirm the workflow expects that spelling
    batch_corrected:
      rna: None
      prot: None
      atac: None
    # please use anchors (&) and aliases (*) if necessary
    # Each alias below reuses the `neighbors` mapping anchored in that modality's section
    knn:
      rna: *rna_neighbors
      prot: *prot_neighbors
      atac: *atac_neighbors
    # WNN neighbour search
    n_neighbors:
    n_bandwidth_neighbors: 20
    n_multineighbors: 200
    metric: 'euclidean'
    low_memory: True
  # KNN calculation for multimodal analysis
  neighbors:
    npcs: 30
    k: 30
    metric: euclidean
    method: scanpy
#--------------------
# Plotting parameters
#--------------------
# Variables used to group and colour the integration plots.
# Values appear to follow a modality:column pattern (e.g. rna:total_counts) — confirm
plotqc:
  grouping_var: dataset,sample_id
  all: rep:receptor_subtype
  rna: rna:total_counts
  prot: prot:total_counts
  atac:
  multimodal: rna:total_counts
#-------------
# scib metrics
#-------------
#Obs columns containing the cell type labels
# One entry per modality; blank loads as null (presumably no cell type column is given then — confirm)
scib:
  rna:
  prot:
  atac:
# -------------------------
# Creating the final object
# -------------------------
# For each modality: `include` toggles whether it is included in the final object and
# `bc_choice` names the chosen result (a tool name as used above, or no_correction).
final_obj:
  rna:
    include: True
    bc_choice: no_correction
  prot:
    include: True
    bc_choice: harmony
  atac:
    include: False
    bc_choice: bbknn
  multimodal:
    include: True
    bc_choice: WNN