Pipeline ingest yaml - multiome data
Download this file
# ============================================================
# Ingest workflow Panpipes (pipeline_ingest.py)
# ============================================================
# This file contains the parameters for the ingest workflow.
# For full descriptions of the parameters, see the documentation at https://panpipes-pipelines.readthedocs.io/en/latest/yaml_docs/pipeline_ingestion_yml.html
#--------------------------
# Compute resources options
#--------------------------
resources:
threads_high: 8
threads_medium: 4
threads_low: 2
# Path to conda env; leave blank if running natively or if your cluster automatically inherits the login node environment
condaenv:
# --------------------------------
# Loading and merging data options
# --------------------------------
# ----------------------------
# Project name and data format
project: "mome"
sample_prefix: "mome"
use_existing_h5mu: False
submission_file: multiomecaf.txt
metadatacols:
concat_join_type: inner
#--------------------------
# Modalities in the project
modalities:
rna: True
prot: False
bcr: False
tcr: False
atac: True
#--------------------------------
# Integrating barcode level data
# e.g. demultiplexing with hashtags, chemical tags or lipid tagging
barcode_mtd:
include: False
path:
metadatacols:
#------------------------------------------
# Loading Protein data - additional options
protein_metadata_table:
index_col_choice:
load_prot_from_raw: False
subset_prot_barcodes_to_rna: False
# -----------------------------
# Quality Control (QC) options
# -----------------------------
# -----------------------------------
# Processing of 10X cellranger metrics files
plot_10X_metrics: True
# ----------------------------------
# Doublet detection on RNA modality
scr:
run: True
expected_doublet_rate: 0.06
sim_doublet_ratio: 2
n_neighbours: 20
min_counts: 2
min_cells: 3
min_gene_variability_pctl: 85
n_prin_comps: 30
use_thr: True
call_doublets_thr: 0.25
# ----------------------------
# RNA modality Quality Control
# Providing a gene list
# see documentation at https://panpipes-pipelines.readthedocs.io/en/latest/usage/gene_list_format.html
custom_genes_file: panpipes-tutorials/tutorials/ingesting_data/qc_genelist_1.0.csv
# Defining actions on the genes
# (for pipeline_ingest.py)
calc_proportions: hb,mt,rp,ig
score_genes: MarkersNeutro
# cell cycle action
ccgenes: default
# ------------------------
# Plotting RNA QC metrics
# all metrics should be provided as a comma-separated string, e.g. a,b,c
plotqc_grouping_var: sample_id
plotqc_rna_metrics: doublet_scores,pct_counts_mt,pct_counts_rp,pct_counts_hb,pct_counts_ig
# ----------------------------
# Plotting Protein QC metrics
# requires prot_path to be included in the submission file
# all metrics should be provided as a comma-separated string, e.g. a,b,c
plotqc_prot_metrics: total_counts,log1p_total_counts,n_prot_by_counts,pct_counts_isotype
plot_metrics_per_prot: total_counts,log1p_total_counts,n_cells_by_counts,mean_counts
identify_isotype_outliers: True
isotype_upper_quantile: 90
isotype_n_pass: 2
# ---------------------
# Plot ATAC QC metrics
# set is_paired to True if a multiome is ingested
is_paired: True
# If this is NOT a multiome experiment, but you have an RNA anndata that you would like to use for TSS enrichment,
# use partner_rna to specify the path to that file and provide a features_tss file with the TSS coordinates.
# Leave empty if multiome is used.
partner_rna:
features_tss:
plotqc_atac_metrics: n_genes_by_counts,total_counts,pct_fragments_in_peaks,atac_peak_region_fragments,atac_mitochondrial_reads,atac_TSS_fragments
# ---------------------------
# Plot Repertoire QC metrics
ir_dist:
metric:
sequence:
clonotype_definition:
receptor_arms:
dual_ir:
within_group:
plotqc_rep_metrics:
# provide a list of items
- is_cell
- extra_chains
- clonal_expansion
- rep:receptor_type
- rep:receptor_subtype
- rep:chain_pairing
- rep:multi_chain
# -------------------------------------
# Profiling Protein Ambient background
# -------------------------------------
# PLEASE NOTE that this analysis can only be run if your inputs are from cellranger raw outputs
assess_background: False
downsample_background: True
# -----------------------------------------------------
# Files required for profiling ambient background or running dsb normalisation
# The pipeline requires the raw_feature_bc_matrix folder from cellranger or equivalent,
# specified in the submission file path with {mod}_filetype set to "cellranger", "cellranger_multi", or "10X_h5"
# for automatic search of .h5 or matrix folder for profiling ambient background or running dsb normalization.
#-------------------------------------------
# Investigate per-channel antibody staining
channel_col: sample_id
save_norm_prot_mtx: False
#----------------------
# Protein normalization
#----------------------
normalisation_methods: clr
#-----------------------------------------------
# Centered log ratio (CLR) normalization options
# margin determines whether you normalise per cell (as you would for RNA),
# or by feature (recommended, due to the variable nature of prot assays).
# CLR margin 0 is recommended for informative qc plots in this pipeline
# 0 = normalise row-wise (per cell)
# 1 = normalise column-wise (per feature)
clr_margin: 0
#--------------------------------------------------------------
# Denoised and Scaled by Background (DSB) normalization options
quantile_clipping: True