# Pipeline ingest yml - ingesting mouse data
# Download this file
# ============================================================
# Ingest workflow Panpipes (pipeline_ingest.py)
# ============================================================
# This file contains the parameters for the ingest workflow.
# For full descriptions of the parameters, see the documentation at https://panpipes-pipelines.readthedocs.io/en/latest/yaml_docs/pipeline_ingestion_yml.html
# --------------------------
# Compute resources options
# --------------------------
# threads_high, threads_medium and threads_low are options of `resources`
# and must stay indented beneath it; at column 0 they become unrelated
# top-level keys and `resources` itself parses as null.
resources:
  threads_high: 1
  threads_medium: 1
  threads_low: 1
# Path to conda env, leave blank if running native or your cluster automatically inherits the login node environment
condaenv:
# --------------------------------
# Loading and merging data options
# --------------------------------
# ----------------------------
# Project name and data format
# String values are quoted so they are always read as strings.
project: "tutorial"
sample_prefix: "mouse_lymph_node"
use_existing_h5mu: False
submission_file: "sample_submission_file.txt"
# metadatacols is left blank in this configuration
metadatacols:
concat_join_type: "inner"
# --------------------------
# Modalities in the project
# One boolean flag per modality, nested under `modalities`.
# Only rna is set to True in this configuration.
modalities:
  rna: True
  prot: False
  bcr: False
  tcr: False
  atac: False
# --------------------------------
# Integrating barcode level data
# e.g. demultiplexing with hashtags, chemical tags or lipid tagging
# include, path and metadatacols are options of `barcode_mtd`. This
# `metadatacols` must stay nested: at column 0 it would duplicate the
# top-level `metadatacols` key defined in the project settings.
barcode_mtd:
  include:
  path:
  metadatacols:
# ------------------------------------------
# Loading Protein data - additional options
# protein_metadata_table and index_col_choice are left blank in this configuration.
# NOTE(review): presumably unused here because `prot` is False under `modalities` - confirm.
protein_metadata_table:
index_col_choice:
load_prot_from_raw: False
subset_prot_barcodes_to_rna: False
# -----------------------------
# Quality Control (QC) options
# -----------------------------
# -----------------------------------
# Processing of 10X cellranger metrics files
plot_10X_metrics: False
# ----------------------------------
# Doublet detection on RNA modality
# Every setting below is an option of `scr` and must stay indented beneath
# it; at column 0 generic names such as `run`, `min_counts` and `min_cells`
# become unrelated top-level keys. `run` is False in this configuration.
scr:
  run: False
  expected_doublet_rate: 0.06
  sim_doublet_ratio: 2
  n_neighbours: 20
  min_counts: 2
  min_cells: 3
  min_gene_variability_pctl: 85
  n_prin_comps: 30
  use_thr: True
  call_doublets_thr: 0.25
# ----------------------------
# RNA modality Quality Control
# Providing a gene list
# see documentation at https://panpipes-pipelines.readthedocs.io/en/latest/usage/gene_list_format.html
custom_genes_file: "data/qc_gene_list_mouse_mt.csv"
# Defining actions on the genes
# (for pipeline_ingest.py)
# Both actions use the same comma separated set of gene groups here.
calc_proportions: "hb,mt,rp,CD8cytotoxic"
score_genes: "hb,mt,rp,CD8cytotoxic"
# cell cycle action (left blank in this configuration)
ccgenes:
# ------------------------
# Plotting RNA QC metrics
# all metrics should be provided as a comma separated string e.g. a,b,c
plotqc_grouping_var: "sample_id"
plotqc_rna_metrics: "pct_counts_mt,pct_counts_rp,pct_counts_hb,pct_counts_CD8cytotoxic,total_counts,n_genes_by_counts"
# ----------------------------
# Plotting Protein QC metrics
# requires prot_path to be included in the submission file
# all metrics should be provided as a comma separated string e.g. a,b,c
# Every key in this section is left blank in this configuration.
# NOTE(review): presumably because `prot` is False under `modalities` - confirm.
plotqc_prot_metrics:
plot_metrics_per_prot:
identify_isotype_outliers:
isotype_upper_quantile:
isotype_n_pass:
# ---------------------
# Plot ATAC QC metrics
# Every key in this section is left blank in this configuration.
# NOTE(review): presumably because `atac` is False under `modalities` - confirm.
# set is_paired to True if a multiome is ingested
is_paired:
# If this is NOT a multiome experiment, but you have an RNA anndata that you would like to use for TSS enrichment
# use the partner_rna to specify the path to the file and provide a features_tss file with the tss coordinates
# leave empty if multiome is used
partner_rna:
features_tss:
plotqc_atac_metrics:
# ---------------------------
# Plot Repertoire QC metrics
# metric and sequence are options of `ir_dist`; receptor_arms, dual_ir and
# within_group are options of `clonotype_definition`. They must stay
# indented beneath their parent or they parse as unrelated top-level keys.
# All values are left blank in this configuration.
ir_dist:
  metric:
  sequence:
clonotype_definition:
  receptor_arms:
  dual_ir:
  within_group:
plotqc_rep_metrics:
# -------------------------------------
# Profiling Protein Ambient background
# -------------------------------------
# PLEASE NOTE that this analysis can only be run if your inputs are from cellranger raw outputs
# Both keys are left blank in this configuration.
assess_background:
downsample_background:
# -----------------------------------------------------
# Files required for profiling ambient background or running dsb normalisation
# The pipeline requires the raw_feature_bc_matrix folder from cellranger or equivalent,
# specified in the submission file path with {mod}_filetype set to "cellranger", "cellranger_multi", or "10X_h5"
# for automatic search of .h5 or matrix folder for profiling ambient background or running dsb normalization.
# -------------------------------------------
# Investigate per-channel antibody staining
# Both keys are left blank in this configuration.
channel_col:
save_norm_prot_mtx:
# ----------------------
# Protein normalization
# ----------------------
# Left blank in this configuration.
normalisation_methods:
# -----------------------------------------------
# Centered log ratio (CLR) normalization options
# margin determines whether you normalise per cell (as you would for RNA),
# or by feature (recommended, due to the variable nature of prot assays).
# CLR margin 0 is recommended for informative qc plots in this pipeline
# 0 = normalise row-wise (per cell)
# 1 = normalise column-wise (per feature)
# NOTE(review): the comments above recommend per-feature normalisation but also
# recommend margin 0, which the mapping lists as per-cell - confirm the intended
# value against the Panpipes documentation before setting clr_margin.
clr_margin:
# --------------------------------------------------------------
# Denoised and Scaled by Background (DSB) normalization options
quantile_clipping: