# Pipeline ingest yml - ingesting mouse data

# Download this file
# ============================================================
# Ingest workflow Panpipes (pipeline_ingest.py)
# ============================================================
# This file contains the parameters for the ingest workflow.
# For full descriptions of the parameters, see the documentation at https://panpipes-pipelines.readthedocs.io/en/latest/yaml_docs/pipeline_ingestion_yml.html


#--------------------------
# Compute resources options
#--------------------------
resources:
  # Thread counts for the high / medium / low resource tiers (all 1 here).
  # NOTE(review): which pipeline tasks use which tier is not visible in this
  # file - confirm in the documentation linked at the top.
  threads_high: 1
  threads_medium: 1
  threads_low: 1

# Path to the conda environment. Leave blank when running natively, or when
# your cluster automatically inherits the login node environment.
condaenv:

# --------------------------------
# Loading and merging data options
# --------------------------------

# ----------------------------
# Project name and data format
# Project-level identifiers and input description for this tutorial run.
project: tutorial
sample_prefix: mouse_lymph_node
# NOTE(review): presumably set to True to start from an already assembled
# h5mu object instead of the submission file - confirm in the docs.
use_existing_h5mu: False
submission_file: sample_submission_file.txt
# left blank in this example
metadatacols:
concat_join_type: inner

#--------------------------
# Modalities in the project
# One True/False flag per modality; only rna is set to True in this example.
modalities:
  rna: True
  prot: False
  bcr: False
  tcr: False
  atac: False

#--------------------------------
# Integrating barcode level data
# e.g. demultiplexing with hashtags, chemical tags or lipid tagging
barcode_mtd:
  # All three fields are left blank in this example.
  include:
  path:
  metadatacols:

#------------------------------------------
# Loading Protein data - additional options
# Protein loading options. The table and index column are left blank and both
# flags are False in this example (modalities.prot is also False above).
protein_metadata_table:
index_col_choice:
load_prot_from_raw: False
subset_prot_barcodes_to_rna: False


# -----------------------------
# Quality Control (QC) options
# -----------------------------

# -----------------------------------
# Processing of 10X cellranger metrics files
# Set to False in this example.
plot_10X_metrics: False

# ----------------------------------
# Doublet detection on RNA modality
# Settings for doublet detection on the RNA modality.
scr:
  # False in this example.
  # NOTE(review): presumably the values below only take effect when run is
  # True - confirm in the docs.
  run: False
  expected_doublet_rate: 0.06
  sim_doublet_ratio: 2
  n_neighbours: 20
  min_counts: 2
  min_cells: 3
  min_gene_variability_pctl: 85
  n_prin_comps: 30
  # NOTE(review): use_thr looks like the switch for applying
  # call_doublets_thr as the doublet-calling threshold - confirm.
  use_thr: True
  call_doublets_thr: 0.25

# ----------------------------
# RNA modality Quality Control

# Providing a gene list
# see documentation at https://panpipes-pipelines.readthedocs.io/en/latest/usage/gene_list_format.html
# Path to the gene list file used by the gene actions below.
custom_genes_file: data/qc_gene_list_mouse_mt.csv

# Defining actions on the genes

# (for pipeline_ingest.py)
# Both keys take a comma separated string of gene groups; the same four
# groups are given to both in this example.
# NOTE(review): presumably every group name must be present in
# custom_genes_file - confirm against the gene list format documentation.
calc_proportions: hb,mt,rp,CD8cytotoxic
score_genes: hb,mt,rp,CD8cytotoxic

# cell cycle action (left blank in this example)
ccgenes:

# ------------------------
# Plotting RNA QC metrics
# all metrics should be provided as a comma separated string e.g. a,b,c
# Variable used to group the RNA QC plots.
plotqc_grouping_var: sample_id
# The pct_counts_<group> entries use the same four group names that are listed
# in calc_proportions (mt, rp, hb, CD8cytotoxic); total_counts and
# n_genes_by_counts are plotted in addition.
plotqc_rna_metrics: pct_counts_mt,pct_counts_rp,pct_counts_hb,pct_counts_CD8cytotoxic,total_counts,n_genes_by_counts

# ----------------------------
# Plotting Protein QC metrics

# requires prot_path to be included in the submission file
# all metrics should be provided as a comma separated string e.g. a,b,c
# Protein QC plotting options; all left blank in this example.
plotqc_prot_metrics:
plot_metrics_per_prot:

# Isotype outlier options; also left blank.
identify_isotype_outliers:
isotype_upper_quantile:
isotype_n_pass:

# ---------------------
# Plot ATAC QC metrics

# Set is_paired to True if a multiome is ingested.
is_paired:
# If this is NOT a multiome experiment, but you have an RNA anndata that you
# would like to use for TSS enrichment, set partner_rna to the path of that
# file and set features_tss to a file with the TSS coordinates.
# Leave both empty if a multiome is used.
partner_rna:
features_tss:
# left blank in this example
plotqc_atac_metrics:

# ---------------------------
# Plot Repertoire QC metrics
# Repertoire QC options; every field is left blank in this example
# (modalities.bcr and modalities.tcr are both False above).
ir_dist:
  metric:
  sequence:

clonotype_definition:
  receptor_arms:
  dual_ir:
  within_group:

plotqc_rep_metrics:


# -------------------------------------
# Profiling Protein Ambient background
# -------------------------------------
# PLEASE NOTE that this analysis can only be run if your inputs are from cellranger raw outputs

# Both options are left blank in this example.
assess_background:
downsample_background:

# -----------------------------------------------------
# Files required for profiling ambient background or running dsb normalisation

# The pipeline requires the raw_feature_bc_matrix folder from cellranger or equivalent,
# specified in the submission file path with {mod}_filetype set to "cellranger", "cellranger_multi", or "10X_h5",
# so that the .h5 file or matrix folder can be found automatically for profiling ambient background or running dsb normalisation.

#-------------------------------------------
# Investigate per-channel antibody staining
# Both options are left blank in this example.
channel_col:
save_norm_prot_mtx:


#----------------------
# Protein normalization
#----------------------

# Left blank in this example.
normalisation_methods:

#-----------------------------------------------
# Centered log ratio (CLR) normalization options

# The margin selects whether normalisation is done per cell (as you would for
# RNA) or per feature (recommended, due to the variable nature of prot assays).
# CLR margin 0 is recommended for informative qc plots in this pipeline.
#   0 = normalise row-wise (per cell)
#   1 = normalise column-wise (per feature)
# NOTE(review): the text above recommends per-feature normalisation but also
# recommends margin 0, which is labelled "per cell" - confirm the intended
# mapping in the panpipes documentation before relying on it.
clr_margin:

#--------------------------------------------------------------
# Denoised and Scaled by Background (DSB) normalization options
quantile_clipping: