import scanpy as sc
import scirpy as ir
import anndata
import pandas as pd
import os
import sys
from multiprocessing import Pool
import itertools


# Default parameters, used when the notebook is run as-is.
sample_sheet = "../tables/vanderburg_01_samples.csv"  # CSV with the sample metadata
output_file = "../results/01_process_data/adata.h5ad"  # where the merged object is written
data_dir = "../data"  # root folder; load_sample looks for a cellranger/ folder inside it
n_cpus = 16  # number of worker processes used to load the samples


# Parameters
# NOTE(review): these assignments override the defaults above. This looks like
# a parameter section injected by a notebook runner (e.g. papermill) -- confirm.
# n_cpus is a string here, which is why it is converted with int() before it
# is used as the pool size.
sample_sheet = "sample_sheet.csv"
output_file = "adata.h5ad"
data_dir = "data"
n_cpus = "16"


# Read the sample sheet with the sample-level metadata.
obs = pd.read_csv(sample_sheet)


# Display only: set_index returns a new frame and the result is not assigned,
# so `obs` itself keeps "samples" as a regular column.
obs.set_index("samples")


# Take an independent copy of the sample ids so that sorting them in place
# does not affect `obs`.
dataset_samples = obs["samples"].to_numpy(copy=True)


# In-place sort; this is the order in which the samples are passed to the loader.
dataset_samples.sort()
dataset_samples

array(['H141', 'H143', 'H149', 'H160', 'H176', 'H182', 'H185', 'H188',
       'H197', 'H205', 'H208', 'H211', 'H68'], dtype=object)


def load_sample(sample_id, data_dir):
    """Load expression counts and TCR contigs of one sample into one AnnData.

    The raw 10x feature-barcode matrix (genome ``"GRCh38"``) and the filtered
    contig annotations are read from the ``cellranger`` folder below
    ``data_dir``. The per-sample directory names are built from ``sample_id``
    without its first character. Every barcode is prefixed with
    ``"<sample_id>_"``, gene symbols flagged as duplicated are removed, and the
    receptor data is merged into the expression object.

    Parameters
    ----------
    sample_id
        Sample identifier; it is also written to ``.obs["samples"]``.
    data_dir
        Directory that contains the ``cellranger`` folder.

    Returns
    -------
    The expression AnnData object with the receptor information merged in.
    """

    def _with_sample_prefix(barcodes):
        # Prefix each barcode with the sample id.
        return ["{}_{}".format(sample_id, barcode) for barcode in barcodes]

    gex_path = os.path.join(
        data_dir, f"cellranger/{sample_id[1:]}_GEX/outs/raw_feature_bc_matrix.h5"
    )
    tcr_path = os.path.join(
        data_dir, f"cellranger/{sample_id[1:]}_TCR/outs/filtered_contig_annotations.csv"
    )

    gex = sc.read_10x_h5(gex_path, genome="GRCh38")
    tcr = ir.io.read_10x_vdj(tcr_path)

    gex.obs_names = _with_sample_prefix(gex.obs_names)
    tcr.obs_names = _with_sample_prefix(tcr.obs_names)

    # Boolean mask marking repeated gene symbols (first occurrence is kept).
    is_dup = gex.var_names.duplicated()
    print(
        "Removing {} gene symbols because they are duplicated".format(sum(is_dup))
    )
    deduplicated = gex[:, ~is_dup].copy()

    ir.pp.merge_with_ir(deduplicated, tcr)
    deduplicated.obs["samples"] = sample_id
    return deduplicated


# Load all samples in parallel: one load_sample(sample_id, data_dir) call per
# entry of dataset_samples. n_cpus may be a string, hence the int() conversion.
# starmap returns the results in input order, so `adatas` follows dataset_samples.
with Pool(int(n_cpus)) as p:
    adatas = p.starmap(load_sample, zip(dataset_samples, itertools.repeat(data_dir)))

Variable names are not unique. To make them unique, call `.var_names_make_unique`.
/opt/conda/lib/python3.8/site-packages/anndata/_core/anndata.py:1094: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead
  if not is_categorical(df_full[k]):
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
/opt/conda/lib/python3.8/site-packages/anndata/_core/anndata.py:1094: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead
  if not is_categorical(df_full[k]):
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
/opt/conda/lib/python3.8/site-packages/anndata/_core/anndata.py:1094: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead
  if not is_categorical(df_full[k]):
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
/opt/conda/lib/python3.8/site-packages/anndata/_core/anndata.py:1094: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead
  if not is_categorical(df_full[k]):
/opt/conda/lib/python3.8/site-packages/anndata/_core/anndata.py:1094: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead
  if not is_categorical(df_full[k]):
/opt/conda/lib/python3.8/site-packages/anndata/_core/anndata.py:1094: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead
  if not is_categorical(df_full[k]):
/opt/conda/lib/python3.8/site-packages/anndata/_core/anndata.py:1094: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead
  if not is_categorical(df_full[k]):
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
/opt/conda/lib/python3.8/site-packages/anndata/_core/anndata.py:1094: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead
  if not is_categorical(df_full[k]):
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
/opt/conda/lib/python3.8/site-packages/anndata/_core/anndata.py:1094: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead
  if not is_categorical(df_full[k]):
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
/opt/conda/lib/python3.8/site-packages/anndata/_core/anndata.py:1094: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead
  if not is_categorical(df_full[k]):
/opt/conda/lib/python3.8/site-packages/anndata/_core/anndata.py:1094: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead
  if not is_categorical(df_full[k]):
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
/opt/conda/lib/python3.8/site-packages/anndata/_core/anndata.py:1094: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead
  if not is_categorical(df_full[k]):
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
/opt/conda/lib/python3.8/site-packages/anndata/_core/anndata.py:1094: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead
  if not is_categorical(df_full[k]):
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
/opt/conda/lib/python3.8/site-packages/anndata/_core/anndata.py:1192: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead
  if is_string_dtype(df[key]) and not is_categorical(df[key])
... storing 'IR_VJ_1_cdr3' as categorical
... storing 'IR_VJ_2_cdr3' as categorical
... storing 'IR_VDJ_1_cdr3' as categorical
... storing 'IR_VDJ_2_cdr3' as categorical
... storing 'IR_VJ_1_cdr3_nt' as categorical
... storing 'IR_VJ_2_cdr3_nt' as categorical
... storing 'IR_VDJ_1_cdr3_nt' as categorical
... storing 'IR_VDJ_2_cdr3_nt' as categorical
... storing 'IR_VJ_1_junction_ins' as categorical
... storing 'IR_VJ_2_junction_ins' as categorical
... storing 'IR_VDJ_1_junction_ins' as categorical
... storing 'IR_VDJ_2_junction_ins' as categorical

Removing 24 gene symbols because they are duplicated

/opt/conda/lib/python3.8/site-packages/anndata/_core/anndata.py:1192: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead
  if is_string_dtype(df[key]) and not is_categorical(df[key])
... storing 'IR_VJ_1_cdr3' as categorical
... storing 'IR_VJ_2_cdr3' as categorical
... storing 'IR_VDJ_1_cdr3' as categorical
... storing 'IR_VDJ_2_cdr3' as categorical
... storing 'IR_VJ_1_cdr3_nt' as categorical
... storing 'IR_VJ_2_cdr3_nt' as categorical
... storing 'IR_VDJ_1_cdr3_nt' as categorical
... storing 'IR_VDJ_2_cdr3_nt' as categorical
... storing 'IR_VJ_1_junction_ins' as categorical
... storing 'IR_VJ_2_junction_ins' as categorical
... storing 'IR_VDJ_1_junction_ins' as categorical
... storing 'IR_VDJ_2_junction_ins' as categorical
/opt/conda/lib/python3.8/site-packages/anndata/_core/anndata.py:1192: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead
  if is_string_dtype(df[key]) and not is_categorical(df[key])
... storing 'IR_VJ_1_cdr3' as categorical
/opt/conda/lib/python3.8/site-packages/anndata/_core/anndata.py:1192: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead
  if is_string_dtype(df[key]) and not is_categorical(df[key])
... storing 'IR_VJ_2_cdr3' as categorical
... storing 'IR_VJ_1_cdr3' as categorical
... storing 'IR_VDJ_1_cdr3' as categorical
... storing 'IR_VJ_2_cdr3' as categorical
... storing 'IR_VDJ_2_cdr3' as categorical
... storing 'IR_VDJ_1_cdr3' as categorical
... storing 'IR_VJ_1_cdr3_nt' as categorical
... storing 'IR_VDJ_2_cdr3' as categorical
... storing 'IR_VJ_2_cdr3_nt' as categorical
... storing 'IR_VJ_1_cdr3_nt' as categorical
... storing 'IR_VDJ_1_cdr3_nt' as categorical
... storing 'IR_VJ_2_cdr3_nt' as categorical
... storing 'IR_VDJ_2_cdr3_nt' as categorical
... storing 'IR_VJ_1_junction_ins' as categorical
... storing 'IR_VJ_2_junction_ins' as categorical
... storing 'IR_VDJ_1_junction_ins' as categorical
... storing 'IR_VDJ_1_cdr3_nt' as categorical
... storing 'IR_VDJ_2_junction_ins' as categorical
... storing 'IR_VDJ_2_cdr3_nt' as categorical
... storing 'IR_VJ_1_junction_ins' as categorical
... storing 'IR_VJ_2_junction_ins' as categorical
... storing 'IR_VDJ_1_junction_ins' as categorical
... storing 'IR_VDJ_2_junction_ins' as categorical

Removing 24 gene symbols because they are duplicated
Removing 24 gene symbols because they are duplicated
Removing 24 gene symbols because they are duplicated

/opt/conda/lib/python3.8/site-packages/anndata/_core/anndata.py:1192: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead
  if is_string_dtype(df[key]) and not is_categorical(df[key])
... storing 'IR_VJ_1_cdr3' as categorical
... storing 'IR_VJ_2_cdr3' as categorical
... storing 'IR_VDJ_1_cdr3' as categorical
... storing 'IR_VDJ_2_cdr3' as categorical
... storing 'IR_VJ_1_cdr3_nt' as categorical
... storing 'IR_VJ_2_cdr3_nt' as categorical
... storing 'IR_VDJ_1_cdr3_nt' as categorical
... storing 'IR_VDJ_2_cdr3_nt' as categorical
... storing 'IR_VJ_1_junction_ins' as categorical
... storing 'IR_VJ_2_junction_ins' as categorical
... storing 'IR_VDJ_1_junction_ins' as categorical
... storing 'IR_VDJ_2_junction_ins' as categorical
/opt/conda/lib/python3.8/site-packages/anndata/_core/anndata.py:1192: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead
  if is_string_dtype(df[key]) and not is_categorical(df[key])
... storing 'IR_VJ_1_cdr3' as categorical
... storing 'IR_VJ_2_cdr3' as categorical
... storing 'IR_VDJ_1_cdr3' as categorical
... storing 'IR_VDJ_2_cdr3' as categorical
... storing 'IR_VJ_1_cdr3_nt' as categorical
... storing 'IR_VJ_2_cdr3_nt' as categorical
... storing 'IR_VDJ_1_cdr3_nt' as categorical
... storing 'IR_VDJ_2_cdr3_nt' as categorical
... storing 'IR_VJ_1_junction_ins' as categorical
... storing 'IR_VJ_2_junction_ins' as categorical
... storing 'IR_VDJ_1_junction_ins' as categorical
... storing 'IR_VDJ_2_junction_ins' as categorical

Removing 24 gene symbols because they are duplicated

/opt/conda/lib/python3.8/site-packages/anndata/_core/anndata.py:1192: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead
  if is_string_dtype(df[key]) and not is_categorical(df[key])
... storing 'IR_VJ_1_cdr3' as categorical
... storing 'IR_VJ_2_cdr3' as categorical
... storing 'IR_VDJ_1_cdr3' as categorical
... storing 'IR_VDJ_2_cdr3' as categorical
... storing 'IR_VJ_1_cdr3_nt' as categorical
... storing 'IR_VJ_2_cdr3_nt' as categorical

Removing 24 gene symbols because they are duplicated

... storing 'IR_VDJ_1_cdr3_nt' as categorical
... storing 'IR_VDJ_2_cdr3_nt' as categorical
... storing 'IR_VJ_1_junction_ins' as categorical
... storing 'IR_VJ_2_junction_ins' as categorical
... storing 'IR_VDJ_1_junction_ins' as categorical
... storing 'IR_VDJ_2_junction_ins' as categorical
/opt/conda/lib/python3.8/site-packages/anndata/_core/anndata.py:1192: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead
  if is_string_dtype(df[key]) and not is_categorical(df[key])
... storing 'IR_VJ_1_cdr3' as categorical
... storing 'IR_VJ_2_cdr3' as categorical
... storing 'IR_VDJ_1_cdr3' as categorical
... storing 'IR_VDJ_2_cdr3' as categorical
... storing 'IR_VJ_1_cdr3_nt' as categorical
... storing 'IR_VJ_2_cdr3_nt' as categorical
... storing 'IR_VDJ_1_cdr3_nt' as categorical
... storing 'IR_VDJ_2_cdr3_nt' as categorical
... storing 'IR_VJ_1_junction_ins' as categorical
... storing 'IR_VJ_2_junction_ins' as categorical
... storing 'IR_VDJ_1_junction_ins' as categorical
... storing 'IR_VDJ_2_junction_ins' as categorical
/opt/conda/lib/python3.8/site-packages/anndata/_core/anndata.py:1192: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead
  if is_string_dtype(df[key]) and not is_categorical(df[key])
... storing 'IR_VJ_1_cdr3' as categorical
... storing 'IR_VJ_2_cdr3' as categorical
... storing 'IR_VDJ_1_cdr3' as categorical
... storing 'IR_VDJ_2_cdr3' as categorical
... storing 'IR_VJ_1_cdr3_nt' as categorical
... storing 'IR_VJ_2_cdr3_nt' as categorical
... storing 'IR_VDJ_1_cdr3_nt' as categorical
... storing 'IR_VDJ_2_cdr3_nt' as categorical
... storing 'IR_VJ_1_junction_ins' as categorical
... storing 'IR_VJ_2_junction_ins' as categorical
... storing 'IR_VDJ_1_junction_ins' as categorical
... storing 'IR_VDJ_2_junction_ins' as categorical

Removing 24 gene symbols because they are duplicated


# Combine the per-sample objects into a single AnnData.
adata = anndata.concat(adatas)


# coarse filtering, proper QC is done later
# Keeps only cells with at least 100 detected genes; adata is modified in place.
sc.pp.filter_cells(adata, min_genes=100)

/opt/conda/lib/python3.8/site-packages/anndata/_core/anndata.py:1094: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead
  if not is_categorical(df_full[k]):


# Display the per-cell sample labels that load_sample stored in .obs["samples"].
adata.obs["samples"]

H141_AAACCTGAGATAGCAT-1    H141
H141_AAACCTGAGATCCCAT-1    H141
H141_AAACCTGAGCGGCTTC-1    H141
H141_AAACCTGAGCTGTCTA-1    H141
H141_AAACCTGAGGCATTGG-1    H141
                           ... 
H68_TTTGTCAGTTGTTTGG-1      H68
H68_TTTGTCATCAGAGACG-1      H68
H68_TTTGTCATCAGGTAAA-1      H68
H68_TTTGTCATCGTCTGAA-1      H68
H68_TTTGTCATCTCCGGTT-1      H68
Name: samples, Length: 71401, dtype: object

# Display the sample sheet for reference.
obs


# Display only: the sheet indexed by sample id (the result is not stored).
obs.set_index("samples")


# Attach the sample-level metadata to every cell by matching the per-cell
# "samples" label against the sample sheet indexed by sample id.
# The join must not reorder rows: its result replaces adata.obs right after and
# therefore has to stay in the same cell order as adata. A left join without
# sorting keeps the row order of adata.obs, whereas sort=True orders the result
# by the join key and could leave the annotation out of order.
tmp_obs = adata.obs.join(obs.set_index("samples"), on="samples", how="left")


adata.obs = tmp_obs


# Sanity check: show each distinct (samples, patient, facs_purity_cd56) combination once.
adata.obs.loc[:, ["samples", "patient", "facs_purity_cd56"]].drop_duplicates()


adata.shape

(71401, 33514)


# Write the merged, annotated object to output_file using LZF compression.
adata.write(output_file, compression="lzf")

/opt/conda/lib/python3.8/site-packages/anndata/_core/anndata.py:1192: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead
  if is_string_dtype(df[key]) and not is_categorical(df[key])
... storing 'IR_VJ_1_cdr3' as categorical
... storing 'IR_VJ_2_cdr3' as categorical
... storing 'IR_VDJ_1_cdr3' as categorical
... storing 'IR_VDJ_2_cdr3' as categorical
... storing 'IR_VJ_1_cdr3_nt' as categorical
... storing 'IR_VJ_2_cdr3_nt' as categorical
... storing 'IR_VDJ_1_cdr3_nt' as categorical
... storing 'IR_VDJ_2_cdr3_nt' as categorical
... storing 'IR_VJ_1_v_gene' as categorical
... storing 'IR_VJ_2_v_gene' as categorical
... storing 'IR_VDJ_1_v_gene' as categorical
... storing 'IR_VDJ_2_v_gene' as categorical
... storing 'IR_VJ_1_j_gene' as categorical
... storing 'IR_VJ_2_j_gene' as categorical
... storing 'IR_VJ_1_c_gene' as categorical
... storing 'IR_VJ_2_c_gene' as categorical
... storing 'IR_VDJ_1_c_gene' as categorical
... storing 'IR_VDJ_2_c_gene' as categorical
... storing 'samples' as categorical
... storing 'patient' as categorical
... storing 'origin' as categorical
... storing 'dataset' as categorical
... storing 'tumor_type' as categorical
... storing 'platform' as categorical
... storing 'hpv_status' as categorical
... storing 'ir_status' as categorical

	patient	origin	replicate	dataset	tumor_type	platform	age	sex	hpv_status	ir_status	facs_purity_cd3	facs_purity_cd56
samples
H143	H143	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV-	NaN	0.653	0.008
H149	H149	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV16+	IR-	0.644	0.033
H160	H160	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV16+	IR+	0.342	0.067
H176	H176	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV16+	IR+	0.558	0.108
H182	H182	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV16+	IR-	0.303	0.109
H185	H185	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV16+	IR+	0.493	0.163
H188	H188	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV16+	IR+	0.657	0.087
H205	H205	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV-	NaN	0.485	0.028
H211	H211	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV16+	IR-	0.382	0.029
H197	H197	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV-	NaN	0.271	0.171
H208	H208	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV16+	IR-	0.336	0.323
H68	H68	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV16+	IR+	0.797	0.138
H141	H141	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV16+	IR+	0.288	0.025

	samples	patient	origin	replicate	dataset	tumor_type	platform	age	sex	hpv_status	ir_status	facs_purity_cd3	facs_purity_cd56
0	H143	H143	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV-	NaN	0.653	0.008
1	H149	H149	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV16+	IR-	0.644	0.033
2	H160	H160	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV16+	IR+	0.342	0.067
3	H176	H176	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV16+	IR+	0.558	0.108
4	H182	H182	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV16+	IR-	0.303	0.109
5	H185	H185	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV16+	IR+	0.493	0.163
6	H188	H188	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV16+	IR+	0.657	0.087
7	H205	H205	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV-	NaN	0.485	0.028
8	H211	H211	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV16+	IR-	0.382	0.029
9	H197	H197	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV-	NaN	0.271	0.171
10	H208	H208	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV16+	IR-	0.336	0.323
11	H68	H68	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV16+	IR+	0.797	0.138
12	H141	H141	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV16+	IR+	0.288	0.025

	patient	origin	replicate	dataset	tumor_type	platform	age	sex	hpv_status	ir_status	facs_purity_cd3	facs_purity_cd56
samples
H143	H143	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV-	NaN	0.653	0.008
H149	H149	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV16+	IR-	0.644	0.033
H160	H160	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV16+	IR+	0.342	0.067
H176	H176	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV16+	IR+	0.558	0.108
H182	H182	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV16+	IR-	0.303	0.109
H185	H185	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV16+	IR+	0.493	0.163
H188	H188	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV16+	IR+	0.657	0.087
H205	H205	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV-	NaN	0.485	0.028
H211	H211	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV16+	IR-	0.382	0.029
H197	H197	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV-	NaN	0.271	0.171
H208	H208	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV16+	IR-	0.336	0.323
H68	H68	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV16+	IR+	0.797	0.138
H141	H141	tumor_primary	1	vanderburg_01	HNSC	10x_5p	NaN	NaN	HPV16+	IR+	0.288	0.025

Load data (TCR + counts)

Write output file

	samples	patient	facs_purity_cd56
H141_AAACCTGAGATAGCAT-1	H141	H141	0.025
H143_AAACCTGAGCCGATTT-1	H143	H143	0.008
H149_AAACCTGAGACTGTAA-1	H149	H149	0.033
H160_AAACCTGAGAAACCAT-1	H160	H160	0.067
H176_AAACCTGAGAAGGTTT-1	H176	H176	0.108
H182_AAACCTGAGCCCAACC-1	H182	H182	0.109
H185_AAACCTGAGACGCACA-1	H185	H185	0.163
H188_AAACCTGAGAGACTAT-1	H188	H188	0.087
H197_AAACCTGAGGCTCTTA-1	H197	H197	0.171
H205_AAACCTGAGTTTAGGA-1	H205	H205	0.028
H208_AAACCTGAGAACTGTA-1	H208	H208	0.323
H211_AAACCTGAGGATATAC-1	H211	H211	0.029
H68_AAACCTGCACATTCGA-1	H68	H68	0.138