Workflow

Click a node to see details about that step.

Allele Frequency Estimation

Concordance

FDR Control

Precision and Recall

Score Distribution

Statistics

If the workflow was executed on a cluster or in the cloud, the reported runtimes include the time spent waiting in the queue.

Configuration

Configuration files
File Code
config.yaml
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
# File name of a manifest; the name suggests a GDC download manifest with a
# 2017-10-12 timestamp. NOTE(review): the consumer of this key is not visible
# here - confirm how (and relative to which directory) the path is resolved.
gdc-manifest: gdc_manifest_20171012_060907.txt

# Evaluation runs, keyed by run name. The run names are the identifiers listed
# under `plots`.
# Fields used by the entries below:
#   mapper          - read mapper label (always `bwa` in this file)
#   dataset         - key into the top-level `datasets` mapping
#   ref             - key into the top-level `ref` mapping (hg18 or hg38)
#   purity          - float; presumably the tumor purity as a fraction - confirm
#   legend-outside  - optional boolean, only set on the synthetic runs;
#                     presumably a plot layout switch - confirm
runs:
  synthetic-5:
    mapper: bwa
    dataset: synthetic-5
    ref: hg38
    purity: 1.0
    legend-outside: true
  synthetic-20:
    mapper: bwa
    dataset: synthetic-20
    ref: hg38
    purity: 1.0
    legend-outside: true
  # The only run on hg18 and the only one with a purity below 1.0.
  simulated-bwa:
    mapper: bwa
    dataset: simulated
    ref: hg18
    purity: 0.75
  # Four COLO_829 runs, one per dataset of the same name.
  COLO_829-GSC:
    dataset: COLO_829-GSC
    mapper: bwa
    ref: hg38
    purity: 1.0
  COLO_829-EBI:
    dataset: COLO_829-EBI
    mapper: bwa
    ref: hg38
    purity: 1.0
  COLO_829-TGen:
    dataset: COLO_829-TGen
    mapper: bwa
    ref: hg38
    purity: 1.0
  COLO_829-Ill:
    dataset: COLO_829-Ill
    mapper: bwa
    ref: hg38
    purity: 1.0


# Input data, keyed by dataset name (referenced from `runs.<run>.dataset`).
# Each dataset has a `tumor` and a `normal` sample. A sample is given either as
# a pair of FASTQ files (`fq1`/`fq2`) or as a single `bam`, plus a `name`
# (presumably the sample name used in the BAM/VCF - confirm).
# Optional per-dataset keys seen below:
#   truth    - path to a VCF file (file names say "truth")
#   regions  - path to a BED file (file names say "confident-regions")
#   isize    - `mean` and `sd`; presumably the insert size distribution - confirm
#   case     - label shared by all COLO_829 datasets
datasets:
  # Synthetic datasets use absolute paths under /vol/tiny/prosic.
  synthetic-5:
    tumor:
      fq1: /vol/tiny/prosic/chm-synthetic-tumor-normal/dataset/mix5/tumor_1.fastq
      fq2: /vol/tiny/prosic/chm-synthetic-tumor-normal/dataset/mix5/tumor_2.fastq
      name: Cancer
    normal:
      fq1: /vol/tiny/prosic/chm-synthetic-tumor-normal/dataset/mix5/normal_1.fastq
      fq2: /vol/tiny/prosic/chm-synthetic-tumor-normal/dataset/mix5/normal_2.fastq
      name: Control
    truth: /vol/tiny/prosic/chm-synthetic-tumor-normal/dataset/mix5/truth.nochr.vcf
    regions: /vol/tiny/prosic/chm-synthetic-tumor-normal/dataset/mix5/confident-regions.nochr.bed
  synthetic-20:
    tumor:
      fq1: /vol/tiny/prosic/chm-synthetic-tumor-normal/dataset/mix20/tumor_1.fastq
      fq2: /vol/tiny/prosic/chm-synthetic-tumor-normal/dataset/mix20/tumor_2.fastq
      name: Cancer
    normal:
      fq1: /vol/tiny/prosic/chm-synthetic-tumor-normal/dataset/mix20/normal_1.fastq
      fq2: /vol/tiny/prosic/chm-synthetic-tumor-normal/dataset/mix20/normal_2.fastq
      name: Control
    truth: /vol/tiny/prosic/chm-synthetic-tumor-normal/dataset/mix20/truth.nochr.vcf
    regions: /vol/tiny/prosic/chm-synthetic-tumor-normal/dataset/mix20/confident-regions.nochr.bed
  # Pre-aligned BAMs with relative paths; the only dataset that sets `isize`.
  simulated:
    tumor:
      bam: ../data/hiseq.Cancer80.wholegenome.bwamemm.cat.sorted.smtag.bam
      name: Cancer80
    normal:
      bam: ../data/hiseq.Control.wholegenome.cov30.bwamemm.sorted.smtag.bam
      name: Control
    isize:
      mean: 312
      sd: 15
    truth: ../data/simulated-truth.indels.vcf
  # COLO_829 datasets: BAM input only, no `truth` or `regions` entry.
  COLO_829-GSC:
    tumor:
      bam: ../data/EGAD00001002142/EGAD00001002142/EGAZ00001226259_COLO_829_BCGSC_BCGSCPipe.bam
      name: Cancer
    normal:
      bam: ../data/EGAD00001002142/EGAD00001002142/EGAZ00001226245_COLO_829BL_BCGSC_BCGSCPipe.bam
      name: Control
    case: COLO_829
  COLO_829-EBI:
    tumor:
      bam: ../data/EGAD00001002142/EGAD00001002142/EGAZ00001226269_COLO_829_EPleasance_BCGSCPipe.bam
      name: Cancer
    normal:
      bam: ../data/EGAD00001002142/EGAD00001002142/EGAZ00001226254_COLO_829BL_EPleasance_BCGSCPipe.bam
      name: Control
    case: COLO_829
  COLO_829-Ill:
    tumor:
      bam: ../data/EGAD00001002142/EGAD00001002142/EGAZ00001226270_COLO_829_Illumina_BCGSCPipe.bam
      name: Cancer
    normal:
      bam: ../data/EGAD00001002142/EGAD00001002142/EGAZ00001226256_COLO_829BL_Illumina_BCGSCPipe.bam
      name: Control
    case: COLO_829
  COLO_829-TGen:
    tumor:
      bam: ../data/EGAD00001002142/EGAD00001002142/EGAZ00001229972_COLO_829_TGEN_BCGSCPipe.bam
      name: Cancer
    normal:
      bam: ../data/EGAD00001002142/EGAD00001002142/EGAZ00001226249_COLO_829BL_TGEN_BCGSCPipe.bam
      name: Control
    case: COLO_829

# Reference genomes, keyed by the names used in `runs.<run>.ref`.
#   fasta         - path to the reference FASTA
#   date          - year-month label of the assembly
#   chrom_prefix  - prefix of chromosome names: "chr" for hg18, empty for hg38
#   exons         - BED file path; only defined for hg38
ref:
  hg18:
    fasta: ../data/hg18.fasta
    date: 2006-03
    chrom_prefix: chr
  hg38:
    fasta: ../data/Homo_sapiens.GRCh38.dna.primary_assembly.fa
    date: 2013-12
    chrom_prefix: ""
    exons: resources/hg38.exons.bed

# Per-caller settings, keyed by caller name.
# Keys used by the entries below (not every caller sets every key):
#   params     - option string; empty for all callers except varlociraptor.
#                Presumably appended to the caller's command line - confirm.
#   score      - name of the field holding the call score. For neusomatic,
#                manta, strelka and varlociraptor the same name also appears in
#                `info`; lancet uses QUAL. delly and bpi define no score.
#   info       - list of field names (presumably VCF INFO tags - confirm)
#   fmt        - list of field names (presumably VCF FORMAT tags - confirm);
#                only set for varlociraptor
#   blacklist  - list of caller names; only set for varlociraptor
#   invert, adhoc, genotypes, useraw - boolean switches.
#                NOTE(review): their meaning is defined by the workflow rules,
#                which are not visible here - confirm before changing them.
caller:
  neusomatic:
    params: ""
    score: SCORE
    invert: true
    adhoc: true
    genotypes: false
    info:
      - SCORE
  delly:
    params: ""
    adhoc: true
    genotypes: true
    info:
      - END
      - SOMATIC
      - SVLEN
      - SVTYPE
  # pindel is disabled: its entry is commented out.
  #pindel:
  #  params: "-M 1 -B 10000 -H 10 -x 4 -I false -a 2"
  lancet:
    params: ""
    score: QUAL
    invert: true
    adhoc: true
    genotypes: true
    info:
      - SOMATIC
      - SVLEN
      - SVTYPE
  # The only caller that sets `useraw`.
  manta:
    params: ""
    score: SOMATICSCORE
    invert: true
    adhoc: true
    useraw: true
    info:
      - SOMATIC
      - SOMATICSCORE
      - SVLEN
      - SVTYPE
  strelka:
    params: ""
    score: QSI
    invert: true
    adhoc: true
    info:
      - SOMATIC
      - QSI
  bpi:
    params: ""
    adhoc: true
    info:
      - SOMATIC
      - SVLEN
      - SVTYPE
  # The only caller with non-empty `params`, a `blacklist` and a `fmt` list;
  # it sets neither `invert` nor `adhoc`.
  varlociraptor:
    params: "--indel-window 64 --omit-snvs"
    blacklist:
      - bpi
    score: PROB_SOMATIC_TUMOR
    genotypes: false
    info:
      - PROB_SOMATIC_TUMOR
      - PROB_GERMLINE_HET
      - PROB_GERMLINE_HOM
      - PROB_ABSENT
      - PROB_SOMATIC_NORMAL
      - PROB_ARTIFACT
      - SVLEN
    fmt:
      - AF
      - DP

# Option string with a maximum distance and a maximum length difference, both
# set to 50. Presumably passed to the command that matches calls against the
# truth VCF - confirm against the workflow rules.
vcf-match-params: "--max-dist 50 --max-len-diff 50"

# Length intervals as [lower, upper] pairs, listed per variant type
# (DEL, INS). Neighbouring intervals share their boundary value; which end is
# inclusive is decided by the consumer and not visible here.
# The mapping mixes two kinds of keys at the same level:
#   - dataset names (synthetic-20, synthetic-5), each with its own DEL/INS lists
#   - plain DEL/INS lists
# NOTE(review): presumably the plain lists are the default and the
# dataset-named entries override them for that dataset - confirm.
len-ranges:
  synthetic-20:
    DEL:
      - [1, 30]
      - [30, 250]
    INS:
      - [1, 30]
      - [30, 250]
  synthetic-5:
    DEL:
      - [1, 250]
    INS:
      - [1, 250]
  DEL:
    - [1, 30]
    - [30, 50]
    - [50, 100]
    - [100, 250]
  INS:
    - [1, 30]
    - [30, 100]


# Depth intervals as [lower, upper] pairs, listed per variant type. DEL has two
# intervals up to 40, INS has four up to 100. Neighbouring intervals share
# their boundary value; inclusiveness is decided by the consumer.
# NOTE(review): presumably read depth (coverage) bins - confirm.
depth-ranges:
  DEL:
    - [1, 20]
    - [20, 40]
  INS:
    - [1, 10]
    - [10, 20]
    - [20, 40]
    - [40, 100]


# Run selections for the plots. Every listed name is a key of `runs`.
plots:
  # Named groups of runs; `colo1` holds the four COLO_829 runs.
  concordance: # runs where we can do concordance analysis
    colo1:
      - COLO_829-GSC
      - COLO_829-Ill
      - COLO_829-TGen
      - COLO_829-EBI
  # These three runs are the ones whose datasets define a `truth` file.
  known-truth: # runs with a known truth
    - simulated-bwa
    - synthetic-5
    - synthetic-20
  known-truth-full-afs: # runs where variants exhibit the full allele frequency spectrum
    - simulated-bwa

Loading...