@@ -30,7 +30,7 @@ copts = config["clusterOpts"]
30
30
31
31
rule all :
32
32
input :
33
- chrsize = expand (outdir + "{genome}.{hap}.chrSize" , genome = genomes , hap = haps ),
33
+ # chrsize = expand(outdir + "{genome}.{hap}.chrSize", genome=genomes, hap=haps),
34
34
faAln = expand (outdir + "{genome}.{hap}.aln.foo" , genome = genomes , hap = haps ),
35
35
lift = expand (outdir + "{genome}/lift.foo" , genome = genomes ),
36
36
TRfa = expand (outdir + "{genome}.{hap}.tr.fasta" , genome = genomes , hap = haps ),
@@ -65,9 +65,6 @@ ln -sf {input.fa} .
65
65
for hap in 0 1; do
66
66
fa={params.indir}/{wildcards.genome}.$hap.fa
67
67
ln -sf "$fa".fai .
68
- #samtools faidx $fa &
69
- {params.sd}/script/chrsize.sh $fa > {wildcards.genome}.$hap.chrSize
70
- wait
71
68
done
72
69
"""
73
70
@@ -209,13 +206,14 @@ rule JointTRAnnotation:
209
206
mapping = outdir + "OrthoMap.v2.tsv" ,
210
207
TRfa = expand (outdir + "{genome}.{hap}.tr.fasta" , genome = genomes , hap = haps ),
211
208
resources :
212
- cores = 6 ,
213
- mem = lambda wildcards , attempt : 40 + 20 * ( attempt - 1 )
209
+ cores = 12 ,
210
+ mem = lambda wildcards , attempt : 110
214
211
priority : 96
215
212
params :
216
213
copts = copts ,
217
214
sd = srcdir ,
218
215
od = outdir ,
216
+ indir = indir ,
219
217
refTR = config ["refTR" ],
220
218
ksize = ksize ,
221
219
FS = FS ,
@@ -230,15 +228,16 @@ set -eu
230
228
ulimit -c 20000
231
229
cd {params.od}
232
230
233
- echo "Generating panbed"
231
+ printf "Generating panbed"
234
232
cut -f 1-3 {params.refTR} >pan.tr.mbe.v0.bed
235
233
for g in {params.genomes}; do
234
+ printf "."
236
235
bedtools map -c 1 -o count -a pan.tr.mbe.v0.bed -b <(cut -f 4-6 $g/tmp1.0.bed) >pan.tr.mbe.v0.bed.tmp &&
237
236
mv pan.tr.mbe.v0.bed.tmp pan.tr.mbe.v0.bed
238
237
done
239
- #{params.sd}/script/preMBE.py {params.pairs} pan.tr.mbe.v0.bed {params.TRwindow}
240
- {params.sd}/script/multiBoundaryExpansion.py {params.ksize} {params.FS} {params.TRwindow} {params.pairs} pan.tr.mbe.v0.bed {params.th1} {params.th2}
241
- # {params.sd}/script/writeMBEbed.py {params.th1} {params.th2}
238
+ echo ""
239
+ mkdir -p MBE
240
+ {params.sd}/script/multiBoundaryExpansion.parallel.py {params.ksize} {params.FS} {params.TRwindow} {params.pairs} pan.tr.mbe.v0.bed {params.th1} {params.th2} {resources.cores} {params.indir}
242
241
hi=0
243
242
for g in {params.genomes}; do
244
243
for h in 0 1; do
@@ -255,10 +254,10 @@ for g in {params.genomes}; do
255
254
done
256
255
done >mbe.m0.loci
257
256
rm tmp.bed
258
- {params.sd}/script/mergeMBEbed.py {params.pairs} pan.tr.mbe.v0.bed
257
+ {params.sd}/script/mergeMBEbed.py {params.pairs} {params.th2}
259
258
260
259
### write fasta
261
- echo "Fetching TR+flank"
260
+ echo "Fetching TR+flank" $(date)
262
261
hi=0
263
262
for g in {params.genomes}; do
264
263
for h in 0 1; do
@@ -269,7 +268,7 @@ for g in {params.genomes}; do
269
268
$3=$3+{params.FS}
270
269
print $0
271
270
}}' |
272
- {params.sd}/script/SelectRegions.py /dev/stdin "$g"."$h".fa /dev/stdout |
271
+ {params.sd}/script/SelectRegions.py /dev/stdin {params.indir}/"$g"."$h".fa /dev/stdout |
273
272
awk '{{if ($1 ~ />/) {{print}} else {{print toupper($0)}} }}' >"$g"."$h".tr.fasta
274
273
((++hi))
275
274
done
@@ -284,10 +283,10 @@ rule GenRawGenomeGraph:
284
283
mapping = outdir + "OrthoMap.v2.tsv" ,
285
284
output :
286
285
rawPBkmers = expand (outdir + "{{genome}}.rawPB.{kmerType}.kmers" , kmerType = kmerTypes ),
287
- rawILkmers = outdir + "{genome}.rawIL.tr.kmers"
286
+ rawILkmers = [ outdir + "{genome}.rawIL.tr.kmers" ] if prune else []
288
287
resources :
289
- cores = 24 ,
290
- mem = lambda wildcards , attempt : 20 #90 + 20*(attempt-1)
288
+ cores = 24 if prune else 1 ,
289
+ mem = lambda wildcards , attempt : 25 + 20 * (attempt - 1 )
291
290
priority : 95
292
291
params :
293
292
copts = copts ,
@@ -299,17 +298,21 @@ rule GenRawGenomeGraph:
299
298
rth = rth ,
300
299
rstring = rstring ,
301
300
thcth = thcth ,
302
- hi = lambda wildcards : 2 * genomes .index (wildcards .genome )
301
+ hi = lambda wildcards : 2 * genomes .index (wildcards .genome ),
302
+ prune = int (prune )
303
303
shell :"""
304
304
set -eu
305
305
ulimit -c 20000
306
306
cd {params.od}
307
+ module load gcc
307
308
308
309
{params.sd}/bin/vntr2kmers_thread -g -m <(cut -f $(({params.hi}+1)),$(({params.hi}+2)) {input.mapping}) -k {params.ksize} -fs {params.FS} -ntr {params.FS} -o {wildcards.genome}.rawPB -fa 2 {input.TRfa}
309
310
310
- samtools fasta -@2 -n {input.ILbam} |
311
- {params.sd}/bin/bam2pe -fai /dev/stdin |
312
- {params.sd}/bin/danbing-tk -g {params.thcth} -k {params.ksize} -qs {params.od}/{wildcards.genome}.rawPB -fai /dev/stdin -o {wildcards.genome}.rawIL -p {resources.cores} -cth {params.cth} -rth {params.rth}
311
+ if [ {params.prune} == "1" ]; then
312
+ samtools fasta -@2 -n {input.ILbam} |
313
+ {params.sd}/bin/bam2pe -fai /dev/stdin |
314
+ {params.sd}/bin/danbing-tk -g {params.thcth} -k {params.ksize} -qs {params.od}/{wildcards.genome}.rawPB -fai /dev/stdin -o {wildcards.genome}.rawIL -p {resources.cores} -cth {params.cth} -rth {params.rth}
315
+ fi
313
316
"""
314
317
315
318
@@ -385,6 +388,7 @@ rule GenPanGenomeGraph:
385
388
shell :"""
386
389
cd {params.od}
387
390
ulimit -c 20000
391
+ module load gcc
388
392
389
393
{params.sd}/bin/genPanKmers -o pan -m - -k {params.kmerpref}
390
394
"""
0 commit comments