Skip to content

Commit 9c54c67

Browse files
committed
Add hashing to plot-tree
1 parent a2dc1e4 commit 9c54c67

File tree

3 files changed

+72
-15
lines changed

3 files changed

+72
-15
lines changed

bin/libphylociraptor/usageinfo.py

+4
Original file line numberDiff line numberDiff line change
@@ -251,10 +251,13 @@ def hi():
251251
-i, --intrees Relative paths to input trees in results folder, seperated by commas. (Default: all)
252252
-o, --outprefix Output file name prefix.
253253
-n, --nquartets Total number of quartets to be calculated. Use either this or --stopby.
254+
OR
254255
-b, --stopby Stoping criterion. There are two options:
255256
-b conflicts=100 stops as soon as 100 conflicts have been found.
256257
--stopby tipcoverage=100 stops as soon as every tip is in at least 100 quartets.
258+
257259
Optional Arguments:
260+
--config-file Relative custom config-file path. (Default: data/config.yaml)
258261
-s, --seed Random seed number for reproducibility. (Default: random)
259262
-l, --lineagefile Lineagefile created with phylociraptor util get-lineage.
260263
Mandatory when using-a/--selecttaxa with a specific taxonomic level.
@@ -278,6 +281,7 @@ def hi():
278281
Use 'all' to plot all available trees.
279282
280283
Optional Arguments:
284+
--config-file Relative custom config-file path. (Default: data/config.yaml)
281285
-o, --outprefix Output file name prefix.
282286
-l, --lineagefile Lineagefile created with phylociraptor util get-lineage.
283287
-e, --level Taxonomic level in lineage file which should be plotted.

bin/plot-tree.R

+10-7
Original file line numberDiff line numberDiff line change
@@ -283,11 +283,13 @@ if (level == "none" || lineage_file == "none") {
283283
for (ntree in 1:length(treenames)) {
284284
#extract filename information:
285285
treename <- treenames[ntree]
286-
bs_cutoff <- strsplit(strsplit(treename,"/")[[1]][2],"-")[[1]][2]
286+
287+
bs_cutoff <- strsplit(strsplit(treename,"cutoff-")[[1]][2],"/")[[1]][1]
287288
algorithm <- strsplit(treename,"/")[[1]][3]
288-
alitrim <- strsplit(treename,"/")[[1]][4]
289+
alitrim <- strsplit(strsplit(treename,".",fixed=T)[[1]][1], "/")[[1]][5]
290+
hash <- strsplit(strsplit(treename,".",fixed=T)[[1]][2], "/")[[1]][1]
289291
prefix <- paste( algorithm, alitrim, bs_cutoff, sep="-")
290-
292+
291293
#reroot tree
292294
tree <- read.tree(treename)
293295
if (outgroup != "none") { #reroot tree in case an outgroup was specified
@@ -308,7 +310,7 @@ if (level == "none" || lineage_file == "none") {
308310
# extract legend then remove it
309311
cat(paste0(" Write PDF: ",prefix,"-",level,"-tree.pdf\n"))
310312
pdf(file = paste0(prefix,"-",level,"-tree.pdf"), width = 10, height=get_pdf_height(tree))
311-
print(t2 + theme(legend.position="none") + plot_annotation(title = prefix, caption=paste0("Taxonomy: ", level,". Random seed:", seed,".\nOutgroup: ", outgroups), theme=theme(plot.title=element_text(hjust=0.5, size=16))))#+ plot_layout(guides = 'none')# & theme(legend.position='bottom')
313+
print(t2 + theme(legend.position="none") + plot_annotation(title = prefix, caption=paste0("Taxonomy: ", level,". Random seed: ", seed,".\nOutgroup: ", outgroups, "\nHash: ", hash), theme=theme(plot.title=element_text(hjust=0.5, size=16))))#+ plot_layout(guides = 'none')# & theme(legend.position='bottom')
312314
garbage <- dev.off()
313315
if (singlet == TRUE) {break}
314316
}
@@ -337,9 +339,10 @@ if (level == "none" || lineage_file == "none") {
337339
for (ntree in 1:length(treenames)) {
338340
#extract filename information:
339341
treename <- treenames[ntree]
340-
bs_cutoff <- strsplit(strsplit(treename,"/")[[1]][2],"-")[[1]][2]
342+
bs_cutoff <- strsplit(strsplit(treename,"cutoff-")[[1]][2],"/")[[1]][1]
341343
algorithm <- strsplit(treename,"/")[[1]][3]
342-
alitrim <- strsplit(treename,"/")[[1]][4]
344+
alitrim <- strsplit(strsplit(treename,".",fixed=T)[[1]][1], "/")[[1]][5]
345+
hash <- strsplit(strsplit(treename,".",fixed=T)[[1]][2], "/")[[1]][1]
343346
prefix <- paste( algorithm, alitrim, bs_cutoff, sep="-")
344347
cat(paste0("Plot tree: ", prefix, "\n"))
345348
#reroot tree
@@ -469,7 +472,7 @@ if (level == "none" || lineage_file == "none") {
469472
if (single == "yes") {
470473
print(t2 + theme(legend.position="none") + plot_annotation(title = prefix, caption=paste0("Taxonomy: ", level,". Random seed:", seed,".\nOutgroup: ", outgroups), theme=theme(plot.title=element_text(hjust=0.5, size=16))))#+ plot_layout(guides = 'none')# & theme(legend.position='bottom')
471474
} else {
472-
print(t2 + t1 + theme(legend.position="none") + plot_annotation(tag_levels = 'A', title = prefix, caption=paste0("A) Topology without branch lengths. B) Topology with branch lengths and collapsed taxonomic groups. Taxonomy: ", level,". Random seed:", seed,".\nOutgroup: ", outgroups), theme=theme(plot.title=element_text(hjust=0.5, size=16))) + plot_layout(design = layout))#+ plot_layout(guides = 'none')# & theme(legend.position='bottom')
475+
print(t2 + t1 + theme(legend.position="none") + plot_annotation(tag_levels = 'A', title = prefix, caption=paste0("A) Topology without branch lengths. B) Topology with branch lengths and collapsed taxonomic groups. Taxonomy: ", level,". Random seed: ", seed,".\nOutgroup: ", outgroups, "\nHash: ", hash), theme=theme(plot.title=element_text(hjust=0.5, size=16))) + plot_layout(design = layout))#+ plot_layout(guides = 'none')# & theme(legend.position='bottom')
473476
}
474477
garbage <- dev.off()
475478
if (single == TRUE) {break}

phylociraptor

+58-8
Original file line numberDiff line numberDiff line change
@@ -889,6 +889,7 @@ elif args.command == "util":
889889
qs_parser.add_argument("-l", "--lineagefile", action="store")
890890
qs_parser.add_argument("-b", "--stopby", action="store", default=None)
891891
qs_parser.add_argument("-a", "--selecttaxa", action="store", default="")
892+
qs_parser.add_argument("--config-file", dest="configfile", action="store", default="data/config.yaml")
892893
qs_parser.add_argument("--quiet", action="store_true", default=False)
893894
qs_args = qs_parser.parse_args(args.arguments)
894895
if qs_args.help:
@@ -904,10 +905,34 @@ elif args.command == "util":
904905
sys.exit(1)
905906
elif qs_args.intrees == "all":
906907
print(now(), "Will compare all trees, this expects phylociraptor mltree and/or speciestree to be finished.")
907-
iqtree_trees = glob.glob("results/phylogeny-*/iqtree/*/concat.contree")
908-
raxml_trees = glob.glob("results/phylogeny-*/raxml/*/raxmlng.raxml.support")
909-
astral_trees = glob.glob("results/phylogeny-*/astral/*/species_tree.tre")
910-
all_trees = ",".join(iqtree_trees + raxml_trees + astral_trees)
908+
print(now(), "Calculating analysis hashes for mltree, this may take a few seconds...")
909+
hashes = {}
910+
hashes["mltree"] = collect_hashes("mltree", parse_config_file(qs_args.configfile), qs_args.configfile, debug=qs_args.debug, wd=os.getcwd())
911+
print(now(), "Calculating analysis hashes for speciestree, this may take a few seconds...")
912+
hashes["speciestree"] = collect_hashes("speciestree", parse_config_file(qs_args.configfile), qs_args.configfile, debug=qs_args.debug, wd=os.getcwd())
913+
combinations = []
914+
#first calculate all possible combinations:
915+
for bs in hashes["mltree"]["mltree"]["per"]:
916+
for treeprog in hashes["mltree"]["mltree"]["per"][bs]:
917+
for trimmer in hashes["mltree"]["mltree"]["per"][bs][treeprog]["iqtree"]:
918+
for aligner in hashes["mltree"]["mltree"]["per"][bs][treeprog]["iqtree"][trimmer]:
919+
if treeprog == "iqtree":
920+
filename = "concat.contree"
921+
elif treeprog == "raxml":
922+
filename = "raxmlng.raxml.support"
923+
combinations.append("results/phylogeny/" + treeprog + "/bootstrap-cutoff-" + bs + "/" + aligner + "-" + trimmer + "." + hashes["mltree"]["mltree"]["per"][bs][treeprog]["iqtree"][trimmer][aligner] + "/" + filename)
924+
for bs in hashes["speciestree"]["speciestree"]["per"]:
925+
for treeprog in hashes["speciestree"]["speciestree"]["per"][bs]:
926+
for trimmer in hashes["speciestree"]["speciestree"]["per"][bs][treeprog]["iqtree"]:
927+
for aligner in hashes["speciestree"]["speciestree"]["per"][bs][treeprog]["iqtree"][trimmer]:
928+
combinations.append("results/phylogeny/" + treeprog + "/bootstrap-cutoff-" + bs + "/" + aligner + "-" + trimmer + "." + hashes["speciestree"]["speciestree"]["per"][bs][treeprog]["iqtree"][trimmer][aligner] + "/species_tree.tre")
929+
# now check which files are there in the file system:
930+
iqtree_trees = glob.glob("results/phylogeny/iqtree/bootstrap-cutoff-*/*/concat.contree")
931+
raxml_trees = glob.glob("results/phylogeny/raxml/bootstrap-cutoff-*/*/raxmlng.raxml.support")
932+
astral_trees = glob.glob("results/phylogeny/astral/bootstrap-cutoff-*/*/species_tree.tre")
933+
all_trees = iqtree_trees + raxml_trees + astral_trees
934+
all_trees = [tree for tree in all_trees if tree in combinations] #keep only files which are specified in config file.
935+
all_trees = ",".join(all_trees)
911936
else:
912937
all_trees = qs_args.intrees
913938
cmd = ["singularity", "exec", "-B", os.getcwd(), "docker://reslp/phylopy:2", "python", "bin/estimate_conflict.py", "-i", all_trees, "-o", qs_args.outfile, "-s", qs_args.seed, "-t", qs_args.threads]
@@ -937,6 +962,7 @@ elif args.command == "util":
937962
qs_parser.add_argument("-l", "--lineagefile", action="store", default="none")
938963
qs_parser.add_argument("-e", "--level", action="store", default="none")
939964
qs_parser.add_argument("-s", "--seed", action="store", default="random")
965+
qs_parser.add_argument("--config-file", dest="configfile", action="store", default="data/config.yaml")
940966
qs_parser.add_argument("--quiet", action="store_true", default=False)
941967
qs_parser.add_argument("--single", action="store_true", default=False)
942968
qs_args = qs_parser.parse_args(args.arguments)
@@ -949,10 +975,34 @@ elif args.command == "util":
949975
sys.exit(1)
950976
elif qs_args.intrees == "all":
951977
print(now(), "Will compare all trees, this expects phylociraptor mltree and/or speciestree to be finished.")
952-
iqtree_trees = glob.glob("results/phylogeny-*/iqtree/*/concat.contree")
953-
raxml_trees = glob.glob("results/phylogeny-*/raxml/*/raxmlng.raxml.support")
954-
astral_trees = glob.glob("results/phylogeny-*/astral/*/species_tree.tre")
955-
all_trees = ",".join(iqtree_trees + raxml_trees + astral_trees)
978+
print(now(), "Calculating analysis hashes for mltree, this may take a few seconds...")
979+
hashes = {}
980+
hashes["mltree"] = collect_hashes("mltree", parse_config_file(qs_args.configfile), qs_args.configfile, debug=qs_args.debug, wd=os.getcwd())
981+
print(now(), "Calculating analysis hashes for speciestree, this may take a few seconds...")
982+
hashes["speciestree"] = collect_hashes("speciestree", parse_config_file(qs_args.configfile), qs_args.configfile, debug=qs_args.debug, wd=os.getcwd())
983+
combinations = []
984+
#first calculate all possible combinations:
985+
for bs in hashes["mltree"]["mltree"]["per"]:
986+
for treeprog in hashes["mltree"]["mltree"]["per"][bs]:
987+
for trimmer in hashes["mltree"]["mltree"]["per"][bs][treeprog]["iqtree"]:
988+
for aligner in hashes["mltree"]["mltree"]["per"][bs][treeprog]["iqtree"][trimmer]:
989+
if treeprog == "iqtree":
990+
filename = "concat.contree"
991+
elif treeprog == "raxml":
992+
filename = "raxmlng.raxml.support"
993+
combinations.append("results/phylogeny/" + treeprog + "/bootstrap-cutoff-" + bs + "/" + aligner + "-" + trimmer + "." + hashes["mltree"]["mltree"]["per"][bs][treeprog]["iqtree"][trimmer][aligner] + "/" + filename)
994+
for bs in hashes["speciestree"]["speciestree"]["per"]:
995+
for treeprog in hashes["speciestree"]["speciestree"]["per"][bs]:
996+
for trimmer in hashes["speciestree"]["speciestree"]["per"][bs][treeprog]["iqtree"]:
997+
for aligner in hashes["speciestree"]["speciestree"]["per"][bs][treeprog]["iqtree"][trimmer]:
998+
combinations.append("results/phylogeny/" + treeprog + "/bootstrap-cutoff-" + bs + "/" + aligner + "-" + trimmer + "." + hashes["speciestree"]["speciestree"]["per"][bs][treeprog]["iqtree"][trimmer][aligner] + "/species_tree.tre")
999+
# now check which files are there in the file system:
1000+
iqtree_trees = glob.glob("results/phylogeny/iqtree/bootstrap-cutoff-*/*/concat.contree")
1001+
raxml_trees = glob.glob("results/phylogeny/raxml/bootstrap-cutoff-*/*/raxmlng.raxml.support")
1002+
astral_trees = glob.glob("results/phylogeny/astral/bootstrap-cutoff-*/*/species_tree.tre")
1003+
all_trees = iqtree_trees + raxml_trees + astral_trees
1004+
all_trees = [tree for tree in all_trees if tree in combinations] #keep only files which are specified in config file.
1005+
all_trees = ",".join(all_trees)
9561006
else:
9571007
all_trees = qs_args.intrees
9581008
if qs_args.single:

0 commit comments

Comments
 (0)