Skip to content

Commit 4a1a44d

Browse files
committed
Reorganize how phylogeny results are stored in results
1 parent ca85109 commit 4a1a44d

File tree

7 files changed

+106
-109
lines changed

7 files changed

+106
-109
lines changed

bin/libphylociraptor/check.py

+42-41
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,10 @@ def has_outfile(mode="", previous_mode = "", hashes={}, previous_hashes={}, debu
2323
for f in outfile_check_dict[mode]:
2424
f = f.replace("HASH", hashes[mode]["global"]).replace("PREVIOUS",previous_hashes[previous_mode]["global"])
2525
if not os.path.isfile(f):
26-
print(" File not found:", f)
26+
print(" This final output file for this step is missing:", f)
2727
return False
2828
return True
29+
2930
def check_is_running(mode="", previous_mode="", hashes={}, previous_hashes={}, debug=False, verbose=False):
3031
#print(hashes[mode])
3132
#print(" ")
@@ -128,7 +129,7 @@ def check_is_running(mode="", previous_mode="", hashes={}, previous_hashes={}, d
128129

129130
for bs in hashes[mode]["per"].keys():
130131
print(" Bootstrap-cutoff:", bs)
131-
if not os.path.isdir("results/phylogeny-"+bs+"/quicktree"):
132+
if not os.path.isdir("results/phylogeny/bootstrap-cutoff-"+bs+"/quicktree"):
132133
print("\tAnalysis for boostrap-cutoff "+bs+" not yet started.")
133134
continue
134135
missing = []
@@ -137,7 +138,7 @@ def check_is_running(mode="", previous_mode="", hashes={}, previous_hashes={}, d
137138
alitrim = aligner + "-" + trimmer + "." + previous_hashes[previous_mode]["per"]["iqtree"][trimmer][aligner]
138139
genes_to_check = [gene.split("/")[-1].split("_")[0] for gene in glob.glob("results/alignments/filtered/"+alitrim+"/*.fas")]
139140
njtree = aligner + "-" + trimmer + "." + hashes[mode]["per"][bs]["quicktree"]["iqtree"][trimmer][aligner]
140-
if os.path.isfile("results/phylogeny-"+bs+"/quicktree/"+njtree+"/njtree.tre"):
141+
if os.path.isfile("results/phylogeny/quicktree/bootstrap-cutoff-"+bs+"/"+njtree+"/njtree.tre"):
141142
if verbose:
142143
print("\tnjtree for", njtree, "done.")
143144
else:
@@ -149,53 +150,53 @@ def check_is_running(mode="", previous_mode="", hashes={}, previous_hashes={}, d
149150
if mode == "mltree":
150151
for bs in hashes[mode]["per"].keys():
151152
print(" Bootstrap-cutoff:", bs)
152-
if not os.path.isdir("results/phylogeny-"+bs+"/astral"):
153-
print("\tAnalysis for boostrap-cutoff "+bs+" not yet started.")
154-
continue
155153
missing = []
156154
if "iqtree" in hashes[mode]["per"][bs].keys(): # this can be made more efficient!
157-
for trimmer in hashes[mode]["per"][bs]["iqtree"]["iqtree"].keys(): #not sure why iqtree is in this
158-
for aligner in hashes[mode]["per"][bs]["iqtree"]["iqtree"][trimmer].keys(): #not sure why iqtree is in this
159-
alitrim = aligner + "-" + trimmer + "." + previous_hashes[previous_mode]["per"]["iqtree"][trimmer][aligner]
160-
genes_to_check = [gene.split("/")[-1].split("_")[0] for gene in glob.glob("results/modeltest/"+alitrim+"/*.treefile")]
161-
mltree = aligner + "-" + trimmer + "." + hashes[mode]["per"][bs]["iqtree"]["iqtree"][trimmer][aligner]
162-
if os.path.isfile("results/phylogeny-"+bs+"/iqtree/"+mltree+"/concat.contree"):
163-
if verbose:
164-
print("\tIQ-Tree for", mltree, "done.")
165-
else:
166-
missing.append(mltree)
167-
if verbose:
168-
print("\tMissing IQ-Tree for", mltree)
169-
if len(missing) > 0:
170-
print("\tTotal number of missing IQ-Tree trees:", len(missing))
155+
if not os.path.isdir("results/phylogeny/iqtree/bootstrap-cutoff-"+bs):
156+
print("\t IQ-Tree analysis for boostrap-cutoff "+bs+" not yet started.")
171157
else:
172-
print("\tAll IQ-Tree trees finished.")
158+
for trimmer in hashes[mode]["per"][bs]["iqtree"]["iqtree"].keys(): #not sure why iqtree is in this
159+
for aligner in hashes[mode]["per"][bs]["iqtree"]["iqtree"][trimmer].keys(): #not sure why iqtree is in this
160+
alitrim = aligner + "-" + trimmer + "." + previous_hashes[previous_mode]["per"]["iqtree"][trimmer][aligner]
161+
genes_to_check = [gene.split("/")[-1].split("_")[0] for gene in glob.glob("results/modeltest/"+alitrim+"/*.treefile")]
162+
mltree = aligner + "-" + trimmer + "." + hashes[mode]["per"][bs]["iqtree"]["iqtree"][trimmer][aligner]
163+
if os.path.isfile("results/phylogeny/iqtree/bootstrap-cutoff-"+bs+"/"+mltree+"/concat.contree"):
164+
if verbose:
165+
print("\tIQ-Tree for", mltree, "done.")
166+
else:
167+
missing.append(mltree)
168+
if verbose:
169+
print("\tMissing IQ-Tree for", mltree)
170+
if len(missing) > 0:
171+
print("\tTotal number of missing IQ-Tree trees:", len(missing))
172+
else:
173+
print("\tAll IQ-Tree trees finished.")
173174
print()
174-
if not os.path.isdir("results/phylogeny-"+bs+"/astral"):
175-
print("\tAnalysis for boostrap-cutoff "+bs+" not yet started.")
176-
continue
177175
missing = []
178176
if "raxml" in hashes[mode]["per"][bs].keys(): # this can be made more efficient!
179-
for trimmer in hashes[mode]["per"][bs]["raxml"]["iqtree"].keys(): #not sure why iqtree is in this
180-
for aligner in hashes[mode]["per"][bs]["raxml"]["iqtree"][trimmer].keys(): #not sure why iqtree is in this
181-
alitrim = aligner + "-" + trimmer + "." + previous_hashes[previous_mode]["per"]["iqtree"][trimmer][aligner]
182-
genes_to_check = [gene.split("/")[-1].split("_")[0] for gene in glob.glob("results/modeltest/"+alitrim+"/*.treefile")]
183-
mltree = aligner + "-" + trimmer + "." + hashes[mode]["per"][bs]["raxml"]["iqtree"][trimmer][aligner]
184-
if os.path.isfile("results/phylogeny-"+bs+"/raxml/"+mltree+"/raxmlng.raxml.bestTree"):
185-
if verbose:
186-
print("\tRAxML-NG for", mltree, "done.")
187-
else:
188-
missing.append(mltree)
189-
if verbose:
190-
print("\tMissing RAxML-NG for", mltree)
191-
if len(missing) > 0:
192-
print("\tTotal number of missing RAxML-NG trees:", len(missing))
177+
if not os.path.isdir("results/phylogeny/raxml/bootstrap-cutoff-"+bs):
178+
print("\tRAxML analysis for boostrap-cutoff-"+bs+" not yet started.")
193179
else:
194-
print("\tAll RAxML-NG trees finished.")
180+
for trimmer in hashes[mode]["per"][bs]["raxml"]["iqtree"].keys(): #not sure why iqtree is in this
181+
for aligner in hashes[mode]["per"][bs]["raxml"]["iqtree"][trimmer].keys(): #not sure why iqtree is in this
182+
alitrim = aligner + "-" + trimmer + "." + previous_hashes[previous_mode]["per"]["iqtree"][trimmer][aligner]
183+
genes_to_check = [gene.split("/")[-1].split("_")[0] for gene in glob.glob("results/modeltest/"+alitrim+"/*.treefile")]
184+
mltree = aligner + "-" + trimmer + "." + hashes[mode]["per"][bs]["raxml"]["iqtree"][trimmer][aligner]
185+
if os.path.isfile("results/phylogeny/raxml/bootstrap-cutoff-"+bs+"/"+mltree+"/raxmlng.raxml.bestTree"):
186+
if verbose:
187+
print("\tRAxML-NG for", mltree, "done.")
188+
else:
189+
missing.append(mltree)
190+
if verbose:
191+
print("\tMissing RAxML-NG for", mltree)
192+
if len(missing) > 0:
193+
print("\tTotal number of missing RAxML-NG trees:", len(missing))
194+
else:
195+
print("\tAll RAxML-NG trees finished.")
195196
if mode == "speciestree":
196197
for bs in hashes[mode]["per"].keys():
197198
print(" Bootstrap-cutoff:", bs)
198-
if not os.path.isdir("results/phylogeny-"+bs+"/astral"):
199+
if not os.path.isdir("results/phylogeny/bootstrap-cutoff-"+bs):
199200
print("\tAnalysis for boostrap-cutoff "+bs+" not yet started.")
200201
continue
201202
missing = []
@@ -204,7 +205,7 @@ def check_is_running(mode="", previous_mode="", hashes={}, previous_hashes={}, d
204205
alitrim = aligner + "-" + trimmer + "." + previous_hashes[previous_mode]["per"]["iqtree"][trimmer][aligner]
205206
genes_to_check = [gene.split("/")[-1].split("_")[0] for gene in glob.glob("results/modeltest/"+alitrim+"/*.treefile")]
206207
sptree = aligner + "-" + trimmer + "." + hashes[mode]["per"][bs]["astral"]["iqtree"][trimmer][aligner]
207-
if os.path.isfile("results/phylogeny-"+bs+"/astral/"+sptree+"/species_tree.tre"):
208+
if os.path.isfile("results/phylogeny/astral/bootstrap-cutoff-"+bs+"/"+sptree+"/species_tree.tre"):
208209
if verbose:
209210
print("\tSpeciestree for", sptree, "done.")
210211
else:

phylociraptor

+3-3
Original file line numberDiff line numberDiff line change
@@ -788,7 +788,7 @@ elif args.command == "check":
788788
print(now(), "Welcome to phylociraptor check v%s" % version)
789789
check_parser = argparse.ArgumentParser(add_help=False)
790790
check_parser.add_argument("-h", "--help", action="store_true")
791-
check_parser.add_argument("-c", "--configfile", action="store", default="data/config.yaml")
791+
check_parser.add_argument("--config-file", dest="configfile", action="store", default="data/config.yaml")
792792
check_parser.add_argument("--verbose", action="store_true", default=False)
793793
check_parser.add_argument("--debug", action="store_true", dest="debug", default=False)
794794
check_args = check_parser.parse_args(args.arguments)
@@ -807,9 +807,9 @@ elif args.command == "check":
807807
currenthash = ""
808808
all_hashes = {}
809809
if check_args.configfile:
810-
print(now(), "Calculating analysis hashes, may take a few seconds...")
810+
print(now(), "Calculating analysis hashes, this may take a few seconds...")
811811
for mode in steps_to_check:
812-
all_hashes[mode] = collect_hashes(mode, parse_config_file(check_args.configfile), check_args.configfile, debug=False)
812+
all_hashes[mode] = collect_hashes(mode, parse_config_file(check_args.configfile), check_args.configfile, debug=check_args.debug)
813813

814814
step_status = {"setup": 0, "orthology": 0, "filter-orthology": 0, "align": 0, "filter-align": 0, "modeltest": 0, "njtree": 0, "mltree": 0, "speciestree": 0}
815815
for mode in steps_to_check:

rules/concatenate.smk

+11-7
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
include: "functions.smk"
1+
#include: "functions.smk"
22
import yaml
33

44
def get_alignment_dir(wildcards):
@@ -16,24 +16,28 @@ def previous_params_per(wildcards):
1616
def previous_params_global(wildcards):
1717
return "results/modeltest/parameters.modeltest."+previous_hash+".yaml"
1818

19-
19+
def get_concatenate_params(wildcards):
20+
if os.environ["MODE"] == "njtree":
21+
return "results/phylogeny/quicktree/bootstrap-cutoff-"+wildcards.bootstrap+"/parameters.njtree.quicktree-"+wildcards.aligner+"-"+wildcards.alitrim+"."+wildcards.hash+".yaml"
22+
elif os.environ["MODE"] == "mltree":
23+
return "results/phylogeny/raxml/bootstrap-cutoff-"+wildcards.bootstrap+"/parameters.mltree.raxml-"+wildcards.aligner+"-"+wildcards.alitrim+"."+wildcards.hash+".yaml"
2024

2125
rule concatenate:
2226
input:
2327
checkpoint = get_modeltest_checkpoint,
2428
params = get_concatenate_params
2529
output:
26-
alignment = "results/phylogeny-{bootstrap}/concatenate/{aligner}-{alitrim}.{hash}/concat.fas",
27-
phylip_alignment = "results/phylogeny-{bootstrap}/concatenate/{aligner}-{alitrim}.{hash}/concat.phy",
28-
stockholm_alignment = "results/phylogeny-{bootstrap}/concatenate/{aligner}-{alitrim}.{hash}/concat.sto",
29-
statistics = "results/phylogeny-{bootstrap}/concatenate/{aligner}-{alitrim}.{hash}/statistics.txt"
30+
alignment = "results/phylogeny/concatenate/bootstrap-cutoff-{bootstrap}/{aligner}-{alitrim}.{hash}/concat.fas",
31+
phylip_alignment = "results/phylogeny/concatenate/bootstrap-cutoff-{bootstrap}/{aligner}-{alitrim}.{hash}/concat.phy",
32+
stockholm_alignment = "results/phylogeny/concatenate/bootstrap-cutoff-{bootstrap}/{aligner}-{alitrim}.{hash}/concat.sto",
33+
statistics = "results/phylogeny/concatenate/bootstrap-cutoff-{bootstrap}/{aligner}-{alitrim}.{hash}/statistics.txt"
3034
benchmark:
3135
"results/statistics/benchmarks/tree-{bootstrap}/concatenate_{aligner}_{alitrim}.{hash}txt"
3236
params:
3337
wd = os.getcwd(),
3438
ids = config["species"],
3539
models = get_best_models,
36-
target_dir = "results/phylogeny-{bootstrap}/concatenate/{aligner}-{alitrim}.{hash}",
40+
target_dir = "results/phylogeny/concatenate/bootstrap-cutoff-{bootstrap}/{aligner}-{alitrim}.{hash}",
3741
alidir = get_alignment_dir,
3842
genes = get_input_genes
3943
singularity:

rules/functions.smk

+6-2
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,14 @@
11
import os
22
import sys
3+
import glob
34
sys.path.insert(0, os.getcwd()+"/bin") #needed so that phylociraptor module can be imported.
4-
#import hashlib
5-
#import yaml
5+
import yaml
66
from libphylociraptor.hashing import *
77

8+
# get list of containers to use:
9+
with open("data/containers.yaml", "r") as yaml_stream:
10+
containers = yaml.safe_load(yaml_stream)
11+
812
#configfi=str(sys.argv[sys.argv.index("--configfile")+1])
913
configfi=os.environ["CONFIG"]
1014
print("CONFIGFILE:", configfi)

0 commit comments

Comments
 (0)