Skip to content

Commit 322c3d2

Browse files
committed
add functions for hash functionality
1 parent 6da485e commit 322c3d2

File tree

1 file changed

+250
-1
lines changed

1 file changed

+250
-1
lines changed

rules/functions.smk

+250-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,253 @@
11
import sys
2+
import hashlib
3+
import yaml
4+
5+
#configfi=str(sys.argv[sys.argv.index("--configfile")+1])
6+
# Path to the YAML configuration file that the hash helpers in this file read.
# NOTE(review): hard-coded; the commented-out line above would instead take the
# path from the "--configfile" command-line argument — confirm which is intended.
configfi="data/config.yaml"
7+
8+
###all kinds of functions related to the hash functionality
9+
10+
def get_hash(previous, string_of_dict_paths, yamlfile):
    """Return a short hex digest for selected settings of a YAML file.

    Args:
        previous: String prepended to the selected settings before hashing
            (callers in this file pass the hash of the preceding stage, or "").
        string_of_dict_paths: Space-separated list of key paths; each path is
            a comma-separated list of two or three keys, e.g.
            "filtering,cutoff alignment,options,mafft".
        yamlfile: Path of the YAML file the values are read from.

    Returns:
        A 10-character hex string (5 bytes of SHAKE-256 output).

    Raises:
        KeyError: If a requested path does not exist in the YAML file.

    Paths with any other number of keys than two or three are printed but
    otherwise ignored. Values are stored as ``str(value)``, and keys keep the
    order in which they first appear in ``string_of_dict_paths``.

    NOTE(review): the digest is computed from ``str(OrderedDict(...))``, so it
    depends on how the running Python version renders an OrderedDict — confirm
    that all environments producing/consuming these hashes agree.
    """
    import collections

    print("### GET HASH: " + string_of_dict_paths, yamlfile + " ###")
    with open(yamlfile) as handle:
        settings = yaml.safe_load(handle)

    selected = {}
    for spec in string_of_dict_paths.split(" "):
        print(spec)
        keys = spec.split(",")
        depth = len(keys)
        if depth == 2:
            print("level 2")
            section, name = keys
            if section not in selected:
                selected[section] = {name: str(settings[section][name])}
            elif name not in selected[section]:
                selected[section][name] = str(settings[section][name])
        elif depth == 3:
            print("level 3")
            section, group, name = keys
            if section not in selected:
                selected[section] = {group: {name: str(settings[section][group][name])}}
            elif group not in selected[section]:
                selected[section][group] = {name: str(settings[section][group][name])}
            elif name not in selected[section][group]:
                selected[section][group][name] = str(settings[section][group][name])

    print("FINAL: " + str(selected))
    ordered = collections.OrderedDict(selected)
    print(str(ordered))
    combined = str(previous + str(ordered))
    print(combined)
    # Five bytes of SHAKE-256 output give the 10 hex characters used as hash.
    digest = hashlib.shake_256(combined.encode()).hexdigest(5)
    print(digest)
    print("### DONE HASH ###")
    return digest
44+
45+
46+
47+
def _require_stage_output(parameter_file, stage):
    """Exit with status 1 unless the parameter file of an earlier stage exists."""
    import os  # local import: `os` is not imported at the top of this file

    if not os.path.isfile(parameter_file):
        print("Please doublecheck if the stage '"+stage+"' was run with the parameters currently specified in "+configfi)
        # Non-zero status so that the calling workflow sees the failure.
        sys.exit(1)


def _tree_inference_hashes(methods, hashes):
    """Build hashes[method][modeltest][trimmer][aligner] for a tree stage."""
    tree_hashes = {}
    for i in methods:
        tree_hashes[i] = {}
        for m in hashes['modeltest']:
            tree_hashes[i][m] = {}
            for t in hashes['filter-align']:
                tree_hashes[i][m][t] = {}
                for a in hashes['align']:
                    previous = hashes['modeltest'][m][t][a]
                    _require_stage_output("results/modeltest/parameters."+a+"-"+t+"."+previous+".yaml", "modeltest")
                    tree_hashes[i][m][t][a] = get_hash(previous, "speciestree,options,"+i+" speciestree,include", configfi)
    return tree_hashes


def collect_hashes(mode):
    """Compute the chained parameter hashes of all stages up to ``mode``.

    Each stage hash is derived with get_hash() from the hash of the previous
    stage plus the stage's own settings in ``configfi``, so a change in an
    early stage changes the hashes of all later stages.

    Args:
        mode: Name of the last stage to compute. Handled values are
            "orthology", "filter-orthology", "align", "filter-align",
            "modeltest", "speciestree" and "mltree".

    Returns:
        A dict keyed by stage name ('orthology', 'filter-orthology', 'align',
        'filter-align', 'modeltest', 'tree_inference') holding the hashes
        gathered so far. For any other ``mode`` the function returns None.

    Raises:
        SystemExit: With status 1 if the parameter file of a required earlier
            stage is not found under ``results/``.

    Method lists are read from the global ``config`` (not defined in this
    block; presumably the Snakemake config — confirm), while the hashed values
    are read from the file ``configfi``.
    """
    hashes = {}

    #orthology
    orthology_paths = " ".join([
        "orthology,method",
        "orthology,busco_options,set",
        "orthology,busco_options,version",
        "orthology,busco_options,mode",
        "orthology,busco_options,augustus_species",
        "orthology,busco_options,additional_parameters",
    ])
    hashes['orthology'] = get_hash("", orthology_paths, configfi)
    if mode == "orthology":
        print("Gathered hashes until 'orthology'")
        print(hashes['orthology'])
        return hashes

    #filter-orthology
    hashes['filter-orthology'] = get_hash(hashes['orthology'], "filtering,dupseq filtering,cutoff filtering,minsp filtering,seq_type filtering,exclude_orthology", configfi)
    if mode == "filter-orthology":
        print("Gathered hashes until 'filter-orthology'")
        print(hashes['filter-orthology'])
        return hashes

    #align
    hashes['align'] = {}
    for a in config["alignment"]["method"]:
        hashes['align'][a] = get_hash(hashes['filter-orthology'], "alignment,options,"+a, configfi)
    if mode == "align":
        print("Gathered hashes until 'align'")
        print(hashes['align'])
        return hashes

    #filter-alignment
    hashes['filter-align'] = {}
    for t in config["trimming"]["method"]:
        hashes['filter-align'][t] = {}
        for a in hashes['align']:
            _require_stage_output("results/alignments/full/"+a+"."+hashes['align'][a]+"/parameters.yaml", "align")
            hashes['filter-align'][t][a] = get_hash(hashes['align'][a], "trimming,options,"+t+" trimming,min_parsimony_sites", configfi)
    if mode == "filter-align":
        print("Gathered hashes until 'filter-align'")
        print(hashes['filter-align'])
        return hashes

    #modeltest
    hashes['modeltest'] = {}
    for m in config["modeltest"]["method"]:
        hashes['modeltest'][m] = {}
        for t in config["trimming"]["method"]:
            hashes['modeltest'][m][t] = {}
            for a in hashes['align']:
                _require_stage_output("results/alignments/parameters."+a+"-"+t+"."+hashes['filter-align'][t][a]+".yaml", "filter-align")
                hashes['modeltest'][m][t][a] = get_hash(hashes['filter-align'][t][a], "modeltest,options,"+m+" modeltest,bootstrap", configfi)
    if mode == "modeltest":
        print("Gathered hashes until 'modeltest'")
        print(hashes['modeltest'])
        return hashes

    hashes['tree_inference'] = {}

    #speciestree
    if mode == "speciestree":
        hashes['tree_inference'] = _tree_inference_hashes(config["speciestree"]["method"], hashes)
        print("Gathered hashes until 'speciestree'")
        print(hashes['tree_inference'])
        return hashes

    #mltree
    if mode == "mltree":
        # NOTE(review): the mltree methods are hashed with the
        # "speciestree,options,<method>" and "speciestree,include" settings,
        # exactly as before. This looks like a copy-paste from the speciestree
        # branch; confirm which config keys belong to mltree before changing
        # it, because any change alters the resulting hashes.
        hashes['tree_inference'] = _tree_inference_hashes(config["mltree"]["method"], hashes)
        print("Gathered hashes until 'mltree'")
        # The results are stored under 'tree_inference'; there is no 'mltree' key.
        print(hashes['tree_inference'])
        return hashes
148+
149+
150+
def trigger(current_yaml, config_yaml):
    """Decide whether a submodule has to be (re)started.

    Args:
        current_yaml: Path of the parameter file written by an earlier run.
        config_yaml: Value handed back when a (re)run is needed.

    Returns:
        The string "None" (not the None object) when ``current_yaml`` exists
        and find_top() reports no difference to the global ``config``;
        otherwise ``config_yaml``.
    """
    # No parameter file yet: the submodule has not been run with any settings.
    if not os.path.isfile(current_yaml):
        print("File "+current_yaml+" is not there")
        return config_yaml

    with open(current_yaml) as handle:
        stored_parameters = yaml.safe_load(handle)
    print(str(stored_parameters))

    if find_top(stored_parameters, config):
        print("ALL GOOD")
        return "None"
    return config_yaml
165+
166+
167+
def get_all_keys(d):
    """Yield every key of a nested dictionary, depth-first.

    A key is yielded before the keys of its own dict value, and keys that
    occur in several places are yielded once per occurrence.

    Args:
        d: A dictionary whose values may themselves be dictionaries.

    Yields:
        Each key found in ``d`` and in all nested dictionaries.
    """
    for key, value in d.items():
        yield key
        if isinstance(value, dict):
            yield from get_all_keys(value)
174+
175+
def search(d, k, path=None):
    """Return the list of keys leading to the first occurrence of ``k``.

    A key at the current level is preferred over the same key deeper down;
    otherwise the sub-dictionaries are searched in iteration order.

    Args:
        d: The (nested) dictionary to search; any non-dict yields False.
        k: The key to look for.
        path: Keys already walked. Used by the recursion; a list passed in by
            the caller is extended in place.

    Returns:
        The path as a list ending in ``k``, or False if ``k`` is not found.
    """
    if path is None:
        path = []

    # Reached a terminal value - nothing to find below it.
    if not isinstance(d, dict):
        return False

    if k in d:
        path.append(k)
        return path

    for child_key, child in d.items():
        # Tentatively step into the child; undo the step if it is a dead end.
        path.append(child_key)
        if search(child, k, path) is not False:
            return path
        path.pop()
    return False
205+
206+
def _iter_leaves(node, prefix=()):
    """Yield (path, value) for every non-dict value of a nested dict, depth-first."""
    for key, value in node.items():
        path = prefix + (key,)
        if isinstance(value, dict):
            yield from _iter_leaves(value, path)
        else:
            yield path, value


def find_top(t, against):
    """Compare the terminal values of ``t`` with the same paths in ``against``.

    Values are compared as strings (``str(a) == str(b)``). Only values found
    two or three keys deep are compared; top-level values are printed but not
    compared, and deeper values are ignored (unchanged from the previous
    behaviour — NOTE(review): confirm that this restriction is intended).

    Every leaf is visited with its own path, so keys with the same name in
    different branches are each compared at their actual location.

    Args:
        t: Nested dictionary whose terminal values are checked.
        against: Nested dictionary providing the reference values.

    Returns:
        True if no compared value differs, False at the first difference.

    Raises:
        KeyError: If a compared path of ``t`` does not exist in ``against``.
    """
    for path, value in _iter_leaves(t):
        depth = len(path)
        if depth > 3:
            continue
        # str() everywhere: keys and values may be numbers, booleans or lists.
        print(str(depth)+" - key: "+str(path[-1])+" - "+str(list(path))+" - "+str(value))
        if depth == 1:
            continue

        expected = against
        for key in path:
            expected = expected[key]
        print("config: "+str(expected))

        if str(value) == str(expected):
            print("EQUAL")
        else:
            print("NOT EQUAL")
            return False
    return True
238+
239+
240+
####
241+
def get_input_genes(wildcards):
    """Return the genes whose last table column reaches the bootstrap cutoff.

    Reads the tab-separated file
    ``results/modeltest/genetree_filter_<aligner>_<alitrim>.<hash>.txt``,
    where the hash is taken from the global ``modeltest_hashes`` under the
    "iqtree" entry. The first column is used as gene name and the last column
    is parsed as an integer and compared with ``wildcards.bootstrap``.

    Args:
        wildcards: Object providing the attributes ``bootstrap``, ``aligner``
            and ``alitrim``.

    Returns:
        List of gene names with a last-column value >= the cutoff, in file
        order.
    """
    bs_cutoff = int(wildcards.bootstrap)
    table_path = "".join([
        "results/modeltest/genetree_filter_",
        wildcards.aligner,
        "_",
        wildcards.alitrim,
        ".",
        modeltest_hashes["iqtree"][wildcards.alitrim][wildcards.aligner],
        ".txt",
    ])

    selected_genes = []
    with open(table_path) as table:
        for row in table:
            gene_name = row.partition("\t")[0]
            support = int(row.strip().rsplit("\t", 1)[-1])
            if support >= bs_cutoff:
                selected_genes.append(gene_name)
    return selected_genes
2251

3252
def get_aligners():
4253
aligners = config["alignment"]["method"]
@@ -39,7 +288,7 @@ def get_trimmers():
39288
sys.exit(1)
40289

41290
def get_treemethods():
42-
trees = config["tree"]["method"]
291+
trees = config["mltree"]["method"]
43292
if isinstance(trees, str):
44293
if ", " in trees:
45294
return trees.split(", ")

0 commit comments

Comments
 (0)