Skip to content

Commit a5994ba

Browse files
committed
orthology: Double-check names in the CSV file against names already in results/assemblies to better identify whether samples have been removed, and to prevent orthology runs for samples not in the CSV file.
1 parent cd1d65a commit a5994ba

File tree

1 file changed

+2
-0
lines changed

1 file changed

+2
-0
lines changed

rules/orthology.smk

+2
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import yaml
99
sample_data = pd.read_csv(config["species"])
1010
sample_data["species"] = sample_data["species"].str.replace(" ","_")
1111
sample_data.set_index("species", drop=False)
12+
samples_from_csv = sample_data["species"].to_list() # needed for crosscheck if taxa are removed or renamed in csv file.
1213
print(sample_data["species"].to_list())
1314

1415
# get list of containers to use:
@@ -53,6 +54,7 @@ def get_assemblies(wildcards):
5354
def select_species(dir="results/assemblies"):
5455
sps = [sp.split("/")[-1].split(".fna")[0] for sp in glob.glob(dir+"/*fna*")]
5556
# print("Species ("+str(len(sps))+"):"+str(sps))
57+
sps = list(set(sps).intersection(samples_from_csv)) # crosscheck with CSV file to see if taxa have been removed
5658
if config["exclude_orthology"]:
5759
blacklist = []
5860
with open(config["exclude_orthology"]) as file:

0 commit comments

Comments
 (0)