diff --git a/orthofinder.py b/orthofinder.py index 859a7ea04b1588168639d61532c94fb51024ed33..45884e55ba207b700ba1d970acb1e6ff027b4b6b 100644 --- a/orthofinder.py +++ b/orthofinder.py @@ -25,6 +25,9 @@ # For any enquiries send an email to David Emms # david_emms@hotmail.com +nThreadsDefault = 16 +nAlgDefault = 1 + import sys # Y import subprocess # Y import os # Y @@ -47,10 +50,7 @@ import Queue # Y import warnings # Y import time # Y -sys.path.append(os.path.split(os.path.abspath(__file__))[0] + "/scripts") -import get_orthologues - -version = "0.6.1" +version = "1.0.0" fastaExtensions = {"fa", "faa", "fasta", "fas"} picProtocol = 1 if sys.platform.startswith("linux"): @@ -886,8 +886,6 @@ class WaterfallMethod: OrthoFinder ------------------------------------------------------------------------------- """ -nThreadsDefault = 16 -nAlgDefault = 1 mclInflation = 1.5 def CanRunCommand(command, qAllowStderr = False): @@ -1041,6 +1039,9 @@ def AssignIDsToSequences(fastaDirectory, outputDirectory): return returnFilenames, originalFastaFilenames, idsFilename, speciesFilename, newSpeciesIDs, previousSpeciesIDs if __name__ == "__main__": + sys.path.append(os.path.split(os.path.abspath(__file__))[0] + "/scripts") + import get_orthologues + print("\nOrthoFinder version %s Copyright (C) 2014 David Emms\n" % version) print(""" This program comes with ABSOLUTELY NO WARRANTY. This is free software, and you are welcome to redistribute it under certain conditions. @@ -1380,3 +1381,4 @@ if __name__ == "__main__": print(orthogroupsResultsFilesString) print(orthologuesResultsFilesString.rstrip()) PrintCitation() + util.PrintTime("Done") diff --git a/scripts/get_orthologues.py b/scripts/get_orthologues.py index ab6a70dac4b081e17bf75e03562317e69bfd2937..f22758c59f832ac2bd83bb8f644856f6fb04da06 100755 --- a/scripts/get_orthologues.py +++ b/scripts/get_orthologues.py @@ -9,6 +9,7 @@ Created on Thu Jun 9 16:00:33 2016 import sys import os import ete2 +import subprocess import numpy as np from collections import Counter, defaultdict import cPickle as pic @@ -139,7 +140,7 @@ def lil_max(M): # ASTRAL def GetOGsToUse(ogSet): - return range(50, min(10000, len(ogSet.ogs))) + return range(100, min(10000, len(ogSet.ogs))) def CreateTaxaMapFile(ogSet, i_ogs_to_use, outputFN): """Get max number of sequences per species""" @@ -180,8 +181,8 @@ def RunAstral(ogSet, treesPat, workingDir): CreateTaxaMapFile(ogSet, i_ogs_to_use, tmFN) treesFN = dir_astral + "TreesFile.txt" ConcatenateTrees(i_ogs_to_use, treesPat, treesFN) - speciesTreeFN = workingDir + "SpeciesTree.txt" - print(" ".join(["java", "-Xmx6000M", "-jar", "/home/david/software/ASTRAL-multiind/Astral/astral.4.8.0.jar", "-a", tmFN, "-i", treesFN, "-o", speciesTreeFN])) + speciesTreeFN = workingDir + "SpeciesTree_astral.txt" + subprocess.call(" ".join(["java", "-Xmx6000M", "-jar", "~/software/ASTRAL-multiind/Astral/astral.4.8.0.jar", "-a", tmFN, "-i", treesFN, "-o", speciesTreeFN]), shell=True) return speciesTreeFN # ============================================================================================================================== @@ -321,13 +322,14 @@ class DendroBLASTTrees(object): values = " ".join(["%.6g" % (0. + M[i,j]) for j in range(n)]) # hack to avoid printing out "-0" outfile.write(values + "\n") treeFN = os.path.split(self.treesPatIDs)[0] + "/SpeciesTree_ids.txt" - cmd = " ".join(["fastme", "-i", speciesMatrixFN, "-o", treeFN, "-s"]) + cmd = " ".join(["fastme", "-i", speciesMatrixFN, "-o", treeFN, "-w", "O"] + (["-s"] if n < 1000 else [])) return cmd, treeFN def PrepareGeneTreeCommand(self): cmds = [] for iog in xrange(len(self.ogSet.ogs)): - cmds.append([" ".join(["fastme", "-i", self.distPat % iog, "-o", self.treesPatIDs % iog, "-s"])]) + nTaxa = len(self.ogSet.ogs[iog]) + cmds.append([" ".join(["fastme", "-i", self.distPat % iog, "-o", self.treesPatIDs % iog, "-w", "O"] + (["-s"] if nTaxa < 1000 else []))]) return cmds def RenameTreeTaxa(self, treeFN, newTreeFilename, idsMap, qFixNegatives=False): @@ -355,7 +357,9 @@ class DendroBLASTTrees(object): seqDict = self.ogSet.Spec_SeqDict() for iog in xrange(len(self.ogSet.ogs)): self.RenameTreeTaxa(self.treesPatIDs % iog, self.treesPat % iog, seqDict, qFixNegatives=True) - self.RenameTreeTaxa(spTreeFN_ids, self.workingDir + "SpeciesTree_unrooted.txt", self.ogSet.SpeciesDict(), qFixNegatives=True) + self.RenameTreeTaxa(spTreeFN_ids, self.workingDir + "SpeciesTree_unrooted.txt", self.ogSet.SpeciesDict(), qFixNegatives=True) + #spTreeFN_ids = RunAstral(self.ogSet, self.treesPatIDs, self.workingDir) + #self.RenameTreeTaxa(spTreeFN_ids, self.workingDir + "SpeciesTree_astral_unrooted.txt", self.ogSet.SpeciesDict(), qFixNegatives=True) return len(ogs), D, spPairs, spTreeFN_ids # ============================================================================================================================== @@ -434,7 +438,7 @@ def RunDlcpar(treesPat, ogSet, nOGs, speciesTreeFN, workingDir): if not os.path.exists(dlcparResultsDir): os.mkdir(dlcparResultsDir) filenames = [rootedTreeDir + os.path.split(treesPat % i)[1] for i in xrange(nOGs)] - dlcCommands = ['dlcpar_search -s %s -S %s -D 1 -C 0.5 %s -O %s' % (speciesTreeFN, geneMapFN, fn, dlcparResultsDir + os.path.splitext(os.path.split(fn)[1])[0]) for fn in filenames] + dlcCommands = ['dlcpar_search -s %s -S %s -D 1 -C 0.125 %s -O %s' % (speciesTreeFN, geneMapFN, fn, dlcparResultsDir + os.path.splitext(os.path.split(fn)[1])[0]) for fn in filenames] # print(dlcCommands[0]) # use this to run in parallel tfo.RunParallelCommandSets(nThreads, [[c] for c in dlcCommands], qHideStdout = True) @@ -535,7 +539,6 @@ def GetOrthologues(orthofinderWorkingDir, orthofinderResultsDir, clustersFilenam print("Best outgroup for species tree:") for r in roots: print(" " + (", ".join([spDict[s] for s in r])) ) -# speciesTreeFN = RunAstral(ogSet, db.treesPat, resultsDir) qMultiple = len(roots) > 1 if qMultiple: print("\nAnalysing each of the potential species tree roots.") resultsSpeciesTrees = [] diff --git a/scripts/root_from_duplications.py b/scripts/root_from_duplications.py index bcb71cef9edaad57566a74ce851ad4648357d508..da905e8d635e7e2676d30fc1a51dc0e6de02b146 100644 --- a/scripts/root_from_duplications.py +++ b/scripts/root_from_duplications.py @@ -426,8 +426,8 @@ def PlotTree(speciesTree, treesDir, supported_clusters, qSimplePrint=True): tree_for_figure.render(treesDir + "Duplications.pdf") def GetRoot(speciesTreeFN, treesDir, GeneToSpeciesMap, nProcessors, treeFmt=None): - if treeFmt != None: spTreeFormat = treeFmt - speciesTree = ete2.Tree(speciesTreeFN, format=spTreeFormat) + if treeFmt == None: treeFmt = spTreeFormat + speciesTree = ete2.Tree(speciesTreeFN, format=treeFmt) species, dict_clades, clade_names = AnalyseSpeciesTree(speciesTree) pool = mp.Pool(nProcessors, maxtasksperchild=1) list_of_lists = pool.map(SupportedHierachies_wrapper2, [(fn, GeneToSpeciesMap, species, dict_clades, clade_names) for fn in glob.glob(treesDir + "/*")]) @@ -490,7 +490,7 @@ if __name__ == "__main__": print(r) if args.verbose: PlotTree(speciesTree, args.input_tree, clusters) else: - root, clusters, _ = GetRoot(args.Species_tree, args.input_tree, GeneToSpecies, nProcs, treeFmt = 1) + root, clusters, _, nSupport = GetRoot(args.Species_tree, args.input_tree, GeneToSpecies, nProcs, treeFmt = 1) for r in root: print(r) speciesTree = ete2.Tree(args.Species_tree, format=1) if args.verbose: PlotTree(speciesTree, args.input_tree, clusters, qSimplePrint=False) diff --git a/trees_for_orthogroups.py b/trees_for_orthogroups.py index 9e85022bb2288542ecee8d2483614658804d39c1..7dbd61de3da03f1e85c80ab1cd3b34a2202a6cd4 100755 --- a/trees_for_orthogroups.py +++ b/trees_for_orthogroups.py @@ -40,7 +40,7 @@ import glob import orthofinder -version = "0.6.1" +version = "1.0.0" def RunCommandSet(commandSet, qHideStdout): # orthofinder.util.PrintTime("Runing command: %s" % commandSet[-1])