From fe6cb24c17fb3ba004831553ee5d1d4bfb619ed0 Mon Sep 17 00:00:00 2001
From: David Emms <david_emms@hotmail.com>
Date: Fri, 19 Aug 2016 14:18:06 +0100
Subject: [PATCH] Tidy up output

---
 get_orthologues.py        | 27 ++++++++++++++++++++-------
 process_trees.py          | 21 ++-------------------
 root_from_duplications.py |  4 ++--
 trees_for_orthogroups.py  | 17 ++++++++++-------
 4 files changed, 34 insertions(+), 35 deletions(-)

diff --git a/get_orthologues.py b/get_orthologues.py
index 78ff58b..47a14b3 100755
--- a/get_orthologues.py
+++ b/get_orthologues.py
@@ -244,14 +244,13 @@ class DendroBLASTTrees(object):
         nSeqs = self.NumberOfSequences(self.species)
         ogMatrices = [np.zeros((n, n)) for n in nGenes]
         for iiSp, sp1 in enumerate(self.species):
-            orthofinder.util.PrintTime(str(sp1))
+            orthofinder.util.PrintTime("Species %d" % sp1)
             Bs = [orthofinder.LoadMatrix("Bit", self.ogSet.fileInfo, iiSp, jjSp) for jjSp in xrange(len(self.species))]
             mins = np.ones((nSeqs[sp1], 1), dtype=np.float64)*9e99 
             maxes = np.zeros((nSeqs[sp1], 1), dtype=np.float64)
             for B, sp2 in zip(Bs, self.species):
                 mins = np.minimum(mins, lil_min(B))
                 maxes = np.maximum(maxes, lil_max(B))
-            orthofinder.util.PrintTime("got mins and maxes")
             for jjSp, B  in enumerate(Bs):
                 for og, m in zip(ogsPerSpecies, ogMatrices):
                     for gi, i in og[iiSp]:
@@ -344,6 +343,8 @@ class DendroBLASTTrees(object):
         D, spPairs = self.SpeciesTreeDistances(ogs, ogMatrices)
         cmd_spTree, spTreeFN_ids = self.PrepareSpeciesTreeCommand(D, spPairs)
         cmds_geneTrees = self.PrepareGeneTreeCommand()
+        print("\n3. Inferring gene and species trees")
+        print(  "-----------------------------------")
         tfo.RunParallelCommandSets(nProcesses, [[cmd_spTree]] + cmds_geneTrees, qHideStdout = True)
         seqDict = self.ogSet.Spec_SeqDict()
         for iog in xrange(len(self.ogSet.ogs)):
@@ -509,34 +510,46 @@ if __name__ == "__main__":
             userDir = arg
         
     # Check arguments
+    print("0. Getting Orthologues")
+    print("----------------------")
     orthofinderWorkingDir, orthofinderResultsDir, clustersFilename_pairs = tfo.GetOGsFile(userDir)
 
     if nProcesses == None:
-        print("""Number of parallel processes has not been specified, will use the default value.  
-   Number of parallel processes can be specified using the -t option\n""")
+        print("""\nNumber of parallel processes has not been specified, will use the default value.  
+Number of parallel processes can be specified using the -t option.""")
         nProcesses = orthofinder.nThreadsDefault
-    print("Using %d threads for alignments and trees\n" % nProcesses)
+    print("Using %d threads for alignments and trees" % nProcesses)
     
+    print("\n1. Checking required programs are installed")
+    print(  "-------------------------------------------")
     if not CanRunDependencies(orthofinderWorkingDir): orthofinder.Fail()   
     
+    print("\n2. Reading sequence similarity scores")
+    print(  "-------------------------------------")
     resultsDir = orthofinder.util.CreateNewWorkingDirectory(orthofinderResultsDir + "Orthologues_")
     ogSet = OrthoGroupsSet(orthofinderWorkingDir, clustersFilename_pairs, idExtractor = idextractor.FirstWordExtractor)
     
     db = DendroBLASTTrees(ogSet, resultsDir)
     db.ReadAndPickle()
     nOGs, D, spPairs, spTreeFN_ids = db.RunAnalysis()
-        
+     
+    print("\n4. Best outgroup(s) for species tree")
+    print(  "------------------------------------")   
     roots, clusters, rootedSpeciesTreeFN = rfd.GetRoot(spTreeFN_ids, os.path.split(db.treesPatIDs)[0] + "/", rfd.GeneToSpecies_dash, nProcesses, treeFmt = 1)
     db.RenameTreeTaxa(rootedSpeciesTreeFN, resultsDir + "SpeciesTree_rooted.txt", db.ogSet.SpeciesDict(), qFixNegatives=True)
     
     spDict = ogSet.SpeciesDict()
     for r in roots:
-        print([spDict[s] for s in r])
+        print(", ".join([spDict[s] for s in r]))
 #    speciesTreeFN = RunAstral(ogSet, db.treesPat, resultsDir)
 
+    print("\n5. Reconciling gene and species trees")
+    print(  "-------------------------------------")   
     dlcparResultsDir = RunDlcpar(db.treesPatIDs, ogSet, nOGs, rootedSpeciesTreeFN, db.workingDir)
     
     # Orthologue lists
+    print("\n6. Inferring orthologues from gene trees")
+    print(  "----------------------------------------")   
     pt.get_orthologue_lists(ogSet, resultsDir, dlcparResultsDir, db.workingDir)
     
     print("\nDone!")
diff --git a/process_trees.py b/process_trees.py
index f501605..8deac33 100644
--- a/process_trees.py
+++ b/process_trees.py
@@ -86,7 +86,7 @@ def multiply(specmin, specmax, matrixDir):
     product = M.dot(M.transpose())
     return product, M
 
-def WriteOrthologues(folder, spec1, spec2, orthologues, speciesDict, sequenceDict):
+def WriteOrthologues(resultsDir, spec1, spec2, orthologues, speciesDict, sequenceDict):
     d1 = resultsDir + "Orthologues_" + speciesDict[str(spec1)] + "/"
     d2 = resultsDir + "Orthologues_" + speciesDict[str(spec2)] + "/"
     with open(d1 + '%s__v__%s.csv' % (speciesDict[str(spec1)], speciesDict[str(spec2)]), 'wb') as outfile1, open(d2 + '%s__v__%s.csv' % (speciesDict[str(spec2)], speciesDict[str(spec1)]), 'wb') as outfile2:
@@ -98,13 +98,6 @@ def WriteOrthologues(folder, spec1, spec2, orthologues, speciesDict, sequenceDic
             writer1.writerow((", ".join([sequenceDict["%d_%d" % (spec1, o)] for o in genes1]), ", ".join([sequenceDict["%d_%d" % (spec2, o)] for o in genes2])))
             writer2.writerow((", ".join([sequenceDict["%d_%d" % (spec2, o)] for o in genes2]), ", ".join([sequenceDict["%d_%d" % (spec1, o)] for o in genes1])))
 
-#def WriteOrthologues_OneFile(folder, spec1, speciesIDs, orthologues, speciesDict, sequenceDict):
-#    with open(folder + 'Orthologues_%s.csv' % speciesDict[str(spec1)], 'wb') as outfile:
-#        writer = csv.writer(outfile)
-#        writer.writerow([speciesDict[str(spec1)]] + [speciesDict[str(j)] for j in speciesIDs if j != spec1])
-#        """Need to be able to go through the genes in turn for a single speices and get the ortholgues of that gene"""
-#        """Note that if other genes are in the same row then they aren't necessarily orthologues"""
-
 def GetOrthologues(orig_matrix, orig_matrix_csc, index):
     orthologues = orig_matrix.getrowview(index).nonzero()[1]
     index = orthologues[0]
@@ -123,12 +116,6 @@ def find_all(matrix, orig_matrix):
         done.update(orthologuesSp1)
         orthologues.append((orthologuesSp1, orthologuesSp2))
     return orthologues
-    
-#def AllOrthologuesForSpecies(index1, speciesIDs, matrixDir):
-#    orthologues = []
-#    for index2 in xrange(len(speciesIDs)):
-#        product, M = multiply(index1, index2, matrixDir)
-#        orthologues.append(find_all(product, M))
         
 def species_find_all(speciesDict, sequenceDict, matrixDir, resultsDir):
     # Calls multiply and find_all on each species pair, and appends the numbers from find_all's output to the relevant csv lists.
@@ -140,12 +127,8 @@ def species_find_all(speciesDict, sequenceDict, matrixDir, resultsDir):
     for index1, index2 in itertools.product(xrange(nspecies), xrange(nspecies)):      
         if index1 >= index2: continue
         product, M = multiply(index1, index2, matrixDir)
-        print("Ortholgues for species pair (%d, %d)" % (index2, index1))
         orthologues = find_all(product, M)
-        WriteOrthologues(resultsDir, speciesIDs[index2], speciesIDs[index1], orthologues, speciesDict, sequenceDict)   
-#    for index1 in xrange(nspecies):
-#        orthologues = AllOrthologuesForSpecies(index1, speciesIDs, matrixDir)
-#        WriteOrthologues_OneFile(resultsDir, speciesIDs, speciesIDs[index1], orthologues, speciesDict, sequenceDict)    
+        WriteOrthologues(resultsDir, speciesIDs[index2], speciesIDs[index1], orthologues, speciesDict, sequenceDict)    
     
 def get_orthologue_lists(ogSet, resultsDir, dlcparResultsDir, workingDir):
     # -> Matrices
diff --git a/root_from_duplications.py b/root_from_duplications.py
index cdacf7a..3b3416d 100644
--- a/root_from_duplications.py
+++ b/root_from_duplications.py
@@ -395,10 +395,10 @@ def ParsimonyRoot(allSpecies, clades, supported_clusters, qPrint=True):
         if score == m:
             if len(clade) > len(allSpecies)/2:
                 root.append(allSpecies.difference(clade))
-                if qPrint: print(root[-1])
+                if qPrint: print(", ".join(root[-1]))
             else:
                 root.append(clade)
-                if qPrint: print(root[-1])
+                if qPrint: print(", ".join(root[-1]))
             if qPrint: print("")
     return root
 
diff --git a/trees_for_orthogroups.py b/trees_for_orthogroups.py
index a3c1dbf..5bf2912 100755
--- a/trees_for_orthogroups.py
+++ b/trees_for_orthogroups.py
@@ -43,14 +43,14 @@ import orthofinder
 version = "0.6.1"   
 
 def RunCommandSet(commandSet, qHideStdout):
-    orthofinder.util.PrintTime("Runing command: %s" % commandSet[-1])
+#    orthofinder.util.PrintTime("Runing command: %s" % commandSet[-1])
     if qHideStdout:
         for cmd in commandSet:
             subprocess.call(cmd, shell=True, stdout=subprocess.PIPE)
     else:
         for cmd in commandSet:
             subprocess.call(cmd, shell=True)
-    orthofinder.util.PrintTime("Finshed command: %s" % commandSet[-1])
+#    orthofinder.util.PrintTime("Finshed command: %s" % commandSet[-1])
     
 class FastaWriter(object):
     def __init__(self, fastaFileDir):
@@ -92,7 +92,7 @@ class FastaWriter(object):
     def SortSeqs(self, seqs):
         return sorted(seqs, key=lambda x: map(int, x.split("_")))
                     
-def Worker_RunCommand(cmd_queue, qHideStdout):
+def Worker_RunCommand(cmd_queue, nProcesses, nToDo, qHideStdout):
     """ repeatedly takes items to process from the queue until it is empty at which point it returns. Does not take a new task
         if it can't acquire queueLock as this indicates the queue is being rearranged.
         
@@ -100,7 +100,10 @@ def Worker_RunCommand(cmd_queue, qHideStdout):
     """
     while True:
         try:
-            commandSet = cmd_queue.get(True, 1)
+            i, commandSet = cmd_queue.get(True, 1)
+            nDone = i - nProcesses + 1
+            if nDone >= 0 and divmod(nDone, 100)[1] == 0:
+                print("Done %d of %d" % (nDone, nToDo))
             RunCommandSet(commandSet, qHideStdout)
         except Queue.Empty:
             return   
@@ -112,9 +115,9 @@ def RunParallelCommandSets(nProcesses, commands, qHideStdout = False):
     """
     # Setup the workers and run
     cmd_queue = mp.Queue()
-    for cmd in commands:
-        cmd_queue.put(cmd)
-    runningProcesses = [mp.Process(target=Worker_RunCommand, args=(cmd_queue, qHideStdout)) for i_ in xrange(nProcesses)]
+    for i, cmd in enumerate(commands):
+        cmd_queue.put((i, cmd))
+    runningProcesses = [mp.Process(target=Worker_RunCommand, args=(cmd_queue, nProcesses, i+1, qHideStdout)) for i_ in xrange(nProcesses)]
     for proc in runningProcesses:
         proc.start()
     
-- 
GitLab