From b6ee41a7a53eea49b5ce88e42aebc18f7e11c79c Mon Sep 17 00:00:00 2001
From: remy <remy.dernat@umontpellier.fr>
Date: Tue, 26 Nov 2019 18:45:05 +0100
Subject: [PATCH] no more error, but still does not work

---
 orthofinder/orthofinder.py                  | 17 ++---
 orthofinder/scripts/blast_file_processor.py | 81 +++++++++++----------
 orthofinder/scripts/scnorm.py               | 34 ++++-----
 3 files changed, 63 insertions(+), 69 deletions(-)

diff --git a/orthofinder/orthofinder.py b/orthofinder/orthofinder.py
index d4e3ee0..f04594b 100755
--- a/orthofinder/orthofinder.py
+++ b/orthofinder/orthofinder.py
@@ -449,18 +449,18 @@ class WaterfallMethod:
                 Bi.append(Bij)
             if qNormalizeAndFixZeros:
                 for jSpecies in range(seqsInfo.nSpecies):
-                    WaterfallMethod.WriteFixNewScores(seqsInfo, blastDir_list, Bi[jSpecies], seqsInfo.speciesToUse[iSpecies], seqsInfo.speciesToUse[jSpecies], qDoubleBlast=qDoubleBlast)
+                    WaterfallMethod.WriteFixNewScores(seqsInfo, blastDir_list, seqsInfo.speciesToUse[iSpecies], seqsInfo.speciesToUse[jSpecies], qDoubleBlast=qDoubleBlast)
             matrices.DumpMatrixArray("B", Bi, iSpecies)
             BH = GetBH_s(Bi, seqsInfo, iSpecies)
             matrices.DumpMatrixArray("BH", BH, iSpecies)
             util.PrintTime("Initial processing of species %d complete" % iSpecies)
         
     @staticmethod 
-    def Worker_ProcessBlastHits(cmd_queue, qDoubleBlast, qNormalize=False):
+    def Worker_ProcessBlastHits(cmd_queue, qDoubleBlast, qNormalize=False, qNormalizeAndFixZeros=False):
         while True:
             try:
                 args = cmd_queue.get(True, 1)
-                WaterfallMethod.ProcessBlastHits(*args, qDoubleBlast=qDoubleBlast, qNormalize=qNormalize)
+                WaterfallMethod.ProcessBlastHits(*args, qDoubleBlast=qDoubleBlast, qNormalize=qNormalize, qNormalizeAndFixZeros=qNormalizeAndFixZeros)
             except queue.Empty:
                 return
 
@@ -472,11 +472,10 @@ class WaterfallMethod:
             blast_file_processor.WriteNormalizedBlastFile(blastDir_list, B, iSpecies, jSpecies, qDoubleBlast=qDoubleBlast)
     
     @staticmethod
-    def WriteFixNewScores(seqsInfo, blastDir_list, B, iSpecies, jSpecies, qDoubleBlast):
+    def WriteFixNewScores(seqsInfo, blastDir_list, iSpecies, jSpecies, qDoubleBlast):
         with warnings.catch_warnings():
             warnings.simplefilter("ignore")
-            # process up to the best hits for each species; Bi is a list of matrices
-            blast_file_processor.WriteFixNormalizedBlastFile(blastDir_list, B, iSpecies, jSpecies, qDoubleBlast=qDoubleBlast)
+            blast_file_processor.WriteFixNormalizedBlastFile(blastDir_list, iSpecies, jSpecies, qDoubleBlast=qDoubleBlast)
 
     @staticmethod
     def ConnectCognates(seqsInfo, iSpecies): 
@@ -1149,7 +1148,7 @@ def ProcessArgs(prog_caller):
     
     # check argument combinations       
     if not (options.qStartFromFasta or options.qStartFromBlast or options.qStartFromGroups or options.qStartFromTrees or options.qNormalize):
-        print("ERROR: Please specify the input directory for OrthoFinder using one of the options: '-f', '-b', '-N', '-fg' or '-ft'.")
+        print("ERROR: Please specify the input directory for OrthoFinder using one of the options: '-f', '-b', '-N', '-NF', '-fg' or '-ft'.")
         util.Fail()
     
     if options.qStartFromFasta and (options.qStartFromTrees or options.qStartFromGroups):
@@ -1192,12 +1191,12 @@ def ProcessArgs(prog_caller):
         print("ERROR: Search program (%s) not configured in config.json file" % options.search_program)
         util.Fail()
 
-    if options.qNormalize and ( options.qNormalizeAndFixZeros or options.qStartFromFasta or options.qStartFromBlast or options.qStartFromGroups or options.qStartFromTrees ) :
+    if options.qNormalize and ( options.qStartFromFasta or options.qStartFromBlast or options.qStartFromGroups or options.qStartFromTrees ) :
         # not a failure/exit, just a warning
         print("ERROR: --normalize option is not possible with other options")
         util.Fail()
         
-    if options.qNormalizeAndFixZeros and ( options.qNormalize or options.qStartFromFasta or options.qStartFromBlast or options.qStartFromGroups or options.qStartFromTrees ) :
+    if options.qNormalizeAndFixZeros and ( options.qStartFromFasta or options.qStartFromBlast or options.qStartFromGroups or options.qStartFromTrees ) :
         # not a failure/exit, just a warning
         print("ERROR: --normalize-fix option is not possible with other options")
         util.Fail()
diff --git a/orthofinder/scripts/blast_file_processor.py b/orthofinder/scripts/blast_file_processor.py
index 4be8c4c..ea41e9f 100644
--- a/orthofinder/scripts/blast_file_processor.py
+++ b/orthofinder/scripts/blast_file_processor.py
@@ -41,6 +41,24 @@ file_read_mode = 'rb' if PY2 else 'rt'
 #file_write_mode = 'wb+' if PY2 else 'wt+'
 file_write_mode = 'wb+'
 
+def GetSpeciesOrder(iSpecies, jSpecies, qDoubleBlast):
+    # checking species order to get the right filenames
+    if not qDoubleBlast:
+        qRev = (iSpecies > jSpecies)
+    else:
+        qRev = False
+    if qRev:
+        iQ = 1
+        iH = 0
+        iSpeciesOpen = jSpecies
+        jSpeciesOpen = iSpecies
+    else:
+        iQ = 0
+        iH = 1
+        iSpeciesOpen = iSpecies
+        jSpeciesOpen = jSpecies
+    return iSpeciesOpen, jSpeciesOpen, iQ, iH
+
 def GetBLAST6Scores(seqsInfo, blastDir_list, iSpecies, jSpecies, qExcludeSelfHits = True, sep = "_", qDoubleBlast=True): 
     qSameSpecies = iSpecies==jSpecies
     qCheckForSelfHits = qExcludeSelfHits and qSameSpecies
@@ -118,46 +136,31 @@ def WriteNormalizedBlastFile(blastDir_list, B, iSpecies, jSpecies, sep = "_", qD
         sys.stderr.write("\t".join(map(str, row)) + "\n")
         raise 
 
-def WriteFixNormalizedBlastFile(blastDir_list, B, iSpecies, jSpecies, sep = "_", qDoubleBlast=True):
+def WriteFixNormalizedBlastFile(blastDir_list, iSpecies, jSpecies, sep = "_", qDoubleBlast=True):
     iSpeciesOpen, jSpeciesOpen, iQ, iH = GetSpeciesOrder(iSpecies, jSpecies, qDoubleBlast)
     row = ""
     rows = []
-    for d in blastDir_list:
-        fn = d + "Blast%d_%d.txt" % (iSpeciesOpen, jSpeciesOpen)
-        fnNorm = d + "Blast%d_%d.norm.blastout" % (iSpeciesOpen, jSpeciesOpen)
-        if os.path.exists(fn) or os.path.exists(fn + ".gz"): break
-    try:
-        with (gzip.open(fn + ".gz", file_read_mode) if os.path.exists(fn + ".gz") else open(fn, file_read_mode)) as blastfile:
-            blastreader = csv.reader(blastfile, delimiter='\t')
-            tempfile = NamedTemporaryFile(mode='w', delete=False)
-            blastwriter = csv.writer(tempfile, delimiter='\t')
-            for row in blastreader:
-                rows.append(row)
-            newrows = sc.scnorm.FixZeroNormalizedScores(iSpeciesOpen, jSpeciesOpen, iQ, iH, sep, B, rows)
-            for the_row in newrows:
-                blastwriter.writerow(the_row)
-            tempfile.flush()
-            tempfile.close()
-            move(tempfile.name, fnNorm)
-    except Exception:
-        sys.stderr.write("Malformatted line in %sBlast%d_%d.txt\nOffending line was:\n" % (d, iSpecies, jSpecies))
-        sys.stderr.write("\t".join(map(str, row)) + "\n")
-        raise
+    # 0.0 normalized value should only happen for self-hits
+    if iSpeciesOpen == jSpeciesOpen:
+        for d in blastDir_list:
+            fnNorm = d + "Blast%d_%d.norm.blastout" % (iSpeciesOpen, jSpeciesOpen)
+            fnNormFix = d + "Blast%d_%d.norm.fix.blastout" % (iSpeciesOpen, jSpeciesOpen)
+            if os.path.exists(fnNorm): break
+        try:
+            with open(fnNorm, file_read_mode) as blastfile:
+                blastreader = csv.reader(blastfile, delimiter='\t')
+                tempfile = NamedTemporaryFile(mode='w', delete=False)
+                blastwriter = csv.writer(tempfile, delimiter='\t')
+                for row in blastreader:
+                    rows.append(row)
+                newrows = sc.scnorm.FixZeroNormalizedScores(iSpeciesOpen, jSpeciesOpen, rows)
+                for the_row in newrows:
+                    blastwriter.writerow(the_row)
+                tempfile.flush()
+                tempfile.close()
+                move(tempfile.name, fnNormFix)
+        except Exception:
+            sys.stderr.write("Malformatted line in %sBlast%d_%d.txt\nOffending line was:\n" % (d, iSpecies, jSpecies))
+            sys.stderr.write("\t".join(map(str, row)) + "\n")
+            raise
 
-def GetSpeciesOrder(iSpecies, jSpecies, qDoubleBlast):
-    # checking species order to get the right filenames
-    if not qDoubleBlast:
-        qRev = (iSpecies > jSpecies)
-    else:
-        qRev = False
-    if qRev:
-        iQ = 1
-        iH = 0
-        iSpeciesOpen = jSpecies
-        jSpeciesOpen = iSpecies
-    else:
-        iQ = 0
-        iH = 1
-        iSpeciesOpen = iSpecies
-        jSpeciesOpen = jSpecies
-    return iSpeciesOpen, jSpeciesOpen, iQ, iH
diff --git a/orthofinder/scripts/scnorm.py b/orthofinder/scripts/scnorm.py
index ed4c385..70bf908 100644
--- a/orthofinder/scripts/scnorm.py
+++ b/orthofinder/scripts/scnorm.py
@@ -126,8 +126,8 @@ class scnorm:
         """
         This function retrieve the Normalization Score
         from Matrix B from (Seq iQ, Seq iH)
-        If it is 0.0, it will try to retrieve
-        a modified value from the log10 fitting curve
+        and return the csv rows modified
+        by adding normalization score as a new column
         """
         newrows = []
         for row in rows:
@@ -148,43 +148,35 @@ class scnorm:
         return newrows
 
     @staticmethod
-    def FixZeroNormalizedScores(iSpeciesOpen, jSpeciesOpen, iQ, iH, sep, B, rows):
+    def FixZeroNormalizedScores(iSpeciesOpen, jSpeciesOpen, rows):
         """
         This function retrieve the Normalization Score
-        from Matrix B from (Seq iQ, Seq iH)
+        from current csv file (with Seq iQ, Seq iH)
         If it is 0.0 (self-hits), it will try to retrieve
         a modified value from the log10 fitting curve
         """
-        NormBS = 0.0
+        selfHitNormBlastScore = "0.0"
+        NormBS = selfHitNormBlastScore
         newrows = []
         # retrieve value from plot https://code-examples.net/en/q/8863d1
-        print(repr(iSpeciesOpen))
-        print(repr(f))
-        all_plots = f[0][jSpeciesOpen].get_xydata()
+        all_plots = f[jSpeciesOpen][0].get_xydata()
+        print(repr(all_plots))
+        line = 0
         for row in rows:
-            # Get hit and query IDs
-            try:
-                sequence1ID = int(row[iQ].split(sep, 2)[1])
-                sequence2ID = int(row[iH].split(sep, 2)[1])
-            except (IndexError, ValueError):
-                sys.stderr.write("\nERROR: Query or hit sequence ID in BLAST results file was missing or incorrectly formatted.\n")
-                raise
             try:
-                newscore = B[sequence1ID, sequence2ID]
-                if newscore == 0.0:
-                    #sys.stderr.write("\nWARNING: 0.0 value detected in seq position %d %d for Blast species %d %d!\n" % (sequence1ID, sequence2ID, iSpeciesOpen, jSpeciesOpen))
+                if row[12] == selfHitNormBlastScore:
                     try:
                         x = float(row[11])
                         for index, values in all_plots:
                             if values == x:
                                 NormBS = index
                                 break
-                        row.append(NormBS)
+                        row[12] = NormBS
                     except (IndexError, ValueError):
                         sys.stderr.write("\nERROR: 12th field in BLAST results file line should be the bit-score for the hit\n")
-                row.append(newscore)
             except IndexError as e:
-                sys.stderr.write("\nERROR: Matrix index on Blast species %d %d, sequences B[%d, %d] : %s \n" % (iSpeciesOpen, jSpeciesOpen, sequence1ID, sequence2ID, e))
+                sys.stderr.write("\nERROR: csv values on Blast species %d %d file, line %d : %s \n" % (iSpeciesOpen, jSpeciesOpen, line, e))
                 pass
             newrows.append(row)
+            line+=1
         return newrows
-- 
GitLab