diff --git a/orthofinder/orthofinder.py b/orthofinder/orthofinder.py index a05a013daf12b27361b2b3ea66720f387f08270a..0b2a4b2b8adad62172e1bda194da90f26bdbbd8f 100755 --- a/orthofinder/orthofinder.py +++ b/orthofinder/orthofinder.py @@ -54,10 +54,14 @@ except ImportError: import Queue as queue # Y import warnings # Y +import matplotlib as mpl # install +mpl.use('Agg') +import matplotlib.pyplot as plt # install + PY2 = sys.version_info <= (3,) csv_write_mode = 'wb' if PY2 else 'wt' -from scripts import mcl +from scripts import mcl from scripts import blast_file_processor from scripts import util, matrices, orthologues, trees_msa from scripts import program_caller @@ -69,7 +73,8 @@ if getattr(sys, 'frozen', False): __location__ = os.path.split(sys.executable)[0] else: __location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__))) - + +counter=0 max_int = sys.maxsize ok = False while not ok: @@ -339,7 +344,9 @@ class scnorm: @staticmethod def CalculateFittingParameters(Lf, S): # curve_fit doc.: https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.curve_fit.html - pars,covar = curve_fit(scnorm.loglinear, Lf, np.log10(S)) + Slog = np.log10(S) + pars, covar = curve_fit(scnorm.loglinear, Lf, Slog) + scnorm.DrawLogLinear(pars, Lf, Slog) return pars @staticmethod @@ -351,7 +358,23 @@ class scnorm: # l{i,j}_matrix are built using l{i,j}_vals data using (range{q,h},range{q,h}) positions li_matrix = sparse.csr_matrix((li_vals, (rangeq, rangeq))) lj_matrix = sparse.csr_matrix((lj_vals, (rangeh, rangeh))) + # lil_matrix named as Bij in ProcessBlastHits return sparse.lil_matrix(10**(-params[1]) * li_matrix * b * lj_matrix) + + @staticmethod + def DrawLogLinear(params, x, y): + global counter + counter+=1 + nbvals=10000 + maxBS=64000 + minBS=0 + x = np.linspace(minBS, maxBS, nbvals) + plt.plot(x, scnorm.loglinear(x, *params), label='fit curve: '+str(counter)) + plt.legend() + #plt.savefig('foo'+str(counter)+'.png') + plt.savefig('fitting_curve.png') + + """ RunInfo @@ -488,7 +511,7 @@ class WaterfallMethod: Lf = Li * Lj topLf, topScores = scnorm.GetTopPercentileOfScores(Lf, scores, 95) if len(topScores) > 1: - fittingParameters = scnorm.CalculateFittingParameters(topLf, topScores) + fittingParameters = scnorm.CalculateFittingParameters(topLf, topScores) return scnorm.NormaliseScoresByLogLengthProduct(B, Lengths[iSpecies], Lengths[jSpecies], fittingParameters) else: print("WARNING: Too few hits between species %d and species %d to normalise the scores, these hits will be ignored" % (iSpecies, jSpecies)) @@ -505,7 +528,6 @@ class WaterfallMethod: Bij = blast_file_processor.GetBLAST6Scores(seqsInfo, blastDir_list, seqsInfo.speciesToUse[iSpecies], seqsInfo.speciesToUse[jSpecies], qDoubleBlast=qDoubleBlast) Bij = WaterfallMethod.NormaliseScores(Bij, Lengths, iSpecies, jSpecies) if qNormalize: - #WaterfallMethod.WriteNewScores(seqsInfo, blastDir_list, Bij, iSpecies, qDoubleBlast=qDoubleBlast) WaterfallMethod.WriteNewScores(seqsInfo, blastDir_list, Bij, seqsInfo.speciesToUse[iSpecies], seqsInfo.speciesToUse[jSpecies], qDoubleBlast=qDoubleBlast) Bi.append(Bij) matrices.DumpMatrixArray("B", Bi, iSpecies)