Back again. The script I'm working on for lower limit of detection (LLoD) for laboratory assays ran fine yesterday, but now that I'm trying to direct the output to a .txt, I'm running into all kinds of errors. Have tried the sys.stdout = open("test.txt", "w") with sys.stdout.close() at the end, to no avail (further, this throws a ValueError: I/O operation on closed file. error if I run the script again, even if I removed the sys.stdout stuff). My most recent attempt was to use with open(outName, 'w') as f: print(hitTable, '\n', file=f) for the first block to be printed to a .txt (note: before this block, I have the following:
ts = time.time()
dt = datetime.datetime.fromtimestamp(ts).strftime('%Y%m%d')
tm = datetime.datetime.fromtimestamp(ts).strftime('%H:%M')
outName = dt + " Probit Analysis " + tm +".txt"
as I expect that this script will be run multiple times a day, and don't want the output file overwritten for multiple runs)
All the examples I found were very basic, like
with open('file.txt', 'w') as f:
   print('some text', file=f)
but nothing explaining what to do when you want something printed to the file, then a bunch of calculations are done, then some more stuff is to be written, etc., so later I have lines like
if r_2 < 0.9:
   with open(outName, 'a') as f:
      print('WARNING: low r-squared value for linear regression. Try re-analyzing without using log10.', file=f)
but I'm not sure if that's the right way to handle it.
My desired output would look like
     qty   log_qty  probability    probit
0   1.0  0.000000         0.07  3.524209
1   2.0  0.301030         0.15  3.963567
2   3.0  0.477121         0.24  4.293697
3   4.0  0.602060         0.32  4.532301
4   5.0  0.698970         0.40  4.746653
5  10.0  1.000000         0.60  5.253347
6  20.0  1.301030         0.87  6.126391
7  30.0  1.477121         0.90  6.281552
8  40.0  1.602060         0.95  6.644854
9  50.0  1.698970         0.98  7.053749
 Linear regression using least-squares method yields:
                y = 2.062x + 3.353
        with a corresponding R-squared value of 0.99066
The lower limit of detection (LLoD) at 95% CI is 39.4563.
possibly adding a header at the top along the lines of "Probit analysis YYYY-MM-DD HH:MM:SS", but that's not important atm.
Code follows, sample data available here
import os
import sys
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy
import datetime
from scipy.stats import norm
from numpy.polynomial import Polynomial
from tkinter import filedialog
from tkinter import *
# Initialize tkinter
root = Tk()
root.withdraw()
# Function to check whether user-input column exists in data
def checkInput(prompt,colCheck):
    while True:
        qVar = input(f"Enter the column header for {prompt}: ")
        if qVar in colCheck.columns:
            break
        print(f"Invalid input. Column '{qVar}' does not exist. Try again.")
    return qVar
# Function to get the hitrate/probability of detection for a given quantity
# Note: any values in df.result that cannot be parsed as a number will be converted to NaN
def hitrate(qty, df):
    t_s = df[df.qty == qty].result
    t_s = t_s.apply(pd.to_numeric, args=('coerce',)).isna()
    return (len(t_s)-t_s.sum())/len(t_s)
# Main function for generating plot and calculating LLoD-95
def regPlot(x, y, log):
    ts = time.time()
    dt = datetime.datetime.fromtimestamp(ts).strftime('%Y%m%d')
    tm = datetime.datetime.fromtimestamp(ts).strftime('%H:%M')
    params = {'mathtext.default': 'regular'}
    plt.rcParams.update(params)
    y95 = 6.6448536269514722 # probit score corresponding to 95% CI
    regFun = lambda m, x, b : (m*x) + b # equation for line
    regression = scipy.stats.linregress(x,y)
    r_2 = regression.rvalue*regression.rvalue #calculate R^2
# Solve linear equation for x at 95% CI
    log_llod = (y95 - regression.intercept) / regression.slope
    xmax = log_llod * 1.2
# Start plotting all the things!
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.set_title('Limit of Detection')
    ax.set_ylabel('Probit score\n$(\sigma + 5)$')
    while True:
        if log == 'y':
            ax.set_xlabel('$log_{10}$(input quantity)')
            break
        elif log == 'n':
            ax.set_xlabel('input quantity')
            break
        raise ValueError('Error when calling regPlot(x,y,log) - User input invalid.')
    x_r = [0, xmax]
    y_r = [regression.intercept, regFun(regression.slope,x_r[1],regression.intercept)]
    ax.plot(x_r, y_r, '--k') # linear regression
    ax.plot(log_llod, y95, color='red', marker='o', markersize=8) # LLOD point
    ax.plot([0,xmax], [y95,y95], color='red', linestyle=':') # horiz. red line
    ax.plot([log_llod,log_llod], [regFun(regression.slope,x_r[0],regression.intercept),7.1], color='red', linestyle=':') # vert. red line
    ax.plot(x, y, 'bx') # actual (qty, probit) data points
    ax.grid() # grid
    ax.set_xlim(left=0)
    plt.ion()
    plt.show()
    plt.savefig(f"{dt} Probit Analysis at {tm}.png")
    plt.pause(0.001)
    return regression.slope, regression.intercept, r_2, regression.stderr, regression.intercept_stderr, log_llod
def printResult(m,b,r2):
    print('\n Linear regression using least-squares method yields:\n')
    print('\t\ty = '+str("%.3f"%regression.slope)+'x + '+str("%.3f"%regression.intercept)+'\n')
    print('\twith a corresponding R-squared value of', str("%.5f"%r_2)+"\n")
# Prompt user for data file and column headers, ask whether to use log(qty)
print("In the directory prompt, select the .csv file containing data for analysis")
path = filedialog.askopenfilename()
#data = pd.read_csv(path, usecols=[conc, detect])
data = pd.read_csv(path)
conc = checkInput('concentration/copy number', data)
detect = checkInput('target detection', data)
while True:
    logans = input("Analyze using log10(concentration/number of copies)? (y/n): ")
    if logans == 'y' or logans == 'n':
        break
    print('Invalid input. Please enter either y or n.')
# Get timestamps for file names
ts = time.time()
dt = datetime.datetime.fromtimestamp(ts).strftime('%Y%m%d')
tm = datetime.datetime.fromtimestamp(ts).strftime('%H:%M')
outName = dt + " Probit Analysis " + tm +".txt"
# Read the columns of data specified by the user and rename them for consistency
data = data.rename(columns={conc:"qty", detect:"result"})
# Create list of unique qty values and initialize corresponding
# log_10, probability, and probit vectors
qtys = data['qty'].unique()
log_qtys = [0] * len(qtys)
prop = [0] * len(qtys)
probit = [0] * len(qtys)
# Calculate log10(qty), detection probability, and associated probit score for each unique qty
for idx, val in enumerate(qtys):
    log_qtys[idx] = math.log10(val)
    prop[idx] = hitrate(val, data)
    probit[idx] = 5 + norm.ppf(prop[idx])
# Create a dataframe of quantaties, log(qtys), probabilities, and probit scores.
hitTable = pd.DataFrame(np.vstack([qtys,log_qtys,prop,probit]).T, columns=['qty','log_qty','probability','probit'])
# Convert infinite probit values to NaN and drop those rows.
hitTable.probit.replace([np.inf,-np.inf],np.nan, inplace=True)
hitTable.dropna(inplace=True)
with open(outName, 'w') as f:
    print(hitTable, '\n', file=f)
while True:
    if logans == 'y':
        m, b, r_2, stderr, int_stderr, log_llod = regPlot(hitTable.log_qty, hitTable.probit, logans)
        printResult(m,b,r_2)
        llod_95 = 10**log_llod
        if r_2 < 0.9:
            with open(outName, 'a') as f:
                print('WARNING: low r-squared value for linear regression. Try re-analyzing without using log10.', file=f)
        break
    elif logans == 'n':
        m, b, r_2, stderr, int_stderr, log_llod = regPlot(hitTable.qty, hitTable.probit, logans)
        printResult(m,b,r_2)
        llod_95 = log_llod
        if r_2 < 0.9:
            with open(outName, 'a') as f:
                print('WARNING: low r-squared value for linear regression. Try re-analyzing using log10.', file=f)
        break
    raise ValueError('Error when attempting to evaluate llod_95 - User input invalid.')
with open(outName, 'a') as f:
    print("\nThe lower limit of detection (LLoD) at 95% CI is " + str("%.4f"%llod_95) + ".\n", file=f)
#sys.stdout.close()
EDIT: If possible, it'd be helpful to have the output written to a txt as well as printed in the CLI, so I don't have to double check the output every time during testing to make sure things are showing correctly. Also edited for formatting.
 
    