I have a hard-to-reproduce bug, and I'm hoping to use a Python GDB script to stop the process when the bug is detected (I'll run it under the debugger until the error reappears). This is in reference to Stray chars seen out of snprintf.
I've written a script that creates a breakpoint and, in parallel, reads the serial terminal to detect an error in the string. Here's the script:
from __future__ import print_function
import serial
import pandas as pd
import argparse
import io
import time
import ast
import sys
import signal
import threading
class Serial_Port():
    """Read lines from a serial port and check that they parse as literals.

    Every line containing ``[[`` is passed to :meth:`parse`; lines that fail
    to parse are written to the module-level log file object ``fptr``.
    """

    def __init__(self, port_number, baud_rate):
        """Open the serial port and reset the counters.

        Args:
            port_number: Port identifier passed to ``serial.Serial``.
            baud_rate: Baud rate passed to ``serial.Serial``.
        """
        self.ser = serial.Serial(port_number, baud_rate, timeout=10)
        # Text layer over the raw port; '\r' is treated as the line terminator.
        self.ser_io = io.TextIOWrapper(
            io.BufferedRWPair(self.ser, self.ser, 1),
            newline='\r',
            line_buffering=True,
        )
        self.data_count = 0  # lines seen that contain "[["
        self.data_times = 0  # read() calls made while ifPrint is off
        self.data_succ = 0   # lines that parsed successfully
        self.ifPrint = False  # when set, log the raw line instead of a status line

    def read(self):
        """Read one line, log it (or a status line) to ``fptr`` and parse it."""
        # Strip NUL/CR/LF in a single pass so mixed padding such as "\n\x00"
        # is removed completely regardless of the order it appears in.
        raw_line = self.ser_io.readline().strip("\x00\r\n")
        if self.ifPrint:
            fptr.write(raw_line)
        else:
            self.data_times += 1
            # Print the current status of string test
            fptr.write(
                "Loop: %d Read Count: %d Successful: %d\n"
                % (self.data_times, self.data_count, self.data_succ)
            )
        if "[[" in raw_line:
            self.data_count += 1
            self.parse(raw_line)
        # Flush all the data before returning
        fptr.flush()

    def parse(self, gnss):
        """Validate one received line and count it if it is well-formed.

        The line is considered valid when ``ast.literal_eval`` accepts it and
        a ``pandas.DataFrame`` can be built from the result. On any failure
        the original text is written to ``fptr``.

        Args:
            gnss: The received line as a string.
        """
        try:
            # Keep the raw text in `gnss`: the failure branch must log the
            # string that was received, not the evaluated object (writing a
            # list/dict to the log file would raise TypeError).
            parsed = ast.literal_eval(gnss)
            pd.DataFrame(parsed)
        except KeyboardInterrupt:
            self.close(None, None)
        except Exception:
            fptr.write(gnss)
            fptr.write("\n")
            fptr.flush()
        else:
            self.data_succ += 1

    def close(self, frame, andsomething):
        """Flush and close the serial port, then exit the interpreter.

        Registered as the SIGINT handler, so it takes the two positional
        arguments a signal handler receives; both are ignored.
        """
        self.ser.flush()
        self.ser.close()
        sys.exit(0)
class DebugPrintingBreakpoint(gdb.Breakpoint):
    """Breakpoint that stops only when a watched ID is present in a vector.

    On each hit, the ``intID`` field of every element of the local
    ``aVectorVar`` in the newest frame is collected; execution stops only if
    at least one of them is in ``debugging_IDs``.
    """

    # IDs that should cause the breakpoint to actually stop.
    debugging_IDs = frozenset({37, 153, 420})

    def stop(self):
        """Decide whether this breakpoint hit should stop the inferior.

        Returns:
            True to stop and drop to gdb's prompt, False to keep running.
        """
        top = gdb.newest_frame()
        someVector = top.read_var('aVectorVar')
        # Access the begin() & end() pointer of std::vector in GNU Standard C++ lib
        first = someVector['_M_impl']['_M_start']
        last = someVector['_M_impl']['_M_finish']
        values = []
        while first != last:
            values.append(int(first.dereference()['intID']))
            first = first + 1
        # Class attributes are not in scope inside a method body, so the set
        # must be reached through self (a bare name raises NameError).
        if self.debugging_IDs.isdisjoint(values):
            return False # skip: none of the items we're looking for can be found by ID in the vector on the stack
        print("Found other accompanying IDs: {}".format(values))
        return True # drop to gdb's prompt
class MyThread_serial(threading.Thread):
    """Thread that keeps calling ``serialPort.read()`` once per second."""

    def run(self):
        """Loop forever: pause for one second, then call ``serialPort.read()``."""
        pause_seconds = 1
        while True:
            time.sleep(pause_seconds)
            serialPort.read()
def main():
    """Start the serial-reader thread, then resume the debugged process."""
    reader = MyThread_serial(name="Thread-serial")
    reader.start()
    # Hand control back to gdb so the inferior keeps running.
    gdb.execute("continue")
if __name__ == "__main__":
    # Log file shared by the classes above; opened in append mode.
    fptr = open("gdbOPs.txt", mode="a")

    # Connect to the serial port.
    strPort = "/dev/ttyUSB0"
    serialPort = Serial_Port(port_number=strPort, baud_rate=9600)

    # Ctrl+C closes the serial port and exits.
    signal.signal(signal.SIGINT, serialPort.close)
    print('Press Ctrl+C to exit')

    main()
Background
I have a piece of hardware that sends a string in the format
[[12.12345678,12.12345678],[12.12345678,12.12345678],...]
But at times the string can have errors similar to
[[12.12345678,12.12345678],[55.01[12.12345678,12.12345678],...]
I've never been able to reproduce this bug, so I decided to write a script that waits until the error is seen and then interrupts GDB, so that I can dump the stack and all the relevant variables.
Please refer to the original C question to learn more of the bug
Question
- How do I create a dynamic interrupt? I.e., when a failed string is seen, I should send Ctrl+C to stop the process and get control of the prompt.
- How is my approach to finding the bug? Do people usually use this kind of testing method? Could you link to some examples?
Update
The bug was resolved by replacing the sprintf function with a function that doesn't deal directly with 64-bit values (in a 32-bit ARM environment; refer to this question). Please go to the original question to find the new function used.
