Hello fellow developers,
I am trying to build a chatbot using Markov chains and I am stuck on a problem. In the code below, I have made a random sentence generator that learns from movie scripts. The problem is: how do I get this sentence generator to stop being random and respond to the user's input instead? How should I go about doing this? Does it have something to do with input/output training like this:
In: how are you today
Out: I'm good thanks how are you
Here is my code. Most of the functions are only used to put the data into a CSV file, so don't mind those.
from collections import defaultdict
import random, itertools, nltk, pandas, csv, string, re, os, time
class Chatbot:
    """Markov-chain sentence generator trained on a movie transcript.

    The transcript is a text file of ``speaker: line`` entries.  ``learn()``
    converts it to a two-column CSV file, extracts the sentences (optionally
    only those of a single character) and builds a second-order Markov model
    that maps every pair of consecutive words to the words seen after it.
    """

    # Marker stored in the model where a sentence ends.
    END_TOKEN = '<end>'

    def __init__(self, name, txt_transcript_filedir, character=None):
        """Store the configuration and print a greeting.

        Args:
            name: Name of the bot, used in the greeting.
            txt_transcript_filedir: Path of the plain-text transcript.
            character: Speaker whose lines are used for training, or
                ``None`` to train on the lines of every speaker.
        """
        self.name = name
        self.txt_transcript_filedir = txt_transcript_filedir
        self.character = character
        print("Hello my name is " + name + ".")

    def parse_transcript(self):
        """Convert the text transcript into a ``person,text`` CSV file.

        Bracketed passages (``[...]``) are removed and each remaining line is
        split at its first ``': '`` into speaker and text.  Lines without a
        speaker are kept as a row with an empty text; blank lines are
        skipped.  The CSV path is stored in ``self.csv_transcript_filedir``.
        """
        source_path = self.txt_transcript_filedir
        # Swap only the extension; a plain str.replace would also rewrite
        # '.txt' inside directory names.
        root, _extension = os.path.splitext(source_path)
        csv_path = root + '.csv'
        if csv_path == source_path:
            # Never overwrite the transcript itself.
            csv_path = source_path + '.csv'
        self.csv_transcript_filedir = csv_path

        rows = []
        with open(source_path, encoding='utf-8') as txt_file:
            for line in txt_file:
                line = re.sub(r'\[.*?\]', '', line).strip()
                if not line:
                    continue
                # Split on the first separator only, so a ': ' inside the
                # spoken text does not create extra columns.
                speaker, separator, speech = line.partition(': ')
                if separator:
                    rows.append([speaker.strip(), speech.strip()])
                else:
                    rows.append([line, ''])

        # newline='' is what the csv module expects; the writer quotes any
        # commas, so the text no longer has to be stripped of them.
        with open(csv_path, 'w', encoding='utf-8', newline='') as csv_file:
            writer = csv.writer(csv_file)
            writer.writerow(['person', 'text'])
            writer.writerows(rows)

    def tokenize_transcript(self):
        """Extract punctuation-free training sentences from the CSV file.

        Reads the file at ``self.csv_transcript_filedir``, keeps only the
        rows of ``self.character`` when one is set, splits each text into
        sentences with ``nltk.sent_tokenize`` and removes every character
        found in ``string.punctuation``.  The resulting list is stored in
        ``self.training_data``.
        """
        frame = pandas.read_csv(self.csv_transcript_filedir)
        if self.character is not None:
            frame = frame[frame['person'] == self.character]
        # Rows without a speaker carry no text (NaN) and sent_tokenize only
        # accepts strings, so drop them and coerce the rest to str.
        texts = frame['text'].dropna().astype(str)

        strip_punctuation = str.maketrans('', '', string.punctuation)
        training_data = []
        for text in texts:
            for sent in nltk.sent_tokenize(text):
                cleaned = sent.lstrip(' ').translate(strip_punctuation)
                training_data.append(cleaned)
        self.training_data = training_data

    def learn(self):
        """Run the whole pipeline: parse, tokenize and build the model."""
        self.parse_transcript()
        self.tokenize_transcript()
        self.make_word_dict(self.training_data)

    def make_word_dict(self, text):
        """Build the second-order Markov model from sentences.

        Every pair of consecutive words becomes a key whose value lists each
        word observed right after that pair; ``END_TOKEN`` is recorded when
        the pair closes a sentence.  The model is stored in
        ``self.vocabulary``.

        Args:
            text: Iterable of sentence strings.
        """
        word_dict = defaultdict(list)
        for sent in text:
            words = nltk.word_tokenize(sent)
            # Follower of the pair starting at i is words[i + 2], or the end
            # marker for the last pair of the sentence.
            followers = words[2:] + [self.END_TOKEN]
            for first, second, follower in zip(words, words[1:], followers):
                word_dict[(first, second)].append(follower)
        self.vocabulary = word_dict

    def generate_text(self, num):
        """Generate ``num`` random sentences from the learned model.

        Args:
            num: Number of sentences to generate.

        Returns:
            The generated sentences, each terminated by a period, joined by
            single spaces.  ``num == 1`` yields one sentence; ``num <= 0``
            yields an empty string.

        Raises:
            IndexError: If the vocabulary is empty.
        """
        if not self.vocabulary:
            raise IndexError(
                "cannot generate text from an empty vocabulary; "
                "call learn() first"
            )
        start_keys = list(self.vocabulary)
        return ' '.join(
            self._generate_sentence(start_keys) for _ in range(num)
        )

    def _generate_sentence(self, start_keys):
        """Return one sentence that starts from a random key of the model."""
        words = list(random.choice(start_keys))
        while True:
            # .get() keeps a defaultdict model from growing empty entries.
            followers = self.vocabulary.get((words[-2], words[-1]))
            follower = random.choice(followers) if followers else self.END_TOKEN
            if follower == self.END_TOKEN:
                break
            words.append(follower)
        return ' '.join(words) + '.'

    def say(self, text):
        """Speak ``text`` through the ``say`` command-line tool.

        The command is run without a shell, so quotes or shell
        metacharacters in ``text`` are passed through literally instead of
        being interpreted.

        Args:
            text: The string to speak (voice ``Oliver``).
        """
        import subprocess
        import sys

        try:
            subprocess.run(['say', '-v', 'Oliver', text], check=False)
        except FileNotFoundError:
            # Stay best-effort when the tool is not installed, as the
            # previous shell-based call did.
            print('say: command not found', file=sys.stderr)
def main():
    """Train a bot on the JARVIS lines and print 100 generated sentences."""
    sentence_count = 100
    jarvis = Chatbot("J.A.R.V.I.S", "avengers_age_of_ultron.txt", "JARVIS")
    jarvis.learn()
    # One sentence per call, each printed on its own line.
    for _ in range(sentence_count):
        print(jarvis.generate_text(1))
# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
