import pypyodbc
   from pypyodbc import *
   import nltk 
   from nltk import *
   import csv
   import sys
   import codecs
   import re
   #connect to the database 
   conn = pypyodbc.connect('Driver={Microsoft Access Driver (*.Mdb)};\
          DBQ=C:\\TextData.mdb')
   #create a cursor to control the datbase with
   cur = conn.cursor()
   cur.execute('''SELECT Text FROM MessageCreationDate WHERE Tags LIKE 'GHS - %'; ''')
   TextSet = cur.fetchall()
   ghsWordList = []
   TextWords = list(TextSet)
   for row in TextWords :
       message = re.split('\W+',str(row))
       for eachword in message :
            if eachword.isalpha() :
               ghsWordList.append(eachword.lower())
   print(ghsWordList)
When I run this code, it's giving me an error:
'charmap' codec can't encode character '\u0161' in position 2742: character maps to <undefined>
I've looked at a number of other answers on here to similar questions, and googled the hell out of it; however I am not well versed enough in Python nor Character Encoding to know where I need to used the Codecs module to change the character set being used to present/append/create the list?
Could someone not only help me with the code but also point me in the direct of some good reading materials for understanding this sort of thing?
 
    