I have recently started my Python journey and stackoverflow has helped me a lot in resolving most of the issues I came across. However, this is one that I don't seem to be able to catch, despite trying the different solutions suggested here.
I am collecting urls from a website in a list. My next step is to go through the urls and download them if they don't already exist in the folder. However, some of the URLs contain non-ascii characters, such as ú, é, ç. Which leads to the unicode error below.
UnicodeEncodeError: 'ascii' codec can't encode character '\xfa' in position 64: ordinal not in range(128)
I escaped it for now with try/except but would need to download those manually.
When I use .encode('utf-8') it also result in an error: "TypeError: cannot use a string pattern on a bytes-like object".
This is my code:
import os
import urllib
dict = (this includes a large dictionary scraped from a website)
links = []
for d in dict :
    links.append(d["EncodedAbsUrl"])
    
# For every line in the file
for url in links:
    # Split on the rightmost / and take everything on the right side of that
    name = url.rsplit('/', 1)[-1]
    # Combine the name and the downloads directory to get the local filename
    filename = os.path.join(r'C:\\PATH', name)
    # Download the file if it does not exist
    if not os.path.isfile(filename):
        try:
            urllib.request.urlretrieve(url, filename)    
        except UnicodeEncodeError:
            print(filename + " could not be saved.")
            pass
    else:
            print(filename + " already exists.")    
Edit
Based on Ardiya's suggestion in the comments (thanks a million for that), I have changed used the urllib.parse.quote_plus method. This seems to work but also returns an http error 400. Revised code now reads:
for url in links:
   # Split on the rightmost / and take everything on the right side of that
    name = url.rsplit('/', 1)[-1]
    
    # Combine the name and the downloads directory to get the local filename
    filename = os.path.join(r'C:\\PATH', name)
    # Download the file if it does not exist
    if not os.path.isfile(filename):
        try:
            urllib.request.urlretrieve(url, filename)    
        except UnicodeEncodeError:
            new_url = str(root + url.split('/')[-2] + '/' + urllib.parse.quote_plus(name))
            urllib.request.urlretrieve(new_url, filename)
    else:
            print(filename + " already exists.")
For example, the following link is in the source dictionary: https://www4.unfccc.int/sites/ndcstaging/PublishedDocuments/Peru%20First/iNDC%20Perú%20castellano.pdf is translated into https://www4.unfccc.int/sites/ndcstaging/PublishedDocuments/Peru%20First/iNDC%2520Per%C3%BA%2520castellano.pdf which does not properly work.
---------------------------------------------------------------------------
UnicodeEncodeError                        Traceback (most recent call last)
<ipython-input-6-12f5f676515d> in <module>
     25         try:
---> 26             urllib.request.urlretrieve(url, filename)
     27         except UnicodeEncodeError:
~\Anaconda3\lib\urllib\request.py in urlretrieve(url, filename, reporthook, data)
    246 
--> 247     with contextlib.closing(urlopen(url, data)) as fp:
    248         headers = fp.info()
~\Anaconda3\lib\urllib\request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
    221         opener = _opener
--> 222     return opener.open(url, data, timeout)
    223 
~\Anaconda3\lib\urllib\request.py in open(self, fullurl, data, timeout)
    524         sys.audit('urllib.Request', req.full_url, req.data, req.headers, req.get_method())
--> 525         response = self._open(req, data)
    526 
~\Anaconda3\lib\urllib\request.py in _open(self, req, data)
    541         protocol = req.type
--> 542         result = self._call_chain(self.handle_open, protocol, protocol +
    543                                   '_open', req)
~\Anaconda3\lib\urllib\request.py in _call_chain(self, chain, kind, meth_name, *args)
    501             func = getattr(handler, meth_name)
--> 502             result = func(*args)
    503             if result is not None:
~\Anaconda3\lib\urllib\request.py in https_open(self, req)
   1392         def https_open(self, req):
-> 1393             return self.do_open(http.client.HTTPSConnection, req,
   1394                 context=self._context, check_hostname=self._check_hostname)
~\Anaconda3\lib\urllib\request.py in do_open(self, http_class, req, **http_conn_args)
   1349             try:
-> 1350                 h.request(req.get_method(), req.selector, req.data, headers,
   1351                           encode_chunked=req.has_header('Transfer-encoding'))
~\Anaconda3\lib\http\client.py in request(self, method, url, body, headers, encode_chunked)
   1254         """Send a complete request to the server."""
-> 1255         self._send_request(method, url, body, headers, encode_chunked)
   1256 
~\Anaconda3\lib\http\client.py in _send_request(self, method, url, body, headers, encode_chunked)
   1265 
-> 1266         self.putrequest(method, url, **skips)
   1267 
~\Anaconda3\lib\http\client.py in putrequest(self, method, url, skip_host, skip_accept_encoding)
   1103 
-> 1104         self._output(self._encode_request(request))
   1105 
~\Anaconda3\lib\http\client.py in _encode_request(self, request)
   1183         # ASCII also helps prevent CVE-2019-9740.
-> 1184         return request.encode('ascii')
   1185 
UnicodeEncodeError: 'ascii' codec can't encode character '\xfa' in position 64: ordinal not in range(128)
During handling of the above exception, another exception occurred:
HTTPError                                 Traceback (most recent call last)
<ipython-input-6-12f5f676515d> in <module>
     27         except UnicodeEncodeError:
     28             new_url = str(root + url.split('/')[-2] + '/' + urllib.parse.quote_plus(name))
---> 29             urllib.request.urlretrieve(new_url, filename)
     30     else:
     31             print(filename + " already exists.")
~\Anaconda3\lib\urllib\request.py in urlretrieve(url, filename, reporthook, data)
    245     url_type, path = _splittype(url)
    246 
--> 247     with contextlib.closing(urlopen(url, data)) as fp:
    248         headers = fp.info()
    249 
~\Anaconda3\lib\urllib\request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
    220     else:
    221         opener = _opener
--> 222     return opener.open(url, data, timeout)
    223 
    224 def install_opener(opener):
~\Anaconda3\lib\urllib\request.py in open(self, fullurl, data, timeout)
    529         for processor in self.process_response.get(protocol, []):
    530             meth = getattr(processor, meth_name)
--> 531             response = meth(req, response)
    532 
    533         return response
~\Anaconda3\lib\urllib\request.py in http_response(self, request, response)
    638         # request was successfully received, understood, and accepted.
    639         if not (200 <= code < 300):
--> 640             response = self.parent.error(
    641                 'http', request, response, code, msg, hdrs)
    642 
~\Anaconda3\lib\urllib\request.py in error(self, proto, *args)
    567         if http_err:
    568             args = (dict, 'default', 'http_error_default') + orig_args
--> 569             return self._call_chain(*args)
    570 
    571 # XXX probably also want an abstract factory that knows when it makes
~\Anaconda3\lib\urllib\request.py in _call_chain(self, chain, kind, meth_name, *args)
    500         for handler in handlers:
    501             func = getattr(handler, meth_name)
--> 502             result = func(*args)
    503             if result is not None:
    504                 return result
~\Anaconda3\lib\urllib\request.py in http_error_default(self, req, fp, code, msg, hdrs)
    647 class HTTPDefaultErrorHandler(BaseHandler):
    648     def http_error_default(self, req, fp, code, msg, hdrs):
--> 649         raise HTTPError(req.full_url, code, msg, hdrs, fp)
    650 
    651 class HTTPRedirectHandler(BaseHandler):
HTTPError: HTTP Error 400: Bad Request