In Python 3, a valid identifier can contain characters outside the ASCII range. Since you don't want to use str.isidentifier, you can write your own version of it in Python.
Its specification can be found here: https://www.python.org/dev/peps/pep-3131/#specification-of-language-changes
Implementation:
import keyword
import re
import unicodedata
def is_other_id_start(char):
    """
    Report whether the given string begins with an Other_ID_Start
    code point, as listed in
    http://unicode.org/Public/UNIDATA/PropList.txt
    """
    # re.match anchors at the start of the string, so only the first
    # code point is examined; an empty string yields False.
    other_id_start = r'[\u1885-\u1886\u2118\u212E\u309B-\u309C]'
    return re.match(other_id_start, char) is not None
def is_other_id_continue(char):
    """
    Report whether the given string begins with an Other_ID_Continue
    code point, as listed in
    http://unicode.org/Public/UNIDATA/PropList.txt
    """
    # re.match anchors at the start of the string, so only the first
    # code point is examined; an empty string yields False.
    other_id_continue = r'[\u00B7\u0387\u1369-\u1371\u19DA]'
    return re.match(other_id_continue, char) is not None
def is_xid_start(char):
    """
    Report whether a single character qualifies as an identifier start.

    A character qualifies when its Unicode general category is one of
    the letter categories (Lu, Ll, Lt, Lm, Lo) or letter number (Nl),
    or when it carries the Other_ID_Start property.
    """
    # PEP 3131 defines ID_Start as the categories below plus the
    # underscore and Other_ID_Start, and XID_Start as that set closed
    # under NFKC normalization. This function checks only the
    # categories and Other_ID_Start; it neither accepts the underscore
    # nor performs any normalization itself.
    start_categories = ('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl')
    if unicodedata.category(char) in start_categories:
        return True
    return is_other_id_start(char)
def is_xid_continue(char):
    """
    Report whether a single character may appear after the first
    position of an identifier.

    A character qualifies when it is accepted by is_xid_start, when its
    Unicode general category is nonspacing mark (Mn), spacing combining
    mark (Mc), decimal number (Nd) or connector punctuation (Pc), or
    when it carries the Other_ID_Continue property.
    """
    # Look the category up first so an invalid argument fails here,
    # before any helper is consulted.
    general_category = unicodedata.category(char)
    if is_xid_start(char):
        return True
    if general_category in ('Mn', 'Mc', 'Nd', 'Pc'):
        return True
    return is_other_id_continue(char)
def is_valid_identifier(name):
    """
    Return True if *name* is a valid, non-keyword Python 3 identifier.

    The name is first converted to NFKC normal form, because
    identifiers are normalized that way while parsing. The normalized
    name is then accepted only if it is non-empty, is not a reserved
    keyword, starts with an underscore or an identifier-start
    character, and consists of identifier-continue characters after
    the first position.

    Args:
        name: The candidate identifier, as a str.

    Returns:
        True when every check passes, False otherwise.
    """
    # All identifiers are converted into the normal form NFKC
    # while parsing; comparison of identifiers is based on NFKC.
    name = unicodedata.normalize('NFKC', name)
    # An empty string is not an identifier (str.isidentifier agrees).
    # Without this guard the first-character test below would raise
    # IndexError instead of answering False.
    if not name:
        return False
    # Reserved words can never be used as identifiers.
    if keyword.iskeyword(name):
        return False
    # The identifier syntax is <XID_Start> <XID_Continue>*, with the
    # underscore also allowed in the first position.
    first, rest = name[0], name[1:]
    if not (first == '_' or is_xid_start(first)):
        return False
    return all(is_xid_continue(char) for char in rest)


if __name__ == '__main__':
    # From goo.gl/pvpYg6
    assert is_valid_identifier("a") is True
    assert is_valid_identifier("Z") is True
    assert is_valid_identifier("_") is True
    assert is_valid_identifier("b0") is True
    assert is_valid_identifier("bc") is True
    assert is_valid_identifier("b_") is True
    assert is_valid_identifier("µ") is True
    # The empty string is not a valid identifier.
    assert is_valid_identifier("") is False
    assert is_valid_identifier(" ") is False
    assert is_valid_identifier("[") is False
    assert is_valid_identifier("©") is False
    assert is_valid_identifier("0") is False
You can check CPython's and PyPy's implementations here and here, respectively.