Updated HTMLParser to recognize XML element names and attribute names.
author Frederic Jolliton <frederic@jolliton.com>
Tue, 13 Sep 2005 06:09:05 +0000 (06:09 +0000)
committer Frederic Jolliton <frederic@jolliton.com>
Tue, 13 Sep 2005 06:09:05 +0000 (06:09 +0000)
git-archimport-id: frederic@jolliton.com--2005-main/tx--main--0.1--patch-35

htmlparser.py

index 935dfd0..3739832 100644 (file)
@@ -49,7 +49,7 @@ reCheckTag = re.compile(
        '/?[a-z]' , re.I
 )
 reTagName = re.compile(
-       '^[a-z][-:a-z0-9]*' , re.I
+       '^[a-z_:][a-z0-9._:-]*' , re.I
 )
 reEndOfTag = re.compile( # <CHAR*> (<QUOTED> <CHAR*>)* '>'
        '[^\'">]*' # ..
@@ -64,7 +64,7 @@ reEndOfTag = re.compile( # <CHAR*> (<QUOTED> <CHAR*>)* '>'
        '>'
 )
 reAttr = re.compile(
-       '([a-z_][a-z0-9._-]*(?::[a-z_][a-z0-9._-]*)?)' # name
+       '([a-z_:][a-z0-9._:-]*)' # name
        '('
        r'\s*=\s*'         # spaces then =
        '('