+
    WBi,                     n    ^ RI t ^ RIt^ RIHtHt ^RIHtHt ]P                  ! R4      t	 ! R R4      t
R# )    N)OptionalUnion)LanguageFilterProbingStates%   [a-zA-Z]*[-]+[a-zA-Z]*[^a-zA-Z-]?c                   .  a  ] tR t^(t o Rt]P                  3V 3R lR lltV 3R lR lt]	V 3R lR l4       t
]	V 3R lR	 l4       tV 3R
 lR lt]	V 3R lR l4       tV 3R lR lt]V 3R lR l4       t]V 3R lR l4       t]V 3R lR l4       tRtV tR# )CharSetProbergffffff?c                $   < V ^8  d   QhRS[ RR/# )   lang_filterreturnN)r   )format__classdict__s   ":/usr/lib/python3.14/site-packages/chardet/charsetprober.py__annotate__CharSetProber.__annotate__,   s     2 2N 2T 2    c                    \         P                  V n        R V n        Wn        \
        P                  ! \        4      V n        R# )TN)	r   	DETECTING_stateactiver   logging	getLogger__name__logger)selfr   s   &&r   __init__CharSetProber.__init__,   s.    ",,&''1r   c                   < V ^8  d   QhRR/# )r
   r   N )r   r   s   "r   r   r   2   s     - -t -r   c                0    \         P                  V n        R # N)r   r   r   r   s   &r   resetCharSetProber.reset2   s    ",,r   c                0   < V ^8  d   QhRS[ S[,          /# r
   r   r   str)r   r   s   "r   r   r   6   s      hsm r   c                    R # r!   r   r"   s   &r   charset_nameCharSetProber.charset_name5   s    r   c                0   < V ^8  d   QhRS[ S[,          /# r&   r'   )r   r   s   "r   r   r   :   s     " "(3- "r   c                    \         hr!   NotImplementedErrorr"   s   &r   languageCharSetProber.language9   s    !!r   c                <   < V ^8  d   QhRS[ S[S[3,          RS[/# )r
   byte_strr   )r   bytes	bytearrayr   )r   r   s   "r   r   r   =   s%     " "U5)#34 " "r   c                    \         hr!   r.   )r   r3   s   &&r   feedCharSetProber.feed=   s    !!r   c                    < V ^8  d   QhRS[ /# r&   )r   )r   r   s   "r   r   r   A   s      | r   c                    V P                   # r!   )r   r"   s   &r   stateCharSetProber.state@   s    {{r   c                    < V ^8  d   QhRS[ /# r&   )float)r   r   s   "r   r   r   D   s       r   c                    R # )g        r   r"   s   &r   get_confidenceCharSetProber.get_confidenceD   s    r   c                <   < V ^8  d   QhRS[ S[S[3,          RS[/# r
   bufr   r   r4   r5   )r   r   s   "r   r   r   H   s%      5	)9#: u r   c                6    \         P                  ! R RV 4      p V # )s   ([ -])+    )resub)rD   s   &r   filter_high_byte_only#CharSetProber.filter_high_byte_onlyG   s    ff&c2
r   c                <   < V ^8  d   QhRS[ S[S[3,          RS[/# rC   rE   )r   r   s   "r   r   r   M   s%      eY.>(? I r   c                    \        4       p\        P                  V 4      pV FL  pVP                  VRR 4       VRR pVP	                  4       '       g
   VR8  d   RpVP                  V4       KN  	  V# )u  
We define three types of bytes:
alphabet: english alphabets [a-zA-Z]
international: international characters [-ÿ]
marker: everything else [^a-zA-Z-ÿ]
The input buffer can be thought to contain a series of words delimited
by markers. This function works to filter all words that contain at
least one international character. All contiguous sequences of markers
are replaced by a single space ascii character.
This filter applies to all scripts which do not use English characters.
N   rG   )r5   INTERNATIONAL_WORDS_PATTERNfindallextendisalpha)rD   filteredwordsword	last_chars   &    r   filter_international_words(CharSetProber.filter_international_wordsL   st     ;
 ,33C8DOOD"I& RS	I$$&&9w+> 	OOI&  r   c                <   < V ^8  d   QhRS[ S[S[3,          RS[/# rC   rE   )r   r   s   "r   r   r   o   s%     $ $U5)#34 $ $r   c                V   \        4       pRp^ p\        V 4      P                  R4      p \        V 4       FV  w  rEVR8X  d   V^,           pRpK  VR8X  g   K"  WC8  d-   V'       g%   VP	                  WV 4       VP	                  R4       RpKX  	  V'       g   VP	                  WR 4       V# )a+  
Returns a copy of ``buf`` that retains only the sequences of English
alphabet and high byte characters that are not between <> characters.
This filter can be applied to all scripts which contain both English
characters and extended ASCII characters, but is currently only used by
``Latin1Prober``.
Fc   >   <rG   TN)r5   
memoryviewcast	enumeraterR   )rD   rT   in_tagprevcurrbuf_chars   &     r   remove_xml_tagsCharSetProber.remove_xml_tagsn   s     ;o""3''nND 4axT!;v OOCTN3OOD) -$  OOCJ'r   )r   r   r   r   N)r   
__module____qualname____firstlineno__SHORTCUT_THRESHOLDr   NONEr   r#   propertyr*   r0   r7   r;   r@   staticmethodrJ   rX   rf   __static_attributes____classdictcell__)r   s   @r   r   r   (   s     5C5H5H 2 2- -   " "" "       B $ $r   r   )r   rH   typingr   r   enumsr   r   compilerP   r   r   r   r   <module>rt      s1   :  	 " / jj8 
k kr   