+
    WBi9!                     D    ^ RI HtHt ^RIHt ^RIHt  ! R R]4      tR# )    )ListUnion)CharSetProberProbingStatec                   j  a a ] tR t^t oRt^tRtV3R lV 3R lltV3R lV 3R llt]	V3R lR l4       t
]	V3R	 lR
 l4       tV3R lR ltV3R lR ltV3R lR ltV3R lR ltV3R lR ltV3R lR ltV3R lR ltV3R lR ltV3R lR lt]	V3R lR l4       tV3R lR  ltR!tVtV ;t# )"UTF1632ProberaL  
This class simply looks for occurrences of zero bytes, and infers
whether the file is UTF16 or UTF32 (low-endian or big-endian)
For instance, files looking like (       [nonzero] )+
have a good probability to be UTF32BE.  Files looking like (   [nonzero] )+
may be guessed to be UTF16BE, and inversely for little-endian varieties.
gGz?c                   < V ^8  d   QhRR/#    returnN )format__classdict__s   ":/usr/lib/python3.14/site-packages/chardet/utf1632prober.py__annotate__UTF1632Prober.__annotate__)   s      $     c                  < \         SV `  4        ^ V n        ^ .^,          V n        ^ .^,          V n        \
        P                  V n        . ROV n        RV n	        RV n
        RV n        RV n        RV n        RV n        V P                  4        R# r   FN)r   r   r   r   )super__init__positionzeros_at_modnonzeros_at_modr   	DETECTING_statequadinvalid_utf16beinvalid_utf16leinvalid_utf32beinvalid_utf32le'first_half_surrogate_pair_detected_16be'first_half_surrogate_pair_detected_16leresetself	__class__s   &r   r   UTF1632Prober.__init__)   s~    C!G !sQw",, 	$$$$7<47<4

r   c                   < V ^8  d   QhRR/# r   r   )r   r   s   "r   r   r   8   s     ! !t !r   c                   < \         SV `  4        ^ V n        ^ .^,          V n        ^ .^,          V n        \
        P                  V n        RV n        RV n	        RV n
        RV n        RV n        RV n        . ROV n        R# r   )r   r%   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r   r&   s   &r   r%   UTF1632Prober.reset8   ss    C!G !sQw",,$$$$7<47<4 	r   c                    < V ^8  d   QhRS[ /# r   r   str)r   r   s   "r   r   r   G   s     
 
c 
r   c                    V P                  4       '       d   R # V P                  4       '       d   R# V P                  4       '       d   R# V P                  4       '       d   R# R# )zutf-32bezutf-32lezutf-16bezutf-16lezutf-16)is_likely_utf32beis_likely_utf32leis_likely_utf16beis_likely_utf16ler'   s   &r   charset_nameUTF1632Prober.charset_nameF   sM    !!##!!##!!##!!##r   c                    < V ^8  d   QhRS[ /# r.   r/   )r   r   s   "r   r   r   T   s      # r   c                    R # ) r   r6   s   &r   languageUTF1632Prober.languageS   s    r   c                    < V ^8  d   QhRS[ /# r.   float)r   r   s   "r   r   r   W        - -E -r   c                <    \        R V P                  R,          4      # )      ?g      @maxr   r6   s   &r   approx_32bit_chars UTF1632Prober.approx_32bit_charsW       3+,,r   c                    < V ^8  d   QhRS[ /# r.   r?   )r   r   s   "r   r   r   Z   rA   r   c                <    \        R V P                  R,          4      # )rC   g       @rD   r6   s   &r   approx_16bit_chars UTF1632Prober.approx_16bit_charsZ   rH   r   c                    < V ^8  d   QhRS[ /# r.   bool)r   r   s   "r   r   r   ]        
 
4 
r   c                   V P                  4       pWP                  8  ;'       d    V P                  ^ ,          V,          V P                  8  ;'       d    V P                  ^,          V,          V P                  8  ;'       dp    V P                  ^,          V,          V P                  8  ;'       dA    V P                  ^,          V,          V P                  8  ;'       d    V P
                  '       * # r   )rF   MIN_CHARS_FOR_DETECTIONr   EXPECTED_RATIOr   r!   r'   approx_charss   & r   r2   UTF1632Prober.is_likely_utf32be]   s    ..0;;; 
 
a </$2E2EE ) )!!!$|3d6I6II) )!!!$|3d6I6II) ) $$Q',69L9LL) ) (((	
r   c                    < V ^8  d   QhRS[ /# r.   rN   )r   r   s   "r   r   r   g   rP   r   c                   V P                  4       pWP                  8  ;'       d    V P                  ^ ,          V,          V P                  8  ;'       d    V P                  ^,          V,          V P                  8  ;'       dp    V P                  ^,          V,          V P                  8  ;'       dA    V P                  ^,          V,          V P                  8  ;'       d    V P
                  '       * # rR   )rF   rS   r   rT   r   r"   rU   s   & r   r3   UTF1632Prober.is_likely_utf32leg   s    ..0;;; 
 
  #l2T5H5HH ) )!!!$|3d6I6II) )!!!$|3d6I6II) ) !!!$|3d6I6II) ) (((	
r   c                    < V ^8  d   QhRS[ /# r.   rN   )r   r   s   "r   r   r   q   rP   r   c                   V P                  4       pWP                  8  ;'       d    V P                  ^,          V P                  ^,          ,           V,          V P                  8  ;'       dY    V P                  ^ ,          V P                  ^,          ,           V,          V P                  8  ;'       d    V P
                  '       * # )   )rK   rS   r   rT   r   r   rU   s   & r   r4   UTF1632Prober.is_likely_utf16beq       ..0;;; 
 
!!!$t';';A'>>,N!!" ) )""1%(9(9!(<<L!!") ) (((	
r   c                    < V ^8  d   QhRS[ /# r.   rN   )r   r   s   "r   r   r   {   rP   r   c                   V P                  4       pWP                  8  ;'       d    V P                  ^ ,          V P                  ^,          ,           V,          V P                  8  ;'       dY    V P                  ^,          V P                  ^,          ,           V,          V P                  8  ;'       d    V P
                  '       * # rR   )rK   rS   r   rT   r   r    rU   s   & r   r5   UTF1632Prober.is_likely_utf16le{   r_   r   c                4   < V ^8  d   QhRS[ S[,          RR/# )r   r   r   Nr   int)r   r   s   "r   r   r      s     ( (d3i (D (r   c                t   V^ ,          ^ 8w  gD   V^,          ^8  g6   V^ ,          ^ 8X  d/   V^,          ^ 8X  d!   ^V^,          u;8:  d   ^8:  d   M MRV n         V^,          ^ 8w  gE   V^,          ^8  g7   V^,          ^ 8X  d6   V^,          ^ 8X  d&   ^V^,          u;8:  d   ^8:  d   M R# RV n        R# R# R# R# )z
Validate if the quad of bytes is valid UTF-32.

UTF-32 is valid in the range 0x00000000 - 0x0010FFFF
excluding 0x0000D800 - 0x0000DFFF

https://en.wikipedia.org/wiki/UTF-32
TN)r!   r"   )r'   r   s   &&r   validate_utf32_characters'UTF1632Prober.validate_utf32_characters   s     GqLAw~Q1aA$$q'2IT2I#'D GqLAw~Q1aA$$q'2IT2I2I#'D  3Jr   c                4   < V ^8  d   QhRS[ S[,          RR/# )r   pairr   Nrd   )r   r   s   "r   r   r      s     , ,d3i ,D ,r   c                   V P                   '       gC   ^V^ ,          u;8:  d   ^8:  d   M MRV n         MI^V^ ,          u;8:  d   ^8:  d   M M0RV n        M(^V^ ,          u;8:  d   ^8:  d   M MRV n         MRV n        V P                  '       gH   ^V^,          u;8:  d   ^8:  d   M M	RV n        R# ^V^,          u;8:  d   ^8:  d   M R# RV n        R# R# ^V^,          u;8:  d   ^8:  d   M M	RV n        R# RV n        R# )a	  
Validate if the pair of bytes is  valid UTF-16.

UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
with an exception for surrogate pairs, which must be in the range
0xD800-0xDBFF followed by 0xDC00-0xDFFF

https://en.wikipedia.org/wiki/UTF-16
TFN)r#   r   r$   r    )r'   rj   s   &&r   validate_utf16_characters'UTF1632Prober.validate_utf16_characters   s     ;;;tAw&$&?C<a(D('+$tAw&$&?D<'+$;;;tAw&$&?C<a(D(('+$ ) tAw&$&?D<'+$r   c                <   < V ^8  d   QhRS[ S[S[3,          RS[/# )r   byte_strr   )r   bytes	bytearrayr   )r   r   s   "r   r   r      s%      U5)#34  r   c                   V F  pV P                   ^,          pW P                  V&   V^8X  d`   V P                  V P                  4       V P                  V P                  R,          4       V P                  V P                  R,          4       V^ 8X  d!   V P                  V;;,          ^,          uu&   MV P
                  V;;,          ^,          uu&   V ;P                   ^,          un         K  	  V P                  # )   :r   r   N:r   rs   N)r   r   rg   rl   r   r   state)r'   ro   cmod4s   &&  r   feedUTF1632Prober.feed   s    A==1$DIIdOqy..tyy9..tyy~>..tyy~>Av!!$'1,'$$T*a/*MMQM  zzr   c                    < V ^8  d   QhRS[ /# r.   r   )r   r   s   "r   r   r      s     
 
| 
r   c                H   V P                   \        P                  \        P                  09   d   V P                   # V P	                  4       R 8  d"   \        P                  V n         V P                   # V P
                  R8  d   \        P                  V n         V P                   # )g?i   )r   r   NOT_MEFOUND_ITget_confidencer   r6   s   &r   rt   UTF1632Prober.state   sz    ;;<..0E0EFF;; 4'&//DK
 {{	 ]]X% '--DK{{r   c                    < V ^8  d   QhRS[ /# r.   r?   )r   r   s   "r   r   r      s     

 

 

r   c                    V P                  4       '       gC   V P                  4       '       g-   V P                  4       '       g   V P                  4       '       d   R # R# )g333333?g        )r5   r4   r3   r2   r6   s   &r   r}   UTF1632Prober.get_confidence   sT     &&(())++))++))++ 		
 		
r   )r   r#   r$   r   r    r!   r"   r   r   r   r   )__name__
__module____qualname____firstlineno____doc__rS   rT   r   r%   propertyr7   r<   rF   rK   r2   r3   r4   r5   rg   rl   rw   rt   r}   __static_attributes____classdictcell____classcell__)r(   r   s   @@r   r	   r	      s      !N ! ! 
 
  - -- -
 

 

 

 
( (,, ,@  
 


 

 

r   r	   N)typingr   r   charsetproberr   enumsr   r	   r   r   r   <module>r      s   *  ( F
M F
r   