
    i2                       d dl mZ d dlZd dlZd dlmZ d dlmZmZm	Z	 d dl
mZmZmZmZmZmZmZmZmZ erd dlZn
	 d dlZesJ 	 d dlmZmZ d dl
mZmZ d dlmZmZmZ 	 d dlZd	Zd dl m!Z!m"Z"m#Z#m$Z$m%Z%m&Z& 	 	 d	 	 	 	 	 	 	 ddZ'dZ(ddZ)ddZ*ddZ+g dZ, G d de      Z-y# e$ r d dlZY hw xY w# e$ r dZd
ZY Vw xY w)    )annotationsN)
HTMLParser)StringIO
TextIOBaseTextIOWrapper)	IOTYPE_CHECKINGAnyDictListOptionalTextIOTupleUnion)normpathsep)r	   cast)urljoinurlsplit
urlunsplitTF)BytesIOWrapperInputSourcePythonInputSourceStringInputSourceURLInputSourcecreate_input_sourcec                	   t        | t              r| j                  dfS t        | t              rTd}| j	                         }d}t        |t
              r_t        t        t        t        t        f   |j                        }t        |t              r|}n t        |t              r|j                         }t        r|t        j                  |      }||fS t        |t
              r7| j!                         }t        j                  |j#                               }||fS t        j                  |j#                               }||fS |t%        j                  |      }||fS t%        j&                  | j!                               }||fS t)        | d      } 	 | j*                  }	|	duxr |	j1                         dv }
|
rt3        ||      }nd}	 | j	                         }	 | j!                         }||t5        dt7        |              	 |dn| j9                         }d}|Ot        |t
              r?|j                  }t        |t              r|}n t        |t              r|j                         }	 |
ru|s||}n<||j#                         }n)t:        r|J |d}t=        ||      j#                         }|j?                  |       |jA                         |jC                         }}nt        rd}|t        j                  |      }n|t        |t
              s|&|$t        j                  |j#                               }nt:        r|J t        j                  |j#                               }nud}|<t%        j                  |      |	 |jE                          |	 |jE                          S S ||}nt:        r|J |d}t=        ||      }t%        j&                  |      }||f|	 |jE                          |	 |jE                          S S # t,        t.        f$ r d}	Y w xY w# t,        t.        f$ r d}Y `w xY w# t,        t.        f$ r d}Y gw xY w# t,        t.        f$ r d}Y Ow xY w# t,        $ r Y w xY w# t,        $ r Y S w xY w# t,        $ r Y w xY w# t,        $ r Y S w xY w# |!	 |jE                          n# t,        $ r Y nw xY w|!	 |jE                          w # t,        $ r Y w w xY ww xY w)	a  Extract JSON from a source document.

    The source document can be JSON or HTML with embedded JSON script elements (type attribute = "application/ld+json").
    To process as HTML ``source.content_type`` must be set to "text/html" or "application/xhtml+xml".

    :param source: the input source document (JSON or HTML)

    :param fragment_id: if source is an HTML document then extract only the script element with matching id attribute, defaults to None

    :param extract_all_scripts: if source is an HTML document then extract all script elements (unless fragment_id is provided), defaults to False (extract only the first script element)

    :return: Tuple with the extracted JSON document and value of the HTML base element
    Nzjson-ld)format)z	text/htmlzapplication/xhtml+xml)fragment_idextract_all_scriptszLSource does not have a character stream or a byte stream and cannot be used zutf-8)encoding)#
isinstancer   datar   getByteStreamr   r   r   strr   r   wrappedgetvalue_HAS_ORJSONorjsonloadsgetCharacterStreamreadjsonloadr   content_typeAttributeErrorLookupErrorlowerHTMLJSONParser
ValueErrortypegetEncodingr	   r   feedget_jsonget_baseclose)sourcer   r    	html_baseb_streamoriginal_stringwrapped_inner	json_dictc_streamr/   is_htmlhtml_docparser
b_encodingunderlying_stringhtml_string
use_streams                   PC:\Projects\mas-dev\.venv\Lib\site-packages\rdflib/plugins/shared/jsonld/util.pysource_to_jsonrI   +   s5   * &+,{{D  &+,	 ''))-h/ sHj'@!A8CSCSTM--"/M84"/"8"8":*"LL9	 )## Hn5!446"LL9	 )## #LL9	 )##	 * JJ7	 )## !IIf&?&?&AB	)## !	:F** $& <+=+=+? D ,G 3A#9L4
 ''),,. H,Z[_`f[gZhi
 	
,4,<D&BTBTBV
 (,
8^ D ((mS) -x0 - 6 6 87~1 ,#4%&mmo #///%!(J+HzJOOQ,#1#:#:#<n>U>U>WyII ,"LL):;	%*X~*N#&"LL9	 #///"LL9	I ,zz"34      # #%
 #///%!(J*8jI
		*-I)#     q K(  K(  K(  K( 
v " 
 "  " 
 "   !  !   s   P 	P% P= Q .DR) Q-Q<-8R) (R;RP"!P"%P:9P:=QQQ*)Q*-	Q98Q9<	RR	RR	R&%R&)S1-R>=S1>	S
S1	S

S1S! S1!	S-*S1,S--S1)#/:c                p    t         D ]*  }| j                  |      }|dkD  s| d |dz    | |dz   d  fc S  | d fS )N   )VOCAB_DELIMSrfind)iridelimats      rH   	split_irirU      sL    YYu7xa=#b1fh-//  9    c                   d|v r|S t        |       }t        |      }|j                  r|S |j                  dv r|j                  j                  dd      }dt	        |      dkD  r|d   ndz   }t        ||j                        }|j                  rd|j                   nd}|j                   d|d    | | }nt        t        | |            }	t        |	d	         }
t        dk7  r$dj                  |
j                  t                    }
|	d	   j                  d      r|
j                  d      s|
dz  }
t        |	dd	 |
fz   |	d
d z         }|j                  d      r|j                  d      s|dz  }|S )a  
    >>> norm_url('http://example.org/', '/one')
    'http://example.org/one'
    >>> norm_url('http://example.org/', '/one#')
    'http://example.org/one#'
    >>> norm_url('http://example.org/one', 'two')
    'http://example.org/two'
    >>> norm_url('http://example.org/one/', 'two')
    'http://example.org/one/two'
    >>> norm_url('http://example.org/', 'http://example.net/one')
    'http://example.net/one'
    >>> norm_url('http://example.org/', 'http://example.org//one')
    'http://example.org//one'
    z://)urnzurn-xrK   rO    rJ   rL   r         N)r   schemepathsplitlenr   fragmentr   r   joinendswithr   )baseurlparsed_base
parsed_urlbase_path_parts	base_pathjoined_pathr`   resultpartsr]   s              rH   norm_urlrl      sq    |
 4.K#J
--%**00a8_1E1I?1-rR	i90:0C0CQz**+,&&'q);(<[M(Ts+,a!#:88DJJsO,D8S!$--*<CKDE!AJ$059<=
||C!5#MrV   c                
   | j                   dk7  re	 | j                  }|D ]R  }d|v s|j                  d      |j                  d      }}|dkD  s0|dkD  s6t	        | j
                  ||dz   |       c S  yy# t        $ r Y yw xY w)aJ  
    Please note that JSON-LD documents served with the application/ld+json media type
    MUST have all context information, including references to external contexts,
    within the body of the document. Contexts linked via a
    http://www.w3.org/ns/json-ld#context HTTP Link Header MUST be
    ignored for such documents.
    application/ld+jsonNz+ rel="http://www.w3.org/ns/json-ld#context"<>rN   rO   )r/   linksr0   indexr   rd   )r;   rq   linkijs        rH   context_from_urlinputsourcerv      s     33	LLE D<Dzz#

31r6a"f"6::tAEA??  4  		s   A6 6	BB)r-   rI   rU   rl   rv   r)   r(   c                  F     e Zd Z	 	 d	 	 	 d fdZd Zd ZddZd Z xZS )	r3   c                    t         |           || _        g | _        d| _        d| _        d | _        || _        d| _        y )NFr   )	super__init__r   r-   contains_jsonfragment_id_does_not_matchrc   r    script_count)selfr   r    	__class__s      rH   rz   zHTMLJSONParser.__init__!  sF    
 	& "	"*/'	#6 rV   c                    d| _         d| _        |dk(  rG|D ]A  \  }}|dk(  r|dk(  rd| _         |dk(  s| j                  s+|| j                  k7  s;d| _        C y |dk(  r|D ]  \  }}|dk(  s|| _         y y )	NFscriptr5   rn   Tidrc   href)r{   r|   r   rc   )r~   tagattrsattrvalues        rH   handle_starttagzHTMLJSONParser.handle_starttag/  s    "*/' (?$e6>e/D&D)-D&T\d&6&65DDTDT;T6:D3	  % F]$e6> %DI  % rV   c                   | j                   du r| j                  du r| j                  s| j                  dkD  ry |j	                         dk(  ry t
        rt        j                  |      }nt        j                  |      }t        |t              r| j                  j                  |       n| j                  j                  |       | xj                  dz  c_        y y y )NTFr   rY   rO   )r{   r|   r    r}   stripr(   r)   r*   r-   r"   listextendappend)r~   r#   parseds      rH   handle_datazHTMLJSONParser.handle_dataA  s     %$*I*IU*R++0A0AA0Ezz|r!   d+D) &$'		  (		  ("1 +S%rV   c                    | j                   S N)r-   r~   s    rH   r8   zHTMLJSONParser.get_json_      yyrV   c                    | j                   S r   )rc   r   s    rH   r9   zHTMLJSONParser.get_baseb  r   rV   NF)r   Optional[str]r    Optional[bool])returnz
List[Dict])	__name__
__module____qualname__rz   r   r   r8   r9   __classcell__)r   s   @rH   r3   r3      s6     &*.3" ,&$#<rV   r3   r   )r;   zMOptional[Union[IO[bytes], TextIO, InputSource, str, bytes, pathlib.PurePath]]r   r   r    r   r   z#Tuple[Union[Dict, List[Dict]], Any])rR   r%   r   zTuple[str, Optional[str]])rc   r%   rd   r%   r   r%   )r;   r   r   r   ).
__future__r   r-   pathlibhtml.parserr   ior   r   r   typingr   r	   r
   r   r   r   r   r   r   ImportError
simplejson	posixpathr   r   r   urllib.parser   r   r   r)   r(   rdflib.parserr   r   r   r   r   r   rI   rP   rU   rl   rv   __all__r3    rV   rH   <module>r      s    "   " 2 2 U U U"t $ & 6 6K  "&*/XX 	X
 (X )Xv )Z@.CZ C_  "!"  FKs#   B# !B2 #	B/.B/2	B>=B>