
    i8A              
      t   U d Z ddlmZ ddlZddlZddlZddlZddlZddlZddl	Z	ddl
Z
ddlZddlZddlmZmZmZmZmZmZ ddlmZ ddlZddlmZmZmZ ddlmZ dgZd	Zi Zd
e d<   d1dZ!d Z"d2dZ#ejH                  fdZ%d3d4dZ& G d d      Z' G d de'      Z( G d de'      Z) G d de)      Z* G d de)      Z+ G d de)      Z, G d de'      Z- G d d e)      Z. G d! d"e'      Z/ e'       Z0d# Z1d5d$Z2d% Z3d3d&Z4d' Z5d3d(Z6d3d)Z7d* Z8d5d+Z9e1e2e3e4e6e8e9e5e7d,	Z:d- Z; G d. d      Z<d/ Z=e>d0k(  r e=        yy)6z
A commandline tool for semi-automatically converting CSV to RDF.

See also https://github.com/RDFLib/pyTARQL in the RDFlib family of tools

try: ``csv2rdf --help``

    )annotationsN)AnyDictListOptionalTupleUnion)quote)RDFRDFS	split_uri)URIRefCSV2RDFa^  
csv2rdf.py     -b <instance-base>     -p <property-base>     [-D <default>]     [-c <classname>]     [-i <identity column(s)>]     [-l <label columns>]     [-s <N>] [-o <output>]     [-f configfile]     [--col<N> <colspec>]     [--prop<N> <property>]     <[-d <delim>]     [-C] [files...]"

Reads csv files from stdin or given files
if -d is given, use this delimiter
if -s is given, skips N lines at the start
Creates a URI from the columns given to -i, or automatically by numbering if
none is given
Outputs RDFS labels from the columns given to -l
if -c is given adds a type triple with the given classname
if -C is given, the class is defined as rdfs:Class
Outputs one RDF triple per column in each row.
Output is in n3 format.
Output is stdout, unless -o is specified

Long options also supported:     --base,     --propbase,     --ident,     --class,     --label,     --out,     --defineclass

Long options --col0, --col1, ...
can be used to specify conversion for columns.
Conversions can be:
    ignore, float(), int(), split(sep, [more]), uri(base, [class]), date(format)

Long options --prop0, --prop1, ...
can be used to use specific properties, rather than ones auto-generated
from the headers

-D sets the default conversion for columns not listed

-f says to read config from a .ini/config file - the file must contain one
section called csv2rdf, with keys like the long options, i.e.:

[csv2rdf]
out=output.n3
base=http://example.org/
col0=split(";")
col1=split(";", uri("http://example.org/things/",
                    "http://xmlns.com/foaf/0.1/Person"))
col2=float()
col3=int()
col4=date("%Y-%b-%d %H:%M:%S")

z*Dict[Any, Tuple[URIRef, Optional[URIRef]]]urisc                   t        j                  dd|       } t        j                  dd|       } | j                  d      } dj                  | d   j	                         g| dd D cg c]  }|j                          c}z         S c c}w )	zc
    CamelCase + lowercase initial a string


    FIRST_NM => firstNm

    firstNm => firstNm

    z[^\w] z([a-z])([A-Z])z\1 \2 r      N)resubsplitjoinlower
capitalize)labelxs     CC:\Projects\mas-dev\.venv\Lib\site-packages\rdflib/tools/csv2rdf.py
toPropertyr   a   sw     FF8S%(EFF#Y6EKKE77E!HNN$%qr(KA(KKLL(Ks   %B
c                ^    | dd j                         s| dd j                         | dd  z   S | S )Nr      r   )isupperr   )r   s    r   toPropertyLabelr"   r   s9    1:Qqz!E!"I--L    c                D    t        |D cg c]  }| |   	 c}      S c c}w )zPreturn a set of indexes from a list
    >>> index([1,2,3],(0,2))
    (1, 3)
    )tuple)l_ir   s      r   indexr(   x   s%    
 #A"Q%#$$#s   c              +  R   K   t        j                  | fd|i|}|D ]  }|  y w)Ndialect)csvreader)csv_datar*   kwargs
csv_readerrows        r   r/   r/      s-     H@g@@J	 s   %'c           	         |rCt        j                  |t        | j                  d      j	                  dd      d      z         }nt        j                  |       }||ft
        | <   |S )Nutf8r   _r   safe)rdflibr   r
   encodereplacer   )r   prefixclass_rs       r   	prefixurir<      sV    MM&5&)9)A)A#s)KRT#UUVMM!&kDGHr#   c                      e Zd Zd ZddZy)	NodeMakerc                6    t         j                  j                  S N)r6   r   Literalselfs    r   rangezNodeMaker.range   s    {{"""r#   c                ,    t        j                  |      S r@   )r6   rA   rC   r   s     r   __call__zNodeMaker.__call__   s    ~~a  r#   N)r   r   )__name__
__module____qualname__rD   rG    r#   r   r>   r>      s    #!r#   r>   c                      e Zd Zd Zd Zd Zy)NodeUric                h    d | _         || _        |rt        j                  |      | _         y d | _         y r@   )r:   r9   r6   r   )rC   r9   r:   s      r   __init__zNodeUri.__init__   s*    (, --/DKDKr#   c                D    t        || j                  | j                        S r@   )r<   r9   r:   rF   s     r   rG   zNodeUri.__call__   s    DKK55r#   c                R    | j                   xs t        j                  j                  S r@   )r:   r6   r   ResourcerB   s    r   rD   zNodeUri.range   s    {{1fjj111r#   NrH   rI   rJ   rO   rG   rD   rK   r#   r   rM   rM      s    62r#   rM   c                      e Zd ZddZy)NodeLiteralNc                    || _         y r@   f)rC   rX   s     r   rO   zNodeLiteral.__init__   s	    r#   r@   )rH   rI   rJ   rO   rK   r#   r   rU   rU      s    r#   rU   c                      e Zd Zd Zd Zy)	NodeFloatc                    | j                   st        j                  t        |            S t	        | j                         r-t        j                  t        | j                  |                  S t        d      )Nz(Function passed to float is not callable)rX   r6   rA   floatcallable	ExceptionrF   s     r   rG   zNodeFloat.__call__   sP    vv>>%(++DFF>>%q	"233BCCr#   c                6    t         j                  j                  S r@   )r6   XSDdoublerB   s    r   rD   zNodeFloat.range   s    zz   r#   NrH   rI   rJ   rG   rD   rK   r#   r   rZ   rZ      s    D!r#   rZ   c                      e Zd Zd Zd Zy)NodeIntc                    | j                   st        j                  t        |            S t	        | j                         r-t        j                  t        | j                  |                  S t        d      )Nz&Function passed to int is not callable)rX   r6   rA   intr]   r^   rF   s     r   rG   zNodeInt.__call__   sO    vv>>#a&))DFF>>#dffQi.11@AAr#   c                6    t         j                  j                  S r@   )r6   r`   rf   rB   s    r   rD   zNodeInt.range   s    zz~~r#   Nrb   rK   r#   r   rd   rd      s    Br#   rd   c                      e Zd Zd Zd Zy)NodeBoolc                    | j                   st        j                  t        |            S t	        | j                         r-t        j                  t        | j                  |                  S t        d      )Nz'Function passed to bool is not callable)rX   r6   rA   boolr]   r^   rF   s     r   rG   zNodeBool.__call__   sO    vv>>$q'**DFF>>$tvvay/22ABBr#   c                6    t         j                  j                  S r@   )r6   r`   rk   rB   s    r   rD   zNodeBool.range   s    zzr#   Nrb   rK   r#   r   ri   ri      s    Cr#   ri   c                      e Zd Zd Zd Zy)NodeReplacec                     || _         || _        y r@   ab)rC   rq   rr   s      r   rO   zNodeReplace.__init__   s    r#   c                N    |j                  | j                  | j                        S r@   )r8   rq   rr   rF   s     r   rG   zNodeReplace.__call__   s    yy((r#   N)rH   rI   rJ   rO   rG   rK   r#   r   rn   rn      s    )r#   rn   c                      e Zd Zd Zd Zy)NodeDatec                |    t        j                  t        j                  j                  || j                              S r@   )r6   rA   datetimestrptimerX   rF   s     r   rG   zNodeDate.__call__   s(    ~~h//88DFFCDDr#   c                6    t         j                  j                  S r@   )r6   r`   dateTimerB   s    r   rD   zNodeDate.range   s    zz"""r#   Nrb   rK   r#   r   ru   ru      s    E#r#   ru   c                      e Zd Zd Zd Zd Zy)	NodeSplitc                     || _         || _        y r@   seprX   )rC   r   rX   s      r   rO   zNodeSplit.__init__   s    r#   c                B   | j                   st        j                  | _         t        | j                         st	        d      |j                  | j                        D cg c]5  }|j                         dk7  s| j                  |j                               7 c}S c c}w )Nz)Function passed to split is not callable!r   )rX   r6   rA   r]   r^   r   r   strip)rC   r   ys      r   rG   zNodeSplit.__call__   sn    vv^^DFGHH+,77488+<P+<a	Rqwwy!+<PPPs   B8!Bc                    | j                   r4t        | j                   t              r| j                   j                         S t        j                  |       S r@   )rX   
isinstancer>   rD   rB   s    r   rD   zNodeSplit.range   s5    66j366<<>!t$$r#   NrS   rK   r#   r   r|   r|      s    Q%r#   r|   c                      y)NignorerK   )argsr.   s     r   _config_ignorer      s    r#   c                    t        | |      S r@   )rM   )r9   r:   s     r   _config_urir      s    66""r#   c                     t               S r@   )rU   rK   r#   r   _config_literalr     s
    =r#   c                    t        |       S r@   )rZ   rW   s    r   _config_floatr     s    Q<r#   c                    t        | |      S r@   )rn   rp   s     r   _config_replacer   	  s    q!r#   c                    t        |       S r@   )rd   rW   s    r   _config_intr     s    1:r#   c                    t        |       S r@   )ri   rW   s    r   _config_boolr     s    A;r#   c                    t        |       S r@   )ru   )format_s    r   _config_dater     s    Gr#   c                    t        | |      S r@   )r|   r~   s     r   _config_splitr     s    S!r#   )	r   uriliteralr\   rf   dater   r8   rk   c                "    t        | t              S )z$Return a function for column mapping)evalconfig_functions)vs    r   columnr   *  s     #$$r#   c                      e Zd Zd Zd Zd Zy)r   c                    d | _         d | _        d | _        d| _        d | _        d| _        d| _        d| _        d | _        i | _	        i | _
        t        j                  | _        d| _        y )NautoFr   ,)CLASSBASEPROPBASEIDENTLABELDEFINECLASSSKIPDELIMDEFAULTCOLUMNSPROPSsysstdoutOUTtriplesrB   s    r   rO   zCSV2RDF.__init__1  sb    
	28

 	

::r#   c                    | j                   j                  |j                         d|j                         d|j                         d       | xj                  dz  c_        y )Nr   z .
r   )r   writen3r   )rC   spos       r   triplezCSV2RDF.tripleC  s9    @Ar#   c                   t        j                          }| j                  r6t        j                  j	                  d| j                  j
                  z         | j                  dk7  r,t        | j                  t              s| j                  f| _        | j                  s/t        j                  d       t        j                  d      | _	        | j                  s/t        j                  d       t        j                  d      | _        t        | j                         D ]  }t#        |        t%        t#        |            }t'        t)        |D cg c]  }| j                  t+        |          c}            }| j,                  j/                         D ]  \  }}|||<   t1        |      d   ||<    | j2                  rw| j5                  | j6                  t8        j:                  t<        j>                         t        tA        |            D ]&  }||   ||   }
}	|	dk(  s|
dk(  r| jB                  jE                  || jF                        d	k(  rC| j5                  |	t8        j:                  t8        jH                         | j5                  |	t<        jJ                  t        jL                  tO        |
                   | j5                  |	t<        jP                  | j6                         | j5                  |	t<        j                  | jB                  jE                  |tR              j                                ) d
}|D ]C  }
	 | j                  dk(  r| j                  d|z     }nj| j                  djU                  tW        |
| j                        D cg c].  }tY        |j[                  d      j]                  dd      d      0 c}         }| j^                  rW| j5                  |t<        jJ                  t        jL                  djU                  tW        |
| j^                                           | j6                  r+| j5                  |t8        j:                  | j6                         t)        |
      D ]  \  }}|ja                         }|dk7  s| jB                  jE                  || jF                        d	k(  rF	  | jB                  jE                  |t        jL                        |      }t        |t$              r|D ]  }| j5                  |||   |        n| j5                  |||   |        |dz  }|dz  d
k(  rDt        j                  j	                  d|| jf                  t        j                          |z
  fz         F ti               }tj        j/                         D ]  \  }
}|\  }}| j5                  |t<        jJ                  t        jL                  |
             |sBt        jl                  |      }|jo                  |       | j5                  |t8        j:                  |        |D ]1  }| j5                  |t8        j:                  t<        j>                         3 | j                  jq                          t        j                  j	                  d|| jf                  fz         t        j                  j	                  dt        j                          |z
  z         y c c}w c c}w # tb        $ r7}t        j                  dd|||   ||jd                  fz  z          Y d }~d }~ww xY w# tb        $ r$ t        j                  j	                  d|z          w xY w)NzOutput to %s
r   z2No base given, using http://example.org/instances/zhttp://example.org/instances/z:No property base given, using http://example.org/property/zhttp://example.org/props/r   r   r   r   z%dr3   r2   r   r4   z#Could not process value for column z%d:%s in row %d, ignoring: %s i z$%d rows, %d triples, elapsed %.2fs.
zError processing line: %d
z#Converted %d rows into %d triples.
zTook %.2f seconds.
)9timer   r   stderrr   namer   r   r%   r   warningswarnr6   	Namespacer   rD   r   nextlistdict	enumerater   r   itemsr   r   r   r   r   typer   Classlenr   getr   Propertyr   rA   r"   domaindefault_node_maker   r(   r
   r7   r8   r   r   r^   messager   setr   r   addclose)rC   	csvreaderstartr   header_labelsheaderskr   r'   hr&   rowsr   r   _oeclassesucs                      r   convertzCSV2RDF.convertG  s   		88JJ-=>::
4::u(E**DJyyMMNO(()HIDI}}MMVW",,-HIDM tyy!AO " T)_-y!V1$--
1">!VWXJJ$$&DAqGAJ(|AM! ' KK

CHHdjj93w<(
M!$427bBh<<##At||4@Asxx6Atzz6>>/":M+NOAt{{DJJ7tzz4<<#3#3A7H#I#O#O#Q ) B9::'))D4K0C))
 */r4::)>	 *?A !&ahhv&6&>&>sC&Hr R)>		C ::KK

sxxb$**0E'FG	 ::KKSXXtzz:%bMDAq	ABw<<++At||<H$ C 0 0FNN CA FA)!T2*+B$(KKWQZ$D +, !%CQ ? *, 	&=A%JJ$$?tyy{U/BCDi z %ZZ\EBDAqKK4::v~~b'9:MM!$AAsxx+ " AKK388TZZ0  	

?4BVVW

/499;3FGHQ "W<F  ) $MM E"B#$gaj$		"B#C!C   

  !>!EFsX   [>A\,3[$C	\,*\,6A4[)*A\,$\,)	\)2,\$\,$\))\,,-]N)rH   rI   rJ   rO   r   r   rK   r#   r   r   r   0  s    $Ir#   c                 	   t               } t        j                  t        j                  dd  dg d      \  }}t	        |      }d|v sd|v r$t        t               t        j                  d       d|v rt        j                         }|j                  t        |d                |j                  d      D ]|  \  }}|d	k(  rt        j                  |d
d      | _        )|dk(  rt        j                   |      | _        I|dk(  rt        j                   |      | _        i|dk(  rt        j&                  |      | _        |dk(  rt+        |      | _        |dk(  rt/        |      | _        |dk(  rt/        |      | _        |dk(  r|| _        |dk(  rt7        |      | _        |dk(  rt;        |      | _        |j?                  d      r&t;        |      | j@                  t7        |dd        <   <|j?                  d      sOt        j&                  |      | jB                  t7        |dd        <    d|v rt        j                  |d   d
d      | _        d|v rt        j                  |d   d
d      | _        d|v rt        j                   |d         | _        d|v rt        j                   |d         | _        d|v r
|d   | _        d|v r
|d   | _        d|v rt;        |d         | _        d |v rt;        |d          | _        d!|v rt        j                   |d!         | _        d"|v rt        j                   |d"         | _        d#|v rt/        |d#         | _        d$|v rt/        |d$         | _        d%|v rt/        |d%         | _        d&|v rt/        |d&         | _        d'|v rt7        |d'         | _        d(|v rt7        |d(         | _        d)|v rt        j&                  |d)         | _        d*|v rt        j&                  |d*         | _        |j                         D ]{  \  }}|j?                  d+      r%t;        |      | j@                  t7        |d,d        <   <|j?                  d-      sNt        j&                  |      | jB                  t7        |d.d        <   } | j(                  rd/|v sd0|v rd1| _        | jE                  tG        tI        jJ                  |      | j4                  2             y )3Nr   zhc:b:p:i:o:Cf:l:s:d:D:)
zout=zbase=zdelim=z	propbase=zclass=zdefault=ident=zlabel=zskip=defineclasshelpz-hz--helpz-fcsv2rdfoutwzutf-8basepropbaseclassr   identr   delimskipdefaultcol   prop   z-oz--outz-bz--basez-dz--delimz-Dz	--defaultz-pz
--propbasez-lz--labelz-iz--identz-sz--skipz-cz--classz--col   z--prop   z-Cz--defineclassT)	delimiter)&r   getoptr   argvr   printHELPexitconfigparserConfigParser	read_fileopenr   codecsr   r6   r   r   r   r   r   rk   r   r   r   r   r   rf   r   r   r   
startswithr   r   r   r/   	fileinputinput)r   optsfilesconfigr   r   s         r   mainr    s<   iG -- 	
KD%  :Dt|x4'dt|**,d4j)*LL+DAqEz$kk!S':f%//2j#)#3#3A#6 g &a 0m#&*1g#g $Qg $Qg !f"1vi"()e$.4QiAabE
+f%,2MM!,<c!AB%j)1 ,4 t|kk$t*c7;$kk$w-g>t|''T
34''X7t|T
DYt| d,d k!23t|!++DJ7t!++D,>?t|T$Z(DT)_-t|T$Z(DT)_-t|4:44>*t|d4j1Dd9o6

1<< *0)GOOC!"J'\\(#(.a(8GMM#ae*%	  }}$$,/T*A"OOJyu5OPr#   __main__)r   str)r&   z	List[int]r'   Tuple[int, ...]returnr  r@   )r:   zOptional[URIRef])NN)?__doc__
__future__r   r   r   r+   rw   r   r   r   r   r   r   typingr   r   r   r   r   r	   urllib.parser
   r6   rdflib.namespacer   r   r   rdflib.termr   __all__r   r   __annotations__r   r"   r(   excelr/   r<   r>   rM   rU   rZ   rd   ri   rn   ru   r|   r   r   r   r   r   r   r   r   r   r   r   r   r   r  rH   rK   r#   r   <module>r     sw   #   
    	 
   : :   1 1 +<~ 460 5M"% "% ! !2i 2 ) 
	! 	!	k 		{ 	)) )#{ #%	 %$ K #
 
 %VI VIrmQ` zF r#   