##
## registry entry for corpus GOETHE_UTF8
##

# long descriptive name for the corpus
NAME "'Das Schreien' von Johann Wolfgang v. Goethe (UTF-8)"
# corpus ID (must be lowercase in registry!)
ID   goethe_utf8
# path to the directory containing the corpus's binary data files
HOME data/goethe_utf8

# corpus properties provide additional information about the corpus:
##:: charset = "utf8"	# character encoding of corpus data
##:: language = "de"	# ISO code of the corpus language (de, en, fr, ...)
#========================================================================#


##
## p-attributes (token annotations)
##

ATTRIBUTE word


##
## s-attributes (structural markup)
##

# <s> ... </s>
# (no recursive embedding allowed)
STRUCTURE s

# <line> ... </line>
# (no recursive embedding allowed)
STRUCTURE line
