#Sample Raccoon configuration file
#The config file is just a python script, but knowledge of python is not needed for simple configurations
#################################################
## basic configuration variables
#################################################
#PATH is a platform-specific string listing the directories that will be searched when resolving 'path:' URLs
#default: -p argument if set, otherwise the RACCOONPATH environment variable if set, otherwise the current directory
PATH = '.;..'
#appBase specifies base URL that this application runs on
#default: '/'
appBase = "/cgi-bin/rhizome"
#appName is a short name for this application, must be unique within the current Raccoon process
#default: if only one application is running, "root", if multiple are,
#BASE_MODEL_URI with non-alphanumeric characters replaced with "_"
appName = "root"
#the __include__ function includes another Raccoon configuration file
__include__('../rhizome/rhizome-config.py')
#BASE_MODEL_URI is the base URI reference to be used when creating RDF resources
#this is made available as the $BASE_MODEL_URI XPath variable
#default: 'http://' + socket.getfqdn() + '/'
BASE_MODEL_URI='http://example.com/'
#domStoreFactory is the class or factory function that Raccoon will call to instantiate the application's DOM store
#It is passed as keyword arguments the dictionary of the variables contained in the config file
#note that this is a callable object which may need to be imported into the config file
#default is DomStore.RxPathDomStore
from rx import DomStore
domStoreFactory = DomStore.XMLDomStore #the application uses an XML document instead of RDF
#the following config variables are passed to domStoreFactory as keyword arguments:
#STORAGE_PATH the location of the RDF model. Usually a file path but the appropriate value depends on 'modelFactory'
#default is ''
STORAGE_PATH = 'mywebsite.nt'
#transactionLog is the path of the transactionLog. The transactionLog records in NTriples format a log
#of the statements added and removed from the model along with comments on when and by whom.
#Note: the default file store uses this format so there is not much reason to use this if you are using the default
#default is '' (no transactionLog)
#the path must be a quoted string literal (an unquoted path is a syntax error)
transactionLog = '/logs/auditTrail.nt'
#STORAGE_TEMPLATE is a string containing NTriples that is used when
#the file specified by STORAGE_PATH is not found
STORAGE_TEMPLATE='''
_:itemdispositionhandlertemplate <http://rx4rdf.sf.net/ns/wiki#name> "item-disposition-handler-template" .
_:itemdispositionhandlertemplate <http://rx4rdf.sf.net/ns/wiki#revisions> _:itemdispositionhandlertemplate1List .
_:itemdispositionhandlertemplate <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://rx4rdf.sf.net/ns/archive#NamedContent> .
'''
##APPLICATION_MODEL is a string containing NTriples that are added to the RDF model
#but are read-only and not saved to disc. Use for structural components such as the schema.
#a single NTriples statement labelling the ZML content format
#(the subject URI must not contain a stray quote character inside the angle brackets)
APPLICATION_MODEL='''<http://rx4rdf.sf.net/ns/wiki#item-format-zml> <http://www.w3.org/2000/01/rdf-schema#label> "ZML" .'''
#modelFactory is the class or factory function used by RxPathDomStore to load or create a new RDF document or database
#note that this is a callable object which may need to be imported into the config file
#default is RxPath.IncrementalNTriplesFileModel
modelFactory=RxPath.RedlandHashBdbModel
#VERSION_STORAGE_PATH the location of a separate RDF model for storing the history of changes to the database.
#Usually a file path but the appropriate value depends on 'versionModelFactory'
#default is '' (history not stored separately)
VERSION_STORAGE_PATH = 'mywebsite.history.nt'
#versionModelFactory is the class or factory function used by RxPathDomStore to load or create the version history RDF database
#note that this is a callable object which may need to be imported into the config file
#default is whatever 'modelFactory' is set to
versionModelFactory=RxPath.RedlandHashBdbModel
#the following are security specific configuration settings:
#SECURE_FILE_ACCESS limits 'file:' URL access to only the directories reachable through the PATH
#default is True
SECURE_FILE_ACCESS = True
#DEFAULT_URI_SCHEMES specifies which URL schemes can be resolved.
#default is ['file','data'] (no URL schemes that allow network access)
DEFAULT_URI_SCHEMES = ['http', 'https', 'file', 'ftp', 'data', 'gopher'] #all supported schemes
#uriResolveWhitelist is a list of regular expressions. URL resolution will only be allowed if
#the URL matches one of its regular expressions.
#default: uriResolveWhitelist = [] (if the list is empty, resolve all URLs)
uriResolveWhitelist = ['(file:|data:|site:|path:).*', 'http://localhost.*']
#uriResolveBlacklist is a list of regex strings where if any match the URL, resolution is denied.
#default: uriResolveBlacklist = []
#blacklist access to the local subnet: localhost (except port 8000) and 10.x.x.x addresses
#raw strings are used so the regex backslash escapes reach the regex engine unchanged
uriResolveBlacklist = [r'http://(.*@)?localhost(?!:8000)', r'http://(.*@)?10\..*']
#authorizationDigests is a dictionary used by RequestProcessor.authorizeByDigest
#to create a whitelist of content that may be processed. By default it is used for the Python content processor
#(see authorizeContentProcessors setting below).
#Because it's a config setting, it adds a level of authorization that only local administrators can set.
#The dictionary contains items with a base64-encoded SHA1 digest as the key and a non-zero value.
#The digest can be calculated using the rx.utils.shaDigest function
#and is also reported in the NotAuthorized exception message raised by the Python content processor
#and is the value of a:sha1-digest property set by Rhizome ("Edit Metadata" will display it).
#default is {} (thus no Python code can be executed)
authorizationDigests = { 'rosyIeJ7Kk+FZRkYPSjcteQi59w=' : 1} #for myPythonPage.py
#disabledContentProcessors is a list of content processors to disable.
#To enable all content processors set this to an empty list.
#default is [] (all contentProcessors enabled)
disabledContentProcessors = ['http://rx4rdf.sf.net/ns/wiki#item-format-python']
#Other config variables
#nsMap is dictionary of namespace prefixes that may appear in RxPath expressions
#default is {} but Raccoon will always add 'rdf', 'rdfs', 'owl', 'bnode', 'wf' (for Raccoon XPath Extension functions),
#'xf' (for 4Suite XPath Extension functions), 'session', 'request-header', 'response-header',
#'request-cookie', 'response-cookie', 'previous' (for callActions() ) and 'error'
#maps each namespace prefix usable in RxPath expressions to its namespace URI
nsMap = dict(
    dc='http://purl.org/dc/elements/1.1/',
    myNs='http://www.example.com/#',
)
#XPATH_CACHE_SIZE sets the maximum number of items to be stored in the XPath processing cache. Set to 0 to disable.
#default is 1000
XPATH_CACHE_SIZE=1000
#ACTION_CACHE_SIZE sets the maximum number of items to be stored in the Action cache. Set to 0 to disable.
#default is 1000
ACTION_CACHE_SIZE=1000
#XPATH_PARSER_CACHE_SIZE sets the maximum number of items to be stored in the XPath parser cache. Set to 0 to disable.
#default is 200
XPATH_PARSER_CACHE_SIZE=200
#STYLESHEET_CACHE_SIZE sets the maximum number of items to be stored in the stylesheet parser cache. Set to 0 to disable.
#default is 200
STYLESHEET_CACHE_SIZE=200
#FILE_CACHE_SIZE sets the maximum size (in bytes) of the file cache
#(used by the file:// and path: URL resolvers). Set to 0 to disable.
#default is 0 (disabled)
FILE_CACHE_SIZE=10000000 #~10mb
#MAX_CACHEABLE_FILE_SIZE limits caching files to only those less than the specified size (in bytes).
#Use to prevent the cache from being filled up with just a few large files.
#default is 0 (files of any size may be cached)
MAX_CACHEABLE_FILE_SIZE=10192 #only cache files under 10K in size
#if LIVE_ENVIRONMENT is True, the system will not cache XPath expressions
#or Actions that reference functions (such as document())
#that retrieve URL or interact with the file system.
#Also stylesheets with xsl:import, xsl:include, xinclude or external entity references will not be cached.
#default is True
LIVE_ENVIRONMENT = True
#defaultPageName specifies the name of the page to be invoked if the request URL doesn't include a path
#e.g. http://www.example.org/ is equivalent to http://www.example.org/index
#default is 'index'
#defaultPageName='home.html'
#if DEFAULT_MIME_TYPE is set then this MIME type will be set on any request that doesn't set its own mimetype
#and Raccoon can't guess its MIME type
#default is '' (not set)
DEFAULT_MIME_TYPE='text/plain'
#MODEL_RESOURCE_URI is the resource that represents the model this instance of the application is running
#it can be used to make assertions about the model itself, e.g. its location or which application created it
#default is the value of BASE_MODEL_URI
MODEL_RESOURCE_URI = 'http://example.org/rhizomeapp/2/20/2004'
#defaultExpiresIn specifies what to do about the Expires HTTP response header if it
#hasn't already been set by the application. If its value is 0 or None the header
#will not be sent, otherwise the value is the number of seconds in the future
#that responses should expire. To indicate that they have already expired set it to -1;
#to indicate that they never expire set it to 31536000 (1 year).
#default is 3600 (1 hour)
defaultExpiresIn = 0 #disable setting the Expires header by default
#if useEtags is True, If-None-Match request headers are honored and an etag based
#on a MD5 hash of the response content will be set with every response
#default is True
useEtags = False #disable
#if useFileLock is True Raccoon will use interprocess file lock when committing
#a transaction. Alternately useFileLock can be a reference to a class or factory
#function that conforms to the glock.LockFile interface.
#default is False
useFileLock=True #enable
#################################################
## available read-only variables
#################################################
#the bare references below do nothing; they only list the names available to a config file
#NOTE(review): presumably these names are supplied by Raccoon when it loads the config file,
#so evaluating this section outside Raccoon would raise NameError -- confirm
__server__ #RequestProcessor reference
__argv__ #list of arguments intended for this configuration file
__configpath__ #stack of configuration file paths, __configpath__[-1] is always the current file
#__configpath__[-2], if it exists, would be the config file currently including this one, and so on.
###############################################################################
## rhizome-specific configuration variables
## these are only used when including rhizome-config.py into your config file
###############################################################################
#MAX_MODEL_LITERAL when saving a page, any content whose length is greater than this will be saved to disk.
#default is -1 (disable save to disk)
MAX_MODEL_LITERAL = 0 #save any page to disk
#SAVE_DIR is the directory content is saved to. It is a relative or
#absolute path and must be a distinct sub-directory of a directory on the PATH
#default is 'content/.rzvs'
SAVE_DIR='content/.rzvs'
#ALTSAVE_DIR is a directory where a copy of the latest revision of a page will be saved,
#allowing external changes to the page to be made without messing up the revision history.
#Changes made externally will appear in Rhizome (almost) as if a change was
#saved with the minor edit flag.
#It must be a relative or absolute path and must be on the PATH
#(If PATH isn't specified ALTSAVE_DIR is added to it.)
#to disable ALTSAVE logic set ALTSAVE_DIR = ''
#default is 'content'
ALTSAVE_DIR='content'
#THEME_DIR specifies the directory where theme resources (such as theme.css)
#are located. It only takes effect when PATH is not specified; if so, it is
#added to the PATH after the ALTSAVE_DIR.
#It can be either a relative or absolute path; if it is a relative path
#the directory where rhizome-config.py is located is prepended.
#default is 'themes/default'
THEME_DIR='themes/mytheme'
#these following password related settings are referenced by the rhizome config
#and so must be set before you include it
#SECURE_HASH_SEED is a string that is combined with plaintext when generating a secure hash of passwords
#You really should set your own private value. If it is compromised, it will be much
#easier to mount a dictionary attack on the password hashes.
#If you change this all previously generated password hashes will no longer work.
#default is 'YOU REALLY SHOULD CHANGE THIS!' (will cause a warning on start up)
SECURE_HASH_SEED = 'my secret seed value'
#ADMIN_PASSWORD is the password for the default 'admin' super-user
#default password is 'admin' (will cause a warning on start up)
ADMIN_PASSWORD = 'topsecret'
#ADMIN_PASSWORD_HASH can be set instead of ADMIN_PASSWORD if you don't want to store the password in the config file
#you can calculate it using sha.sha( 'yourpassword' + SECURE_HASH_SEED).hexdigest()
ADMIN_PASSWORD_HASH = '12201fe5e202883bd45fc97e87366ea05183e0e4'
#passwordHashProperty is the name of RDF property that represents the secure hash of a password
#There should be a unique property for each extant seed.
#default is BASE_MODEL_URI+'password-hash'
#the property URI must be a quoted string literal (an unquoted URL is a syntax error)
passwordHashProperty = 'http://example.org#password-hash5'
#secureHashMap is a dictionary of mapping secure hash properties to seeds.
#It is used by wf:secure-hash.
#You can set this if you need to support multiple seeds (e.g. for backwards compatibility)
#Otherwise, use the default.
#default is { passwordHashProperty : SECURE_HASH_SEED }
secureHashMap = { '#oldHashProperty' : 'oldSeed', passwordHashProperty : SECURE_HASH_SEED }
#other Rhizome specific security settings
#Authors that don't own the "base:create-unsanitary-content-token" access token
#will have their content filtered to prevent them from writing dangerous HTML constructs
#(basically from executing javascript or embedding objects). You can customize which elements
#and attributes may not appear using these settings:
#blacklistedElements is a list of elements banned from appearing
#default: see utils.BlackListHTMLSanitizer
blacklistedElements = rx.rhizome.SanitizeHTML.blacklistedElements + ['style']
#blacklistedAttributes is a dictionary whose keys and values are regular expressions
#that are matched against the attribute's name and value, respectively.
#default: see utils.BlackListHTMLSanitizer
blacklistedAttributes = { 'style': '.*', } #remove any inline style attribute
#blacklistedContent is a dictionary whose keys and values are regular expressions.
#The key is matched against the element's name and the value is searched through
#the text and comments contained by the element.
#If they match, the entire comment or text will be removed
#default: see utils.BlackListHTMLSanitizer
blacklistedContent = { 'style': 'javascript:|@import' }
#if the following two properties are set, before saving content Rhizome will invoke the Akismet
#service to check if the content is spam (unless the user has the create-nospam-token access token)
#See http://akismet.com/ for more info.
#akismetKey is your free WordPress API key
#default: ''
akismetKey = '79deadbeef31'
#akismetUrl is the url to your blog or wiki
#default: ''
akismetUrl = 'http://mywiki.example.net'
#the following settings can be turned on to give your site a wiki like look and feel
#if undefinedPageIndicator is True then links to internal pages that haven't been defined will be displayed with ?
#(or however path:wiki2html.xsl transforms such links)
#default is True
undefinedPageIndicator=True
#if externalLinkIndicator is True then links to external URLs will have an icon next to them
#(or however path:wiki2html.xsl transforms such links)
#default is True
externalLinkIndicator=True
#if interWikiLinkIndicator is True then InterWiki links will have an icon next to them
#(or however path:wiki2html.xsl transforms such links)
#default is True
interWikiLinkIndicator=True
#interWikiMapURL is the URL to the file that defines the supported InterWiki link types
#to disable InterWiki linking set this to ''
#default is 'site:///intermap.txt'
interWikiMapURL=''
#namespaceMapURL is the URL to the file that lists the namespace prefixes that
#will be used when serializing RDF (e.g. when displaying RxML)
#default is 'site://namespaces.txt'
namespaceMapURL='site://namespaces.txt'
#ZMLDefaultVersion indicates the version of the ZML syntax that should be assumed when
#the ZML header (e.g. #?zml0.7) is missing from ZML text.
#default: the value of zml.defaultZMLVersion (currently 0.7)
ZMLDefaultVersion = 0.8 #set default to latest experimental version
#content indexing
#INDEX_DIR is the directory the content index will be stored in
#If the directory doesn't exist at startup, it will be created and all applicable
#content will be indexed on startup.
#default is 'contentindex'
INDEX_DIR = 'contentindex'
#useIndex indicates whether content indexing should be queried and updated
#default is True if Lupy is installed
useIndex = False #no indexing
#indexableFormats is a list of content formats that should be indexed when content is saved.
#default is ['http://rx4rdf.sf.net/ns/wiki#item-format-text',
# 'http://rx4rdf.sf.net/ns/wiki#item-format-xml',
# 'http://rx4rdf.sf.net/ns/wiki#item-format-zml']
indexableFormats = rhizome.defaultIndexableFormats + ['urn:example:mycontentformat']
#the following settings can be used to customize Rhizome
#below are a few helper function to make it easy to extend or override the rhizome default template
#they must be called after the rhizome config has been included
#The rhizome config creates a temporary dictionary of triples that is used to create the template
#These functions allow you to add or replace the items in dictionary. The keys of the dictionary
#are either page names or one of the following:
#@userClasses, @sitevars, @labels, @auth, @dispositions, @doctypes, @model, @keywords, @themes
#(This list may be incomplete and is subject to change, see rhizome-config.py.)
#__addItem__ adds a page to STORAGE_TEMPLATE. See rhizome.addItem() for a list of all the keyword parameters
#if the page's name matches one of rhizome's default pages it will replace it.
__addItem__('Todo',loc='path:todo.txt', format='zml', doctype='todo')
#__addTriples__(contents='', replace=None) adds arbitrary triples to STORAGE_TEMPLATE given a string in NTriples format
#If the replace keyword is given it will replace the triples associated with that name
__addTriples__(
'''
_:itemdispositionhandlertemplate <http://rx4rdf.sf.net/ns/wiki#name> "item-disposition-handler-template" .
_:itemdispositionhandlertemplate <http://rx4rdf.sf.net/ns/wiki#revisions> _:itemdispositionhandlertemplate1List .
_:itemdispositionhandlertemplate <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://rx4rdf.sf.net/ns/archive#NamedContent> .
''')
#__addRxML__(contents='', replace=None) adds arbitrary triples to STORAGE_TEMPLATE given a string in RxML format
#If the replace keyword is given it will replace the triples associated with that name
__addRxML__(replace = '@sitevars', contents = '''
base:site-template:
wiki:header-image: `Rx4RDFlogo.gif
wiki:header-text: ` Welcome to Rx4Rdf!
''')
#################################################
## advanced configuration variables
## these variables are only necessary when developing a new Raccoon application
#################################################
#cmd_usage is string used to display the command-line usage help
cmd_usage = '''--import [dir] [--recurse] [--format format] [--disposition disposition]
--export dir [--static]'''
#configHook is a function called after this config file has been loaded
#its signature is def configHook(configDict) where configDict is the dictionary of the variables contained in the config file
#default is None
configHook = rhizome.configHook
#actions is a dictionary that is the heart of an application running on Raccoon
# the key is the name of the trigger and the value is list of Actions that are invoked in that order
# Raccoon currently uses these triggers:
# 'http-request' is invoked by RequestProcessor.handleRequest (for http requests) and by the 'site:' URL resolver
# 'load-model' is invoked after a model is loaded
# 'run-cmds' is invoked on start-up to handle command line arguments
# 'before-add' and 'before-remove' is invoked every time a statement is added or removed
# 'before-new' is invoked when a new resource is added
# 'before-prepare' is invoked at the end of a transaction but trigger still has a chance to modify it
# 'before-commit' is invoked when transaction frozen and about to be committed, one last chance to abort it
# 'after-commit' is invoked after a transaction is completed successfully
# triggerName + '-error' is invoked when an exception is raised while processing a trigger
# see Action class for more info
#each key is a trigger name; each value is the list of Actions associated with that trigger
actions = { 'http-request' : [Action(['.//myNs:contents/myNs:ContentTransform/myNs:transformed-by/*',],
__server__.processContents, matchFirst = False, forEachNode = True)],
#the rhizome=rhizome default argument binds the rhizome object when each lambda is created,
#so the handlers do not depend on a later global lookup of that name
'run-cmds' : [ Action(["$import", '$i'], lambda result, kw, contextNode, retVal, rhizome=rhizome:
rhizome.doImport(result[0], **kw)),
Action(['$export', '$e'], lambda result, kw, contextNode, retVal, rhizome=rhizome:
rhizome.doExport(result[0], **kw)),
],
'load-model' : [ FunctorAction(rhizome.initIndex) ],
}
#DEFAULT_TRIGGER is used by Requestor objects and the "site:" URL resolver as the trigger to use to invoke a request
#default is 'http-request'
DEFAULT_TRIGGER='http-request'
#globalRequestVars is a list of request metadata variables that should be preserved
#when invoking callActions() (e.g. to invoke templates or an error handler)
#default is [] (but Raccoon will always add the following:
# '_name', '_noErrorHandling', '__store', '_APP_BASE', and '__readOnly')
globalRequestVars = [ '__account', '_static', '_disposition' ]
#contentProcessors is a list used by RequestProcessor.processContents to specify
#content processors in addition to the built-in ones (RxSLT, RxUpdate, Python, base64, etc.)
contentProcessors = [
ZMLContentProcessor()
]
#authorizeContentProcessors is a dictionary used by RequestProcessor.processContents to authorize
#that the content can be sent to the content processor.
#The key is the URI of the content format or 'default'. If the format doesn't exist in the dictionary,
#then 'default' is checked.
#The value is a function with this signature:
#def authorizeContent(contentProcessor, contents, formatType, kw, dynamicFormat)
#where dynamicFormat is a boolean indicating whether the format was returned by the previous content processor.
#default is {'http://rx4rdf.sf.net/ns/wiki#item-format-python': self.authorizeByDigest}
authorizeContentProcessors = {} #no authorization
#extFunctions is a dictionary of RxPath extension functions to be made available to RxPath, RxSLT, RxUpdate, etc.
#maps (namespace URI, local name) to the callable implementing that extension function
#the sample 'reverse' function reverses the node-set in place and returns that same list
extFunctions = dict([
    (('http://example.com/extfuncs', 'reverse'),
     lambda context, nodeset: (nodeset.reverse(), nodeset)[1]),
])
#dictionary of XPath functions or Xslt Extension elements that may not be cacheable
#if the value evaluates to False then the function is treated as not cacheable
#otherwise the value should be a function with the signature:
#def getKey(node, context, notCacheableDict) where
#node is either the Ft.Xml.XPath.ParsedExpr.FunctionCall
#or the Ft.Xml.Xslt.XsltElement node representing the function or extension
#element, respectively, context is the XPath context,
#and notCacheableDict is this dictionary.
#getKey should either return a key (any hashable object)
#or raise MRUCache.NotCacheable.
#Note that the context may not have all its fields set. If the getKey function relies on one,
#it should check that it's not None and raise MRUCache.NotCacheable if it is.
#default is {} (but the system will always add its own list)
NOT_CACHEABLE_FUNCTIONS= {('http://example.com/extfuncs', 'delete-everything') : 0 }
#getPrincipleFunc is a function that is called to retrieve the
#application-specific Principal (in the security sense) object
#for the current request context.
#It takes one argument that is the dictionary of metadata for the current request
#default: lambda kw: ''
getPrincipleFunc = lambda kw: kw.get('__account','')
#authorizeMetadata is a function that is called whenever one of Raccoon's XXX-metadata
#XPath extension functions is called.
#Its signature looks like:
#def authorizeMetadata(operation, namespace, name, value, kw)
#where operation is one of 'get', 'has', 'assign', 'remove'
#return True if the operation is authorized or False if it isn't
#default is lambda *args: True
authorizeMetadata=rhizome.authorizeMetadata
#validateExternalRequest is a function that is called
#when receiving an external request (e.g. an http request)
#It is called before invoking runActions(). Use it to make sure the request
#doesn't contain metadata that could dangerously confuse request processing.
#Its signature looks like:
#def validateExternalRequest(kw)
#where kw is the request metadata dictionary (which can be modified if necessary).
#It should raise raccoon.NotAuthorized if the request should not be processed.
#default is lambda *args: True
validateExternalRequest=rhizome.validateExternalRequest
#authorizeXPathFuncs is a function that is invoked on start up to
#allow the application to modify a copy of the extFunctions dictionary.
#(i.e. to update it with functions that do an authorization before invoking
#the XPath function.) This modified dictionary is used when authorization is
#needed (e.g. when processing XSLT, RxSLT, and RxUpdate stylesheets).
#Its signature looks like:
#def authorizeXPathFuncs(extFunctions, notCacheableDict)
#where extFunctions is a copy of the dictionary of XPath functions
#and notCacheableDict is a copy of NOT_CACHEABLE_FUNCTIONS
#default is lambda *args: None
authorizeXPathFuncs=rhizome.authorizeXPathFuncs #rhizome-config.py sets this
#authorizedExtFunctions is a dictionary used by rhizome.authorizeXPathFunc
#where the key is the name of the XPath function and the value is
#(authFunc, 0 or cachFunc) with the following signatures:
# def authFunc(funcName, context, args)
# returns (list of access token URIs, args) or raises NotAuthorized
# def cachFunc(funcName, func)
# returns either 0 (never cache), -1 (OK to cache), or a caching func
authorizedExtFunctions = {('http://example.com/extfuncs','delete-everything') :
(lambda name, context, args:
(['http://example.com/delete-everything-token'], args),
0) }
#shredders is a list of ContentProcessors that are invoked during the shredding process
#default: see rhizome-config.py
shredders = [ rx.ContentProcessors.RDFShredder() ]

