#Sample Raccoon configuration file
#The config file is just a python script, but knowledge of python is not needed for simple configurations

#################################################
## basic configuration variables
#################################################

#PATH is a platform-specific string listing the directories that will be searched when resolving 'path:' URLs
#default: -p argument if set, otherwise the RACCOONPATH environment variable if set, otherwise the current directory
PATH = '.;..'

#appBase specifies the base URL that this application runs on
#default: '/'
appBase = "/cgi-bin/rhizome"

#appName is a short name for this application, must be unique within the current Raccoon process
#default: if only one application is running, "root", if multiple are,
#BASE_MODEL_URI with non-alphanumeric characters replaced with "_"
appName = "root"

#the __include__ function includes another Raccoon configuration file
__include__('../rhizome/rhizome-config.py')

#BASE_MODEL_URI is the base URI reference to be used when creating RDF resources
#this is made available as the $BASE_MODEL_URI XPath variable
#default: 'http://' + socket.getfqdn() + '/'
BASE_MODEL_URI='http://example.com/'

#domStoreFactory is the class or factory function that Raccoon will call to instantiate the application's DOM store
#It is passed as keyword arguments the dictionary of the variables contained in the config file
#note that this is a callable object which may need to be imported into the config file
#default is DomStore.RxPathDomStore
from rx import DomStore
domStoreFactory = DomStore.XMLDomStore #the application uses an XML document instead of RDF

#the following config variables are passed to domStoreFactory as keyword arguments:

#STORAGE_PATH the location of the RDF model. Usually a file path but the appropriate value depends on 'modelFactory'
#default is ''
STORAGE_PATH = 'mywebsite.nt'

#transactionLog is the path of the transactionLog.
#The transactionLog records in NTriples format a log
#of the statements added and removed from the model along with comments on when and by whom.
#Note: the default file store uses this format so there is not much reason to use this if you are using the default
#default is '' (no transactionLog)
transactionLog = '/logs/auditTrail.nt' #must be a quoted string; a bare path is a syntax error

#STORAGE_TEMPLATE is a string containing NTriples that is used when
#the file specified by STORAGE_PATH is not found
#NOTE(review): the statements below contain no <...> URI terms, so they look incomplete
#(an N-Triples statement needs subject, predicate and object) -- confirm and restore
#the missing terms before using this example
STORAGE_TEMPLATE='''
_:itemdispositionhandlertemplate "item-disposition-handler-template" .
_:itemdispositionhandlertemplate _:itemdispositionhandlertemplate1List .
_:itemdispositionhandlertemplate .
'''

##APPLICATION_MODEL is a string containing NTriples that are added to the RDF model
#but are read-only and not saved to disc. Use for structural components such as the schema.
#NOTE(review): this statement also appears to be missing its URI terms -- confirm
APPLICATION_MODEL=''' "ZML" .'''

#modelFactory is the class or factory function used by RxPathDomStore to load or create a new RDF document or database
#note that this is a callable object which may need to be imported into the config file
#default is RxPath.IncrementalNTriplesFileModel
modelFactory=RxPath.RedlandHashBdbModel

#VERSION_STORAGE_PATH the location of a separate RDF model for storing the history of changes to the database.
#Usually a file path but the appropriate value depends on 'versionModelFactory'
#default is '' (history not stored separately)
VERSION_STORAGE_PATH = 'mywebsite.history.nt'

#versionModelFactory is the class or factory function used by RxPathDomStore to load or create the version history RDF database
#note that this is a callable object which may need to be imported into the config file
#default is whatever 'modelFactory' is set to
versionModelFactory=RxPath.RedlandHashBdbModel

#the following are security specific configuration settings:

#SECURE_FILE_ACCESS limits 'file:' URL access to only the directories reachable through the PATH
#default is True
SECURE_FILE_ACCESS = True

#DEFAULT_URI_SCHEMES specifies which URL schemes can be resolved.
#default is ['file','data'] (no URL schemes that allow network access)
DEFAULT_URI_SCHEMES = ['http', 'https', 'file', 'ftp', 'data', 'gopher'] #all supported schemes

#uriResolveWhitelist is a list of regular expressions. URL resolution will only be allowed if
#the URL matches one of its regular expressions.
#default: uriResolveWhitelist = [] (if the list is empty, resolve all URLs)
uriResolveWhitelist = ['(file:|data:|site:|path:).*', 'http://localhost.*']

#uriResolveBlacklist is a list of regex strings where if any match the URL, resolution is denied.
#default: uriResolveBlacklist = []
uriResolveBlacklist = [
    'http://(.*@)?localhost(?!:8000)',
    #blacklist access to the local subnet: "10" followed by a literal dot.
    #(the pattern used to read '10\.*.', which also matched hosts such as 100.x.x.x)
    r'http://(.*@)?10\..*',
]

#authorizationDigests is a dictionary used by RequestProcessor.authorizeByDigest
#to create a whitelist of content that may be processed. By default it is used for the Python content processor
#(see authorizeContentProcessors setting below).
#Because it's a config setting, it adds a level of authorization that only local administrators can set.
#The dictionary contains items with a base64-encoded SHA1 digest as the key and a non-zero value.
#The digest can be calculated using the rx.utils.shaDigest function
#and is also reported in the NotAuthorized exception message raised by the Python content processor
#and is the value of a:sha1-digest property set by Rhizome ("Edit Metadata" will display it).
#default is {} (thus no Python code can be executed)
authorizationDigests = { 'rosyIeJ7Kk+FZRkYPSjcteQi59w=' : 1} #for myPythonPage.py

#disabledContentProcessors is a list of content processors to disable.
#To enable all content processors set this to an empty list.
#default is [] (all contentProcessors enabled)
disabledContentProcessors = ['http://rx4rdf.sf.net/ns/wiki#item-format-python']

#Other config variables

#nsMap is a dictionary of namespace prefixes that may appear in RxPath expressions
#default is {} but Raccoon will always add 'rdf', 'rdfs', 'owl', 'bnode', 'wf' (for Raccoon XPath Extension functions),
#'xf' (for 4Suite XPath Extension functions), 'session', 'request-header', 'response-header',
#'request-cookie', 'response-cookie', 'previous' (for callActions() ) and 'error'
nsMap = {
    'dc' : 'http://purl.org/dc/elements/1.1/',
    'myNs' : 'http://www.example.com/#',
}

#XPATH_CACHE_SIZE sets the maximum number of items to be stored in the XPath processing cache. Set to 0 to disable.
#default is 1000
XPATH_CACHE_SIZE=1000

#ACTION_CACHE_SIZE sets the maximum number of items to be stored in the Action cache. Set to 0 to disable.
#default is 1000
ACTION_CACHE_SIZE=1000

#XPATH_PARSER_CACHE_SIZE sets the maximum number of items to be stored in the XPath parser cache. Set to 0 to disable.
#default is 200
XPATH_PARSER_CACHE_SIZE=200

#STYLESHEET_CACHE_SIZE sets the maximum number of items to be stored in the stylesheet parser cache. Set to 0 to disable.
#default is 200
STYLESHEET_CACHE_SIZE=200

#FILE_CACHE_SIZE sets the maximum size (in bytes) of the file cache
#(used by the file:// and path: URL resolvers). Set to 0 to disable.
#default is 0 (disabled)
FILE_CACHE_SIZE=10000000 #~10mb

#MAX_CACHEABLE_FILE_SIZE limits caching files to only those less than the specified size (in bytes).
#Use to prevent the cache from being filled up with just a few large files.
#default is 0 (files of any size may be cached)
MAX_CACHEABLE_FILE_SIZE=10192 #only cache files under 10K in size

#if LIVE_ENVIRONMENT is True, the system will not cache XPath expressions
#or Actions that reference functions (such as document())
#that retrieve URLs or interact with the file system.
#Also stylesheets with xsl:import, xsl:include, xinclude or external entity references will not be cached.
#default is True
LIVE_ENVIRONMENT = True

#defaultPageName specifies the name of the page to be invoked if the request URL doesn't include a path
#e.g. http://www.example.org/ is equivalent to http://www.example.org/index
#default is 'index'
#defaultPageName='home.html'

#if DEFAULT_MIME_TYPE is set then this MIME type will be set on any request that doesn't set its own mimetype
#and Raccoon can't guess its MIME type
#default is '' (not set)
DEFAULT_MIME_TYPE='text/plain'

#MODEL_RESOURCE_URI is the resource that represents the model this instance of the application is running
#it can be used to make assertions about the model itself, e.g. its location or which application created it
#default is the value of BASE_MODEL_URI
MODEL_RESOURCE_URI = 'http://example.org/rhizomeapp/2/20/2004'

#defaultExpiresIn specifies what to do about the Expires HTTP response header if it
#hasn't already been set by the application. If its value is 0 or None the header
#will not be sent, otherwise the value is the number of seconds in the future
#that responses should expire. To indicate that they already expired set it to -1;
#to indicate that they never expire set it to 31536000 (1 year).
#default is 3600 (1 hour)
defaultExpiresIn = 0 #disable setting the Expires header by default

#if useEtags is True, If-None-Match request headers are honored and an etag based
#on a MD5 hash of the response content will be set with every response
#default is True
useEtags = False #disable

#if useFileLock is True Raccoon will use interprocess file lock when committing
#a transaction. Alternately useFileLock can be a reference to a class or factory
#function that conforms to the glock.LockFile interface.
#default is False
useFileLock=True #enable

#################################################
## available read-only variables
#################################################
#(the bare names below are no-op expression statements that only illustrate
#the variables available to a config file)
__server__ #RequestProcessor reference
__argv__ #list of arguments intended for this configuration file
__configpath__ #stack of configuration file paths, __configpath__[-1] is always the current file
#__configpath__[-2], if it exists, would be the config file currently including this one, and so on.

###############################################################################
## rhizome-specific configuration variables
## these are only used when including rhizome-config.py into your config file
###############################################################################

#MAX_MODEL_LITERAL when saving a page, any content whose length is greater than this will be saved to disk.
#default is -1 (disable save to disk)
MAX_MODEL_LITERAL = 0 #save any page to disk

#SAVE_DIR is the directory content is saved to. It is a relative or
#absolute path and must be a distinct sub-directory of a directory on the PATH
#default is 'content/.rzvs'
SAVE_DIR='content/.rzvs'

#ALTSAVE_DIR allows a copy of the latest revision of a page to be saved there,
#allowing external changes to the page to be made without messing up the revision history.
#Changes made externally will appear in Rhizome (almost) as if a change was
#saved with the minor edit flag.
#It must be a relative or absolute path and must be on the PATH
#(If PATH isn't specified ALTSAVE_DIR is added to it.)
#to disable ALTSAVE logic set ALTSAVE_DIR = ''
#default is 'content'
ALTSAVE_DIR='content'

#THEME_DIR specifies the directory where theme resources (such as theme.css)
#are located. It only takes effect when PATH is not specified; if so, it is
#added to the PATH after the ALTSAVE_DIR.
#It can be either a relative or absolute path; if it is a relative path
#the directory where rhizome-config.py is located is prepended.
#default is 'themes/default'
THEME_DIR='themes/mytheme'

#these following password related settings are referenced by the rhizome config
#and so must be set before you include it

#SECURE_HASH_SEED is a string that is combined with plaintext when generating a secure hash of passwords
#You really should set your own private value. If it is compromised, it will be much
#easier to mount a dictionary attack on the password hashes.
#If you change this all previously generated password hashes will no longer work.
#default is 'YOU REALLY SHOULD CHANGE THIS!' (will cause a warning on start up)
SECURE_HASH_SEED = 'my secret seed value'

#ADMIN_PASSWORD is the password for the default 'admin' super-user
#default password is 'admin' (will cause a warning on start up)
ADMIN_PASSWORD = 'topsecret'

#ADMIN_PASSWORD_HASH can be set instead of ADMIN_PASSWORD if you don't want to store the password in the config file
#you can calculate it using sha.sha( 'yourpassword' + secureHashSeed).hexdigest()
ADMIN_PASSWORD_HASH = '12201fe5e202883bd45fc97e87366ea05183e0e4'

#passwordHashProperty is the name of the RDF property that represents the secure hash of a password
#There should be a unique property for each extant seed.
#default is BASE_MODEL_URI+'password-hash'
passwordHashProperty = 'http://example.org#password-hash5' #a URI must be quoted to be a valid python string

#secureHashMap is a dictionary mapping secure hash properties to seeds.
#It is used by wf:secure-hash.
#You can set this if you need to support multiple seeds (e.g. for backwards compatibility)
#Otherwise, use the default.
#default is { passwordHashProperty : SECURE_HASH_SEED }
secureHashMap = {
    '#oldHashProperty' : 'oldSeed',
    passwordHashProperty : SECURE_HASH_SEED,
}

#other Rhizome specific security settings

#Authors that don't own the "base:create-unsanitary-content-token" access token
#will have their content filtered to prevent them from writing dangerous HTML constructs
#(basically from executing javascript or embedding objects). You can customize which elements
#and attributes may not appear using these settings:

#blacklistedElements is a list of elements banned from appearing
#default: see utils.BlackListHTMLSanitizer
blacklistedElements = rx.rhizome.SanitizeHTML.blacklistedElements + ['style']

#blacklistedAttributes is a dictionary whose keys and values are regular expressions
#that are matched against the attribute's name and value, respectively.
#default: see utils.BlackListHTMLSanitizer
blacklistedAttributes = { 'style': '.*', } #remove any inline style attribute

#blacklistedContent is a dictionary whose keys and values are regular expressions.
#The key is matched against the element's name and the value is searched through
#the text and comments contained by the element.
#If they match, the entire comment or text will be removed
#default: see utils.BlackListHTMLSanitizer
blacklistedContent = { 'style': 'javascript:|@import' }

#if the following two properties are set, before saving content Rhizome will invoke the Akismet
#service to check if the content is spam (unless the user has the create-nospam-token access token)
#See http://akismet.com/ for more info.
#akismetKey is your free WordPress API key
#default: ''
akismetKey = '79deadbeef31'

#akismetUrl is the url to your blog or wiki
#default: ''
akismetUrl = 'http://mywiki.example.net'

#the following settings can be turned on to give your site a wiki like look and feel

#if undefinedPageIndicator is True then links to internal pages that haven't been defined will be displayed with ?
#(or however path:wiki2html.xsl transforms such links)
#default is True
undefinedPageIndicator=True

#if externalLinkIndicator is True then links to external URLs will have an icon next to them
#(or however path:wiki2html.xsl transforms such links)
#default is True
externalLinkIndicator=True

#if interWikiLinkIndicator is True then InterWiki links will have an icon next to them
#(or however path:wiki2html.xsl transforms such links)
#default is True
interWikiLinkIndicator=True

#interWikiMapURL is the URL to the file that defines the supported InterWiki link types
#to disable InterWiki linking set this to ''
#default is 'site:///intermap.txt'
interWikiMapURL=''

#namespaceMapURL is the URL to the file that lists the namespace prefixes that
#will be used when serializing RDF (e.g. when displaying RxML)
#default is 'site://namespaces.txt'
namespaceMapURL='site://namespaces.txt'

#ZMLDefaultVersion indicates the version of the ZML syntax that should be assumed when
#the ZML header (e.g. #?zml0.7) is missing from ZML text.
#default: the value of zml.defaultZMLVersion (currently 0.7)
ZMLDefaultVersion = 0.8 #set default to latest experimental version

#content indexing

#INDEX_DIR is the directory the content index will be stored in
#If the directory doesn't exist at startup, it will be created and all applicable
#content will be indexed on startup.
#default is 'contentindex'
INDEX_DIR = 'contentindex'

#useIndex indicates whether content indexing should be queried and updated
#default is True if Lupy is installed
useIndex = False #no indexing

#indexableFormats is a list of content formats that should be indexed when content is saved.
#default is ['http://rx4rdf.sf.net/ns/wiki#item-format-text',
#            'http://rx4rdf.sf.net/ns/wiki#item-format-xml',
#            'http://rx4rdf.sf.net/ns/wiki#item-format-zml']
indexableFormats = rhizome.defaultIndexableFormats + ['urn:example:mycontentformat']

#the following settings can be used to customize Rhizome

#below are a few helper functions to make it easy to extend or override the rhizome default template
#they must be called after the rhizome config has been included
#The rhizome config creates a temporary dictionary of triples that is used to create the template
#These functions allow you to add or replace the items in the dictionary. The keys of the dictionary
#are either page names or one of the following:
#@userClasses, @sitevars, @labels, @auth, @dispositions, @doctypes, @model, @keywords, @themes
#(This list may be incomplete and is subject to change, see rhizome-config.py.)

#__addItem__ adds a page to STORAGE_TEMPLATE. See rhizome.addItem() for a list of all the keyword parameters
#if the page's name matches one of rhizome's default pages it will replace it.
__addItem__('Todo',loc='path:todo.txt', format='zml', doctype='todo')

#__addTriples__(contents='', replace=None) adds arbitrary triples to STORAGE_TEMPLATE given a string in NTriples format
#If the replace keyword is given it will replace the triples associated with the name
#NOTE(review): the statements below contain no <...> URI terms, so they look incomplete
#(an N-Triples statement needs subject, predicate and object) -- confirm and restore them
__addTriples__(
'''
_:itemdispositionhandlertemplate "item-disposition-handler-template" .
_:itemdispositionhandlertemplate _:itemdispositionhandlertemplate1List .
_:itemdispositionhandlertemplate .
''')

#__addRxML__(contents='', replace=None) adds arbitrary triples to STORAGE_TEMPLATE given a string in RxML format
#If the replace keyword is given it will replace the triples associated with the name
#NOTE(review): the line breaks and indentation inside this RxML string are reconstructed
#(assuming one property per indented line) -- verify against the RxML syntax
__addRxML__(replace = '@sitevars', contents = '''
 base:site-template:
  wiki:header-image: `Rx4RDFlogo.gif
  wiki:header-text: ` Welcome to Rx4Rdf!
''')

#################################################
## advanced configuration variables
## these variables are only necessary when developing a new Raccoon application
#################################################

#cmd_usage is the string used to display the command-line usage help
cmd_usage = '''--import [dir] [--recurse] [--format format] [--disposition disposition] --export dir [--static]'''

#configHook is a function called after this config file has been loaded
#its signature is def configHook(configDict) where configDict is the dictionary of the variables contained in the config file
#default is None
configHook = rhizome.configHook

#actions is a dictionary that is the heart of an application running on Raccoon
# the key is the name of the trigger and the value is the list of Actions that are invoked in that order
# Raccoon currently uses these triggers:
# 'http-request' is invoked by RequestProcessor.handleRequest (for http requests) and by the 'site:' URL resolver
# 'load-model' is invoked after a model is loaded
# 'run-cmds' is invoked on start-up to handle command line arguments
# 'before-add' and 'before-remove' are invoked every time a statement is added or removed
# 'before-new' is invoked when a new resource is added
# 'before-prepare' is invoked at the end of a transaction but the trigger still has a chance to modify it
# 'before-commit' is invoked when the transaction is frozen and about to be committed, one last chance to abort it
# 'after-commit' is invoked after a transaction is completed successfully
# triggerName + '-error' is invoked when an exception is raised while processing a trigger
# see Action class for more info
actions = {
    'http-request' : [
        Action(['.//myNs:contents/myNs:ContentTransform/myNs:transformed-by/*',],
               __server__.processContents, matchFirst = False, forEachNode = True),
    ],
    'run-cmds' : [
        #rhizome=rhizome binds the module as a default argument when the lambda is defined
        Action(["$import", '$i'],
               lambda result, kw, contextNode, retVal, rhizome=rhizome:
                   rhizome.doImport(result[0], **kw)),
        Action(['$export', '$e'],
               lambda result, kw, contextNode, retVal, rhizome=rhizome:
                   rhizome.doExport(result[0], **kw)),
    ],
    'load-model' : [ FunctorAction(rhizome.initIndex) ],
}

#DEFAULT_TRIGGER is used by Requestor objects and the "site:" URL resolver as the trigger to use to invoke a request
#default is 'http-request'
DEFAULT_TRIGGER='http-request'

#globalRequestVars is a list of request metadata variables that should be preserved
#when invoking callActions() (e.g. to invoke templates or an error handler)
#default is [] (but Raccoon will always add the following:
# '_name', '_noErrorHandling', '__store', '_APP_BASE', and '__readOnly')
globalRequestVars = [ '__account', '_static', '_disposition' ]

#contentProcessors is a list used by RequestProcessor.processContents to specify
#content processors in addition to the built-in ones (RxSLT, RxUpdate, Python, base64, etc.)
contentProcessors = [ ZMLContentProcessor() ]

#authorizeContentProcessors is a dictionary used by RequestProcessor.processContents to authorize
#that the content can be sent to the content processor.
#The key is the URI of the content format or 'default'. If the format doesn't exist in the dictionary, then 'default' is checked.
#The value is a function with this signature:
#def authorizeContent(contentProcessor, contents, formatType, kw, dynamicFormat)
#where dynamicFormat is a boolean indicating whether the format was returned by the previous content processor.
#default is {'http://rx4rdf.sf.net/ns/wiki#item-format-python': self.authorizeByDigest}
authorizeContentProcessors = {} #no authorization

#extFunctions is a dictionary of RxPath extension functions to be made available to RxPath, RxSLT, RxUpdate, etc.
extFunctions = {
    #nodeset.reverse() reverses in place and returns None, so "or nodeset" yields the reversed list
    ('http://example.com/extfuncs', 'reverse'): lambda context, nodeset: nodeset.reverse() or nodeset,
}

#dictionary of XPath functions or Xslt Extension elements that may not be cacheable
#if the value evaluates to False then the function is treated as not cacheable
#otherwise the value should be a function with the signature:
#def getKey(node, context, notCacheableDict) where
#node is either the Ft.Xml.XPath.ParsedExpr.FunctionCall
#or the Ft.Xml.Xslt.XsltElement node representing the function or extension
#element, respectively, context is the XPath context,
#and notCacheableDict is this dictionary.
#getKey should either return a key (any hashable object)
#or raise MRUCache.NotCacheable.
#Note that the context may not have all its fields set. If the getKey function relies on one,
#it should check that it's not None and raise MRUCache.NotCacheable if it is.
#default is {} (but the system will always add its own list)
NOT_CACHEABLE_FUNCTIONS= {('http://example.com/extfuncs', 'delete-everything') : 0 }

#getPrincipleFunc is a function that is called to retrieve the
#application-specific Principal (in the security sense) object
#for the current request context.
#It takes one argument that is the dictionary of metadata for the current request
#default: lambda kw: ''
getPrincipleFunc = lambda kw: kw.get('__account','')

#authorizeMetadata is a function that is called whenever one of Raccoon's XXX-metadata
#XPath extension functions is called.
#Its signature looks like:
#def authorizeMetadata(operation, namespace, name, value, kw)
#where operation is one of 'get', 'has', 'assign', 'remove'
#return True if the operation is authorized or False if it isn't
#default is lambda *args: True
authorizeMetadata=rhizome.authorizeMetadata

#validateExternalRequest is a function that is called
#when receiving an external request (e.g. an http request)
#It is called before invoking runActions(). Use it to make sure the request
#doesn't contain metadata that could dangerously confuse request processing.
#Its signature looks like:
#def validateExternalRequest(kw)
#where kw is the request metadata dictionary (which can be modified if necessary).
#It should raise raccoon.NotAuthorized if the request should not be processed.
#default is lambda *args: True
validateExternalRequest=rhizome.validateExternalRequest

#authorizeXPathFuncs is a function that is invoked on start up to
#allow the application to modify a copy of the extFunctions dictionary.
#(i.e. to update it with functions that do an authorization before invoking
#the XPath function.) This modified dictionary is used when authorization is
#needed (e.g. when processing XSLT, RxSLT, and RxUpdate stylesheets).
#Its signature looks like:
#def authorizeXPathFuncs(extFunctions, notCacheableDict)
#where extFunctions is a copy of the dictionary of XPath functions
#and notCacheableDict is a copy of NOT_CACHEABLE_FUNCTIONS
#default is lambda *args: None
authorizeXPathFuncs=rhizome.authorizeXPathFuncs #rhizome-config.py sets this

#authorizedExtFunctions is a dictionary used by rhizome.authorizeXPathFunc
#where the key is the name of the XPath function and the value is
#(authFunc, 0 or cachFunc) with the following signatures:
# def authFunc(funcName, context, args)
#   returns (list of access token URIs, args) or raises NotAuthorized
# def cachFunc(funcName, func)
#   returns either 0 (never cache), -1 (OK to cache), or a caching func
authorizedExtFunctions = {
    ('http://example.com/extfuncs','delete-everything') : (
        lambda name, context, args: (['http://example.com/delete-everything-token'], args),
        0,
    ),
}

#shredders is a list of ContentProcessors that are invoked during the shredding process
#default: see rhizome-config.py
shredders = [ rx.ContentProcessors.RDFShredder() ]