*
**/
# Version string of this SIOC exporter; matches the newest entry in the change history below.
$sioc_ver = '1.25';
/*
* Change history:
* v1.25 - Fixed bug with wrong mbox_sha1sum(s) for comments
* v1.24 - Work around problems with Zotero COinS metadata plugin.
* v1.23 - Bugfix for work with WordPress 2.1
* v1.22 - Added export of tags created by the UltimateTagWarrior plugin
* v1.21 - Content negotiation added for 'application/rdf+xml'
* v1.20 - Renamed clean() to sioc_clean() to avoid name conflicts
* v1.19 - Improved display of author info (SIOC and FOAF) for comments and posts
* v1.18 - Using FOAF for sioc:User - according to new version of the ontology
* - Added an option to display/hide email address (default:hide)
* v1.17 - Fixed bug that caused WP "more" tag to be ignored
* v1.16 - Fixed bug with unescaped '&' in auto-discovery link and in has_creator for comments
* v1.15 - assigned URIs to sioc:Forum and sioc:Usergroup
* - fixed property names for sioc:email and sioc:email_sha1
* v1.14 - Fixed bug with HTML tags appearing in sioc:content
* - Removed extra '<' from beginning of CDATA sections
* v1.13 - Applied WP filters to sioc:links_to, rdfs:seeAlso (to remove invalid XML entities)
* - Applied WP filters to sioc:content (to remove invalid XML entities)
* v1.12 - Fixed bug in paging of posts on main SIOC site page
* - Fixed bug with unescaped " in sioc:links_to
* - Removed sioc:made property from the data describing a group of authors (does not exist in ontology)
* v1.11 - Wrapped code into a WP plugin
* v1.10 - Undo fix of '&' in seeAlso - '&' should be escaped
* v1.09 - Added output of plugin version
* - Added extracting of sioc:links_to from the blog post links
* - Added extracting of rdfs:seeAlso from the blog post links (type="application/rdf+xml")
* - Fixed bug with escaped '&' appearing in rdfs:seeAlso
* v1.08 - Fixed Content-Type (to application/rdf+xml)
* v1.07 - Fixed format of seeAlso info
* - Fixed problems with & in sioc:content (added htmlspecialchars() filter)
* - Added htmlspecialchars() filter to sioc:has_creator (was problem with » tag)
* v1.06 - Improved and refactored
* v1.05 - Added paging of list of posts
* v1.04 - Added display of comments
* - Changed sioc:created to created_at
* v1.03 - Fixed display of URI for sioc:has_creator
* - Switched to have_posts() loop for going through the posts [WP 1.5]
* - Added display of posts as a part of forum description ( uses sioc:container_of )
* - Added display of sioc:link to Forums and Posts
* - Added display of post categories (topics)
* v1.02 - Stripped HTML tags from sioc:content
* v1.01 - Addressing of resources (users, posts, ...) changed from rdf:nodeID to rdf:about
* - Changed sioc:description to contain authordata->user_description
* - Fixed display of sioc:content for Posts. Note: a 'rich' HTML content is currently being displayed
* v1.00 - Initial version
*/
# TODO - Export forum's categories in SIOC or a separate SKOS file
# Modelling - site has a single forum (in wordpress case)
# - a forum has a hierarchy of categories
/*
* Bugs:
* - might be a problem in generating comment URIs with trailing '/' + '#'
* ? is » problem with the script or Redland?
*/
/*
* Assumptions:
* - blog post HTML - when extracting hyperlinks from HTML we assume that tag attributes are enclosed within double quotes ("...")
*
**/
/* Global variables */
# Blog ID to export. No function visible in this file reads it.
$blog = 1; // enter your blog's ID
# $doing_rss = 1;
# SIOC generation parameters
# When 1, sioc_users_list() selects every user; otherwise only users that have
# posts or whose ID is greater than 1.
$sioc_all_users = 1;
# When non-zero, sioc_user_rdf() additionally requests the un-hashed e-mail
# property from get_email_property() (default: hidden).
$sioc_show_email = 0;
# Disabled stand-alone bootstrap: load WordPress, run the exporter, stop.
# require_once('wp-blog-header.php');
# sioc_main();
# exit;
/**
 * Main cycle
 *
 * Reads the request parameters (via sioc_sanitize_vars()), prints the RDF
 * header, dispatches on $GLOBALS['sioc_type'] and prints the document tail.
 *
 * Recognised types: 'site', 'post', 'comment', 'user'. Any other value
 * (including the 'empty' placeholder set by sioc_sanitize_vars()) produces
 * only a blank line as body.
 *
 * @return void Output is echoed directly.
 **/
function sioc_main() {
    # WordPress global read by its content functions; presumably set so that
    # the whole post body is exported (see v1.17 in the change history).
    global $more;
    $more = 1;

    sioc_sanitize_vars();

    # See v1.24 in the change history: the COinS metadata plugin's content
    # filter is detached before any post content is rendered.
    remove_filter('the_content', 'coinsify_the_content');

    # Create RDF header
    echo sioc_rdf_header();

    $sioc_type = $GLOBALS['sioc_type'];

    # This statement used to be `echo '\n";` (mismatched quotes), which made
    # the whole file unparsable. The text between the quotes was lost; per
    # v1.09 ("Added output of plugin version") it is restored as an XML
    # comment carrying the exporter version.
    # NOTE(review): the exact original wording is unknown - confirm.
    $sioc_version = isset($GLOBALS['sioc_ver']) ? $GLOBALS['sioc_ver'] : '';
    echo '<!-- SIOC exporter v' . htmlspecialchars($sioc_version) . " -->\n";

    /* Main control switch */
    switch ($sioc_type) {
        case 'site':
            sioc_site_export();
            break;
        case 'post':
            echo sioc_post_rdf($GLOBALS['sioc_id']);
            break;
        case 'comment':
            echo sioc_comment_rdf($GLOBALS['sioc_id']);
            break;
        case 'user':
            echo sioc_user_rdf($GLOBALS['sioc_id']);
            break;
        default:
            echo "\n";
    }

    /* That's all */
    # NOTE(review): the literal below is empty; it looks like the closing root
    # element was lost together with the opening one in sioc_rdf_header().
    echo "\n" . '' . "\n"; #end of RDF
}
/**
 * Export of main site metadata
 *
 * Emits, in this order: the user list (printed by sioc_users_list() itself),
 * the site description and the forum description with its list of posts.
 *
 * @return void Output is printed directly.
 **/
function sioc_site_export() {
    # The user list prints its own output.
    sioc_users_list();

    # Site and forum descriptions are returned as strings; print each as soon
    # as it is built so the output order matches the call order.
    print sioc_site_rdf();
    print sioc_forum_rdf();

    # TODO - Export forum's categories in SIOC or a separate SKOS file
    # Modelling - site has a single forum (in wordpress case)
    # - a forum has a hierarchy of categories
}
/**
 * Clean content
 *
 * Escapes a string so it can be embedded in XML text or attribute values.
 *
 * Only the characters that are special in XML (& < > " ') are replaced.
 * The previous implementation used htmlentities(), which also rewrote
 * characters such as "»" into HTML-only named entities (&raquo;) that are
 * undefined in XML and break RDF parsers.
 *
 * @param string $t_str Text to escape; treated as UTF-8.
 * @return string Escaped text.
 **/
function sioc_clean($t_str) {
    return htmlspecialchars((string) $t_str, ENT_QUOTES, 'UTF-8');
}
/**
 * Output FOAF Document info
 *
 * Builds the introductory text block for a SIOC page: a title line naming
 * the object type and the blog, followed by a fixed description of what a
 * SIOC profile is.
 *
 * NOTE(review): $primaryTopic is only tested for emptiness here and never
 * appears in the returned text, and the text carries no XML elements. The
 * original description mentions a foaf:Document with a foaf:primaryTopic
 * property, so markup appears to be missing from the literal - confirm
 * against a pristine copy of the plugin.
 *
 * @param string $primaryTopic URI of the object the page is about; an empty
 *                             value yields an empty result.
 * @param string $objectType   Optional label ('Site', 'Post', ...) placed
 *                             before the word "profile".
 *
 * @return string
 **/
function foaf_document_header( $primaryTopic="", $objectType="" ) {
    # Nothing to describe without a primary topic.
    if ( $primaryTopic == "" ) {
        return "";
    }

    # A non-empty type label is followed by a single space.
    $type_label = $objectType ? $objectType . " " : $objectType;
    $blog_name = htmlspecialchars(get_bloginfo('name'));

    $document = '
SIOC ' . $type_label . 'profile for "' . $blog_name . '"
A SIOC profile describes the structure and contents of a weblog in a machine readable form. For more information please refer to http://sioc-project.org/.
';

    return $document . "\n";
}
/**
 * Process and sanitize input variables
 *
 * Copies the exporter's GET parameters into $GLOBALS after validating them:
 *  - sioc_type: kept only if it consists of lower-case letters/underscores,
 *    otherwise replaced by the placeholder 'empty';
 *  - sioc_id, sioc_off: kept (as int) only if they are plain decimal
 *    numbers, otherwise set to ''.
 *
 * Missing parameters and non-scalar values (e.g. ?sioc_id[]=1) are treated
 * as empty. The previous version copied the raw GET values unchanged even
 * though they are later used in SQL queries.
 *
 * @todo add cleaning of WP variables
 * @return void
 **/
function sioc_sanitize_vars() {
    $sioc_vars = array('sioc_type', 'sioc_id', 'sioc_off');

    foreach ($sioc_vars as $sioc_var) {
        $raw = '';
        if (isset($_GET[$sioc_var]) && is_scalar($_GET[$sioc_var])) {
            $raw = (string) $_GET[$sioc_var];
        }

        if ($sioc_var == 'sioc_type') {
            # Type is a short keyword; anything else falls back to 'empty' below.
            $value = preg_match('/^[a-z_]+$/', $raw) ? $raw : '';
        } else {
            # IDs and offsets are non-negative integers.
            $value = preg_match('/^[0-9]+$/', $raw) ? (int) $raw : '';
        }

        $GLOBALS[$sioc_var] = $value;
    }

    if (empty($GLOBALS['sioc_type'])) {
        $GLOBALS['sioc_type'] = 'empty';
    }
}
/**
 * Output RDF header
 *
 * Sends the HTTP Content-type header for RDF/XML and returns the text that
 * opens the document.
 *
 * NOTE(review): all three header lines are empty strings in this copy, so
 * only line breaks are returned. They presumably held the XML declaration
 * and the opening root element - confirm against a pristine copy.
 *
 * @return string
 **/
function sioc_rdf_header() {
    header('Content-type: application/rdf+xml', true);
    #header('Content-type: text/plain', true);

    # One entry per header line; the last one is followed by a blank line.
    $header_lines = array(
        '' . "\n",
        '' . "\n",
        '' . "\n\n",
    );

    return implode('', $header_lines);
}
/**
 * Output SIOC users list
 *
 * Prints the list of blog users, one short sioc_user_rdf() record per user,
 * under a heading naming the blog. Which users are listed depends on the
 * global $sioc_all_users: 1 selects every row of the users table, any other
 * value selects users with at least one post or with an ID above 1.
 *
 * NOTE(review): the wrapper literals around the list are empty strings in
 * this copy; markup appears to be missing.
 *
 * @return void Output is echoed directly.
 **/
function sioc_users_list () {
    global $wpdb, $tableusers, $tableposts, $sioc_all_users ;

    # Pick the query first, then run it once.
    if ($sioc_all_users == 1) {
        $sql = 'SELECT u.ID as ID FROM ' . $tableusers . ' AS u';
    } else {
        $sql = 'SELECT u.ID as ID, count(post_status) AS pc FROM ' . $tableusers . ' AS u LEFT JOIN ' . $tableposts . ' AS p ON u.ID=p.post_author GROUP BY u.ID HAVING pc>0 or u.ID>1';
    }
    $user_rows = $wpdb->get_results($sql);

    # The leading line break is printed even when there are no users.
    echo "\n";
    if (!$user_rows) {
        return;
    }

    echo '';
    echo "\n\t" . 'Authors at "' . htmlspecialchars(get_bloginfo('name')) . '"';

    foreach ($user_rows as $user_row) {
        echo "\n\t";
        echo "\n" . sioc_user_rdf($user_row->ID, true);
        echo "\n\t";
    }

    echo "\n" . '' . "\n";
}
/**
 * Form and output a SIOC Query URL
 *
 * Builds the absolute URL of the currently running script (http scheme,
 * host from HTTP_HOST, path from PHP_SELF), appends the given query string
 * and returns the result escaped for use inside XML.
 *
 * The $_SERVER keys were previously written as bare constants
 * ($_SERVER[HTTP_HOST]), which is a fatal error on PHP 8. Escaping now uses
 * htmlspecialchars() so that no HTML-only named entities end up in the XML.
 *
 * @param string $query_str Query string without the leading '?'; may be empty.
 * @return string XML-escaped URL.
 **/
function query_url($query_str) {
    $host = isset($_SERVER['HTTP_HOST']) ? $_SERVER['HTTP_HOST'] : '';
    $script = isset($_SERVER['PHP_SELF']) ? $_SERVER['PHP_SELF'] : '';

    $url = 'http://' . $host . $script;
    if ($query_str) {
        $url .= '?' . $query_str;
    }

    return htmlspecialchars($url, ENT_QUOTES, 'UTF-8');
}
/**
 * Return SIOC seeAlso link
 *
 * Returns an rdfs:seeAlso reference to the SIOC query URL built by
 * query_url() for the given parameters.
 *
 * The element form used to return an empty string and ignore $query_str;
 * it is restored here as the element counterpart of the attribute form.
 *
 * @param string  $query_str Query string without the leading '?'.
 * @param boolean $do_tag    If true, return a stand-alone element
 *                           (<rdfs:seeAlso rdf:resource="URL"/>); otherwise
 *                           return an attribute (rdfs:seeAlso="URL").
 * @return string
 **/
function query_link($query_str, $do_tag=false) {
    # query_url() already returns an XML-escaped value.
    $url = query_url($query_str);

    if ($do_tag) {
        return '<rdfs:seeAlso rdf:resource="' . $url . '"/>';
    }

    return 'rdfs:seeAlso="' . $url . '"';
}
/**
 * Generate email / sha1 property
 *
 * Returns an RDF property describing a user's e-mail address: either the
 * address itself (foaf:mbox) or the SHA1 hash of its mailto: URI
 * (foaf:mbox_sha1sum).
 *
 * In this copy the un-hashed branch returned an empty string and the hash
 * was emitted without any element around it. The element names are restored
 * from the inline note in the original code ("return foaf:mbox instead of
 * foaf:mbox_sha1sum").
 * NOTE(review): confirm the exact element form against a pristine copy.
 *
 * @param string $email E-mail address; an empty value yields ''.
 * @param bool   $enc   If true (default) return the hashed form, if false
 *                      the plain address.
 * @return string Property element, or '' if there is no address or no hash
 *                function is available.
 **/
function get_email_property($email, $enc = true) {
    # Based on get_foaf_output_email_property by Morten Frederiksen
    if ('' == (string) $email) {
        return '';
    }

    if ($enc == false) {
        return '<foaf:mbox rdf:resource="mailto:' . htmlspecialchars($email, ENT_QUOTES, 'UTF-8') . '"/>';
    }

    # Try to calculate SHA1 hash of email URI.
    $sha1 = '';
    if (function_exists('sha1')) {
        $sha1 = sha1('mailto:' . $email);
    } elseif (function_exists('mhash')) {
        $sha1 = bin2hex(mhash(MHASH_SHA1, 'mailto:' . $email));
    }

    if ('' == $sha1) {
        return '';
    }

    return '<foaf:mbox_sha1sum>' . $sha1 . '</foaf:mbox_sha1sum>';
}
/**
 * Generate SHA1 hash of email
 *
 * Hashes the mailto: URI of an e-mail address with SHA1. Uses the built-in
 * sha1() when present, otherwise falls back to mhash(); if neither function
 * exists the result is an empty string.
 *
 * @param string $email
 * @return string Hex digest, or '' when no hash could be computed.
 **/
function get_email_sha1($email) {
    # Based on get_foaf_output_email_property by Morten Frederiksen
    $mailto_uri = 'mailto:' . $email;

    if (function_exists('sha1')) {
        $digest = sha1($mailto_uri);
    } elseif (function_exists('mhash')) {
        $digest = bin2hex(mhash(MHASH_SHA1, $mailto_uri));
    } else {
        $digest = '';
    }

    return ('' != $digest) ? $digest : '';
}
/**
 * Output person's FOAF details
 *
 * Assembles the record for a person identified by name, e-mail and homepage.
 * The e-mail is included only through get_email_property() in hashed mode;
 * the homepage line is added unless the value is empty or the bare 'http://'
 * placeholder.
 *
 * NOTE(review): the wrapper literals are empty strings in this copy, and
 * the foaf:name attribute prepared from $name is never inserted into the
 * result; markup appears to be missing.
 *
 * @param string $name
 * @param string $email
 * @param string $homepage
 * @return string
 **/
function sioc_foaf_rdf( $name, $email, $homepage ) {
    $hash_property = $email ? get_email_property($email, true) : '';

    # Prepared as an attribute, but not used by any literal below.
    $name = $name ? ' foaf:name="' . $name . '"' : '';

    $has_homepage = ($homepage and $homepage != 'http://');

    $pieces = array();
    $pieces[] = "\n\t" . '';
    $pieces[] = "\n\t\t" . '';
    $pieces[] = "\n\t\t\t" . $hash_property;
    if ($has_homepage) {
        $pieces[] = "\n\t\t\t" . '';
    }
    $pieces[] = "\n\t\t" . '';
    $pieces[] = "\n\t" . '';

    return implode('', $pieces);
}
/**
 * Output SIOC User details
 *
 * Returns the description of one WordPress user.
 *
 * Short form ($short = true): a compact record holding the seeAlso link to
 * the user's full SIOC page; in 'foaf' mode also the e-mail hash and, if
 * set, the homepage line. Any other $mode yields ''.
 * Full form ($short = false): the document header followed by the personal
 * details (name parts, optional plain e-mail, e-mail hash, nickname,
 * homepage) and the account details (login, display name, creation date,
 * description).
 *
 * Side effect: the global $authordata is replaced by the data of the
 * requested user (the_author() reads it).
 *
 * Fixes over the previous version: $foaf_name is always initialised, an
 * unknown user ID yields '' instead of dereferencing a failed lookup, and
 * nickname/login are XML-escaped like the other fields.
 *
 * NOTE(review): most wrapper literals are empty strings in this copy;
 * markup appears to be missing. $foaf_uri is kept for that markup.
 *
 * @param int     $author_ID User ID
 * @param boolean $short     Set true to return only the compact record
 * @param string  $mode      'sioc' or 'foaf'; only used in short form
 * @return string
 **/
function sioc_user_rdf ($author_ID = '', $short = false, $mode = 'sioc' ) {
    global $authordata, $sioc_show_email;

    $rdf = '';
    if (!$author_ID) {
        return '';
    }

    # Do not clobber the global with a failed lookup.
    $userdata = get_userdata($author_ID);
    if (!$userdata) {
        return '';
    }
    $authordata = $userdata;

    $user_uri = htmlspecialchars(get_author_link(0, $authordata->ID, $authordata->user_nicename));
    $foaf_uri = $user_uri . '#foaf';
    $user_link = query_link('sioc_type=user&sioc_id=' . $authordata->ID, true );
    $pref_name = htmlspecialchars( the_author('',false) );
    $email = $authordata->user_email;
    $sha1 = $email ? get_email_property($email, true) : '';
    $homepage = $authordata->user_url;
    $has_homepage = ($homepage and $homepage != 'http://');

    if ($short) {
        if ( $mode == 'sioc' ) {
            $rdf = "\t\t" . '';
            $rdf .= "\n\t\t\t" . $user_link;
            $rdf .= "\n\t\t" . '';
            return $rdf;
        } elseif ( $mode == 'foaf' ) {
            $rdf = "\t\t" . '';
            $rdf .= "\n\t\t\t" . $sha1;
            if ($has_homepage)
                $rdf .= "\n\t\t\t" . '';
            $rdf .= "\n\t\t\t" . $user_link;
            $rdf .= "\n\t\t" . '';
            return $rdf;
        }
        # Unknown mode: fall through and return the empty $rdf.
        return $rdf;
    }

    $nick = htmlspecialchars((string) $authordata->user_nickname);
    $login = htmlspecialchars((string) $authordata->user_login);

    $foaf_name = '';
    if ( $authordata->user_firstname || $authordata->user_lastname )
        $foaf_name = htmlspecialchars(trim( "$authordata->user_firstname $authordata->user_lastname" ));

    # Personal details
    $rdf .= "\n" . foaf_document_header( $user_uri, 'User' );
    $rdf .= "\n" . '';
    if ( $foaf_name )
        $rdf .= "\n\t" . $foaf_name;
    if ( $authordata->user_firstname )
        $rdf .= "\n\t" . '' . htmlspecialchars($authordata->user_firstname) . '';
    if ( $authordata->user_lastname )
        $rdf .= "\n\t" . '' . htmlspecialchars($authordata->user_lastname) . '';
    if ( $sioc_show_email and $email )
        $rdf .= "\n\t" . get_email_property($email, false);
    $rdf .= "\n\t" . $sha1;
    if ( $nick )
        $rdf .= "\n\t" . $nick;
    if ($has_homepage)
        $rdf .= "\n\t" . '';
    $rdf .= "\n\t" . '';
    $rdf .= "\n" . '' . "\n";

    # Account details
    $rdf .= "\n" . '';
    if ( $login )
        $rdf .= "\n\t" . $login;
    if ($pref_name)
        $rdf .= "\n\t" . '' . $pref_name . '';
    if ($authordata->dateYMDhour)
        # FIXME - the creation date is ugly for the default admin user - where database contains default value
        $rdf .= "\n\t" . '' . mysql2date('Y-m-d\TH:i:s', $authordata->dateYMDhour, 1, 1) . '';
    if ($authordata->user_description)
        $rdf .= "\n\t" . '' . htmlspecialchars($authordata->user_description) . '';
    $rdf .= "\n" . '' . "\n";

    return $rdf;
}
/**
 * Output SIOC Forum details
 *
 * Returns the description of the blog as a forum: a heading naming the blog,
 * one entry per post of the current WordPress loop (each with a seeAlso link
 * to the post's SIOC page) and, last, whatever sioc_next_page() returns as
 * the link to the next page of posts.
 *
 * NOTE(review): the wrapper literals are empty strings in this copy and
 * $forum_uri is computed but not inserted anywhere; markup appears to be
 * missing.
 *
 * @todo issue: paging can result in a crawler missing some posts if new post created between subsequent accesses.
 *
 * @return string
 **/
function sioc_forum_rdf () {
    global $post;

    $forum_uri = query_url( 'sioc_type=site#weblog' );

    # TODO - decide if sioc:Forum or sioc:Blog (a subclass of Forum) is appropriate here -> refine use of these classes here
    $out = "\n" . '';
    $out .= "\n\t" . 'Main blog at ' . htmlspecialchars(get_bloginfo('name')) . '';
    $out .= "\n\t" . '';

    # List of posts
    # XXX - (Scaling) - this list will become big (= might not scale) for a large number of posts
    rewind_posts();
    if (have_posts()) {
        while (have_posts()) {
            the_post();
            $post_link = query_link('sioc_type=post&sioc_id=' . $post->ID, true);
            $out .= "\n\t" . '';
            $out .= "\n\t\t" . '';
            $out .= "\n\t\t\t" . $post_link;
            $out .= "\n\t\t" . '';
            $out .= "\n\t" . '';
        }
    }

    $next_page = sioc_next_page();
    if ($next_page !== null) {
        $out .= "\n\t" . $next_page;
    }

    $out .= "\n" . '' . "\n";

    return $out;
}
/**
 * Output SIOC Comment details
 *
 * Returns details of a single approved comment: document header, creation
 * date, author (registered user records, or an ad-hoc person record for
 * anonymous commenters), plain-text content and HTML content.
 *
 * Side effect: the global $comment is set to the loaded row, because the
 * get_comment_author*() helpers read it.
 *
 * Fixes over the previous version:
 *  - $comment_ID comes from the request and was interpolated into SQL
 *    unchanged; it is now forced to an integer.
 *  - an unknown or unapproved comment yields '' instead of dereferencing a
 *    missing row.
 *  - the encoded-content statement was syntactically broken (the opening
 *    part of the literal was lost). It is rebuilt as a CDATA section inside
 *    content:encoded.
 *    NOTE(review): the original filter chain applied to the content is
 *    unknown; 'comment_text' is a stand-in - confirm.
 *
 * NOTE(review): the remaining wrapper literals are empty strings in this
 * copy; markup appears to be missing.
 *
 * @global $wpdb
 * @global $post
 * @param int $comment_ID Comment ID
 * @return string
 **/
function sioc_comment_rdf ($comment_ID) {
    global $wpdb, $post, $comment;

    $comment_ID = (int) $comment_ID;
    if ($comment_ID <= 0) {
        return '';
    }

    $row = $wpdb->get_row("SELECT * FROM $wpdb->comments WHERE comment_ID = '$comment_ID' AND comment_approved = '1' ORDER BY comment_date");
    if (!$row) {
        return '';
    }
    $comment = $row;

    $rdf = '';
    # Escaped like the post URI in sioc_post_rdf().
    $comment_uri = htmlspecialchars( get_permalink( $comment->comment_post_ID ) . '#comment-' . $comment->comment_ID );

    $rdf .= "\n" . foaf_document_header( $comment_uri, 'Comment' );
    $rdf .= "\n" . '';
    $rdf .= "\n\t" . '';
    $rdf .= "\n\t" . '';
    $rdf .= "\n\t" . '' . mysql2date('Y-m-d\TH:i:s\Z', $comment->comment_date_gmt) . '';

    if ( $comment->user_id > 0 ) {
        # Registered user: short SIOC record followed by short FOAF record.
        $rdf .= "\n\t" . '';
        $rdf .= "\n" . sioc_user_rdf( $comment->user_id, true );
        $rdf .= "\n\t" . '';
        $rdf .= "\n\t" . '';
        $rdf .= "\n" . sioc_user_rdf( $comment->user_id, true, 'foaf' );
        $rdf .= "\n\t" . '';
    } else {
        # Anonymous commenter: describe the person from the comment fields.
        $rdf .= sioc_foaf_rdf( apply_filters( 'the_title_rss', apply_filters( 'the_title', wp_specialchars(get_comment_author(),1))), get_comment_author_email(), get_comment_author_url() );
    }

    $rdf .= "\n\t" . '' . apply_filters( 'the_excerpt_rss', strip_tags( $comment->comment_content )) . '';

    # A literal "]]>" inside the content would end the CDATA section early.
    $encoded = str_replace(']]>', ']]&gt;', apply_filters( 'comment_text', $comment->comment_content ));
    $rdf .= "\n\t" . '<content:encoded><![CDATA[' . $encoded . ']]></content:encoded>';

    $rdf .= "\n" . '' . "\n";

    return $rdf;
}
/**
 * Output SIOC Site details
 *
 * Returns the description of the community site: the document header for
 * the site URI (blog 'siteurl' plus a trailing slash), the blog name and a
 * "Weblog: <name>" line.
 *
 * NOTE(review): the wrapper literals are empty strings in this copy;
 * markup appears to be missing.
 *
 * @todo improve contents of sioc:description and sioc:name.
 * @return string
 **/
function sioc_site_rdf () {
    $site_uri = htmlspecialchars( get_bloginfo('siteurl') . '/' );
    $blog_name = htmlspecialchars(get_bloginfo('name'));

    $lines = array(
        "\n" . foaf_document_header( $site_uri, 'Site' ),
        "\n" . '',
        # sioc:description sioc:host_of sioc:link sioc:name sioc:topic + Title, Tagline description
        "\n\t" . '' . $blog_name . '',
        "\n\t" . 'Weblog: ' . $blog_name . '',
        "\n\t" . '',
        "\n\t" . '',
        # rdfs:seeAlso would be needed if the SIOC:Forum info was in a separate file
        "\n\t" . '',
        "\n" . '' . "\n",
    );

    return implode('', $lines);
}
/**
 * Create link to a next page of Posts
 *
 * Returns an rdfs:seeAlso link (element form, via query_link()) to the next
 * page of the site listing, or an empty string when the current view is a
 * single post or there are no more pages.
 *
 * The number of pages is taken from the global $max_num_pages when set;
 * otherwise it is computed from the count of published posts and
 * $posts_per_page, and stored back into $max_num_pages.
 *
 * Side effect: an empty global $paged is set to 1.
 *
 * Fixes over the previous version: $max_page was read before it was ever
 * assigned, and a zero/unset $posts_per_page caused a division by zero; a
 * non-positive page size is now treated as "everything on one page".
 *
 * @return string
 **/
function sioc_next_page () {
    // Note: adapted from template_functions_links.php :: next_posts()
    global $paged, $posts_per_page, $wpdb, $max_num_pages;

    if ( isset($max_num_pages) ) {
        $max_page = $max_num_pages;
    } else {
        $numposts = $wpdb->get_var("SELECT COUNT(ID) FROM $wpdb->posts WHERE post_status = 'publish'");
        $per_page = intval($posts_per_page);
        $max_page = $max_num_pages = ($per_page > 0) ? ceil($numposts / $per_page) : 1;
    }

    if (!$paged)
        $paged = 1;
    $nextpage = intval($paged) + 1;

    # $paged is never empty at this point, so only the page bound matters.
    if ((! is_single()) && $nextpage <= $max_page) {
        return query_link('sioc_type=site&paged=' . $nextpage, true );
    }

    return '';
}
/**
 * Output SIOC Post
 *
 * Returns the description of one post: document header, title, author
 * (short SIOC and FOAF records), creation date, plain-text content, one
 * entry per category, optional UltimateTagWarrior tags, one entry per
 * approved comment (with a seeAlso link to the comment's SIOC page) and the
 * links extracted from the post body by extractLinks().
 *
 * Side effects: the globals $post and (through setup_postdata() and
 * sioc_user_rdf()) $authordata are set for the requested post.
 *
 * Fixes over the previous version:
 *  - the ID is forced to an integer before it is used;
 *  - an unknown ID yields '' instead of dereferencing a missing post;
 *  - posts in a non-public state (draft, private, pending, future,
 *    auto-draft, trash) are not exported, since the ID comes straight from
 *    the request;
 *  - a non-array category result is skipped instead of iterated.
 *
 * NOTE(review): most wrapper literals (including the per-category, tag
 * format and encoded-content ones) are empty strings in this copy; markup
 * appears to be missing.
 *
 * @param int $post_id Post ID
 * @todo same issue as in sioc_forum_rdf()
 * @return string
 **/
function sioc_post_rdf ($post_id) {
    global $post, $authordata, $wpdb;

    $post_id = (int) $post_id;
    if ($post_id <= 0) {
        return '';
    }

    $requested = get_post($post_id);
    if (!$requested) {
        return '';
    }

    $hidden_states = array('draft', 'private', 'pending', 'future', 'auto-draft', 'trash');
    if (isset($requested->post_status) && in_array($requested->post_status, $hidden_states)) {
        return '';
    }

    $post = $requested;
    setup_postdata($post);

    $rdf = '';
    $post_uri = htmlspecialchars( get_permalink() );
    $current_post_id = (int) $post->ID;
    # sioc_user_rdf() reassigns $authordata, so read the author ID once.
    $author_id = $authordata->ID;
    # Raw post body; used for both the plain-text content and link extraction.
    $content = get_the_content('', 0, '');

    $rdf .= "\n" . foaf_document_header( $post_uri, 'Post' );

    /*** Note:
    Fixed bug retrieving post properties.
    TODO: Review what functions we use to retrieve data.
    Earlier comment:
    - OK: post_id (get_permalink()); has_container; created_at ($post->post_date_gmt); title (get_the_title());
    topics (get_the_category());
    - BAD: content, content_encoded (get_the_content()); has_creator ($authordata->ID, $authordata->user_nicename);
    ***/
    # TODO - see if we can use functions instead of $post->...
    $rdf .= "\n" . '';
    $rdf .= "\n\t" . '';
    $rdf .= "\n\t" . '';
    $rdf .= "\n\t" . '' . apply_filters('the_title_rss',apply_filters('the_title',wp_specialchars(get_the_title()))) . '';

    # Author: short SIOC record followed by short FOAF record.
    $rdf .= "\n\t" . '';
    $rdf .= "\n" . sioc_user_rdf( $author_id, true );
    $rdf .= "\n\t" . '';
    $rdf .= "\n\t" . '';
    $rdf .= "\n" . sioc_user_rdf( $author_id, true, 'foaf' );
    $rdf .= "\n\t" . '';

    $rdf .= "\n\t" . '' . mysql2date('Y-m-d\TH:i:s\Z', $post->post_date_gmt) . '';
    $rdf .= "\n\t" . '' . apply_filters( 'the_excerpt_rss', strip_tags( $content )) . '';
    $rdf .= "\n\t" . '';

    # Add a list of post categories
    $categories = get_the_category();
    if (is_array($categories)) {
        foreach ($categories as $category) {
            $rdf .= "\n\t" . '';
        }
    }

    # Add some tags ( from UltimateTagWarrior 3.x - http://www.neato.co.nz/ultimate-tag-warrior/ )
    if ( class_exists('UltimateTagWarriorCore') ) {
        global $utw;
        $tags = $utw->GetTagsForPost( $current_post_id );
        $myFormat = array ( 'default' => "\n\t".'' );
        $rdf .= $utw->FormatTags( $tags, $myFormat );
    }

    # Add a list of comments to this post
    $comments = $wpdb->get_results("SELECT * FROM $wpdb->comments WHERE comment_post_ID = '$current_post_id' AND comment_approved = '1' ORDER BY comment_date");
    if ($comments) {
        foreach ($comments as $comment) {
            $rdf .= "\n\t" . '';
            # XXX - Find out if there should be trailing / before # (that may a be problem everywhere where comment URI is formed)
            $rdf .= "\n\t\t" . '';
            $rdf .= "\n\t\t\t" . query_link( 'sioc_type=comment&sioc_id=' . $comment->comment_ID, true );
            $rdf .= "\n\t\t" . '';
            $rdf .= "\n\t" . '';
        }
    }

    $rdf .= extractLinks( $content );
    $rdf .= "\n" . '' . "\n";

    return $rdf;
}
/**
 * Extract hyperlinks from post HTML
 *
 * Scans the HTML for <a ...>...</a> elements and returns one RDF property
 * per link that has a double-quoted href attribute:
 *  - rdfs:seeAlso  when the anchor declares type="application/rdf+xml",
 *  - sioc:links_to for every other link.
 *
 * The anchor pattern in this copy had lost its opening part and no longer
 * compiled, and both output literals were empty, so no link was ever
 * reported. Pattern and element names are restored from what remains of the
 * pattern and from the change history (v1.09).
 * NOTE(review): the original applied WordPress filters to the URL (v1.13);
 * here the URL is XML-escaped without double-encoding existing entities -
 * confirm this is equivalent enough.
 *
 * Assumption (see file header): attributes are enclosed in double quotes.
 *
 * @param string $html Post content.
 * @return string Concatenated properties, each preceded by "\n\t"; '' when
 *                no link is found.
 **/
function extractLinks( $html ) {
    $rdf = '';

    if (!preg_match_all('/<a\s+([^>]+)>(.*?)<\/a>/ims', (string) $html, $anchors, PREG_SET_ORDER)) {
        return $rdf;
    }

    foreach ($anchors as $anchor) {
        $attributes = $anchor[1];

        if (!preg_match('/href\s*=\s*"([^"]*)"/ims', $attributes, $href)) {
            continue;
        }

        # Content usually carries &amp; already; do not encode it twice.
        $target = htmlspecialchars($href[1], ENT_QUOTES, 'UTF-8', false);

        if (preg_match('/type\s*=\s*"application\/rdf\+xml/i', $attributes)) {
            $rdf .= "\n\t" . '<rdfs:seeAlso rdf:resource="' . $target . '"/>';
        } else {
            $rdf .= "\n\t" . '<sioc:links_to rdf:resource="' . $target . '"/>';
        }
    }

    return $rdf;
}
?>