* **/ $sioc_ver = '1.25'; /* * Change history: * v1.25 - Fixed bug with wrong mbox_sha1sum(s) for comments * v1.24 - Work around problems with Zotero COinS metadata plugin. * v1.23 - Bugfix for work with WordPress 2.1 * v1.22 - Added export of tags created by the UltimateTagWarrior plugin * v1.21 - Content negotiation added for 'application/rdf+xml' * v1.20 - Renamed clean() to sioc_clean() to avoid name conflicts * v1.19 - Improved display of author info (SIOC and FOAF) for comments and posts * v1.18 - Using FOAF for sioc:User - according to new version of the ontology * - Added an option to display/hide email address (default:hide) * v1.17 - Fixed bug that caused WP "more" tag to be ignored * v1.16 - Fixed bug with unescaped '&' in auto-discovery link and in has_creator for comments * v1.15 - assigned URIs to sioc:Forum and sioc:Usergroup * - fixed property names for sioc:email and sioc:email_sha1 * v1.14 - Fixed bug with

tags appearing in sioc:content * - Removed extra '<' from beginning of CDATA sections * v1.13 - Applied WP filters to sioc:links_to, rdfs:seeAlso (to remove invalid XML entities) * - Applied WP filters to sioc:content (to remove invalid XML entities) * v1.12 - Fixed bug in paging of posts on main SIOC site page * - Fixed bug with unescaped " in sioc:links_to * - Removed sioc:made property from the data describing a group of authors (does not exist in ontology) * v1.11 - Wrapped code into a WP plugin * v1.10 - Undo fix of '&' in seeAlso - '&' should be escaped * v1.09 - Added output of plugin version * - Added extracting of sioc:links_to from the blog post links * - Added extracting of rdfs:seeAlso from the blog post links (type="application/rdf+xml") * - Fixed bug with escaped '&' appearing in rdfs:seeAlso * v1.08 - Fixed Content-Type (to application/rdf+xml) * v1.07 - Fixed format of seeAlso info * - Fixed problems with & in sioc:content (added htmlspecialchars() filter) * - Added htmlspecialchars() filter to sioc:has_creator (was problem with » tag) * v1.06 - Improved and refactored * v1.05 - Added paging of list of posts * v1.04 - Added display of comments * - Changed sioc:created to created_at * v1.03 - Fixed display of URI for sioc:has_creator * - Switched to have_posts() loop for going through the posts [WP 1.5] * - Added display of posts as a part of forum description ( uses sioc:container_of ) * - Added display of sioc:link to Forums and Posts * - Added display of post categories (topics) * v1.02 - Stripped HTML tags from sioc:content * v1.01 - Addressing of resources (users, posts, ...) changed from rdf:nodeID to rdf:about * - Changed sioc:description to contain authordata->user_description * - Fixed display of sioc:content for Posts. 
Note: a 'rich' HTML content is currently being displayed
 * v1.00 - Initial version
 */

# TODO - Export forum's categories in SIOC or a separate SKOS file
# Modelling - site has a single forum (in wordpress case)
#           - a forum has a hierarchy of categories

/*
 * Bugs:
 * - might be a problem in generating comment URIs with trailing '/' + '#'
 * ? is » problem with the script or Redland?
 */

/*
 * Assumptions:
 * - blog post HTML - when extracting hyperlinks from HTML we assume that tag attributes are enclosed within double quotes ("...")
 *
 **/

/* Global variables */
$blog = 1; // enter your blog's ID
# $doing_rss = 1;

# SIOC generation parameters
# $sioc_all_users: when 1, sioc_users_list() lists every user; otherwise only
#                  users that have posts or whose ID is greater than 1.
# $sioc_show_email: when true, sioc_user_rdf() also emits the plain e-mail
#                   property in the full user description (default: hidden).
$sioc_all_users = 1;
$sioc_show_email = 0;

# require_once('wp-blog-header.php');
# sioc_main();
# exit;

/**
 * Main cycle
 *
 * Processes the parameters and calls appropriate functions.
 *
 * Reads the request variables through sioc_sanitize_vars(), prints the RDF
 * header, dispatches on the global 'sioc_type' ('site', 'post', 'comment' or
 * 'user') and prints the RDF footer. The object id for the non-site types is
 * taken from the global 'sioc_id'.
 **/
function sioc_main() {
    # Force WordPress to return full post content instead of stopping at the
    # "more" tag (see changelog entry v1.17).
    global $more;
    $more = 1;

    sioc_sanitize_vars();

    # Work around the Zotero COinS metadata plugin (see changelog entry v1.24).
    remove_filter('the_content', 'coinsify_the_content');

    # Create RDF header
    echo sioc_rdf_header();

    $sioc_type = $GLOBALS['sioc_type'];

    # NOTE(review): in this copy the literal below read '\n"; - opened with a
    # single quote and closed with a double quote - so the string ran on into
    # the switch statement. Only the trailing newline survives. Whatever text
    # preceded it (presumably an XML comment) has to be restored from the
    # upstream plugin source.
    echo "\n";

    /* Main control switch */
    switch ($sioc_type) {
        case 'site':
            sioc_site_export();
            break;
        case 'post':
            echo sioc_post_rdf($GLOBALS['sioc_id']);
            break;
        case 'comment':
            echo sioc_comment_rdf($GLOBALS['sioc_id']);
            break;
        case 'user':
            echo sioc_user_rdf($GLOBALS['sioc_id']);
            break;
        default:
            echo "\n";
    }

    /* That's all */
    echo "\n" . '' . "\n"; #end of RDF
}

/**
 * Export of main site metadata
 *
 * Prints site's main SIOC meta information, including user and post lists.
**/
function sioc_site_export() {
    sioc_users_list();
    echo sioc_site_rdf();
    echo sioc_forum_rdf();

    # TODO - Export forum's categories in SIOC or a separate SKOS file
    # Modelling - site has a single forum (in wordpress case)
    #           - a forum has a hierarchy of categories
}

/**
 * Clean content
 *
 * Convert string data to avoid invalid XML symbols.
 *
 * NOTE(review): htmlentities() emits HTML named entities (e.g. &eacute;),
 * which are not predefined in plain XML - confirm this is what callers want.
 *
 * @param string $t_str
 * @return string
 **/
function sioc_clean($t_str) {
    return htmlentities($t_str);
    # htmlentities($t_str, ENT_COMPAT, 'CP1252');
    # return utf8_encode(html_entity_decode($t_str, ENT_QUOTES, 'UTF-8'));
    # return utf8_encode($t_str);
}

/**
 * Output FOAF Document info
 *
 * Returns a foaf:Document describing the current SIOC page.
 * It contains a foaf:primaryTopic property that points to the primary SIOC
 * object that this page is about.
 *
 * NOTE(review): the literal below contains no markup in this copy of the
 * file; presumably the XML tags were lost - restore from the upstream source.
 *
 * @param string $primaryTopic URI of the primary object; an empty value yields ''
 * @param string $objectType   Optional type label placed before "profile for"
 *
 * @return string
 **/
function foaf_document_header( $primaryTopic="", $objectType="" ) {
    if ( $primaryTopic=="" ) {
        return "";
    }

    # Separate the type label from the following word.
    if ( $objectType ) {
        $objectType .= " ";
    }

    $rdf = ' SIOC '. $objectType . 'profile for "' . htmlspecialchars(get_bloginfo('name')) . '" A SIOC profile describes the structure and contents of a weblog in a machine readable form. For more information please refer to http://sioc-project.org/. ' . "\n";

    return $rdf;
}

/**
 * Process and sanitize input variables
 *
 * Acquired script GET variables.
* Variables affected: sioc_type, sioc_id, sioc_off
 * (to add: clean all WP variable that we do not use)
 *
 * Each variable is copied verbatim from $_GET into $GLOBALS (or set to ''
 * when absent). No validation or escaping happens here, so consumers must
 * treat the values as untrusted. An empty sioc_type becomes 'empty'.
 *
 * @todo add cleaning of WP variables
 **/
function sioc_sanitize_vars() {
    $sioc_vars = array('sioc_type', 'sioc_id', 'sioc_off');

    foreach ($sioc_vars as $sioc_var) {
        if (isset($_GET[$sioc_var])) {
            $GLOBALS[$sioc_var] = $_GET[$sioc_var];
        } else {
            $GLOBALS[$sioc_var] = '';
        }
    }

    if (empty($GLOBALS['sioc_type'])) {
        $GLOBALS['sioc_type'] = 'empty';
    }
}

/**
 * Output RDF header
 *
 * Sets HTTP Content-type: and outputs RDF header
 *
 * NOTE(review): here and in the functions below many markup literals are
 * empty strings in this copy of the file; presumably the XML tags were lost.
 * They are kept exactly as found so the markup can be restored in place from
 * the upstream plugin source.
 *
 * @return string
 **/
function sioc_rdf_header() {
    header('Content-type: application/rdf+xml', true);
    #header('Content-type: text/plain', true);

    $rdf_code = '' . "\n";
    $rdf_code .= '' . "\n";
    $rdf_code .= '' . "\n\n";

    return $rdf_code;
}

/**
 * Output SIOC users list
 *
 * Prints a list of blog users as a Usergroup of sioc:User objects.
 *
 * When the global $sioc_all_users is 1 every user is listed; otherwise only
 * users that have posts or whose ID is greater than 1.
 **/
function sioc_users_list () {
    # Generate list of authors.
    global $wpdb, $tableusers, $tableposts, $sioc_all_users ;

    if ($sioc_all_users==1) {
        $authors = $wpdb->get_results('SELECT u.ID as ID FROM ' . $tableusers . ' AS u');
    } else {
        $authors = $wpdb->get_results('SELECT u.ID as ID, count(post_status) AS pc FROM ' . $tableusers . ' AS u LEFT JOIN ' . $tableposts . ' AS p ON u.ID=p.post_author GROUP BY u.ID HAVING pc>0 or u.ID>1');
    }

    echo "\n";

    if ($authors) {
        echo '';
        echo "\n\t" . 'Authors at "' . htmlspecialchars(get_bloginfo('name')) . '"';

        # Iterate through $authors
        foreach ($authors as $author) {
            echo "\n\t";
            echo "\n" . sioc_user_rdf($author->ID, true);
            echo "\n\t";
        }

        echo "\n" . '' . "\n";
    }
}

/**
 * Form and output a SIOC Query URL
 *
 * Forms and returns a URL to query SIOC with given parameters. The URL is
 * built from the current host and script path and is entity-escaped, so it
 * can be placed in an XML attribute.
 *
 * @param string $query_str Query string without the leading '?'
 * @return string
 **/
function query_url($query_str) {
    # The keys must be quoted: bare HTTP_HOST / PHP_SELF are undefined
    # constants, which only worked through a deprecated fallback and are a
    # fatal error on PHP 8.
    $url = 'http://' . $_SERVER['HTTP_HOST'] . $_SERVER['PHP_SELF'];

    if ($query_str) {
        $url .= '?' . $query_str;
    }

    return htmlentities($url);
}

/**
 * Return SIOC seeAlso link
 *
 * Returns a rdfs:seeAlso link to a URL to query SIOC with given parameters
 *
 * @param string $query_str
 * @param boolean $do_tag If true, return the link as an element instead of an attribute
 * @return string
 **/
function query_link($query_str, $do_tag=false) {
    if ($do_tag) {
        return '';
    } else {
        return 'rdfs:seeAlso="' . query_url($query_str) . '"';
    }
}

/**
 * Generate email / sha1 property
 *
 * Returns a sioc:email or sioc:email_sha1sum property generated from
 * user's e-mail address. In case if $enc=true and SHA1
 * hash cannot be calculated, return empty string.
 *
 * @param string $email
 * @param bool $enc if true, return sha1 hash of email
 * @return string
 **/
function get_email_property($email, $enc = true) {
    # Based on get_foaf_output_email_property by Morten Frederiksen
    if ($enc == false) {
        return '';
    }

    # Hashing is shared with get_email_sha1() so both always agree.
    $sha1 = get_email_sha1($email);

    if ('' != $sha1) {
        return '' . $sha1 . '';
    }

    return '';
}

/**
 * Generate SHA1 hash of email
 *
 * Return a SHA1 hash of the 'mailto:' URI of an email address, or an empty
 * string when neither sha1() nor mhash() is available.
 *
 * @param string $email
 * @return string
 **/
function get_email_sha1($email) {
    # Based on get_foaf_output_email_property by Morten Frederiksen
    $sha1 = '';

    # Try to calculate SHA1 hash of email URI.
    if (function_exists('sha1')) {
        $sha1 = sha1('mailto:' . $email);
    } else if (function_exists('mhash')) {
        $sha1 = bin2hex(mhash(MHASH_SHA1, 'mailto:' . $email));
    }

    return $sha1;
}

/**
 * Output person's FOAF details
 *
 * Returns foaf:maker record containing foaf:Person with a name, mbox_sha1 and homepage information.
 *
 * @param string $name
 * @param string $email
 * @param string $homepage
 * @return string
 **/
function sioc_foaf_rdf( $name, $email, $homepage ) {
    $sha1 = $email ? get_email_property($email,true) : '';
    # Kept although unused in this copy: presumably referenced by the lost markup.
    $name = $name ? ' foaf:name="'.$name.'"' : '';

    $rdf = "\n\t" . '';
    $rdf .= "\n\t\t" . '';
    $rdf .= "\n\t\t\t" . $sha1;
    # 'http://' alone is treated as "no homepage".
    if ($homepage and $homepage!='http://') {
        $rdf .= "\n\t\t\t" . '';
    }
    $rdf .= "\n\t\t" . '';
    $rdf .= "\n\t" . '';

    return $rdf;
}

/**
 * Output SIOC User details
 *
 * Return an instance of SIOC:User class listing properties of a user.
 * Returns an empty string when no user id is given or the user is unknown.
 *
 * @param int $author_ID User ID
 * @param boolean $short Set true to return only rdf:about and rdfs:seeAlso links
 * @param string $mode 'sioc' or 'foaf'; selects the flavour of the short form
 * @return string
 **/
function sioc_user_rdf ($author_ID = '', $short = false, $mode = 'sioc' ) {
    global $authordata, $sioc_show_email;

    $rdf = '';
    $auth = '';
    $desc = '';
    $foaf_name = '';   # initialised so the check below never reads an undefined variable

    if (!$author_ID) {
        return '';
    }

    $authordata = get_userdata($author_ID);
    # The id may come straight from the request; bail out on unknown users
    # instead of dereferencing a non-object.
    if (!$authordata) {
        return '';
    }

    $user_uri = htmlspecialchars(get_author_link(0, $authordata->ID, $authordata->user_nicename));
    $foaf_uri = $user_uri . '#foaf';
    $user_link = query_link('sioc_type=user&sioc_id=' . $authordata->ID, true );
    $pref_name = htmlspecialchars( the_author('',false) );
    $email = $authordata->user_email;
    $sha1 = $email ? get_email_property($authordata->user_email,true) : '';
    $homepage = $authordata->user_url;

    if ($short) {
        if ( $mode == 'sioc' ) {
            $rdf = "\t\t" . '';
            $rdf .= "\n\t\t\t" . $user_link;
            $rdf .= "\n\t\t" . '';
            return $rdf;
        } elseif ( $mode == 'foaf' ) {
            $rdf = "\t\t" . '';
            $rdf .= "\n\t\t\t" . $sha1;
            if ($homepage and $homepage!='http://') {
                $rdf .= "\n\t\t\t" . '';
            }
            $rdf .= "\n\t\t\t" . $user_link;
            $rdf .= "\n\t\t" . '';
            return $rdf;
        }
    } else {
        $nick = $authordata->user_nickname;
        $login = $authordata->user_login;
        if ( $authordata->user_firstname || $authordata->user_lastname ) {
            $foaf_name = htmlspecialchars(trim( "$authordata->user_firstname $authordata->user_lastname" ));
        }

        $rdf .= "\n" . foaf_document_header( $user_uri, 'User' );
        $rdf .= "\n" . '';
        if ( $foaf_name ) {
            $rdf .= "\n\t" . "$foaf_name";
        }
        if ( $authordata->user_firstname ) {
            $rdf .= "\n\t" . '' . htmlspecialchars($authordata->user_firstname) . '';
        }
        if ( $authordata->user_lastname ) {
            $rdf .= "\n\t" . '' . htmlspecialchars($authordata->user_lastname) . '';
        }
        if ( $sioc_show_email and $email ) {
            $rdf .= "\n\t" . get_email_property($authordata->user_email,false);
        }
        $rdf .= "\n\t" . $sha1;
        # Escaped like the other name fields so user-chosen text cannot break the XML.
        if ( $nick ) {
            $rdf .= "\n\t" . htmlspecialchars($nick);
        }
        if ($homepage and $homepage!='http://') {
            $rdf .= "\n\t" . '';
        }
        $rdf .= "\n\t" . '';
        $rdf .= "\n" . '' . "\n";

        $rdf .= "\n" . '';
        if ( $authordata->user_login ) {
            $rdf .= "\n\t" . htmlspecialchars($authordata->user_login);
        }
        if ($pref_name) {
            $rdf .= "\n\t" . '' . $pref_name . '';
        }
        # FIXME - the creation date is ugly for the default admin user - where database contains default value
        if ($authordata->dateYMDhour) {
            $rdf .= "\n\t" . '' . mysql2date('Y-m-d\TH:i:s', $authordata->dateYMDhour, 1, 1) . '';
        }
        if ($authordata->user_description) {
            $rdf .= "\n\t" . '' . htmlspecialchars($authordata->user_description) . '';
        }
        $rdf .= "\n" . '' . "\n";
    }

    return $rdf;
}

/**
 * Output SIOC Forum details
 *
 * Returns a sioc:Forum consisting of a basic information about a forum (blog) and a list
 * of posts that the blog contains. Forums are paged (WP built-in paging is used) and the
 * page to be displayed is indicated by a @paged global variable.
 *
 * @todo issue: paging can result in a crawler missing some posts if new post created between subsequent accesses.
 *
 * @return string
 **/
function sioc_forum_rdf () {
    global $post;

    $rdf = '';
    $forum_uri = query_url( 'sioc_type=site#weblog' );

    # TODO - decide if sioc:Forum or sioc:Blog (a subclass of Forum) is appropriate here -> refine use of these classes here
    $rdf = "\n" . '';
    $rdf .= "\n\t" . 'Main blog at ' . htmlspecialchars(get_bloginfo('name')) . '';
    $rdf .= "\n\t" . '';

    # List of posts
    # XXX - (Scaling) - this list will become big (= might not scale) for a large number of posts
    rewind_posts();
    if (have_posts()) {
        while (have_posts()) : the_post();
            $rdf .= "\n\t" . '';
            $rdf .= "\n\t\t" . '';
            $rdf .= "\n\t\t\t" . query_link('sioc_type=post&sioc_id=' . $post->ID, true);
            $rdf .= "\n\t\t" . '';
            $rdf .= "\n\t" . '';
        endwhile;
    }

    $next_page = sioc_next_page();
    if (isset($next_page)) {
        $rdf .= "\n\t" . $next_page;
    }

    $rdf .= "\n" . '' . "\n";

    return $rdf;
}

/**
 * Output SIOC Comment details
 *
 * Returns details of a single approved comment of a post, or an empty string
 * when no approved comment with that id exists.
 *
 * @global $wpdb
 * @global $post
 * @param int $comment_ID Comment ID
 * @return string
 **/
function sioc_comment_rdf ($comment_ID) {
    global $wpdb, $post, $comment;

    $rdf = '';

    # The id arrives from the request (see sioc_main); force it to an integer
    # before it is placed in the SQL text.
    $comment_ID = intval($comment_ID);
    $comment = $wpdb->get_row("SELECT * FROM $wpdb->comments WHERE comment_ID = '$comment_ID' AND comment_approved = '1' ORDER BY comment_date");
    if (!$comment) {
        return '';
    }

    $comment_uri = get_permalink( $comment->comment_post_ID ) . '#comment-' . $comment->comment_ID;

    $rdf .= "\n" . foaf_document_header( $comment_uri, 'Comment' );
    $rdf .= "\n" . '';
    $rdf .= "\n\t" . '';
    $rdf .= "\n\t" . '';
    $rdf .= "\n\t" . '' . mysql2date('Y-m-d\TH:i:s\Z', $comment->comment_date_gmt) . '';

    if ( $comment->user_id > 0 ) {
        # Comment by a registered user: reference the user in both SIOC and FOAF form.
        $rdf .= "\n\t" . '';
        $rdf .= "\n" . sioc_user_rdf( $comment->user_id, true );
        $rdf .= "\n\t" . '';
        $rdf .= "\n\t" . '';
        $rdf .= "\n" . sioc_user_rdf( $comment->user_id, true, 'foaf' );
        $rdf .= "\n\t" . '';
    } else {
        # Anonymous commenter: describe them from the data stored with the comment.
        $rdf .= sioc_foaf_rdf( apply_filters( 'the_title_rss', apply_filters( 'the_title', wp_specialchars(get_comment_author(),1))), get_comment_author_email(), get_comment_author_url() );
    }

    $rdf .= "\n\t" . '' . apply_filters( 'the_excerpt_rss', strip_tags( $comment->comment_content )) . '';

    # NOTE(review): the next statement is unparseable in this copy of the file
    # and is therefore disabled. It is preserved verbatim so it can be
    # restored from the upstream plugin source (presumably the CDATA-wrapped
    # encoded content of the comment):
    # $rdf .= "\n\t" . 'comment_content)) . ']]>';

    $rdf .= "\n" . '' . "\n";

    return $rdf;
}

/**
 * Output SIOC Site details
 *
 * Returns details of SIOC information about a community site
 *
 * @todo improve contents of sioc:description and sioc:name.
 * @return string
 **/
function sioc_site_rdf () {
    $rdf = '';
    $site_uri = htmlspecialchars( get_bloginfo('siteurl') . '/' );

    $rdf .= "\n" . foaf_document_header( $site_uri, 'Site' );
    $rdf .= "\n" . '';
    # sioc:description sioc:host_of sioc:link sioc:name sioc:topic + Title, Tagline description
    $rdf .= "\n\t" . '' . htmlspecialchars(get_bloginfo('name')) . '';
    $rdf .= "\n\t" . 'Weblog: ' . htmlspecialchars(get_bloginfo('name')) . '';
    $rdf .= "\n\t" . '';
    $rdf .= "\n\t" . '';
    # rdfs:seeAlso would be needed if the SIOC:Forum info was in a separate file
    $rdf .= "\n\t" . '';
    $rdf .= "\n" . '' . "\n";

    return $rdf;
}

/**
 * Create link to a next page of Posts
 *
 * Returns an rdfs:seeAlso link to a next page of Posts.
 * Returns empty string if there are no more pages.
 *
 * Side effects: sets the global $paged to 1 when it is empty, and caches the
 * computed page count in the global $max_num_pages when it was not set.
 *
 * @return string
 **/
function sioc_next_page () {
    // Note: adapted from template_functions_links.php :: next_posts()
    global $paged, $result, $request, $posts_per_page, $wpdb, $max_num_pages;

    if ( isset($max_num_pages) ) {
        $max_page = $max_num_pages;
    } else {
        $numposts = $wpdb->get_var("SELECT COUNT(ID) FROM $wpdb->posts WHERE post_status = 'publish'");
        # Guard against an unset or zero page size (division by zero).
        $max_page = $max_num_pages = $posts_per_page ? ceil($numposts / $posts_per_page) : 0;
    }

    if (!$paged) {
        $paged = 1;
    }
    $nextpage = intval($paged) + 1;

    if ((! is_single()) && $nextpage <= $max_page) {
        return query_link('sioc_type=site&paged=' . $nextpage, true );
    }

    return '';
}

/**
 * Output SIOC Post
 *
 * Returns properties of a post, including its author, categories, tags,
 * approved comments and the links found in its content. Returns an empty
 * string when the id is empty or the post does not exist.
 *
 * @param int $post_id Post ID
 * @todo same issue as in sioc_forum_rdf()
 * @return string
 **/
function sioc_post_rdf ($post_id) {
    global $posts;
    global $post, $authordata;

    if (empty($post_id)) {
        return '';
    }

    $post = get_post($post_id);
    # The id may come straight from the request; stop on unknown posts.
    if (!$post) {
        return '';
    }
    setup_postdata($post);
    # $authordata = &get_userdata($post->post_author);
    # echo '' . "\n";

    $rdf = '';
    $post_uri = htmlspecialchars( get_permalink() );

    $rdf .= "\n" . foaf_document_header( $post_uri, 'Post' );

    /***
     Note: Fixed bug retrieving post properties.
     TODO: Review what functions we use to retrieve data.
     Earlier comment:
     - OK: post_id (get_permalink()); has_container; created_at ($post->post_date_gmt); title (get_the_title()); topics (get_the_category());
     - BAD: content, content_encoded (get_the_content()); has_creator ($authordata->ID, $authordata->user_nicename);
    ***/

    # TODO - see if we can use functions instead of $post->...
    $rdf .= "\n" . '';
    $rdf .= "\n\t" . '';
    $rdf .= "\n\t" . '' ;
    $rdf .= "\n\t" . '' . apply_filters('the_title_rss',apply_filters('the_title',wp_specialchars(get_the_title()))) . '';
    # $rdf .= "\n\t" . '' ;
    $rdf .= "\n\t" . '';
    $rdf .= "\n" . sioc_user_rdf( $authordata->ID, true );
    $rdf .= "\n\t" . '';
    $rdf .= "\n\t" . '';
    $rdf .= "\n" . sioc_user_rdf( $authordata->ID, true, 'foaf' );
    $rdf .= "\n\t" . '';
    # $rdf .= sioc_foaf_rdf( htmlspecialchars(the_author('',false)), $authordata->user_email, $authordata->user_url );
    $rdf .= "\n\t" . '' . mysql2date('Y-m-d\TH:i:s\Z', $post->post_date_gmt) . '';
    $rdf .= "\n\t" . '' . apply_filters( 'the_excerpt_rss', strip_tags( get_the_content('', 0, ''))) . '';
    # $rdf .= "\n\t" . '';
    $rdf .= "\n\t" . '';

    # Add a list of post categories
    $categories = get_the_category();
    foreach ($categories as $category) {
        $rdf .= "\n\t" . '';
    }

    # Add some tags ( from UltimateTagWarrior 3.x - http://www.neato.co.nz/ultimate-tag-warrior/ )
    if ( class_exists('UltimateTagWarriorCore') ) {
        global $utw;
        $tags = $utw->GetTagsForPost( $post->ID );
        $myFormat = array ( 'default' => "\n\t".'' );
        $rdf .= $utw->FormatTags( $tags, $myFormat );
    }

    # Add a list of comments to this post
    global $wpdb;
    $comments = $wpdb->get_results("SELECT * FROM $wpdb->comments WHERE comment_post_ID = '" . intval($post->ID) . "' AND comment_approved = '1' ORDER BY comment_date");
    if ($comments) {
        foreach ($comments as $comment) :
            # $rdf .= "\n\t" . 'comment_ID ) . ' rdf:resource="' . get_permalink() . '#comment-' . $comment->comment_ID . '"/>';
            $rdf .= "\n\t" . '';
            # XXX - Find out if there should be trailing / before # (that may a be problem everywhere where comment URI is formed)
            $rdf .= "\n\t\t" . '';
            $rdf .= "\n\t\t\t" . query_link( 'sioc_type=comment&sioc_id=' . $comment->comment_ID, true );
            $rdf .= "\n\t\t" . '';
            $rdf .= "\n\t" . '';
        endforeach;
    }

    $rdf .= extractLinks( get_the_content('', 0, '') );

    $rdf .= "\n" . '' . "\n";

    return $rdf;
}

/**
 * Extract hyperlinks from post HTML
 *
 * Scans $html for anchor elements and appends one line per anchor that has a
 * double-quoted href attribute. Anchors whose attributes declare
 * type="application/rdf+xml" take a separate branch from ordinary links.
 *
 * @param string $html
 * @return string
 **/
function extractLinks( $html ) {
    $rdf = '';

    # NOTE(review): the head of this pattern was missing in this copy (it read
    # '/]+)>(.*?)<\/a>/ims', which does not compile). The opening
    # '<a\s+([^>' is a reconstruction from the surviving tail and from the use
    # of $val[1] as the attribute list - confirm against the upstream source.
    if ( !preg_match_all ('/<a\s+([^>]+)>(.*?)<\/a>/ims', $html, $out, PREG_SET_ORDER) ) {
        return $rdf;
    }

    foreach ($out as $val) {
        if ( preg_match ( '/href\s*=\s*"([^"]*)"/ims', $val[1], $anchor ) ) {
            if ( preg_match( '/type\s*=\s*"application\/rdf\+xml/i', $val[1]) ) {
                $rdf .= "\n\t" . '';
            } else {
                $rdf .= "\n\t" . '';
            }
        }
    }

    return $rdf;
}
?>