<?php
/*
#===========================================================================
#= Script : Blog
#= File   : links-update-xml.php
#= Version: 1.55b RC2
#= Author : Mike Little
#= Email  : mike@zed1.com
#= Support: WordPress
#= Edits: Kevin A. Sesock
#===========================================================================
#= Copyright (c) 2003 Mike Little
#= You are free to use and modify this script as long as this header
#= section stays intact. This file is part of BLOG.
#=
#= This program is free software; you can redistribute it and/or modify
#= it under the terms of the GNU General Public License as published by
#= the Free Software Foundation; either version 2 of the License, or
#= (at your option) any later version.
#=
#= This program is distributed in the hope that it will be useful,
#= but WITHOUT ANY WARRANTY; without even the implied warranty of
#= MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#= GNU General Public License for more details.
#=
#= You should have received a copy of the GNU General Public License
#= along with BLOG files; if not, write to the Free Software
#= Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#=
#= Notice! This file originally appeared in WordPress v1.2, distributed
#= under the GNU General Public License version 2. Original copyrights
#= are maintained.
#===========================================================================
*/

// Links weblogs.com grabber
// Copyright (C) 2003 Mike Little -- mike@zed1.com

// Report errors, warnings, parse errors and notices while this script runs.
error_reporting(E_ERROR | E_WARNING | E_PARSE | E_NOTICE);

// Globals to hold state.
// Both are written through $GLOBALS so that they end up in the global scope
// even if this file is included from inside a function: startElement() reads
// $updated_timestamp with `global`, which always refers to the global scope.
$GLOBALS["updated_timestamp"] = 0;   // timestamp of the changes feed, 0 = not known yet
$GLOBALS["all_links"] = array();     // transformed url => link row


/**
 ** preload_links()
 ** Pre-load the rows of blog_links that have a non-blank url into the global
 ** associative array $all_links.
 ** key is the url as reduced by transform_url(), value is the fetched row
 ** (dLastUpdate, cURL, nLinkIDCode).
 ** Note: dLastUpdate keeps the value stored in the db until startElement()
 ** overwrites it for a link that shows up in the changes feed.
 ** If the query fails nothing is loaded and $all_links is left untouched.
 **/
function preload_links() {
	$sql = "SELECT dLastUpdate, cURL, nLinkIDCode FROM blog_links";
	// db_connect() is defined outside this file; presumably it returns a
	// mysql link identifier -- TODO confirm.
	$con = db_connect();
	$result = mysql_query($sql, $con);
	if (!$result) {
		// Query failed: there is no result set to walk.
		return;
	}

	while ($link = mysql_fetch_array($result)) {
		$link_url = transform_url($link['cURL']);
		// A blank key would match any feed entry whose url also reduces to
		// nothing, so rows without a usable url are left out.
		if (trim((string) $link_url) === '') {
			continue;
		}
		$GLOBALS["all_links"][$link_url] = $link;
	}

	mysql_free_result($result);
}

/**
 ** update_links()
 ** Write the dLastUpdate value of every link held in $all_links back to the
 ** blog_links table, one UPDATE per link (matched on nLinkIDCode).
 ** Links that carry no timestamp are skipped so that an empty string is
 ** never written over the stored value, and no db connection is opened when
 ** there is nothing to write.
 ** A failing statement is reported on the page and the loop carries on.
 **/
function update_links() {
	// Keep only the rows that actually have something to store.
	$pending = array();
	foreach ($GLOBALS["all_links"] as $link) {
		if (isset($link["dLastUpdate"], $link["nLinkIDCode"])
			&& (string) $link["dLastUpdate"] !== '') {
			$pending[] = $link;
		}
	}
	if (!count($pending)) {
		return;
	}

	// db_connect() is defined outside this file; presumably it returns a
	// mysql link identifier -- TODO confirm.
	$con = db_connect();
	foreach ($pending as $link) {
		$sql = "UPDATE blog_links SET dLastUpdate = '".$link["dLastUpdate"]."' WHERE nLinkIDCode = ".$link["nLinkIDCode"];
		$result = mysql_query($sql, $con);
		if (!$result) {
			print "Problem with SQL: ".mysql_error($con)."<br>";
			print "SQL Sequence Sent: ".$sql."<br>";
		}
	} // end foreach
}

/**
 ** get_weblogs_updatedfile()
 ** Retrieves and caches a copy of the changed-blogs xml file.
 ** If the cache file exists, check its age and fetch a new copy if it is old.
 ** Returns true only when a new copy has been downloaded AND written to the
 ** cache (needs processing); returns false when the cache is still fresh,
 ** the download failed or the cache could not be written (nothing to do).
 **/
function get_weblogs_updatedfile() {
	$file = 'lib/links-update-cache.xml';
	$max_age = 15; // seconds a cached copy is considered fresh

	// A cached copy that is still fresh means there is nothing to do.
	if (file_exists($file) && (time() - filemtime($file)) <= $max_age) {
		return false;
	}

	// Missing or stale: get a new copy. Errors are silenced on purpose, a
	// failed download simply means "don't try to process".
	$a = @file('http://fresh.blogrolling.com/changes.xml');
	if ($a == false || !count($a) || !$a[0]) {
		return false;
	}
	$contents = implode('', $a);

	// Clean up the input, because the feed is not always clean XML:
	// encode apostrophes, then drop everything that is not whitespace,
	// punctuation, a letter or a digit, then drop whatever is left outside
	// printable ASCII plus tab, LF and CR.
	$contents = preg_replace("/'/",'&#39;',$contents);
	$contents = preg_replace('|[^[:space:][:punct:][:alpha:][:digit:]]|','',$contents);
	$contents = preg_replace('/[^\x20-\x7E\x09\x0A\x0D]/', '', $contents);

	$cachefp = fopen($file, "w");
	if (!$cachefp) {
		// Cache not writable: there is no new file for the caller to parse.
		return false;
	}
	$written = fwrite($cachefp, $contents);
	fclose($cachefp);

	// Only report a new copy when the whole document made it to disk.
	return ($written !== false && $written === strlen($contents));
}

/**
 ** startElement()
 ** Callback function. Called at the start of a new xml tag.
 **
 ** WEBLOGUPDATES: remembers the feed's 'updated' attribute as a unix
 **   timestamp in the global $updated_timestamp (0 when it is missing or
 **   cannot be parsed).
 ** WEBLOG: if the entry's url (after transform_url()) is one of our links,
 **   sets that link's dLastUpdate to feed time minus the entry's 'when'
 **   attribute (taken as seconds). Entries that lack a url or a numeric
 **   'when', or that arrive before a valid feed time is known, are ignored
 **   so that no bogus date replaces the stored one.
 **
 ** Tag and attribute names are compared in upper case, which relies on the
 ** parser's case folding being on (the default for xml_parser_create()).
 **/
function startElement($parser, $tagName, $attrs) {
	global $updated_timestamp;
	if ($tagName == 'WEBLOGUPDATES') {
		// convert 'updated' into a php timestamp; strtotime() signals failure
		// with false (-1 on very old php versions)
		$parsed = isset($attrs['UPDATED']) ? strtotime($attrs['UPDATED']) : false;
		$updated_timestamp = ($parsed === false || $parsed < 0) ? 0 : $parsed;
	} else if ($tagName == 'WEBLOG') {
		// without a reference time or complete attributes the entry cannot be dated
		if ($updated_timestamp <= 0
			|| !isset($attrs['URL'], $attrs['WHEN'])
			|| !is_numeric($attrs['WHEN'])) {
			return;
		}
		// is this url in our links?
		$link_url = transform_url($attrs['URL']);
		if (isset($GLOBALS["all_links"][$link_url])) {
			// NOTE(review): the extra 3600 seconds look like a fixed one hour
			// timezone correction -- confirm.
			$GLOBALS["all_links"][$link_url]['dLastUpdate'] = date('Y-m-d H:i:s', ($updated_timestamp - (int) $attrs['WHEN']) - 3600);
		}
	}
}

/**
 ** endElement()
 ** Callback function. Called when an xml tag is closed.
 ** Deliberately empty: the values this script needs are tag attributes,
 ** which startElement() reads; it exists because the handler is registered
 ** together with startElement() by xml_set_element_handler().
 **/
function endElement($parser, $tagName) {
	return;
}

/**
 ** transform_url()
 ** Transforms a url to a minimal identifier, so that the same weblog is
 ** recognised whether it is written with or without these decorations.
 **
 ** - removes "www." in any letter case
 ** - removes an index.* or default.* file name, but only where it is the
 **   last path segment (end of the url, or just before a ? or #); host
 **   names such as index.hu and files such as myindex.php are left alone
 ** - removes one trailing slash
 **
 ** Returns the reduced url as a string ('' for empty input).
 **/
function transform_url($url) {
	$url = (string) $url;

	// "www." regardless of case (www., WWW., Www., ...)
	$url = preg_replace('/www\./i', '', $url);

	// The look-behind wants "<non-slash, non-colon>/" in front of the name,
	// which rules out the host part that follows "//"; the look-ahead wants
	// the end of the path behind it.
	$url = preg_replace('~(?<=[^/:]/)(?:index|default)\.[a-z0-9]{2,}(?=$|[?#])~i', '', $url);

	if (substr($url, -1, 1) == '/') {
		$url = substr($url, 0, -1);
	}
	return $url;
} // end transform_url

// get/update the cache file.
// true return means a new copy was written and has to be processed
if (get_weblogs_updatedfile()) {
	// pre-load the links
	preload_links();

	// Create an XML parser
	$xml_parser = xml_parser_create();

	// Set the functions to handle opening and closing tags
	xml_set_element_handler($xml_parser, "startElement", "endElement");

	// Open the XML file for reading
	$fp = fopen('lib/links-update-cache.xml', "r")
		  or die("Error reading XML data.");

	// Feed the file to the parser 16KB at a time.
	// The loop is driven by feof() rather than by the truthiness of the
	// chunk, so a chunk consisting of "0" does not end it early, and the
	// read that hits the end of the file always reaches xml_parse() with
	// the "final" flag set, even if that read returns no data.
	while (!feof($fp)) {
		$data = fread($fp, 16384);
		if ($data === false) {
			break; // read error
		}
		// Errors stay silenced as before; the links matched before a parse
		// error are still written back below, but reading on is pointless.
		if (!@xml_parse($xml_parser, $data, feof($fp))) {
			break;
		}
	}

	// Close the XML file
	fclose($fp);

	// Free up memory used by the XML parser
	xml_parser_free($xml_parser);

	// now update the db with latest times
	update_links();
} // end if updated cache file

?>