this script converts html to an rss/xml feed. it looks for special html tags (<span class="rss:item"></span>)to do so.
it works fine everywhere else, but does not work on x10. the developer cannot be reached and i can't find another script like it.
there are no errors returned.
script: http://0daynews.org/rss/rssgenr8.php
test page: rsstest.html (located in public_html)
can anyone offer any advice?
it works fine everywhere else, but does not work on x10. the developer cannot be reached and i can't find another script like it.
there are no errors returned.
script: http://0daynews.org/rss/rssgenr8.php
test page: rsstest.html (located in public_html)
can anyone offer any advice?
Code:
<?php
if ($pageurl) {
parse_html($pageurl);
} else {
show_form();
}
function show_form() {
$server = getenv("SERVER_NAME");
$request = getenv("REQUEST_URI");
?>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=windows-1252">
<title>RSSgenr8: HTML to RSS Converter - Generate an RSS feed from a web page</title>
<meta name="description" content="RSSgenr8 is a hosted HTML to RSS Scraper Tool which generates a RSS feed from a HTML web page">
</head>
<body>
<p> </p>
<ol>
<li>Put <span class="rss:item"> ... </span> round each item in your page.
<br />In Blogger you'd do this by going to your template in blogger and changing
<br /><b><$BlogItemBody$></b> to <b><span class="rss:item"><$BlogItemBody$></span></b>
<br />And then publish something to re-create the page with the new template
<li>Then put the URL of your new and modified page into the form below.
<li>Check that what you get back looks like RSS.
<li>Now you can make a link to this file like "http://www.xmlhub.com/rssgenr8.php?pageurl=your_web_page_url"
<li>Finally add a link to it on your web page, using something like the XML image below.
<li>(As a condition of use, we ask you to show a visible HTML link to www.xmlhub.com on your site.)
</ol>
<form action="<? print 'http://' . $server . $request; ?>">
<input type="text" name="pageurl" size=50>
<input type="submit" value="Create RSS">
Include a final "/" or a filename.
</form>
<b>Usage</b>: http://www.xmlhub.com/rssgenr8.php?pageurl=your_web_page_url
<p><B>Notes:</B>
<ul>
<li>The channel title is taken from the web page title.
<li>The channel description is taken from the meta description.
<li>The item text is put in the description element.
<li>The first line or the first 100 characters of html stripped description are put in the title element.
<li>The first link in the description is put in the link element. If there isn't one, the web page url is used.
<li>Relative paths in the link url are converted to absolute paths.
<li>All tags except <A> <B> <BR> <BLOCKQUOTE> <CENTER> <DD> <DL> <DT> <HR> <I> <IMG> <LI> <OL> <P> <PRE> <U> <UL> are stripped from the description.
<li>Tabs, NewLines, etc, in the description are converted to a single space
<li>A maximum of 25 items are included in the rss.
<li>if you want more detail about RSS, take a look at the
<a href="http://www.xmlhub.com/rssfaqs.htm">FAQs</a>.</ul>
</body>
</html>
<?
}
function parse_html($pageurl){
$itemregexp = "%rss:item *\" *>(.+?)</span>%is";
$allowable_tags = "<A><B><br /><br><BLOCKQUOTE><CENTER><DD><DL><DT><HR><I><IMG><LI> <OL><P><PRE><U><UL>";
$pageurlparts = parse_url($pageurl);
if ($pageurlparts[path] == "") $pageurl .= "/";
if ($fp = @fopen($pageurl, "r")) {
while (!feof($fp)) {
$data .= fgets($fp, 128);
}
fclose($fp);
}
// print "<pre>";
// print htmlentities($data);
// eregi("<title>(.*)</title>", $data, $title);
// $channel_title = $title[1];
$channel_title = "";
if (preg_match('/<title>(.+?)<\/title>/i', $data, $regs) > 0) { $channel_title = $regs[1];
}
if (preg_match('/<meta .*description.*"(.+?)"/i', $data, $regs) > 0) { $channel_desc = $regs[1];
}
if ($channel_desc == "") $channel_desc = $pageurl;
$match_count = preg_match_all($itemregexp, $data, $items);
$match_count = ($match_count > 25) ? 25 : $match_count;
header("Content-Type: text/xml");
$output .= "<?xml version=\"1.0\" encoding=\"ISO-8859-1\" ?>\n";
$output .= "<!-- generator=\"rssgenr8/0.92\" -->\n";
$output .= "<!DOCTYPE rss PUBLIC \"-//W3C//ENTITIES Latin 1 for XHTML//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent\">\n";
$output .= "<rss version=\"0.92\">\n";
$output .= " <channel>\n";
$output .= " <title>". htmlentities(strip_tags($channel_title)) ."</title>\n";
$output .= " <link>". htmlentities($pageurl) ."</link>\n";
$output .= " <description>". htmlentities($channel_desc) ."</description>\n";
$output .= " <webMaster>". htmlentities("webmaster") ."</webMaster>\n";
$output .= " <generator>". htmlentities("RSSgenr8 from XMLhub.com") ."</generator>\n";
$output .= " <language>en</language>\n";
for ($i=0; $i< $match_count; $i++) {
$desc = $items[1][$i];
$title = wsstrip($desc);
$descout = $desc;
if (preg_match("/(.+?)(?:<\/P|<\/div|<br|<\/h|<\/td)/i", $title, $regs) > 0) {
$title = $regs[1];
if (strlen(wsstrip(trim(strip_tags($title)))) < 100) {
$descout = str_replace($title,"",$descout);
}
}
$title = wsstrip(trim(strip_tags($title)));
if (strlen($title) > 100) {
$title = substr($title,0,100) . " ...";
}
$item_url = get_link($desc, $pageurl);
$descout = wsstrip(strip_tags($descout, $allowable_tags));
$pos = strpos($descout, "<br>");
if (is_int($pos) and ($pos == 0)) {
$descout=substr($descout, 4);
}
$pos = strpos($descout, "<br />");
if (is_int($pos) and ($pos == 0)) {
$descout=substr($descout, 6);
}
$descout = htmlentities(wsstrip($descout));
$output .= " <item>\n";
$output .= " <title>". htmlentities($title) ."</title>\n";
$output .= " <link>". htmlentities($item_url) ."</link>\n";
$output .= " <description>". $descout ."</description>\n";
$output .= " </item>\n";
}
$output .= " </channel>\n";
$output .= "</rss>\n";
print $output;
// print htmlentities($output);
// print "</pre>";
}
function get_link($desc, $pageurl) {
if (stristr($desc, "href")) {
$linkurl = stristr($desc, "href");
$linkurl = substr($linkurl, strpos($linkurl, "\"")+1);
$linkurl = substr($linkurl, 0, strpos($linkurl, "\""));
$linkurl = trim($linkurl);
$pageurlarray = parse_url($linkurl);
if (empty($pageurlarray['host'])) {
$linkurl = make_abs($linkurl, $pageurl);
}
return $linkurl;
} else {
return $pageurl;
}
}
function wsstrip($str)
{
$str=ereg_replace("[\r\t\n]"," ",$str);
$str=ereg_replace (' +', ' ', trim($str));
return $str;
}
function make_abs($rel_uri, $base, $REMOVE_LEADING_DOTS = true) {
preg_match("'^([^:]+://[^/]+)/'", $base, $m);
$base_start = $m[1];
if (preg_match("'^/'", $rel_uri)) {
return $base_start . $rel_uri;
}
$base = preg_replace("{[^/]+$}", '', $base);
$base .= $rel_uri;
$base = preg_replace("{^[^:]+://[^/]+}", '', $base);
$base_array = explode('/', $base);
if (count($base_array) and!strlen($base_array[0]))
array_shift($base_array);
$i = 1;
while ($i < count($base_array)) {
if ($base_array[$i - 1] == ".") {
array_splice($base_array, $i - 1, 1);
if ($i > 1) $i--;
} elseif ($base_array[$i] == ".." and $base_array[$i - 1]!= "..") {
array_splice($base_array, $i - 1, 2);
if ($i > 1) {
$i--;
if ($i == count($base_array)) array_push($base_array, "");
}
} else {
$i++;
}
}
if (count($base_array) and $base_array[-1] == ".")
$base_array[-1] = "";
if ($REMOVE_LEADING_DOTS) {
while (count($base_array) and preg_match("/^\.\.?$/", $base_array[0])) {
array_shift($base_array);
}
}
return($base_start . '/' . implode("/", $base_array));
}
?>