cPanel: dwadmin
PKG: no-ads
path to script:
(also tried it in the root)
sample test file to parse:
I played around with permissions to no avail. The script works here (although, for whatever reason, it's not picking up the 'rss:item' tags right now):
PKG: no-ads
path to script:
(also tried it in the root)
sample test file to parse:
I played around with permissions to no avail. The script works here (although, for whatever reason, it's not picking up the 'rss:item' tags right now):
// Top-level dispatch: one branch runs when $pageurl is truthy, the other when it is not.
// NOTE(review): the bodies of both branches and the closing brace are not visible in this
// copy; presumably they call parse_html($pageurl) and show_form() respectively - confirm.
// NOTE(review): nothing visible here assigns $pageurl before it is tested; verify where the
// value comes from (the form printed by show_form() submits a field with that name).
if ($pageurl) {
} else {
// Print the landing page: a description of the tool, usage instructions, and a form with a
// single "pageurl" text field that submits back to the current URL.
//
// Takes no parameters; the markup below is the function's output.
//
// NOTE(review): the switch from PHP to literal HTML (and the function's closing brace) is not
// visible in this copy, so the exact boundaries of the template cannot be confirmed from here.
// NOTE(review): $server and $request are printed into the form's action attribute without
// escaping, and the template uses the short open tag "<?" - confirm both are acceptable.
function show_form() {
// Host name and request URI of the current request, read from the environment; they are
// concatenated into the form's action URL further down.
$server = getenv("SERVER_NAME");
$request = getenv("REQUEST_URI");
<meta http-equiv="Content-Type" content="text/html; charset=windows-1252">
<title>RSSgenr8: HTML to RSS Converter - Generate an RSS feed from a web page</title>
<meta name="description" content="RSSgenr8 is a hosted HTML to RSS Scraper Tool which generates a RSS feed from a HTML web page">
<table width="100%" style="border-collapse: collapse" bordercolor="#111111" cellpadding="0" cellspacing="0">
<td><a href="">
<img border="0" src="" alt="xml hub" width="120" height="60"></a>
<B>RSSgenr8: HTML to RSS Converter</B>
<p>This form takes your web page and turns it into RSS 0.92.
<br />RSSgenr8 is a hosted HTML to RSS Scraper Tool which dynamically generates a RSS feed from a HTML web page.
<br />Changes to the web page are then automatically reflected in the RSS feed.</p>
<p align="left">RSSgenr8 is based on
<a href="">RSSify from</a> but is much
<br />Acknowledgements also to Aaron Swartz who came up with the idea and <a href="">the first implementation.</a></p>
<li>Put <span class="rss:item"> ... </span> round each item in your page.
<br />In Blogger you'd do this by going to your template in blogger and changing
<br /><b><$BlogItemBody$></b> to <b><span class="rss:item"><$BlogItemBody$></span></b>
<br />And then publish something to re-create the page with the new template
<li>Then put the URL of your new and modified page into the form below.
<li>Check that what you get back looks like RSS.
<li>Now you can make a link to this file like ""
<li>Finally add a link to it on your web page, using something like the XML image below.
<li>(As a condition of use, we ask you to show a visible HTML link to on your site.)
<font size=1>
<img src="images/xml.gif" alt="This gif is freely copyable. Just right click, save" width="36" height="14">
<br />
Powered by <br /><a href="">RSSgenr8 at</A>
<form action="<? print 'http://' . $server . $request; ?>">
The URL of your web page:
<br /><input type="text" name="pageurl" size=50> Include a final "/" or a filename.
<br /><input type="submit" value="Create RSS">
<p>If your web server runs PHP, please
<a href="">download</a>
the source and run it on your own server.
No configuration is needed - Just copy one file to the server.</p>
<br /><b>Usage</b>:
<li>The channel title is taken from the web page title.
<li>The channel description is taken from the meta description.
<li>The item text is put in the description element.
<li>The first line or the first 100 characters of html stripped description are put in the title element.
<li>The first link in the description is put in the link element. If there isn't one, the web page url is used.
<li>Relative paths in the link url are converted to absolute paths.
<li>All tags except <A> <B> <BR> <BLOCKQUOTE> <CENTER> <DD> <DL> <DT> <HR> <I> <IMG> <LI> <OL> <P> <PRE> <U> <UL> are stripped from the description.
<li>Tabs, NewLines, etc, in the description are converted to a single space
<li>A maximum of 25 items are included in the rss.
<li>if you want more detail about RSS, take a look at the
<a href="">FAQs</a>.</ul>
<a href="/">Home</a>
// Fetch the page at $pageurl, pull out every block marked with class "rss:item", and print
// the result as an RSS 0.92 document (sent with a text/xml Content-Type header).
//
// @param string $pageurl  URL of the HTML page to convert.
//
// NOTE(review): closing braces (and possibly other statements) are not visible in this copy,
// so the exact nesting of the branches below cannot be confirmed from here.
function parse_html($pageurl){
// Captures the text between `rss:item">` and the next `</span>`; case-insensitive, and "."
// also matches newlines.
$itemregexp = "%rss:item *\" *>(.+?)</span>%is";
// Tags that strip_tags() is told to keep in each item's description.
$allowable_tags = "<A><B><br /><br><BLOCKQUOTE><CENTER><DD><DL><DT><HR><I><IMG><LI> <OL><P><PRE><U><UL>";
$pageurlparts = parse_url($pageurl);
// A URL with no path component gets a trailing "/".
// NOTE(review): `path` is an unquoted array key here; confirm it should be 'path'.
if ($pageurlparts[path] == "") $pageurl .= "/";
// Open the URL for reading; "@" suppresses any warning fopen() would emit.
// NOTE(review): no handling for a failed fopen() and no fclose() are visible - confirm.
if ($fp = @fopen($pageurl, "r")) {
while (!feof($fp)) {
// Accumulate the whole response in $data via fgets() with a length argument of 128.
// NOTE(review): $data is not initialised anywhere in the visible code.
$data .= fgets($fp, 128);
// print "<pre>";
// print htmlentities($data);
// eregi("<title>(.*)</title>", $data, $title);
// $channel_title = $title[1];
$channel_title = "";
// Channel title: contents of the first <title> element, if there is one.
if (preg_match('/<title>(.+?)<\/title>/i', $data, $regs) > 0) { $channel_title = $regs[1];
// Channel description: a quoted value taken from a <meta ... description ...> tag.
if (preg_match('/<meta .*description.*"(.+?)"/i', $data, $regs) > 0) { $channel_desc = $regs[1];
// Fall back to the page URL when no description was found.
if ($channel_desc == "") $channel_desc = $pageurl;
// Collect all marked items; $items[1] holds the captured item bodies.
$match_count = preg_match_all($itemregexp, $data, $items);
// Emit at most 25 items.
$match_count = ($match_count > 25) ? 25 : $match_count;
header("Content-Type: text/xml");
// Document preamble and channel-level elements.
// NOTE(review): $output is appended to without a visible initialisation.
$output .= "<?xml version=\"1.0\" encoding=\"ISO-8859-1\" ?>\n";
$output .= "<!-- generator=\"rssgenr8/0.92\" -->\n";
$output .= "<!DOCTYPE rss PUBLIC \"-//W3C//ENTITIES Latin 1 for XHTML//EN\" \"\">\n";
$output .= "<rss version=\"0.92\">\n";
$output .= " <channel>\n";
$output .= " <title>". htmlentities(strip_tags($channel_title)) ."</title>\n";
$output .= " <link>". htmlentities($pageurl) ."</link>\n";
$output .= " <description>". htmlentities($channel_desc) ."</description>\n";
$output .= " <webMaster>". htmlentities("webmaster") ."</webMaster>\n";
$output .= " <generator>". htmlentities("RSSgenr8 from") ."</generator>\n";
$output .= " <language>en</language>\n";
// One <item> per captured block.
for ($i=0; $i< $match_count; $i++) {
$desc = $items[1][$i];
$title = wsstrip($desc);
$descout = $desc;
// Title candidate: everything before the first </P, </div, <br, </h or </td in the item.
if (preg_match("/(.+?)(?:<\/P|<\/div|<br|<\/h|<\/td)/i", $title, $regs) > 0) {
$title = $regs[1];
// When the tag-stripped candidate is under 100 characters, remove it from the description.
// NOTE(review): where this branch and the enclosing one close is not visible - confirm nesting.
if (strlen(wsstrip(trim(strip_tags($title)))) < 100) {
$descout = str_replace($title,"",$descout);
$title = wsstrip(trim(strip_tags($title)));
// Titles longer than 100 characters are cut to 100 and suffixed with " ...".
if (strlen($title) > 100) {
$title = substr($title,0,100) . " ...";
// Item link: resolved by get_link() from the item HTML and the page URL.
$item_url = get_link($desc, $pageurl);
$descout = wsstrip(strip_tags($descout, $allowable_tags));
// Drop a "<br>" that sits at the very start of the description (strpos() returns int 0
// only for a match at offset 0, and false when there is no match).
$pos = strpos($descout, "<br>");
if (is_int($pos) and ($pos == 0)) {
$descout=substr($descout, 4);
// Same for a leading "<br />".
$pos = strpos($descout, "<br />");
if (is_int($pos) and ($pos == 0)) {
$descout=substr($descout, 6);
// The remaining markup is entity-encoded so it travels as text inside <description>.
$descout = htmlentities(wsstrip($descout));
$output .= " <item>\n";
$output .= " <title>". htmlentities($title) ."</title>\n";
$output .= " <link>". htmlentities($item_url) ."</link>\n";
$output .= " <description>". $descout ."</description>\n";
$output .= " </item>\n";
$output .= " </channel>\n";
$output .= "</rss>\n";
// Send the assembled document to the client.
print $output;
// print htmlentities($output);
// print "</pre>";
/**
 * Return the target of the first link in an item's HTML as an absolute URL.
 *
 * The value of the first href attribute (quoted with either " or ') is taken,
 * trimmed, and - when it carries no host - resolved against the page URL with
 * make_abs(). If the item contains no usable href, the page URL itself is
 * returned so every item still gets a link.
 *
 * @param string $desc     HTML fragment of one item.
 * @param string $pageurl  URL of the page the fragment was taken from.
 * @return string  Absolute link URL, or $pageurl when no link is present.
 */
function get_link($desc, $pageurl) {
    // Anchor on `href=` followed by a quote and read up to the matching quote.
    // Searching for the bare word "href" and then the next double quote picked
    // up unrelated attributes and broke on single-quoted or unquoted values.
    if (!preg_match('/href\s*=\s*(["\'])(.*?)\1/is', $desc, $m)) {
        return $pageurl;
    }

    $linkurl = trim($m[2]);
    if ($linkurl === '') {
        return $pageurl;
    }

    // parse_url() may return false for badly malformed input; treat that the
    // same as "no host" and let make_abs() resolve it against the page.
    $parts = parse_url($linkurl);
    if ($parts === false || empty($parts['host'])) {
        $linkurl = make_abs($linkurl, $pageurl);
    }

    return $linkurl;
}
/**
 * Normalise whitespace: turn every CR, LF and tab into a space, trim the ends,
 * and collapse runs of spaces into a single space.
 *
 * @param string $str  Text to normalise.
 * @return string  The text with single spaces only and no leading/trailing whitespace.
 */
function wsstrip($str)
{
    // preg_replace() replaces ereg_replace(), which no longer exists in PHP 7+.
    $str = preg_replace('/[\r\t\n]/', ' ', (string) $str);
    $str = preg_replace('/ +/', ' ', trim($str));
    return $str;
}
/**
 * Resolve a relative URI against a base URL and return the absolute URL.
 *
 * A reference starting with "/" replaces the whole path of the base. Any other
 * reference is appended to the base's directory, after which "." and ".."
 * segments are collapsed.
 *
 * @param string $rel_uri              Relative reference, e.g. "../a/b.html".
 * @param string $base                 URL of the document the reference appears in.
 * @param bool   $REMOVE_LEADING_DOTS  When true, "." / ".." segments still left at the
 *                                     start of the path after collapsing are dropped.
 * @return string  Scheme and host of $base followed by the normalised path.
 */
function make_abs($rel_uri, $base, $REMOVE_LEADING_DOTS = true) {
    // "scheme://host" of the base; stays empty when $base has no such prefix.
    // The trailing slash is not required, so "http://host" works as a base too.
    $base_start = '';
    if (preg_match("'^([^:]+://[^/]+)'", $base, $m)) {
        $base_start = $m[1];
    }

    // Host-relative reference: keep scheme and host, replace the whole path.
    if (preg_match("'^/'", $rel_uri)) {
        return $base_start . $rel_uri;
    }

    // Path of the base with its last segment (the file name) removed, followed
    // by the relative reference.
    $path = (string) substr($base, strlen($base_start));
    if ($path === '') {
        $path = '/';
    }
    $path = preg_replace("{[^/]+$}", '', $path) . $rel_uri;

    $segments = explode('/', $path);
    // The leading "/" yields an empty first segment; drop it.
    if (count($segments) and !strlen($segments[0])) {
        array_shift($segments);
    }

    // Collapse "." and "dir/.." pairs. $i always points at the second element of
    // the pair being inspected and only advances when nothing was removed.
    $i = 1;
    while ($i < count($segments)) {
        if ($segments[$i - 1] === ".") {
            array_splice($segments, $i - 1, 1);
            if ($i > 1) {
                $i--;
            }
        } elseif ($segments[$i] === ".." and $segments[$i - 1] !== "..") {
            array_splice($segments, $i - 1, 2);
            if ($i > 1) {
                $i--;
                // Removing the final pair must leave a trailing slash behind.
                if ($i == count($segments)) {
                    array_push($segments, "");
                }
            }
        } else {
            $i++;
        }
    }

    // A trailing "/." becomes "/". PHP arrays have no negative indexing, so the
    // last element is addressed by its real index rather than [-1].
    $last = count($segments) - 1;
    if ($last >= 0 and $segments[$last] === ".") {
        $segments[$last] = "";
    }

    if ($REMOVE_LEADING_DOTS) {
        while (count($segments) and preg_match("/^\.\.?$/", $segments[0])) {
            array_shift($segments);
        }
    }

    return($base_start . '/' . implode("/", $segments));
}