# Podkatzer (c) moolder 2006
# Script for fetching podcasts
# needs prior run of get-feeds.pl

require 'config.pl' or die ('config.pl nicht gefunden');

# Load the list of podcasts that were already handled into %podcastIgnore.
# Every line of podcasts_ignore.txt holds five tab-separated fields:
#   feed name, content file name, enclosure URL, size, publication date
# The hash key is "feed name\tcontent file name", the value is the remaining
# three fields joined by tabs (the same layout the download loop writes back).
{
	my $ignorePath = $dataPath."podcasts_ignore.txt";

	# Three-argument open with a lexical handle: the path can never be
	# misread as an open mode and the handle cannot clash with other code.
	open(my $ignoreFh, '<', $ignorePath)
		or die("konnte nicht oeffnen: $ignorePath: $!");

	while (my $entry = <$ignoreFh>) {
		chomp $entry;
		my @fields = split(/\t/, $entry);

		# split drops trailing empty fields, so short lines are padded with
		# empty strings to keep the stored value at exactly three columns.
		my ($feed, $file, $url, $size, $date) =
			map { defined $_ ? $_ : '' } @fields[0 .. 4];

		$podcastIgnore{ $feed."\t".$file } = $url."\t".$size."\t".$date;
	}
	close $ignoreFh;
}


# Read the feed configuration from feeds.txt.
# Lines starting with '#' are comments. Every other line holds tab-separated
# fields: feed name, feed URL, keep-days value. The results are stored in
# %feedUrl and %feedKeepDays (keyed by feed name) and the names are appended
# to @feedNames in file order.
{
	my $feedsPath = $dataPath."feeds.txt";

	if (open(my $feedsFh, '<', $feedsPath)) {
		while (my $entry = <$feedsFh>) {
			next if $entry =~ /^#/;
			chomp $entry;
			# A blank line carries no feed; skip it instead of registering
			# an entry with an empty name.
			next if $entry eq '';

			my ($name, $url, $keepDays) = split(/\t/, $entry);
			$feedUrl{$name} = $url;
			$feedKeepDays{$name} = $keepDays;
			push @feedNames, $name;
		}
		close $feedsFh;
	} else {
		# Keep running as before, but say why no feed settings are known:
		# without them no feed has a keep-days value of -1.
		warn "feeds.txt konnte nicht geoeffnet werden: $feedsPath: $!\n";
	}
}

# Totals reported in the closing #FETCHDONE line.
$globalFetchCount = 0;	# number of download attempts
$feedCount = 0;		# number of *.rss files processed

print "#FETCHSTART Downloading podcasts...\n";

# Directory handle D is consumed by the readdir loop that follows.
# A failure is reported but not fatal, so the run still ends with #FETCHDONE.
opendir(D, $feedPath)
	or warn "Feed-Verzeichnis '$feedPath' konnte nicht geoeffnet werden: $!\n";

# Scan every *.rss file found through directory handle D, extract the podcast
# items with a small line-based state machine and fetch every enclosure that
# is not yet listed in %podcastIgnore.
#
# Parser states ($mode):
#   initial - outside of any <item>
#   item    - inside an <item>, collecting enclosure data
#   content - inside a multi-line <content:encoded> CDATA section
#
# Output lines starting with '#PODCAST' and '#FEED' keep their exact format.
while (defined(my $filename = readdir D)) {
	next unless $filename =~ /^(.+)\.rss$/i;
	my $feedName = $1;

	$fetchCount{$feedName} = 0;
	$errorCount{$feedName} = 0;
	$ignoreCount{$feedName} = 0;
	$itemCount{$feedName} = 0;
	$enclosureExistCount{$feedName} = 0;
	$enclosureFoundCount{$feedName} = 0;
	$feedCount++;

	# The feed is read completely before it is parsed. An unreadable feed is
	# reported and then handled like an empty one, so its #FEED line is
	# still printed.
	my @feedLines;
	if (open(my $feedFh, '<', $feedPath.$filename)) {
		@feedLines = <$feedFh>;
		close $feedFh;
	} else {
		print "\n *** WARNUNG: Feed-Datei $feedPath$filename konnte nicht geoeffnet werden: $!\n";
	}

	# Parser state is local to one feed so nothing leaks from a truncated
	# feed into the next one.
	my $mode = "initial";
	my ($content, $contentFile, $enclosureUrl, $size, $pubDate) = ('', '', '', '', '');

	foreach my $line (@feedLines) {
		if ($mode eq "initial") {
			if ($line =~ /\<item\>/i) {
				$itemCount{$feedName}++;
				$mode = "item";
				$contentFile = "";
				$enclosureUrl = "";
				$size = "";
				$pubDate = "";
			}
		} elsif ($mode eq "item") {
			# Counted separately from the URL match below so that
			# enclosures with an unparsable url attribute can be reported.
			if ($line =~ /<enclosure/i) {
				$enclosureExistCount{$feedName}++;
			}
			if ($line =~ /\<enclosure[^>]+url *= *['"]([^'"]*)['"]/i) {
				$enclosureFoundCount{$feedName}++;
				$enclosureUrl = $1;
				$contentFile = _fileNameFromUrl($enclosureUrl);
			}
			if ($line =~ /\<enclosure[^>]+length *= *['"]([^'"]*)['"]/i) {
				$size = $1;
			}
			if ($line =~ /\<media:content[^>]+url *= *['"]([^'"]*)['"]/i) {
				# A media:content URL only overrides the file name; the
				# download itself always uses the enclosure URL.
				my $mediaFile = _fileNameFromUrl($1);
				$contentFile = $mediaFile if $mediaFile ne '';
			}
			if ($line =~ /\<pubDate\>([^\<]*)\<\/pubDate\>/i) {
				$pubDate = $1;
			}
			if ($line =~ /\<\/item/i) {
				if ($contentFile ne '' and $enclosureUrl ne '') {
					my $ignoreKey = $feedName."\t".$contentFile;
					if (exists($podcastIgnore{$ignoreKey})) {
						$ignoreCount{$feedName}++;
					} else {
						print "#PODCAST ".$feedName."\t".$contentFile."\t";
						print $size."\t".$pubDate."\t";
						print $enclosureUrl."\n";

						# Reset for every item: a failure of an earlier
						# download must not mark an item as failed that is
						# only being pushed to the ignore list.
						my $error = 0;
						if ($pushAllToIgnore == 0 and $feedKeepDays{$feedName} != -1) {
							$error = _fetchPodcast($feedName, $contentFile, $enclosureUrl, $content);
						}

						if ($error == 0) {
							$podcastIgnore{$ignoreKey} = $enclosureUrl."\t".$size."\t".$pubDate;
							$fetchCount{$feedName}++;

							# Persist after every item so an aborted run
							# does not fetch the same files again.
							_writePodcastIgnoreList();

							# Store the item text next to the downloaded file.
							my $target = $podcastPath.$feedName.'/'.$contentFile;
							if (-f $target and $content ne '') {
								if (open(my $infoFh, '>', $target.'.txt')) {
									print {$infoFh} $content;
									close $infoFh;
								} else {
									print "\n *** WARNUNG: $target.txt konnte nicht geschrieben werden: $!\n";
								}
							}
						} else {
							$errorCount{$feedName}++;
						}
					}
				}
				$mode = "initial";
				$content = "";
			}

			# content:encoded normally spans several lines; a CDATA section
			# that opens and closes on the same line is taken as is and does
			# not switch the parser into content mode.
			if ($line =~ /\<content\:encoded\>\<\!\[CDATA\[(.*)$/i) {
				my $rest = $1;
				if ($rest =~ /^(.*)\]\]\>\<\/content\:encoded\>/i) {
					$content = $1;
				} else {
					$content = $rest;
					$mode = "content";
				}
			}
			# Assumption: description is always on a single line. It is only
			# used when no content:encoded text was seen for this item.
			if ($line =~ /\<description\>([^<]*)\<\/description\>/i) {
				if ($content eq "") {
					$content = $1;
				}
			}
		} elsif ($mode eq "content") {
			if ($line =~ /^(.*)\]\]\>\<\/content\:encoded\>/i) {
				$content = $content." ".$1;
				$mode = "item";
			} else {
				$content = $content." ".$line;
			}
		}
	}

	if ($enclosureFoundCount{$feedName} != $enclosureExistCount{$feedName}) {
		print "\n *** WARNUNG: Einige enclosures nicht gefunden in Feed $feedName - ";
		print "gefunden ".$enclosureFoundCount{$feedName};
		print ", existent ".$enclosureExistCount{$feedName};
		print ", Itemanzahl ".$itemCount{$feedName}."\n";
		sleep 3;
	}
	if ($enclosureFoundCount{$feedName} == 0 or
			$enclosureExistCount{$feedName} == 0) {
		print "\n *** WARNUNG: Keine Enclosures gefunden im Feed $feedName - ";
		print "gefunden ".$enclosureFoundCount{$feedName};
		print ", existent ".$enclosureExistCount{$feedName};
		print ", Itemanzahl ".$itemCount{$feedName}."\n";
		sleep 3;
	}
	print "#FEED ".$feedName.", ".$fetchCount{$feedName}." heruntergeladen";
	print ", ". $errorCount{$feedName} ." fehlerhaft";
	print ", ". $ignoreCount{$feedName} ." ignoriert.\n";
}

# Returns the local file name for a URL: its last path segment with the
# escapes %20 and %23 replaced by a blank. Returns '' when the URL has no
# non-empty segment after its last slash.
sub _fileNameFromUrl {
	my ($url) = @_;
	return '' unless $url =~ /\/([^\/]+)$/;
	my $name = $1;
	$name =~ s/\%20/ /g;
	$name =~ s/\%23/ /g;
	return $name;
}

# Downloads one enclosure with the configured wget call into
# $podcastPath/<feed name>/<content file> and increments $globalFetchCount
# for every attempt that reaches wget.
# Parameters: feed name, content file name, enclosure URL, item text
# (printed before the download when not empty).
# Returns 0 on success and 1 when the download failed or was refused.
sub _fetchPodcast {
	my ($feedName, $contentFile, $enclosureUrl, $content) = @_;

	# File name and URL come from the feed and end up inside double quotes
	# on a shell command line. Refuse everything that could break out of
	# the quotes or start an expansion.
	foreach my $part ($contentFile, $enclosureUrl) {
		if ($part =~ /["`\$\\\x00-\x1f]/) {
			print "\n *** WARNUNG: Unsichere Zeichen in Dateiname oder URL, Download uebersprungen: $enclosureUrl\n";
			return 1;
		}
	}

	my $targetDir = $podcastPath.$feedName;
	mkdir($targetDir) and print $targetDir."/ angelegt\n";

	# wget is told to log to the very file that is removed here and
	# inspected below.
	my $logFile = $dataPath."wgetlog.txt";
	unlink $logFile;

	if ($content ne '') {
		print $content."\n";
	}

	# The URL is quoted so that characters such as '&' reach wget instead
	# of being interpreted by the shell.
	my $status = system($wgetCallPodcast
		.'--output-file="'.$logFile.'"'
		.' --output-document="'.$targetDir.'/'.$contentFile.'"'
		.' "'.$enclosureUrl.'"');
	$globalFetchCount++;

	# -1 means the command could not be started at all.
	my $error = ($status == -1) ? 1 : 0;

	if (open(my $logFh, '<', $logFile)) {
		while (my $logLine = <$logFh>) {
			if ($logLine =~ /ERROR/) {
				$error = 1;
				last;
			}
		}
		close $logFh;
	} else {
		# Without a log the download cannot be confirmed; report a failure
		# so the item is tried again on the next run.
		print "\n *** WARNUNG: $logFile konnte nicht gelesen werden: $!\n";
		$error = 1;
	}
	return $error;
}

# Writes %podcastIgnore back to podcasts_ignore.txt, one entry per line as
# "key<TAB>value", sorted by key. Dies when the file cannot be opened.
sub _writePodcastIgnoreList {
	my $ignorePath = $dataPath."podcasts_ignore.txt";
	open(my $ignoreFh, '>', $ignorePath)
		or die("konnte nicht oeffnen: $ignorePath: $!");
	foreach my $key (sort keys %podcastIgnore) {
		print {$ignoreFh} $key."\t".$podcastIgnore{$key}."\n";
	}
	# Buffered write errors only show up when the handle is closed.
	close($ignoreFh)
		or print "\n *** WARNUNG: $ignorePath konnte nicht vollstaendig geschrieben werden: $!\n";
}

# The directory scan is finished; release the handle opened for it.
closedir D;

# Closing summary: total download attempts and number of feeds processed.
print "#FETCHDONE Insgesamt ".$globalFetchCount." Podcasts in ".$feedCount." Feeds heruntergeladen.\n";

# Pause before exiting; $exitWaitSecs is not set in this file
# (presumably defined in config.pl - verify there).
sleep $exitWaitSecs;