"Amigos y nadie más. El resto, la selva"
-- Jorge Guillén

Mozilla Firefox Live Bookmark: convertidor

From the convierte-de-bookmarks.html-a-RSS dept. (7484)

Como ya es tradición en Latinomixed.com, aquí les llega un script que convierte el archivo bookmarks.html de Firefox en un archivo tipo RSS que puede usarse como Live Bookmark. Claro, habrá quienes usen páginas como del.icio.us para compartir sus bookmarks, pero a todos los demás este script puede serles de utilidad. Noten que lo hice en un par de horas de hackeo y todavía le faltan unas cuantas cositas.
Descarguen el archivo completo de aquí.

Para usarlo necesitarás los módulos HTML::Parser y XML::RSS de CPAN.



[Actualizado: 2004-11-22 17:25 EST] La nueva versión ha sido publicada y contiene todas las funciones que uno pudiera esperar. Ahora los enlaces tienen nombres completos y el código es más limpio. Cójanlo de aquí. Luego se actualizará esta página para reflejar dichos cambios.


     1t#!/usr/bin/perl -w
     2t# 2004-09-22 21:57 EDT $Revision: 1.2 $ 
     3t# Luis Mondesi  
     4t# Converts a bookmarks.html(firefox/mozilla/netscape) file 
     5t# to bookmarks.rss (1.0)
     6t#
     7t# There is no need to edit anything below. To run simply do:
     8t#
     9t# bookmark2rss.pl /path/to/bookmarks.html 
    10t#
    11t# The resulting output will be printed to STDOUT
    12t#
    13t# TODO
    14t#   - should live bookmarks be included? hint: instead of "href" look for FEEDURL attribute
    15t
    16tuse strict;
    17t$|++;
    18t# standard Perl modules
    19tuse Getopt::Long;
    20tGetopt::Long::Configure('bundling');
    21t# non-standard modules
    22tuse HTML::Parser 3.00 ();
    23tuse XML::RSS;
    24t# create XML::RSS object
    # Create the XML::RSS object that collects the channel description and
    # one item per bookmark. Called as a class method rather than with the
    # ambiguous indirect-object form "new XML::RSS (...)".
    my $rss = XML::RSS->new(version => '1.0');

    # Channel metadata defaults. Each value can be overridden from the
    # command line by the option named in the trailing comment.
    my $SITE = "";                              # --site / -s
    my $DESC = "My Bookmarks";                  # --desc / -d
    my $DATE = "";                              # TODO get date()
    my $SUBJECT = "bookmarks";                  # --subject
    my $CREATOR = "";                           # --creator (email?)
    my $PUBLISHER = "";                         # --publisher (defaults to creator, see below)
    my $COPYRIGHT = "";                         # --copyright
    my $LOCALE = "en-us";                       # --language / -l
    my $UPDATED = "daily";                      # --updated
    my $FREQ = "1";                             # --update-frequency
    my $UPDATEBASE = "1901-01-01T00:00+00:00";  # --update-base
    my $OUTPUT = "";                            # --output / -o (empty means STDOUT)

    # GetOptions stores each parsed value through the REFERENCE given as the
    # destination, so every target below must be \$variable, not $variable.
    GetOptions(
        # flags
        # strings
        's|site=s'           => \$SITE,
        'd|desc=s'           => \$DESC,
        'subject=s'          => \$SUBJECT,
        'creator=s'          => \$CREATOR,
        'publisher=s'        => \$PUBLISHER,
        'copyright=s'        => \$COPYRIGHT,
        'l|language=s'       => \$LOCALE,
        'updated=s'          => \$UPDATED,
        'update-frequency=s' => \$FREQ,
        # "=s" is required here: without it the option is a boolean flag and
        # would overwrite the timestamp default with 1.
        'update-base=s'      => \$UPDATEBASE,
        'o|output=s'         => \$OUTPUT,
        # numbers
    );

    # The publisher falls back to the creator. This has to happen after the
    # command line is parsed, otherwise --creator can never reach it.
    $PUBLISHER = $CREATOR if $PUBLISHER eq "";
    56t
    # Start-tag handler installed on the HTML::Parser object.
    #
    # Arguments (per the "self,tagname,attr" argspec it is registered with):
    #   $self - the parser object
    #   $tag  - the tag name
    #   $attr - hash reference of the tag's attributes
    #
    # For every <a> tag that has an href attribute, an RSS item is added whose
    # title and link are both the URL. The parser's handlers are then switched
    # so that text inside the anchor is accumulated, <img> tags are handled by
    # img_handler, and the closing tag is handled by a_end_handler.
    sub a_start_handler
    {
        my ($self, $tag, $attr) = @_;
        return unless $tag eq "a";
        return unless exists $attr->{href};

        my $url = $attr->{href};
        #print "A $url\n";
        # TODO get text as title for this link
        $rss->add_item( title => "$url", link => "$url" );

        # Collect the anchor's text into a fresh array kept by the parser;
        # '@{dtext}' is the HTML::Parser argspec for the decoded text.
        $self->handler(text  => [], '@{dtext}');
        # Handlers are passed as code references (\&name); a bare &name
        # would call the sub immediately instead of registering it.
        $self->handler(start => \&img_handler);
        $self->handler(end   => \&a_end_handler, "self,tagname");
        return;
    }
    # Start-tag handler that is active while the parser is inside an anchor.
    #
    # Arguments:
    #   $self - the parser object; $self->handler("text") must return the
    #           array reference in which the anchor's text is being collected
    #   $tag  - the tag name
    #   $attr - hash reference of the tag's attributes
    #
    # For <img> tags, appends the image's alt text to the collected anchor
    # text, or the placeholder "[IMG]" when alt is missing or empty. Any other
    # tag is ignored.
    sub img_handler
    {
        my ($self, $tag, $attr) = @_;
        return unless $tag eq "img";
        push @{ $self->handler("text") }, $attr->{alt} || "[IMG]";
        return;
    }
    # End-tag handler that is active while the parser is inside an anchor.
    #
    # Arguments (per the "self,tagname" argspec it is registered with):
    #   $self - the parser object
    #   $tag  - the tag name (not inspected)
    #
    # Joins the text collected for the anchor, normalises its whitespace, and
    # then restores the parser to its "looking for the next <a>" state: text
    # and end handlers are removed and a_start_handler becomes the start
    # handler again.
    sub a_end_handler
    {
        my ($self, $tag) = @_;

        # Whitespace classes need the backslash (\s); a bare "s" would strip
        # literal letters instead of blanks.
        my $text = join("", @{ $self->handler("text") });
        $text =~ s/^\s+//;
        $text =~ s/\s+$//;
        $text =~ s/\s+/ /g;
        # NOTE(review): $text is not used yet; it is the value the TODO in
        # a_start_handler wants as the item title.
        #print "T $text\n";

        $self->handler("text", undef);
        $self->handler("start", \&a_start_handler);
        $self->handler("end", undef);
        return;
    }
    # Placeholder that currently does nothing and returns nothing.
    #
    # The commented-out sketch below shows an alternative design in which
    # bookmarks are first gathered into a %bookmarks hash (URL => text) and
    # then added to the feed in one pass. No %bookmarks hash is declared in the
    # visible script, so the sketch is kept only as a note.
    sub add_items
    {
        #foreach my $url ( keys %bookmarks )
        #{
        #    my $text = $bookmarks{$url};
        #    $rss->add_item(
        #        title       => "$url",
        #        link        => "$url",
        #        description => "$text",
        #        dc => {
        #            subject  => "Bookmark",
        #            creator  => "Created by",
        #        },
        #    );
        #}
        return;
    }
   # Prints a usage message to STDERR and terminates the script with exit
   # status 1.
   #
   # Arguments:
   #   $reason - optional; when given and non-empty it is printed on its own
   #             line before the usage text (the script passes $! here after a
   #             failed parse, which was previously discarded)
   sub usage_die
   {
       my ($reason) = @_;
       print STDERR "$reason\n" if defined $reason && length $reason;
       print STDERR "Usage: $0 bookmarks.html\n";
       exit(1);
   }
   # Describe the channel using the defaults / command-line values.
   $rss->channel(
       title        => "$SITE",
       link         => "http://$SITE",
       description  => "$DESC",
       dc => {
           date       => "$DATE",
           subject    => "$SUBJECT",
           creator    => "$CREATOR",
           publisher  => "$PUBLISHER",
           rights     => "$COPYRIGHT",
           language   => "$LOCALE",
       },
       syn => {
           updatePeriod     => "$UPDATED",
           updateFrequency  => "$FREQ",
           updateBase       => "$UPDATEBASE",
       },
   );

   # create a HTML::Parser object
   # The start handler must be given as a code reference (\&name); a bare
   # &name would call the sub while the argument list is being built.
   # Only <a> and <img> tags are reported to the handlers.
   my $p = HTML::Parser->new(api_version => 3,
           start_h => [\&a_start_handler, "self,tagname,attr"],
           report_tags => [qw(a img)],
       );

   # The bookmarks file is the first remaining command-line argument; show the
   # usage text when it is missing or cannot be parsed.
   my $input = shift(@ARGV) || usage_die();
   $p->parse_file($input) || usage_die("$input: $!");

   # print RSS
   if ( $OUTPUT ne "" )
   {
       ## It seems that this is buggy, it prints:
       ## Wide character in print at /usr/share/perl5/XML/RSS.pm line 1606.
       # $rss->save("$OUTPUT");
       # Lexical filehandle, and close is checked because buffered write
       # errors only surface when the handle is closed.
       open(my $out, ">:utf8", $OUTPUT) or die "Cannot open $OUTPUT: $!";
       print {$out} $rss->as_string;
       close($out) or die "Cannot close $OUTPUT: $!";
   } else {
       print STDOUT $rss->as_string;
   }

More...


Advertisement