Perlfect Solutions
 

[Perlfect-search] Including extra dirs in the indexer

Mark McLoughlin mark@skynet.ie
Tue, 14 Nov 2000 18:56:17 +0000 (GMT)
How's it going,
        I set up perlfect yesterday and liked it, a lot,
but I couldn't see a way to include all our user home
web dirs (/home/(*)/public_html) so I hacked together
something to make it work.... The patch is against 3.10.

        I also made sure the indexer didn't follow
symlinks 'cause some of our users like the idea of
recursive symlinks ;)

        And I put in an option to limit the number of
page links shown in the navbar.

Good Luck,
Mark

P.S. I'm not on this list

---- Patch for homedirs and symlinks -----

*** perlfect/indexer.pl Tue Nov 14 13:29:54 2000
--- perlfect-patched/indexer.pl Tue Nov 14 13:26:11 2000
***************
*** 79,82 ****
--- 79,104 ----
  print "Starting crawler...\n";
  crawl($DOCUMENT_ROOT.$INDEXER_START_DIR);
+
+ # Start Lame Hack by mark to get user pages indexed
+ my $homedir = "/home";   # parent directory holding one subdirectory per user
+ if ( opendir(HOMEDIR, $homedir) ) {
+    my @contents = grep { !/^\.{1,2}$/ } readdir HOMEDIR;   # skip "." and ".."
+    my $username;
+    # Crawl each user's public_html directory, where one exists.
+    foreach $username ( @contents ) {
+       my $userwebdir = "$homedir/$username/public_html";
+       if ( -d $userwebdir ) {
+          print "Crawler descending into $userwebdir...\n";
+          crawl( $userwebdir );
+          }
+       }
+    # Close the handle opened above (HOMEDIR, not the unrelated DIR handle).
+    closedir(HOMEDIR);
+    }
+ else {
+    warn "Cannot open $homedir: $!";
+    }
+ # End Lame Hack
+
  print "Crawler finished($DN files, $TN terms)\n\n";

***************
*** 129,133 ****
    closedir(DIR);

!   my @dirs  = grep {-d and not /^\.{1,2}$/} @contents;
    my @files = grep {-f and /^.+\.(.+)$/ and grep {/^\Q$1\E$/} @EXT} @contents;

--- 151,156 ----
    closedir(DIR);

!   # Don't Follow Symbolic Links - mark
!   my @dirs  = grep {-d and not -l and not /^\.{1,2}$/} @contents;
    my @files = grep {-f and /^.+\.(.+)$/ and grep {/^\Q$1\E$/} @EXT} @contents;

***************
*** 263,271 ****
    my $file = $_[0];

!   $file =~ m/^$DOCUMENT_ROOT(.*)$/;
!   $file = $1;
    unless ($file =~ /^\//) {
      $file = "/".$file;
!   }

    ++$DN;
--- 286,307 ----
    my $file = $_[0];

!   # Start Lame Hack by mark to get user pages indexed
!   if ( $file =~ m/^$DOCUMENT_ROOT(.*)$/ ) {
!      $file = $1;
!      }
!   elsif ( $file =~ m/^\/home\/(\w+)\/public_html\/(.*)$/ ) {
!      my ( $user , $tmpname );
!
!      ( $user , $tmpname ) = ( $1 , $2 );
!      $file = "/~$user/$tmpname";
!      }
!   else {
!      $file = "";
!      }
!   # End Lame Hack
!
    unless ($file =~ /^\//) {
      $file = "/".$file;
!     }

    ++$DN;


--- Patch For nav bar thingy ------

diff -u -C 2 ./old/conf.pl ./new/conf.pl
*** ./old/conf.pl       Tue Nov 14 18:50:08 2000
--- ./new/conf.pl       Tue Nov 14 18:53:06 2000
***************
*** 22,25 ****
--- 22,29 ----
  $RESULTS_PER_PAGE = 5;

+ # Extra config option by mark
+ # How many pages should be on the nav bar
+ $NAV_BAR_PAGES = 10;
+
  # Do you want to index numbers? If so set $INDEX_NUMBERS to 1. [re-index]
  $INDEX_NUMBERS = 0;
diff -u -C 2 ./old/search.pl ./new/search.pl
*** ./old/search.pl     Tue Nov 14 18:50:26 2000
--- ./new/search.pl     Tue Nov 14 18:51:11 2000
***************
*** 224,229 ****
      $h{next} = "<A href=\"$SEARCH_URL?$q_and_lang&p=".($current_page+1)."\">$NEXT_PAGE{$lang}</A>";
    }
!
!   for (1..$last_page) {
      if ($_ != $current_page) {
        $h{navbar} .= "<A href=\"$SEARCH_URL?$q_and_lang&p=$_\">$_</A> ";
--- 224,231 ----
      $h{next} = "<A href=\"$SEARCH_URL?$q_and_lang&p=".($current_page+1)."\">$NEXT_PAGE{$lang}</A>";
    }
!
!   # Hack by mark to limit the amount of links in navbar
!   for (1..($last_page>$NAV_BAR_PAGES?$NAV_BAR_PAGES:$last_page)) {
!   # End of hack by mark
      if ($_ != $current_page) {
        $h{navbar} .= "<A href=\"$SEARCH_URL?$q_and_lang&p=$_\">$_</A> ";