#!/usr/bin/perl
#
# sortref -- an alternative to bibtex
#
# Copyright (C) 1998, Robert Harlander. All rights reserved.
#
# Comments, bug reports, and suggestions to 
# rh@particle.physik.uni-karlsruhe.de
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GNU Emacs; see the file COPYING.  If not, write to
# the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
#
$version = "2.3.6";
print(
      "\n\n",
      "            sortref-$version\n",
      "\n");

#- {{{ changes:
#
# Oct 13, 2006 (rh):  collects bibitems from several biblio files.
#
# Jun 04, 2005 (rh):  bug fixes
#
# May 20, 2005 (rh):  comment lines are dropped, except %%CITATION = ...;%%
#
# Oct 21, 2003 (rh):  @biblist introduced
#
# Feb 20, 2003 (rh):  -v option added and -q option changed
#
# Oct  8, 1998 (rh):  -v option added and -q option changed
#
# Apr  7, 1998 (rh):  -sup and -miss options added to see superfluous and 
#                     missing references
#
# Nov  2, 1997 (rh):  rmdouble routine replaced by version also working with
#                     PERL 5.004
#

#- }}}
#- {{{ customization:

# define a list of default bibliography files here:
@biblist = ("biblio.tex",$ENV{'HOME'}."/tex/texinputs/biblio.tex");

#- }}}
#- {{{ argproc:

$quiet = argproc("q(uiet)?",0);
$verbose = argproc("v",0);
$h = argproc("h(elp)?",0);
$supflag = argproc("s(up)?",0);
$missflag = argproc("m(iss)?",0);

#- }}}
#- {{{ help pages :

#
# if it is invoked with the -h option (help), print out this and stop:
#
if ($h||($#ARGV==-1)) {
    print("\n",
	  "SYNTAX: sortref [-options] 'tex-file' ['ref-file']

USAGE:
[In the following, replace 'tex-file' by the name of your main LaTeX-file]
(1) Put all your \\bibitem\{\} entries to a separate file which you may give
    any name. In the following we will refer to it as 'ref-file'.
(2) Include the following lines in your 'tex-file':
    \\begin\{thebibliography\}\{99\}
    \\input\{'tex-file'_ref\}
    \\end\{thebibliography\}
(3) Make sure that the file 'tex-file'.aux exists.
    If it doesn't, run LaTeX over your 'tex-file' once.
(4) Call   
         sortref 'tex-file' 'ref-file'
(5) Run LaTeX twice. That's it.


OPTIONS:

-h : help -- display these help pages.

-q : quiet mode -- do not print warnings.

-v : verbose mode -- do not overwrite existing ref-file without confirmation.

-m : missing references -- show missing references in ref-file.tex.

-s : superfluous references -- show superfluous references in ref-file.tex.


DESCRIPTION: finds the references (\\cite{...}) in the aux-files
and writes the corresponding entries from 'ref-file.tex' (\\bibitem{...}) to
'tex-file_ref.tex' in their order of appearence in 'tex-file.aux'
(removing duplicates).


TIP: it is useful to have a global bibliography file where you collect all
your favorite references. Put the name (and path) of this file into the 
variable \@biblist (you have to edit 'sortref') to make this the default.
Starting from version 2.3.6, you may also have several of those files, and
'sortref' will collect the references from all of them.


WARNINGS: 
- Sorting may fail when references occur in floating environments
  (e.g. figure captions)! Check such references by hand!
- only those references which are really cited will be written to 
    'tex-file'_ref.tex.\n\n");
exit;
}
undef($h);

#- }}}
#- {{{ main program:

#==========================================================================

# main program:

$texfile = shift(@ARGV);
$texfile =~ s/\.aux$//;
$texfile =~ s/\.tex$//;

@biblistin = @ARGV;

fwwarn("$texfile_ref.tex"); # warn if tex-file_ref.tex exists

# do input-files exist?
@biblistin = filecheck(1,@biblistin);
@biblist = filecheck(0,@biblist);
@biblist = (@biblistin,@biblist);

unless (-f "$texfile.aux") {
    print("Run LaTeX once before using sortref.\n");
    exit;
}

@OUT = findrefs("$texfile",0);
@IN = ();
foreach $file (@biblist) {
    @tmp = findrefsin("$file",0);
    @IN = rmdouble(@tmp,@IN);
}

$pluralouts = "";
if ($#OUT != 0) { $pluralouts = "s" };

printmess("References found in file    $texfile.aux: ",$#OUT+1,"\n");

@found = makerefs("$texfile"."_ref.tex",[@biblist],@OUT);

@sup = diff([@found],[@IN]);
@miss = diff([@found],[@OUT]);

if ($missflag) {
    $" = ", ";
    printmess("Missing references in $texfile.tex:\n@miss\n");
    $" = " ";
}
if ($supflag) {
    $" = ", ";
    printmess("Superfluous references in $texfile.tex:\n@sup\n");
    $" = " ";
}

#===========================================================================

#- }}}
#- {{{ subroutines:

# subroutines

#- {{{ sub searchincludes:

#---------------------------------------------------------------------------
sub searchincludes {
    local($file,$input) = @_;

    $input++;
    open($input,$file) || open($input,"$file.tex") || die;

    while (<$input>) {
	if (/[^\%]*\\include\{(.*)\}/) {
	    @collect = (@collect,$1);
	    searchincludes($1,$input);
	    }
    }
    close($input);
    return(@collect)
}
#---------------------------------------------------------------------------

#- }}}
#- {{{ sub makerefs:

#---------------------------------------------------------------------------
sub makerefs {			
    # 
    # call: makerefs(file1,file2,list)
    #
    # writes the references given in 'file2' in the order of 
    # corresponding labels given in 'list' to 'file1'

    local($REF,$refout,$refin,@refin,$file,@found);

    $refout = shift;
    $refin = shift;
    @refin = @{$refin};

    $i = 0;
    
    $/ = "\n";
  MLOOP: foreach $REF (@_) {
      $REFpat = "\Q$REF\E";
      foreach $file (@refin) {
	  open(REFIN,"$file") || die
	      "error: can't open file '$file'. \n";
	  while(<REFIN>) {
	      unless (/^%%CITATION \= /) {
		  s/([^\\]?)%.+/$1/;
	      }
	      if (/\\bibitem\s*(\[.*\])?\s*{$REFpat}/) {
		  push(@found,$REF);
		  push(@REF,$_);
		REFINLOOP: while (<REFIN>) {
		    unless (/^\s*%%CITATION \= /) {
			s/([^\\]?)%.+/$1/;
		    }
		    if (/\\bibitem/) {
			close(REFIN);
			next MLOOP;
		    }
		    if (/^\s*$/) {next REFINLOOP}
		    push(@REF,$_);
		}
	      }
	  }
	  close(REFIN);
      }
  }
    
    
    open(REFOUT,">$refout");

    print {REFOUT} ("\%\n\% $refout -- generated by",
		    " sortref-$version  \n",
		    "\% ((C) R. Harlander, ",
		    "http://www.robert-harlander.de/software/)\n",
		    "\% on ",`date`,"%\n");
    $nn = 0;
    foreach $i (0..$#REF) {
	if($REF[$i] =~ /^[^\%]*bibitem/) {
	    $nn++;
	    print {REFOUT} ("\n\%",$nn,"\n");
	}
	print {REFOUT} ($REF[$i]);
    }
    if ($nn != $#found+1) {
	printwarn("WARNING: counting of references failed.\n");
    }
	
    close(REFOUT);

    $pluralfounds = "";
    if ($#found != 0) { $pluralfounds = "s" };

    printmess("References written to   $refout: ",$#found+1,"\n");

    if ($#found < $#OUT) {
	printwarn("\aMissing references: ",$#OUT-$#found,". ".
		  " Use -m option to see the missing ones.\n\n");
    }

    system("rm -f xxxref");
    return(@found);
}
#---------------------------------------------------------------------------

#- }}}
#- {{{ sub findrefs:

#---------------------------------------------------------------------------
sub findrefs {
    #
    # call: findrefs(file);
    #
    # finds citation-labels in 'file'.aux keeping their order of appearance
    # and removes doublicates
    #
    $/ = "}";
    local($file,$counter,@OUT) = @_;
    $counter++;
    open($counter,"$file") || open($counter,"$file.aux") || die "error \#1";
    while(<$counter>) {
	if (m/\\\@input\{(.*)\}/) {
	    @OUT = findrefs($1,$counter,@OUT)
	    }
	if (m/\\citation\{([^\\]*)\}/) {
	    push(@OUT,split(/\,/,$1));
	    }
    }
    return(rmdouble(@OUT));
}
#---------------------------------------------------------------------------
#--------------------------------------------------------------------------- 
sub findrefsin {
    #
    # call: findrefsin(file);
    #
    # finds citation-labels in 'file'.aux keeping their order of appearance
    # and removes doublicates
    #
    $/ = "}";
    local($file,$counter,@OUT) = @_;
    $counter++;
    open($counter,"$file") || die "error \#1";
    while(<$counter>) {
	if (m/\\\include\{(.*)\}/) {
	    @OUT = findrefs($1,$counter,@OUT)
	    }
	if (m/\\bibitem\{([^\\]*)\}/) {
	    push(@OUT,split(/\,/,$1));
	    }
    }
    return(rmdouble(@OUT));
}
#---------------------------------------------------------------------------

#- }}}
#- {{{ sub diff:

sub diff {
#
# diff([@ARRAY1],[@ARRAY2]) returns the difference of @ARRAY1 and @ARRAY2
#
    my(@IN1,@IN2);
    my($i1,$i2,$j1,$j2,$k);
    @IN1 = @{$_[0]};
    @IN2 = @{$_[1]};

    $j1 = 0;
    foreach $i1 (0..$#IN1) {
      LDIFF: foreach $i2 (0..$#IN2) {
          if ("$IN1[$i1-$j1]" eq "$IN2[$i2]") {
              splice(@IN1,$i1-$j1++,1,());
              splice(@IN2,$i2,1,());
              last LDIFF;
          }
      }
    }
    return(@IN1,@IN2);
}

#- }}}
#- {{{ sub rmdouble:

sub rmdouble { ;#; rmdouble(LIST): removes doublicates from LIST
    my($IN,$OUT,@IN,@OUT);
    @IN = @_;
  SCHLEIFE: for $IN (@IN) {
      for $OUT (@OUT)
      { if ("$OUT" eq "$IN")
        { next SCHLEIFE }; };
      @OUT = (@OUT,$IN); };
    return(@OUT);
}

#- }}}
#- {{{ sub fwwarn:

#---------------------------------------------------------------------------
sub fwwarn {
    for (@_) {
	if (open(OUTFILE,"$_") && $verbose) {
	    close(OUTFILE);
	    printwarn("\nWarning: \n'$0' will overwrite existing file '$_'.", 
		  "\n\nContinue?(y/n): ");
	    unless (<STDIN> =~ "y") {die "Stopped.\n";}
	};
    };
}
#---------------------------------------------------------------------------

#- }}}
#- {{{ sub filecheck:

#---------------------------------------------------------------------------
sub filecheck {
    my($file,$vkey,@OUT);
    $vkey = shift;
    @OUT = ();
    foreach $file (@_) {
	if (-f $file) {
	    push(@OUT,$file);
	} else {
	    if ($vkey) { printwarn("'$file' does not exist... ") }
	    $file .= ".tex";
	    if (-f $file) {
		if ($vkey) { printwarn("using '$file' instead.\n") }
		push(@OUT,$file);
	    } else {
		if ($vkey) { printwarn("skipping.\n") }
	    }
	}
    }
    return(@OUT);
}
	
#---------------------------------------------------------------------------

#- }}}
#- {{{ sub argproc:

#---------------------------------------------------------------------------

sub argproc {
    #
    # by rh
    #
    # get the options from the argument list
    # (more flexible than PERL-builtin options management)
    #
    # Usage: argproc($optkey,$optnum)
    #
    #  $optkey is the name of the option (may be a pattern)
    #  $optnum is the number of arguments for the option
    #
    # Example: argproc("d(efault)?",1) together with the program call
    #          'program -d test1 test2'  returns the list (-d,test1).
    #          @ARGV is (test2) afterwards.
    #
    my($optkey,$optnum,$val,@optlist);
    $optkey = $_[0];
    $optnum = $_[1];
    
    foreach $val (0..$#ARGV) {
	if ($ARGV[$val] =~ m/^-$optkey$/) {
	    @optlist = splice(@ARGV,$val,$optnum+1);
	    last;
	}
    }
    return(@optlist);
}

#---------------------------------------------------------------------------

#- }}}
#- {{{ printmacros :

sub printmess {
    print(@_);
}

sub printwarn {
    unless ($quiet) {print(@_)}
}

#- }}}

#- }}}




