#!/bin/sh

# Extracts a ParamChem-compatible c_file and e_file from a Gaussian log file.
# Run without command-line arguments for usage information.

# Copyright (C) 2014 Kenno Vanommeslaeghe
# 
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
# 
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

if [ $# -lt 1 ] ; then
  echo "Usage: `basename $0` <gaussian log file>
       `basename $0` <prefix for .c_file and .e_file> <gaussian log file>...

If only one argument is given, the prefix will be based on the log file name.
If 2 or more arguments are given, the first argument is taken as a prefix.
In the latter case, output files will be overwritten without prompting!

WARNING! This is the version that outputs *all* the single points." >&2
  exit 1
 fi
if [ $# = 1 ] ; then
  prefix=${1##*/}
  prefix=${prefix%.*}
  if [ -e "$prefix.e_file" -o -e "$prefix.c_file" ] ; then
    echo "ERROR: output file already exists!" >&2
    exit 1
  fi
 else
  prefix=$1
  shift 1
 fi

awk -v pre="$prefix" 'BEGIN {stage=0;efn=pre ".e_file";cfn=pre ".c_file"}
    (stage < 2){if ($1 ~ "^\\*\\*\\*\\*\\*\\*\\*\\*\\*\\*\\*\\*\\*\\*\\*\\*\\*\\*\\*\\*\\*\\*\\*\\*\\*\\*\\*\\*\\*\\*\\*\\*\\*\\*\\*\\*\\*\\*\\*\\*\\*\\*"){
        getline;if ($1 == "Gaussian") {if (stage) {err=1;exit 1};
          stage=1;i=$3;if (substr(i,1,1) > 6) gver=substr(i,1,2)
           else gver="1" substr(i,1,2)} }
       else if ($0 ~ "^ *#"){if (stage != 1) {err=1;exit 1};
        i=toupper($0);
        if (i ~ "MP2") {tf=4;et="EUMP2";ef=6;print "MP2 calculation detected." > "/dev/stderr"}
         else if (i ~ "HF") {tf=1;et="SCF";ef=5;print "HF calculation detected." > "/dev/stderr"}
         else if (i ~ "AM1") {print "AM1 calculation detected." > "/dev/stderr";
  	   if (gver < 109) {tf=1;et="Energy=";ef=2}
            else {tf=1;et="SCF";ef=5} }
         else {err=1;exit 1};
        stage=2};
      next}
    (stage == 3){if (NF == 6) printf("%9.5f %9.5f %9.5f\n",$4,$5,$6) > cfn
       else if ($1 ~ "^--------------------------------------------------------------------") stage=2
       else {err=1;exit 1} }
    (stage == 2){if ($2 == "orientation:" && ($1 == "Input" || $1 == "Z-Matrix")) {
        getline;getline;getline;getline;stage=3}
       else if ($tf == et) {e=$ef;
	if (et == "EUMP2") if (! sub("D\\+","E+",e)) {err=1;exit 1};
        printf("%-18.4f\n",e * 627.509471) > efn} }
    END{if (err || stage != 2) {
      print "ERROR: invalid or unrecognized gaussian output!" > "/dev/stderr"
      exit 1} }' $*
