#!/bin/sh

# ---------------------------------------------------------------------------
# Configuration. Everything below is a default; the option parser further
# down may override NATIONAL, STATELVL, COUNTYLVL, STATES, COUNTIES and DROP.
# ---------------------------------------------------------------------------

# Root directory of the TIGER data.
BASE="TIGER2007FE"

# File-name prefix ("set base") specified by Census.
SETBASE="fe_2007"

# Schema prefix; all schemas will be created using this prefix.
PREFIX="tiger"

# Skip Census 2000 data if there is current data? ("y" enables the skip.)
SKIP00="y"

# Scratch directory, created inside the current directory, that zip files
# are unpacked into. Abort immediately if it cannot be created: every later
# step writes below $TMPDIR and would otherwise write to "/<name>".
# NOTE(review): TMPDIR is also a conventional environment variable read by
# other tools; the name is kept because the rest of the script uses it.
TMPDIR=$(mktemp -d -p .) || {
    echo "Unable to create a temporary directory" >&2
    exit 1
}

# Best-effort removal of the scratch directory on every exit path (e.g. when
# option parsing fails). rmdir only removes an empty directory, so leftover
# data is never deleted here; a failure is deliberately silenced.
trap 'rmdir "$TMPDIR" 2>/dev/null' EXIT

# SRID to load the data with
SRID=4269

# Host to connect to
HOST="beren"

# Database to use
DB="gis"

# Encoding to use
ENCODING="LATIN1"

# If we are processing national-level data
NATIONAL="false"

# If we are processing state-level data
STATELVL="false"

# State selector (a glob fragment). Empty here; the option-parsing section
# assigns the real default before the options are read.
STATES=""

# If we are processing county-level data
COUNTYLVL="false"

# County selector (a glob fragment). Empty here; the option-parsing section
# assigns the real default before the options are read.
COUNTIES=""

# If we are dropping things before loading them
DROP="false"

# Defaults for the selectors that take an optional argument. They are glob
# fragments: any layer, any two-digit state, any three-digit county.
NATLAYERS="*"
STATES="[0-9][0-9]"
COUNTIES="[0-9][0-9][0-9]"

# Let getopt(1) normalise the command line. Options:
#   -n            process national-level data
#   -l[LAYERS]    national layer selector (optional argument)
#   -b            process state-level data
#   -s[STATES]    state selector (optional argument)
#   -e            process county-level data
#   -c[COUNTIES]  county selector (optional argument); also enables
#                 county-level processing
#   -d            drop existing objects before loading
if ! TEMP=$(getopt -o nl::s::bc::ed -n 'process_tiger.sh' -- "$@"); then
    echo "Terminating..." >&2
    exit 1
fi

# Replace the positional parameters with getopt's normalised list.
eval set -- "$TEMP"

while :; do
    case "$1" in
        -n)
            NATIONAL="true"
            shift
            ;;
        -l)
            # An omitted optional argument arrives as an empty string.
            NATLAYERS="${2:-*}"
            shift 2
            ;;
        -b)
            STATELVL="true"
            shift
            ;;
        -s)
            STATES="${2:-[0-9][0-9]}"
            shift 2
            ;;
        -e)
            COUNTYLVL="true"
            shift
            ;;
        -c)
            COUNTYLVL="true"
            COUNTIES="${2:-[0-9][0-9][0-9]}"
            shift 2
            ;;
        -d)
            DROP="true"
            shift
            ;;
        --)
            shift
            break
            ;;
        *)
            echo "Internal error!"
            exit 1
            ;;
    esac
done

# split_state_county
#
# Normalises the STATES / COUNTIES selectors (both read and written as
# globals):
#   * If COUNTIES is exactly five digits it is treated as a combined
#     state+county code: the first two digits become STATES and the last
#     three become COUNTIES.
#   * If STATES is empty while COUNTIES is not, STATES falls back to the
#     "any two digits" glob.
#
# A `case` pattern is used instead of `echo $COUNTIES | grep` so that the
# selector (which is itself a glob such as [0-9][0-9][0-9]) is never
# expanded against files in the current directory, and so that only an
# exact five-digit value is split (longer digit strings are left alone).
split_state_county() {
    case "$COUNTIES" in
        [0-9][0-9][0-9][0-9][0-9])
            STATES=${COUNTIES%???}      # leading two digits
            COUNTIES=${COUNTIES#??}     # trailing three digits
            ;;
    esac

    if [ -z "$STATES" ] && [ -n "$COUNTIES" ]; then
        STATES="[0-9][0-9]"
    fi
}

# Handle case where we were given a 5-digit county
split_state_county

# Write the small SQL fragments that get concatenated around generated SQL
# before it is piped to psql:
#   set_encoding - selects the client encoding (taken from $ENCODING,
#                  falling back to LATIN1 if that is unset or empty)
#   begin/commit - transaction delimiters
printf "SET CLIENT_ENCODING TO '%s';\n" "${ENCODING:-LATIN1}" > "$TMPDIR/set_encoding"
printf '%s\n' "begin;" > "$TMPDIR/begin"
printf '%s\n' "commit;" > "$TMPDIR/commit"

# National-level load (only when NATIONAL is "true").
#
# Creates the ${PREFIX}_us schema (dropping it first when DROP is "true"),
# then unpacks every matching national zip file into $TMPDIR and loads each
# shapefile found there into that schema via shp2pgsql | psql.
if [ "$NATIONAL" = "true" ]; then
    # Create the national schema
    SCHEMA="${PREFIX}_us"
    if [ "$DROP" = "true" ]; then
        echo "begin; drop schema if exists $SCHEMA cascade; commit;" | psql -d "$DB" -h "$HOST"
    fi
    echo "begin; create schema $SCHEMA; commit;" | psql -d "$DB" -h "$HOST"

    # Loop through the national files, they're in the base directory with a
    # file glob of ${SETBASE}_us_*.zip
    FILEBASE="${SETBASE}_us"
    echo "National level..."
    # NATLAYERS is intentionally left unquoted so it can act as a glob.
    for zipfile in "$BASE/${FILEBASE}_"${NATLAYERS}.zip; do
        # Test for existence rather than comparing against the pattern text:
        # when NATLAYERS names one specific layer (no glob characters) the
        # expanded word is identical to the pattern even though the file
        # exists, and it must not be skipped.
        if [ ! -e "$zipfile" ]; then
            echo "$BASE/${FILEBASE}_${NATLAYERS}.zip did not match anything!"
            continue
        fi
        BASENAME=${zipfile##*/}
        BASENAME=${BASENAME%.zip}
        echo "Processing $BASENAME..."
        # Handle skipping Census 2000 data if later data exists
        if [ "$SKIP00" = "y" ]; then
            # Find the non-2000 file (same name without the trailing "00")
            NON00=$(printf '%s\n' "$zipfile" | sed -e 's:00\.zip$:.zip:')
            # If the current file is not the non-2000 file, and a
            # non-2000 file exists, then skip the current file.
            if [ "$zipfile" != "$NON00" ] && [ -e "$NON00" ]; then
                continue
            fi
        fi
        unzip -d "$TMPDIR" "$zipfile"
        for file in "$TMPDIR"/*.shp; do
            # The archive may contain no shapefile at all.
            [ -e "$file" ] || continue
            BASESHP=${file##*/}
            BASESHP=${BASESHP%.shp}
            # Table name is the shapefile name without the set/us prefix.
            TBL=${BASESHP#"${FILEBASE}_"}
            shp2pgsql \
                -I \
                -D \
                -s "$SRID" \
                "$TMPDIR/$BASESHP" \
                "$SCHEMA.$TBL" \
                | cat "$TMPDIR/set_encoding" - | psql -d "$DB" -h "$HOST"
        done
        rm -f -- "$TMPDIR/$BASENAME".*
    done
fi

# State- and county-level load.
#
# For every state directory matching $BASE/${STATES}_*:
#   * when STATELVL is "true": (re)create the ${PREFIX}_<state> schema and
#     load every shapefile from the state's zip files into it;
#   * when COUNTYLVL is "true": for every county directory matching
#     ${STATE}${COUNTIES}_*, load each zip file either as a shapefile
#     (shp2pgsql) or, if it only holds a .dbf, through the sql/<type>.sql
#     template plus a COPY fed by dbview. Files left in $TMPDIR afterwards
#     are moved to ./junk.
# STATES and COUNTIES are intentionally unquoted in the globs below.
for statedir in "$BASE"/${STATES}_*; do
    if [ ! -e "$statedir" ]; then
        echo "$BASE/${STATES}_* did not match anything!"
        continue
    fi
    # State code is the directory name up to the first underscore.
    STATE=${statedir##*/}
    STATE=${STATE%%_*}
    SCHEMA="${PREFIX}_${STATE}"
    FILEBASE="${SETBASE}_${STATE}"

    if [ "$STATELVL" = "true" ]; then
        echo "State level..."
        echo "Processing state-level for $STATE..."
        if [ "$DROP" = "true" ]; then
            echo "begin; drop schema if exists $SCHEMA cascade; commit;" | psql -d "$DB" -h "$HOST"
        fi
        echo "begin; create schema $SCHEMA; commit;" | psql -d "$DB" -h "$HOST"
        for zipfile in "$statedir/${FILEBASE}_"*.zip; do
            if [ ! -e "$zipfile" ]; then
                echo "$statedir/${FILEBASE}_*.zip did not match anything!"
                continue
            fi
            BASENAME=${zipfile##*/}
            BASENAME=${BASENAME%.zip}
            # Handle skipping Census 2000 data if later data exists
            if [ "$SKIP00" = "y" ]; then
                # Find the non-2000 file
                NON00=$(printf '%s\n' "$zipfile" | sed -e 's:00\.zip$:.zip:')
                # If the current file is not the non-2000 file, and a
                # non-2000 file exists, then skip the current file.
                if [ "$zipfile" != "$NON00" ] && [ -e "$NON00" ]; then
                    continue
                fi
            fi
            unzip -d "$TMPDIR" "$zipfile"
            for file in "$TMPDIR"/*.shp; do
                # The archive may contain no shapefile at all.
                [ -e "$file" ] || continue
                BASESHP=${file##*/}
                BASESHP=${BASESHP%.shp}
                TBL=${BASESHP#"${FILEBASE}_"}
                shp2pgsql \
                    -I \
                    -D \
                    -s "$SRID" \
                    "$TMPDIR/$BASESHP" \
                    "$SCHEMA.$TBL" \
                    | cat "$TMPDIR/set_encoding" - | psql -d "$DB" -h "$HOST"
            done
            rm -f -- "$TMPDIR/$BASENAME".*
        done
    fi

    if [ "$COUNTYLVL" = "true" ]; then
        echo "Processing county-level for $STATE..."
        for codir in "$statedir/${STATE}"${COUNTIES}_*; do
            if [ ! -e "$codir" ]; then
                echo "$statedir/${STATE}${COUNTIES}_* did not match anything!"
                continue
            fi
            # County code: directory name minus the state prefix, up to the
            # first underscore.
            COUNTY=${codir##*/}
            COUNTY=${COUNTY#"$STATE"}
            COUNTY=${COUNTY%%_*}
            FILEBASE="${SETBASE}_${STATE}${COUNTY}"
            for zipfile in "$codir/${FILEBASE}_"*.zip; do
                # A county directory without zip files leaves the pattern
                # unexpanded; do not hand that to unzip.
                if [ ! -e "$zipfile" ]; then
                    echo "$codir/${FILEBASE}_*.zip did not match anything!"
                    continue
                fi
                BASENAME=${zipfile##*/}
                BASENAME=${BASENAME%.zip}
                # Handle skipping Census 2000 data if later data exists
                if [ "$SKIP00" = "y" ]; then
                    # Find the non-2000 file
                    NON00=$(printf '%s\n' "$zipfile" | sed -e 's:00\.zip$:.zip:')
                    # If the current file is not the non-2000 file, and a
                    # non-2000 file exists, then skip the current file.
                    if [ "$zipfile" != "$NON00" ] && [ -e "$NON00" ]; then
                        continue
                    fi
                fi
                unzip -d "$TMPDIR" "$zipfile"
                if [ -e "$TMPDIR/${BASENAME}.shp" ]; then
                    # Table name: "c" + county code + layer suffix, i.e. the
                    # file name with "${SETBASE}_${STATE}" replaced by "c".
                    TBL="c${BASENAME#"${SETBASE}_${STATE}"}"
                    DROPTBL=""
                    if [ "$DROP" = "true" ]; then
                        DROPTBL="-d"
                    fi
                    # ${DROPTBL:+...} expands to no word at all when empty,
                    # so shp2pgsql never receives an empty argument.
                    shp2pgsql \
                        ${DROPTBL:+"$DROPTBL"} \
                        -I \
                        -D \
                        -s "$SRID" \
                        "$TMPDIR/$BASENAME" \
                        "$SCHEMA.$TBL" \
                        | cat "$TMPDIR/set_encoding" - | psql -d "$DB" -h "$HOST"
                elif [ -e "$TMPDIR/${BASENAME}.dbf" ]; then
                    # If there is no .shp file, then look for just a .dbf file to load.
                    # We have to create the table for these ourselves since
                    # shp2pgsql is not used for bare .dbf files here.
                    # Layer type is the 4th "_"-separated field of the name of
                    # the file actually being loaded (not of whatever *.dbf
                    # files happen to be in the scratch directory).
                    TYPE=$(printf '%s\n' "${BASENAME}.dbf" | cut -f4 -d_ | cut -f1 -d.)
                    if [ "$DROP" = "true" ]; then
                        sed -e "s:SSSS:$SCHEMA:" -e "s:TTTT:c$COUNTY:g" "sql/${TYPE}_drop.sql" | psql -d "$DB" -h "$HOST"
                    fi
                    sed -e "s:SSSS:$SCHEMA:" -e "s:TTTT:c$COUNTY:g" "sql/$TYPE.sql" | psql -d "$DB" -h "$HOST"
                    echo "COPY ${SCHEMA}.c${COUNTY}_${TYPE} FROM STDIN WITH DELIMITER AS '|';" > "$TMPDIR/startcopy"
                    # End-of-data marker for COPY ... FROM STDIN.
                    printf '%s\n' '\.' > "$TMPDIR/stopcopy"

                    # Dump the .dbf as delimited text, strip the trailing
                    # delimiter, and wrap it in begin / encoding / COPY ...
                    # end-marker / commit before sending it to psql.
                    dbview -r -d\|, -b -t "$TMPDIR/${BASENAME}.dbf" | sed -e 's:|$::' | \
                        cat "$TMPDIR/begin" "$TMPDIR/set_encoding" "$TMPDIR/startcopy" - "$TMPDIR/stopcopy" "$TMPDIR/commit" | \
                        psql -d "$DB" -h "$HOST"

                    rm "$TMPDIR/startcopy"
                    rm "$TMPDIR/stopcopy"
                fi
                rm -f -- "$TMPDIR/$BASENAME".*
                # Check if there is still junk in the temp directory and move
                # it out. Globbing (instead of parsing ls) keeps odd file
                # names intact, and the helper files are matched by exact
                # name rather than by substring.
                for junkfile in "$TMPDIR"/*; do
                    [ -e "$junkfile" ] || continue
                    BASEJUNK=${junkfile##*/}
                    case "$BASEJUNK" in
                        begin|commit|set_encoding) continue ;;
                    esac
                    # Make sure the destination exists; otherwise mv would
                    # rename the file to "junk" itself.
                    mkdir -p junk
                    mv -- "$junkfile" "junk/$BASEJUNK"
                done
            done
        done
    fi
done

# Delete the helper SQL fragment files written during setup.
for helper in set_encoding begin commit; do
    rm "$TMPDIR/$helper"
done

# Remove temp dir (rmdir only succeeds if nothing else is left inside).
rmdir "$TMPDIR"
