#!/usr/bin/perl
################################################################################
# (c) 2007 Brett T. Warden
# http://www.wgz.com/bwarden/
#
# GTFS data loader
# This program attempts to open all "*.txt" files in the specified directory,
# and assuming they're valid CSV, creates the SQLite database "alldata.db",
# creates a table named after each txt file with columns named from the first
# row of each txt file, then imports the rest of the data in each file into the
# database. Essentially, this is an attempt to create an SQLite database that
# could be the direct source for GTFS feeds.
# This program also creates an index in the database for each column whose name
# ends in _id, on the assumption that those fields will be used for index-based
# searching.
#
# License:
# GNU General Public License
# http://www.fsf.org/licensing/licenses/gpl.html
# Please note in particular the disclaimer of warranty and limitation of
# liability.
#
# Other licensing options may be available upon request.
#
################################################################################
use strict;
use warnings;

$ENV{PATH} = '/usr/bin:/bin';

use File::Spec;
use Text::CSV_XS;
use IO::Handle;

use DBI;

use Data::Dumper;

warn scalar localtime, "\n";

foreach my $dir (@ARGV) {
    warn "Looking in $dir\n";

    opendir(DIR, $dir)
        or die "Couldn't open dir $dir: $!\n";

    my $dbFile = 'alldata.db';
    my $dbh = DBI->connect("dbi:SQLite:dbname=$dbFile", '', '')
        or die "Couldn't open database $dbFile: $!";

    FILE:
    foreach my $file (readdir(DIR)) {
        my $fullname = File::Spec->catfile($dir, $file);
        if(-f $fullname && $file =~ m/(.*)\.txt$/i) {
            my $basename = $1;

            warn "Storing data from $file under $basename\n";

            my $csv = Text::CSV_XS->new({binary => 1, eol => $/});
            my @columns;
            open(my $io, $fullname)
                or die "Failed to open $fullname for reading: $!\n";
             
            # Get the headers
            my @headers = @{$csv->getline($io)}
                or die "Couldn't get headers for $fullname!\n";

            # Drop/add table to database
            $dbh->begin_work;
            drop_add($dbh, $basename, @headers);

            while(my $row = $csv->getline($io)) {
                # insert_row($dbh, $basename, $row);
                insert_row($dbh, $basename, $row, scalar @headers);
            }
            $dbh->commit;
            close($io);

warn scalar localtime, "\n";

        }
        else {
            next FILE;
        }
    }

    closedir(DIR);

}

exit;

sub drop_add {
    my ($dbh, $table, @columns) = @_;

    $dbh->do(sprintf('DROP TABLE IF EXISTS %s;', $dbh->quote($table)));

    $dbh->do(sprintf('CREATE TABLE %s (%s);',
            $dbh->quote($table),
            join(',', map $dbh->quote($_), @columns)));

    # Generate indexes for *_id columns
    foreach my $column (grep /_id$/, @columns) {
        my $index = join('_', q(idx), $table, $column);
        warn "Creating index $index\n";
        $dbh->do(sprintf('CREATE INDEX %s on %s (%s);',
                $dbh->quote($index),
                $dbh->quote($table),
                $dbh->quote($column),
            ));
    }
}

sub insert_row {
    my ($dbh, $table, $row, $numfields) = @_;

    my $sth = $dbh->prepare(sprintf('INSERT INTO %s VALUES(%s);',
            $dbh->quote($table),
            join(',', map $dbh->quote($_), @{$row}[0..($numfields-1)])));
    $sth->execute;
}


