#!/usr/bin/perl
################################################################################
# (c) 2007 Brett T. Warden
# http://www.wgz.com/bwarden/
#
# GTFS data loader
# This program attempts to open all "*.txt" files in the specified directory,
# and assuming they're valid CSV, creates the SQLite database "alldata.db",
# creates a table named after each txt file with columns named from the first
# row of each txt file, then imports the rest of the data in each file into the
# database. Essentially, this is an attempt to create an SQLite database that
# could be the direct source for GTFS feeds.
# This program also creates an index in the database for each column whose name
# ends in _id, on the assumption that those fields will be used for index-based
# searching.
#
# License:
# GNU General Public License
# http://www.fsf.org/licensing/licenses/gpl.html
# Please note in particular the disclaimer of warranty and limitation of
# liability.
#
# Other licensing options may be available upon request.
#
################################################################################
use strict;
use warnings;

$ENV{PATH} = '/usr/bin:/bin';

use File::BOM qw(open_bom); # to open files with BOM
use File::Spec;
use Text::CSV_XS;
use IO::Handle;
use Getopt::Std;
use Smart::Comments -ENV;

use DBI;

use Data::Dumper;

my %opts;
getopts('f:', \%opts);

my %CREATED; # cache of tables we've already created in this session

warn scalar localtime, "\n";

foreach my $dir (@ARGV) {
    warn "Looking in $dir\n";

    opendir(DIR, $dir)
        or die "Couldn't open dir $dir: $!\n";

    my $dbFile = defined $opts{f} ? $opts{f} : 'alldata.db';

    my $dbh = DBI->connect("dbi:SQLite:dbname=$dbFile", '', '')
        or die "Couldn't open database $dbFile: $!";

    warn "Creating database $dbFile\n";

    my $csv = Text::CSV_XS->new(
      {
        binary => 1,
        eol => $/,
        allow_loose_quotes => 1, # Workaround bad input
      },
    );

    FILE:
    foreach my $file (readdir(DIR)) {
        my $fullname = File::Spec->catfile($dir, $file);
        if(-f $fullname && $file =~ m/(.*)\.txt$/i) {
            my $basename = $1;

            warn "Storing data from $file under $basename\n";

            my @columns;
            #open(my $io, $fullname)
            open_bom(my $io, $fullname, ':utf8');
             
            # Get the headers
            my @headers = @{$csv->getline($io)}
                or die "Couldn't get headers for $fullname!\n";

            # Check for invalid column names
            @headers = grep {!/^\d+$/} @headers; # Found a column named "1" in md-metro stop_times.txt

            # Add/alter table to database
            $dbh->begin_work;
            add_alter($dbh, $basename, @headers);
            my $num_cols = @headers;
            my $sth = prepare_insert_row($dbh, $basename, @headers);

            warn "Filling table $basename\n";
            my $rows = 0;
            while(my $row = $csv->getline($io)) { ### Working===[%]        done
                $rows++;
                $sth->execute(@$row[0 .. $num_cols - 1]);
            }
            $dbh->commit;
            print "\nInserted $rows rows\n";
            close($io);

            warn scalar localtime, "\n";

        }
        else {
            next FILE;
        }
    }

    closedir(DIR);

}

exit;

sub add_alter {
    my ($dbh, $table, @columns) = @_;

    if (! $CREATED{$table} ){
      # We haven't created this table in this session. Drop it and create
      # a new one, with just a primary key right now.
      warn "DESTROYING $table";
      $dbh->do(sprintf('DROP TABLE IF EXISTS %s;', $dbh->quote($table)));
      warn "CREATING $table";
      $dbh->do(sprintf('CREATE TABLE %s (id INTEGER PRIMARY KEY);',
          $dbh->quote($table)))
        or die $dbh->errstr;
      warn "CREATED $table";

      # Remember that we've created this table; don't do it again this session.
      $CREATED{$table}++;
    }

    # Check which columns we already have in the table
    my $col_sth = $dbh->column_info(undef, undef, $table, undef);
    my %existing; # Note columns that already exist in db
    while (my $column_info = $col_sth->fetchrow_hashref) {
      $existing{$column_info->{COLUMN_NAME}}++;
    }
    warn "EXISTING: ".Dumper(\%existing);
    foreach my $col (@columns)
    {
      if (! $existing{$col} ) {
        # Add a column for each column. Anything ending in _id, _method, or _type
        # should be an INTEGER, except for currency_id, feed_id, and zone_id.
        my $table_sql = sprintf('ALTER TABLE %s ADD COLUMN %s;',
          $dbh->quote($table),
          $dbh->quote($col).(($col =~ m/^(?!.*currency|feed|zone).*_(id|method|type)$/) ? ' INTEGER' : ''));
        warn "$table_sql\n";
        $dbh->do($table_sql);
      }
    }

    # Generate indexes for *_id columns
    foreach my $column (grep /_id$/, @columns) {
        my $index = join('_', q(idx), $table, $column);
        warn "Creating index $index\n";
        my $index_sql = sprintf('CREATE INDEX IF NOT EXISTS %s on %s (%s);',
          map { $dbh->quote( $_ ) } $index, $table, $column
        );

        warn "$index_sql\n";
        $dbh->do($index_sql);
    }
}

sub prepare_insert_row {
  my ($dbh, $table, @columns) = @_;

  warn "PREPARE $table ".join(',', @columns);

  my $sql = sprintf('INSERT INTO %s (%s) VALUES(%s);',
    $dbh->quote($table),
    join(',', @columns),
    join(',', (('?') x @columns)));
  warn "PREPARE: $sql";
  $dbh->prepare($sql);
}

