<?php
/**
 * OutScraper CSV Import Script with Web Interface - Updated with Hours Handling Options
 * /public/import/import_outscraper.php
 */

define('IMPORT_ACCESS', true);

require_once 'config.php';
require_once 'classes/ImportLogger.php';
require_once 'classes/DatabaseConnection.php';
require_once 'classes/DataMapper.php';
require_once 'classes/HoursParser.php';

/**
 * Imports an OutScraper pipe-delimited CSV export into the store database.
 *
 * Every data row is mapped through DataMapper, given the shared BestCall
 * description and a cleaned website domain, and then written (store row,
 * default category, business hours) inside one database transaction per row.
 * A failing row is logged and skipped; it never aborts the whole import.
 */
class OutScraperImporter {
    /** @var ImportLogger Receives log lines and the import statistics. */
    private $logger;
    /** @var DatabaseConnection Used for validation, inserts and transactions. */
    private $db;
    /** @var DataMapper Turns a raw CSV record into store data / hours data. */
    private $mapper;
    /** @var HoursParser Checks and imports business hours. */
    private $hoursParser;
    /** @var bool True when driven by the web form, false for CLI runs. */
    private $isWebMode;
    /** @var array Effective options: skip_unparseable_hours, strict_quality_mode. */
    private $importOptions;
    
    const BESTCALL_DESCRIPTION = 'Welcome to BestCall. This provider has been selected through our research process as among the finest in their field for both technical expertise and reported business ethics.  BestCall identifies top-tier service providers nationwide through anonymous evaluation. We research trades professionals without their knowledge or participation, ensuring unbiased assessments based solely on demonstrated performance and reputation.  No business can request inclusion in BestCall or influence our selection process through payment or application. We are not a commissioned referral service, advertising platform, or star rating system. Our results are provided free to consumers with no registration or personal information required.  Our mission is simple: Connect you with proven, outstanding service providers who consistently deliver exceptional results. Second, we deliver modern processes to keep leaders performing their best.  When you need professional service, make your next call your Best Call.';
    
    /**
     * Wire up logger, database, mapper and hours parser, then validate the database.
     *
     * @param bool  $webMode True when called from the web interface.
     * @param array $options Overrides for 'skip_unparseable_hours' and
     *                       'strict_quality_mode' (both default to false).
     *
     * @throws Exception When DatabaseConnection::validateDatabase() reports failure.
     */
    public function __construct($webMode = false, $options = []) {
        $this->isWebMode = $webMode;
        $this->importOptions = array_merge([
            'skip_unparseable_hours' => false,
            'strict_quality_mode' => false
        ], $options);
        
        $this->logger = new ImportLogger($webMode);
        $this->db = new DatabaseConnection($this->logger);
        $this->mapper = new DataMapper($this->db, $this->logger, $this->importOptions);
        $this->hoursParser = new HoursParser($this->db, $this->logger, $this->importOptions);
        
        if (!$this->db->validateDatabase()) {
            throw new Exception("Database validation failed - check required tables");
        }
        
        $this->logger->log("Import options: " . json_encode($this->importOptions));
    }
    
    /**
     * Reduce a website URL to its bare host name.
     *
     * Strips surrounding whitespace, a leading scheme ("https://", "ftp://")
     * or protocol-relative "//", a leading "www." in any letter case, and
     * everything from the first "/", "?" or "#" onwards. The case of the
     * remaining host is left untouched.
     *
     * @param mixed $url Raw website value from the mapped record.
     *
     * @return string The cleaned domain, or '' when $url is empty.
     */
    private function cleanDomainName($url): string {
        if (empty($url)) {
            return '';
        }
        
        // Trim FIRST: the patterns below are anchored at the start of the
        // string, so leading whitespace would otherwise defeat them.
        $domain = trim((string) $url);
        
        // Remove scheme (http://, https://, ftp://, ...) or a bare "//"
        $domain = preg_replace('/^(?:[a-z][a-z0-9+.\-]*:)?\/\//i', '', $domain);
        
        // Remove www. prefix regardless of letter case
        $domain = preg_replace('/^www\./i', '', $domain);
        
        // Remove path, query string and fragment
        $domain = preg_replace('/[\/?#].*$/s', '', $domain);
        
        return trim((string) $domain);
    }
    
    /**
     * Import every data row of a pipe-delimited CSV file.
     *
     * The first row is treated as the header row. Rows that contain no
     * non-empty value are counted as processed but otherwise ignored. A row
     * that throws is logged as "Row N failed" and the import continues.
     *
     * @param string $filePath Path of the CSV file to import.
     *
     * @return mixed Whatever ImportLogger::getStats() returns.
     *
     * @throws Exception When the file is missing, cannot be opened, has no
     *                   header row, or lacks a required column.
     */
    public function importFromFile($filePath) {
        $this->logger->log("Starting OutScraper CSV import from: " . basename($filePath));
        
        if (!file_exists($filePath)) {
            throw new Exception("File not found: {$filePath}");
        }
        
        $handle = null;
        
        try {
            // Lift time/memory limits for large files (CLI runs only)
            if (!$this->isWebMode) {
                set_time_limit(0);
                ini_set('memory_limit', '512M');
            }
            
            $handle = fopen($filePath, 'r');
            if ($handle === false) {
                throw new Exception("Could not open CSV file");
            }
            
            $headers = $this->readCsvRow($handle);
            if (!$headers) {
                throw new Exception("No headers found in CSV file");
            }
            
            // Clean headers (remove BOM and stray quotes, trim whitespace)
            $headers = array_map(function ($header) {
                return trim(str_replace(["\xEF\xBB\xBF", '"'], '', (string) $header));
            }, $headers);
            
            $this->logger->log("Found " . count($headers) . " columns in CSV");
            $this->logger->log("Sample headers: " . implode(', ', array_slice($headers, 0, 10)) . "...");
            
            $this->validateHeaders($headers);
            
            $rowNumber = 2; // Header is row 1
            
            while (($row = $this->readCsvRow($handle)) !== false) {
                $this->logger->incrementStat('processed');
                
                // A blank line is returned by fgetcsv() as [null]. Skip it,
                // but still advance the counter so that later "Row N failed"
                // messages keep pointing at the right row of the file.
                if (empty(array_filter($row))) {
                    $rowNumber++;
                    continue;
                }
                
                try {
                    $this->processRecord($this->buildRecord($headers, $row), $rowNumber);
                } catch (Exception $e) {
                    $this->logger->logError("Row {$rowNumber} failed: " . $e->getMessage());
                }
                
                $this->logger->logProgress();
                $rowNumber++;
                
                // Push buffered progress output to the browser periodically
                if ($rowNumber % 1000 === 0 && $this->isWebMode) {
                    if (ob_get_level() > 0) {
                        ob_flush();
                    }
                    flush();
                }
            }
            
            $this->logger->printFinalStats();
            return $this->logger->getStats();
            
        } catch (Exception $e) {
            $this->logger->logError("Critical import error: " . $e->getMessage());
            throw $e;
        } finally {
            // is_resource() is false both for a failed fopen() and for an
            // already-closed handle, so the handle is closed exactly once.
            if (is_resource($handle)) {
                fclose($handle);
            }
        }
    }
    
    /**
     * Read one pipe-delimited CSV row of unlimited length.
     *
     * Enclosure and escape are passed explicitly with PHP's historical
     * defaults; relying on the default escape is deprecated as of PHP 8.4.
     *
     * @param resource $handle Open file handle.
     *
     * @return array|false The row's fields, or false at end of file / on error.
     */
    private function readCsvRow($handle) {
        return fgetcsv($handle, 0, '|', '"', '\\');
    }
    
    /**
     * Combine a data row with the header names into an associative record.
     *
     * Short rows are padded with '' and long rows are truncated so that the
     * field count always matches the header count.
     *
     * @param array $headers Cleaned header names.
     * @param array $row     Raw row fields.
     *
     * @return array Record keyed by header name.
     *
     * @throws Exception When the headers cannot be combined with the row.
     */
    private function buildRecord(array $headers, array $row): array {
        $columnCount = count($headers);
        $row = array_slice(array_pad($row, $columnCount, ''), 0, $columnCount);
        
        $record = array_combine($headers, $row);
        if ($record === false) {
            throw new Exception("Failed to combine headers with row data");
        }
        
        return $record;
    }
    
    /**
     * Map one CSV record and store it, honouring the hours handling options.
     *
     * Nothing is written when the mapper returns a falsy result, or when
     * 'skip_unparseable_hours' is enabled and the hours cannot be parsed.
     *
     * @param array $record    Record keyed by CSV header name.
     * @param int   $rowNumber Row number in the file (currently unused here).
     *
     * @throws Exception "Database transaction failed: ..." after a rollback;
     *                   the original exception is attached as previous.
     */
    private function processRecord(array $record, int $rowNumber): void {
        $mappedData = $this->mapper->mapRecord($record);
        
        if (!$mappedData) {
            return;
        }
        
        // Clean domain name if website exists
        if (!empty($mappedData['website'])) {
            $mappedData['website'] = $this->cleanDomainName($mappedData['website']);
            $this->logger->log("Cleaned domain: {$mappedData['website']} for {$mappedData['name']}");
        }
        
        // Add BestCall description
        $mappedData['description'] = self::BESTCALL_DESCRIPTION;
        
        // Hours are extracted at most once per record; a flag is used rather
        // than a null check because the extracted value itself may be empty.
        $hoursData = null;
        $hoursExtracted = false;
        
        // Pre-check hours if skip_unparseable_hours is enabled
        if ($this->importOptions['skip_unparseable_hours']) {
            $hoursData = $this->mapper->extractHoursData($record);
            $hoursExtracted = true;
            
            if (!$this->hoursParser->canParseHours($hoursData)) {
                $this->logger->logSkipped("Hours cannot be parsed (skip enabled)", $mappedData['name']);
                return;
            }
        }
        
        try {
            $this->db->beginTransaction();
            
            // Insert store record with description
            if (!$this->db->insertStore($mappedData)) {
                throw new Exception("Failed to insert store record");
            }
            
            // NOTE(review): 'store_id' is read from $mappedData after
            // insertStore(); presumably insertStore() takes the array by
            // reference and fills it in - confirm in DatabaseConnection.
            $this->db->assignCategoryToStore($mappedData['store_id'], DEFAULT_CATEGORY_ID);
            
            if (!$hoursExtracted) {
                $hoursData = $this->mapper->extractHoursData($record);
            }
            $hoursResult = $this->hoursParser->parseAndImportHours($mappedData['store_id'], $hoursData);
            
            // Hours failed but skipping is disabled: the business is still imported
            if (!$hoursResult && !$this->importOptions['skip_unparseable_hours']) {
                $this->logger->log("IMPORTED WITHOUT HOURS: {$mappedData['name']} (Store ID: {$mappedData['store_id']})");
            }
            
            $this->db->commit();
            $this->logger->logImport($mappedData['name'], $mappedData['store_id']);
            
        } catch (Exception $e) {
            $this->db->rollback();
            // Keep the original exception reachable through getPrevious()
            throw new Exception("Database transaction failed: " . $e->getMessage(), 0, $e);
        }
    }
    
    /**
     * Ensure the required columns exist and log where the key columns are.
     *
     * Header names are compared case-insensitively, both for the required
     * column check and for the key-column position log.
     *
     * @param array $headers Cleaned header names.
     *
     * @throws Exception When 'name', 'street' or 'city' is missing.
     */
    private function validateHeaders(array $headers): void {
        $required = ['name', 'street', 'city'];
        
        // Convert headers to lowercase for case-insensitive matching
        $lowerHeaders = array_map('strtolower', $headers);
        
        $missing = array_values(array_diff($required, $lowerHeaders));
        
        if (!empty($missing)) {
            $this->logger->log("Available headers: " . implode(', ', array_slice($headers, 0, 20)));
            throw new Exception("Missing required columns: " . implode(', ', $missing));
        }
        
        $this->logger->log("Header validation passed - found all required columns");
        
        // Log some key column positions for debugging
        $keyColumns = ['name', 'street', 'city', 'phone', 'email_1', 'working_hours_csv_compatible'];
        foreach ($keyColumns as $col) {
            $pos = array_search($col, $lowerHeaders, true);
            if ($pos !== false) {
                $this->logger->log("Column '{$col}' found at position {$pos}");
            }
        }
    }
    
    /**
     * Summary for the web result page.
     *
     * @return mixed Whatever ImportLogger::getWebSummary() returns.
     */
    public function getWebSummary() {
        return $this->logger->getWebSummary();
    }
}

// Web Interface HTML + Processing
//
// Rendered when the upload form was submitted (POST import_action) or when
// the form is requested explicitly (?show_form=1). The request ends with
// exit, so the CLI / landing-page code further down never runs for it.
if (!empty($_POST['import_action']) || !empty($_GET['show_form'])) {
    // Single HTML escaper for every dynamic value printed on this page
    // (config constants, ini values, logger output, exception messages).
    $esc = function ($value) {
        return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    };
?>
<!DOCTYPE html>
<html>
<head>
    <title>OutScraper Import Utility</title>
    <style>
        body { font-family: Arial, sans-serif; max-width: 900px; margin: 0 auto; padding: 20px; }
        .container { background: #f9f9f9; padding: 20px; border-radius: 8px; margin: 20px 0; }
        .success { background: #d4edda; border: 1px solid #c3e6cb; color: #155724; }
        .error { background: #f8d7da; border: 1px solid #f5c6cb; color: #721c24; }
        .info { background: #d1ecf1; border: 1px solid #bee5eb; color: #0c5460; }
        .warning { background: #fff3cd; border: 1px solid #ffeaa7; color: #856404; }
        .log-entry { font-family: monospace; font-size: 12px; margin: 2px 0; }
        .final-stats { background: #e9ecef; padding: 15px; margin: 10px 0; }
        input[type="file"] { margin: 10px 0; }
        button { background: #007bff; color: white; padding: 10px 20px; border: none; border-radius: 4px; cursor: pointer; }
        button:hover { background: #0056b3; }
        .progress { margin: 20px 0; }
        .category-field { margin: 15px 0; padding: 10px; background: #fff3cd; border: 1px solid #ffeaa7; border-radius: 4px; }
        .options-section { margin: 20px 0; padding: 15px; background: #e2e3e5; border-radius: 4px; }
        .checkbox-group { margin: 10px 0; }
        .checkbox-group label { display: flex; align-items: center; margin: 8px 0; }
        .checkbox-group input[type="checkbox"] { margin-right: 10px; }
        .option-description { font-size: 14px; color: #6c757d; margin-left: 25px; margin-bottom: 10px; }
    </style>
</head>
<body>
    <h1>OutScraper Data Import Utility</h1>
    
    <?php if (!empty($_POST['import_action'])): ?>
        <div class="container info">
            <h2>Import Processing...</h2>
            <?php
            if (empty($_FILES['csv_file']) || $_FILES['csv_file']['error'] !== UPLOAD_ERR_OK) {
                echo '<div class="container error">No file uploaded or upload error.</div>';
            } else {
                try {
                    echo '<div class="progress">Starting import...</div>';
                    flush();
                    
                    // Increase limits for large OutScraper files
                    ini_set('max_execution_time', 300);  // 5 minutes
                    ini_set('memory_limit', '512M');
                    
                    // Build options from form (an unchecked box is simply absent from $_POST)
                    $importOptions = [
                        'skip_unparseable_hours' => !empty($_POST['skip_unparseable_hours']),
                        'strict_quality_mode' => !empty($_POST['strict_quality_mode'])
                    ];
                    
                    $importer = new OutScraperImporter(true, $importOptions);
                    $stats = $importer->importFromFile($_FILES['csv_file']['tmp_name']);
                    $summary = $importer->getWebSummary();
                    
                    echo '<div class="container success">';
                    echo '<h3>Import Completed Successfully!</h3>';
                    echo '<p><strong>Runtime:</strong> ' . $esc($summary['runtime']) . '</p>';
                    echo '<p><strong>Success Rate:</strong> ' . $esc($summary['success_rate']) . '%</p>';
                    
                    // Display import options used
                    echo '<div class="warning" style="margin: 10px 0;">';
                    echo '<h4>Import Options Used:</h4>';
                    echo '<ul>';
                    echo '<li>Skip Unparseable Hours: ' . ($importOptions['skip_unparseable_hours'] ? 'YES' : 'NO') . '</li>';
                    echo '<li>Strict Quality Mode: ' . ($importOptions['strict_quality_mode'] ? 'YES' : 'NO') . '</li>';
                    echo '</ul>';
                    echo '</div>';
                    
                    // Label => stats key. A counter the logger never touched
                    // is shown as 0 instead of raising an undefined-index notice.
                    $statLabels = [
                        'Processed' => 'processed',
                        'Imported' => 'imported',
                        'Skipped' => 'skipped',
                        'Errors' => 'errors',
                        'Hours Parsed' => 'hours_parsed',
                        'Hours Failed' => 'hours_failed',
                        'Timezone Fallbacks' => 'timezone_fallbacks',
                    ];
                    
                    echo '<ul>';
                    foreach ($statLabels as $label => $statKey) {
                        echo '<li>' . $label . ': ' . $esc($stats[$statKey] ?? 0) . '</li>';
                    }
                    echo '</ul>';
                    echo '<p><strong>Log File:</strong> ' . $esc($summary['log_file']) . '</p>';
                    echo '<p><strong>Category Assigned:</strong> ' . $esc(DEFAULT_CATEGORY_ID) . '</p>';
                    echo '<p><strong>User ID:</strong> ' . $esc(STORE_DEFAULTS['user_id']) . '</p>';
                    echo '</div>';
                    
                } catch (Exception $e) {
                    echo '<div class="container error">';
                    echo '<h3>Import Failed</h3>';
                    echo '<p>' . $esc($e->getMessage()) . '</p>';
                    echo '</div>';
                }
            }
            ?>
        </div>
        
        <div class="container">
            <a href="?show_form=1">Import Another File</a>
        </div>
        
    <?php else: ?>
        <div class="container">
            <h2>Upload OutScraper CSV File</h2>
            
            <div class="info">
                <h4>Requirements:</h4>
                <ul>
                    <li>File must be in CSV format with pipe (|) delimiter</li>
                    <li>Required columns: <code>name</code>, <code>street</code>, <code>city</code></li>
                    <li>Recommended: <code>phone</code>, <code>email_1</code>, <code>working_hours_csv_compatible</code></li>
                    <li>File size limit: <?php echo $esc(ini_get('upload_max_filesize')); ?></li>
                    <li>Large OutScraper files with 100+ columns are supported</li>
                </ul>
            </div>
            
            <div class="category-field">
                <h4>Import Configuration:</h4>
                <p><strong>All businesses will be assigned to category:</strong> <code><?php echo $esc(DEFAULT_CATEGORY_ID); ?></code></p>
                <p><strong>All businesses will use user ID:</strong> <code><?php echo $esc(STORE_DEFAULTS['user_id']); ?></code></p>
                <p><strong>Timezone fallback:</strong> America/New_York (ID: <?php echo $esc(IMPORT_SETTINGS['default_timezone_id']); ?>)</p>
                <p><strong>BestCall description:</strong> Automatically applied to all imported businesses</p>
                <p><strong>Domain cleaning:</strong> URLs automatically cleaned (removes https://, www., etc.)</p>
            </div>
            
            <form method="post" enctype="multipart/form-data">
                <div>
                    <label for="csv_file">Select CSV File:</label><br>
                    <input type="file" name="csv_file" id="csv_file" accept=".csv" required>
                </div>
                
                <div class="options-section">
                    <h4>Hours Handling Options:</h4>
                    
                    <div class="checkbox-group">
                        <label>
                            <input type="checkbox" name="skip_unparseable_hours" value="1">
                            <strong>Skip businesses with unparseable hours</strong>
                        </label>
                        <div class="option-description">
                            When enabled, businesses with hours that cannot be parsed will be completely excluded from import. 
                            Use this for maximum quality control when accurate hours are critical.
                        </div>
                    </div>
                    
                    <div class="checkbox-group">
                        <label>
                            <input type="checkbox" name="strict_quality_mode" value="1">
                            <strong>Strict quality mode</strong>
                        </label>
                        <div class="option-description">
                            Applies enhanced filtering: requires proper domain website (no social media links), 
                            minimum rating thresholds, and complete contact information.
                        </div>
                    </div>
                </div>
                
                <div class="warning">
                    <h4>Hours Handling Behavior:</h4>
                    <ul>
                        <li><strong>If "Skip unparseable hours" is CHECKED:</strong> Businesses with unparseable hours are completely excluded from import</li>
                        <li><strong>If "Skip unparseable hours" is UNCHECKED:</strong> Businesses are imported but hours tables remain empty (no false "closed" status)</li>
                        <li><strong>Never:</strong> Businesses are never marked as "closed" due to parsing failures</li>
                    </ul>
                </div>
                
                <div style="margin: 20px 0;">
                    <input type="hidden" name="import_action" value="1">
                    <button type="submit">Start Import</button>
                </div>
            </form>
        </div>
        
        <div class="container info">
            <h4>Import Process:</h4>
            <ol>
                <li>Upload your OutScraper CSV export</li>
                <li>Configure quality and hours handling options</li>
                <li>System validates file format and required columns</li>
                <li>Data is mapped and cleaned according to database schema</li>
                <li>Domain names are automatically cleaned (removes protocols, www, paths)</li>
                <li>BestCall description is automatically applied to all businesses</li>
                <li>Hours parsing is attempted with your chosen handling method</li>
                <li>Each business gets assigned standard user_id and category</li>
                <li>Records are imported with full transaction safety</li>
                <li>Detailed log file is generated for review</li>
            </ol>
        </div>
    <?php endif; ?>
    
</body>
</html>
<?php
    exit;
}

// Command line fallback (if accessed via CLI)
//
// Usage: php import_outscraper.php <csv_file_path> [--skip-unparseable-hours] [--strict-quality]
// The two option flags may appear before or after the CSV path.
if (php_sapi_name() === 'cli') {
    $knownFlags = ['--skip-unparseable-hours', '--strict-quality'];
    $cliArguments = array_slice($argv, 1);
    
    // The CSV path is the first argument that is not one of the known flags,
    // so "script.php --strict-quality file.csv" works as well as
    // "script.php file.csv --strict-quality".
    $positional = array_values(array_diff($cliArguments, $knownFlags));
    
    if (empty($positional)) {
        echo "Usage: php import_outscraper.php <csv_file_path> [--skip-unparseable-hours] [--strict-quality]\n";
        echo "Or visit the web interface: http://yoursite.com/import/import_outscraper.php?show_form=1\n";
        exit(1);
    }
    
    $csvPath = $positional[0];
    
    // Parse CLI options
    $options = [
        'skip_unparseable_hours' => in_array('--skip-unparseable-hours', $cliArguments, true),
        'strict_quality_mode' => in_array('--strict-quality', $cliArguments, true)
    ];
    
    try {
        echo "OutScraper CSV Import Utility\n";
        echo "=============================\n";
        echo "File: {$csvPath}\n";
        echo "Skip Unparseable Hours: " . ($options['skip_unparseable_hours'] ? 'YES' : 'NO') . "\n";
        echo "Strict Quality Mode: " . ($options['strict_quality_mode'] ? 'YES' : 'NO') . "\n";
        echo "Default Category: " . DEFAULT_CATEGORY_ID . "\n";
        echo "User ID: " . STORE_DEFAULTS['user_id'] . "\n";
        echo "BestCall Description: Applied to all imports\n";
        echo "Domain Cleaning: Enabled\n\n";
        
        $importer = new OutScraperImporter(false, $options);
        $stats = $importer->importFromFile($csvPath);
        
        echo "\nImport completed!\n";
        echo "Imported: {$stats['imported']} records\n";
        
    } catch (Exception $e) {
        // A non-zero exit status lets shell scripts / cron detect the failure
        echo "\nIMPORT FAILED: " . $e->getMessage() . "\n";
        exit(1);
    }
} else {
    // Web access without parameters - show instructions
    echo "<h1>OutScraper Import Utility</h1>";
    echo "<p><a href='?show_form=1'>Start Import Process</a></p>";
    echo "<p><a href='test_import.php'>Test System Configuration</a></p>";
}
?>