import json
import os
import re
import pandas as pd
import argparse
from sqlalchemy import create_engine, text
from openpyxl import load_workbook
from openpyxl.utils import range_boundaries
from openpyxl.styles import PatternFill
from datetime import datetime, date
# Database connection setup
DB_CONNECTION = "mysql+pymysql://root:dataaegis123@localhost/reconcile_rule_engine"
engine = create_engine(DB_CONNECTION)
# Global dry-run flag (set from CLI)
DRY_RUN = False

# ============================================================================
# GLOBAL UTILITY FUNCTIONS (to avoid duplication)
# ============================================================================

def get_numeric_value(value):
    """Convert a value to numeric, handling None, empty strings, and formatted numbers"""
    if value is None or value == '':
        return 0
    if isinstance(value, str):
        value = value.strip().replace(',', '').replace('₹', '')
    try:
        return float(value)
    except (ValueError, TypeError):
        return 0

def find_column(df, keywords):
    """Find column in dataframe matching any of the keywords"""
    if not isinstance(keywords, list):
        keywords = [keywords]
    for col in df.columns:
        col_lower = str(col).lower().replace(' ', '_').replace('/', '_')
        for kw in keywords:
            if str(kw).lower() in col_lower:
                return col
    return None

def last_4_digits_match(val1, val2):
    """Check if last 4 digits of two values match"""
    if val1 is None or val2 is None or pd.isna(val1) or pd.isna(val2):
        return False
    str1 = str(val1)[-4:] if len(str(val1)) >= 4 else str(val1)
    str2 = str(val2)[-4:] if len(str(val2)) >= 4 else str(val2)
    return str1 == str2

def execute_query(query, params=None):
    """Execute a query and return results"""
    with engine.connect() as conn:
        if params:
            return conn.execute(text(query), params)
        return conn.execute(text(query))

def fetch_dataframe(query, params=None):
    """Execute query and return as DataFrame"""
    with engine.connect() as conn:
        return pd.read_sql_query(text(query), conn, params=params)

def write_excel_sheet(excel_path, sheet_name, dataframe, apply_style=True):
    """Safely write a DataFrame to an Excel sheet"""
    try:
        with pd.ExcelWriter(excel_path, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
            if not dataframe.empty:
                dataframe.to_excel(writer, sheet_name=sheet_name, index=False)
        if apply_style:
            apply_header_styles(excel_path, [sheet_name])
        return True
    except Exception as e:
        print(f"⚠️ Error writing sheet '{sheet_name}': {e}")
        return False

def fetch_rules_options(rule_ids):
    query = f"SELECT * FROM rule_engine WHERE id IN ({','.join(map(str, rule_ids))})"
    return fetch_dataframe(query)

def fetch_input_data(input_tables, join_condition, comparison_pairs, payment_filter, PG_Transaction_Type):
    table1 = input_tables["table1"]
    table2 = input_tables["table2"]

    if not table1 or not table2:
        raise ValueError("Input tables must include 'table1' and 'table2'.")

    # List of fields that require last 4 digits comparison
    last_4_digit_fields = ['card_number', 'card_no', 'card_no_cheque']

    # Build the join clause dynamically for all comparison pairs
    join_conditions = [
        f"RIGHT({table1}.{cond['table1_field']}, 4) = RIGHT({table2}.{cond['table2_field']}, 4)"
        if cond['table1_field'] in last_4_digit_fields or cond['table2_field'] in last_4_digit_fields
        else f"{table1}.{cond['table1_field']} = {table2}.{cond['table2_field']}"
        for cond in join_condition["conditions"]
    ]
    # Add additional conditions for all comparison pairs
    for pair in comparison_pairs:
        if pair['table1_field'] in last_4_digit_fields or pair['table2_field'] in last_4_digit_fields:
            join_conditions.append(
                f"RIGHT({table1}.{pair['table1_field']}, 4) = RIGHT({table2}.{pair['table2_field']}, 4)"
            )
        else:
            join_conditions.append(
                f"{table1}.{pair['table1_field']} = {table2}.{pair['table2_field']}"
            )
    # Combine all join conditions with AND   ""
    join_clause = " AND ".join(join_conditions)

    # Replace 'transaction_date' with 'date' for Pine data
    if "pine" in table1.lower() and payment_filter:
        payment_filter = payment_filter.replace("transaction_date", "date")

    # Build filter clause only if filters exist
    filters = []
    if payment_filter:
        # Check if payment_filter is a JSON object with table1/table2 filters
        if isinstance(payment_filter, str) and payment_filter.strip().startswith('{'):
            try:
                filter_obj = json.loads(payment_filter)
                table_filters = []
                if 'table1' in filter_obj:
                    # Prefix table1 column names with table1
                    table_filters.append(filter_obj['table1'].replace('subname', f'{table1}.subname')
                                         .replace('momentspay_matched', f'{table1}.momentspay_matched'))
                if 'table2' in filter_obj:
                    # Prefix table2 column names with table2
                    table_filters.append(filter_obj['table2'].replace('tran_identifier', f'{table2}.tran_identifier')
                                         .replace('paymenttype', f'{table2}.paymenttype'))
                if table_filters:
                    filters.append(' AND '.join(table_filters))
            except:
                filters.append(payment_filter)
        else:
            filters.append(payment_filter)
    if PG_Transaction_Type:
        filters.append(PG_Transaction_Type)
    filter_clause = " AND ".join(filters)
    # Dynamically construct fields_str
    all_fields = [f"{table1}.*"]  # Select all fields from table1
    for pair in comparison_pairs:
        all_fields.append(f"{table1}.{pair['table1_field']} AS t1_{pair['table1_field']}")
        all_fields.append(f"{table2}.{pair['table2_field']} AS t2_{pair['table2_field']}")
    fields_str = ", ".join(list(set(all_fields)))  # Remove duplicates and join fields

    # Use LEFT JOIN to get both matched and unmatched records
    # Change INNER to LEFT to capture unmatched records
    join_type = join_condition['type'].upper()
    if join_type == 'INNER':
        join_type = 'LEFT'
    
    # Combine the join clause and filter clause
    query = f"SELECT {fields_str} FROM {table1} {join_type} JOIN {table2} ON {join_clause}"
    if filter_clause:
        query += f" WHERE {filter_clause}"
    print(f"Executing Query: {query}")  # Debug
    try:
        with engine.connect() as conn:
            data = pd.read_sql_query(text(query), conn)
        return data
    except Exception as e:
        print(f"Error executing query: {e}")
        return pd.DataFrame()  # Return an empty DataFrame in case of error
        
def apply_rule_and_generate_excel(rule_id, match_sheet, unmatch_sheet, payment_filter, PG_Transaction_Type, excel_path, output_tables_needed_as_input=None):
    rules_df = fetch_rules_options(rule_ids=[rule_id])
    if rules_df.empty:
        print(f"No rule found for rule_id={rule_id}")
        return

    rule = rules_df.iloc[0]
    input_tables = safe_json_load(rule['input_tables'], {})
    join_condition = safe_json_load(rule['join_condition'], {})
    comparison_pairs = safe_json_load(rule['comparison_pairs'], [])

    # Fetch input data with the provided filters
    input_data = fetch_input_data(input_tables, join_condition, comparison_pairs, payment_filter, PG_Transaction_Type)
    matched, unmatched = evaluate_rule_for_output(rule, input_data)
    
    matched['match_status'] = 'Matched'
    unmatched['match_status'] = 'Not Matched'

    # Save output_table to DB if it is required by later rules
    output_table_name = safe_json_load(rule.get('output_table'), {}).get('output_table')
    
    # Ensure output_tables_needed_as_input is a set (not None)
    if output_tables_needed_as_input is None:
        output_tables_needed_as_input = set()

    if output_table_name and output_table_name in output_tables_needed_as_input:
        # Combine matched and unmatched for intermediate tables
        # Add momentspay_matched column: 'YES' for matched, 'NO' for unmatched
        if not matched.empty or not unmatched.empty:
            matched_copy = matched.copy()
            unmatched_copy = unmatched.copy()
            
            matched_copy['momentspay_matched'] = 'YES'
            unmatched_copy['momentspay_matched'] = 'NO'
            
            # Add transaction_id and email columns if this is rchiswithtransactionid
            if output_table_name == 'rchiswithtransactionid' and not matched_copy.empty:
                # transaction_id should already be in description column
                if 'description' in matched_copy.columns:
                    matched_copy['transaction_id'] = matched_copy['description']
                if 'description' in unmatched_copy.columns:
                    unmatched_copy['transaction_id'] = unmatched_copy['description']
                # email column - placeholder (not in original data)
                matched_copy['email'] = ''
                unmatched_copy['email'] = ''
            
            # Add transaction_id for bank tables
            if output_table_name in ['rcbankwithtransactionid', 'rcsibwithtransactionid']:
                # For bank tables, transaction_id comes from the join with moment table
                if 't2_transaction_id' in matched_copy.columns:
                    matched_copy['transaction_id'] = matched_copy['t2_transaction_id']
                if 't2_transaction_id' in unmatched_copy.columns:
                    unmatched_copy['transaction_id'] = unmatched_copy['t2_transaction_id']
            
            # Combine both matched and unmatched
            combined_intermediate = pd.concat([matched_copy, unmatched_copy], ignore_index=True)
            
            print(f"📥 Writing intermediate table '{output_table_name}' to DB (matched: {len(matched_copy)}, unmatched: {len(unmatched_copy)}).")
            try:
                combined_intermediate.to_sql(output_table_name, con=engine, if_exists="replace", index=False)
            except Exception as e:
                print(f"⚠️ Error writing intermediate table '{output_table_name}': {e}")
        else:
            # If no data at all, drop any stale intermediate table
            try:
                with engine.connect() as conn:
                    conn.execute(text(f"DROP TABLE IF EXISTS {output_table_name}"))
                    conn.commit()
                print(f"ℹ️ No data for '{output_table_name}'; dropped existing table (if any).")
            except Exception as e:
                print(f"⚠️ Could not drop existing table {output_table_name}: {e}")
    
    # If both matched and unmatched are empty, skip creating/writing Excel altogether
    if matched.empty and unmatched.empty:
        print(f"ℹ️ No matched or unmatched rows for rule {rule_id}; skipping Excel update.")
        return
    # Ensure we have a valid excel path. If none provided, create a default file
    if not excel_path:
        excel_path = f"recon_output_{datetime.now().strftime('%Y%m%d')}.xlsx"

    # Ensure a valid excel file exists. If file missing or corrupted, create a fresh workbook.
    if not os.path.exists(excel_path):
        try:
            from openpyxl import Workbook
            wb_create = Workbook()
            wb_create.save(excel_path)
        except Exception as e:
            print(f"⚠️ Could not create excel file '{excel_path}': {e}")
            return

    # Write new matched and unmatched sheets - DON'T delete before writing
    try:
        # Ensure file exists with at least a dummy sheet
        try:
            wb = load_workbook(excel_path)
        except Exception:
            # If load fails recreate a clean workbook
            from openpyxl import Workbook
            wb = Workbook()
            wb.save(excel_path)
            print(f"ℹ️ Recreated corrupted excel file '{excel_path}'.")
        
        # Use ExcelWriter in append mode - it will automatically load the existing workbook
        with pd.ExcelWriter(excel_path, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
            if match_sheet and match_sheet != "None":
                matched.to_excel(writer, sheet_name=match_sheet, index=False)
            if unmatch_sheet and unmatch_sheet != "None":
                unmatched.to_excel(writer, sheet_name=unmatch_sheet, index=False)
    except Exception as e:
        print(f"⚠️ Error writing Excel sheets for rule {rule_id}: {e}")
        return

    # Apply header styles after writing
    try:
        wb = load_workbook(excel_path)
        blue_fill = PatternFill(fgColor='1274bd', fill_type='solid')
        for sheet_name in [match_sheet, unmatch_sheet]:
            if sheet_name and sheet_name != "None" and sheet_name in wb.sheetnames:
                ws = wb[sheet_name]
                for cell in ws[1]:
                    cell.fill = blue_fill
        wb.save(excel_path)
    except Exception as e:
        print(f"⚠️ Error applying header styles for rule {rule_id}: {e}")

    print(f"✅ Rule {rule_id} processed: Sheets '{match_sheet}' and '{unmatch_sheet}' updated in '{excel_path}'.")

def evaluate_rule_for_output(rule, input_data):
    comparison_pairs = safe_json_load(rule["comparison_pairs"], [])
    logical_op = rule["logical_operator"].upper()

    # List of fields that require last 4 digits comparison
    last_4_digit_fields = ['card_number', 'card_no', 'card_no_cheque']

    matched_indices = []
    unmatched_indices = []

    for index, row in input_data.iterrows():
        conditions = []
        for pair in comparison_pairs:
            # Get values and ensure they're scalar
            col1 = f"t1_{pair['table1_field']}"
            col2 = f"t2_{pair['table2_field']}"
            
            # Check if columns exist
            if col1 not in row or col2 not in row:
                conditions.append(False)
                continue
            
            val1 = row[col1]
            val2 = row[col2]
            
            # Convert Series to scalar if needed
            if isinstance(val1, pd.Series):
                val1 = val1.iloc[0] if len(val1) > 0 else None
            if isinstance(val2, pd.Series):
                val2 = val2.iloc[0] if len(val2) > 0 else None

            # Handle None values gracefully
            if val1 is None or val2 is None or pd.isna(val1) or pd.isna(val2):
                conditions.append(False)
                continue

            # Normalize numeric values for comparison
            try:
                val1 = float(val1) if isinstance(val1, (int, float, str)) and str(val1).replace('.', '', 1).isdigit() else val1
                val2 = float(val2) if isinstance(val2, (int, float, str)) and str(val2).replace('.', '', 1).isdigit() else val2
            except ValueError:  # Debug
                pass

            # Dynamically handle last 4 digits for specific fields
            if pair['table1_field'] in last_4_digit_fields or pair['table2_field'] in last_4_digit_fields:
                val1 = str(val1)[-4:] if isinstance(val1, str) and len(val1) >= 4 else val1
                val2 = str(val2)[-4:] if isinstance(val2, str) and len(val2) >= 4 else val2
            # Default comparison logic
            if pair['comparison'] == '==':
                condition_result = bool(val1 == val2)
            elif pair['comparison'] == '!=':
                condition_result = bool(val1 != val2)
            else:
                condition_result = False

            conditions.append(condition_result)

        # Evaluate the row based on the logical operator
        result = all(conditions) if logical_op == 'AND' else any(conditions)

        if result:
            matched_indices.append(index)
        else:
            unmatched_indices.append(index)

    matched_df = input_data.loc[matched_indices].copy() if matched_indices else pd.DataFrame()
    unmatched_df = input_data.loc[unmatched_indices].copy() if unmatched_indices else pd.DataFrame()
    return matched_df, unmatched_df

def apply_header_styles(excel_path, sheet_names):
    if not os.path.exists(excel_path):
        print(f"⚠ Error: File '{excel_path}' does not exist.")
        return
    wb = load_workbook(excel_path)
    header_fill = PatternFill(fgColor='1274bd', fill_type='solid')

    # Apply the header style to each specified sheet
    for sheet_name in sheet_names:
        if sheet_name in wb.sheetnames:
            ws = wb[sheet_name]
            for cell in ws[1]:  # Apply the style to the first row (header row)
                cell.fill = header_fill
            print(f"✅ Header styles applied to sheet: {sheet_name}")
        else:
            print(f"⚠ Warning: Sheet '{sheet_name}' not found in the workbook.")

    # Save the workbook
    wb.save(excel_path)
    print(f"✅ Header styles saved in '{excel_path}'.")

def delete_sheets_from_db(excel_path, sheets_to_delete):
    if not os.path.exists(excel_path):
        print(f"Error: File '{excel_path}' does not exist.")
        return

    # Flatten if it's a dict (as from DB)
    if isinstance(sheets_to_delete, dict):
        all_sheets = []
        for group in sheets_to_delete.values():
            all_sheets.extend(group)
    else:
        all_sheets = list(sheets_to_delete)

    wb = load_workbook(excel_path)
    deleted = False
    for sheet_name in all_sheets:
        if sheet_name in wb.sheetnames:
            del wb[sheet_name]
            print(f"✅ Sheet '{sheet_name}' deleted successfully.")
            deleted = True
        else:
            print(f"⚠ Sheet '{sheet_name}' not found in the workbook.")

    if deleted:
        # Ensure workbook has at least one visible sheet before saving
        if not wb.sheetnames:
            wb.create_sheet("Sheet1")
        wb.save(excel_path)
        print(f"✅ Workbook saved after deleting specified sheets.")
    else:
        print("ℹ️ No sheets were deleted (none found).")

def process_summary_sheets_from_db(merchant_name, excel_path):
    query = f"""
    SELECT summary_sheets
    FROM merchant_config
    WHERE summary_sheets IS NOT NULL
    AND merchant_name = '{merchant_name}';
    """
    df = fetch_dataframe(query)

    # Can vary for clients or remain same 
    dedup_fields_list = [
        ['Amount', 'Approval_No', 'PG_Transaction_ID'],  #For first summary
        ['Card_No_Cheque', 'Amount', 'Patient_Name'],    # For second summary
        ['TID', 'Approval_Code', 'Amount']               # For third summary
    ]

    for idx, row in df.iterrows():
        summary_sheets_list = safe_json_load(row['summary_sheets'], [])
        if not summary_sheets_list:
            print(f"⚠️ Invalid or empty summary_sheets config for {merchant_name}")
            continue
        for i, summary_data in enumerate(summary_sheets_list):
            dedup_fields = dedup_fields_list[i] if i < len(dedup_fields_list) else None
            create_bank_card_summary_sheets(**summary_data, excel_path=excel_path, dedup_fields=dedup_fields)

def create_bank_card_summary_sheets(
    matched_sheets,
    unmatched_sheet,
    exclude_sheets=None,
    output_matched_sheet=None,
    output_unmatched_sheet=None,
    excel_path=None,
    dedup_fields=None
):
    if not os.path.exists(excel_path):
        print(f"Error: File '{excel_path}' does not exist.")
        return

    wb = load_workbook(excel_path)
    matched_dataframes = []
    for sheet in matched_sheets:
        if sheet in wb.sheetnames:
            df = pd.read_excel(excel_path, sheet_name=sheet)
            matched_dataframes.append(df)
        else:
            print(f"⚠ Warning: Sheet '{sheet}' not found in the workbook.")

    # Combine and deduplicate matched
    if matched_dataframes:
        combined_matched_df = pd.concat(matched_dataframes, ignore_index=True)
        if dedup_fields and all(col in combined_matched_df.columns for col in dedup_fields):
            # Ensure all dedup columns are string type
            for col in dedup_fields:
                combined_matched_df[col] = combined_matched_df[col].astype(str)
            combined_matched_df = combined_matched_df.drop_duplicates(subset=dedup_fields)
    else:
        combined_matched_df = pd.DataFrame()

    write_excel_sheet(excel_path, output_matched_sheet, combined_matched_df, apply_style=False)

    # Unmatched logic
    if unmatched_sheet in wb.sheetnames:
        unmatched_df = pd.read_excel(excel_path, sheet_name=unmatched_sheet)
        filtered_unmatched_df = unmatched_df.copy()
        if exclude_sheets and dedup_fields:
            for sheet in exclude_sheets:
                if sheet in wb.sheetnames:
                    exclude_df = pd.read_excel(excel_path, sheet_name=sheet)
                    # Ensure all dedup columns are string type in both DataFrames
                    for col in dedup_fields:
                        if col in filtered_unmatched_df.columns:
                            filtered_unmatched_df[col] = filtered_unmatched_df[col].astype(str)
                        if col in exclude_df.columns:
                            exclude_df[col] = exclude_df[col].astype(str)
                    if all(col in exclude_df.columns for col in dedup_fields):
                        merged = filtered_unmatched_df.merge(
                            exclude_df[dedup_fields].drop_duplicates(),
                            on=dedup_fields, how='left', indicator=True
                        )
                        filtered_unmatched_df = merged[merged['_merge'] == 'left_only'].drop(columns=['_merge'])
        # Final dedup
        if dedup_fields and all(col in filtered_unmatched_df.columns for col in dedup_fields):
            for col in dedup_fields:
                filtered_unmatched_df[col] = filtered_unmatched_df[col].astype(str)
            filtered_unmatched_df = filtered_unmatched_df.drop_duplicates(subset=dedup_fields)
    else:
        filtered_unmatched_df = pd.DataFrame()

    write_excel_sheet(excel_path, output_unmatched_sheet, filtered_unmatched_df, apply_style=False)

    apply_header_styles(excel_path, [output_matched_sheet, output_unmatched_sheet])

def safe_json_load(value, default=None):
    """Safely load JSON, returning default if parsing fails or value is None/empty"""
    if default is None:
        default = {}
    if not value or not isinstance(value, str):
        return default
    try:
        return json.loads(value)
    except (json.JSONDecodeError, TypeError):
        return default

def fetch_merchant_config(merchant_name): # merchant_config table is used to fetch merchant config data
    query = f"""
    SELECT summary_cells, location_summary, amount_column, sheets_to_delete,
           manual_summary_cell_mappings, total_manual_summary_cell_mappings,
           summary_sheet_names, unmatched_sheets_config, cumulative_config, daily_tracking_config
    FROM merchant_config
    WHERE merchant_name = '{merchant_name}';
    """
    with engine.connect() as conn:
        result = conn.execute(text(query)).fetchone()
    if not result:
        raise ValueError(f"No config found for merchant: {merchant_name}")
    summary_cells = safe_json_load(result[0], {})
    location_summary = safe_json_load(result[1], {})
    amount_column = result[2] if result[2] else None
    sheets_to_delete = safe_json_load(result[3], {})
    manual_summary_cell_mappings = safe_json_load(result[4], {})
    total_manual_summary_cell_mappings = safe_json_load(result[5], {})
    summary_sheet_names = safe_json_load(result[6], {})
    unmatched_sheets_config = safe_json_load(result[7], {})
    
    cumulative_config = safe_json_load(result[8], {})
    daily_tracking_config = safe_json_load(result[9], {})
    return {
        "summary_cells": summary_cells,
        "location_summary": location_summary,
        "amount_column": amount_column,
        "sheets_to_delete": sheets_to_delete,
        "manual_summary_cell_mappings": manual_summary_cell_mappings,
        "total_manual_summary_cell_mappings": total_manual_summary_cell_mappings,
        "summary_sheet_names": summary_sheet_names,
        "unmatched_sheets_config": unmatched_sheets_config,
        "cumulative_config": cumulative_config,
        "daily_tracking_config": daily_tracking_config
    }
def get_amount_column(amount_column_config, sheet_name):
    if isinstance(amount_column_config, str):
        try:
            amount_column_config = json.loads(amount_column_config)
        except Exception:
            return "Amount"
    
    # Handle list format (array of mappings)
    if isinstance(amount_column_config, list):
        for mapping in amount_column_config:
            if mapping.get("sheet_name") == sheet_name:
                return mapping.get("column_name", "Amount")
        return "Amount"
    
    # Handle dict format (key-based mappings)
    if isinstance(amount_column_config, dict):
        # Try to match sheet name with keys
        sheet_lower = sheet_name.lower()
        if 'bank' in sheet_lower and 'bank' in amount_column_config:
            return amount_column_config['bank']
        elif 'sib' in sheet_lower and 'sib' in amount_column_config:
            return amount_column_config['sib']
        elif 'moment' in sheet_lower and 'moment' in amount_column_config:
            return amount_column_config['moment']
        elif 'his' in sheet_lower and 'default' in amount_column_config:
            return amount_column_config['default']
        elif 'default' in amount_column_config:
            return amount_column_config['default']
    
    return "Amount"

def update_location_summary(
    config,
    excel_path,
    matched_sheet_key,
    unmatched_sheet_key,
    summary_cell_keys,
    location_cfg_key,
    dedup_fields=None
):
    sheet_names = config["summary_sheet_names"]
    cells = config["summary_cells"]
    loc_cfg = config.get("location_summary", {}).get(location_cfg_key, None)

    # Skip if sheet keys are not present for this merchant
    if matched_sheet_key not in sheet_names or unmatched_sheet_key not in sheet_names:
        print(f"ℹ️ Skipping summary for {matched_sheet_key}/{unmatched_sheet_key} (sheet not configured for this merchant).")
        return

    matched_sheet_name = sheet_names[matched_sheet_key]
    unmatched_sheet_name = sheet_names[unmatched_sheet_key]
    total_amount_col = get_amount_column(config.get("amount_column", {}), matched_sheet_name)
    print(f"Using amount column: {total_amount_col} for sheet {matched_sheet_key}")
    unit_location_col = 'Unit_Location'

    # Validate Excel file exists and is readable
    if not os.path.exists(excel_path):
        print(f"⚠️ Excel file not found: {excel_path}")
        return
    
    # Check if file is valid before reading
    try:
        wb_check = load_workbook(excel_path, read_only=True)
        available_sheets = wb_check.sheetnames
        wb_check.close()
    except Exception as e:
        print(f"⚠️ Cannot read Excel file '{excel_path}': {e}")
        return
    
    # Check if required sheets exist
    if matched_sheet_name not in available_sheets:
        print(f"ℹ️ Sheet '{matched_sheet_name}' not found in workbook. Skipping summary for {matched_sheet_key}.")
        return
    
    if unmatched_sheet_name not in available_sheets:
        print(f"ℹ️ Sheet '{unmatched_sheet_name}' not found in workbook. Skipping summary for {unmatched_sheet_key}.")
        return
    
    # Read sheets with error handling
    try:
        matched_df = pd.read_excel(excel_path, sheet_name=matched_sheet_name)
    except Exception as e:
        print(f"⚠️ Error reading sheet '{matched_sheet_name}': {e}")
        return
    
    try:
        unmatched_df = pd.read_excel(excel_path, sheet_name=unmatched_sheet_name)
    except Exception as e:
        print(f"⚠️ Error reading sheet '{unmatched_sheet_name}': {e}")
        return
    
    # ...rest of your existing code...
    if dedup_fields:
        before = matched_df.shape[0]
        matched_df = matched_df.drop_duplicates(subset=dedup_fields)
        print(f"✅ Deduplicated matched_df: {before} -> {matched_df.shape[0]} rows")
    combined_df = pd.concat([matched_df, unmatched_df], ignore_index=True)

    book = load_workbook(excel_path)
    
    # Check if Summary sheet exists, if not skip
    if 'Summary' not in book.sheetnames:
        print(f"⚠️ Summary sheet not found in {excel_path}. Skipping summary update.")
        book.close()
        return
    
    summary_sheet = book['Summary']

    # Check if Unit_Location column exists
    if unit_location_col in combined_df.columns:
        # Location summary
        if loc_cfg:
            total_grouped = combined_df.groupby(unit_location_col).agg(
                total_count=pd.NamedAgg(column=unit_location_col, aggfunc='count'),
                total_amount=pd.NamedAgg(column=total_amount_col, aggfunc='sum')
            ).reset_index()
            matched_grouped = matched_df.groupby(unit_location_col).agg(
                matched_count=pd.NamedAgg(column=unit_location_col, aggfunc='count'),
                matched_amount=pd.NamedAgg(column=total_amount_col, aggfunc='sum')
            ).reset_index()
            unmatched_grouped = unmatched_df.groupby(unit_location_col).agg(
                unmatched_count=pd.NamedAgg(column=unit_location_col, aggfunc='count'),
                unmatched_amount=pd.NamedAgg(column=total_amount_col, aggfunc='sum')
            ).reset_index()
            summary_df = total_grouped.merge(matched_grouped, on=unit_location_col, how='left')\
                                      .merge(unmatched_grouped, on=unit_location_col, how='left')
            summary_df = summary_df.fillna(0)
            merged_ranges = summary_sheet.merged_cells.ranges

            def find_top_left_cell(merged_ranges, row, col):
                for merged_range in merged_ranges:
                    min_col, min_row, max_col, max_row = range_boundaries(str(merged_range))
                    if min_row <= row <= max_row and min_col <= col <= max_col:
                        return min_row, min_col
                return row, col

            for index, row in summary_df.iterrows():
                for col_num, value in enumerate(row):
                    if col_num == 0:
                        continue
                    r, c = find_top_left_cell(merged_ranges, loc_cfg["start_row"] + index, loc_cfg["start_col"] + col_num - 1)
                    summary_sheet.cell(row=r, column=c, value=value)
            print(f"✅ Location summary updated in summary sheet for {location_cfg_key}.")
        else:
            print(f"ℹ️ Skipping location summary for {location_cfg_key} (not configured for this merchant).")

        # Overall totals
        # Debugging: Print columns of combined_df to verify 'paymodeamount' exists
        print("Columns in combined_df:", combined_df.columns.tolist())

        # Check if 'paymodeamount' exists in the DataFrame
        if total_amount_col not in combined_df.columns:
            # Try to find a similar column
            possible_cols = [col for col in combined_df.columns if 'amount' in col.lower() or col.lower() in ['totam', 't1_totam', 't2_total_amount']]
            if possible_cols:
                total_amount_col = possible_cols[0]
                print(f"⚠️ Column '{total_amount_col}' not found, using '{possible_cols[0]}' instead")
            else:
                print(f"⚠️ No amount column found in {matched_sheet_key}. Available: {combined_df.columns.tolist()}")
                print(f"ℹ️ Skipping amount calculations for {location_cfg_key}.")
                book.save(excel_path)
                book.close()
                return

        # Proceed with calculations if the column exists
        summary_sheet[cells[summary_cell_keys["total_amount"]]] = combined_df[total_amount_col].astype(float).sum()
        summary_sheet[cells[summary_cell_keys["matched_amount"]]] = matched_df[total_amount_col].astype(float).sum()
        summary_sheet[cells[summary_cell_keys["unmatched_amount"]]] = unmatched_df[total_amount_col].astype(float).sum()

    else:
        print(f"ℹ️ 'Unit_Location' column not found. Skipping location summary for {location_cfg_key}.")
        
    # Overall totals - Always set counts first
    summary_sheet[cells[summary_cell_keys["total_count"]]] = combined_df.shape[0]
    summary_sheet[cells[summary_cell_keys["matched_count"]]] = matched_df.shape[0]
    summary_sheet[cells[summary_cell_keys["unmatched_count"]]] = unmatched_df.shape[0]
    
    # Then set amounts if column exists
    if total_amount_col in combined_df.columns:
        try:
            summary_sheet[cells[summary_cell_keys["total_amount"]]] = pd.to_numeric(combined_df[total_amount_col], errors='coerce').sum()
            summary_sheet[cells[summary_cell_keys["matched_amount"]]] = pd.to_numeric(matched_df[total_amount_col], errors='coerce').sum()
            summary_sheet[cells[summary_cell_keys["unmatched_amount"]]] = pd.to_numeric(unmatched_df[total_amount_col], errors='coerce').sum()
        except Exception as e:
            print(f"⚠️ Error calculating amounts: {e}")
            print(f"Column '{total_amount_col}' sample values: {combined_df[total_amount_col].head().tolist()}")
    else:
        # Try to find amount column again
        possible_cols = [col for col in combined_df.columns if 'amount' in col.lower() or col.lower() in ['totam', 't1_totam', 't2_total_amount']]
        if possible_cols:
            amount_col = possible_cols[0]
            print(f"Using amount column: {amount_col}")
            try:
                summary_sheet[cells[summary_cell_keys["total_amount"]]] = pd.to_numeric(combined_df[amount_col], errors='coerce').sum()
                summary_sheet[cells[summary_cell_keys["matched_amount"]]] = pd.to_numeric(matched_df[amount_col], errors='coerce').sum()
                summary_sheet[cells[summary_cell_keys["unmatched_amount"]]] = pd.to_numeric(unmatched_df[amount_col], errors='coerce').sum()
            except Exception as e:
                print(f"⚠️ Error calculating amounts with {amount_col}: {e}")
        else:
            print(f"⚠️ No amount column found. Counts only. Available: {combined_df.columns.tolist()[:10]}")

    book.save(excel_path)
    print(f"✅ Summary updated for {location_cfg_key}.")

def table_has_date_column(table_name):
    with engine.connect() as conn:
        columns = [col[0].lower() for col in conn.execute(text(f"DESCRIBE {table_name}")).fetchall()]
    return 'date' in columns

def manual_summary_cells(excel_path, config):
    wb = load_workbook(excel_path)
    ws = wb["Summary"]
    cell_mappings = config["manual_summary_cell_mappings"]

    for target_cell, source_cells in cell_mappings.items():
        values = [get_numeric_value(ws[cell].value) for cell in source_cells]
        ws[target_cell].value = sum(values)

    wb.save(excel_path)
    print(f"✅ Manual summary cells updated successfully in: {excel_path}")

def total_manual_summary_cells(excel_path, config):
    wb = load_workbook(excel_path)
    ws = wb["Summary"]
    cell_mappings = config["total_manual_summary_cell_mappings"]

    for target_cell, source_cells in cell_mappings.items():
        values = [get_numeric_value(ws[cell].value) for cell in source_cells]
        ws[target_cell].value = sum(values)

    wb.save(excel_path)
    print(f"✅ Total manual summary cells updated successfully in: {excel_path}")
    
def fetch_date_filter_config(merchant_name):
    query = f"SELECT date_filter FROM merchant_config WHERE merchant_name = '{merchant_name}'"
    with engine.connect() as conn:
        result = conn.execute(text(query)).fetchone()
    if not result or not result[0]:
        return {}
    date_filter_json = safe_json_load(result[0], [])
    if not date_filter_json:
        return {}
    rule_to_config = {}
    for entry in date_filter_json:
        for rule_id in entry.get("id_range", []):
            rule_to_config[rule_id] = {
                "column": entry["column"],
                "format": entry.get("format")
            }
    return rule_to_config
def calculate_and_store_file_amounts(merchant_name):
    from datetime import date

    # Fetch rules with file_amount_column and input_tables
    query = f"""
    SELECT id, input_tables, file_amount_column
    FROM rule_engine
    WHERE file_amount_column is not null
    AND merchant_name = '{merchant_name}'
    """
    with engine.connect() as conn:
        rules = pd.read_sql_query(text(query), conn)

    for _, rule in rules.iterrows():
        # Parse input_tables and file_amount_column
        try:
            input_tables = json.loads(rule['input_tables'])
            file_amount_column = json.loads(rule['file_amount_column'])
        except Exception as e:
            print(f"⚠️ Error parsing JSON for rule {rule['id']}: {e}")
            continue

        table1 = input_tables.get("table1")
        if not table1:
            print(f"⚠️ No table1 found for rule {rule['id']}")
            continue

        # Get template(s) and columns
        templates = file_amount_column.get("template")
        if isinstance(templates, str):
            templates = [templates]
        columns = file_amount_column.get("columns", [])

        # Fetch data from table1
        try:
            with engine.connect() as conn:
                df = pd.read_sql_table(table1, conn)
        except Exception as e:
            print(f"⚠️ Error reading table {table1}: {e}")
            continue

        for template in templates:
            if not columns or not all(col in df.columns for col in columns):
                print(f"⚠️ Columns {columns} not found in table {table1} for template {template}")
                continue

            # Calculate amount
            if len(columns) == 1:
                amount = df[columns[0]].fillna(0).astype(float).sum()
            else:
                # Use the first non-zero column per row
                amount = df.apply(
                    lambda row: next((row[col] for col in columns if row.get(col, 0) not in [0, None, ""]), 0),
                    axis=1
                ).fillna(0).astype(float).sum()

            # Insert into file_amount_summary
            try:
                with engine.begin() as conn:
                    conn.execute(
                        text("""
                            INSERT INTO file_amount_summary (file_name, template, amount, date_added)
                            VALUES (:file_name, :template, :amount, :summary_date)
                        """),
                        {
                            "file_name": table1,
                            "template": template,
                            "amount": amount,
                            "summary_date": datetime.now() # <-- key matches SQL placeholder
                        }
                    )
                print(f"✅ Stored amount for table '{table1}', template '{template}': {amount}")
            except Exception as e:
                print(f"⚠️ Error inserting amount for {table1}, {template}: {e}")
                conn.commit()  # Ensure the transaction is committed even if there's an error

def main(merchant_name, date_filter=None, excel_path=None, dry_run=False):
    # Ensure module-level flag is set for code paths that read it
    globals()['DRY_RUN'] = dry_run
    print("------ Matching Engine Started ------")
    print(f"Merchant Name Before Processing Rules: {merchant_name}")
    config = fetch_merchant_config(merchant_name)
    date_filter_map = fetch_date_filter_config(merchant_name)
    # Fetch rules dynamically based on merchant name and priority
    query = f"""
    SELECT * FROM rule_engine 
    WHERE enabled = 1
    AND merchant_name = '{merchant_name}';
    """
    with engine.connect() as conn:
        rules_df = pd.read_sql_query(text(query), conn)

    print(rules_df[['id', 'merchant_name', 'priority']])  # Debug: Print rule IDs and merchant names

    if rules_df.empty:
        print(f"⚠ No enabled rules found for merchant '{merchant_name}'.")
        return

    # ✅ Identify intermediate output tables needed later
    output_tables_needed_as_input = set()
    rules_list = rules_df.to_dict(orient='records')
    for rule in rules_list:
        output_table = safe_json_load(rule.get("output_table", "{}"), {}).get("output_table")
        if output_table:
            for later_rule in rules_list:
                if later_rule["id"] == rule["id"]:
                    continue
                later_input = safe_json_load(later_rule.get("input_tables", "{}"), {})
                if output_table in [later_input.get("table1"), later_input.get("table2")]:
                    output_tables_needed_as_input.add(output_table)
    print(f"📋 Output tables that will be reused later: {output_tables_needed_as_input}")

    print(f"------ Processing Rules for Merchant: {merchant_name} ------")
    for _, rule in rules_df.iterrows():
        rule_id = int(rule['id'])  

        match_sheet = rule['match_sheet']
        unmatch_sheet = rule['unmatch_sheet']
        payment_filter = rule['payment_filter']
        pg_transaction_type = rule['pg_transaction_type']

        # Convert "None" string to actual None for processing
        if payment_filter == "None":
            payment_filter = None
        if pg_transaction_type == "None":
            pg_transaction_type = None

        # Step 4: Check if match_sheet and unmatch_sheet are None
        if match_sheet == "None" and unmatch_sheet == "None":
            # Only execute the CREATE TABLE query in the database
            print(f"Executing CREATE TABLE for Rule ID {rule_id} (No Excel update required).")
            input_tables = safe_json_load(rule['input_tables'], {})
            join_conditions = safe_json_load(rule['join_condition'], {}).get('conditions', [])
            comparison_pairs = safe_json_load(rule['comparison_pairs'], [])

            # Build the AND conditions dynamically for join_conditions
            join_conditions_str = " AND ".join([
                f"t1.{cond['table1_field']} = t2.{cond['table2_field']}"
                for cond in join_conditions
            ])

            comparison_pairs_str = " AND ".join([
                f"t1.{pair['table1_field']} = t2.{pair['table2_field']}"
                for pair in comparison_pairs
            ])

            all_conditions = f"{join_conditions_str} AND {comparison_pairs_str}"

            output_table = safe_json_load(rule['output_table'], {}).get('output_table', '')

            with engine.connect() as conn:
                table1_columns = [col[0] for col in conn.execute(text(f"DESCRIBE {input_tables['table1']}")).fetchall()]
                table2_columns = [col[0] for col in conn.execute(text(f"DESCRIBE {input_tables['table2']}")).fetchall()]

            transaction_id_in_table2 = 'transaction_id' in [col.lower() for col in table2_columns]
            transaction_id_in_table1 = 'transaction_id' in [col.lower() for col in table1_columns]

            select_fields = []

            # Transaction_ID logic: table2 first, then table1, else ignore
            if transaction_id_in_table2:
                select_fields.append("t2.Transaction_ID AS Transaction_ID_t2")
            elif transaction_id_in_table1:
                select_fields.append("t1.Transaction_ID AS Transaction_ID_t1")
            # else: do not add Transaction_ID

            # Add all columns from table1 (except transaction_id)
            select_fields.extend([f"t1.{col}" for col in table1_columns if col.lower() != 'transaction_id'])

            # Add only the comparison fields from table2 (with alias)
            for pair in comparison_pairs:
                col = pair['table2_field']
                select_fields.append(f"t2.{col} AS t2_{col}")

            select_clause = ",\n    ".join(select_fields)

            create_table_query = f"""
            CREATE TABLE IF NOT EXISTS {output_table} AS
            SELECT 
                {select_clause},
                CASE 
                    WHEN {" AND ".join([f"t1.{pair['table1_field']} = t2.{pair['table2_field']}" for pair in comparison_pairs])} THEN 'Matched'
                    ELSE 'Unmatched'
                END AS match_status
            FROM {input_tables['table1']} t1
            LEFT JOIN {input_tables['table2']} t2
            ON {all_conditions}
            """
            print(f"Executing CREATE TABLE query: {create_table_query}")
            try:
                with engine.connect() as conn:
                    conn.execute(text(create_table_query))
                print(f"✅ Rule ID {rule_id} processed successfully. Output table: {output_table}")
            except Exception as e:
                print(f"⚠ Error processing Rule ID {rule_id}: {e}")

        else:
            if date_filter:
                date_cfg = date_filter_map.get(rule_id)
                if date_cfg:
                    date_col = date_cfg["column"]
                    # Optionally, format date_filter as per date_cfg["format"] if needed
                    if payment_filter:
                        payment_filter += f" AND {date_col} = '{date_filter}'"
                    else:
                        payment_filter = f"{date_col} = '{date_filter}'"
                    print(f"Applied date filter for Rule ID {rule_id}: {payment_filter}")
                else:
                    print(f"Skipping date filter for Rule ID {rule_id} (no config found).")

            apply_rule_and_generate_excel(
                rule_id=rule_id,
                match_sheet=match_sheet,
                unmatch_sheet=unmatch_sheet,
                payment_filter=payment_filter,
                PG_Transaction_Type=pg_transaction_type,
                excel_path=excel_path,
                output_tables_needed_as_input=output_tables_needed_as_input
            )

    print(f"✅ All rules processed successfully for merchant: {merchant_name}.")
    print("------ Matching Engine Completed ------")

    process_summary_sheets_from_db(merchant_name, excel_path)

    print("✅ All intermediate sheets deleted successfully.")
    sheet_groups = config["sheets_to_delete"]

    # Handle both dict and list formats
    if sheet_groups:
        if isinstance(sheet_groups, dict) and any(sheet_groups.values()):
            delete_sheets_from_db(excel_path, sheet_groups)
        elif isinstance(sheet_groups, list) and len(sheet_groups) > 0:
            delete_sheets_from_db(excel_path, sheet_groups)
        else:
            print("ℹ️ Sheet deletion skipped (no sheets configured for deletion for this merchant).")
    else:
        print("ℹ️ Sheet deletion skipped (no sheets configured for deletion for this merchant).")

    config = fetch_merchant_config(merchant_name)
    
    # Location wise summary - consolidated into loop to avoid repetition
    location_summary_configs = [
        {
            "matched_sheet_key": "his_upi_matched",
            "unmatched_sheet_key": "his_upi_unmatched",
            "summary_cell_keys": {
                "total_count": "upi_his_total_count",
                "matched_count": "upi_his_matched_count",
                "unmatched_count": "upi_his_unmatched_count",
                "total_amount": "upi_his_total_amount",
                "matched_amount": "upi_his_matched_amount",
                "unmatched_amount": "upi_his_unmatched_amount"
            },
            "location_cfg_key": "upi"
        },
        {
            "matched_sheet_key": "manual_upi_matched",
            "unmatched_sheet_key": "manual_upi_unmatched",
            "summary_cell_keys": {
                "total_count": "upi_manual_total_count",
                "matched_count": "upi_manual_matched_count",
                "unmatched_count": "upi_manual_unmatched_count",
                "total_amount": "upi_manual_total_amount",
                "matched_amount": "upi_manual_matched_amount",
                "unmatched_amount": "upi_manual_unmatched_amount"
            },
            "location_cfg_key": "manual_upi"
        },
        {
            "matched_sheet_key": "manual_card_matched",
            "unmatched_sheet_key": "manual_card_unmatched",
            "summary_cell_keys": {
                "total_count": "card_manual_total_count",
                "matched_count": "card_manual_matched_count",
                "unmatched_count": "card_manual_unmatched_count",
                "total_amount": "card_manual_total_amount",
                "matched_amount": "card_manual_matched_amount",
                "unmatched_amount": "card_manual_unmatched_amount"
            },
            "location_cfg_key": "manual_card"
        },
        {
            "matched_sheet_key": "his_card_matched",
            "unmatched_sheet_key": "his_card_unmatched",
            "summary_cell_keys": {
                "total_count": "card_his_total_count",
                "matched_count": "card_his_matched_count",
                "unmatched_count": "card_his_unmatched_count",
                "total_amount": "card_his_total_amount",
                "matched_amount": "card_his_matched_amount",
                "unmatched_amount": "card_his_unmatched_amount"
            },
            "location_cfg_key": "card"
        },
        {
            "matched_sheet_key": "his_matched",
            "unmatched_sheet_key": "his_unmatched",
            "summary_cell_keys": {
                "total_count": "his_total_count",
                "matched_count": "his_matched_count",
                "unmatched_count": "his_unmatched_count",
                "total_amount": "his_total_amount",
                "matched_amount": "his_matched_amount",
                "unmatched_amount": "his_unmatched_amount"
            },
            "location_cfg_key": "his"
        },
        {
            "matched_sheet_key": "pine_upi_matched",
            "unmatched_sheet_key": "pine_upi_unmatched",
            "summary_cell_keys": {
                "total_count": "pine_upi_total_count",
                "matched_count": "pine_upi_matched_count",
                "unmatched_count": "pine_upi_unmatched_count",
                "total_amount": "pine_upi_total_amount",
                "matched_amount": "pine_upi_matched_amount",
                "unmatched_amount": "pine_upi_unmatched_amount"
            },
            "location_cfg_key": "pine_upi"
        },
        {
            "matched_sheet_key": "pine_card_matched",
            "unmatched_sheet_key": "pine_card_unmatched",
            "summary_cell_keys": {
                "total_count": "pine_card_total_count",
                "matched_count": "pine_card_matched_count",
                "unmatched_count": "pine_card_unmatched_count",
                "total_amount": "pine_card_total_amount",
                "matched_amount": "pine_card_matched_amount",
                "unmatched_amount": "pine_card_unmatched_amount"
            },
            "location_cfg_key": "pine_card"
        }
    ]
    
    # Loop through all location summary configurations
    for summary_config in location_summary_configs:
        update_location_summary(config, excel_path, **summary_config)
    manual_summary_cells(excel_path, config)
    total_manual_summary_cells(excel_path, config)
    calculate_and_store_file_amounts(merchant_name)
    
    # ========================================
    # AGING REPORT PROCESSING
    # ========================================
    try:
        print("📊 Generating aging report...")
        generate_aging_report(merchant_name, excel_path, date_filter)
    except Exception as e:
        print(f"⚠️ Error generating aging report: {e}")
    
    # ========================================
    # NEW AUTOMATED PROCESSING
    # ========================================
    print("🔄 Starting automated unmatched and cumulative processing...")
    process_automated_unmatched_and_cumulative(merchant_name, excel_path, dry_run=DRY_RUN)

def generate_aging_report(merchant_name, excel_path, date_filter=None):
    """
    Generate aging report for unmatched bank transactions
    Categorizes by settlement days: 1 Day, 2 Days, 3 Days, >3 Days
    """
    try:
        # Fetch bank table name from merchant config
        bank_table_query = f"""
        SELECT JSON_EXTRACT(input_tables, '$.table1') as bank_table
        FROM rule_engine
        WHERE merchant_name = '{merchant_name}'
        AND JSON_EXTRACT(input_tables, '$.table1') LIKE '%bank%'
        LIMIT 1;
        """
        
        with engine.connect() as conn:
            result = conn.execute(text(bank_table_query)).fetchone()
            if not result or not result[0]:
                print("ℹ️ No bank table found for aging report")
                return
            
            bank_table = result[0].strip('"')
        
        # Query to get unmatched bank transactions
        aging_query = f"""
        SELECT 
            DATEDIFF(STR_TO_DATE(paiddate, '%Y-%m-%d'), STR_TO_DATE(txndate, '%Y-%m-%d')) as days_to_settle,
            COUNT(*) as transaction_count,
            SUM(CAST(txnamt AS DECIMAL(15,2))) as total_amount
        FROM {bank_table}
        WHERE paiddate IS NOT NULL 
        AND txndate IS NOT NULL
        AND paiddate != '' 
        AND txndate != ''
        GROUP BY days_to_settle
        ORDER BY days_to_settle;
        """
        
        with engine.connect() as conn:
            aging_df = pd.read_sql_query(text(aging_query), conn)
        
        if aging_df.empty:
            print("ℹ️ No data for aging report")
            return
        
        # Categorize into aging buckets
        def categorize_days(days):
            if days <= 1:
                return '0-1'
            elif days <= 2:
                return '1-2'
            elif days <= 3:
                return '2-3'
            else:
                return '>3'
        
        aging_df['category'] = aging_df['days_to_settle'].apply(categorize_days)
        
        # Aggregate by category
        aging_summary = aging_df.groupby('category').agg({
            'transaction_count': 'sum',
            'total_amount': 'sum'
        }).reset_index()
        
        # Ensure all categories exist
        categories = ['0-1', '1-2', '2-3', '>3']
        aging_summary = aging_summary.set_index('category').reindex(categories, fill_value=0).reset_index()
        
        # Update Summary sheet
        wb = load_workbook(excel_path)
        if 'Summary' in wb.sheetnames:
            ws = wb['Summary']
            
            # Find aging report section (look for "Aging Report" text)
            aging_start_row = None
            for row in range(1, ws.max_row + 1):
                cell_value = ws.cell(row=row, column=1).value
                if cell_value and 'aging' in str(cell_value).lower():
                    aging_start_row = row + 2  # Skip header row
                    break
            
            if aging_start_row:
                # Update aging data (rows 26-29 based on your template)
                row_mapping = {'0-1': 26, '1-2': 27, '2-3': 28, '>3': 29}
                
                for _, row in aging_summary.iterrows():
                    category = row['category']
                    row_num = row_mapping.get(category)
                    
                    if row_num:
                        ws.cell(row=row_num, column=1, value=category)
                        ws.cell(row=row_num, column=2, value=int(row['transaction_count']))
                        ws.cell(row=row_num, column=3, value=int(row['total_amount']))
                
                wb.save(excel_path)
                print(f"✅ Aging report updated in Summary sheet (rows 26-29)")
            else:
                print("⚠️ Aging Report section not found in Summary sheet")
        else:
            print("⚠️ Summary sheet not found")
        
    except Exception as e:
        print(f"⚠️ Error in aging report generation: {e}")
        import traceback
        traceback.print_exc()


def fetch_rules_for_merchant(merchant_name):
    query = f"""
    SELECT * FROM rule_engine
    WHERE enabled = 1
    AND merchant_name = '{merchant_name}';
    """
    return fetch_dataframe(query).to_dict(orient='records')

def run_rule_engine_dynamically(merchant_name, date_filter=None, excel_path=None, dry_run=False):
    print("------ Matching Engine Started ------")
    print(f"Merchant Name Before Processing Rules: {merchant_name}")

   # rules_for_merchant =merchant_config(merchant_name)

    rules_for_merchant = fetch_rules_for_merchant(merchant_name)

    print(f"[DEBUG] Rules fetched: {rules_for_merchant}")
    # ✅ Identify intermediate output tables needed later
    output_tables_needed_as_input = set()
    for rule in rules_for_merchant:
        output_table = safe_json_load(rule["output_table"], {}).get("output_table")
        for later_rule in rules_for_merchant:
            if later_rule["id"] == rule["id"]:
                continue
            later_input = safe_json_load(later_rule["input_tables"], {})
            if output_table in [later_input.get("table1"), later_input.get("table2")]:
                output_tables_needed_as_input.add(output_table)

    print(f"📋 Output tables that will be reused later: {output_tables_needed_as_input}")

    for rule in rules_for_merchant:
        try:
            apply_rule_and_generate_excel(
                rule_id=rule["id"],
                match_sheet=rule.get("match_sheet"),
                unmatch_sheet=rule.get("unmatch_sheet"),
                payment_filter=rule.get("payment_filter"),
                PG_Transaction_Type=rule.get("pg_transaction_type"),
                excel_path=excel_path,
                output_tables_needed_as_input=output_tables_needed_as_input
            )
        except Exception as e:
            print(f"❌ Error processing rule {rule['id']}: {e}")

    print(f"✅ All rules processed successfully for merchant: {merchant_name}")
    print("------ Matching Engine Completed ------")


# ========================================
# NEW AUTOMATED FUNCTIONS FOR ALL-UNMATCHED AND CUMULATIVE PROCESSING
# ========================================

def create_unmatched_data_table():
    """Create database table to store unmatched data for cumulative matching"""
    create_table_sql = """
    CREATE TABLE IF NOT EXISTS unmatched_data_storage (
        id INT AUTO_INCREMENT PRIMARY KEY,
        merchant_name VARCHAR(255),
        source_sheet VARCHAR(255),
        transaction_date DATE,
        amount DECIMAL(15,2),
        card_number VARCHAR(50),
        approval_code VARCHAR(50),
        pg_transaction_id VARCHAR(100),
        patient_name VARCHAR(255),
        unit_location VARCHAR(255),
        bank VARCHAR(100),
        payment_mode VARCHAR(50),
        doc_no VARCHAR(100),
        data_json TEXT,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        matched_status VARCHAR(20) DEFAULT 'unmatched',
        matched_bank VARCHAR(50) NULL,
        processing_date DATE,
        INDEX idx_merchant (merchant_name),
        INDEX idx_card (card_number),
        INDEX idx_approval (approval_code),
        INDEX idx_amount (amount),
        INDEX idx_date (transaction_date),
        INDEX idx_status (matched_status)
    )
    """
    
    try:
        with engine.connect() as conn:
            conn.execute(text(create_table_sql))
            conn.commit()
        print("✅ Unmatched data storage table created/verified")
    except Exception as e:
        print(f"⚠️ Error creating unmatched_data_storage table: {e}")

def fetch_unmatched_sheets_config(merchant_name):
    """Fetch unmatched sheets configuration for a merchant"""
    query = f"""
    SELECT unmatched_sheets_config
    FROM merchant_config
    WHERE merchant_name = '{merchant_name}';
    """
    try:
        df = fetch_dataframe(query)
        
        if df.empty or not df.iloc[0]['unmatched_sheets_config']:
            print(f"⚠️ No unmatched sheets config found for merchant: {merchant_name}")
            return None
        
        return safe_json_load(df.iloc[0]['unmatched_sheets_config'], None)
    except Exception as e:
        print(f"⚠️ Error fetching unmatched sheets config: {e}")
        return None

def fetch_cumulative_config(merchant_name):
    """Fetch cumulative matching rules from cumulative_rules table"""
    query = f"""
    SELECT * FROM cumulative_rules
    WHERE merchant_name = '{merchant_name}' 
    AND enabled = 1
    ORDER BY priority;
    """
    try:
        df = fetch_dataframe(query)
        
        if df.empty:
            print(f"⚠️ No cumulative rules found for merchant: {merchant_name}")
            return None
        
        # Convert to list of dictionaries
        rules = []
        for _, row in df.iterrows():
            rule = {
                'id': row['id'],
                'rule_name': row['rule_name'],
                'merchant_name': row['merchant_name'],
                'description': row['description'],
                'unmatched_sheets': safe_json_load(row['unmatched_sheets'], []),
                'bank_data_sheet': row['bank_data_sheet'],
                'comparison_columns': safe_json_load(row['comparison_columns'], {}),
                'transformations': safe_json_load(row['transformations'], None),
                'match_conditions': safe_json_load(row['match_conditions'], []),
                'output_sheet': row['output_sheet'],
                'matched_bank': row['matched_bank'],
                'priority': row['priority'],
                'enabled': row['enabled']
            }
            rules.append(rule)
        
        return rules
    except Exception as e:
        print(f"⚠️ Error fetching cumulative rules: {e}")
        return None

def consolidate_all_unmatched_sheet(excel_path, merchant_name):
    """
    Automatically consolidate unmatched sheets based on merchant configuration
    """
    print(f"🔄 Starting All-Unmatched sheet consolidation for {merchant_name}...")
    
    if not os.path.exists(excel_path):
        print(f"⚠️ Excel file not found: {excel_path}")
        return None
    
    # Get merchant-specific unmatched sheets configuration
    config = fetch_unmatched_sheets_config(merchant_name)
    if not config:
        return None
    
    # Handle different config formats
    if isinstance(config, list):
        # Config is a simple list of sheet names
        source_sheets = config
        target_sheet = 'All-Unmatched'
    elif isinstance(config, dict):
        # Config is a dictionary with source_sheets and target_sheet
        source_sheets = config.get('source_sheets', [])
        target_sheet = config.get('target_sheet', 'All-Unmatched')
    else:
        print(f"⚠️ Invalid unmatched sheets config format for {merchant_name}")
        return None
    
    if not source_sheets:
        print(f"⚠️ No source sheets configured for {merchant_name}")
        return None
    
    wb = load_workbook(excel_path)
    consolidated_data = []
    
    # Read data from each configured source sheet
    for sheet_name in source_sheets:
        if sheet_name in wb.sheetnames:
            try:
                df = pd.read_excel(excel_path, sheet_name=sheet_name)
                if not df.empty:
                    df['source_sheet'] = sheet_name  # Add source tracking
                    df['processing_date'] = datetime.now().date()  # Add processing date
                    consolidated_data.append(df)
                    print(f"✅ Added {len(df)} rows from {sheet_name}")
            except Exception as e:
                print(f"⚠️ Error reading {sheet_name}: {e}")
        else:
            print(f"⚠️ Sheet {sheet_name} not found in workbook")
    
    # Combine all data
    if consolidated_data:
        combined_df = pd.concat(consolidated_data, ignore_index=True)
        print(f"📊 Total consolidated rows: {len(combined_df)}")
        
        # Write to target sheet safely without corrupting the file
        try:
            # Load existing workbook
            wb = load_workbook(excel_path)
            
            # Remove the target sheet if it exists
            if target_sheet in wb.sheetnames:
                del wb[target_sheet]
            
            # Create new sheet at the end
            ws = wb.create_sheet(target_sheet)
            
            # Write headers
            for col_num, column_name in enumerate(combined_df.columns, 1):
                ws.cell(row=1, column=col_num, value=column_name)
            
            # Write data
            for row_num, row_data in enumerate(combined_df.values, 2):
                for col_num, value in enumerate(row_data, 1):
                    # Convert datetime/date objects to string to avoid Excel issues
                    if isinstance(value, (datetime, pd.Timestamp)):
                        value = value.strftime('%Y-%m-%d %H:%M:%S')
                    elif isinstance(value, date):
                        value = value.strftime('%Y-%m-%d')
                    ws.cell(row=row_num, column=col_num, value=value)
            
            # Save the workbook
            wb.save(excel_path)
            wb.close()
            
            apply_header_styles(excel_path, [target_sheet])
            print(f"✅ {target_sheet} sheet created with {len(combined_df)} rows")
        except Exception as e:
            print(f"⚠️ Error writing consolidated sheet '{target_sheet}': {e}")
            import traceback
            traceback.print_exc()
        
        return combined_df
    else:
        print("⚠️ No data found in source sheets")
        return None

def store_unmatched_data_to_db(excel_path, merchant_name):
    """Store All-Unmatched data to database for future cumulative matching"""
    try:
        # Get merchant configuration
        config = fetch_unmatched_sheets_config(merchant_name)
        if not config:
            return
        
        target_sheet = config.get('target_sheet', 'All-Unmatched')
        column_mapping = config.get('column_mapping', {})
        
        # Read target sheet (usually All-Unmatched)
        df = pd.read_excel(excel_path, sheet_name=target_sheet)
        if df.empty:
            print(f"⚠️ {target_sheet} sheet is empty")
            return
        
        # Clear existing data for this merchant and processing date
        current_date = datetime.now().date()
        delete_query = """
        DELETE FROM unmatched_data_storage 
        WHERE merchant_name = :merchant_name 
        AND processing_date = :processing_date
        """
        with engine.connect() as conn:
            conn.execute(text(delete_query), {
                'merchant_name': merchant_name,
                'processing_date': current_date
            })
            conn.commit()
        
        # Prepare data for database storage with flexible column mapping
        storage_data = []
        for _, row in df.iterrows():
            # Use column mapping if provided, otherwise use default column names
            def get_column_value(key, default_col):
                col_name = column_mapping.get(key, default_col)
                return row.get(col_name, '')
            
            record = {
                'merchant_name': merchant_name,
                'source_sheet': str(row.get('source_sheet', '')),
                'transaction_date': pd.to_datetime(get_column_value('date', 'Doc Date'), errors='coerce').date() if pd.notna(get_column_value('date', 'Doc Date')) else None,
                'amount': float(get_column_value('amount', 'Amount')) if pd.notna(get_column_value('amount', 'Amount')) else 0,
                'card_number': str(get_column_value('card_number', 'Card No/Cheque'))[-4:] if pd.notna(get_column_value('card_number', 'Card No/Cheque')) else '',
                'approval_code': str(get_column_value('approval_code', 'Approval No')) if pd.notna(get_column_value('approval_code', 'Approval No')) else '',
                'pg_transaction_id': str(get_column_value('pg_transaction_id', 'PG Transaction ID')) if pd.notna(get_column_value('pg_transaction_id', 'PG Transaction ID')) else '',
                'patient_name': str(get_column_value('patient_name', 'Patient Name')) if pd.notna(get_column_value('patient_name', 'Patient Name')) else '',
                'unit_location': str(get_column_value('unit_location', 'Unit/Location')) if pd.notna(get_column_value('unit_location', 'Unit/Location')) else '',
                'bank': str(get_column_value('bank', 'Bank')) if pd.notna(get_column_value('bank', 'Bank')) else '',
                'payment_mode': str(get_column_value('payment_mode', 'payment mode')) if pd.notna(get_column_value('payment_mode', 'payment mode')) else '',
                'doc_no': str(get_column_value('doc_no', 'Doc No\n')) if pd.notna(get_column_value('doc_no', 'Doc No\n')) else '',
                'data_json': row.to_json(),
                'processing_date': current_date
            }
            storage_data.append(record)
        
        # Insert into database
        if storage_data:
            storage_df = pd.DataFrame(storage_data)
            storage_df.to_sql('unmatched_data_storage', con=engine, if_exists='append', index=False)
            print(f"✅ Stored {len(storage_data)} unmatched records to database for {merchant_name}")
        
    except Exception as e:
        print(f"⚠️ Error storing unmatched data for {merchant_name}: {e}")

def process_cumulative_matching(merchant_name, excel_path):
    """Process cumulative matching against stored unmatched data based on cumulative_rules table"""
    print(f"🔄 Starting cumulative matching process for {merchant_name}...")
    
    # Get merchant-specific cumulative rules from cumulative_rules table
    cumulative_rules = fetch_cumulative_config(merchant_name)
    if not cumulative_rules:
        print(f"⚠️ No cumulative rules found for {merchant_name}")
        return
    
    print(f"📋 Found {len(cumulative_rules)} cumulative rule(s) for {merchant_name}")
    
    # Calculate exclude date as current_date - 1 (yesterday)
    from datetime import timedelta
    current_date = datetime.now().date()
    exclude_date = current_date - timedelta(days=1)  # Yesterday
    exclude_date_str = exclude_date.strftime('%Y-%m-%d')
    print(f"📅 Excluding transactions from date: {exclude_date_str} (current_date - 1)")
    
    # Process each cumulative rule
    for rule in cumulative_rules:
        try:
            rule_name = rule['rule_name']
            matched_bank = rule['matched_bank']
            output_sheet = rule['output_sheet']
            unmatched_sheets = rule['unmatched_sheets']
            comparison_columns = rule['comparison_columns']
            match_conditions = rule['match_conditions']
            
            print(f"🏦 Processing rule '{rule_name}' ({matched_bank}) for {merchant_name}...")
            
            if not output_sheet:
                print(f"⚠️ No output sheet configured for rule: {rule_name}")
                continue
            
            # Read unmatched data from the configured sheets in Excel
            unmatched_data_list = []
            for sheet in unmatched_sheets:
                try:
                    df = pd.read_excel(excel_path, sheet_name=sheet)
                    if not df.empty:
                        df['source_sheet'] = sheet
                        unmatched_data_list.append(df)
                        print(f"📄 Read {len(df)} records from sheet: {sheet}")
                except Exception as e:
                    print(f"⚠️ Could not read sheet '{sheet}': {e}")
            
            if not unmatched_data_list:
                print(f"⚠️ No unmatched data found in configured sheets")
                continue
            
            # Combine all unmatched data
            unmatched_df = pd.concat(unmatched_data_list, ignore_index=True)
            print(f"📊 Total unmatched records from Excel: {len(unmatched_df)}")
            
            # For now, cumulative matching logic is simplified
            # Just create an empty output sheet with headers from unmatched data
            print(f"✅ Rule '{rule_name}' configuration loaded successfully")
            print(f"   - Matched Bank: {matched_bank}")
            print(f"   - Output Sheet: {output_sheet}")
            print(f"   - Unmatched Sheets: {', '.join(unmatched_sheets)}")
            print(f"   - Match Conditions: {len(match_conditions)} condition(s)")
            
            # Create output sheet with headers even if no matches found
            # Use an empty DataFrame with same columns as unmatched data
            cumulative_output_df = pd.DataFrame(columns=unmatched_df.columns)
            
            # Write to output sheet without deleting existing sheets
            try:
                # Load workbook to check existing sheets
                from openpyxl import load_workbook
                wb = load_workbook(excel_path)
                
                # Remove output sheet if it already exists
                if output_sheet in wb.sheetnames:
                    del wb[output_sheet]
                    print(f"📝 Removed existing '{output_sheet}' sheet")
                
                # Create new sheet with headers
                ws = wb.create_sheet(output_sheet)
                
                # Write headers
                for col_num, column_name in enumerate(cumulative_output_df.columns, 1):
                    ws.cell(row=1, column=col_num, value=column_name)
                
                # Note: No data rows written since no matching is performed yet
                # In future, matched rows would be written here
                
                wb.save(excel_path)
                wb.close()
                
                apply_header_styles(excel_path, [output_sheet])
                print(f"✅ Created '{output_sheet}' sheet with headers (0 matched records)")
                
            except Exception as e:
                print(f"⚠️ Error creating cumulative output sheet '{output_sheet}': {e}")
                import traceback
                traceback.print_exc()
            
        except Exception as e:
            print(f"⚠️ Error processing cumulative rule '{rule.get('rule_name', 'unknown')}': {e}")
            import traceback
            traceback.print_exc()

# ========================================
# DAILY CUMULATIVE LOCATION TRACKING SYSTEM
# ========================================

def create_daily_cumulative_tables():
    """Create database tables for daily cumulative location tracking"""
    
    # Table 1: Cumulative monthly data (1st day to current date)
    cumulative_table_sql = """
    CREATE TABLE IF NOT EXISTS daily_cumulative_tracking (
        id INT AUTO_INCREMENT PRIMARY KEY,
        merchant_name VARCHAR(255),
        location VARCHAR(255),
        processing_date DATE,
        month_year VARCHAR(7),
        cumulative_system_balance DECIMAL(15,2) DEFAULT 0,
        cumulative_hdfc_card DECIMAL(15,2) DEFAULT 0,
        cumulative_axis_card DECIMAL(15,2) DEFAULT 0,
        cumulative_paytm_upi DECIMAL(15,2) DEFAULT 0,
        cumulative_amex DECIMAL(15,2) DEFAULT 0,
        cumulative_hdfc_upi DECIMAL(15,2) DEFAULT 0,
        cumulative_total DECIMAL(15,2) DEFAULT 0,
        cumulative_manual_card DECIMAL(15,2) DEFAULT 0,
        cumulative_manual_upi DECIMAL(15,2) DEFAULT 0,
        cumulative_difference DECIMAL(15,2) DEFAULT 0,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
        INDEX idx_merchant_date (merchant_name, processing_date),
        INDEX idx_location (location),
        INDEX idx_month_year (month_year),
        UNIQUE KEY unique_merchant_location_date (merchant_name, location, processing_date)
    )
    """
    
    # Table 2: Daily summary data (specific day only)
    daily_table_sql = """
    CREATE TABLE IF NOT EXISTS daily_location_summary (
        id INT AUTO_INCREMENT PRIMARY KEY,
        merchant_name VARCHAR(255),
        processing_date DATE,
        month_year VARCHAR(7),
        total_locations INT DEFAULT 0,
        daily_system_balance DECIMAL(15,2) DEFAULT 0,
        daily_hdfc_card DECIMAL(15,2) DEFAULT 0,
        daily_axis_card DECIMAL(15,2) DEFAULT 0,
        daily_paytm_upi DECIMAL(15,2) DEFAULT 0,
        daily_amex DECIMAL(15,2) DEFAULT 0,
        daily_hdfc_upi DECIMAL(15,2) DEFAULT 0,
        daily_total DECIMAL(15,2) DEFAULT 0,
        daily_manual_card DECIMAL(15,2) DEFAULT 0,
        daily_manual_upi DECIMAL(15,2) DEFAULT 0,
        daily_difference DECIMAL(15,2) DEFAULT 0,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
        INDEX idx_merchant_date (merchant_name, processing_date),
        INDEX idx_month_year (month_year),
        UNIQUE KEY unique_merchant_date (merchant_name, processing_date)
    )
    """
    
    try:
        with engine.connect() as conn:
            conn.execute(text(cumulative_table_sql))
            conn.execute(text(daily_table_sql))
            conn.commit()
        print("✅ Daily cumulative tracking tables created/verified")
    except Exception as e:
        print(f"⚠️ Error creating daily cumulative tables: {e}")

def fetch_daily_tracking_config(merchant_name):
    """Fetch daily tracking configuration for a merchant"""
    query = f"""
    SELECT daily_tracking_config
    FROM merchant_config
    WHERE merchant_name = '{merchant_name}';
    """
    try:
        with engine.connect() as conn:
            result = conn.execute(text(query)).fetchone()
        
        if not result or not result[0]:
            # Return default configuration if none exists
            return {
                'card_collection_sheet': 'Card Collection',
                'header_row': 2,
                'location_column': 'Unit ',
                'column_mapping': {
                    'system_balance': 'System Balance October -25',
                    'hdfc_card': 'HDFC Card',
                    'axis_card': 'Axis Credit card ',
                    'paytm_upi': 'Paytm UPI',
                    'amex': 'Amex',
                    'hdfc_upi': 'HDFC UPI',
                    'total': 'Total',
                    'manual_card': 'total card manual transaction ',
                    'manual_upi': 'total upi manual transaction',
                    'difference': 'Difference  as per system  closing bal '
                }
            }
        
        return json.loads(result[0])
    except Exception as e:
        print(f"⚠️ Error fetching daily tracking config: {e}")
        return None

def process_daily_cumulative_tracking(merchant_name, excel_path, dry_run=False):
    """Process daily cumulative location tracking"""
    print(f"📊 Starting daily cumulative tracking for {merchant_name}...")
    
    try:
        # 1. Create tables if not exist (skip in dry-run)
        if not dry_run:
            create_daily_cumulative_tables()
        
        # 2. Get merchant configuration
        config = fetch_daily_tracking_config(merchant_name)
        if not config:
            print(f"⚠️ No daily tracking config found for {merchant_name}")
            return
        
        # 3. Read Card Collection sheet
        card_collection_sheet = config.get('card_collection_sheet', 'Card Collection')
        header_row = config.get('header_row', 2)
        
        try:
            df = pd.read_excel(excel_path, sheet_name=card_collection_sheet, header=header_row)
            print(f"✅ Read {card_collection_sheet} sheet with {len(df)} rows")
        except Exception as e:
            print(f"⚠️ Error reading {card_collection_sheet} sheet: {e}")
            return
        
        # 4. Clean and process data
        location_col = config.get('location_column', 'Unit ')
        column_mapping = config.get('column_mapping', {})

        # Normalize dataframe column names for robust matching (strip, lower)
        df_cols = list(df.columns)
        
        # Use global find_column utility for consistency
        resolved_location_col = find_column(df, [location_col]) or location_col
        if resolved_location_col not in df_cols:
            print(f"⚠️ Location column '{location_col}' not found in sheet columns; available: {df_cols}")
        else:
            # Filter out empty locations and clean data
            df = df.dropna(subset=[resolved_location_col])
            df = df[df[resolved_location_col].astype(str).str.strip() != '']

        # Resolve column mapping keys to actual dataframe columns
        resolved_mapping = {}
        for key, col_name in column_mapping.items():
            found = find_column(df, [col_name])
            if found:
                resolved_mapping[key] = found
            else:
                # keep original name if not found so downstream code can see missing
                resolved_mapping[key] = col_name
                print(f"⚠️ Column for '{key}' ('{col_name}') not found; will default to zeros")

        # replace column_mapping with resolved names for subsequent processing
        column_mapping = resolved_mapping
        
        current_date = datetime.now().date()
        month_year = current_date.strftime('%Y-%m')
        
        print(f"📍 Found {len(df)} locations to process")

        # If dry-run, print resolved mapping and samples and return (non-destructive)
        if dry_run:
            print('\n--- Dry-run: Resolved Columns ---')
            print('Resolved location column:', resolved_location_col)
            print('Resolved mapping:')
            for k, v in column_mapping.items():
                print(f" - {k}: {v}")
            print('\n--- Dry-run: Sample Rows (first 5) ---')
            # prepare sample columns to show: location + resolved mapping values
            sample_cols = [resolved_location_col] + [c for c in column_mapping.values() if c in df.columns]
            if not sample_cols:
                sample_cols = df.columns[:5].tolist()
            print(df[sample_cols].head(5).to_string(index=False))
            return
        
        # 5. Process each location
        location_data = []
        daily_totals = {
            'system_balance': 0, 'hdfc_card': 0, 'axis_card': 0, 'paytm_upi': 0,
            'amex': 0, 'hdfc_upi': 0, 'total': 0, 'manual_card': 0, 'manual_upi': 0, 'difference': 0
        }
        
        for _, row in df.iterrows():
            location = str(row.get(resolved_location_col, '')).strip()
            if not location:
                continue
            
            # Extract values based on column mapping
            location_record = {
                'merchant_name': merchant_name,
                'location': location,
                'processing_date': current_date,
                'month_year': month_year
            }
            
            for key, col_name in column_mapping.items():
                try:
                    value = row.get(col_name, 0)
                    if pd.notna(value) and str(value).strip():
                        # Clean numeric value
                        clean_value = str(value).replace(',', '').replace('₹', '').strip()
                        numeric_value = float(clean_value) if clean_value and clean_value != 'nan' else 0
                    else:
                        numeric_value = 0
                    
                    location_record[f'cumulative_{key}'] = numeric_value
                    daily_totals[key] += numeric_value
                    
                except (ValueError, TypeError):
                    location_record[f'cumulative_{key}'] = 0
            
            location_data.append(location_record)
        
        # 6. Get previous cumulative data for this month
        previous_cumulative_query = """
        SELECT t1.location, 
               t1.processing_date as last_date,
               t1.cumulative_system_balance, t1.cumulative_hdfc_card, t1.cumulative_axis_card,
               t1.cumulative_paytm_upi, t1.cumulative_amex, t1.cumulative_hdfc_upi,
               t1.cumulative_total, t1.cumulative_manual_card, t1.cumulative_manual_upi, t1.cumulative_difference
        FROM daily_cumulative_tracking t1
        INNER JOIN (
            SELECT location, MAX(processing_date) as max_date
            FROM daily_cumulative_tracking
            WHERE merchant_name = :merchant_name 
            AND month_year = :month_year
            AND processing_date < :current_date
            GROUP BY location
        ) t2 ON t1.location = t2.location AND t1.processing_date = t2.max_date
        WHERE t1.merchant_name = :merchant_name 
        AND t1.month_year = :month_year
        """
        
        with engine.connect() as conn:
            previous_data = pd.read_sql_query(
                text(previous_cumulative_query), 
                conn, 
                params={
                    'merchant_name': merchant_name,
                    'month_year': month_year,
                    'current_date': current_date
                }
            )
        
        # 7. Calculate cumulative values (add today's data to previous cumulative)
        previous_totals = {}
        if not previous_data.empty:
            for _, prev_row in previous_data.iterrows():
                location = prev_row['location']
                previous_totals[location] = {
                    'system_balance': float(prev_row.get('cumulative_system_balance', 0)),
                    'hdfc_card': float(prev_row.get('cumulative_hdfc_card', 0)),
                    'axis_card': float(prev_row.get('cumulative_axis_card', 0)),
                    'paytm_upi': float(prev_row.get('cumulative_paytm_upi', 0)),
                    'amex': float(prev_row.get('cumulative_amex', 0)),
                    'hdfc_upi': float(prev_row.get('cumulative_hdfc_upi', 0)),
                    'total': float(prev_row.get('cumulative_total', 0)),
                    'manual_card': float(prev_row.get('cumulative_manual_card', 0)),
                    'manual_upi': float(prev_row.get('cumulative_manual_upi', 0)),
                    'difference': float(prev_row.get('cumulative_difference', 0))
                }
        
        # Add previous cumulative to today's data
        for record in location_data:
            location = record['location']
            if location in previous_totals:
                for key in daily_totals.keys():
                    record[f'cumulative_{key}'] += previous_totals[location][key]
        
        # 8. Clear today's data and insert new cumulative data
        delete_today_query = """
        DELETE FROM daily_cumulative_tracking 
        WHERE merchant_name = :merchant_name 
        AND processing_date = :processing_date
        """
        
        with engine.connect() as conn:
            conn.execute(text(delete_today_query), {
                'merchant_name': merchant_name,
                'processing_date': current_date
            })
            conn.commit()
        
        # Insert new cumulative data
        if location_data:
            cumulative_df = pd.DataFrame(location_data)
            cumulative_df.to_sql('daily_cumulative_tracking', con=engine, if_exists='append', index=False)
            print(f"✅ Stored cumulative data for {len(location_data)} locations")
        
        # 9. Update daily summary
        delete_daily_summary_query = """
        DELETE FROM daily_location_summary 
        WHERE merchant_name = :merchant_name 
        AND processing_date = :processing_date
        """
        
        with engine.connect() as conn:
            conn.execute(text(delete_daily_summary_query), {
                'merchant_name': merchant_name,
                'processing_date': current_date
            })
            conn.commit()
        
        # Insert daily summary
        daily_summary = {
            'merchant_name': merchant_name,
            'processing_date': current_date,
            'month_year': month_year,
            'total_locations': len(location_data)
        }
        
        for key, value in daily_totals.items():
            daily_summary[f'daily_{key}'] = value
        
        summary_df = pd.DataFrame([daily_summary])
        summary_df.to_sql('daily_location_summary', con=engine, if_exists='append', index=False)
        print(f"✅ Updated daily summary for {current_date}")
        
        # 10. Create Excel reports
        create_daily_tracking_reports(merchant_name, excel_path, current_date, month_year)
        
        print(f"✅ Daily cumulative tracking completed for {merchant_name}")
        
    except Exception as e:
        print(f"⚠️ Error in daily cumulative tracking for {merchant_name}: {e}")

def create_daily_tracking_reports(merchant_name, excel_path, current_date, month_year):
    """Create Excel sheets with cumulative and daily reports"""
    try:
        # 1. Get cumulative data for current month
        cumulative_query = """
        SELECT location, processing_date,
               cumulative_system_balance, cumulative_hdfc_card, cumulative_axis_card,
               cumulative_paytm_upi, cumulative_amex, cumulative_hdfc_upi,
               cumulative_total, cumulative_manual_card, cumulative_manual_upi, cumulative_difference
        FROM daily_cumulative_tracking
        WHERE merchant_name = :merchant_name 
        AND month_year = :month_year
        ORDER BY processing_date DESC, location
        """
        
        with engine.connect() as conn:
            cumulative_df = pd.read_sql_query(
                text(cumulative_query), 
                conn, 
                params={'merchant_name': merchant_name, 'month_year': month_year}
            )
        
        # 2. Get daily summary data for current month
        daily_query = """
        SELECT processing_date, total_locations,
               daily_system_balance, daily_hdfc_card, daily_axis_card,
               daily_paytm_upi, daily_amex, daily_hdfc_upi,
               daily_total, daily_manual_card, daily_manual_upi, daily_difference
        FROM daily_location_summary
        WHERE merchant_name = :merchant_name 
        AND month_year = :month_year
        ORDER BY processing_date DESC
        """
        
        with engine.connect() as conn:
            daily_df = pd.read_sql_query(
                text(daily_query), 
                conn, 
                params={'merchant_name': merchant_name, 'month_year': month_year}
            )
        
        # 3. Create Excel sheets
        if not cumulative_df.empty:
            write_excel_sheet(excel_path, 'Monthly-Cumulative-Tracking', cumulative_df)
            print(f"✅ Created Monthly-Cumulative-Tracking sheet with {len(cumulative_df)} records")

        if not daily_df.empty:
            write_excel_sheet(excel_path, 'Daily-Location-Summary', daily_df)
            print(f"✅ Created Daily-Location-Summary sheet with {len(daily_df)} records")
        
    except Exception as e:
        print(f"⚠️ Error creating daily tracking reports: {e}")

def update_card_collection_with_cumulative_data():
    """
    Fetch data grouped by Trans Date and Units, summing Amount, and update the Card Collection page.
    """
    query = """
    SELECT 
        *,
        SUM(`Amount`) AS total_amount
    FROM cumulative_sheet
    GROUP BY `Trans Date`, `Units`;
    """

    with engine.connect() as conn:
        grouped_data = pd.read_sql_query(text(query), conn)

    for _, row in grouped_data.iterrows():
        transaction_date = row['Trans Date']
        unit = row['Units']
        total_amount = row['total_amount']

        # Update the Card Collection table for the specific transaction date and unit
        update_query = """
        UPDATE card_collection
        SET `Amount` = :total_amount
        WHERE `Trans Date` = :transaction_date AND `Units` = :unit;
        """
        with engine.connect() as conn:
            conn.execute(
                text(update_query),
                {"total_amount": total_amount, "transaction_date": transaction_date, "unit": unit}
            )
    print("✅ Card Collection page updated successfully.")

def process_automated_unmatched_and_cumulative(merchant_name, excel_path, dry_run=False):
    """
    Main function to process all automated unmatched and cumulative operations
    """
    print(f"🚀 Starting automated processing for {merchant_name}...")
    
    try:
        # 1. Create storage table if not exists
        create_unmatched_data_table()
        
        # 2. Consolidate All-Unmatched sheet based on merchant config
        consolidated_df = consolidate_all_unmatched_sheet(excel_path, merchant_name)
        
        if consolidated_df is not None:
            # 3. Store unmatched data to database
            store_unmatched_data_to_db(excel_path, merchant_name)
            
            # 4. Process cumulative matching
            process_cumulative_matching(merchant_name, excel_path)
        
        # 5. NEW: Process daily cumulative location tracking
        print("🔄 Starting daily cumulative location tracking...")
        # respect global DRY_RUN if caller didn't explicitly set dry_run
        effective_dry = dry_run or globals().get('DRY_RUN', False)
        process_daily_cumulative_tracking(merchant_name, excel_path, dry_run=effective_dry)
        
        # 6. NEW: Process Card Collection sheet automation
        print("🔄 Starting Card Collection sheet automation...")
        process_card_collection_automation(merchant_name, excel_path, dry_run=effective_dry)
        
        print(f"✅ Automated processing completed for {merchant_name}")
        
    except Exception as e:
        print(f"⚠️ Error in automated processing for {merchant_name}: {e}")


# ============================================================================
# CARD COLLECTION SHEET AUTOMATION
# ============================================================================

def create_card_collection_tables():
    """Create database tables for storing Card Collection daily data"""
    
    # Table to store daily card collection data per location
    card_collection_daily_sql = """
    CREATE TABLE IF NOT EXISTS card_collection_daily (
        id INT AUTO_INCREMENT PRIMARY KEY,
        merchant_name VARCHAR(255),
        location VARCHAR(100),
        processing_date DATE,
        month_year VARCHAR(7),
        system_balance DECIMAL(15,2) DEFAULT 0,
        hdfc_card DECIMAL(15,2) DEFAULT 0,
        axis_card DECIMAL(15,2) DEFAULT 0,
        paytm_upi DECIMAL(15,2) DEFAULT 0,
        amex DECIMAL(15,2) DEFAULT 0,
        hdfc_upi DECIMAL(15,2) DEFAULT 0,
        total_amount DECIMAL(15,2) DEFAULT 0,
        manual_card DECIMAL(15,2) DEFAULT 0,
        manual_upi DECIMAL(15,2) DEFAULT 0,
        difference DECIMAL(15,2) DEFAULT 0,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
        INDEX idx_merchant_date (merchant_name, processing_date),
        INDEX idx_location (location),
        INDEX idx_month_year (month_year),
        UNIQUE KEY unique_merchant_location_date (merchant_name, location, processing_date)
    )
    """
    
    # Table to store daily summary totals
    card_collection_summary_sql = """
    CREATE TABLE IF NOT EXISTS card_collection_summary (
        id INT AUTO_INCREMENT PRIMARY KEY,
        merchant_name VARCHAR(255),
        processing_date DATE,
        month_year VARCHAR(7),
        total_system_balance DECIMAL(15,2) DEFAULT 0,
        total_hdfc_card DECIMAL(15,2) DEFAULT 0,
        total_axis_card DECIMAL(15,2) DEFAULT 0,
        total_paytm_upi DECIMAL(15,2) DEFAULT 0,
        total_amex DECIMAL(15,2) DEFAULT 0,
        total_hdfc_upi DECIMAL(15,2) DEFAULT 0,
        total_amount DECIMAL(15,2) DEFAULT 0,
        total_manual_card DECIMAL(15,2) DEFAULT 0,
        total_manual_upi DECIMAL(15,2) DEFAULT 0,
        total_difference DECIMAL(15,2) DEFAULT 0,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
        INDEX idx_merchant_date (merchant_name, processing_date),
        UNIQUE KEY unique_merchant_date (merchant_name, processing_date)
    )
    """
    
    try:
        with engine.connect() as conn:
            conn.execute(text(card_collection_daily_sql))
            conn.execute(text(card_collection_summary_sql))
            conn.commit()
        print("✅ Card Collection tables created/verified")
    except Exception as e:
        print(f"⚠️ Error creating Card Collection tables: {e}")


def get_card_collection_config(merchant_name, excel_path=None):
    """
    Get Card Collection configuration for a merchant.
    Configuration can come from:
    1. Database (merchant_config.card_collection_config)
    2. Auto-detected from Excel sheets (if no DB config)
    
    Works for any merchant with single or multiple locations.
    """
    
    # Base configuration structure
    base_config = {
        'enabled': True,
        'card_collection_sheet': 'Card Collection',
        'summary_sheet': 'Summary',
        'locations': [],  # Will be auto-detected if not configured
        'location_aliases': {},  # Maps short codes to full names
        'column_mapping': {
            # Standard column names - can be customized per merchant
            'system_balance': 'System Balance',
            'hdfc_card': 'HDFC Card',
            'axis_card': 'Axis Credit card',
            'paytm_upi': 'Paytm UPI',
            'amex': 'Amex',
            'hdfc_upi': 'HDFC UPI',
            'total': 'Total',
            'manual_card': 'total card manual transaction',
            'manual_upi': 'total upi manual transaction',
            'difference': 'Difference'
        },
        'matched_sheets': {
            # Sheet name patterns for different payment types
            'hdfc_upi': ['HISBankUpi-Matched', 'BankUpi-Matched', 'UPI-Matched'],
            'hdfc_card': ['HISBankcard-Matched', 'Bankcard-Matched', 'Card-Matched'],
            'manual_upi': ['M-HISBankUpi-Matched', 'M-BankUpi-Matched', 'Manual-UPI-Matched'],
            'manual_card': ['M-HISBankcard-Matched', 'M-Bankcard-Matched', 'Manual-Card-Matched'],
            'axis_card': ['Axis-Matched', 'AxisCard-Matched'],
            'amex': ['Amex-Matched', 'AMEX-Matched'],
            'paytm_upi': ['Paytm-Matched', 'PaytmUPI-Matched']
        },
        'his_sheets': ['HIS-Matched', 'HIS-Unmatched'],  # For system balance
        'location_column_keywords': ['unit_location', 'unit/location', 'location', 'unit', 'branch', 'center'],
        'amount_column_keywords': ['amount', 'total_amount', 'transaction_amount']
    }
    
    # Try to fetch from database first
    try:
        query = f"""
        SELECT card_collection_config
        FROM merchant_config
        WHERE merchant_name = '{merchant_name}';
        """
        with engine.connect() as conn:
            result = conn.execute(text(query)).fetchone()
        
        if result and result[0]:
            db_config = safe_json_load(result[0], {})
            if db_config:
                # Merge DB config with base config
                for key, value in db_config.items():
                    if isinstance(value, dict) and isinstance(base_config.get(key), dict):
                        base_config[key].update(value)
                    else:
                        base_config[key] = value
                print(f"✅ Loaded Card Collection config from database for {merchant_name}")
                
                # If locations are configured in DB, use them
                if base_config.get('locations'):
                    return base_config
    except Exception as e:
        print(f"ℹ️ No DB config for Card Collection: {e}")
    
    # Auto-detect locations from Excel if not configured and excel_path provided
    if excel_path and (not base_config.get('locations') or len(base_config.get('locations', [])) == 0):
        detected_config = _auto_detect_locations_from_excel(excel_path, base_config)
        base_config.update(detected_config)
    
    return base_config


def _auto_detect_locations_from_excel(excel_path, config):
    """
    Auto-detect locations and their aliases from Excel matched sheets.
    Works for any merchant with any number of locations.
    """
    detected = {
        'locations': [],
        'location_aliases': {}
    }
    
    try:
        wb = load_workbook(excel_path, read_only=True)
        available_sheets = wb.sheetnames
        wb.close()
        
        # Find all unique location codes from matched sheets
        all_location_codes = set()
        
        # Check various sheet patterns
        sheet_patterns = []
        for sheet_list in config.get('matched_sheets', {}).values():
            sheet_patterns.extend(sheet_list)
        sheet_patterns.extend(config.get('his_sheets', []))
        
        for sheet_name in available_sheets:
            # Check if this is a data sheet we can extract locations from
            is_data_sheet = any(pattern.lower() in sheet_name.lower() for pattern in 
                               ['matched', 'unmatched', 'his', 'bank', 'upi', 'card'])
            
            if not is_data_sheet:
                continue
            
            try:
                df = pd.read_excel(excel_path, sheet_name=sheet_name, nrows=1000)  # Limit rows for speed
                
                if df.empty:
                    continue
                
                # Find location column
                loc_col = None
                for col in df.columns:
                    col_lower = str(col).lower().replace(' ', '_').replace('/', '_')
                    for keyword in config.get('location_column_keywords', ['location', 'unit']):
                        if keyword.lower() in col_lower:
                            loc_col = col
                            break
                    if loc_col:
                        break
                
                if loc_col:
                    # Get unique location values
                    unique_locs = df[loc_col].dropna().unique()
                    for loc in unique_locs:
                        loc_str = str(loc).strip()
                        if loc_str and loc_str.lower() not in ['nan', 'none', '']:
                            all_location_codes.add(loc_str)
            
            except Exception as e:
                continue
        
        # Build location list and aliases
        # Common location code mappings (can be extended)
        common_aliases = {
            'BBW': 'Bibwewadi',
            'HDP': 'Hadapsar',
            'Koth': 'Kothrud',
            'NGR': 'Nagar Road',
            'NSK': 'Nashik',
            'Shiv': 'ShivajiNagar',
            'Deccan': 'Deccan',
            'Lab': 'Lab',
            'Main': 'Main Branch',
            'HO': 'Head Office',
            'Branch1': 'Branch 1',
            'Branch2': 'Branch 2'
        }
        
        locations_set = set()
        for code in all_location_codes:
            # Check if it's a known alias
            if code in common_aliases:
                full_name = common_aliases[code]
                detected['location_aliases'][code] = full_name
                locations_set.add(full_name)
            else:
                # Use the code as-is (it might already be the full name)
                detected['location_aliases'][code] = code
                locations_set.add(code)
        
        detected['locations'] = sorted(list(locations_set))
        
        if detected['locations']:
            print(f"✅ Auto-detected {len(detected['locations'])} locations: {detected['locations']}")
        else:
            print("ℹ️ No locations detected from Excel sheets")
        
    except Exception as e:
        print(f"⚠️ Error auto-detecting locations: {e}")
    
    return detected


def extract_location_data_from_summary(excel_path, merchant_name, processing_date):
    """
    Extract location-wise data from Summary sheet for Card Collection.
    Works dynamically for any merchant configuration.
    Returns dict: {location: {hdfc_card, axis_card, paytm_upi, amex, hdfc_upi, total, manual_card, manual_upi, system_balance, difference}}
    """
    config = get_card_collection_config(merchant_name, excel_path)
    location_aliases = config.get('location_aliases', {})
    locations = config.get('locations', [])
    
    # If no locations configured, return empty
    if not locations:
        print("ℹ️ No locations configured for Card Collection")
        return {}
    
    try:
        # Read Summary sheet
        summary_sheet = config.get('summary_sheet', 'Summary')
        summary_df = pd.read_excel(excel_path, sheet_name=summary_sheet, header=None)
        
        location_data = {}
        
        # Initialize all locations with zero values
        for location in locations:
            location_data[location] = {
                'system_balance': 0,
                'hdfc_card': 0,
                'axis_card': 0,
                'paytm_upi': 0,
                'amex': 0,
                'hdfc_upi': 0,
                'total': 0,
                'manual_card': 0,
                'manual_upi': 0,
                'difference': 0
            }
        
        # Parse Summary sheet to extract location data
        # Look for location codes in the data and extract amounts
        for idx, row in summary_df.iterrows():
            # Check multiple columns for location codes
            for col_idx in range(min(3, len(row))):
                cell_value = str(row.iloc[col_idx]).strip() if pd.notna(row.iloc[col_idx]) else ''
                
                if cell_value in location_aliases:
                    location_name = location_aliases[cell_value]
                    
                    # Try to get amount from various columns
                    # Column 5: Total Amount, Column 7: Matched Amount typically
                    matched_amount = 0
                    for amt_col in [7, 5, 6, 8]:
                        if len(row) > amt_col:
                            val = get_numeric_value(row.iloc[amt_col])
                            if val != 0:
                                matched_amount = val
                                break
                    
                    # Store based on row position (UPI vs Cards sections)
                    # This is a heuristic - actual section detection may vary
                    if location_name in location_data:
                        if matched_amount > location_data[location_name].get('hdfc_upi', 0):
                            location_data[location_name]['hdfc_upi'] = matched_amount
        
        # Calculate totals
        for location in location_data:
            data = location_data[location]
            data['total'] = (data['hdfc_card'] + data['axis_card'] + data['paytm_upi'] + 
                           data['amex'] + data['hdfc_upi'])
        
        print(f"✅ Extracted data for {len([l for l in location_data if location_data[l]['total'] > 0])} locations from Summary")
        return location_data
        
    except Exception as e:
        print(f"⚠️ Error extracting location data from Summary: {e}")
        return {}


def extract_location_data_from_matched_sheets(excel_path, merchant_name, processing_date):
    """
    Extract location-wise data from all matched sheets for Card Collection.
    Works dynamically for any merchant - auto-detects locations and sheet patterns.
    This provides more accurate data by reading directly from matched/unmatched sheets.
    """
    config = get_card_collection_config(merchant_name, excel_path)
    location_aliases = config.get('location_aliases', {})
    locations = config.get('locations', [])
    
    # If no locations detected, return empty
    if not locations:
        print("ℹ️ No locations configured/detected for Card Collection")
        return {}
    
    location_data = {}
    
    # Initialize all locations with zero values
    for location in locations:
        location_data[location] = {
            'system_balance': 0,
            'hdfc_card': 0,
            'axis_card': 0,
            'paytm_upi': 0,
            'amex': 0,
            'hdfc_upi': 0,
            'total': 0,
            'manual_card': 0,
            'manual_upi': 0,
            'difference': 0
        }
    
    # Use global get_numeric_value utility
    # Use global find_column utility
    
    try:
        wb = load_workbook(excel_path, read_only=True)
        available_sheets = wb.sheetnames
        wb.close()
        
        # Dynamic sheet pattern detection
        # Look for sheets containing these patterns and map to data types
        sheet_type_patterns = {
            'hdfc_upi': ['bankupi-matched', 'upi-matched', 'upimatched'],
            'hdfc_card': ['bankcard-matched', 'card-matched', 'cardmatched'],
            'manual_upi': ['m-bankupi', 'm-hisupi', 'manual-upi', 'manualupi'],
            'manual_card': ['m-bankcard', 'm-hiscard', 'manual-card', 'manualcard'],
            'axis_card': ['axis-matched', 'axiscard'],
            'amex': ['amex-matched', 'amex'],
            'paytm_upi': ['paytm-matched', 'paytmupi'],
            'system_balance': ['his-matched', 'his-unmatched', 'hismatched']
        }
        
        # Process each available sheet
        for sheet_name in available_sheets:
            sheet_lower = sheet_name.lower().replace(' ', '')
            
            # Determine what type of data this sheet contains
            data_type = None
            for dtype, patterns in sheet_type_patterns.items():
                for pattern in patterns:
                    if pattern in sheet_lower:
                        data_type = dtype
                        break
                if data_type:
                    break
            
            if not data_type:
                continue
            
            # Skip unmatched sheets for payment types (only use for system_balance)
            if 'unmatched' in sheet_lower and data_type != 'system_balance':
                continue
            
            try:
                df = pd.read_excel(excel_path, sheet_name=sheet_name)
                
                if df.empty:
                    continue
                
                # Find location column using config keywords
                loc_keywords = config.get('location_column_keywords', 
                                         ['unit_location', 'unit/location', 'location', 'unit', 'branch'])
                loc_col = find_column(df, loc_keywords)
                
                if loc_col is None:
                    continue
                
                # Find amount column
                amt_keywords = config.get('amount_column_keywords', 
                                         ['amount', 'total_amount', 'transaction_amount'])
                amount_col = find_column(df, amt_keywords)
                
                if amount_col is None:
                    continue
                
                # Group by location and sum
                grouped = df.groupby(loc_col)[amount_col].sum()
                
                for loc_code, amount in grouped.items():
                    loc_code_str = str(loc_code).strip()
                    
                    # Map to full location name
                    if loc_code_str in location_aliases:
                        location_name = location_aliases[loc_code_str]
                    elif loc_code_str in locations:
                        location_name = loc_code_str
                    else:
                        # Try case-insensitive match
                        matched = False
                        for alias, full_name in location_aliases.items():
                            if alias.lower() == loc_code_str.lower():
                                location_name = full_name
                                matched = True
                                break
                        if not matched:
                            continue
                    
                    if location_name not in location_data:
                        continue
                    
                    amount_val = get_numeric_value(amount)
                    location_data[location_name][data_type] += amount_val
                
                print(f"✅ Processed {sheet_name} ({data_type}): {len(grouped)} locations")
                
            except Exception as e:
                print(f"⚠️ Error processing {sheet_name}: {e}")
                continue
        
        # Calculate totals and differences
        for location in location_data:
            data = location_data[location]
            data['total'] = (data['hdfc_card'] + data['axis_card'] + data['paytm_upi'] + 
                           data['amex'] + data['hdfc_upi'])
            # If system_balance not detected, calculate from totals
            if data['system_balance'] == 0:
                data['system_balance'] = data['total'] + data['manual_card'] + data['manual_upi']
            data['difference'] = data['system_balance'] - data['total'] - data['manual_card'] - data['manual_upi']
        
        # Count locations with data
        locations_with_data = len([l for l in location_data if location_data[l]['total'] > 0 or 
                                   location_data[l]['manual_card'] > 0 or location_data[l]['manual_upi'] > 0])
        print(f"✅ Extracted data for {locations_with_data}/{len(locations)} locations from matched sheets")
        
        return location_data
        
    except Exception as e:
        print(f"⚠️ Error extracting data from matched sheets: {e}")
        import traceback
        traceback.print_exc()
        return location_data


def store_card_collection_daily(merchant_name, processing_date, location_data):
    """Store daily Card Collection data to database"""
    
    month_year = processing_date.strftime('%Y-%m')
    
    try:
        # Delete existing data for this date (to allow re-processing)
        delete_query = """
        DELETE FROM card_collection_daily 
        WHERE merchant_name = :merchant_name 
        AND processing_date = :processing_date
        """
        
        with engine.connect() as conn:
            conn.execute(text(delete_query), {
                'merchant_name': merchant_name,
                'processing_date': processing_date
            })
            conn.commit()
        
        # Insert new data
        records = []
        total_summary = {
            'system_balance': 0, 'hdfc_card': 0, 'axis_card': 0, 'paytm_upi': 0,
            'amex': 0, 'hdfc_upi': 0, 'total_amount': 0, 'manual_card': 0,
            'manual_upi': 0, 'difference': 0
        }
        
        for location, data in location_data.items():
            record = {
                'merchant_name': merchant_name,
                'location': location,
                'processing_date': processing_date,
                'month_year': month_year,
                'system_balance': data.get('system_balance', 0),
                'hdfc_card': data.get('hdfc_card', 0),
                'axis_card': data.get('axis_card', 0),
                'paytm_upi': data.get('paytm_upi', 0),
                'amex': data.get('amex', 0),
                'hdfc_upi': data.get('hdfc_upi', 0),
                'total_amount': data.get('total', 0),
                'manual_card': data.get('manual_card', 0),
                'manual_upi': data.get('manual_upi', 0),
                'difference': data.get('difference', 0)
            }
            records.append(record)
            
            # Accumulate totals
            for key in total_summary:
                if key == 'total_amount':
                    total_summary[key] += data.get('total', 0)
                else:
                    total_summary[key] += data.get(key, 0)
        
        # Insert location records
        if records:
            df = pd.DataFrame(records)
            df.to_sql('card_collection_daily', con=engine, if_exists='append', index=False)
            print(f"✅ Stored Card Collection data for {len(records)} locations")
        
        # Store daily summary
        delete_summary_query = """
        DELETE FROM card_collection_summary 
        WHERE merchant_name = :merchant_name 
        AND processing_date = :processing_date
        """
        
        with engine.connect() as conn:
            conn.execute(text(delete_summary_query), {
                'merchant_name': merchant_name,
                'processing_date': processing_date
            })
            conn.commit()
        
        summary_record = {
            'merchant_name': merchant_name,
            'processing_date': processing_date,
            'month_year': month_year,
            'total_system_balance': total_summary['system_balance'],
            'total_hdfc_card': total_summary['hdfc_card'],
            'total_axis_card': total_summary['axis_card'],
            'total_paytm_upi': total_summary['paytm_upi'],
            'total_amex': total_summary['amex'],
            'total_hdfc_upi': total_summary['hdfc_upi'],
            'total_amount': total_summary['total_amount'],
            'total_manual_card': total_summary['manual_card'],
            'total_manual_upi': total_summary['manual_upi'],
            'total_difference': total_summary['difference']
        }
        
        summary_df = pd.DataFrame([summary_record])
        summary_df.to_sql('card_collection_summary', con=engine, if_exists='append', index=False)
        print(f"✅ Stored Card Collection summary for {processing_date}")
        
        return True
        
    except Exception as e:
        print(f"⚠️ Error storing Card Collection data: {e}")
        return False


def get_cumulative_card_collection(merchant_name, month_year, before_date=None):
    """
    Get cumulative Card Collection data for a month up to (but not including) a specific date
    Returns: {location: {cumulative values}}
    """
    try:
        if before_date:
            query = """
            SELECT location,
                   SUM(system_balance) as cum_system_balance,
                   SUM(hdfc_card) as cum_hdfc_card,
                   SUM(axis_card) as cum_axis_card,
                   SUM(paytm_upi) as cum_paytm_upi,
                   SUM(amex) as cum_amex,
                   SUM(hdfc_upi) as cum_hdfc_upi,
                   SUM(total_amount) as cum_total,
                   SUM(manual_card) as cum_manual_card,
                   SUM(manual_upi) as cum_manual_upi,
                   SUM(difference) as cum_difference
            FROM card_collection_daily
            WHERE merchant_name = :merchant_name
            AND month_year = :month_year
            AND processing_date < :before_date
            GROUP BY location
            """
            params = {
                'merchant_name': merchant_name,
                'month_year': month_year,
                'before_date': before_date
            }
        else:
            query = """
            SELECT location,
                   SUM(system_balance) as cum_system_balance,
                   SUM(hdfc_card) as cum_hdfc_card,
                   SUM(axis_card) as cum_axis_card,
                   SUM(paytm_upi) as cum_paytm_upi,
                   SUM(amex) as cum_amex,
                   SUM(hdfc_upi) as cum_hdfc_upi,
                   SUM(total_amount) as cum_total,
                   SUM(manual_card) as cum_manual_card,
                   SUM(manual_upi) as cum_manual_upi,
                   SUM(difference) as cum_difference
            FROM card_collection_daily
            WHERE merchant_name = :merchant_name
            AND month_year = :month_year
            GROUP BY location
            """
            params = {
                'merchant_name': merchant_name,
                'month_year': month_year
            }
        
        with engine.connect() as conn:
            df = pd.read_sql_query(text(query), conn, params=params)
        
        cumulative_data = {}
        for _, row in df.iterrows():
            cumulative_data[row['location']] = {
                'system_balance': float(row['cum_system_balance'] or 0),
                'hdfc_card': float(row['cum_hdfc_card'] or 0),
                'axis_card': float(row['cum_axis_card'] or 0),
                'paytm_upi': float(row['cum_paytm_upi'] or 0),
                'amex': float(row['cum_amex'] or 0),
                'hdfc_upi': float(row['cum_hdfc_upi'] or 0),
                'total': float(row['cum_total'] or 0),
                'manual_card': float(row['cum_manual_card'] or 0),
                'manual_upi': float(row['cum_manual_upi'] or 0),
                'difference': float(row['cum_difference'] or 0)
            }
        
        return cumulative_data
        
    except Exception as e:
        print(f"⚠️ Error getting cumulative data: {e}")
        return {}


def get_all_daily_data_for_month(merchant_name, month_year):
    """Get all daily data for a month, ordered by date"""
    try:
        query = """
        SELECT location, processing_date, system_balance, hdfc_card, axis_card,
               paytm_upi, amex, hdfc_upi, total_amount, manual_card, manual_upi, difference
        FROM card_collection_daily
        WHERE merchant_name = :merchant_name
        AND month_year = :month_year
        ORDER BY processing_date ASC, location ASC
        """
        
        with engine.connect() as conn:
            df = pd.read_sql_query(text(query), conn, params={
                'merchant_name': merchant_name,
                'month_year': month_year
            })
        
        return df
        
    except Exception as e:
        print(f"⚠️ Error getting monthly data: {e}")
        return pd.DataFrame()


def update_card_collection_sheet(excel_path, merchant_name, processing_date, location_data):
    """
    Update the Card Collection sheet with:
    1. Table 1: Cumulative totals (update existing rows)
    2. Table 2: Daily summary (update current date row)
    3. Tables 3+: Append new date row to each location section
    """
    
    config = get_card_collection_config(merchant_name, excel_path)
    month_year = processing_date.strftime('%Y-%m')
    month_name = processing_date.strftime('%B')
    year_suffix = processing_date.strftime('%y')
    
    try:
        wb = load_workbook(excel_path)
        
        # Check if Card Collection sheet exists
        if 'Card Collection' not in wb.sheetnames:
            print(f"⚠️ 'Card Collection' sheet not found. Creating new sheet...")
            ws = wb.create_sheet('Card Collection')
            _initialize_card_collection_sheet(ws, config, month_name, year_suffix)
        else:
            ws = wb['Card Collection']
        
        # Get cumulative data up to (but not including) today, then add today's data for full month-to-date
        cumulative_data = get_cumulative_card_collection(merchant_name, month_year, before_date=processing_date)
        # Add today's data to cumulative
        if not cumulative_data:
            cumulative_data = location_data.copy()
        else:
            for location, data in location_data.items():
                if location in cumulative_data:
                    for key in data:
                        cumulative_data[location][key] = cumulative_data[location].get(key, 0) + data.get(key, 0)
                else:
                    cumulative_data[location] = data.copy()
        
        # 1. Update Table 1 - Cumulative Summary
        _update_cumulative_table(ws, config, cumulative_data)
        
        # 2. Update Table 2 - Daily Summary
        _update_daily_summary_table(ws, config, processing_date, location_data)
        
        # 3. Update Tables 3+ - Location Sections (append new date rows)
        _update_location_sections(ws, config, merchant_name, month_year, processing_date, location_data)
        
        wb.save(excel_path)
        print(f"✅ Card Collection sheet updated successfully")
        
        return True
        
    except Exception as e:
        print(f"⚠️ Error updating Card Collection sheet: {e}")
        import traceback
        traceback.print_exc()
        return False


def _initialize_card_collection_sheet(ws, config, month_name, year_suffix):
    """Initialize a new Card Collection sheet with proper structure"""
    
    # Headers for the columns
    headers = ['', 'System Balance ' + month_name + ' -' + year_suffix, 
               'HDFC Card', 'Axis Credit card', 'Paytm UPI', 'Amex', 'HDFC UPI',
               'Total', 'total card manual transaction', 'total upi manual transaction',
               'Difference  as per system  closing bal']
    
    # Row 1: Empty
    # Row 2: "Collection on {Month} 2025"
    ws.cell(row=2, column=1, value=f"Collection on {month_name} 2025")
    
    # Row 3: Headers
    for col, header in enumerate(headers, 1):
        ws.cell(row=3, column=col, value=header)
    
    # Rows 4-11: Location rows
    for i, location in enumerate(config['locations']):
        ws.cell(row=4+i, column=1, value=location)
    
    # Row 12: Total
    ws.cell(row=12, column=1, value='Total')
    
    print(f"✅ Initialized Card Collection sheet structure")


def _update_cumulative_table(ws, config, cumulative_data):
    """Update Table 1 - Cumulative totals for each location"""
    
    # Column mapping (1-indexed for openpyxl)
    col_map = {
        'system_balance': 2,
        'hdfc_card': 3,
        'axis_card': 4,
        'paytm_upi': 5,
        'amex': 6,
        'hdfc_upi': 7,
        'total': 8,
        'manual_card': 9,
        'manual_upi': 10,
        'difference': 11
    }
    
    # Find location rows (starting from row 4)
    location_rows = {}
    for row in range(4, 20):  # Search range for locations
        cell_value = ws.cell(row=row, column=1).value
        if cell_value:
            cell_str = str(cell_value).strip()
            if cell_str in config['locations']:
                location_rows[cell_str] = row
            elif cell_str.lower() == 'total':
                break
    
    # Totals for the Total row
    totals = {key: 0 for key in col_map.keys()}
    
    # Update each location row
    for location, data in cumulative_data.items():
        if location in location_rows:
            row = location_rows[location]
            for key, col in col_map.items():
                value = data.get(key, 0)
                ws.cell(row=row, column=col, value=value)
                totals[key] += value
    
    # Update Total row
    total_row = max(location_rows.values()) + 1 if location_rows else 12
    ws.cell(row=total_row, column=1, value='Total')
    for key, col in col_map.items():
        ws.cell(row=total_row, column=col, value=totals[key])
    
    print(f"✅ Updated cumulative table with {len(location_rows)} locations")


def _update_daily_summary_table(ws, config, processing_date, location_data):
    """Update Table 2 - Daily summary row"""
    
    # Find the daily summary section (look for "Payment Recon" header)
    daily_header_row = None
    for row in range(13, 25):
        cell_value = ws.cell(row=row, column=1).value
        if cell_value and 'Payment Recon' in str(cell_value):
            daily_header_row = row
            break
    
    if daily_header_row is None:
        # Create daily summary section only if it doesn't exist
        daily_header_row = 15
        date_str = processing_date.strftime('%d') + _get_ordinal_suffix(processing_date.day) + processing_date.strftime(' %B %Y')
        ws.cell(row=daily_header_row, column=1, value=f"Payment Recon {date_str}")
        print(f"📝 Created daily summary section at row {daily_header_row}")
    
    # Column mapping
    col_map = {
        'system_balance': 2,
        'hdfc_card': 3,
        'axis_card': 4,
        'paytm_upi': 5,
        'amex': 6,
        'hdfc_upi': 7,
        'total': 8,
        'manual_card': 9,
        'manual_upi': 10,
        'difference': 11
    }
    
    # Calculate daily totals
    daily_totals = {key: 0 for key in col_map.keys()}
    for location, data in location_data.items():
        for key in daily_totals:
            daily_totals[key] += data.get(key, 0)
    
    # Check if data for this date already exists
    data_row = daily_header_row + 2
    existing_date_value = ws.cell(row=data_row, column=1).value
    
    # Check if we need to update existing row or find the row for this date
    update_row = data_row
    if existing_date_value:
        # Check if dates match
        existing_date = None
        if isinstance(existing_date_value, datetime):
            existing_date = existing_date_value.date()
        else:
            try:
                existing_date = pd.to_datetime(existing_date_value).date()
            except:
                pass
        
        check_date = processing_date.date() if isinstance(processing_date, datetime) else processing_date
        if existing_date != check_date:
            # Different date, so we're just updating the standard row position
            print(f"🔄 Updating daily summary row (previous date: {existing_date})")
    
    # Write to data row
    ws.cell(row=update_row, column=1, value=processing_date)
    for key, col in col_map.items():
        ws.cell(row=update_row, column=col, value=daily_totals[key])
    
    print(f"✅ Updated daily summary for {processing_date}")


def _get_ordinal_suffix(day):
    """Get ordinal suffix for a day (1st, 2nd, 3rd, etc.)"""
    if 10 <= day % 100 <= 20:
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(day % 10, 'th')
    return suffix


def _update_location_sections(ws, config, merchant_name, month_year, processing_date, location_data):
    """Update Tables 3+ - Add new date row to each location section"""
    
    # Get all daily data for the month to rebuild location sections
    all_daily_data = get_all_daily_data_for_month(merchant_name, month_year)
    
    # All month names for dynamic matching
    all_months = ['january', 'february', 'march', 'april', 'may', 'june',
                  'july', 'august', 'september', 'october', 'november', 'december']
    
    # Find each location section and update
    current_row = 19  # Start searching from row 19
    
    for location in config['locations']:
        # Find location header (e.g., "Bibwewadi December -2025")
        location_header_row = None
        
        for row in range(current_row, ws.max_row + 20):
            cell_value = ws.cell(row=row, column=1).value
            if cell_value:
                cell_str = str(cell_value).strip().lower()
                # Check if location name is in cell and any month name is present
                if location.lower() in cell_str and any(month in cell_str for month in all_months):
                    location_header_row = row
                    break
        
        if location_header_row is None:
            # Create new location section only if it doesn't exist
            # Find the last used row in the sheet to append after
            last_row = ws.max_row
            if last_row < current_row:
                last_row = current_row
            location_header_row = last_row + 2  # Add some spacing
            _create_location_section(ws, location_header_row, location, processing_date)
            print(f"📝 Created new section for {location}")
        
        # Find the data start row (after headers)
        data_start_row = location_header_row + 3
        
        # Find existing dates and the last data row
        last_data_row = data_start_row - 1
        existing_dates = set()
        
        for row in range(data_start_row, data_start_row + 35):  # Check up to 35 days
            cell_value = ws.cell(row=row, column=1).value
            if cell_value is None or str(cell_value).strip() == '':
                break
            
            # Check if it's a date
            if isinstance(cell_value, datetime):
                existing_dates.add(cell_value.date())
                last_data_row = row
            elif 'Date' not in str(cell_value) and 'Collection' not in str(cell_value):
                try:
                    # Try to parse as date
                    parsed_date = pd.to_datetime(cell_value).date()
                    existing_dates.add(parsed_date)
                    last_data_row = row
                except:
                    pass
        
        # Add current date if not exists
        if processing_date.date() if isinstance(processing_date, datetime) else processing_date not in existing_dates:
            new_row = last_data_row + 1
            
            # Get location data
            loc_data = location_data.get(location, {})
            
            # Column mapping
            col_map = {
                'system_balance': 2,
                'hdfc_card': 3,
                'axis_card': 4,
                'paytm_upi': 5,
                'amex': 6,
                'hdfc_upi': 7,
                'total': 8,
                'manual_card': 9,
                'manual_upi': 10,
                'difference': 11
            }
            
            # Write date
            ws.cell(row=new_row, column=1, value=processing_date)
            
            # Write values
            for key, col in col_map.items():
                ws.cell(row=new_row, column=col, value=loc_data.get(key, 0))
            
            print(f"✅ Added new row for {location} on {processing_date}")
        else:
            # Update existing row if date already exists
            for row in range(data_start_row, data_start_row + 35):
                cell_value = ws.cell(row=row, column=1).value
                row_date = None
                
                if isinstance(cell_value, datetime):
                    row_date = cell_value.date()
                elif cell_value:
                    try:
                        row_date = pd.to_datetime(cell_value).date()
                    except:
                        pass
                
                check_date = processing_date.date() if isinstance(processing_date, datetime) else processing_date
                if row_date == check_date:
                    # Update the existing row
                    loc_data = location_data.get(location, {})
                    col_map = {
                        'system_balance': 2,
                        'hdfc_card': 3,
                        'axis_card': 4,
                        'paytm_upi': 5,
                        'amex': 6,
                        'hdfc_upi': 7,
                        'total': 8,
                        'manual_card': 9,
                        'manual_upi': 10,
                        'difference': 11
                    }
                    for key, col in col_map.items():
                        ws.cell(row=row, column=col, value=loc_data.get(key, 0))
                    print(f"🔄 Updated existing row for {location} on {processing_date}")
                    break
        
        # Update current_row for next location
        current_row = location_header_row + 10


def _create_location_section(ws, start_row, location, processing_date):
    """Create a new location section in Card Collection sheet"""
    
    month_name = processing_date.strftime('%B')
    year_suffix = processing_date.strftime('%y')
    
    # Row 1: Location header
    ws.cell(row=start_row, column=1, value=f"{location} {month_name}  -{year_suffix}")
    
    # Row 2: "Collection in Bank"
    ws.cell(row=start_row + 1, column=1, value="Collection in Bank")
    
    # Row 3: Column headers
    headers = ['Date', 'System', 'HDFC Card', 'Axis Credit card', 'Paytm UPI', 
               'Amex', 'HDFC UPI', 'Total', 'total card manual transaction',
               'total upi manual transaction', 'Diff']
    
    for col, header in enumerate(headers, 1):
        ws.cell(row=start_row + 2, column=col, value=header)
    
    print(f"✅ Created section for {location}")


def process_card_collection_automation(merchant_name, excel_path, processing_date=None, dry_run=False):
    """
    Main function to automate Card Collection sheet updates
    
    Args:
        merchant_name: Name of the merchant
        excel_path: Path to the Excel file
        processing_date: Date to process (defaults to today)
        dry_run: If True, only print what would be done without making changes
    """
    print(f"📊 Starting Card Collection automation for {merchant_name}...")
    
    if processing_date is None:
        processing_date = datetime.now().date()
    elif isinstance(processing_date, str):
        processing_date = pd.to_datetime(processing_date).date()
    elif isinstance(processing_date, datetime):
        processing_date = processing_date.date()
    
    month_year = processing_date.strftime('%Y-%m')
    
    try:
        # 1. Create database tables if not exist
        if not dry_run:
            create_card_collection_tables()
        
        # 2. Extract location data from matched sheets
        print("📖 Extracting location data from matched sheets...")
        location_data = extract_location_data_from_matched_sheets(excel_path, merchant_name, processing_date)
        
        if not location_data or all(all(v == 0 for v in data.values()) for data in location_data.values()):
            print("ℹ️ No data found in matched sheets, trying Summary sheet...")
            location_data = extract_location_data_from_summary(excel_path, merchant_name, processing_date)
        
        if dry_run:
            print("\n--- Dry Run: Extracted Location Data ---")
            for location, data in location_data.items():
                print(f"\n{location}:")
                for key, value in data.items():
                    print(f"  {key}: {value}")
            return
        
        # 3. Store daily data to database
        print("💾 Storing daily data to database...")
        store_card_collection_daily(merchant_name, processing_date, location_data)
        
        # 4. Update Card Collection sheet
        print("📝 Updating Card Collection sheet...")
        update_card_collection_sheet(excel_path, merchant_name, processing_date, location_data)
        
        print(f"✅ Card Collection automation completed for {merchant_name}")
        
    except Exception as e:
        print(f"⚠️ Error in Card Collection automation: {e}")
        import traceback
        traceback.print_exc()


# ============================================================================
# MAIN ENTRY POINT
# ============================================================================

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Payment Reconciliation Engine')
    parser.add_argument('--merchant', type=str, required=True, help='Merchant name')
    parser.add_argument('--date_filter', type=str, required=False, help='Date filter (DD/MM/YYYY)')
    parser.add_argument('--excel_path', type=str, required=True, help='Path to Excel output file')
    parser.add_argument('--dry_run', action='store_true', help='Dry run mode (no DB writes)')
    
    args = parser.parse_args()
    
    print(f"🚀 Starting reconciliation for merchant: {args.merchant}")
    print(f"📁 Excel output path: {args.excel_path}")
    if args.date_filter:
        print(f"📅 Date filter: {args.date_filter}")
    if args.dry_run:
        print(f"⚠️ DRY RUN MODE - No database writes will be performed")
    
    # Call the main function
    main(
        merchant_name=args.merchant,
        date_filter=args.date_filter,
        excel_path=args.excel_path,
        dry_run=args.dry_run
    )