import pandas as pd
from openpyxl import load_workbook
from openpyxl.utils.dataframe import dataframe_to_rows

# Load both files
bank_deposit_df = pd.read_excel('DrmohansCashfile.xlsx')
his_report_df = pd.read_excel('DrmohansHIS.xlsx')

# Standardize cluster mapping
cluster_map = {
    'ap': 'Andra Pradesh',
    'andhra pradesh': 'Andra Pradesh',   # normalize both forms
    'andra pradesh': 'Andra Pradesh',
    'rotn': 'ROTN',
    'roi': 'ROI',
    'kerala': 'Kerala',
    'karnataka': 'Karnataka',
    'telangana': 'Telangana',
    'telengana': 'Telangana',
    'chennai': 'Chennai'
}

def clean_str(x):
    return str(x).strip().lower()

# ---------------------------
# Normalize HIS Data
# ---------------------------
his_report_df = his_report_df[his_report_df['PAYMENTMODE'].str.lower() == 'cash']
his_report_df['Cluster'] = his_report_df['CLUSTER'].apply(clean_str)
his_report_df['Cluster'] = his_report_df['Cluster'].map(cluster_map).fillna(his_report_df['Cluster'])
his_report_df['Branch'] = his_report_df['BRANCH'].apply(clean_str)
his_report_df['Entity'] = his_report_df['ENTITY'].apply(clean_str)
his_report_df['Amount'] = pd.to_numeric(his_report_df['PAYMODEAMOUNT'], errors='coerce').fillna(0)

his_summary = his_report_df.groupby(['Cluster', 'Branch', 'Entity'], dropna=False)['Amount'].sum().reset_index()
his_summary.rename(columns={'Amount': 'Net Amount (HIS report)'}, inplace=True)

# ---------------------------
# Normalize CDM Data
# ---------------------------
bank_deposit_df['Cluster'] = bank_deposit_df['CLSUTER'].apply(clean_str)
bank_deposit_df['Cluster'] = bank_deposit_df['Cluster'].map(cluster_map).fillna(bank_deposit_df['Cluster'])
bank_deposit_df['Branch'] = bank_deposit_df['BRANCH'].apply(clean_str)
bank_deposit_df['Entity'] = bank_deposit_df['ENTITY'].apply(clean_str)
bank_deposit_df['Amount'] = pd.to_numeric(bank_deposit_df['SLIP AMOUNT'], errors='coerce').fillna(0)

cdm_summary = bank_deposit_df.groupby(['Cluster', 'Branch', 'Entity'], dropna=False)['Amount'].sum().reset_index()
cdm_summary.rename(columns={'Amount': 'Deposit Amt (CDM report)'}, inplace=True)

# ---------------------------
# Merge and Calculate Difference
# ---------------------------
summary_df = pd.merge(
    cdm_summary,
    his_summary,
    on=['Cluster', 'Branch', 'Entity'],
    how='left'
)

summary_df['Net Amount (HIS report)'] = summary_df['Net Amount (HIS report)'].fillna(0)
summary_df['Deposit Amt (CDM report)'] = summary_df['Deposit Amt (CDM report)'].fillna(0)
summary_df['Difference'] = summary_df['Deposit Amt (CDM report)'] - summary_df['Net Amount (HIS report)']
summary_df = summary_df.round(2)
summary_df = summary_df.sort_values(by=['Cluster', 'Branch', 'Entity'])

# ---------------------------
# Write to Excel
# ---------------------------
output_path = 'Drmohan-HISBANK03-06.xlsx'
wb = load_workbook(output_path)
ws = wb['Cash Reco-DMDSC']

# Clear old rows starting at B7 (columns B to F)
for row in ws.iter_rows(min_row=7, min_col=2, max_col=6):
    for cell in row:
        cell.value = None

# Write updated summary starting at B7
for r_idx, row in enumerate(dataframe_to_rows(summary_df, index=False, header=True), start=7):
    for c_idx, value in enumerate(row, start=2):  # column B is index 2
        ws.cell(row=r_idx, column=c_idx, value=value)

wb.save(output_path)
print("✅ Cleaned and merged summary saved.")

