import pandas as pd
from openpyxl import load_workbook
from openpyxl.utils import range_boundaries
from openpyxl.utils.dataframe import dataframe_to_rows

# Load data
moments_pay_bank_df = pd.read_excel('DMDSCstatement.xlsx')
transaction_details_df = pd.read_excel('DrmohansHIS.xlsx')

# Clean and process HIS data
transaction_details_df['CCAUTHNO'] = transaction_details_df['CCAUTHNO'].astype(str).replace(r'\.0$', '', regex=True)
transaction_details_df['CHEQUENO'] = transaction_details_df['CHEQUENO'].astype(str).replace(r'\.0$', '', regex=True)
transaction_details_df['Amount'] = transaction_details_df['PAYMODEAMOUNT'].astype(float).round(2)

transaction_details_df['TransactionId'] = (
    transaction_details_df['CCAUTHNO']
    .where(transaction_details_df['CCAUTHNO'].str.fullmatch(r'\d{1,12}'))
    .fillna(transaction_details_df['CHEQUENO'].where(transaction_details_df['CHEQUENO'].str.fullmatch(r'\d{1,12}')))
    .replace(r"'", '', regex=True)
    .apply(lambda x: x.zfill(12) if pd.notnull(x) else x)
)

# Clean and process bank data
moments_pay_bank_df['transaction_id'] = moments_pay_bank_df['RRN'].astype(str).replace(r'\.0$', '', regex=True)
moments_pay_bank_df['Txn Amt'] = moments_pay_bank_df['GROSS_AMT'].astype(float).round(2)

# Apply matching logic
field_mapping = {'TransactionId': 'transaction_id', 'Amount': 'Txn Amt'}
transaction_details_df = transaction_details_df[transaction_details_df['ENTITY'] == "DMDSC"]

upi_transactions_df = transaction_details_df[transaction_details_df['PAYMODECODE'].isin(['EWALLET', 'CREDITCARD', 'DEBITCARD'])].copy()

# Strip whitespace
upi_transactions_df.loc[:, 'TransactionId'] = upi_transactions_df['TransactionId'].str.strip()
moments_pay_bank_df.loc[:, 'transaction_id'] = moments_pay_bank_df['transaction_id'].str.strip()

# Remove duplicates
upi_transactions_df = upi_transactions_df.drop_duplicates(subset=['TransactionId', 'Amount'], keep='first')
moments_pay_bank_df_dedup = moments_pay_bank_df.drop_duplicates(subset=['transaction_id', 'Txn Amt'], keep='first')

# Merge for matching
merged_df = pd.merge(
    upi_transactions_df,
    moments_pay_bank_df_dedup[list(field_mapping.values())],
    left_on=['TransactionId', 'Amount'],
    right_on=['transaction_id', 'Txn Amt'],
    how='left',
    indicator=True
)

# Prepare matched and unmatched records
matched_records = merged_df[merged_df['_merge'] == 'both'].copy()
matched_records.insert(0, 'S.No.', range(1, len(matched_records) + 1))
matched_records['momentpay matched'] = 'YES'

unmatched_records = merged_df[merged_df['_merge'] != 'both'].copy()
unmatched_records.insert(0, 'S.No.', range(1, len(unmatched_records) + 1))
unmatched_records['momentpay matched'] = 'NO'

# Summary preparation by Cluster
matched_summary = matched_records.groupby('CLUSTER').agg(
    Matched_Count=('Amount', 'count'),
    Matched_Amount=('Amount', 'sum')
)

unmatched_summary = unmatched_records.groupby('CLUSTER').agg(
    Unmatched_Count=('Amount', 'count'),
    Unmatched_Amount=('Amount', 'sum')
)

summary = pd.merge(
    matched_summary,
    unmatched_summary,
    left_index=True,
    right_index=True,
    how='outer'
).fillna(0)

summary['Total_Count'] = summary['Matched_Count'] + summary['Unmatched_Count']
summary['Total_Amount'] = summary['Matched_Amount'] + summary['Unmatched_Amount']
summary = summary.reset_index()
summary.rename(columns={'Cluster': 'CLUSTER'}, inplace=True)

# Load the workbook and target sheet
excel_path = 'Drmohan-HISBANK04-06.xlsx'
wb = load_workbook(excel_path)
summary_ws = wb['Summary']

# Write DataFrame to worksheet starting from row 54, column 1
start_row = 59
start_col = 1

# Unmerge any merged cells that might cause issues
for merged_range in list(summary_ws.merged_cells.ranges):
    min_col, min_row, max_col, max_row = range_boundaries(str(merged_range))
    if start_row <= max_row:
        summary_ws.unmerge_cells(str(merged_range))

# Write data to worksheet
for r_idx, row in enumerate(dataframe_to_rows(summary, index=False, header=True), start=start_row):
    for c_idx, value in enumerate(row, start=start_col):
        summary_ws.cell(row=r_idx, column=c_idx, value=value)

# Save the file
wb.save(excel_path)
print("✅ Summary written successfully to:", excel_path)

