import pandas as pd

# Load data from the Excel file
transaction_details_df = pd.read_excel('AphonepeHISManual10-06.xlsx')

# Convert 'Source Order No.' to string and clean up trailing '.0'
transaction_details_df['ChequeorCardNo'] = transaction_details_df['ChequeorCardNo'].astype(str).replace(r'\.0$', '', regex=True)

#transaction_details_df = transaction_details_df[~transaction_details_df['Status'].str.lower().isin(['void', 'rejected'])]

# Columns to be summed
numeric_columns = ['Amount']

# Convert numeric columns to numeric and handle errors
for col in numeric_columns:
    transaction_details_df[col] = pd.to_numeric(transaction_details_df[col], errors='coerce')

# Fill NaN in numeric columns with 0
transaction_details_df[numeric_columns] = transaction_details_df[numeric_columns].fillna(0)

# Group by 'Source Order No.' and sum numeric columns
transaction_details_grouped = transaction_details_df.groupby('ChequeorCardNo', as_index=False).agg({
    'Amount': 'sum',
    **{col: 'first' for col in transaction_details_df.columns if col not in ['ChequeorCardNo'] + numeric_columns}
})

# Specify desired columns
desired_columns = ['TenentName','Patient_Name','BillNo','VisitDate','Client_Name','Invest_Name','BillAmount','Discount','NetAmount','Amount','ReceivedDate','PayMode','BankName','ChequeorCardNo','TransactionId','ProcessingId',	'BillUser'
]

# Check for missing columns and add them if necessary
missing_columns = [col for col in desired_columns if col not in transaction_details_grouped.columns]
for col in missing_columns:
    transaction_details_grouped[col] = None  # Add missing columns with default values

# Create output DataFrame with desired columns
transaction_details_output = transaction_details_grouped[desired_columns]

# Save the output to a new Excel file
transaction_details_output.to_excel('AndersonHISManual.xlsx', index=False)

print("File has been created with summed amounts and deduplicated order numbers.")

