I need to read data from an Excel file. The first cell contains the property name, and the second cell contains the property value. However, some of the property names in the first column are merged across two or more columns, and the corresponding values are in the next cell. For example, the property name "Ref" is in columns A and B, and its value is in column C.
I want to retrieve the value of the "Ref" property from column C in my Excel file.
Here is my excel image:
I am using python. Here is the output:
Approval Memo of : SHILPI AKTER
Name of the Applicant : SHILPI AKTER
Name of Territory : Comilla
Total Family Expenses : 30000
Ref : N/A
Amount : N/A
Total Amount : 3000
Ref and Amount Properties value not found. Here is my code:
import os
import openpyxl
from openpyxl.utils import column_index_from_string
file_path = r"D:\file\input\example.xlsx"
if os.path.exists(file_path):
print("File exists!")
else:
print("File not found! Check the path.")
exit()
target_sheet = "Output Approval Templete"
# Define the properties to extract
properties = [
"Approval Memo of",
"Name of the Applicant",
"Name of Territory",
"Total Family Expenses",
"Ref",
"Amount",
"Total Amount"
]
# Function to get the actual value from a merged cell
def get_merged_cell_value(sheet, row, col):
for merged_range in sheet.merged_cells.ranges:
min_row, min_col, max_row, max_col = merged_range.bounds # Extract merged cell bounds
if min_row <= row <= max_row and min_col <= col <= max_col:
return sheet.cell(min_row, min_col).value # Return the first cell's value of the merged range
return sheet.cell(row, col).value
# Function to format numeric values properly
def format_value(value):
if isinstance(value, float) and value > 1e10: # Large numbers like NID
return str(int(value)) # Convert to integer and string to avoid scientific notation
elif isinstance(value, (int, float)): # General number formatting
return str(value)
elif value is None:
return "N/A" # Handle missing values
return str(value).strip()
try:
# Load the workbook
wb = openpyxl.load_workbook(file_path, data_only=True)
if target_sheet not in wb.sheetnames:
print(f"Sheet '{target_sheet}' not found in the file.")
else:
ws = wb[target_sheet]
extracted_data = {}
# Iterate over rows to extract data
for row in ws.iter_rows():
for cell in row:
# Check if the cell value is a property we are looking for
if cell.value and isinstance(cell.value, str) and cell.value.strip() in properties:
prop_name = cell.value.strip()
col_idx = cell.column # Get column index (1-based)
next_col_idx = col_idx + 1 # Next column index
# Ensure next column exists within sheet bounds
if next_col_idx <= ws.max_column:
# Check if the cell is merged, and get its value
next_value = get_merged_cell_value(ws, cell.row, next_col_idx)
# Store the formatted value for the property
extracted_data[prop_name] = format_value(next_value) # Store extracted value
# Print extracted values
for key, value in extracted_data.items():
print(f"{key} : {value}")
except Exception as e:
print(f"Error loading workbook: {e}")
Please help me to find out merge cell properties value.