"""
Multiwfn Interface Module
This module provides a clean interface to the Multiwfn external quantum chemistry
analysis program for charge partitioning and multipole moment calculations.
Classes
-------
MultiwfnInterface
Main interface class for running Multiwfn calculations
Functions
---------
getmultipoles
Static function to parse multipole moment output files
"""
import os
import re
import sys
import time
import subprocess
import traceback
import numpy as np
from importlib import resources
from distutils.dir_util import copy_tree
[docs]class MoldenObject:
"""Helper class for parsing molden files to count basis functions."""
def __init__(self, file_path_xyz, molden_filename):
self.file_path_xyz = file_path_xyz
self.molden_filename = molden_filename
[docs] def countBasis(self):
"""Count the number of basis functions in the molden file."""
# This is a simplified version - you may need to import from the actual module
basis_count = 0
with open(self.molden_filename, 'r') as f:
for line in f:
if '[MO]' in line:
break
if 'Ene=' in line or 'Occup=' in line:
basis_count += 1
return basis_count // 2 if basis_count > 0 else 0
[docs]class MultiwfnInterface:
"""
Interface class for Multiwfn charge partitioning and multipole calculations.
This class encapsulates all interactions with the Multiwfn external program,
providing methods for charge partitioning, multipole moment calculations,
and parsing of Multiwfn output files.
Attributes
----------
dict_of_calcs : dict
Mapping of charge scheme names to Multiwfn command codes
dict_of_multipole : dict
Mapping of multipole schemes to Multiwfn command sequences
config : dict
Configuration dictionary with Multiwfn settings
"""
# Multiwfn command codes for charge calculation methods
CHARGE_SCHEMES = {
'Hirshfeld': '1', 'Voronoi': '2', 'Mulliken': '5',
'Lowdin': '6', 'SCPA': '7', 'Becke': '10', 'ADCH': '11',
'CHELPG': '12', 'MK': '13', 'AIM': '14', 'Hirshfeld_I': '15',
'CM5': '16', 'EEM': '17', 'RESP': '18', 'PEOE': '19'
}
# Multiwfn command codes for multipole moment calculations
MULTIPOLE_SCHEMES = {
'Hirshfeld': ['3', '2'],
'Hirshfeld_I': ['4', '1', '2'],
'Becke': ['1', '2']
}
[docs] @staticmethod
def get_monopole_commands(charge_type):
"""
Get the Multiwfn command sequence for a given charge type.
This is the single source of truth for all Multiwfn monopole charge
calculation commands. Different charge types require different menu
navigation sequences in Multiwfn.
Parameters
----------
charge_type : str
The charge partitioning scheme name
Returns
-------
list of str
Command sequence to send to Multiwfn stdin
Raises
------
ValueError
If charge_type is 'AIM' (not supported in menu 7)
"""
calc_command = MultiwfnInterface.CHARGE_SCHEMES.get(charge_type)
if charge_type == 'AIM':
raise ValueError(
"AIM charges cannot be calculated in population analysis module (menu 7). "
"Use basin analysis module (main menu 17) instead."
)
elif charge_type == 'Mulliken':
# Mulliken has a sub-menu that needs extra '0' to exit back to population menu
return ['7', '5', '1', 'y', '0', '0', 'q']
elif charge_type == 'Becke':
# Becke has sub-menu: 0=Start calculation
return ['7', '10', '0', 'y', '0', 'q']
elif charge_type == 'SCPA':
# SCPA directly calculates, no sub-menu
return ['7', '7', 'y', '0', 'q']
elif charge_type == 'Lowdin':
# Lowdin asks for output path (empty=screen), then y/n for .chg file
return ['7', '6', '', 'y', '0', 'q']
elif charge_type == 'EEM':
# EEM needs 'g' to guess bonds from molden, then 0=start calculation
return ['7', '17', 'g', '0', 'y', '0', 'q']
elif charge_type == 'PEOE':
# PEOE directly calculates (may fail for some elements like Na)
return ['7', '19', 'y', '0', 'q']
elif charge_type in ['CHELPG', 'RESP', 'MK']:
# ESP fitting methods have an extra menu level
return ['7', calc_command, '1', 'y', '0', '0', 'q']
elif charge_type == 'Hirshfeld_I':
# Hirshfeld_I: 1=start calculation (may need special handling for large basis)
return ['7', '15', '1', 'y', '0', 'q']
else:
# Default for Hirshfeld, Voronoi, ADCH, CM5:
# Sub-menu where option 1 = use built-in densities or output charges
return ['7', calc_command, '1', 'y', '0', 'q']
[docs] def __init__(self, config=None):
"""
Initialize the Multiwfn interface.
Parameters
----------
config : dict, optional
Configuration dictionary with keys:
- molden_filename: Name of molden file
- xyzfilename: Name of XYZ geometry file
- rerun: Whether to rerun existing calculations
- hasECP: Whether calculation used ECP
- maxIHirshFuzzyBasis: Max basis functions for Hirshfeld-I fuzzy
- maxIHirshBasis: Max basis functions for Hirshfeld-I
"""
self.config = config if config is not None else {
'molden_filename': 'final_optim.molden',
'xyzfilename': 'final_optim.xyz',
'rerun': False,
'hasECP': False,
'maxIHirshFuzzyBasis': 1500,
'maxIHirshBasis': 2000
}
self.dict_of_calcs = self.CHARGE_SCHEMES
self.dict_of_multipole = self.MULTIPOLE_SCHEMES
[docs] def run_multiwfn(self, command, input_commands, output_file=None,
description="Multiwfn calculation", capture_output=True):
"""
Centralized Multiwfn runner with proper error handling and output display.
Parameters
----------
command : str
The shell command to run Multiwfn (e.g., "multiwfn file.molden")
input_commands : list of str
List of commands to send to Multiwfn's stdin
output_file : str, optional
Path to redirect stdout to a file
description : str, optional
Description of the calculation for error messages
capture_output : bool, optional
If True, capture and display output. If False, let it stream to terminal
Returns
-------
tuple
(stdout, stderr, returncode) from the Multiwfn process
Raises
------
RuntimeError
If Multiwfn fails with non-zero exit code
"""
# Prepare the full command with output redirection if specified
full_command = command
if output_file:
full_command = f"{command} > {output_file}"
print(f"\n{'='*60}")
print(f"Running: {description}")
print(f"Command: {full_command}")
print(f"{'='*60}\n")
try:
# Run Multiwfn process
if capture_output:
proc = subprocess.Popen(
command,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
shell=True,
text=True
)
# Send input commands
input_str = '\n'.join(input_commands) + '\n'
stdout, stderr = proc.communicate(input=input_str)
# Save to output file if specified
if output_file:
with open(output_file, 'w') as f:
f.write(stdout)
# Display output
if stdout:
print("STDOUT:")
print(stdout[:500]) # Print first 500 chars
if len(stdout) > 500:
print(f"... (truncated {len(stdout) - 500} characters)")
if stderr:
print("\nSTDERR:")
print(stderr)
returncode = proc.returncode
else:
# Let output stream to terminal
proc = subprocess.Popen(
command,
stdin=subprocess.PIPE,
shell=True,
text=True
)
input_str = '\n'.join(input_commands) + '\n'
proc.communicate(input=input_str)
stdout, stderr, returncode = "", "", proc.returncode
if returncode != 0:
raise RuntimeError(
f"{description} failed with exit code {returncode}\n"
f"Command: {full_command}"
)
print(f"\n{'='*60}")
print(f"Completed: {description}")
print(f"{'='*60}\n")
return stdout, stderr, returncode
except Exception as e:
print(f"\n{'='*60}")
print(f"ERROR running Multiwfn")
print(f"Description: {description}")
print(f"Command: {full_command}")
print(f"{'='*60}")
print(f"Error: {str(e)}")
print(f"{'='*60}\n")
raise
[docs] def partitionCharge(self, multipole_bool, f,
multiwfn_path, charge_type, owd,
molden_filename=None, xyz_filename=None):
"""
Partition electron density using Multiwfn to generate partial charges or multipole moments.
This is the centralized interface for all charge partitioning schemes (Hirshfeld, CHELPG, etc.).
Checks if calculation was previously completed and reuses results unless rerun=True.
Parameters
----------
multipole_bool : bool
True to compute multipole moments, False for monopoles only
f : str
Complete path to folder containing the calculation
multiwfn_path : str
Path to Multiwfn executable
charge_type : str
Partitioning scheme (e.g., 'Hirshfeld', 'CHELPG', 'Hirshfeld_I')
owd : str
Original working directory
molden_filename : str, optional
Name of the molden file (not full path, just filename).
If None, uses self.config['molden_filename']
xyz_filename : str, optional
Name of the xyz file (not full path, just filename).
If None, uses self.config['xyzfilename']
Returns
-------
float
Computation time in seconds, 0 if calculation was previously completed (skipped),
or -1 if calculation failed
Notes
-----
For Hirshfeld-I calculations, atmrad files are automatically loaded from
pyef.resources.atmrad (bundled with the package).
"""
# Handle f being a list (take first element)
if isinstance(f, list):
f = f[0]
# Use provided filenames or fall back to config defaults
if molden_filename is None:
molden_filename = self.config['molden_filename']
if xyz_filename is None:
final_structure_file = self.config['xyzfilename']
# Handle xyzfilename being a list (take first element)
if isinstance(final_structure_file, list):
final_structure_file = final_structure_file[0]
else:
final_structure_file = xyz_filename
comp_cost = 0 # Default to 0 for "already exists" case
num_atoms = 0
need_to_run_calculation = True
os.chdir(owd)
os.chdir(f)
file_path_multipole = f"{os.getcwd()}/Multipole{charge_type}.txt"
file_path_monopole = f"{os.getcwd()}/Charges{charge_type}.txt"
file_path_xyz = f"{os.getcwd()}/{final_structure_file}"
# Check if previous calculations fully converged for desired multipole/charge scheme
calc_type = "multipole" if multipole_bool else "monopole"
if multipole_bool:
if os.path.exists(file_path_multipole):
with open(file_path_multipole, 'r') as file:
contents = file.read()
if "Calculation took up" in contents:
need_to_run_calculation = False
else:
if os.path.exists(file_path_monopole):
need_to_run_calculation = False
# If you need to run the calculations, run either the multipole or the monopole calculation!
if need_to_run_calculation or self.config['rerun']:
print(f'Running {charge_type} {calc_type} calculation for {os.path.basename(f)}')
try:
start = time.time()
if multipole_bool:
chg_prefix, _ = os.path.splitext(molden_filename)
# Dynamically get path to package settings.ini file
with resources.path('pyef.resources', 'settings.ini') as ini_path:
path_to_init_file = str(ini_path)
command = f"{multiwfn_path} {molden_filename} -set {path_to_init_file}"
multiwfn_commands = ['15', '-1'] + self.dict_of_multipole[charge_type] + ['0', 'q']
num_atoms = self.count_atoms(final_structure_file)
if charge_type == 'Hirshfeld_I':
# Get the number of basis functions
num_basis = MoldenObject(file_path_xyz, molden_filename).countBasis()
# Use bundled atmrad from package resources
with resources.path('pyef.resources', 'atmrad') as atmrad_resource:
atmrad_src = str(atmrad_resource)
copy_tree(atmrad_src, os.getcwd() + '/atmrad/')
print(f'Current num of basis is: {num_basis}')
print(f'The current max num is: {self.config["maxIHirshFuzzyBasis"]}')
if num_basis > self.config['maxIHirshFuzzyBasis']:
print(f'Number of basis functions: {num_basis}')
multiwfn_commands = ['15', '-1'] + ['4', '-2', '1', '2'] + ['0', 'q']
print(f'I-Hirshfeld command should be low memory and slow to accommodate large system')
# Use centralized Multiwfn runner
self.run_multiwfn(
command=command,
input_commands=multiwfn_commands,
output_file=file_path_multipole,
description=f"Multipole {charge_type} calculation for {f}"
)
else:
# Dynamically get path to package settings.ini file
with resources.path('pyef.resources', 'settings.ini') as ini_path:
path_to_init_file = str(ini_path)
command = f"{multiwfn_path} {molden_filename} -set {path_to_init_file}"
chg_prefix, _ = os.path.splitext(molden_filename)
calc_command = self.dict_of_calcs[charge_type]
# Default command for most charge types (Hirshfeld, Voronoi, ADCH, CM5)
# These have sub-menu where option 1 = use built-in densities or output charges
commands = ['7', calc_command, '1', 'y', '0', 'q']
if charge_type == 'Mulliken':
# Mulliken has a sub-menu that needs extra '0' to exit back to population menu
commands = ['7', '5', '1', 'y', '0', '0', 'q']
elif charge_type == 'Becke':
# Becke has sub-menu: 0=Start calculation
commands = ['7', '10', '0', 'y', '0', 'q']
elif charge_type == 'SCPA':
# SCPA directly calculates, no sub-menu
commands = ['7', '7', 'y', '0', 'q']
elif charge_type == 'Lowdin':
# Lowdin asks for output path (empty=screen), then y/n for .chg file
commands = ['7', '6', '', 'y', '0', 'q']
elif charge_type == 'EEM':
# EEM needs 'g' to guess bonds from molden, then 0=start calculation
commands = ['7', '17', 'g', '0', 'y', '0', 'q']
elif charge_type == 'PEOE':
# PEOE directly calculates (may fail for some elements like Na)
commands = ['7', '19', 'y', '0', 'q']
elif charge_type == 'AIM':
# AIM is not available in menu 7 - needs basin analysis module
print(f"WARNING: AIM charges cannot be calculated in population analysis module.")
print(f" Use basin analysis module (main menu 17) instead.")
raise ValueError("AIM charge calculation not supported in this module")
elif charge_type in ['CHELPG', 'RESP', 'MK']:
commands = ['7', calc_command, '1', 'y', '0', '0', 'q']
elif charge_type == 'Hirshfeld_I':
num_basis = MoldenObject(file_path_xyz, molden_filename).countBasis()
print(f'Number of basis functions: {num_basis}')
if num_basis > self.config['maxIHirshBasis']:
commands = ['7', '15', '-2', '1', '\n', 'y', '0', 'q']
# Use bundled atmrad from package resources
with resources.path('pyef.resources', 'atmrad') as atmrad_resource:
atmrad_src = str(atmrad_resource)
copy_tree(atmrad_src, os.getcwd() + '/atmrad/')
# Use centralized Multiwfn runner
self.run_multiwfn(
command=command,
input_commands=commands,
description=f"Monopole {charge_type} calculation for {f}"
)
os.rename(f'{chg_prefix}.chg', file_path_monopole)
end = time.time()
comp_cost = end - start
except Exception as e:
print(f"\n{'='*60}")
print(f"ERROR in partitionCharge for {f}")
print(f"Charge type: {charge_type}, Multipole: {multipole_bool}")
print(f"{'='*60}")
print(f"Error: {str(e)}")
print(f"\nFull traceback:")
print(traceback.format_exc())
print(f"{'='*60}\n")
# Issue could be from lost memory or Multiwfn failure
os.chdir(owd)
return -1 # Return -1 to indicate failure
else:
result_file = os.path.basename(file_path_multipole if multipole_bool else file_path_monopole)
print(f'Found existing {charge_type} {calc_type} result: {result_file} for {os.path.basename(f)}')
os.chdir(owd)
return comp_cost
[docs] @staticmethod
def count_atoms(xyz_filename):
"""Count number of atoms in XYZ file (mapcount replacement)."""
with open(xyz_filename, 'r') as f:
for i, line in enumerate(f):
pass
return i + 1 # Total lines including first two header lines
[docs] @staticmethod
def getmultipoles(multipole_name):
"""
Parse multipole moments from Multiwfn output file.
Parameters
----------
multipole_name : str
Path to multipole moment output file from Multiwfn.
Returns
-------
list of dict
List of dictionaries containing multipole data for each atom:
- 'Index': atom index (str)
- 'Element': element symbol (str)
- 'Atom_Charge': atomic charge (float)
- 'Dipole_Moment': dipole moment vector [x, y, z] (list of float)
- 'Quadrupole_Moment': 3x3 quadrupole tensor (np.ndarray)
Notes
-----
Parses Multiwfn output format with sections separated by asterisks.
Extracts monopole (charge), dipole, and quadrupole moments for each atom.
"""
# Read the text file
with open(multipole_name, 'r') as file:
text = file.read()
# Split the text into sections based on the delimiter
sections = re.split(r'\n\s*[*]+\s*', text)
# Define a pattern to extract the index, element name, and atomic dipole and quadrupole moment values
pattern = (r'Atomic multipole moments of\s+(\d+)\s*\(([^)]+)\s*\).*?'
r'Atomic monopole moment \(electron\):\s*([-+]?\d+\.\d+)\s+Atomic charge:\s*([-+]?\d+\.\d+).*?'
r'X=\s*([-+]?\d+\.\d+)\s+Y=\s*([-+]?\d+\.\d+)\s+Z=\s*([-+]?\d+\.\d+).*?'
r'XX=\s*([-+]?\d+\.\d+)\s+XY=\s*([-+]?\d+\.\d+)\s+XZ=\s*([-+]?\d+\.\d+).*?'
r'YX=\s*([-+]?\d+\.\d+)\s+YY=\s*([-+]?\d+\.\d+)\s+YZ=\s*([-+]?\d+\.\d+).*?'
r'ZX=\s*([-+]?\d+\.\d+)\s+ZY=\s*([-+]?\d+\.\d+)\s+ZZ=\s*([-+]?\d+\.\d+)'
)
atomicDicts = []
# Iterate through sections and extract the information
for section in sections:
match = re.search(pattern, section, re.DOTALL)
if match:
index = match.group(1)
element = match.group(2)
monopole_moment = float(match.group(3))
atomic_charge = float(match.group(4))
x_value = match.group(5)
y_value = match.group(6)
z_value = match.group(7)
dipole_moment = [float(x_value), float(y_value), float(z_value)]
xx_value = match.group(8)
xy_value = match.group(9)
xz_value = match.group(10)
yx_value = match.group(11)
yy_value = match.group(12)
yz_value = match.group(13)
zx_value = match.group(14)
zy_value = match.group(15)
zz_value = match.group(16)
# Create a 3x3 matrix for the quadrupole moment
quadrupole_moment = np.array([
[float(xx_value), float(xy_value), float(xz_value)],
[float(yx_value), float(yy_value), float(yz_value)],
[float(zx_value), float(zy_value), float(zz_value)]
])
atomDict = {
"Index": index,
"Element": element,
"Atom_Charge": atomic_charge,
'Dipole_Moment': dipole_moment,
'Quadrupole_Moment': quadrupole_moment
}
atomicDicts.append(atomDict)
return atomicDicts