@PHDTHESIS{ 2025:1623552113, title = {An automated method for neural network quantization refinement}, year = {2025}, url = "https://tede.ufam.edu.br/handle/tede/10912", abstract = "In the rapidly evolving field of artificial intelligence, deep neural networks (DNNs) have become integral to numerous applications, from autonomous vehicles to healthcare systems. However, the increasing size and complexity of DNNs pose significant challenges for deployment in resource-constrained and safety-critical environments due to high computational and memory demands. This dissertation addresses the critical problem of neural network quantization, which aims to reduce the precision of network parameters to enhance efficiency while preserving functional behavior. The primary contribution is the development of the Counterexample-Guided Quantization for Neural Networks (CEG4N) framework, which integrates formal equivalence verification (FEV)—a technique that rigorously verifies that two models exhibit identical behavior—into the quantization process. Methodologically, CEG4N employs an iterative optimization loop that leverages formal verification tools to systematically detect and correct discrepancies between the original and quantized models, thus ensuring their functional equivalence (FE). Experimental evaluations conducted on benchmark datasets, including Iris, Seeds, MNIST, and CIFAR-10, demonstrate that CEG4N effectively maintains model accuracy and behavioral integrity, outperforming traditional quantization methods such as the greedy path following quantization (GPFQ), particularly in smaller and sparse networks. While scalability to larger models remains challenging due to increased computational overhead associated with formal verification methods, the proposed framework significantly advances the robustness and reliability of quantized neural networks. Moreover, this research underscores the importance of integrating formal verification into quantization to optimally balance efficiency and accuracy. Ultimately, these contributions facilitate the deployment of neural networks in environments with stringent resource and reliability constraints, promoting safer and more efficient artificial intelligence systems.", publisher = {Universidade Federal do Amazonas}, scholl = {Programa de Pós-graduação em Informática}, note = {Instituto de Computação} }