% Conference paper: Metwally, Rashwan and Atiya, "A multi-layered approach
% for Arabic text diacritization", ICCCBDA 2016 proceedings, pp. 389--393.
%
% Maintenance notes:
%   * The citation key is kept exactly as exported so existing \cite commands
%     continue to resolve.
%   * Percent signs in the abstract are written as \% so that styles which
%     typeset the abstract do not start a TeX comment in the middle of it.
%   * Words that must keep their capitalisation under sentence-casing styles
%     are brace-protected in title and booktitle.
%   * The former series field repeated booktitle verbatim and was dropped to
%     avoid printing the proceedings name twice.
%   * NOTE(review): address holds a country, as exported; most styles expect
%     the publisher's city here -- confirm and refine if known.
@inproceedings{19a353ac9fe641c9814248ca43e355c0,
  author    = {Metwally, Aya S. and Rashwan, Mohsen A. and Atiya, Amir F.},
  title     = {A multi-layered approach for {Arabic} text diacritization},
  booktitle = {Proceedings of 2016 {IEEE} International Conference on Cloud Computing and Big Data Analysis, {ICCCBDA} 2016},
  publisher = {IEEE, Institute of Electrical and Electronics Engineers},
  address   = {United States},
  year      = {2016},
  month     = aug,
  day       = {2},
  pages     = {389--393},
  doi       = {10.1109/ICCCBDA.2016.7529589},
  isbn      = {9781509025954},
  language  = {English},
  keywords  = {CRF, HMM, text diacritization},
  note      = {Publisher Copyright: {\textcopyright} 2016 IEEE. 2016 IEEE International Conference on Cloud Computing and Big Data Analysis (ICCCBDA); Conference date: 05-07-2016 through 07-07-2016},
  abstract  = {Text diacritization is a critical task which plays an important role for improving the performance of many NLP tasks for languages that include diacritics in their orthographies. In this paper, we handle the problem of Arabic text diacritization such that our system diacritize input Arabic sequence of words both morphologically and syntactically. The operation of the system is divided into three layers: the first layer uses HMM for the morphological diacritization of previously seen words, the second layer uses an external morphological analyzer for the morphological diacritization of OOV words, and the third layer uses CRF for the syntactic diacritization of all words. To evaluate the performance of the system, we used the benchmark LDC Arabic Treebank Part 3 datasets used by the state-of-the-art systems. The proposed system achieved a morphological WER of 4.3\%, and a syntactic WER of 9.4\%.},
}