% Conference paper from the 2022 IEEE World AI IoT Congress (AIIoT 2022).
% The citation key is kept unchanged so existing \cite commands still resolve.
% Authors are stored in "Last, First" form; words that must keep their case
% under sentence-casing styles are brace-protected.
% NOTE(review): `address` holds a country rather than the publisher's city;
% it is left as exported -- confirm the intended value.
@inproceedings{dcd9ccd6573a425dbb38fb6367642986,
  author    = {Hoi, Lap Man and Sun, Yuqi and Im, Sio Kei},
  title     = {An Automatic Speech Segmentation Algorithm of {Portuguese} based on Spectrogram Windowing},
  booktitle = {2022 {IEEE} World {AI} {IoT} Congress, {AIIoT} 2022},
  pages     = {290--295},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2022},
  doi       = {10.1109/AIIoT54504.2022.9817299},
  language  = {English},
  keywords  = {Portuguese speech, natural language processing, sentence segmentation, spectrogram},
  abstract  = {Sentence segmentation is important for improving the human readability of Automatic Speech Recognition (ASR) systems. Although it has been explored through numerous interdisciplinary studies, segmentation of Portuguese is still time-consuming due to the lack of efficient automatic segmentation methods and the reliance on qualified phonetic experts. This paper presents a novel algorithm that efficiently segments speech into sentences by learning the spectrogram of sentences through windows using a classification model developed with an Artificial Neural Network (ANN). Based on our experiments, the beginning part of a European Portuguese (EP) sentence enables better identification of the sentence's boundaries. In addition, a window frame of spectrogram constructed by the previous ending of 100 milliseconds (ms) and the subsequent beginning of 300 ms presents the best performance in the automatic sentence segmentation. As a result, the proposed algorithm can automatically segment Portuguese speech into sentences by analyzing its spectrogram without knowing the speech semantics.},
  note      = {Publisher Copyright: {\textcopyright} 2022 IEEE. Conference date: 6--9 June 2022},
}