% Conference paper in the MMSys '24 proceedings; the venue is stored in booktitle.
@inproceedings{17717,
  author    = {Sarkhoosh, Mehdi and Gautam, Sushant and Midoglu, Cise and Sabet, Saeed and Halvorsen, P{\r a}l},
  title     = {Multimodal {AI}-Based Summarization and Storytelling for Soccer on Social Media},
  booktitle = {{MMSys} '24: Proceedings of the 15th {ACM} Multimedia Systems Conference},
  publisher = {Association for Computing Machinery},
  year      = {2024},
  doi       = {10.1145/3625468.3652197},
  keywords  = {Machine learning},
  abstract  = {The rapid advancement of technology has been revolutionizing the field of sports media, where there is a growing need for sophisticated data processing methods. Current methodologies for extracting information from soccer broadcast videos to generate game highlights and summaries for social media are predominantly manual and rely heavily on text-based NLP techniques, overlooking the rich visual and auditory information available. In response to this challenge, our research introduces SoccerSum, a tool that innovates in the field by integrating computer vision, audio analysis with advanced language models like GPT-4. This multimodal approach enables automated, enriched content summarization, including detection of players and key field elements, thereby enhancing the metadata used in summarization algorithms. SoccerSum uniquely combines textual and visual data, offering a comprehensive solution for generating accurate, platform-specific content. This development represents a significant advancement in automated, data-driven sports media dissemination, and sets a new benchmark in the realm of soccer information extraction. A video of the demo can be found here: https://youtu.be/za4VIi2ARXY.},
}