{"id":115706,"date":"2026-02-12T11:02:55","date_gmt":"2026-02-12T05:32:55","guid":{"rendered":"https:\/\/www.mygreatlearning.com\/blog\/?page_id=115706"},"modified":"2026-02-11T15:52:24","modified_gmt":"2026-02-11T10:22:24","slug":"kid-activity-emotion-detection-cctv","status":"publish","type":"page","link":"https:\/\/www.mygreatlearning.com\/blog\/kid-activity-emotion-detection-cctv\/","title":{"rendered":"Activity &amp; Emotion Detection of Recognized Kids in CCTV Video for Day Care Using SlowFast &amp; CNN"},"content":{"rendered":"\n<!-- Import Google Fonts -->\n<link rel=\"preconnect\" href=\"https:\/\/fonts.googleapis.com\">\n<link rel=\"preconnect\" href=\"https:\/\/fonts.gstatic.com\" crossorigin>\n<link href=\"https:\/\/fonts.googleapis.com\/css2?family=Inter:wght@400;500;600&family=Merriweather:ital,wght@0,300;0,400;0,700;1,300;1,400&family=Playfair+Display:wght@700;900&display=swap\" rel=\"stylesheet\">\n\n<style>\n    \/* SCOPING: All styles are prefixed with #custom-paper-scope *\/\n    #custom-paper-scope {\n        \/* CSS Variables for easy theming *\/\n        --font-heading: 'Playfair Display', serif;\n        --font-body: 'Merriweather', serif;\n        --font-ui: 'Inter', sans-serif;\n        \n        --color-paper: #ffffff;\n        --color-text-main: #2c3e50;\n        --color-text-light: #5f6c7b;\n        --color-accent: #34495e; \/* Premium Slate *\/\n        --color-link: #2980b9;\n        --color-border: #eaeaea;\n        \n        padding: 20px 0;\n        font-family: var(--font-body);\n        color: var(--color-text-main);\n        line-height: 1.8;\n        -webkit-font-smoothing: antialiased;\n        box-sizing: border-box;\n        width: 100%;\n    }\n\n    #custom-paper-scope *,\n    #custom-paper-scope *:before,\n    #custom-paper-scope *:after {\n        box-sizing: inherit;\n    }\n\n    \/* --- LAYOUT CONTAINER --- *\/\n    #custom-paper-scope .paper-container {\n        max-width: 850px !important;\n        margin: 0 auto;\n   
     padding: 30px 40px;\n        background: var(--color-paper);\n        border-radius: 8px;\n    }\n\n    \/* --- BREADCRUMBS --- *\/\n    #custom-paper-scope .breadcrumbs {\n        font-family: var(--font-ui);\n        font-size: 1rem;\n        color: var(--color-text-light);\n        margin-bottom: 25px;\n        display: flex;\n        flex-wrap: wrap;\n        align-items: center;\n        gap: 8px;\n    }\n\n    #custom-paper-scope .breadcrumbs a {\n        color: var(--color-link);\n        text-decoration: none;\n        font-weight: 500;\n        transition: color 0.2s;\n    }\n\n    #custom-paper-scope .breadcrumbs a:hover {\n        text-decoration: underline;\n        color: var(--color-accent);\n    }\n\n    #custom-paper-scope .breadcrumbs .separator {\n        font-size: 0.7rem;\n        color: #999;\n    }\n\n    #custom-paper-scope .breadcrumbs .current-crumb {\n        color: #7f8c8d;\n        cursor: default;\n    }\n\n    \/* --- TYPOGRAPHY & TITLE --- *\/\n    #custom-paper-scope .paper-title {\n        font-size: 2.5rem;\n        font-weight: 900;\n        text-align: left;\n        margin-bottom: 40px;\n        color: #1a252f;\n        line-height: 1.2;\n        letter-spacing: -0.5px;\n    }\n\n    #custom-paper-scope p {\n        font-size: 1.2rem !important; \n        margin-bottom: 24px;\n        color: #374151;\n        font-weight: 300;\n        line-height: 2;\n        text-align: justify;\n        hyphens: auto; \n    }\n\n    \/* --- AUTHORS GRID --- *\/\n    #custom-paper-scope .authors {\n        display: grid;\n        grid-template-columns: repeat(auto-fill, minmax(220px, 1fr));\n        gap: 30px;\n        margin-bottom: 60px;\n        padding-bottom: 40px;\n        border-bottom: 1px solid var(--color-border);\n    }\n\n    #custom-paper-scope .author {\n        text-align: left;\n        font-family: var(--font-ui);\n        font-size: 1rem;\n        padding: 0; \n    }\n\n    #custom-paper-scope .author-name {\n        
font-weight: 700;\n        font-size: 1rem;\n        color: #111;\n        margin-bottom: 4px;\n        text-transform: uppercase;\n        letter-spacing: 0.5px;\n        display: block;\n    }\n\n    #custom-paper-scope .author-role,\n    #custom-paper-scope .author-affiliation {\n        color: var(--color-text-light);\n        display: block;\n        margin-bottom: 2px;\n        font-size: 0.9rem;\n    }\n\n    \/* --- ABSTRACT --- *\/\n    #custom-paper-scope .abstract {\n        background: #f1f4f8; \n        padding: 15px 20px;\n        margin: 40px 0 60px 0;\n        border-radius: 6px;\n        border-left: 5px solid var(--color-accent);\n        position: relative;\n    }\n\n    #custom-paper-scope .abstract-title {\n        font-family: var(--font-ui);\n        font-weight: 700;\n        text-transform: uppercase;\n        letter-spacing: 1px;\n        font-size: 0.85rem;\n        color: var(--color-accent);\n        margin-bottom: 15px;\n        display: block;\n        text-align: left;\n    }\n    \n    #custom-paper-scope .abstract p {\n        font-size: 1.1rem !important;\n        margin-bottom: 0;\n        color: #445566;\n        text-align: left;\n    }\n\n    #custom-paper-scope .keywords {\n        margin-top: 15px;\n        font-family: var(--font-ui);\n        font-size: 0.9rem;\n        color: #555;\n        font-style: italic;\n    }\n\n    \/* --- SECTIONS --- *\/\n    #custom-paper-scope .section {\n        margin: 50px 0;\n    }\n\n    #custom-paper-scope .section-title {\n        font-family: var(--font-ui);\n        font-size: 1.4rem;\n        font-weight: 700;\n        margin: 40px 0 20px 0;\n        padding-bottom: 10px;\n        border-bottom: 2px solid #eee;\n        color: #111;\n        letter-spacing: -0.02em;\n        text-align: left;\n    }\n\n    #custom-paper-scope .subsection-title {\n        font-family: var(--font-ui);\n        font-size: 1.15rem;\n        font-weight: 600;\n        margin: 30px 0 15px 0;\n        
color: var(--color-text-main);\n        text-align: left;\n    }\n\n    \/* --- LISTS --- *\/\n    #custom-paper-scope ul, \n    #custom-paper-scope ol {\n        margin: 20px 0 20px 20px;\n        padding-left: 20px;\n        color: #374151;\n        font-family: var(--font-ui);\n    }\n\n    #custom-paper-scope li {\n        margin-bottom: 10px;\n        font-size: 1.1rem !important;\n    }\n\n    \/* --- FIGURES --- *\/\n    #custom-paper-scope .figure {\n        margin: 50px 0;\n        text-align: center;\n        background: #fff;\n        padding: 10px;\n    }\n\n    #custom-paper-scope .figure img {\n        max-width: 100%;\n        height: auto;\n        border-radius: 4px;\n        box-shadow: 0 4px 12px rgba(0,0,0,0.1);\n        border: 1px solid #eee;\n    }\n\n    #custom-paper-scope .figure-caption {\n        font-family: var(--font-ui);\n        font-size: 0.85rem;\n        color: #7f8c8d;\n        margin-top: 15px;\n        display: block;\n        font-weight: 500;\n        text-align: center;\n    }\n\n    \/* --- TABLES --- *\/\n    #custom-paper-scope table {\n        width: 100%;\n        border-collapse: collapse;\n        margin: 30px 0;\n        font-size: 0.95rem;\n        font-family: var(--font-ui);\n        box-shadow: 0 1px 3px rgba(0,0,0,0.1);\n    }\n\n    #custom-paper-scope th, #custom-paper-scope td {\n        border: 1px solid var(--color-border);\n        padding: 12px;\n        text-align: left;\n        vertical-align: top;\n    }\n\n    #custom-paper-scope th {\n        background-color: #f8f9fa;\n        font-weight: 600;\n        color: var(--color-accent);\n    }\n    \n    #custom-paper-scope tr:nth-child(even) {\n        background-color: #fafafa;\n    }\n\n    \/* --- REFERENCES --- *\/\n    #custom-paper-scope .references {\n        background: #fafafa;\n        padding: 30px;\n        border-top: 4px solid #333;\n        margin-top: 80px;\n    }\n    \n    #custom-paper-scope .references .section-title {\n        
border-bottom: none;\n        margin-top: 0;\n    }\n\n    #custom-paper-scope .references ol {\n        margin-left: 0;\n        padding-left: 20px;\n    }\n\n    #custom-paper-scope .references li {\n        margin-bottom: 15px;\n        font-size: 0.9rem !important;\n        font-family: var(--font-ui);\n        color: #555;\n    }\n\n    \/* --- RESPONSIVENESS --- *\/\n    @media (max-width: 768px) {\n        #custom-paper-scope {\n            padding: 0;\n            background: #fff;\n        }\n        \n        #custom-paper-scope .paper-container {\n            padding: 20px 0;\n            box-shadow: none;\n            width: 100%;\n        }\n\n        #custom-paper-scope .paper-title {\n            font-size: 2rem;\n        }\n\n        #custom-paper-scope .abstract {\n            padding: 20px;\n            margin: 20px 0;\n        }\n        \n        #custom-paper-scope .authors {\n            grid-template-columns: 1fr; \n        }\n        \n        #custom-paper-scope .breadcrumbs {\n            font-size: 0.75rem;\n        }\n        \n        #custom-paper-scope table {\n            display: block;\n            overflow-x: auto;\n        }\n    }\n<\/style>\n\n<div id=\"custom-paper-scope\">\n    <div class=\"paper-container\">\n        \n        <!-- BREADCRUMBS SECTION -->\n        <nav class=\"breadcrumbs\" aria-label=\"Breadcrumb\">\n            <a href=\"https:\/\/www.mygreatlearning.com\/\">Great Learning<\/a>\n            <span class=\"separator\">&gt;<\/span>\n            \n            <a href=\"https:\/\/www.mygreatlearning.com\/blog\/\">Blog<\/a>\n            <span class=\"separator\">&gt;<\/span>\n            \n            <a href=\"https:\/\/www.mygreatlearning.com\/blog\/research-and-studies\/\">Research and studies<\/a>\n            <span class=\"separator\">&gt;<\/span>\n            \n            <span class=\"current-crumb\">Activity & Emotion Detection of Recognized Kids in CCTV Video for Day Care Using SlowFast & CNN<\/span>\n  
      <\/nav>\n        \n        <h1 class=\"paper-title\" id=\"research-activity-emotion-detection-of-recognized-kidsin-cctv-video-for-day-care-using-slowfast-cnn\">Research: Activity & Emotion Detection of Recognized Kids in CCTV Video for Day Care Using SlowFast & CNN<\/h1>\n\n    <!-- authors grid -->\n     <div class=\"authors\">\n            <div class=\"author\">\n                <span class=\"author-name\">NARAYANA DARAPANENI<\/span>\n                <span class=\"author-role\">Director - AIML<\/span>\n                <span class=\"author-affiliation\">Great Learning\/Northwestern University<\/span>\n                <span class=\"author-affiliation\">Illinois, USA<\/span>\n            <\/div>\n\n            <div class=\"author\">\n                <span class=\"author-name\">Akash Srivastav<\/span>\n                <span class=\"author-role\">Student - AIML<\/span>\n                <span class=\"author-affiliation\">Great Learning<\/span>\n                <span class=\"author-affiliation\">Bangalore, India<\/span>\n            <\/div>\n\n            <div class=\"author\">\n                <span class=\"author-name\">Dhirendra Singh<\/span>\n                <span class=\"author-role\">Student - AIML<\/span>\n                <span class=\"author-affiliation\">Great Learning<\/span>\n                <span class=\"author-affiliation\">Bangalore, India<\/span>\n            <\/div>\n\n            <div class=\"author\">\n                <span class=\"author-name\">Neeraj Kilhore<\/span>\n                <span class=\"author-role\">Student - AIML<\/span>\n                <span class=\"author-affiliation\">Great Learning<\/span>\n                <span class=\"author-affiliation\">Bangalore, India<\/span>\n            <\/div>\n            \n            <div class=\"author\">\n                <span class=\"author-name\">Rashmi Tomer<\/span>\n                <span class=\"author-role\">Student - AIML<\/span>\n                <span 
class=\"author-affiliation\">Great Learning<\/span>\n                <span class=\"author-affiliation\">Bangalore, India<\/span>\n            <\/div>\n\n             <div class=\"author\">\n                <span class=\"author-name\">Sharat Chandra<\/span>\n                <span class=\"author-role\">Student - AIML<\/span>\n                <span class=\"author-affiliation\">Great Learning<\/span>\n                <span class=\"author-affiliation\">Bangalore, India<\/span>\n            <\/div>\n\n<div class=\"author\">\n                <span class=\"author-name\">Shivangi Chopra<\/span>\n                <span class=\"author-role\">Student - AIML<\/span>\n                <span class=\"author-affiliation\">Great Learning<\/span>\n                <span class=\"author-affiliation\">Bangalore, India<\/span>\n            <\/div>\n<div class=\"author\">\n                <span class=\"author-name\">Sudhershan Sureshrao Deshmukh<\/span>\n                <span class=\"author-role\">Mentor- AIML<\/span>\n                <span class=\"author-affiliation\">Great Learning<\/span>\n                <span class=\"author-affiliation\">Mumbai, India<\/span>\n            <\/div>\n\n            <div class=\"author\">\n<span class=\"author-name\"><a href=\"https:\/\/scholar.google.com\/citations?user=T1KFBS0AAAAJ&hl=en&oi=ao\" target=\"_blank\">Anwesh Reddy Paduri<\/a><\/span>\n                <span class=\"author-role\">Data Scientist - AIML<\/span>\n                <span class=\"author-affiliation\">Great Learning<\/span>\n                <span class=\"author-affiliation\">Mumbai, India<\/span>\n            <\/div>\n        <\/div>\n\n <!-- Abstract -->\n    <div class=\"abstract\">\n      <div class=\"abstract-title\">Abstract<\/div>\n      <p>\n        For working parents a real-life challenge faced\nis to keep track of their child activities in playschool and\ncreche. 
Despite having CCTV surveillance available to\nparents, monitoring 8-10 hours videos on a day-to-day basis is\nnot possible, hence CCTV videos, which carry millions of\npieces of information, go unnoticed by parents and day-cares. The aim\nof the project was to process CCTV videos to identify the child,\ndetect their respective expressions as well as\nplanned\/unplanned activities throughout the day. We have\nconsidered angry, disgust, scared, happy, sad, surprised,\nneutral expressions of kids to be monitored on daily basis. The\nvarious activities performed at daycare like playing, drawing,\nrhyming, dancing along with not so usual activities like\nslapping, falling, pushing would also be monitored on kids. It\ncan be further extended to create an 8-10 minute\nglimpse\/summary in form of video\/timeline of children\u2019s entire\nday activities and inform their respective parents. This can\nimprove the overall experience of the daycare for parents.\n      <\/p>\n    <\/div>\n\n<!-- Introduction -->\n<div class=\"section\">\n  <div class=\"section-title\">I. Introduction <\/div>\n\n  <p>\nFor working parents a real-life challenge faced is to keep track of their child's activities in playschool and creche. Despite having CCTV surveillance available to parents, monitoring 8-10 hours videos on a day-to-day basis is not possible, hence CCTV videos, which carry millions of pieces of information, go unnoticed by parents and day-cares. The proposed solution gives a summary about the kid's activities at a day care. It will not only impact the overall engagement of Parents, Kids and Day-cares\/Schools but also will improve the quality and trust among them along with saving time. 
Knowing that a kid is active\/happy\/sad would also help daycares in taking care of them in a personalized way.\n  <\/p>\n<\/div>\n\n<figure style=\"margin: 24px 0; text-align: center;\">\n  <img decoding=\"async\" \n    src=\"http:\/\/www.mygreatlearning.com\/blog\/wp-content\/uploads\/2026\/02\/1-1.png\"\n    alt=\"Figure image\"\n    style=\"max-width:100%; height:auto;\"\n  >\n  <figcaption style=\"margin-top:8px; font-size:14px; color:#555;\">\n  <\/figcaption>\n<\/figure>\n\n<!-- Field Review-->\n<div class=\"section\">\n  <div class=\"section-title\">II. Field Review<\/div>\n\n  <p>\nOur use-case consists of 3 different problems Recognition of the kids, Expression of kids and Activity of kids, so we could define a kid's profile for each day in day care. For recognition we experimented over several models available like Siamese [8], Yolo [11] and found them working for single person recognition in a frame but not with group.  <\/p>\n\n  <p>\nNext, we tried Haar cascade-based models and found this works well with frontal images and did not give promising accuracy with our experimental frames. We ended up with a network architecture which is based on ResNet-34 from the Deep Residual Learning for Image Recognition [27].\n <\/p>\n\n  <p>\nFor expression detection, we considered FER-2013 dataset and models with Xception CNN & Haar feature-based cascade classifiers. For activity detection [9], we considered a few known action datasets like UCF101, Kinetics 600[24] and AVA [2] and discovered their pros and cons. UCF101 do not contain kid's images and the action categories did not match to common activities in a day care. Kinetics 600[25] dataset had a great match with our use case, but the dataset had a single caption per frame[19].\n <\/p>\n\n  <p>\nWe, however, needed to caption each kid's activity. AVA fulfils this constraint for us. 
The AVA dataset densely annotates 80 atomic visual actions in 430 15-minute movie clips, where actions are localized [2] in space and time, resulting in multiple action labels per human. For activity captioning, we used SlowFast [5] model trained over AVA dataset. However, there is no single model or architecture as of today which could give the desired solution.\n <\/p>\n<\/div>\n<!-- Materials and Methods-->\n<div class=\"section\">\n  <div class=\"section-title\">III. Materials and Methods<\/div>\n\n  <p>\nWe explored various models that were among the best in industry to perform facial recognition along with expressions. We also looked for any existing model which could track individuals' activities from CCTV videos. We collected several daycare CCTV videos from source and after thoroughly analyzing them, we identified activities which were of concern at daycare. We asked for daily schedule of activities and started looking for any existing dataset that contains most of them. Kinetics and AVA datasets had most of the activities of our concern.\n <\/p>\n\n  <p>\nA. Training Data\n  <\/p>\n\n  <p>\nWe prepared our dataset by taking videos of the most common scenarios at daycare. We wrote a code to randomly select a few frames and considered them as input to all the models. A collection of 3 sample videos (30 seconds each) was taken into account, out of which total 20 random frames were selected at a gap of 2-3 seconds (with the help of random frame selector code) for our experiment. 
The 3 algorithms associated with face, expression and activity detection were made to run on the below set of 20 input frames.\n  <\/p>\n<figure style=\"margin: 24px 0; text-align: center;\">\n  <img decoding=\"async\" \n    src=\"http:\/\/www.mygreatlearning.com\/blog\/wp-content\/uploads\/2026\/02\/2-1.png\"\n    alt=\"Figure image\"\n    style=\"max-width:100%; height:auto;\"\n  >\n  <figcaption style=\"margin-top:8px; font-size:14px; color:#555;\">\n  <\/figcaption>\n<\/figure>\n  <p>\nB. Ground Truth Labelling\n  <\/p>\n\n  <p>\nTo measure the result, we had created ground truths for recognition, expression and activity as shown in Table I. There were 9 activities prominently visible in our daycare sample, out of 80 that are available in AVA dataset. For expressions, we have considered 7 expressions.\n <\/p>\n<figure style=\"margin: 24px 0; text-align: center;\">\n  <img decoding=\"async\" \n    src=\"http:\/\/www.mygreatlearning.com\/blog\/wp-content\/uploads\/2026\/02\/Table-1.png\"\n    alt=\"Figure image\"\n    style=\"max-width:100%; height:auto;\"\n  >\n  <figcaption style=\"margin-top:8px; font-size:14px; color:#555;\">\n  <\/figcaption>\n<\/figure>\n  <p>\nC. Predictions\n  <\/p>\n\n  <p>\nWe divided our problem into 3 sub problems viz. facial recognition, expression detection and activity detection.\n <\/p>\n\n <p>\n1. Face Recognition: For Facial recognition, we created 128-d embeddings of each face from the training dataset and compared it with the facial embeddings in the testing image. 
\n <\/p>\n<figure style=\"margin: 24px 0; text-align: center;\">\n  <img decoding=\"async\" \n    src=\"http:\/\/www.mygreatlearning.com\/blog\/wp-content\/uploads\/2026\/02\/3-1.png\"\n    alt=\"Figure image\"\n    style=\"max-width:100%; height:auto;\"\n  >\n  <figcaption style=\"margin-top:8px; font-size:14px; color:#555;\">\n  <\/figcaption>\n<\/figure>\n <p>\nWe then calculated the Euclidean distance between the known embeddings and the calculated embeddings and used a simple k-NN model along with votes to make final face classification.\n <\/p>\n<figure style=\"margin: 24px 0; text-align: center;\">\n  <img decoding=\"async\" \n    src=\"http:\/\/www.mygreatlearning.com\/blog\/wp-content\/uploads\/2026\/02\/4-1.png\"\n    alt=\"Figure image\"\n    style=\"max-width:100%; height:auto;\"\n  >\n  <figcaption style=\"margin-top:8px; font-size:14px; color:#555;\">\n  <\/figcaption>\n<\/figure>\n<p>\nThe face recognition network architecture is based on ResNet-34 with fewer layers and the number of filters reduced by half from the Deep Residual Learning for Image Recognition paper by He et al. The face recognition module uses dlib and imutils library. We matched each face in the input image (encoding) to our known encodings dataset (stored in the pickle file). If Euclidean distance is below some tolerance (the smaller the tolerance, the stricter our facial recognition system will be) then we return True, indicating the faces match. Otherwise, if the distance is above the tolerance threshold, we return False as the faces do not match.\n<\/p>\n\n <p>\n2. Expression Recognition: Our final expression detection algorithm is a two-step process. In the first step, front faces are extracted from group photos using HAAR cascade classifier and in the second step we are using an Xception CNN model to detect one of the 7 natural expressions of the subject (angry, disgust, scared, happy, sad, surprised, neutral). 
\n <\/p>\n\n<figure style=\"margin: 24px 0; text-align: center;\">\n  <img decoding=\"async\" \n    src=\"http:\/\/www.mygreatlearning.com\/blog\/wp-content\/uploads\/2026\/02\/5-1.png\"\n    alt=\"Figure image\"\n    style=\"max-width:100%; height:auto;\"\n  >\n  <figcaption style=\"margin-top:8px; font-size:14px; color:#555;\">\n  <\/figcaption>\n<\/figure>\n\n <p>\nThe Model was trained with FER 2013 data set. Average accuracy achieved was 73.44%. Images are categorized based on the emotion shown in the facial expressions (angry, disgust, scared, happy, sad, surprised, neutral).\n<\/p>\n<figure style=\"margin: 24px 0; text-align: center;\">\n  <img decoding=\"async\" \n    src=\"http:\/\/www.mygreatlearning.com\/blog\/wp-content\/uploads\/2026\/02\/6-2.png\"\n    alt=\"Figure image\"\n    style=\"max-width:100%; height:auto;\"\n  >\n  <figcaption style=\"margin-top:8px; font-size:14px; color:#555;\">\n  <\/figcaption>\n<\/figure>\n\n<figure style=\"margin: 24px 0; text-align: center;\">\n  <img decoding=\"async\" \n    src=\"http:\/\/www.mygreatlearning.com\/blog\/wp-content\/uploads\/2026\/02\/7-2.png\"\n    alt=\"Figure image\"\n    style=\"max-width:100%; height:auto;\"\n  >\n  <figcaption style=\"margin-top:8px; font-size:14px; color:#555;\">\n  <\/figcaption>\n<\/figure>\n\n<p>\n3. Activity Recognition: We found AVA dataset most suitable for our use-case because of the following reasons: Annotations were already provided for each individual per video clip. The AVA dataset densely annotates 80 atomic visual actions in 430 movie clips (15-minute each), where actions are localized in space and time, resulting in 1.62M action labels with multiple labels per human occurring frequently, which in our use-case was needed. AVA provides audiovisual annotations of video with improved understanding of human activity. 
Each of the video clips has been exhaustively annotated and together they represent a rich variety of scenes, recording conditions, and expressions of human activity. The AVA dataset is focused on spatiotemporal localization of human actions. The data is taken from 437 movies. Spatiotemporal labels are provided for one frame per second, with every person annotated with a bounding box and (possibly multiple) actions.\n <\/p>\n<figure style=\"margin: 24px 0; text-align: center;\">\n  <img decoding=\"async\" \n    src=\"http:\/\/www.mygreatlearning.com\/blog\/wp-content\/uploads\/2026\/02\/9-2.png\"\n    alt=\"Figure image\"\n    style=\"max-width:100%; height:auto;\"\n  >\n  <figcaption style=\"margin-top:8px; font-size:14px; color:#555;\">\n  <\/figcaption>\n<\/figure>\n\n<p>\nWe explored the state-of-the-art action recognition algorithm SlowFast Networks. It uses ResNet-50 as the backbone of the network. We used the SlowFast pretrained model with 32 frames which are processed at sampling rate of 2 fps and 101 deep layered ResNet model. SlowFast networks for video recognition involve a Slow pathway, operating at low frame rate, to capture spatial semantics and a Fast pathway, operating at high frame rate, to capture motion at fine temporal resolution. The Fast pathway can be made very lightweight by reducing its channel capacity, yet can learn useful temporal information for video recognition.\n<\/p>\n\n<figure style=\"margin: 24px 0; text-align: center;\">\n  <img decoding=\"async\" \n    src=\"http:\/\/www.mygreatlearning.com\/blog\/wp-content\/uploads\/2026\/02\/8-2.png\"\n    alt=\"Figure image\"\n    style=\"max-width:100%; height:auto;\"\n  >\n  <figcaption style=\"margin-top:8px; font-size:14px; color:#555;\">\n  <\/figcaption>\n<\/figure>\n\n<\/div>\n\n<!-- Results and Discussions-->\n<div class=\"section\">\n  <div class=\"section-title\">IV. 
Results and Discussions<\/div>\n\n<p>\nThe result table below in Table II is the output for the ensemble model wherein predictions for each of the 3 categories are documented of each frame. Each row in the below table represents the number of children detected, ground truth as well as prediction on a frame for face recognition, expression detection and activity recognition. For example, if we take V1_Frame350 from the result table in Table II, then following are the metrics using Table I ground truth references:<\/p>\n<figure style=\"margin: 24px 0; text-align: center;\">\n  <img decoding=\"async\" \n    src=\"http:\/\/www.mygreatlearning.com\/blog\/wp-content\/uploads\/2026\/02\/Table-2.png\"\n    alt=\"Figure image\"\n    style=\"max-width:100%; height:auto;\"\n  >\n  <figcaption style=\"margin-top:8px; font-size:14px; color:#555;\">\n  <\/figcaption>\n<\/figure>\n<p>\nA. Face Recognition: Person Detected: 3, Ground Truth: [0,3,5], Prediction: [1,3,6], Accuracy: 33% (as out of 3 children only one child was recognized correctly).\n<\/p>\n\n<p>\nB. Expression Detection: Person Detected: 2, Ground Truth: [4,6], Prediction: [4,3], Accuracy: 50% (as out of 2 children only one child's expression was recognized correctly).\n<\/p>\n\n<p>\nC. Activity Recognition: Person Detected: 3, Ground Truth: [6,2,8], Prediction: [6,-,8], Accuracy: 66.7% (as out of 3 children, 2 children's activities were recognized correctly).\n<\/p>\n\n  <p>\nBased upon our use-case, different weightages have been assigned with each task as per their importance. In Table III, [a, b, c] signifies the importance of facial recognition, expression detection and activity detection of kids respectively. 
The below table documents the weights for each task and based upon that calculates the overall mean accuracy for the ensemble model which comes out to be 75.36%.\n <\/p>\n<figure style=\"margin: 24px 0; text-align: center;\">\n  <img decoding=\"async\" \n    src=\"http:\/\/www.mygreatlearning.com\/blog\/wp-content\/uploads\/2026\/02\/Table-3.png\"\n    alt=\"Figure image\"\n    style=\"max-width:100%; height:auto;\"\n  >\n  <figcaption style=\"margin-top:8px; font-size:14px; color:#555;\">\n  <\/figcaption>\n<\/figure>\n  <p>\nThe further plan is to synchronize these models in a way that a single run gives one output having all 3 detections in one frame. We want to move this existing model running in batch to real time. We could use the output of this model as an input to a sequence-based NLP model and generate relevant summarization text for each kid by combining the results of recognition, expression and activity. For example, for [Madhav, Happy, Sit] the output could be: Madhav is happily sitting in class.\n <\/p>\n\n  <p>\nWe achieved an average accuracy of 92.36% as shown in Table IV. In our proposed approach, using existing models we got 76.5% accuracy which can be further improved to approx 90% with good quality frontal images data.\n <\/p>\n<figure style=\"margin: 24px 0; text-align: center;\">\n  <img decoding=\"async\" \n    src=\"http:\/\/www.mygreatlearning.com\/blog\/wp-content\/uploads\/2026\/02\/Table-4.png\"\n    alt=\"Figure image\"\n    style=\"max-width:100%; height:auto;\"\n  >\n  <figcaption style=\"margin-top:8px; font-size:14px; color:#555;\">\n  <\/figcaption>\n<\/figure>\n<\/div>\n\n<!-- Future Scope-->\n<div class=\"section\">\n  <div class=\"section-title\">V. Future Scope<\/div>\n\n<p>\nThe further plan is to synchronize these models in a way that a single run gives one output having all 3 detections in one frame.\n<\/p>\n\n<p>\n1. We could plan to move this existing model running in batch to real time.\n<\/p>\n\n<p>\n2. 
We could use the output of this model as an input to a sequence-based NLP model and generate relevant summarization text for each kid by combining the results of recognition, expression and activity. For example, for [Madhav, Happy, Sit] the output could be: Madhav is happily sitting in class.\n<\/p>\n\n<p>\n3. Solution can send real-time alerts to Day-care management & Parents in case any inappropriate things are taking place or any kid is absent, or teachers are engaged with phones, or they are not attentive at any time etc.\n<\/p>\n\n<\/div>\n\n<!-- Conclusion-->\n<div class=\"section\">\n  <div class=\"section-title\">VI. Conclusion<\/div>\n\n<p>\nAs per our research there is no existing model which targets all 3 tasks together i.e. face recognition, expression and activity detection for kids in day-care. Hence this can be considered as the benchmark accuracy for such an ensemble model which caters to this unique use-case.\n<\/p>\n\n<\/div>\n\n<!-- ===== REFERENCES GREY BOX STYLE ===== -->\n\n<div style=\"\n    background:#f2f2f2;\n    padding:40px 50px;\n    margin-top:80px;\n    border:1px solid #e0e0e0;\n\">\n\n    <h2 style=\"\n        font-family:'Inter', sans-serif;\n        font-weight:700;\n        letter-spacing:1px;\n        font-size:22px;\n        margin-bottom:30px;\n        color:#222;\n    \">\n        REFERENCES\n    <\/h2>\n\n    <ol style=\"\n        margin:0;\n        padding-left:25px;\n        font-size:15px;\n        line-height:1.8;\n        color:#333;\n    \">\n\n        <li style=\"margin-bottom:18px;\">\n            Christoph Feichtenhofer, Haoqi Fan, Jitendra Malik and Kaiming He,\n            <em>\"SlowFast Networks for Video Recognition,\"<\/em>\n            Facebook AI Research (FAIR), arXiv:1812.03982v3 [cs.CV], 29 Oct 2019.\n        <\/li>\n\n        <li style=\"margin-bottom:18px;\">\n            C. Gu et al.,\n            <em>\"AVA: A video dataset of spatiotemporally localized atomic visual actions,\"<\/em>\n            Proc. 
CVPR, 2018.\n        <\/li>\n\n        <li style=\"margin-bottom:18px;\">\n            Navneet Dalal and Bill Triggs,\n            <em>\"Histograms of Oriented Gradients for Human Detection,\"<\/em>\n            INRIA Rhone-Alpes, France.\n        <\/li>\n\n        <li style=\"margin-bottom:18px;\">\n            Vahid Kazemi and Josephine Sullivan,\n            <em>\"One Millisecond Face Alignment with an Ensemble of Regression Trees,\"<\/em>\n            KTH Royal Institute of Technology, Sweden.\n        <\/li>\n\n        <li style=\"margin-bottom:18px;\">\n            C. Feichtenhofer et al.,\n            <em>\"SlowFast networks for video recognition in ActivityNet challenge 2019.\"<\/em>\n        <\/li>\n\n        <li style=\"margin-bottom:18px;\">\n            Ashwin Geet D'Sa and Dr. B.G. Prasad,\n            <em>\"An IoT Based Framework For Activity Recognition Using Deep Learning Technique,\"<\/em>\n            Jun. 2019.\n        <\/li>\n\n        <li style=\"margin-bottom:18px;\">\n            Octavio Arriaga et al.,\n            <em>\"Real-time Convolutional Neural Networks for Emotion and Gender Classification,\"<\/em>\n            Oct. 2017.\n        <\/li>\n\n        <li style=\"margin-bottom:18px;\">\n            Yujia Zhang et al.,\n            <em>\"Motion-patch-based Siamese CNNs for human activity recognition in videos,\"<\/em>\n            Jan. 2020.\n        <\/li>\n\n        <li style=\"margin-bottom:18px;\">\n            Guilhem Cheron, Ivan Laptev, Cordelia Schmid,\n            <em>\"P-CNN: Pose-based CNN Features for Action Recognition,\"<\/em>\n            2015.\n        <\/li>\n\n        <li style=\"margin-bottom:18px;\">\n            P. Khorrami et al.,\n            <em>\"How deep neural networks can improve emotion recognition on video data,\"<\/em>\n            IEEE ICIP, 2016.\n        <\/li>\n\n        <li style=\"margin-bottom:18px;\">\n            W. Yang and Z. 
Jiachun,\n            <em>\"Real-time face detection based on YOLO,\"<\/em>\n            2018.\n        <\/li>\n\n        <li style=\"margin-bottom:18px;\">\n            M. S. Howlader et al.,\n            <em>\"Detection of Human Actions in Library Using YOLO V3,\"<\/em>\n            Dec. 2018.\n        <\/li>\n\n        <li style=\"margin-bottom:18px;\">\n            Srinath Shiv Kumar et al.,\n            <em>\"Deep Learning-Based Automated Detection of Sewer Defects in CCTV Videos.\"<\/em>\n        <\/li>\n\n        <li style=\"margin-bottom:18px;\">\n            J. Redmon and A. Farhadi,\n            <em>\"YOLO9000: Better, Faster, Stronger,\"<\/em>\n            IEEE CVPR, 2017.\n        <\/li>\n\n        <li style=\"margin-bottom:18px;\">\n            D. K. Appana et al.,\n            <em>\"Video-based smoke detection using spatial-temporal analyses,\"<\/em>\n            Information Sciences, 2017.\n        <\/li>\n\n        <li style=\"margin-bottom:18px;\">\n            M. Hendri,\n            <em>\"Design of smoke and fire detection system using image processing,\"<\/em>\n            Thesis, 2018.\n        <\/li>\n\n        <li style=\"margin-bottom:18px;\">\n            R. Girshick et al.,\n            <em>\"Rich feature hierarchies for accurate object detection,\"<\/em>\n            CVPR, 2014.\n        <\/li>\n\n        <li style=\"margin-bottom:18px;\">\n            R. Huang et al.,\n            <em>\"YOLO-LITE: A real-time object detection algorithm optimized for non-GPU computers,\"<\/em>\n            IEEE Big Data, 2018.\n        <\/li>\n\n        <li style=\"margin-bottom:18px;\">\n            S. Shinde et al.,\n            <em>\"YOLO based human action recognition and localization,\"<\/em>\n            Procedia Computer Science, 2018.\n        <\/li>\n\n        <li style=\"margin-bottom:18px;\">\n            G. 
Li et al.,\n            <em>\"A new method of image detection for small datasets under YOLO framework.\"<\/em>\n        <\/li>\n\n        <li style=\"margin-bottom:18px;\">\n            Paul Ekman et al.,\n            <em>\"Universals and cultural differences in judgments of facial expressions,\"<\/em>\n            Journal of Personality and Social Psychology, 1987.\n        <\/li>\n\n        <li style=\"margin-bottom:18px;\">\n            Paul Ekman,\n            <em>\"Strong evidence for universals in facial expressions,\"<\/em>\n            1994.\n        <\/li>\n\n        <li style=\"margin-bottom:18px;\">\n            ActivityNet-Challenge, 2019 evaluation documentation.\n        <\/li>\n\n        <li style=\"margin-bottom:18px;\">\n            J. Carreira et al.,\n            <em>\"A short note about Kinetics-600,\"<\/em>\n            arXiv:1808.01340, 2018.\n        <\/li>\n\n        <li style=\"margin-bottom:18px;\">\n            J. Carreira et al.,\n            <em>\"A short note on the Kinetics-700 human action dataset,\"<\/em>\n            arXiv:1907.06987, 2019.\n        <\/li>\n\n        <li style=\"margin-bottom:18px;\">\n            C. Gu et al.,\n            <em>\"AVA dataset of atomic visual actions,\"<\/em>\n            CVPR, 2018.\n        <\/li>\n\n        <li style=\"margin-bottom:18px;\">\n            K. He et al.,\n            <em>\"Deep residual learning for image recognition,\"<\/em>\n            CVPR, 2016.\n        <\/li>\n\n        <li style=\"margin-bottom:18px;\">\n            A. 
Diba et al.,\n            <em>\"Spatio-temporal channel correlation networks for action classification,\"<\/em>\n            ECCV, 2018.\n        <\/li>\n\n        <li style=\"margin-bottom:18px;\">\n            Yuxin Wu et al.,\n            <em>\"Detectron2,\"<\/em>\n            2019.\n        <\/li>\n\n        <li style=\"margin-bottom:18px;\">\n            Kensho Hara et al.,\n            <em>\"Can Spatiotemporal 3D CNNs Retrace the History of 2D CNNs,\"<\/em>\n            2017.\n        <\/li>\n\n        <li style=\"margin-bottom:18px;\">\n            Kaiming He et al.,\n            <em>\"Deep Residual Learning for Image Recognition,\"<\/em>\n            2015.\n        <\/li>\n\n    <\/ol>\n\n<\/div>\n\n\n\n<p><\/p>\n\n\n\n<a href=\"https:\/\/www.mygreatlearning.com\/blog\/research-and-studies\/\" \n   style=\"text-decoration: none; color: #007BFF; font-size: 16px; font-weight: bold; display: block; text-align: center; padding: 10px; margin: 0px auto 40px auto; width: fit-content;\">\n   Explore More Research and Studies\n<\/a>\n","protected":false},"excerpt":{"rendered":"<p>AI-powered system for recognizing kids in CCTV footage and detecting their activities and emotions using SlowFast and CNN models for smarter daycare 
monitoring.<\/p>\n","protected":false},"author":41,"featured_media":115733,"parent":0,"menu_order":0,"comment_status":"closed","ping_status":"closed","template":"","meta":{"_acf_changed":false,"_uag_custom_page_level_css":"","site-sidebar-layout":"default","site-content-layout":"","ast-site-content-layout":"default","site-content-style":"default","site-sidebar-style":"default","ast-global-header-display":"","ast-banner-title-visibility":"","ast-main-header-display":"","ast-hfb-above-header-display":"","ast-hfb-below-header-display":"","ast-hfb-mobile-header-display":"","site-post-title":"disabled","ast-breadcrumbs-content":"","ast-featured-img":"","footer-sml-layout":"","ast-disable-related-posts":"","theme-transparent-header-meta":"","adv-header-id-meta":"","stick-header-meta":"","header-above-stick-meta":"","header-main-stick-meta":"","header-below-stick-meta":"","astra-migrate-meta-layouts":"set","ast-page-background-enabled":"default","ast-page-background-meta":{"desktop":{"background-color":"var(--ast-global-color-4)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"","background-image":"","background-repeat":"repeat","background-position":"center 
center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"ast-content-background-meta":{"desktop":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"tablet":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""},"mobile":{"background-color":"var(--ast-global-color-5)","background-image":"","background-repeat":"repeat","background-position":"center center","background-size":"auto","background-attachment":"scroll","background-type":"","background-media":"","overlay-type":"","overlay-color":"","overlay-opacity":"","overlay-gradient":""}},"footnotes":""},"categories":[36818],"tags":[],"class_list":["post-115706","page","type-page","status-publish","has-post-thumbnail","hentry","category-research-and-studies"],"acf":[],"yoast_head":"<!-- This site is optimized with the Yoast SEO Premium plugin v27.3 (Yoast SEO v27.3) - https:\/\/yoast.com\/product\/yoast-seo-premium-wordpress\/ -->\n<title>Activity &amp; Emotion Detection of Recognized kidsin CCTV Video for Day Care Using SlowFast &amp; CNN - Great Learning Blog: Free Resources what Matters to shape your Career!<\/title>\n<meta name=\"description\" content=\"AI-powered system for recognizing kids in CCTV footage and detecting their activities and emotions using SlowFast and CNN models for smarter daycare monitoring.\" \/>\n<meta name=\"robots\" content=\"index, 
follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/www.mygreatlearning.com\/blog\/kid-activity-emotion-detection-cctv\/\" \/>\n<meta property=\"og:locale\" content=\"en_US\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"Activity &amp; Emotion Detection of Recognized kidsin CCTV Video for Day Care Using SlowFast &amp; CNN\" \/>\n<meta property=\"og:description\" content=\"AI-powered system for recognizing kids in CCTV footage and detecting their activities and emotions using SlowFast and CNN models for smarter daycare monitoring.\" \/>\n<meta property=\"og:url\" content=\"https:\/\/www.mygreatlearning.com\/blog\/kid-activity-emotion-detection-cctv\/\" \/>\n<meta property=\"og:site_name\" content=\"Great Learning Blog: Free Resources what Matters to shape your Career!\" \/>\n<meta property=\"article:publisher\" content=\"https:\/\/www.facebook.com\/GreatLearningOfficial\/\" \/>\n<meta property=\"og:image\" content=\"http:\/\/www.mygreatlearning.com\/blog\/wp-content\/uploads\/2026\/02\/mmmmmmmmmmmmmmmmmmmmmmmm-1-Picsart-AiImageEnhancer-1024x573.png\" \/>\n\t<meta property=\"og:image:width\" content=\"1024\" \/>\n\t<meta property=\"og:image:height\" content=\"573\" \/>\n\t<meta property=\"og:image:type\" content=\"image\/png\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:site\" content=\"@Great_Learning\" \/>\n<meta name=\"twitter:label1\" content=\"Est. 
reading time\" \/>\n\t<meta name=\"twitter:data1\" content=\"14 minutes\" \/>\n<script type=\"application\/ld+json\" class=\"yoast-schema-graph\">{\"@context\":\"https:\\\/\\\/schema.org\",\"@graph\":[{\"@type\":\"Article\",\"@id\":\"https:\\\/\\\/www.mygreatlearning.com\\\/blog\\\/kid-activity-emotion-detection-cctv\\\/#article\",\"isPartOf\":{\"@id\":\"https:\\\/\\\/www.mygreatlearning.com\\\/blog\\\/kid-activity-emotion-detection-cctv\\\/\"},\"author\":{\"name\":\"Great Learning Editorial Team\",\"@id\":\"https:\\\/\\\/www.mygreatlearning.com\\\/blog\\\/#\\\/schema\\\/person\\\/6f993d1be4c584a335951e836f2656ad\"},\"headline\":\"Activity &amp; Emotion Detection of Recognized kidsin CCTV Video for Day Care Using SlowFast &amp; CNN\",\"datePublished\":\"2026-02-12T05:32:55+00:00\",\"mainEntityOfPage\":{\"@id\":\"https:\\\/\\\/www.mygreatlearning.com\\\/blog\\\/kid-activity-emotion-detection-cctv\\\/\"},\"wordCount\":2326,\"publisher\":{\"@id\":\"https:\\\/\\\/www.mygreatlearning.com\\\/blog\\\/#organization\"},\"image\":{\"@id\":\"https:\\\/\\\/www.mygreatlearning.com\\\/blog\\\/kid-activity-emotion-detection-cctv\\\/#primaryimage\"},\"thumbnailUrl\":\"https:\\\/\\\/www.mygreatlearning.com\\\/blog\\\/wp-content\\\/uploads\\\/2026\\\/02\\\/mmmmmmmmmmmmmmmmmmmmmmmm-1-Picsart-AiImageEnhancer.png\",\"articleSection\":[\"Research and Studies\"],\"inLanguage\":\"en-US\"},{\"@type\":\"WebPage\",\"@id\":\"https:\\\/\\\/www.mygreatlearning.com\\\/blog\\\/kid-activity-emotion-detection-cctv\\\/\",\"url\":\"https:\\\/\\\/www.mygreatlearning.com\\\/blog\\\/kid-activity-emotion-detection-cctv\\\/\",\"name\":\"Activity &amp; Emotion Detection of Recognized kidsin CCTV Video for Day Care Using SlowFast &amp; CNN - Great Learning Blog: Free Resources what Matters to shape your 
Career!\",\"isPartOf\":{\"@id\":\"https:\\\/\\\/www.mygreatlearning.com\\\/blog\\\/#website\"},\"primaryImageOfPage\":{\"@id\":\"https:\\\/\\\/www.mygreatlearning.com\\\/blog\\\/kid-activity-emotion-detection-cctv\\\/#primaryimage\"},\"image\":{\"@id\":\"https:\\\/\\\/www.mygreatlearning.com\\\/blog\\\/kid-activity-emotion-detection-cctv\\\/#primaryimage\"},\"thumbnailUrl\":\"https:\\\/\\\/www.mygreatlearning.com\\\/blog\\\/wp-content\\\/uploads\\\/2026\\\/02\\\/mmmmmmmmmmmmmmmmmmmmmmmm-1-Picsart-AiImageEnhancer.png\",\"datePublished\":\"2026-02-12T05:32:55+00:00\",\"description\":\"AI-powered system for recognizing kids in CCTV footage and detecting their activities and emotions using SlowFast and CNN models for smarter daycare monitoring.\",\"breadcrumb\":{\"@id\":\"https:\\\/\\\/www.mygreatlearning.com\\\/blog\\\/kid-activity-emotion-detection-cctv\\\/#breadcrumb\"},\"inLanguage\":\"en-US\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\\\/\\\/www.mygreatlearning.com\\\/blog\\\/kid-activity-emotion-detection-cctv\\\/\"]}]},{\"@type\":\"ImageObject\",\"inLanguage\":\"en-US\",\"@id\":\"https:\\\/\\\/www.mygreatlearning.com\\\/blog\\\/kid-activity-emotion-detection-cctv\\\/#primaryimage\",\"url\":\"https:\\\/\\\/www.mygreatlearning.com\\\/blog\\\/wp-content\\\/uploads\\\/2026\\\/02\\\/mmmmmmmmmmmmmmmmmmmmmmmm-1-Picsart-AiImageEnhancer.png\",\"contentUrl\":\"https:\\\/\\\/www.mygreatlearning.com\\\/blog\\\/wp-content\\\/uploads\\\/2026\\\/02\\\/mmmmmmmmmmmmmmmmmmmmmmmm-1-Picsart-AiImageEnhancer.png\",\"width\":2004,\"height\":1122,\"caption\":\"Activity & Emotion Detection of Recognized kidsin CCTV Video for Day Care Using SlowFast & 
CNN\"},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\\\/\\\/www.mygreatlearning.com\\\/blog\\\/kid-activity-emotion-detection-cctv\\\/#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"Blog\",\"item\":\"https:\\\/\\\/www.mygreatlearning.com\\\/blog\\\/\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"Activity &amp; Emotion Detection of Recognized kidsin CCTV Video for Day Care Using SlowFast &amp; CNN\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\\\/\\\/www.mygreatlearning.com\\\/blog\\\/#website\",\"url\":\"https:\\\/\\\/www.mygreatlearning.com\\\/blog\\\/\",\"name\":\"Great Learning Blog\",\"description\":\"Learn, Upskill &amp; Career Development Guide and Resources\",\"publisher\":{\"@id\":\"https:\\\/\\\/www.mygreatlearning.com\\\/blog\\\/#organization\"},\"alternateName\":\"Great Learning\",\"potentialAction\":[{\"@type\":\"SearchAction\",\"target\":{\"@type\":\"EntryPoint\",\"urlTemplate\":\"https:\\\/\\\/www.mygreatlearning.com\\\/blog\\\/?s={search_term_string}\"},\"query-input\":{\"@type\":\"PropertyValueSpecification\",\"valueRequired\":true,\"valueName\":\"search_term_string\"}}],\"inLanguage\":\"en-US\"},{\"@type\":\"Organization\",\"@id\":\"https:\\\/\\\/www.mygreatlearning.com\\\/blog\\\/#organization\",\"name\":\"Great Learning\",\"url\":\"https:\\\/\\\/www.mygreatlearning.com\\\/blog\\\/\",\"logo\":{\"@type\":\"ImageObject\",\"inLanguage\":\"en-US\",\"@id\":\"https:\\\/\\\/www.mygreatlearning.com\\\/blog\\\/#\\\/schema\\\/logo\\\/image\\\/\",\"url\":\"https:\\\/\\\/www.mygreatlearning.com\\\/blog\\\/wp-content\\\/uploads\\\/2022\\\/06\\\/GL-Logo.jpg\",\"contentUrl\":\"https:\\\/\\\/www.mygreatlearning.com\\\/blog\\\/wp-content\\\/uploads\\\/2022\\\/06\\\/GL-Logo.jpg\",\"width\":900,\"height\":900,\"caption\":\"Great 
Learning\"},\"image\":{\"@id\":\"https:\\\/\\\/www.mygreatlearning.com\\\/blog\\\/#\\\/schema\\\/logo\\\/image\\\/\"},\"sameAs\":[\"https:\\\/\\\/www.facebook.com\\\/GreatLearningOfficial\\\/\",\"https:\\\/\\\/x.com\\\/Great_Learning\",\"https:\\\/\\\/www.instagram.com\\\/greatlearningofficial\\\/\",\"https:\\\/\\\/www.linkedin.com\\\/school\\\/great-learning\\\/\",\"https:\\\/\\\/in.pinterest.com\\\/greatlearning12\\\/\",\"https:\\\/\\\/www.youtube.com\\\/user\\\/beaconelearning\\\/\"],\"description\":\"Great Learning is a leading global ed-tech company for professional training and higher education. It offers comprehensive, industry-relevant, hands-on learning programs across various business, technology, and interdisciplinary domains driving the digital economy. These programs are developed and offered in collaboration with the world's foremost academic institutions.\",\"email\":\"info@mygreatlearning.com\",\"legalName\":\"Great Learning Education Services Pvt. Ltd\",\"foundingDate\":\"2013-11-29\",\"numberOfEmployees\":{\"@type\":\"QuantitativeValue\",\"minValue\":\"1001\",\"maxValue\":\"5000\"}},{\"@type\":\"Person\",\"@id\":\"https:\\\/\\\/www.mygreatlearning.com\\\/blog\\\/#\\\/schema\\\/person\\\/6f993d1be4c584a335951e836f2656ad\",\"name\":\"Great Learning Editorial Team\",\"image\":{\"@type\":\"ImageObject\",\"inLanguage\":\"en-US\",\"@id\":\"https:\\\/\\\/www.mygreatlearning.com\\\/blog\\\/wp-content\\\/uploads\\\/2022\\\/02\\\/unnamed.webp\",\"url\":\"https:\\\/\\\/www.mygreatlearning.com\\\/blog\\\/wp-content\\\/uploads\\\/2022\\\/02\\\/unnamed.webp\",\"contentUrl\":\"https:\\\/\\\/www.mygreatlearning.com\\\/blog\\\/wp-content\\\/uploads\\\/2022\\\/02\\\/unnamed.webp\",\"caption\":\"Great Learning Editorial Team\"},\"description\":\"The Great Learning Editorial Staff includes a dynamic team of subject matter experts, instructors, and education professionals who combine their deep industry knowledge with innovative teaching methods. 
Their mission is to provide learners with the skills and insights needed to excel in their careers, whether through upskilling, reskilling, or transitioning into new fields.\",\"sameAs\":[\"https:\\\/\\\/www.mygreatlearning.com\\\/\",\"https:\\\/\\\/in.linkedin.com\\\/school\\\/great-learning\\\/\",\"https:\\\/\\\/x.com\\\/Great_Learning\",\"https:\\\/\\\/www.youtube.com\\\/channel\\\/UCObs0kLIrDjX2LLSybqNaEA\"],\"award\":[\"Best EdTech Company of the Year 2024\",\"Education Economictimes Outstanding Education\\\/Edtech Solution Provider of the Year 2024\",\"Leading E-learning Platform 2024\"],\"url\":\"https:\\\/\\\/www.mygreatlearning.com\\\/blog\\\/author\\\/greatlearning\\\/\"}]}<\/script>\n<!-- \/ Yoast SEO Premium plugin. -->","yoast_head_json":{"title":"Activity &amp; Emotion Detection of Recognized kidsin CCTV Video for Day Care Using SlowFast &amp; CNN - Great Learning Blog: Free Resources what Matters to shape your Career!","description":"AI-powered system for recognizing kids in CCTV footage and detecting their activities and emotions using SlowFast and CNN models for smarter daycare monitoring.","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/www.mygreatlearning.com\/blog\/kid-activity-emotion-detection-cctv\/","og_locale":"en_US","og_type":"article","og_title":"Activity &amp; Emotion Detection of Recognized kidsin CCTV Video for Day Care Using SlowFast &amp; CNN","og_description":"AI-powered system for recognizing kids in CCTV footage and detecting their activities and emotions using SlowFast and CNN models for smarter daycare monitoring.","og_url":"https:\/\/www.mygreatlearning.com\/blog\/kid-activity-emotion-detection-cctv\/","og_site_name":"Great Learning Blog: Free Resources what Matters to shape your 
Career!","article_publisher":"https:\/\/www.facebook.com\/GreatLearningOfficial\/","og_image":[{"width":1024,"height":573,"url":"http:\/\/www.mygreatlearning.com\/blog\/wp-content\/uploads\/2026\/02\/mmmmmmmmmmmmmmmmmmmmmmmm-1-Picsart-AiImageEnhancer-1024x573.png","type":"image\/png"}],"twitter_card":"summary_large_image","twitter_site":"@Great_Learning","twitter_misc":{"Est. reading time":"14 minutes"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"Article","@id":"https:\/\/www.mygreatlearning.com\/blog\/kid-activity-emotion-detection-cctv\/#article","isPartOf":{"@id":"https:\/\/www.mygreatlearning.com\/blog\/kid-activity-emotion-detection-cctv\/"},"author":{"name":"Great Learning Editorial Team","@id":"https:\/\/www.mygreatlearning.com\/blog\/#\/schema\/person\/6f993d1be4c584a335951e836f2656ad"},"headline":"Activity &amp; Emotion Detection of Recognized kidsin CCTV Video for Day Care Using SlowFast &amp; CNN","datePublished":"2026-02-12T05:32:55+00:00","mainEntityOfPage":{"@id":"https:\/\/www.mygreatlearning.com\/blog\/kid-activity-emotion-detection-cctv\/"},"wordCount":2326,"publisher":{"@id":"https:\/\/www.mygreatlearning.com\/blog\/#organization"},"image":{"@id":"https:\/\/www.mygreatlearning.com\/blog\/kid-activity-emotion-detection-cctv\/#primaryimage"},"thumbnailUrl":"https:\/\/www.mygreatlearning.com\/blog\/wp-content\/uploads\/2026\/02\/mmmmmmmmmmmmmmmmmmmmmmmm-1-Picsart-AiImageEnhancer.png","articleSection":["Research and Studies"],"inLanguage":"en-US"},{"@type":"WebPage","@id":"https:\/\/www.mygreatlearning.com\/blog\/kid-activity-emotion-detection-cctv\/","url":"https:\/\/www.mygreatlearning.com\/blog\/kid-activity-emotion-detection-cctv\/","name":"Activity &amp; Emotion Detection of Recognized kidsin CCTV Video for Day Care Using SlowFast &amp; CNN - Great Learning Blog: Free Resources what Matters to shape your 
Career!","isPartOf":{"@id":"https:\/\/www.mygreatlearning.com\/blog\/#website"},"primaryImageOfPage":{"@id":"https:\/\/www.mygreatlearning.com\/blog\/kid-activity-emotion-detection-cctv\/#primaryimage"},"image":{"@id":"https:\/\/www.mygreatlearning.com\/blog\/kid-activity-emotion-detection-cctv\/#primaryimage"},"thumbnailUrl":"https:\/\/www.mygreatlearning.com\/blog\/wp-content\/uploads\/2026\/02\/mmmmmmmmmmmmmmmmmmmmmmmm-1-Picsart-AiImageEnhancer.png","datePublished":"2026-02-12T05:32:55+00:00","description":"AI-powered system for recognizing kids in CCTV footage and detecting their activities and emotions using SlowFast and CNN models for smarter daycare monitoring.","breadcrumb":{"@id":"https:\/\/www.mygreatlearning.com\/blog\/kid-activity-emotion-detection-cctv\/#breadcrumb"},"inLanguage":"en-US","potentialAction":[{"@type":"ReadAction","target":["https:\/\/www.mygreatlearning.com\/blog\/kid-activity-emotion-detection-cctv\/"]}]},{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/www.mygreatlearning.com\/blog\/kid-activity-emotion-detection-cctv\/#primaryimage","url":"https:\/\/www.mygreatlearning.com\/blog\/wp-content\/uploads\/2026\/02\/mmmmmmmmmmmmmmmmmmmmmmmm-1-Picsart-AiImageEnhancer.png","contentUrl":"https:\/\/www.mygreatlearning.com\/blog\/wp-content\/uploads\/2026\/02\/mmmmmmmmmmmmmmmmmmmmmmmm-1-Picsart-AiImageEnhancer.png","width":2004,"height":1122,"caption":"Activity & Emotion Detection of Recognized kidsin CCTV Video for Day Care Using SlowFast & CNN"},{"@type":"BreadcrumbList","@id":"https:\/\/www.mygreatlearning.com\/blog\/kid-activity-emotion-detection-cctv\/#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"Blog","item":"https:\/\/www.mygreatlearning.com\/blog\/"},{"@type":"ListItem","position":2,"name":"Activity &amp; Emotion Detection of Recognized kidsin CCTV Video for Day Care Using SlowFast &amp; 
CNN"}]},{"@type":"WebSite","@id":"https:\/\/www.mygreatlearning.com\/blog\/#website","url":"https:\/\/www.mygreatlearning.com\/blog\/","name":"Great Learning Blog","description":"Learn, Upskill &amp; Career Development Guide and Resources","publisher":{"@id":"https:\/\/www.mygreatlearning.com\/blog\/#organization"},"alternateName":"Great Learning","potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/www.mygreatlearning.com\/blog\/?s={search_term_string}"},"query-input":{"@type":"PropertyValueSpecification","valueRequired":true,"valueName":"search_term_string"}}],"inLanguage":"en-US"},{"@type":"Organization","@id":"https:\/\/www.mygreatlearning.com\/blog\/#organization","name":"Great Learning","url":"https:\/\/www.mygreatlearning.com\/blog\/","logo":{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/www.mygreatlearning.com\/blog\/#\/schema\/logo\/image\/","url":"https:\/\/www.mygreatlearning.com\/blog\/wp-content\/uploads\/2022\/06\/GL-Logo.jpg","contentUrl":"https:\/\/www.mygreatlearning.com\/blog\/wp-content\/uploads\/2022\/06\/GL-Logo.jpg","width":900,"height":900,"caption":"Great Learning"},"image":{"@id":"https:\/\/www.mygreatlearning.com\/blog\/#\/schema\/logo\/image\/"},"sameAs":["https:\/\/www.facebook.com\/GreatLearningOfficial\/","https:\/\/x.com\/Great_Learning","https:\/\/www.instagram.com\/greatlearningofficial\/","https:\/\/www.linkedin.com\/school\/great-learning\/","https:\/\/in.pinterest.com\/greatlearning12\/","https:\/\/www.youtube.com\/user\/beaconelearning\/"],"description":"Great Learning is a leading global ed-tech company for professional training and higher education. It offers comprehensive, industry-relevant, hands-on learning programs across various business, technology, and interdisciplinary domains driving the digital economy. 
These programs are developed and offered in collaboration with the world's foremost academic institutions.","email":"info@mygreatlearning.com","legalName":"Great Learning Education Services Pvt. Ltd","foundingDate":"2013-11-29","numberOfEmployees":{"@type":"QuantitativeValue","minValue":"1001","maxValue":"5000"}},{"@type":"Person","@id":"https:\/\/www.mygreatlearning.com\/blog\/#\/schema\/person\/6f993d1be4c584a335951e836f2656ad","name":"Great Learning Editorial Team","image":{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/www.mygreatlearning.com\/blog\/wp-content\/uploads\/2022\/02\/unnamed.webp","url":"https:\/\/www.mygreatlearning.com\/blog\/wp-content\/uploads\/2022\/02\/unnamed.webp","contentUrl":"https:\/\/www.mygreatlearning.com\/blog\/wp-content\/uploads\/2022\/02\/unnamed.webp","caption":"Great Learning Editorial Team"},"description":"The Great Learning Editorial Staff includes a dynamic team of subject matter experts, instructors, and education professionals who combine their deep industry knowledge with innovative teaching methods. 
Their mission is to provide learners with the skills and insights needed to excel in their careers, whether through upskilling, reskilling, or transitioning into new fields.","sameAs":["https:\/\/www.mygreatlearning.com\/","https:\/\/in.linkedin.com\/school\/great-learning\/","https:\/\/x.com\/Great_Learning","https:\/\/www.youtube.com\/channel\/UCObs0kLIrDjX2LLSybqNaEA"],"award":["Best EdTech Company of the Year 2024","Education Economictimes Outstanding Education\/Edtech Solution Provider of the Year 2024","Leading E-learning Platform 2024"],"url":"https:\/\/www.mygreatlearning.com\/blog\/author\/greatlearning\/"}]}},"uagb_featured_image_src":{"full":["https:\/\/www.mygreatlearning.com\/blog\/wp-content\/uploads\/2026\/02\/mmmmmmmmmmmmmmmmmmmmmmmm-1-Picsart-AiImageEnhancer.png",2004,1122,false],"thumbnail":["https:\/\/www.mygreatlearning.com\/blog\/wp-content\/uploads\/2026\/02\/mmmmmmmmmmmmmmmmmmmmmmmm-1-Picsart-AiImageEnhancer-150x150.png",150,150,true],"medium":["https:\/\/www.mygreatlearning.com\/blog\/wp-content\/uploads\/2026\/02\/mmmmmmmmmmmmmmmmmmmmmmmm-1-Picsart-AiImageEnhancer-300x168.png",300,168,true],"medium_large":["https:\/\/www.mygreatlearning.com\/blog\/wp-content\/uploads\/2026\/02\/mmmmmmmmmmmmmmmmmmmmmmmm-1-Picsart-AiImageEnhancer-768x430.png",768,430,true],"large":["https:\/\/www.mygreatlearning.com\/blog\/wp-content\/uploads\/2026\/02\/mmmmmmmmmmmmmmmmmmmmmmmm-1-Picsart-AiImageEnhancer-1024x573.png",1024,573,true],"1536x1536":["https:\/\/www.mygreatlearning.com\/blog\/wp-content\/uploads\/2026\/02\/mmmmmmmmmmmmmmmmmmmmmmmm-1-Picsart-AiImageEnhancer-1536x860.png",1536,860,true],"2048x2048":["https:\/\/www.mygreatlearning.com\/blog\/wp-content\/uploads\/2026\/02\/mmmmmmmmmmmmmmmmmmmmmmmm-1-Picsart-AiImageEnhancer.png",2004,1122,false],"web-stories-poster-portrait":["https:\/\/www.mygreatlearning.com\/blog\/wp-content\/uploads\/2026\/02\/mmmmmmmmmmmmmmmmmmmmmmmm-1-Picsart-AiImageEnhancer-640x853.png",640,853,true],"web-sto
ries-publisher-logo":["https:\/\/www.mygreatlearning.com\/blog\/wp-content\/uploads\/2026\/02\/mmmmmmmmmmmmmmmmmmmmmmmm-1-Picsart-AiImageEnhancer-96x96.png",96,96,true],"web-stories-thumbnail":["https:\/\/www.mygreatlearning.com\/blog\/wp-content\/uploads\/2026\/02\/mmmmmmmmmmmmmmmmmmmmmmmm-1-Picsart-AiImageEnhancer-150x84.png",150,84,true]},"uagb_author_info":{"display_name":"Great Learning Editorial Team","author_link":"https:\/\/www.mygreatlearning.com\/blog\/author\/greatlearning\/"},"uagb_comment_info":0,"uagb_excerpt":"AI-powered system for recognizing kids in CCTV footage and detecting their activities and emotions using SlowFast and CNN models for smarter daycare monitoring.","_links":{"self":[{"href":"https:\/\/www.mygreatlearning.com\/blog\/wp-json\/wp\/v2\/pages\/115706","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.mygreatlearning.com\/blog\/wp-json\/wp\/v2\/pages"}],"about":[{"href":"https:\/\/www.mygreatlearning.com\/blog\/wp-json\/wp\/v2\/types\/page"}],"author":[{"embeddable":true,"href":"https:\/\/www.mygreatlearning.com\/blog\/wp-json\/wp\/v2\/users\/41"}],"replies":[{"embeddable":true,"href":"https:\/\/www.mygreatlearning.com\/blog\/wp-json\/wp\/v2\/comments?post=115706"}],"version-history":[{"count":33,"href":"https:\/\/www.mygreatlearning.com\/blog\/wp-json\/wp\/v2\/pages\/115706\/revisions"}],"predecessor-version":[{"id":115734,"href":"https:\/\/www.mygreatlearning.com\/blog\/wp-json\/wp\/v2\/pages\/115706\/revisions\/115734"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/www.mygreatlearning.com\/blog\/wp-json\/wp\/v2\/media\/115733"}],"wp:attachment":[{"href":"https:\/\/www.mygreatlearning.com\/blog\/wp-json\/wp\/v2\/media?parent=115706"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.mygreatlearning.com\/blog\/wp-json\/wp\/v2\/categories?post=115706"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.mygreatlearning.com\/blog\/wp-json\/wp\/v2\/tags?post=115706"}],"c
uries":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}