3617. Find Students with Study Spiral Pattern

Description

Table: students

+--++
\| student_id   \| int     \|
\| student_name \| varchar \|
\| major        \| varchar \|
+--+++++
\| session_id \| student_id \| subject    \| session_date \| hours_studied \|
++++--++

Output:

++--++--+-+
\| student_id \| student_name \| major            \| cycle_length \| total_study_hours \|
++--++--+-+
\| 2          \| Bob Johnson  \| Mathematics      \| 4            \| 26.0              \|
\| 1          \| Alice Chen   \| Computer Science \| 3            \| 15.0              \|
++--++--+-+

Explanation:

Alice Chen (student_id = 1):
- Study sequence: Math → Physics → Chemistry → Math → Physics → Chemistry
- Pattern: 3 subjects (Math, Physics, Chemistry) repeating for 2 complete cycles
- Consecutive dates: Oct 1-6 with no gaps > 2 days
- Cycle length: 3 subjects
- Total hours: 2.5 + 3.0 + 2.0 + 2.5 + 3.0 + 2.0 = 15.0 hours
Bob Johnson (student_id = 2):
- Study sequence: Algebra → Calculus → Statistics → Geometry → Algebra → Calculus → Statistics → Geometry
- Pattern: 4 subjects (Algebra, Calculus, Statistics, Geometry) repeating for 2 complete cycles
- Consecutive dates: Oct 1-8 with no gaps > 2 days
- Cycle length: 4 subjects
- Total hours: 4.0 + 3.5 + 2.5 + 3.0 + 4.0 + 3.5 + 2.5 + 3.0 = 26.0 hours
Students not included:
- Carol Davis (student_id = 3): Only 2 subjects (Biology, Chemistry) - doesn't meet minimum 3 subjects requirement
- David Wilson (student_id = 4): Only 2 study sessions with a 4-day gap - doesn't meet consecutive dates requirement
- Emma Brown (student_id = 5): No study sessions recorded

The result table is ordered by cycle_length in descending order, then by total_study_hours in descending order.

</div>

Solutions

Solution 1

Python
SQL

import pandas as pd
from datetime import timedelta


def find_study_spiral_pattern(
    students: pd.DataFrame, study_sessions: pd.DataFrame
) -> pd.DataFrame:
    # Convert session_date to datetime
    study_sessions["session_date"] = pd.to_datetime(study_sessions["session_date"])

    result = []

    # Group study sessions by student
    for student_id, group in study_sessions.groupby("student_id"):
        # Sort sessions by date
        group = group.sort_values("session_date").reset_index(drop=True)

        temp = []  # Holds current contiguous segment
        last_date = None

        for idx, row in group.iterrows():
            if not temp:
                temp.append(row)
            else:
                delta = (row["session_date"] - last_date).days
                if delta <= 2:
                    temp.append(row)
                else:
                    # Check the previous contiguous segment
                    if len(temp) >= 6:
                        _check_pattern(student_id, temp, result)
                    temp = [row]
            last_date = row["session_date"]

        # Check the final segment
        if len(temp) >= 6:
            _check_pattern(student_id, temp, result)

    # Build result DataFrame
    df_result = pd.DataFrame(
        result, columns=["student_id", "cycle_length", "total_study_hours"]
    )

    if df_result.empty:
        return pd.DataFrame(
            columns=[
                "student_id",
                "student_name",
                "major",
                "cycle_length",
                "total_study_hours",
            ]
        )

    # Join with students table to get name and major
    df_result = df_result.merge(students, on="student_id")

    df_result = df_result[
        ["student_id", "student_name", "major", "cycle_length", "total_study_hours"]
    ]

    return df_result.sort_values(
        by=["cycle_length", "total_study_hours"], ascending=[False, False]
    ).reset_index(drop=True)


def _check_pattern(student_id, sessions, result):
    subjects = [row["subject"] for row in sessions]
    hours = sum(row["hours_studied"] for row in sessions)

    n = len(subjects)

    # Try possible cycle lengths from 3 up to half of the sequence
    for cycle_len in range(3, n // 2 + 1):
        if n % cycle_len != 0:
            continue

        # Extract the first cycle
        first_cycle = subjects[:cycle_len]
        is_pattern = True

        # Compare each following cycle with the first
        for i in range(1, n // cycle_len):
            if subjects[i * cycle_len : (i + 1) * cycle_len] != first_cycle:
                is_pattern = False
                break

        # If a repeated cycle is detected, store the result
        if is_pattern:
            result.append(
                {
                    "student_id": student_id,
                    "cycle_length": cycle_len,
                    "total_study_hours": hours,
                }
            )
            break  # Stop at the first valid cycle found

# Write your MySQL query statement below
WITH
    ranked_sessions AS (
        SELECT
            s.student_id,
            ss.session_date,
            ss.subject,
            ss.hours_studied,
            ROW_NUMBER() OVER (
                PARTITION BY s.student_id
                ORDER BY ss.session_date
            ) AS rn
        FROM
            study_sessions ss
            JOIN students s ON s.student_id = ss.student_id
    ),
    grouped_sessions AS (
        SELECT
            *,
            DATEDIFF(
                session_date,
                LAG(session_date) OVER (
                    PARTITION BY student_id
                    ORDER BY session_date
                )
            ) AS date_diff
        FROM ranked_sessions
    ),
    session_groups AS (
        SELECT
            *,
            SUM(
                CASE
                    WHEN date_diff > 2
                    OR date_diff IS NULL THEN 1
                    ELSE 0
                END
            ) OVER (
                PARTITION BY student_id
                ORDER BY session_date
            ) AS group_id
        FROM grouped_sessions
    ),
    valid_sequences AS (
        SELECT
            student_id,
            group_id,
            COUNT(*) AS session_count,
            GROUP_CONCAT(subject ORDER BY session_date) AS subject_sequence,
            SUM(hours_studied) AS total_hours
        FROM session_groups
        GROUP BY student_id, group_id
        HAVING session_count >= 6
    ),
    pattern_detected AS (
        SELECT
            vs.student_id,
            vs.total_hours,
            vs.subject_sequence,
            COUNT(
                DISTINCT
                SUBSTRING_INDEX(SUBSTRING_INDEX(subject_sequence, ',', n), ',', -1)
            ) AS cycle_length
        FROM
            valid_sequences vs
            JOIN (
                SELECT a.N + b.N * 10 + 1 AS n
                FROM
                    (
                        SELECT 0 AS N
                        UNION
                        SELECT 1
                        UNION
                        SELECT 2
                        UNION
                        SELECT 3
                        UNION
                        SELECT 4
                        UNION
                        SELECT 5
                        UNION
                        SELECT 6
                        UNION
                        SELECT 7
                        UNION
                        SELECT 8
                        UNION
                        SELECT 9
                    ) a,
                    (
                        SELECT 0 AS N
                        UNION
                        SELECT 1
                        UNION
                        SELECT 2
                        UNION
                        SELECT 3
                        UNION
                        SELECT 4
                        UNION
                        SELECT 5
                        UNION
                        SELECT 6
                        UNION
                        SELECT 7
                        UNION
                        SELECT 8
                        UNION
                        SELECT 9
                    ) b
            ) nums
                ON n <= 10
        WHERE
            -- Check if the sequence repeats every `k` items, for some `k >= 3` and divides session_count exactly
            -- We simplify by checking the start and middle halves are equal
            LENGTH(subject_sequence) > 0
            AND LOCATE(',', subject_sequence) > 0
            AND (
                -- For cycle length 3:
                subject_sequence LIKE CONCAT(
                    SUBSTRING_INDEX(subject_sequence, ',', 3),
                    ',',
                    SUBSTRING_INDEX(SUBSTRING_INDEX(subject_sequence, ',', 6), ',', -3),
                    '%'
                )
                OR subject_sequence LIKE CONCAT(
                    -- For cycle length 4:
                    SUBSTRING_INDEX(subject_sequence, ',', 4),
                    ',',
                    SUBSTRING_INDEX(SUBSTRING_INDEX(subject_sequence, ',', 8), ',', -4),
                    '%'
                )
            )
        GROUP BY vs.student_id, vs.total_hours, vs.subject_sequence
    ),
    final_output AS (
        SELECT
            s.student_id,
            s.student_name,
            s.major,
            pd.cycle_length,
            pd.total_hours AS total_study_hours
        FROM
            pattern_detected pd
            JOIN students s ON s.student_id = pd.student_id
        WHERE pd.cycle_length >= 3
    )
SELECT *
FROM final_output
ORDER BY cycle_length DESC, total_study_hours DESC;

3617 - Find Students with Study Spiral Pattern

3617. Find Students with Study Spiral Pattern

Description

Solutions

Solution 1

All Problems

All Solutions

Welcome to Subscribe On Youtube

3617. Find Students with Study Spiral Pattern

Description

Solutions

Solution 1

All Problems

All Solutions