Welcome to Subscribe On Youtube

3617. Find Students with Study Spiral Pattern

Description

Table: students

+--++
\| student_id   \| int     \|
\| student_name \| varchar \|
\| major        \| varchar \|
+--+++++
\| session_id \| student_id \| subject    \| session_date \| hours_studied \|
++++--++

Output:

++--++--+-+
\| student_id \| student_name \| major            \| cycle_length \| total_study_hours \|
++--++--+-+
\| 2          \| Bob Johnson  \| Mathematics      \| 4            \| 26.0              \|
\| 1          \| Alice Chen   \| Computer Science \| 3            \| 15.0              \|
++--++--+-+

Explanation:

  • Alice Chen (student_id = 1):
    • Study sequence: Math → Physics → Chemistry → Math → Physics → Chemistry
    • Pattern: 3 subjects (Math, Physics, Chemistry) repeating for 2 complete cycles
    • Consecutive dates: Oct 1-6 with no gaps > 2 days
    • Cycle length: 3 subjects
    • Total hours: 2.5 + 3.0 + 2.0 + 2.5 + 3.0 + 2.0 = 15.0 hours
  • Bob Johnson (student_id = 2):
    • Study sequence: Algebra → Calculus → Statistics → Geometry → Algebra → Calculus → Statistics → Geometry
    • Pattern: 4 subjects (Algebra, Calculus, Statistics, Geometry) repeating for 2 complete cycles
    • Consecutive dates: Oct 1-8 with no gaps > 2 days
    • Cycle length: 4 subjects
    • Total hours: 4.0 + 3.5 + 2.5 + 3.0 + 4.0 + 3.5 + 2.5 + 3.0 = 26.0 hours
  • Students not included:
    • Carol Davis (student_id = 3): Only 2 subjects (Biology, Chemistry) - doesn't meet minimum 3 subjects requirement
    • David Wilson (student_id = 4): Only 2 study sessions with a 4-day gap - doesn't meet consecutive dates requirement
    • Emma Brown (student_id = 5): No study sessions recorded

The result table is ordered by cycle_length in descending order, then by total_study_hours in descending order.

</div>

Solutions

Solution 1

  • import pandas as pd
    from datetime import timedelta
    
    
    def find_study_spiral_pattern(
        students: pd.DataFrame, study_sessions: pd.DataFrame
    ) -> pd.DataFrame:
        # Convert session_date to datetime
        study_sessions["session_date"] = pd.to_datetime(study_sessions["session_date"])
    
        result = []
    
        # Group study sessions by student
        for student_id, group in study_sessions.groupby("student_id"):
            # Sort sessions by date
            group = group.sort_values("session_date").reset_index(drop=True)
    
            temp = []  # Holds current contiguous segment
            last_date = None
    
            for idx, row in group.iterrows():
                if not temp:
                    temp.append(row)
                else:
                    delta = (row["session_date"] - last_date).days
                    if delta <= 2:
                        temp.append(row)
                    else:
                        # Check the previous contiguous segment
                        if len(temp) >= 6:
                            _check_pattern(student_id, temp, result)
                        temp = [row]
                last_date = row["session_date"]
    
            # Check the final segment
            if len(temp) >= 6:
                _check_pattern(student_id, temp, result)
    
        # Build result DataFrame
        df_result = pd.DataFrame(
            result, columns=["student_id", "cycle_length", "total_study_hours"]
        )
    
        if df_result.empty:
            return pd.DataFrame(
                columns=[
                    "student_id",
                    "student_name",
                    "major",
                    "cycle_length",
                    "total_study_hours",
                ]
            )
    
        # Join with students table to get name and major
        df_result = df_result.merge(students, on="student_id")
    
        df_result = df_result[
            ["student_id", "student_name", "major", "cycle_length", "total_study_hours"]
        ]
    
        return df_result.sort_values(
            by=["cycle_length", "total_study_hours"], ascending=[False, False]
        ).reset_index(drop=True)
    
    
    def _check_pattern(student_id, sessions, result):
        subjects = [row["subject"] for row in sessions]
        hours = sum(row["hours_studied"] for row in sessions)
    
        n = len(subjects)
    
        # Try possible cycle lengths from 3 up to half of the sequence
        for cycle_len in range(3, n // 2 + 1):
            if n % cycle_len != 0:
                continue
    
            # Extract the first cycle
            first_cycle = subjects[:cycle_len]
            is_pattern = True
    
            # Compare each following cycle with the first
            for i in range(1, n // cycle_len):
                if subjects[i * cycle_len : (i + 1) * cycle_len] != first_cycle:
                    is_pattern = False
                    break
    
            # If a repeated cycle is detected, store the result
            if is_pattern:
                result.append(
                    {
                        "student_id": student_id,
                        "cycle_length": cycle_len,
                        "total_study_hours": hours,
                    }
                )
                break  # Stop at the first valid cycle found
    
    
  • # Write your MySQL query statement below
    WITH
        ranked_sessions AS (
            SELECT
                s.student_id,
                ss.session_date,
                ss.subject,
                ss.hours_studied,
                ROW_NUMBER() OVER (
                    PARTITION BY s.student_id
                    ORDER BY ss.session_date
                ) AS rn
            FROM
                study_sessions ss
                JOIN students s ON s.student_id = ss.student_id
        ),
        grouped_sessions AS (
            SELECT
                *,
                DATEDIFF(
                    session_date,
                    LAG(session_date) OVER (
                        PARTITION BY student_id
                        ORDER BY session_date
                    )
                ) AS date_diff
            FROM ranked_sessions
        ),
        session_groups AS (
            SELECT
                *,
                SUM(
                    CASE
                        WHEN date_diff > 2
                        OR date_diff IS NULL THEN 1
                        ELSE 0
                    END
                ) OVER (
                    PARTITION BY student_id
                    ORDER BY session_date
                ) AS group_id
            FROM grouped_sessions
        ),
        valid_sequences AS (
            SELECT
                student_id,
                group_id,
                COUNT(*) AS session_count,
                GROUP_CONCAT(subject ORDER BY session_date) AS subject_sequence,
                SUM(hours_studied) AS total_hours
            FROM session_groups
            GROUP BY student_id, group_id
            HAVING session_count >= 6
        ),
        pattern_detected AS (
            SELECT
                vs.student_id,
                vs.total_hours,
                vs.subject_sequence,
                COUNT(
                    DISTINCT
                    SUBSTRING_INDEX(SUBSTRING_INDEX(subject_sequence, ',', n), ',', -1)
                ) AS cycle_length
            FROM
                valid_sequences vs
                JOIN (
                    SELECT a.N + b.N * 10 + 1 AS n
                    FROM
                        (
                            SELECT 0 AS N
                            UNION
                            SELECT 1
                            UNION
                            SELECT 2
                            UNION
                            SELECT 3
                            UNION
                            SELECT 4
                            UNION
                            SELECT 5
                            UNION
                            SELECT 6
                            UNION
                            SELECT 7
                            UNION
                            SELECT 8
                            UNION
                            SELECT 9
                        ) a,
                        (
                            SELECT 0 AS N
                            UNION
                            SELECT 1
                            UNION
                            SELECT 2
                            UNION
                            SELECT 3
                            UNION
                            SELECT 4
                            UNION
                            SELECT 5
                            UNION
                            SELECT 6
                            UNION
                            SELECT 7
                            UNION
                            SELECT 8
                            UNION
                            SELECT 9
                        ) b
                ) nums
                    ON n <= 10
            WHERE
                -- Check if the sequence repeats every `k` items, for some `k >= 3` and divides session_count exactly
                -- We simplify by checking the start and middle halves are equal
                LENGTH(subject_sequence) > 0
                AND LOCATE(',', subject_sequence) > 0
                AND (
                    -- For cycle length 3:
                    subject_sequence LIKE CONCAT(
                        SUBSTRING_INDEX(subject_sequence, ',', 3),
                        ',',
                        SUBSTRING_INDEX(SUBSTRING_INDEX(subject_sequence, ',', 6), ',', -3),
                        '%'
                    )
                    OR subject_sequence LIKE CONCAT(
                        -- For cycle length 4:
                        SUBSTRING_INDEX(subject_sequence, ',', 4),
                        ',',
                        SUBSTRING_INDEX(SUBSTRING_INDEX(subject_sequence, ',', 8), ',', -4),
                        '%'
                    )
                )
            GROUP BY vs.student_id, vs.total_hours, vs.subject_sequence
        ),
        final_output AS (
            SELECT
                s.student_id,
                s.student_name,
                s.major,
                pd.cycle_length,
                pd.total_hours AS total_study_hours
            FROM
                pattern_detected pd
                JOIN students s ON s.student_id = pd.student_id
            WHERE pd.cycle_length >= 3
        )
    SELECT *
    FROM final_output
    ORDER BY cycle_length DESC, total_study_hours DESC;
    
    

All Problems

All Solutions