#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
YouTube 동영상을 AI 기반으로 스마트 요약하는 프로그램
- Whisper로 음성 인식
- 중요 구간 자동 추출
- ChatGPT API로 요약 생성
Author: 최만규
"""

import os
import sys
import argparse
import json
from pathlib import Path
from datetime import datetime
import yt_dlp
import whisper
from moviepy.editor import VideoFileClip, concatenate_videoclips, TextClip, CompositeVideoClip
from openai import OpenAI
import numpy as np
from collections import Counter

# Select SDL's "dummy" audio driver (original author's note: hides ALSA warnings).
os.environ['SDL_AUDIODRIVER'] = 'dummy'


class SmartShortsConverter:
    """AI 기반 스마트 동영상 요약 클래스"""
    
    def __init__(self, output_dir="./smart_shorts_output", openai_api_key=None):
        """
        Prepare the output/temp directories and the optional OpenAI client.

        Args:
            output_dir (str): Directory that receives the generated files. A
                ``temp`` sub-directory is created inside it. Missing parent
                directories are created as well.
            openai_api_key (str): OpenAI API key. When omitted or empty, the
                ``OPENAI_API_KEY`` environment variable is used instead.
        """
        self.output_dir = Path(output_dir)
        self.temp_dir = self.output_dir / "temp"
        # parents=True so a nested path such as "out/run1" works even when
        # "out" does not exist yet; creating temp_dir also creates output_dir.
        self.temp_dir.mkdir(parents=True, exist_ok=True)

        # The summary step is optional: without a key the client stays None
        # and only a warning is printed.
        self.openai_api_key = openai_api_key or os.getenv('OPENAI_API_KEY')
        if self.openai_api_key:
            self.client = OpenAI(api_key=self.openai_api_key)
        else:
            print("⚠️  OpenAI API 키가 설정되지 않았습니다. 요약 기능은 사용할 수 없습니다.")
            self.client = None

        # Populated on first use by load_whisper_model().
        self.whisper_model = None
        
    def load_whisper_model(self, model_size="base"):
        """
        Return the cached Whisper model, loading it on the first call.

        Args:
            model_size (str): Model size to load (tiny, base, small, medium,
                large). Only used when no model has been loaded yet.

        Returns:
            The object stored in ``self.whisper_model``.
        """
        # Already loaded: reuse it as-is.
        if self.whisper_model is not None:
            return self.whisper_model

        print(f"🤖 Whisper 모델 로딩 중 ({model_size})...")
        self.whisper_model = whisper.load_model(model_size)
        print("✅ Whisper 모델 로드 완료")
        return self.whisper_model
    
    def download_youtube_video(self, url):
        """
        Download a YouTube video into the temp directory with yt-dlp.

        Args:
            url (str): URL of the video to download.

        Returns:
            tuple: ``(path, title)`` - the downloaded file path as a string
            and the title reported by yt-dlp.

        Raises:
            FileNotFoundError: The download finished but no usable file was
                found in the temp directory.
            Exception: Any other error from yt-dlp is printed and re-raised.
        """
        print(f"📥 유튜브 동영상 다운로드 중: {url}")

        ydl_opts = {
            'format': (
                'bestvideo[ext=mp4][height<=1080]+bestaudio[ext=m4a]/best[ext=mp4][height<=1080]/'
                'bestvideo[ext=mp4]+bestaudio/best[ext=mp4]/'
                'bestvideo+bestaudio/best'
            ),
            'outtmpl': str(self.temp_dir / '%(id)s.%(ext)s'),
            'quiet': False,
            'no_warnings': False,
            'merge_output_format': 'mp4',
            'postprocessors': [{
                'key': 'FFmpegVideoConvertor',
                'preferedformat': 'mp4',
            }],
            # NOTE(review): this turns off TLS certificate verification for
            # the download. Kept for backward compatibility - confirm it is
            # really needed.
            'nocheckcertificate': True,
            'http_headers': {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
            },
            'extractor_args': {
                'youtube': {
                    'player_client': ['android', 'web'],
                    'skip': ['hls', 'dash']
                }
            },
        }

        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                print("🔍 동영상 정보 추출 중...")
                info = ydl.extract_info(url, download=True)

                video_id = info['id']
                video_title = info['title']

                # Preferred lookup: the file named after the video id.
                downloaded_file = None
                for ext in ('mp4', 'webm', 'mkv'):
                    candidate = self.temp_dir / f"{video_id}.{ext}"
                    if candidate.exists():
                        downloaded_file = candidate
                        break

                if downloaded_file is None:
                    # Fallback: newest regular file in the temp directory.
                    # Skip entries that cannot be the video: directories, the
                    # transcript written by transcribe_video(), and
                    # in-progress download artifacts.
                    skipped_suffixes = ('.part', '.ytdl')
                    candidates = [
                        entry for entry in self.temp_dir.glob("*")
                        if entry.is_file()
                        and entry.name != "transcript.json"
                        and entry.suffix not in skipped_suffixes
                    ]
                    if candidates:
                        downloaded_file = max(candidates, key=lambda x: x.stat().st_mtime)

                if downloaded_file is None:
                    raise FileNotFoundError("다운로드된 파일을 찾을 수 없습니다.")

                print(f"✅ 다운로드 완료: {video_title}")
                print(f"📁 파일: {downloaded_file}")

                return str(downloaded_file), video_title

        except Exception as e:
            print(f"❌ 다운로드 실패: {str(e)}")
            raise
    
    def transcribe_video(self, video_path, language="ko"):
        """
        Run Whisper speech-to-text on a video and save the raw result as JSON.

        Args:
            video_path (str): Path of the video file to transcribe.
            language (str): Language code passed to the model (e.g. "ko", "en").

        Returns:
            dict: The value returned by the model's ``transcribe`` call; its
            ``segments`` entry is counted for the progress message.
        """
        print("\n🎙️  음성 인식 시작 (Whisper)...")
        whisper_model = self.load_whisper_model("base")

        print("📝 전사 중... (시간이 걸릴 수 있습니다)")
        transcribe_options = {
            'language': language,
            'task': "transcribe",
            'verbose': False,
        }
        transcript = whisper_model.transcribe(video_path, **transcribe_options)

        segment_count = len(transcript['segments'])
        print(f"✅ 전사 완료: {segment_count}개 세그먼트")

        # Keep a JSON copy of the raw result in the temp directory.
        destination = self.temp_dir / "transcript.json"
        with destination.open('w', encoding='utf-8') as handle:
            json.dump(transcript, handle, ensure_ascii=False, indent=2)

        print(f"💾 전사 내용 저장: {destination}")
        return transcript
    
    def analyze_important_segments(self, transcription, video_duration):
        """
        Score every transcript segment with keyword, length and position rules.

        Scoring per segment:
            * +3 for each keyword from the built-in Korean list found in the text
            * +2 when the stripped text is longer than 10 and shorter than 200 characters
            * +1 when the segment lasts more than 3 seconds
            * +2 when the segment starts in the first or last 10% of the video

        Args:
            transcription (dict): Transcript; only ``transcription['segments']``
                is read, and each segment must have ``text``, ``start`` and
                ``end``.
            video_duration (float): Total video length in seconds. When it is
                None or not positive, the position bonus is skipped instead of
                dividing by zero.

        Returns:
            list: Dicts with ``start``, ``end``, ``text``, ``score`` and
            ``duration`` keys, sorted by descending score. Segments with the
            same score keep their transcript order.
        """
        print("\n🧠 중요 구간 분석 중...")

        # Korean cue words; the original author tuned them for lecture videos.
        important_keywords = (
            '중요', '핵심', '포인트', '주목', '강조', '정리', '요약', '결론',
            '첫째', '둘째', '셋째', '먼저', '다음', '마지막',
            '이것', '이거', '바로', '특히', '예를 들어', '예시',
            '문제', '해결', '방법', '기법', '원리', '개념', '정의',
            '질문', '답', '이유', '왜냐하면', '그래서', '따라서',
        )

        # The position bonus divides by the total length, so it needs a
        # positive value.
        has_duration = bool(video_duration) and video_duration > 0

        scored_segments = []
        for seg in transcription['segments']:
            text = seg['text'].strip()
            start = seg['start']
            end = seg['end']
            duration = end - start

            # 1. Keyword hits carry the most weight.
            score = 3 * sum(1 for kw in important_keywords if kw in text)

            # 2. Favor sentences that are neither very short nor very long.
            if 10 < len(text) < 200:
                score += 2

            # 3. Favor segments longer than three seconds.
            if duration > 3:
                score += 1

            # 4. Favor the opening and closing 10% of the video.
            if has_duration:
                position_ratio = start / video_duration
                if position_ratio < 0.1 or position_ratio > 0.9:
                    score += 2

            scored_segments.append({
                'start': start,
                'end': end,
                'text': text,
                'score': score,
                'duration': duration,
            })

        # Highest score first; list.sort is stable, so ties stay in order.
        scored_segments.sort(key=lambda x: x['score'], reverse=True)

        top_score = scored_segments[0]['score'] if scored_segments else 0
        print(f"📊 분석 완료: 총 {len(scored_segments)}개 세그먼트")
        print(f"   상위 점수: {top_score}")

        return scored_segments
    
    def select_segments_for_target_duration(self, scored_segments, target_duration=60):
        """
        Greedily pick segments, in the given order, until the target is filled.

        A segment is taken only when it still fits in ``target_duration``.
        The scan stops once at least 90% of the target has been collected.

        Args:
            scored_segments (list): Segment dicts with ``duration`` and
                ``start`` keys, already in priority order.
            target_duration (int): Desired total length in seconds.

        Returns:
            list: The chosen segments, ordered by ``start``.
        """
        print(f"\n🎯 {target_duration}초 분량 선택 중...")

        # Reaching 90% of the target counts as full.
        enough = target_duration * 0.9
        picked = []
        used = 0

        for candidate in scored_segments:
            length = candidate['duration']
            if used + length <= target_duration:
                picked.append(candidate)
                used += length

            if used >= enough:
                break

        # Play the chosen segments in chronological order.
        chronological = sorted(picked, key=lambda item: item['start'])

        print(f"✅ {len(chronological)}개 구간 선택됨 (총 {used:.1f}초)")

        return chronological
    
    def create_smart_shorts(self, video_path, selected_segments, add_subtitles=False):
        """
        Cut the selected segments, join them and render a 1080x1920 MP4.

        Args:
            video_path (str): Path of the source video.
            selected_segments (list): Segment dicts with ``start``, ``end`` and
                ``text`` keys, in the order they should appear.
            add_subtitles (bool): Overlay each segment's text (first 100
                characters) near the bottom of its clip. A subtitle that
                fails to render is reported and skipped.

        Returns:
            str: Path of the rendered file inside the output directory.

        Raises:
            ValueError: ``selected_segments`` is empty, or no segment has a
                positive length within the source video.
        """
        print("\n🎬 쇼츠 동영상 생성 중...")

        # Fail early with a clear message instead of an obscure error from
        # concatenating zero clips.
        if not selected_segments:
            raise ValueError("선택된 구간이 없어 쇼츠를 생성할 수 없습니다.")

        video = VideoFileClip(video_path)
        final_video = resized = cropped = None
        try:
            source_duration = video.duration
            total = len(selected_segments)

            # Extract each segment as its own clip.
            clips = []
            for i, seg in enumerate(selected_segments, 1):
                start = seg['start']
                end = seg['end']
                # Do not cut past the end of the source video.
                if source_duration is not None and end > source_duration:
                    end = source_duration
                if end <= start:
                    print(f"  ⚠️  구간 {i}/{total} 건너뜀: {start:.1f}s ~ {end:.1f}s")
                    continue

                print(f"  구간 {i}/{total}: {start:.1f}s ~ {end:.1f}s")
                clip = video.subclip(start, end)

                # Optional subtitle overlay.
                if add_subtitles and seg['text']:
                    try:
                        # Longer text gets a smaller font, kept within 30..50.
                        text_length = len(seg['text'])
                        fontsize = max(30, min(50, 300 // (text_length // 10 + 1)))

                        subtitle = TextClip(
                            seg['text'][:100],  # at most 100 characters
                            fontsize=fontsize,
                            color='white',
                            font='Arial',
                            stroke_color='black',
                            stroke_width=2,
                            method='caption',
                            size=(1000, None)
                        ).set_position(('center', 0.85), relative=True).set_duration(clip.duration)

                        clip = CompositeVideoClip([clip, subtitle])
                    except Exception as e:
                        # Best effort: keep the clip without a subtitle.
                        print(f"    ⚠️  자막 추가 실패: {str(e)}")

                clips.append(clip)

            if not clips:
                raise ValueError("선택된 구간이 없어 쇼츠를 생성할 수 없습니다.")

            # Join the clips.
            print("\n🔗 클립 연결 중...")
            final_video = concatenate_videoclips(clips, method="compose")

            # Convert to the 9:16 shorts format.
            print("📐 쇼츠 포맷으로 변환 중...")
            original_width, original_height = video.size
            shorts_width = 1080
            shorts_height = 1920

            # Scale so the frame covers 1080x1920. A source wider than 9:16
            # matches the height; otherwise it matches the width. The
            # overflow is cropped below.
            original_ratio = original_width / original_height
            if original_ratio > (shorts_width / shorts_height):
                resized = final_video.resize(height=shorts_height)
            else:
                resized = final_video.resize(width=shorts_width)

            # Center crop to the exact target size.
            resized_width, resized_height = resized.size
            cropped = resized.crop(
                x_center=resized_width / 2,
                y_center=resized_height / 2,
                width=shorts_width,
                height=shorts_height
            )

            # The output name carries a timestamp.
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            output_filename = f"smart_shorts_{timestamp}.mp4"
            output_path = self.output_dir / output_filename

            print(f"\n💾 인코딩 중: {output_path.name}")
            cropped.write_videofile(
                str(output_path),
                codec='libx264',
                audio_codec='aac',
                fps=30,
                preset='medium',
                bitrate='5000k',
                verbose=False,
                logger=None
            )
        finally:
            # Close the clips even when cutting or encoding fails part-way.
            for opened in (video, final_video, resized, cropped):
                if opened is not None:
                    opened.close()

        print("✅ 쇼츠 생성 완료!")
        return str(output_path)
    
    def generate_summary_with_gpt(self, transcription):
        """
        Ask the OpenAI chat API for a one- or two-line summary of the transcript.

        Args:
            transcription (dict): Transcript; the ``text`` of every entry in
                ``transcription['segments']`` is joined with spaces. Only the
                first 3000 characters are sent.

        Returns:
            str: The summary, or a fixed explanatory message when no client is
            configured or the request fails.
        """
        if not self.client:
            print("⚠️  OpenAI API 키가 없어 요약을 생성할 수 없습니다.")
            return "요약 생성 불가 (API 키 필요)"

        print("\n🤖 ChatGPT로 요약 생성 중...")

        # Flatten the transcript and cap its size before sending.
        max_chars = 3000
        full_text = " ".join(seg['text'] for seg in transcription['segments'])
        if len(full_text) > max_chars:
            full_text = full_text[:max_chars] + "..."

        system_message = {
            "role": "system",
            "content": "당신은 강의 동영상을 요약하는 전문가입니다. 주어진 전사 내용을 1-2줄로 핵심만 간결하게 요약해주세요."
        }
        user_message = {
            "role": "user",
            "content": f"다음 강의 내용을 1-2줄로 요약해주세요:\n\n{full_text}"
        }

        try:
            response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[system_message, user_message],
                max_tokens=200,
                temperature=0.7
            )

            summary = response.choices[0].message.content.strip()
            print("✅ 요약 생성 완료")
            print(f"📝 요약: {summary}")

            return summary

        except Exception as e:
            # A failed request is reported in the returned text, not raised.
            print(f"❌ 요약 생성 실패: {e!s}")
            return f"요약 생성 실패: {e!s}"
    
    def save_summary_file(self, summary, selected_segments, output_path):
        """
        Write the summary and the chosen segments to a text report.

        The report is saved next to ``output_path`` with a ``.txt`` suffix.

        Args:
            summary (str): Summary text to include.
            selected_segments (list): Segment dicts with ``start``, ``end``,
                ``score`` and ``text`` keys.
            output_path (str): Path of the rendered shorts file.

        Returns:
            str: Path of the written report.
        """
        summary_file = Path(output_path).with_suffix('.txt')
        rule = "=" * 60

        with summary_file.open('w', encoding='utf-8') as report:
            emit = report.write

            # Header
            emit(f"{rule}\n")
            emit("📹 스마트 쇼츠 요약 정보\n")
            emit(f"{rule}\n\n")

            # Summary
            emit("📝 AI 요약 (ChatGPT):\n")
            emit(f"{summary}\n\n")

            # Segment list
            emit(f"{rule}\n")
            emit(f"📊 포함된 구간 ({len(selected_segments)}개):\n")
            emit(f"{rule}\n\n")

            for number, segment in enumerate(selected_segments, start=1):
                emit(
                    f"{number}. [{segment['start']:.1f}s ~ {segment['end']:.1f}s] "
                    f"(점수: {segment['score']})\n"
                )
                emit(f"   {segment['text']}\n\n")

        print(f"💾 요약 파일 저장: {summary_file}")
        return str(summary_file)
    
    def cleanup_temp_files(self):
        """
        Delete the video files (.mp4, .webm, .mkv) from the temp directory.

        Other files are left in place. Any error stops the cleanup and is
        printed as a warning instead of being raised.
        """
        print("\n🧹 임시 파일 정리 중...")
        video_suffixes = ('.mp4', '.webm', '.mkv')
        try:
            for entry in self.temp_dir.glob("*"):
                if entry.suffix not in video_suffixes:
                    continue
                entry.unlink()
                print(f"  삭제: {entry.name}")
        except Exception as e:
            print(f"⚠️  정리 중 오류: {e!s}")
    
    def convert_url_to_smart_shorts(self, youtube_url, target_duration=60,
                                   add_subtitles=False, keep_temp=False,
                                   language="ko"):
        """
        Run the whole pipeline: download, transcribe, select, render, summarize.

        Args:
            youtube_url (str): URL of the video to convert.
            target_duration (int): Desired length of the result in seconds.
            add_subtitles (bool): Overlay the transcript text on each clip.
            keep_temp (bool): Keep the downloaded video files. When False they
                are removed after a successful run and also after a failed one.
            language (str): Spoken language code for transcription (e.g. "ko").

        Returns:
            tuple: ``(shorts_path, summary)`` - path of the rendered video and
            the summary text.

        Raises:
            Exception: Any error from a pipeline step is printed and re-raised.
        """
        print("=" * 60)
        print("🧠 AI 스마트 쇼츠 메이커 시작")
        print("=" * 60)

        # True once cleanup has run, or when there is nothing to clean.
        cleanup_done = keep_temp
        try:
            # 1. Download the video.
            video_path, video_title = self.download_youtube_video(youtube_url)

            # 2. Speech recognition.
            transcription = self.transcribe_video(video_path, language=language)

            # 3. Read the total length; close the clip even if that fails.
            video = VideoFileClip(video_path)
            try:
                video_duration = video.duration
            finally:
                video.close()

            # 4. Score the transcript segments.
            scored_segments = self.analyze_important_segments(transcription, video_duration)

            # 5. Pick segments that fit the target length.
            selected_segments = self.select_segments_for_target_duration(
                scored_segments, target_duration
            )

            # 6. Render the shorts video.
            shorts_path = self.create_smart_shorts(
                video_path, selected_segments, add_subtitles
            )

            # 7. Generate the summary.
            summary = self.generate_summary_with_gpt(transcription)

            # 8. Save the summary report.
            summary_file = self.save_summary_file(summary, selected_segments, shorts_path)

            # 9. Remove the temporary video files.
            if not keep_temp:
                cleanup_done = True
                self.cleanup_temp_files()

            print("\n" + "=" * 60)
            print("🎉 모든 작업 완료!")
            print("=" * 60)
            print(f"📹 쇼츠: {shorts_path}")
            print(f"📝 요약: {summary_file}")
            print(f"💬 한줄 요약: {summary}")

            return shorts_path, summary

        except Exception as e:
            print(f"\n❌ 작업 실패: {str(e)}")
            raise
        finally:
            # A failed run must not leave the downloaded video behind.
            if not cleanup_done:
                self.cleanup_temp_files()


def main():
    """Parse the command-line arguments and run the full conversion.

    Exits with status 1 when the conversion raises an exception.
    """
    usage_notes = """
사용 예시:
  python smart_shorts_maker.py "URL" --api-key "sk-..."
  python smart_shorts_maker.py "URL" -d 60 --subtitles
  
환경 변수:
  export OPENAI_API_KEY="sk-..."
        """
    cli = argparse.ArgumentParser(
        description='AI 기반 스마트 쇼츠 메이커 - Whisper + ChatGPT',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_notes,
    )

    # Positional argument
    cli.add_argument('url', help='유튜브 URL')

    # Options with values
    cli.add_argument('-d', '--duration', type=int, default=60,
                     help='목표 길이 (초, 기본값: 60)')
    cli.add_argument('-o', '--output', default='./smart_shorts_output',
                     help='출력 디렉토리')
    cli.add_argument('--api-key', help='OpenAI API 키')

    # Flags
    cli.add_argument('--subtitles', action='store_true', help='자막 추가')
    cli.add_argument('--keep-temp', action='store_true', help='임시 파일 유지')
    cli.add_argument('--language', default='ko', help='음성 언어 (기본값: ko)')

    options = cli.parse_args()

    converter = SmartShortsConverter(
        output_dir=options.output,
        openai_api_key=options.api_key,
    )

    pipeline_kwargs = {
        'youtube_url': options.url,
        'target_duration': options.duration,
        'add_subtitles': options.subtitles,
        'keep_temp': options.keep_temp,
        'language': options.language,
    }

    try:
        _shorts_path, _summary = converter.convert_url_to_smart_shorts(**pipeline_kwargs)
        print("\n✨ 완료!")
    except Exception as e:
        print(f"\n💥 오류: {e!s}")
        sys.exit(1)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()