Git Product home page Git Product logo

Comments (8)

GreatV avatar GreatV commented on August 17, 2024

最好可以提供一个可复现的demo

from paddleocr.

Test-Jim avatar Test-Jim commented on August 17, 2024

最好可以提供一个可复现的demo

你好,这是demo,可以直接执行。

import cv2  # 导入OpenCV库
import difflib
from skimage.metrics import structural_similarity  # 导入结构相似度指标
from paddleocr import PaddleOCR
import pysrt
def image_extract_title(img):
    """Run OCR on a cropped frame and return the recognised subtitle text.

    Uses the module-level ``ocr`` object created in the ``__main__`` block.

    :param img: image handed unchanged to ``ocr.ocr``.
    :return: the recognised text, or ``""`` when nothing usable was found.
        Only the text is returned; no position information is included.
    """
    # Original author's note on the result layout: box corners are listed as
    # top-left, top-right, bottom-right, bottom-left.
    ocr_results = ocr.ocr(img, cls=False)

    # Treat every "nothing detected" shape as empty (None, [], [None], [[]]),
    # not just [None]; the other shapes would raise on the indexing below.
    if not ocr_results or not ocr_results[0]:
        return ""

    lines = ocr_results[0]
    if len(lines) >= 2:
        print(r'多条字幕:',ocr_results)
        # Multi-line subtitle: join the first two lines, but only when the
        # first line's confidence is above 0.89; otherwise report no text.
        if lines[0][1][1] > 0.89:
            return lines[0][1][0] + lines[1][1][0]
        return ""

    # Single detection: take its text. If several result pages are present the
    # last non-empty page wins, matching the original loop.
    title = ""
    for page in ocr_results:
        if page:
            title = page[0][1][0]
    return title

def _frame_to_srt_time(frame_no, fps):
    """Convert a frame number into a ``pysrt.SubRipTime`` for the given frame rate."""
    # Work in whole milliseconds so the split into h/m/s/ms is exact integer
    # arithmetic rather than repeated float modulo/subtraction.
    total_ms = int(frame_no * 1000 / fps)
    seconds, milliseconds = divmod(total_ms, 1000)
    minutes, seconds = divmod(seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return pysrt.SubRipTime(hours=hours, minutes=minutes, seconds=seconds,
                            milliseconds=milliseconds)


def video_extra_title(video_path, output_srt_path,y1,y2):
    """Extract hard-coded subtitles from a video and save them as an SRT file.

    Every ``subsampling``-th frame is cropped to rows ``y1:y2``. When no
    subtitle is active the crop is passed to ``image_extract_title``; while a
    subtitle is active, each sampled crop is compared with the reference crop
    via ``structural_similarity`` and the subtitle is closed once the
    similarity drops to the threshold or below.

    Relies on the module-level ``subs`` (a ``pysrt.SubRipFile``) and ``index``
    (next subtitle number) defined in the ``__main__`` block.

    :param video_path: source passed to ``cv2.VideoCapture``.
    :param output_srt_path: destination passed to ``subs.save``.
    :param y1: first row (inclusive) of the subtitle band in each frame.
    :param y2: last row (exclusive) of the subtitle band in each frame.
    :return: None. The result is written to ``output_srt_path``.
    """
    subsampling = 5  # process one frame out of every five
    similarity_thresh = 0.8  # above this, two crops count as the same subtitle

    cap = cv2.VideoCapture(video_path)
    w = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    h = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    fps = cap.get(cv2.CAP_PROP_FPS)
    print('Video info w: {}, h: {}, count: {}, fps: {}'.format(w, h, count, fps))

    cur = 0  # number of frames read so far
    detected = False  # True while a subtitle is currently on screen
    content = ''  # text of the active subtitle
    start = 0  # frame number at which the active subtitle was first seen
    ref_gray_image = None  # grayscale crop of the frame where it was first seen

    def _add_subs(end):
        """
        Record the active subtitle, or extend the previous one if it is similar.

        :param end: frame number at which the active subtitle ends
        """
        global index

        end_time = _frame_to_srt_time(end, fps)
        # Same normalisation the original applied before storing the text.
        text = content.strip().replace('7', '了')

        # Compare like with like: the stored text has already been normalised,
        # so the new text is normalised before the similarity check as well.
        if len(subs) > 0:
            if difflib.SequenceMatcher(None, subs[-1].text, text).quick_ratio() > 0.8:
                # Near-duplicate of the previous entry: just stretch its end time.
                subs[-1].end = end_time
                return

        sub = pysrt.SubRipItem(index=index, start=_frame_to_srt_time(start, fps),
                               end=end_time, text=text)
        print('写入:',end_time,text)
        subs.append(sub)
        index += 1

    # Release the capture even if OCR or the similarity check raises.
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # End of stream (or read failure): flush the subtitle in progress.
                if detected:
                    _add_subs(cur)
                break
            frame = frame[y1:y2, :]
            cur += 1
            if cur % subsampling != 0:
                continue

            if detected:
                hyp_gray_image = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                similarity = structural_similarity(hyp_gray_image, ref_gray_image)
                if similarity > similarity_thresh:
                    # Still the same subtitle.
                    continue
                # The subtitle changed somewhere after the previous sample.
                _add_subs(cur - subsampling)
                detected = False
                # Fall through and OCR this very frame, so a subtitle that
                # immediately follows is not started one sampling step late.

            content = image_extract_title(frame)
            if content != "":
                detected = True
                start = cur
                ref_gray_image = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    finally:
        cap.release()

    # Write the collected subtitles to the SRT file.
    subs.save(output_srt_path, encoding='utf-8')


if __name__ == '__main__':
    import logging

    # Suppress log records of severity WARNING and below.
    logging.disable(logging.WARNING)

    # Module-level state read by the functions above:
    #   subs  - the subtitle file being built
    #   index - number assigned to the next subtitle entry
    #   ocr   - the PaddleOCR instance used for recognition
    subs = pysrt.SubRipFile()
    index = 1
    ocr = PaddleOCR(
        use_angle_cls=False,
        lang="ch",
        use_gpu=True,
        show_log=False,
    )

    video_path = r'http://60.12.15.236:9803/download/%E8%B6%85%E7%BA%A7%E9%BE%99%E5%A9%BF.mp4'
    output_srt_path = 'longxu_zh_.srt'

    # Row range of the subtitle band within each frame.
    # Alternatives left disabled by the author: y1, y2 = 1291, 1553, or
    # deriving them via find_word_location(video_path) (not part of this snippet).
    y1 = 1109
    y2 = 1261

    # Generate the subtitle file.
    video_extra_title(video_path, output_srt_path, y1, y2)

from paddleocr.

Test-Jim avatar Test-Jim commented on August 17, 2024

最好可以提供一个可复现的demo

cfd8097a4d08e5fe38f27208ab57263

from paddleocr.

GreatV avatar GreatV commented on August 17, 2024

可以检查一下是不是截取的字幕有问题

from paddleocr.

Test-Jim avatar Test-Jim commented on August 17, 2024

可以检查一下是不是截取的字幕有问题
没有问题呢。这个是视频地址,您可以根据上面错误的时间点对着视频看一下,字幕是没有错的。要错就会一起错,不可能一段对、一段错。 video_path = r'http://60.12.15.236:9803/download/%E8%B6%85%E7%BA%A7%E9%BE%99%E5%A9%BF.mp4'

from paddleocr.

GreatV avatar GreatV commented on August 17, 2024

应该是字幕位置没对上

image

810_WEE

from paddleocr.

GreatV avatar GreatV commented on August 17, 2024

字幕正确是没有问题的

image

1372_你知道该怎么做了吧

from paddleocr.

Test-Jim avatar Test-Jim commented on August 17, 2024

字幕正确是没有问题的

image ![1372_你知道该怎么做了吧](https://private-user-images.githubusercontent.com/17264618/348295598-5d56063d-bf07-4305-8bf3-26fae88049b5.png?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3MjA4MzI5ODIsIm5iZiI6MTcyMDgzMjY4MiwicGF0aCI6Ii8xNzI2NDYxOC8zNDgyOTU1OTgtNWQ1NjA2M2QtYmYwNy00MzA1LThiZjMtMjZmYWU4ODA0OWI1LnBuZz9YLUFtei1BbGdvcml0aG09QVdTNC1ITUFDLVNIQTI1NiZYLUFtei1DcmVkZW50aWFsPUFLSUFWQ09EWUxTQTUzUFFLNFpBJTJGMjAyNDA3MTMlMkZ1cy1lYXN0LTElMkZzMyUyRmF3czRfcmVxdWVzdCZYLUFtei1EYXRlPTIwMjQwNzEzVDAxMDQ0MlomWC1BbXotRXhwaXJlcz0zMDAmWC1BbXotU2lnbmF0dXJlPTUyYTk1OTk3YmJlYTEyMTFmOGFjYmRiMGVhNmJiMTdhMTVlNjRlM2ViNGIzN2Y1ZjU1YTBmM2JmZTE5N2ZkODgmWC1BbXotU2lnbmVkSGVhZGVycz1ob3N0JmFjdG9yX2lkPTAma2V5X2lkPTAmcmVwb19pZD0wIn0.MHnZCOXhRulPQy_GIB7QcwMwAmqyy6Le9SuLsTyrfuk)

谢谢

from paddleocr.

Related Issues (20)

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. 📊📈🎉

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Something interesting about the web. A new door to the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Something interesting about games, to make everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google ❤️ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.