diff --git a/.env.example b/.env.example index 3c60636..62e35f5 100644 --- a/.env.example +++ b/.env.example @@ -1,2 +1,2 @@ GOOGLE_API_KEY= -OPENAI_API_KEY= \ No newline at end of file +OPENAI_API_KEY= diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 5061027..4967eed 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -112,9 +112,9 @@ This section guides you through submitting an enhancement suggestion for audiote #### How Do I Submit a Good Enhancement Suggestion? -Enhancement suggestions are tracked as [GitHub issues](https://github.com/HenestrosaConH/audiotext/issues). This is the [issue template](https://github.com/HenestrosaConH/audiotext/tree/main/.github/workflows/ISSUE_TEMPLATE.md). +Enhancement suggestions are tracked as [GitHub issues](https://github.com/HenestrosaConH/audiotext/issues). This is the [issue template](https://github.com/HenestrosaConH/audiotext/tree/main/.github/workflows/ISSUE_TEMPLATE.md). -Don't forget to follow these principles: +Don't forget to follow these principles: - Use a **clear and descriptive title** for the issue to identify the suggestion. - Provide a **step-by-step description of the suggested enhancement** in as many details as possible. 
diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml index 4a384cc..825743b 100644 --- a/.github/FUNDING.yml +++ b/.github/FUNDING.yml @@ -1 +1 @@ -ko_fi: henestrosadev \ No newline at end of file +ko_fi: henestrosadev diff --git a/.github/ISSUE_TEMPLATE/bug_report_template.md b/.github/ISSUE_TEMPLATE/bug_report_template.md index 8be2840..44e991f 100644 --- a/.github/ISSUE_TEMPLATE/bug_report_template.md +++ b/.github/ISSUE_TEMPLATE/bug_report_template.md @@ -1,5 +1,5 @@ --- -name: Bug Report +name: Bug Report about: Create a report to help us improve title: "[Bug] " labels: '' @@ -34,7 +34,7 @@ System information - **System**: (For example, "Ubuntu 20.04 LTS x64", "Windows 11 x64", or "macOS Monterey") - **System language**: (Please, indicate the region as well) -- **Audiotext version**: +- **Audiotext version**: Code To Duplicate @@ -55,4 +55,4 @@ Screenshot, Sketch, or Drawing Optional. Feel free to provide an image if you think it adds more useful information to the issue. -You can use [this tool](https://www.cockos.com/licecap/) to record GIFs on macOS and Windows, and [this tool](https://github.com/colinkeenan/silentcast) or [this tool](https://github.com/GNOME/byzanz) on Linux. \ No newline at end of file +You can use [this tool](https://www.cockos.com/licecap/) to record GIFs on macOS and Windows, and [this tool](https://github.com/colinkeenan/silentcast) or [this tool](https://github.com/GNOME/byzanz) on Linux. diff --git a/.github/ISSUE_TEMPLATE/feature_request_template.md b/.github/ISSUE_TEMPLATE/feature_request_template.md index aa813bc..7db1791 100644 --- a/.github/ISSUE_TEMPLATE/feature_request_template.md +++ b/.github/ISSUE_TEMPLATE/feature_request_template.md @@ -12,7 +12,7 @@ Overview In this section, provide a brief overview of the enhancement you are proposing. -Motivation +Motivation ------------------ In this section, describe the motivation behind the enhancement. What problem are you trying to solve? 
How will this enhancement benefit users of the project? @@ -37,4 +37,4 @@ Next Steps In this section, outline the next steps for implementing the enhancement. This could include assigning the enhancement to a specific team member, setting a timeline for implementation, or opening a pull request for review. -An enhancement template provides a clear and structured approach for proposing enhancements to a project, which can help ensure that proposals are well-thought-out and considered by the project's maintainers. \ No newline at end of file +An enhancement template provides a clear and structured approach for proposing enhancements to a project, which can help ensure that proposals are well-thought-out and considered by the project's maintainers. diff --git a/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md b/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md index 65ee279..21d4089 100644 --- a/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md +++ b/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md @@ -28,6 +28,3 @@ Put an x in the boxes that apply. ## Additional Notes Please provide any additional information or context about your changes here. 
- - - diff --git a/.github/workflows/code-quality.yml b/.github/workflows/code-quality.yml new file mode 100644 index 0000000..4afd0a1 --- /dev/null +++ b/.github/workflows/code-quality.yml @@ -0,0 +1,34 @@ +name: Code Quality + +on: + pull_request: + branches: + - "*" + push: + branches: + - main + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.10"] + steps: + - uses: actions/checkout@master + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + - name: Install PortAudio to install PyAudio + run: | + sudo apt-get update + sudo apt-get install python3-pyaudio portaudio19-dev python3-dev + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt -r requirements-dev.txt + - name: Run pre-commit + run: | + pre-commit run --all-files --show-diff-on-failure diff --git a/.gitignore b/.gitignore index 177316a..11ec53f 100644 --- a/.gitignore +++ b/.gitignore @@ -159,4 +159,4 @@ cython_debug/ # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
#.idea/ -audio-chunks/ \ No newline at end of file +audio-chunks/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..9d32c77 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,31 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 + hooks: + - id: check-ast + - id: check-case-conflict + - id: check-docstring-first + - id: check-merge-conflict + - id: check-vcs-permalinks + - id: debug-statements + - id: end-of-file-fixer + - id: fix-byte-order-marker + - id: mixed-line-ending + - id: requirements-txt-fixer + - id: no-commit-to-branch + args: [ --branch, main ] + - id: trailing-whitespace + - repo: local + hooks: + - id: mypy-local + name: Run mypy with all dev dependencies present + language: system + types: + - python + entry: mypy --strict + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.5.5 + hooks: + - id: ruff + args: [--fix, --exit-non-zero-on-fix] + - id: ruff-format diff --git a/LICENSE b/LICENSE index b6cd659..4e754be 100644 --- a/LICENSE +++ b/LICENSE @@ -19,9 +19,9 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE -USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/README.md b/README.md index 47d12b8..37fd264 100644 --- a/README.md +++ b/README.md @@ -12,16 +12,16 @@
- - Logo @@ -30,52 +30,52 @@

A desktop application that transcribes audio from files, microphone input or YouTube videos with the option to translate the content and create subtitles.

- Version - GitHub Contributors - License
- GitHub Contributors - Issues - GitHub pull requests

Report Bug - - · + + · Request Feature - - · + + · Ask Question @@ -93,8 +93,8 @@ - [Built With](#built-with) - [Getting Started](#getting-started) - [Installation](#installation) - - [Set Up the Project Locally](#set-up-the-project-locally) - - [Notes](#notes) + - [Setting Up the Project Locally](#setting-up-the-project-locally) + - [Notes](#notes) - [Usage](#usage) - [Transcription Language](#transcription-language) - [Transcription Method](#transcription-method) @@ -135,7 +135,7 @@ ![Main](docs/main-system.png) -**Audiotext** transcribes the audio from an audio file, video file, microphone input, directory, or YouTube video into one of the 99 different languages it supports. You can transcribe using the [**Google Speech-to-Text API**](https://cloud.google.com/speech-to-text) or [**WhisperX**](https://github.com/m-bain/whisperX), which can even translate the transcription or generate subtitles! +**Audiotext** transcribes the audio from an audio file, video file, microphone input, directory, or YouTube video into any of the 99 different languages it supports. You can transcribe using the [**Google Speech-to-Text API**](https://cloud.google.com/speech-to-text), the [**Whisper API**](https://platform.openai.com/docs/guides/speech-to-text), or [**WhisperX**](https://github.com/m-bain/whisperX). The last two methods can even translate the transcription or generate subtitles! You can also choose the theme you like best. It can be dark, light, or the one configured in the system. @@ -370,7 +370,7 @@ You can also choose the theme you like best. It can be dark, light, or the one c ├───models │ │ __init__.py │ │ transcription.py - │ │ + │ │ │ └───config │ __init__.py │ config_subtitles.py @@ -390,9 +390,9 @@ You can also choose the theme you like best. 
It can be dark, light, or the one c │ path_helper.py │ └───views - │ __init__.py + │ __init__.py │ main_window.py - │ + │ └───custom_widgets __init__.py ctk_scrollable_dropdown/ @@ -426,7 +426,7 @@ You can also choose the theme you like best. It can be dark, light, or the one c ### Installation -1. Install [FFmpeg](https://ffmpeg.org) to execute the program. Otherwise, it won't be able to process the audio files. +1. Install [FFmpeg](https://ffmpeg.org) to execute the program. Otherwise, it won't be able to process the audio files. To check if you have it installed on your system, run `ffmpeg -version`. It should return something similar to this: ``` @@ -446,16 +446,16 @@ You can also choose the theme you like best. It can be dark, light, or the one c ``` # on Ubuntu or Debian sudo apt update && sudo apt install ffmpeg - + # on Arch Linux sudo pacman -S ffmpeg - + # on MacOS using Homebrew (https://brew.sh/) brew install ffmpeg - + # on Windows using Chocolatey (https://chocolatey.org/) choco install ffmpeg - + # on Windows using Scoop (https://scoop.sh/) scoop install ffmpeg ``` @@ -463,7 +463,8 @@ You can also choose the theme you like best. It can be dark, light, or the one c 3. Decompress the downloaded file. 4. Open the `audiotext` folder and double-click the `Audiotext` executable file. -### Set Up the Project Locally +### Setting Up the Project Locally + 1. Clone the repository by running `git clone https://github.com/HenestrosaDev/audiotext.git`. 2. Change the current working directory to `audiotext` by running `cd audiotext`. 3. (Optional but recommended) Create a Python virtual environment in the project root. If you're using `virtualenv`, you would run `virtualenv venv`. @@ -474,14 +475,18 @@ You can also choose the theme you like best. It can be dark, light, or the one c # if you get the error `FullyQualifiedErrorId : UnauthorizedAccess`, run this: Set-ExecutionPolicy Unrestricted -Scope Process # and then . 
venv/Scripts/activate - + # on macOS and Linux source venv/Scripts/activate ``` 5. Run `pip install -r requirements.txt` to install the dependencies. -6. Run `python src/app.py` to start the program. +6. (Optional) If you want to contribute to the project, run `pip install -r requirements-dev.txt` to install the development dependencies. +7. (Optional) If you followed step 6, run `pre-commit install` to install the pre-commit hooks in your `.git/` directory. +8. Copy the `.env.example` file to `.env` in the root of the project directory. +9. Run `python src/app.py` to start the program. ### Notes + - You cannot generate a single executable file for this project with PyInstaller due to the dependency with the CustomTkinter package (reason [here](https://github.com/TomSchimansky/CustomTkinter/wiki/Packaging)). - For **Apple Silicon Macs**: An error occurs when trying to install the `pyaudio` package. [Here](https://stackoverflow.com/questions/73268630/error-could-not-build-wheels-for-pyaudio-which-is-required-to-install-pyprojec) is a StackOverflow post explaining how to solve this issue. - I had to comment out the lines `pprint(response_text, indent=4)` in the `recognize_google` function from the `__init__.py` file of the `SpeechRecognition` package to avoid opening a command line along with the GUI. Otherwise, the program would not be able to use the Google API transcription method because `pprint` throws an error if it cannot print to the CLI, preventing the code from generating the transcription. The same applies to the lines using the `logger` package in the `moviepy/audio/io/ffmpeg_audiowriter` file from the `moviepy` package. There is also a change in the line 169 that changes `logger=logger` to `logger=None` to avoid more errors related to opening the console. @@ -495,15 +500,15 @@ You can also choose the theme you like best. 
It can be dark, light, or the one c Once you open the **Audiotext** executable file (explained in the [Getting Started](#getting-started) section), you'll see something like this: - - - Main @@ -534,7 +539,7 @@ There are three transcription methods available in **Audiotext**: You can transcribe from four different audio sources: -- **File** (see image above): Click the file explorer icon to select the file you want to transcribe, or manually enter the path to the file in the `Path` input field. You can transcribe audio from both audio and video files. +- **File** (see image above): Click the file explorer icon to select the file you want to transcribe, or manually enter the path to the file in the `Path` input field. You can transcribe audio from both audio and video files. Note that the file explorer has the `All supported files` option selected by default. To select only audio files or video files, click the combo box in the lower right corner of the file explorer to change the file type, as marked in red in the following image: @@ -542,18 +547,18 @@ You can transcribe from four different audio sources: ![Supported files](docs/supported-files.png) -- **Directory**: Click the file explorer icon to select the directory containing the files you want to transcribe, or manually enter the path to the directory in the `Path` input field. Note that the `Autosave` option is checked and cannot be unchecked because each file's transcription will automatically be saved in the same path as the source file. +- **Directory**: Click the file explorer icon to select the directory containing the files you want to transcribe, or manually enter the path to the directory in the `Path` input field. Note that the `Autosave` option is checked and cannot be unchecked because each file's transcription will automatically be saved in the same path as the source file. 
- - - Main @@ -595,7 +600,7 @@ You can transcribe from four different audio sources: Note that if we check the `Overwrite existing files` option, all files will be processed again and the existing transcription files will be overwritten. -- **Microphone**: To start recording, simply click the `Start recording` button to begin the process. The text of the button will change to `Stop recording` and its color will change to red. Click it to stop recording and generate the transcription. +- **Microphone**: To start recording, simply click the `Start recording` button to begin the process. The text of the button will change to `Stop recording` and its color will change to red. Click it to stop recording and generate the transcription. Here is a video demonstrating this feature: @@ -603,19 +608,19 @@ You can transcribe from four different audio sources: https://github.com/user-attachments/assets/61f2173b-bcfb-4251-a910-0cf6b37598c6 Note that your operating system must recognize an input source, otherwise an error message will appear in the text box indicating that no input source was detected. - + - **YouTube video**: Requires an Internet connection to get the audio of the video. To generate the transcription, simply enter the URL of the video in the `YouTube video URL` field and click the `Generate transcription` button when you are finished adjusting the settings. - - - From microphone @@ -631,7 +636,7 @@ If checked, the transcription will automatically be saved in the root of the fol Note that if you create a transcription using the `Microphone` or `YouTube` audio sources with the `Autosave` action enabled, the transcription files will be saved in the root of the `audiotext-vX.X.X` directory. -#### Overwrite Existing Files +#### Overwrite Existing Files This option can only be checked if the `Autosave` option is checked. 
If `Overwrite existing files` is checked, existing transcriptions in the root directory of the file to be transcribed will be overwritten when saving. @@ -654,12 +659,12 @@ The `Google API options` frame appears if the selected transcription method is *

- - google-api-options @@ -672,12 +677,12 @@ Since the program uses the free **Google API** tier by default, which allows you

- - Google API key dialog @@ -692,12 +697,12 @@ The `Whisper API options` frame appears if the selected transcription method is

- - Whisper API options @@ -712,12 +717,12 @@ To add it, click the `Set OpenAI API key` button. You'll be presented with a dia

- - OpenAI API key dialog @@ -728,11 +733,11 @@ OpenAI charges for the use of the API key, for which **Audiotext** is not respon #### Response Format -The format of the transcript output, in one of these options: +The format of the transcript output, in one of these options: -- `json` +- `json` - `srt` (subtitle file type) -- `text` +- `text` - `verbose_json` - `vtt` (subtitle file type) @@ -746,7 +751,7 @@ Defaults to 0. #### Timestamp Granularities -The timestamp granularities to populate for this transcription. `Response format` must be set `verbose_json` to use timestamp granularities. Either or both of these options are supported: `word`, or `segment`. +The timestamp granularities to populate for this transcription. `Response format` must be set to `verbose_json` to use timestamp granularities. Either or both of these options are supported: `word` and `segment`. **Note**: There is no additional latency for segment timestamps, but generating word timestamps incurs additional latency. @@ -758,16 +763,16 @@ The **WhisperX** options appear when the selected transcription method is **Whis

- - - WhisperX options @@ -775,7 +780,7 @@ The **WhisperX** options appear when the selected transcription method is **Whis #### Output File Types -You can select one or more of the following transcription output file types: +You can select one or more of the following transcription output file types: - `.aud` - `.json` @@ -801,16 +806,16 @@ When you select the `.srt` and/or the `.vtt` output file type(s), the `Subtitle

- - - Subtitle options @@ -826,7 +831,7 @@ Underline each word as it's spoken in `.srt` and `.vtt` subtitle files. Not chec The maximum number of lines in a segment. `2` by default. -#### Max. Line Width +#### Max. Line Width The maximum number of characters in a line before breaking the line. `42` by default. @@ -836,12 +841,12 @@ When you click the `Show advanced options` button in the `WhisperX options` fram

- - WhisperX advanced options @@ -880,8 +885,8 @@ This term refers to different data types used in computing, particularly in the There are three possible values for **Audiotext**: - `int8`: Default if using CPU. It represents whole numbers without any fractional part. Its size is 8 bits (1 byte) and it can represent integer values from -128 to 127 (signed) or 0 to 255 (unsigned). It is used in scenarios where memory efficiency is critical, such as in quantized neural networks or edge devices with limited computational resources. - `float16`: Default if using CUDA GPU. It's a half precision type representing 16-bit floating point numbers. Its size is 16 bits (2 bytes). It has a smaller range and precision compared to `float32`. It's often used in applications where memory is a critical resource, such as in deep learning models running on GPUs or TPUs. -- `float32`: Recommended for CUDA GPUs with more than 8 GB of VRAM. It's a single precision type representing 32-bit floating point numbers, which is a standard for representing real numbers in computers. Its size is 32 bits (4 bytes). It can represent a wide range of real numbers with a reasonable level of precision. - +- `float32`: Recommended for CUDA GPUs with more than 8 GB of VRAM. It's a single precision type representing 32-bit floating point numbers, which is a standard for representing real numbers in computers. Its size is 32 bits (4 bytes). It can represent a wide range of real numbers with a reasonable level of precision. + #### Batch Size This option determines how many samples are processed together before the model parameters are updated. It doesn't affect the quality of the transcription, only the generation speed (the smaller, the slower). @@ -895,7 +900,7 @@ For simplicity, let's divide the possible batch size values into two groups: **WhisperX** will use the CPU for transcription if checked. Checked by default if there is no CUDA GPU. 
-As noted in the [Compute Type](#compute-type) section, the default compute type value for the CPU is `int8`, since many CPUs don't support efficient `float16` or `float32` computation, which would result in an error. Change it at your own risk. +As noted in the [Compute Type](#compute-type) section, the default compute type value for the CPU is `int8`, since many CPUs don't support efficient `float16` or `float32` computation, which would result in an error. Change it at your own risk. ## Troubleshooting @@ -926,7 +931,7 @@ You'll be prompted with an error like this: ``` RateLimitError("Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}") -``` +``` This is either because your account run out of credits or because you need to fund your account before you can use the API for the first time (even if you have free credits available). To fix this, you need to purchase credits for your account (starting at $5) with a credit or debit card by going to the [Billing](https://platform.openai.com/settings/organization/billing/overview) section of your OpenAI account settings. @@ -953,9 +958,10 @@ If you are using an API key that was created before you funded your account for - [x] Add support for `.json`, `.tsv` and `.aud` output file types when using WhisperX as transcription method. - [x] Add `appearance_mode` to `config.ini`. - [x] Add support for **Whisper's API** ([#42](https://github.com/HenestrosaDev/audiotext/discussions/42)). +- [x] Add pre-commit configuration for using `ruff` and `mypy`. +- [x] Set up a CI pipeline to apply the pre-commit hooks. - [ ] Change the `Generate transcription` button to `Cancel transcription` when a transcription is in progress. - [ ] Generate executables for macOS and Linux. 
-- [ ] Add pre-commit config for using `Black`, `isort`, and `mypy`. - [ ] Add tests. You can propose a new feature creating an [issue](https://github.com/HenestrosaDev/audiotext/issues/new/choose). @@ -970,7 +976,7 @@ See also the list of [contributors](https://github.com/HenestrosaDev/audiotext/c -## Contributing +## Contributing Contributions are what make the open source community such an amazing place to learn, inspire, and create. Any contributions you make are **greatly appreciated**. Please read the [CONTRIBUTING.md](https://github.com/HenestrosaDev/audiotext/blob/main/.github/CONTRIBUTING.md) file, where you can find more detailed information about how to contribute to the project. diff --git a/audiotext.spec b/audiotext.spec index e95c402..25e8705 100644 --- a/audiotext.spec +++ b/audiotext.spec @@ -14,7 +14,9 @@ datas = [ (r'venv/Lib/site-packages/pyannote', 'pyannote'), (r'venv/Lib/site-packages/asteroid_filterbanks', 'asteroid_filterbanks'), (r'venv/Lib/site-packages/whisperx', 'whisperx'), - ('res', 'res') + ('res', 'res'), + ('config.ini', '.'), + ('.env', '.'), ] datas += copy_metadata('torch') @@ -122,6 +124,3 @@ else: upx_exclude=[], name='audiotext', ) - -copyfile('config.ini', '{0}/audiotext/config.ini'.format(DISTPATH)) -copyfile('.env', '{0}/audiotext/.env'.format(DISTPATH)) diff --git a/config.ini b/config.ini index a7b67e4..40cdfbe 100644 --- a/config.ini +++ b/config.ini @@ -25,4 +25,3 @@ compute_type = float16 use_cpu = False can_use_gpu = False output_file_types = txt - diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..93a2b96 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,6 @@ +[tool.mypy] +disable_error_code = "import-untyped" + +[[tool.mypy.overrides]] +module = "*.ctk_scrollable_dropdown.*" +ignore_errors = true diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..f51f88c --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,2 @@ +mypy==1.11.0 +pre-commit==3.7.1 diff 
--git a/requirements.txt b/requirements.txt index 7a16eec..37fd2f2 100644 Binary files a/requirements.txt and b/requirements.txt differ diff --git a/res/locales/es/LC_MESSAGES/main_window.po b/res/locales/es/LC_MESSAGES/main_window.po index bb1ab00..de6e594 100644 Binary files a/res/locales/es/LC_MESSAGES/main_window.po and b/res/locales/es/LC_MESSAGES/main_window.po differ diff --git a/src/app.py b/src/app.py index 4492df6..468b3ed 100644 --- a/src/app.py +++ b/src/app.py @@ -10,8 +10,8 @@ from views.main_window import MainWindow -class App(ctk.CTk): - def __init__(self): +class App(ctk.CTk): # type: ignore[misc] + def __init__(self) -> None: super().__init__() # Get config_system to set the initial appearance mode diff --git a/src/controllers/main_controller.py b/src/controllers/main_controller.py index 2f65ba1..46d9232 100644 --- a/src/controllers/main_controller.py +++ b/src/controllers/main_controller.py @@ -18,7 +18,8 @@ class MainController: - def __init__(self, transcription: Transcription, view): + # Don't add type annotation to `view` to avoid circular imports + def __init__(self, transcription: Transcription, view): # type: ignore[no-untyped-def] self.view = view self.transcription = transcription self._is_mic_recording = False @@ -27,9 +28,11 @@ def __init__(self, transcription: Transcription, view): # PUBLIC METHODS - def select_file(self): + def select_file(self) -> None: """ Prompts a file explorer to determine the audio/video file path to transcribe. + + :return: None """ file_path = filedialog.askopenfilename( initialdir="/", @@ -44,16 +47,18 @@ def select_file(self): if file_path: self.view.on_select_path_success(file_path) - def select_directory(self): + def select_directory(self) -> None: """ Prompts a file explorer to determine the folder path to transcribe. 
+ + :return: None """ dir_path = filedialog.askdirectory() if dir_path: self.view.on_select_path_success(dir_path) - def prepare_for_transcription(self, transcription: Transcription): + def prepare_for_transcription(self, transcription: Transcription) -> None: """ Prepares to transcribe based on the specified source type of the transcription object provided. It sets up the necessary configurations and starts the @@ -62,6 +67,7 @@ def prepare_for_transcription(self, transcription: Transcription): :param transcription: An instance of the Transcription class containing information about the audio to transcribe. :type transcription: Transcription + :return: None """ try: if not transcription.output_file_types: @@ -77,7 +83,10 @@ def prepare_for_transcription(self, transcription: Transcription): threading.Thread(target=self._start_recording_from_mic).start() return elif transcription.audio_source == AudioSource.YOUTUBE: - self._prepare_for_youtube_video_transcription() + if url := transcription.youtube_url: + self._prepare_for_youtube_video_transcription(url) + else: + raise ValueError("No YouTube video URL provided. Please enter one.") threading.Thread( target=lambda loop: loop.run_until_complete( @@ -89,13 +98,22 @@ def prepare_for_transcription(self, transcription: Transcription): except Exception as e: self._handle_exception(e) - def stop_recording_from_mic(self): + def stop_recording_from_mic(self) -> None: + """ + Stops recording audio from the microphone. + + This method sets the `_is_mic_recording` attribute to False and triggers the + `on_stop_recording_from_mic` method on the `view` attribute to indicate that + recording from the microphone has stopped. + + :return: None + """ self._is_mic_recording = False self.view.on_stop_recording_from_mic() def save_transcription( self, file_path: Path, should_autosave: bool, should_overwrite: bool - ): + ) -> None: """ Saves the transcription to a text file and optionally generate subtitles. 
@@ -107,6 +125,7 @@ def save_transcription( :param should_overwrite: Indicates whether existing files should be overwritten if they exist. :type should_overwrite: bool + :return: None """ save_file_path = self._get_save_path(file_path, should_autosave) @@ -114,18 +133,31 @@ def save_transcription( return if self.transcription.method == TranscriptionMethod.WHISPERX: - self._whisperx_handler.save_transcription( - file_path=Path(save_file_path), - output_file_types=self.transcription.output_file_types, - should_overwrite=should_overwrite, - ) + if self.transcription.output_file_types: + self._whisperx_handler.save_transcription( + file_path=Path(save_file_path), + output_file_types=self.transcription.output_file_types, + should_overwrite=should_overwrite, + ) + else: + exception = ValueError( + "There are no output file types selected. Please select at least " + "one." + ) + self._handle_exception(exception) elif self.transcription.method in [ TranscriptionMethod.GOOGLE_API, TranscriptionMethod.WHISPER_API, ]: - if should_overwrite or not os.path.exists(save_file_path): - with open(save_file_path, "w", encoding="utf-8") as file: - file.write(self.transcription.text) + if self.transcription.text: + if should_overwrite or not os.path.exists(save_file_path): + with open(save_file_path, "w", encoding="utf-8") as file: + file.write(self.transcription.text) + else: + exception = ValueError( + "There is no transcription available. Please generate it again." + ) + self._handle_exception(exception) else: exception = ValueError( "Incorrect transcription method. Please check the `config.ini` file." @@ -134,15 +166,17 @@ def save_transcription( # PRIVATE METHODS - def _prepare_for_file_transcription(self, file_path: Path): + def _prepare_for_file_transcription(self, file_path: Path) -> None: """ Prepares the system for transcription from a file by verifying if the file exists and is supported for transcription. 
If the file is valid, it updates the source path in the transcription object; otherwise, it raises a ValueError. :param file_path: The path to the file for transcription. + :type file_path: Path :raises ValueError: If the provided file path does not exist or is not supported for transcription. + :return: None """ is_file_supported = file_path.suffix in c.SUPPORTED_FILE_EXTENSIONS if file_path.is_file() and is_file_supported: @@ -150,28 +184,37 @@ def _prepare_for_file_transcription(self, file_path: Path): else: raise ValueError("Error: No valid file selected.") - def _prepare_for_youtube_video_transcription(self): + def _prepare_for_youtube_video_transcription(self, url: str) -> None: """ Prepares the system for transcription from a YouTube video by downloading the audio from the video using the YouTubeHandler. It updates the source path in the transcription object with the downloaded audio file path. If the source path is not obtained successfully, it raises a ValueError. - :raises ValueError: If the YouTube video URL is incorrect or the audio download fails. + :param url: URL of the YouTube video to transcribe. + :type url: str + :raises ValueError: If the YouTube video URL is incorrect or the audio download + fails. + :return: None """ - self.transcription.audio_source_path = YouTubeHandler.download_audio_from_video( - self.transcription.youtube_url - ) + audio_source_path = YouTubeHandler.download_audio_from_video(url) - if not self.transcription.audio_source_path: - raise ValueError("Please make sure the URL you entered is correct.") + if not audio_source_path: + raise ValueError( + "Something went wrong with the YouTube video audio download. Please " + "make sure the URL you entered is correct." 
+ ) + + self.transcription.audio_source_path = audio_source_path - async def _handle_transcription_process(self): + async def _handle_transcription_process(self) -> None: """ Handles the transcription process based on the type of source specified in the transcription object. It asynchronously transcribes either a single file or multiple files in a directory. Upon completion or error, it notifies the view that the transcription process has been processed. + + :return: None """ try: if self.transcription.audio_source == AudioSource.DIRECTORY: @@ -183,15 +226,15 @@ async def _handle_transcription_process(self): finally: self.view.on_processed_transcription() - async def _transcribe_directory(self, dir_path: Path): + async def _transcribe_directory(self, dir_path: Path) -> None: """ Transcribes supported files from a directory. :param dir_path: The directory path selected by the user. :type dir_path: Path - :raises ValueError: If the directory path is invalid or doesn't contain valid file types to transcribe. + :return: None """ if files := self._get_files_to_transcribe_from_directory(): # Create a list of coroutines for each file transcription task @@ -207,7 +250,7 @@ async def _transcribe_directory(self, dir_path: Path): "file types to transcribe. Please choose another one." ) - async def _transcribe_file(self, file_path: Path): + async def _transcribe_file(self, file_path: Path) -> None: """ Transcribes audio from a file based on the specified transcription method. It updates the transcription object with the transcribed text. If the source @@ -216,6 +259,8 @@ async def _transcribe_file(self, file_path: Path): is enabled. :param file_path: The path of the audio file for transcription. + :type file_path: Path + :return: None """ transcription = self.transcription transcription.audio_source_path = file_path @@ -257,12 +302,18 @@ def _get_files_to_transcribe_from_directory(self) -> list[Path]: :return: A list of file paths to transcribe in the directory. 
:rtype: list[Path] """ + if not self.transcription.output_file_types: + raise ValueError( + "No output file types selected. Please select at least one." + ) + matching_files = [] for root, _, files in os.walk(self.transcription.audio_source_path): for file in files: if any(file.endswith(ext) for ext in c.SUPPORTED_FILE_EXTENSIONS): file_path = Path(root) / file + if not self.transcription.should_overwrite and any( (file_path.with_suffix(f".{ext}")).exists() for ext in self.transcription.output_file_types @@ -275,13 +326,15 @@ def _get_files_to_transcribe_from_directory(self) -> list[Path]: return matching_files - def _start_recording_from_mic(self): + def _start_recording_from_mic(self) -> None: """ Records the audio from the microphone and starts the transcription process when finished recording. This function continuously records audio from the microphone until stopped. The recorded audio is then saved to a WAV file and used for transcription. + + :return: None """ self._is_mic_recording = True audio_data = [] @@ -319,22 +372,24 @@ def _get_save_path(self, file_path: Path, should_autosave: bool) -> Path: :param file_path: The initial file path. :type file_path: Path - :param should_autosave: If True, saves the file automatically with a generated name. :type should_autosave: bool - :return: The path where the file should be saved. 
:rtype: Path """ + if self.transcription.output_file_types: + is_one_output_file_type = len(self.transcription.output_file_types) == 1 + else: + is_one_output_file_type = False + file_dir = file_path.parent file_type = "" initial_file_name = file_path.stem - is_one_output_file_type = len(self.transcription.output_file_types) == 1 if is_one_output_file_type: - file_type = c.FORMATS_TO_FILE_TYPES.get( - self.transcription.output_file_types[0] + file_type = c.FORMATS_TO_FILE_TYPES.get( # type: ignore[assignment] + self.transcription.output_file_types[0] # type: ignore[index] ) initial_file_name += f".{file_type}" @@ -360,13 +415,14 @@ def _get_save_path(self, file_path: Path, should_autosave: bool) -> Path: ) ) - def _handle_exception(self, e: Exception): + def _handle_exception(self, e: Exception) -> None: """ Prints the traceback of the exception, notifies the view that the transcription process has been processed, and displays a representation of the exception. :param e: The exception that occurred during the transcription process. 
:type e: Exception + :return: None """ print(traceback.format_exc()) self.view.on_processed_transcription() diff --git a/src/handlers/__init__.py b/src/handlers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/handlers/audio_handler.py b/src/handlers/audio_handler.py index 5146492..1cfc9b8 100644 --- a/src/handlers/audio_handler.py +++ b/src/handlers/audio_handler.py @@ -2,6 +2,8 @@ import shutil import traceback from io import BytesIO +from pathlib import Path +from typing import Callable, Optional import speech_recognition as sr from models.transcription import Transcription @@ -15,21 +17,27 @@ class AudioHandler: @staticmethod def get_transcription( - transcription: Transcription, should_split_on_silence: bool, transcription_func + transcription: Transcription, + should_split_on_silence: bool, + transcription_func: Callable[[sr.AudioData, Transcription], str], ) -> str: """ Transcribes audio from a file using the Google Speech-to-Text API. :param transcription: An instance of Transcription containing information about the audio file. + :type transcription: Transcription :param should_split_on_silence: A boolean flag indicating whether the audio should be split into chunks based on silence. If True, the audio will be split on silence and each chunk will be transcribed separately. If False, the entire audio will be transcribed as a single segment. + :type should_split_on_silence: bool :param transcription_func: The function to use for transcription. + :type transcription_func: Callable[[sr.AudioData, Transcription], str] :return: The transcribed text or an error message if transcription fails. 
+ :rtype: str """ chunks_directory = ROOT_PATH / "audio-chunks" chunks_directory.mkdir(exist_ok=True) @@ -59,13 +67,18 @@ def get_transcription( return text @staticmethod - def load_audio_file(file_path, chunks_directory): + def load_audio_file( + file_path: Path, chunks_directory: Path + ) -> Optional[AudioSegment]: """ Load the audio from the file or extract it from the video. :param file_path: Path to the file to be loaded. + :type file_path: Path :param chunks_directory: Directory to store intermediate audio files. + :type chunks_directory: Path :return: Loaded AudioSegment object or None if unsupported file type. + :rtype: Optional[AudioSegment] """ content_type = file_path.suffix @@ -81,12 +94,14 @@ def load_audio_file(file_path, chunks_directory): return None @staticmethod - def split_audio_into_chunks(sound): + def split_audio_into_chunks(sound: AudioSegment) -> AudioSegment: """ Split the audio into chunks based on silence. :param sound: The AudioSegment object to be split. + :type sound: AudioSegment :return: List of audio chunks. + :rtype: AudioSegment """ return split_on_silence( sound, @@ -98,18 +113,25 @@ def split_audio_into_chunks(sound): @staticmethod def process_audio_chunks( - audio_chunks, transcription, transcription_func, chunks_directory - ): + audio_chunks: list[AudioSegment], + transcription: Transcription, + transcription_func: Callable[[sr.AudioData, Transcription], str], + chunks_directory: Path, + ) -> str: """ Process each audio chunk for transcription. :param audio_chunks: List of audio chunks. + :type audio_chunks: list[AudioSegment] :param transcription: Transcription object containing transcription details. + :type transcription: Transcription :param transcription_func: The function to use for transcription. + :type transcription_func: Callable[[sr.AudioData, Transcription], str] :param chunks_directory: Directory to store intermediate audio files. + :type chunks_directory: Path :return: The combined transcribed text. 
+ :rtype: str """ - text = "" recognizer = sr.Recognizer() for idx, audio_chunk in enumerate(audio_chunks): @@ -122,23 +144,25 @@ def process_audio_chunks( try: chunk_text = transcription_func( - audio_data=audio_data, - transcription=transcription, + audio_data, + transcription, ) - text += chunk_text print(f"chunk text: {chunk_text}") + return chunk_text except Exception: return traceback.format_exc() - return text + return "" @staticmethod - def cleanup(chunks_directory): + def cleanup(chunks_directory: Path) -> None: """ Clean up the `chunks` directory. :param chunks_directory: Directory to be deleted. + :type chunks_directory: Path + :rtype: None """ shutil.rmtree(chunks_directory) diff --git a/src/handlers/google_api_handler.py b/src/handlers/google_api_handler.py index 1fb4866..0e2e489 100644 --- a/src/handlers/google_api_handler.py +++ b/src/handlers/google_api_handler.py @@ -9,10 +9,12 @@ class GoogleApiHandler(Transcribable): def transcribe(audio_data: sr.AudioData, transcription: Transcription) -> str: r = sr.Recognizer() - text = r.recognize_google( - audio_data, - language=transcription.language_code, - key=EnvKeys.GOOGLE_API_KEY.get_value() or None, + text = str( + r.recognize_google( + audio_data, + language=transcription.language_code, + key=EnvKeys.GOOGLE_API_KEY.get_value() or None, + ) ) text = f"{text}. " diff --git a/src/handlers/openai_api_handler.py b/src/handlers/openai_api_handler.py index 4b53c36..0111bfe 100644 --- a/src/handlers/openai_api_handler.py +++ b/src/handlers/openai_api_handler.py @@ -11,26 +11,41 @@ class OpenAiApiHandler(Transcribable): @staticmethod def transcribe(audio_data: sr.AudioData, transcription: Transcription) -> str: + if not transcription.language_code: + raise ValueError( + "The language provided is not correct. Please select one of the list." 
+ ) + config = cm.ConfigManager.get_config_whisper_api() compressed_audio = AudioHandler.compress_audio(audio_data) timestamp_granularities = ( - config.timestamp_granularities.split(",") + config.timestamp_granularities if config.response_format == WhisperApiResponseFormats.VERBOSE_JSON.value else None ) client = OpenAI( - api_key=EnvKeys.OPENAI_API_KEY.get_value(), timeout=120.0 # 2 minutes + api_key=EnvKeys.OPENAI_API_KEY.get_value(), + timeout=120.0, # 2 minutes ) - whisper_api_transcription = client.audio.transcriptions.create( - model="whisper-1", - file=compressed_audio, - language=transcription.language_code, - response_format=config.response_format, - temperature=config.temperature, - timestamp_granularities=timestamp_granularities, - ) + if timestamp_granularities: + whisper_api_transcription = client.audio.transcriptions.create( + model="whisper-1", + file=compressed_audio, + language=transcription.language_code, + response_format=config.response_format, + temperature=config.temperature, + timestamp_granularities=timestamp_granularities, + ) + else: + whisper_api_transcription = client.audio.transcriptions.create( + model="whisper-1", + file=compressed_audio, + language=transcription.language_code, + response_format=config.response_format, + temperature=config.temperature, + ) if WhisperApiResponseFormats.JSON.value in config.response_format: return whisper_api_transcription.to_json() diff --git a/src/handlers/whisperx_handler.py b/src/handlers/whisperx_handler.py index fece35b..d608b59 100644 --- a/src/handlers/whisperx_handler.py +++ b/src/handlers/whisperx_handler.py @@ -1,15 +1,19 @@ import os import traceback from pathlib import Path +from typing import Optional, Union import utils.config_manager as cm import whisperx from models.transcription import Transcription +from whisperx.types import AlignedTranscriptionResult, TranscriptionResult class WhisperXHandler: - def __init__(self): - self._whisperx_result = None + def __init__(self) -> None: + 
self._whisperx_result: Optional[ + Union[TranscriptionResult, AlignedTranscriptionResult] + ] = None async def transcribe_file(self, transcription: Transcription) -> str: """ @@ -21,6 +25,12 @@ async def transcribe_file(self, transcription: Transcription) -> str: :return: The transcribed text or an error message if transcription fails. :rtype: str """ + if not transcription.output_file_types: + raise ValueError( + "No output file types specified. Please make sure to select at least " + "one." + ) + config_whisperx = cm.ConfigManager.get_config_whisperx() device = "cpu" if config_whisperx.use_cpu else "cuda" @@ -41,6 +51,9 @@ async def transcribe_file(self, transcription: Transcription) -> str: audio, batch_size=config_whisperx.batch_size ) + if self._whisperx_result is None: + raise ValueError("Something went wrong while transcribing.") + text_combined = " ".join( segment["text"].strip() for segment in self._whisperx_result["segments"] ) @@ -68,8 +81,11 @@ async def transcribe_file(self, transcription: Transcription) -> str: return traceback.format_exc() def save_transcription( - self, file_path: Path, output_file_types: list[str], should_overwrite: bool - ): + self, + file_path: Path, + output_file_types: list[str], + should_overwrite: bool, + ) -> None: """ Save the transcription as the specified file types. 
@@ -96,6 +112,6 @@ def save_transcription( writer = whisperx.transcribe.get_writer(output_type, str(output_dir)) # https://github.com/m-bain/whisperX/issues/455#issuecomment-1707547704 - self._whisperx_result["language"] = "en" + self._whisperx_result["language"] = "en" # type: ignore[index] writer(self._whisperx_result, file_path, vars(config_subtitles)) diff --git a/src/handlers/youtube_handler.py b/src/handlers/youtube_handler.py index 918107e..5f9688b 100644 --- a/src/handlers/youtube_handler.py +++ b/src/handlers/youtube_handler.py @@ -34,3 +34,4 @@ def download_audio_from_video( except Exception: print(traceback.format_exc()) + return None diff --git a/src/interfaces/__init__.py b/src/interfaces/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/models/config/config_subtitles.py b/src/models/config/config_subtitles.py index 88af1c9..9196773 100644 --- a/src/models/config/config_subtitles.py +++ b/src/models/config/config_subtitles.py @@ -1,6 +1,5 @@ from dataclasses import dataclass from enum import Enum -from typing import Optional @dataclass @@ -19,12 +18,17 @@ class Key(Enum): MAX_LINE_COUNT = "max_line_count" MAX_LINE_WIDTH = "max_line_width" - def value_type(self) -> Optional[str]: - """Get the value type associated with the ConfigKey.""" + def value_type(self) -> str: + """ + Get the value type associated with the ConfigKey. + + :return: The type of the value as a string, or None if the key is not found. 
+ :rtype: str + """ type_mapping = { - self.HIGHLIGHT_WORDS: "bool", - self.MAX_LINE_COUNT: "int", - self.MAX_LINE_WIDTH: "int", + ConfigSubtitles.Key.HIGHLIGHT_WORDS: "bool", + ConfigSubtitles.Key.MAX_LINE_COUNT: "int", + ConfigSubtitles.Key.MAX_LINE_WIDTH: "int", } - return type_mapping.get(self, None) + return str(type_mapping.get(self)) diff --git a/src/models/config/config_system.py b/src/models/config/config_system.py index 35202c9..c18ea4b 100644 --- a/src/models/config/config_system.py +++ b/src/models/config/config_system.py @@ -1,6 +1,5 @@ from dataclasses import dataclass from enum import Enum -from typing import Optional @dataclass @@ -15,8 +14,13 @@ class Key(Enum): SECTION = "system" APPEARANCE_MODE = "appearance_mode" - def value_type(self) -> Optional[str]: - """Get the value type associated with the ConfigKey.""" - type_mapping = {self.APPEARANCE_MODE: "str"} + def value_type(self) -> str: + """ + Get the value type associated with the ConfigKey. - return type_mapping.get(self, None) + :return: The type of the value as a string, or None if the key is not found. + :rtype: str + """ + type_mapping = {ConfigSystem.Key.APPEARANCE_MODE: "str"} + + return str(type_mapping.get(self)) diff --git a/src/models/config/config_transcription.py b/src/models/config/config_transcription.py index 7239f44..9c6d82e 100644 --- a/src/models/config/config_transcription.py +++ b/src/models/config/config_transcription.py @@ -1,6 +1,5 @@ from dataclasses import dataclass from enum import Enum -from typing import Optional @dataclass @@ -23,14 +22,19 @@ class Key(Enum): AUTOSAVE = "autosave" OVERWRITE_FILES = "overwrite_files" - def value_type(self) -> Optional[str]: - """Get the value type associated with the ConfigKey.""" + def value_type(self) -> str: + """ + Get the value type associated with the ConfigKey. + + :return: The type of the value as a string, or None if the key is not found. 
+ :rtype: str + """ type_mapping = { - self.LANGUAGE: "str", - self.AUDIO_SOURCE: "str", - self.METHOD: "str", - self.AUTOSAVE: "bool", - self.OVERWRITE_FILES: "bool", + ConfigTranscription.Key.LANGUAGE: "str", + ConfigTranscription.Key.AUDIO_SOURCE: "str", + ConfigTranscription.Key.METHOD: "str", + ConfigTranscription.Key.AUTOSAVE: "bool", + ConfigTranscription.Key.OVERWRITE_FILES: "bool", } - return type_mapping.get(self, None) + return str(type_mapping.get(self)) diff --git a/src/models/config/config_whisper_api.py b/src/models/config/config_whisper_api.py index e20032c..1432138 100644 --- a/src/models/config/config_whisper_api.py +++ b/src/models/config/config_whisper_api.py @@ -1,13 +1,15 @@ from dataclasses import dataclass from enum import Enum -from typing import Optional +from typing import Literal + +TimestampGranularitiesType = Literal["word", "segment"] @dataclass class ConfigWhisperApi: - response_format: str + response_format: Literal["json", "text", "srt", "verbose_json", "vtt"] temperature: float - timestamp_granularities: str + timestamp_granularities: list[TimestampGranularitiesType] class Key(Enum): """ @@ -19,17 +21,17 @@ class Key(Enum): TEMPERATURE = "temperature" TIMESTAMP_GRANULARITIES = "timestamp_granularities" - def value_type(self) -> Optional[str]: + def value_type(self) -> str: """ Get the value type associated with the ConfigKey. - :return + :return: The type of the value as a string, or None if the key is not found. 
:rtype: str """ type_mapping = { - self.RESPONSE_FORMAT: "str", - self.TEMPERATURE: "float", - self.TIMESTAMP_GRANULARITIES: "str", + ConfigWhisperApi.Key.RESPONSE_FORMAT: "str", + ConfigWhisperApi.Key.TEMPERATURE: "float", + ConfigWhisperApi.Key.TIMESTAMP_GRANULARITIES: "list", } - return type_mapping.get(self, None) + return str(type_mapping.get(self)) diff --git a/src/models/config/config_whisperx.py b/src/models/config/config_whisperx.py index eecbcfe..cca3377 100644 --- a/src/models/config/config_whisperx.py +++ b/src/models/config/config_whisperx.py @@ -1,6 +1,8 @@ from dataclasses import dataclass from enum import Enum -from typing import Optional +from typing import Literal + +OutputFileTypes = Literal["aud", "json", "srt", "tsv", "txt", "vtt"] @dataclass @@ -10,7 +12,7 @@ class ConfigWhisperX: compute_type: str use_cpu: bool can_use_gpu: bool - output_file_types: str + output_file_types: list[OutputFileTypes] class Key(Enum): """ @@ -25,20 +27,20 @@ class Key(Enum): CAN_USE_GPU = "can_use_gpu" OUTPUT_FILE_TYPES = "output_file_types" - def value_type(self) -> Optional[str]: + def value_type(self) -> str: """ Get the value type associated with the ConfigKey. - :return + :return: The type of the value as a string, or None if the key is not found. 
:rtype: str """ type_mapping = { - self.MODEL_SIZE: "str", - self.BATCH_SIZE: "int", - self.COMPUTE_TYPE: "str", - self.USE_CPU: "bool", - self.CAN_USE_GPU: "bool", - self.OUTPUT_FILE_TYPES: "str", + ConfigWhisperX.Key.MODEL_SIZE: "str", + ConfigWhisperX.Key.BATCH_SIZE: "int", + ConfigWhisperX.Key.COMPUTE_TYPE: "str", + ConfigWhisperX.Key.USE_CPU: "bool", + ConfigWhisperX.Key.CAN_USE_GPU: "bool", + ConfigWhisperX.Key.OUTPUT_FILE_TYPES: "list", } - return type_mapping.get(self, None) + return str(type_mapping.get(self)) diff --git a/src/models/transcription.py b/src/models/transcription.py index 8c11c49..e504653 100644 --- a/src/models/transcription.py +++ b/src/models/transcription.py @@ -1,4 +1,4 @@ -from dataclasses import dataclass, field +from dataclasses import dataclass from pathlib import Path from typing import Optional @@ -16,4 +16,4 @@ class Transcription: should_translate: bool = False should_autosave: bool = False should_overwrite: bool = False - youtube_url: str = None + youtube_url: Optional[str] = None diff --git a/src/utils/audio_utils.py b/src/utils/audio_utils.py index 8a4b4ce..e9dd4e9 100644 --- a/src/utils/audio_utils.py +++ b/src/utils/audio_utils.py @@ -2,7 +2,7 @@ from pydub import AudioSegment -def save_audio_data(audio_data: list[sr.AudioData], filename: str): +def save_audio_data(audio_data: list[sr.AudioData], filename: str) -> None: """ Save recorded audio data to a WAV file. @@ -10,6 +10,8 @@ def save_audio_data(audio_data: list[sr.AudioData], filename: str): :type audio_data: list[sr.AudioData] :param filename: The name of the file to save the audio data to. 
:type filename: str + :return: None + :rtype: None """ if audio_data: raw_audio_data = b"".join( diff --git a/src/utils/config_manager.py b/src/utils/config_manager.py index b7762e8..fef5494 100644 --- a/src/utils/config_manager.py +++ b/src/utils/config_manager.py @@ -1,6 +1,6 @@ from configparser import ConfigParser from pathlib import Path -from typing import Optional, Union +from typing import Any, Union from models.config.config_subtitles import ConfigSubtitles from models.config.config_system import ConfigSystem @@ -21,8 +21,14 @@ class ConfigManager: ] @staticmethod - def read_config(file_path: Path = _CONFIG_FILE_PATH) -> Optional[ConfigParser]: - config = ConfigParser() + def read_config(file_path: Path = _CONFIG_FILE_PATH) -> ConfigParser: + config = ConfigParser( + converters={ + "list": lambda x: [i.strip() for i in x.split(",")] + if len(x) > 0 + else [] + } + ) config.read(file_path) return config @@ -31,13 +37,13 @@ def get_config_subtitles() -> ConfigSubtitles: section = ConfigSubtitles.Key.SECTION return ConfigSubtitles( - highlight_words=ConfigManager.get_value( + highlight_words=ConfigManager.get_value( # type: ignore section, ConfigSubtitles.Key.HIGHLIGHT_WORDS ), - max_line_count=ConfigManager.get_value( + max_line_count=ConfigManager.get_value( # type: ignore section, ConfigSubtitles.Key.MAX_LINE_COUNT ), - max_line_width=ConfigManager.get_value( + max_line_width=ConfigManager.get_value( # type: ignore section, ConfigSubtitles.Key.MAX_LINE_WIDTH ), ) @@ -47,7 +53,7 @@ def get_config_system() -> ConfigSystem: section = ConfigSystem.Key.SECTION return ConfigSystem( - appearance_mode=ConfigManager.get_value( + appearance_mode=ConfigManager.get_value( # type: ignore section, ConfigSystem.Key.APPEARANCE_MODE ), ) @@ -57,13 +63,19 @@ def get_config_transcription() -> ConfigTranscription: section = ConfigTranscription.Key.SECTION return ConfigTranscription( - language=ConfigManager.get_value(section, ConfigTranscription.Key.LANGUAGE), - 
audio_source=ConfigManager.get_value( + language=ConfigManager.get_value( # type: ignore + section, ConfigTranscription.Key.LANGUAGE + ), + audio_source=ConfigManager.get_value( # type: ignore section, ConfigTranscription.Key.AUDIO_SOURCE ), - method=ConfigManager.get_value(section, ConfigTranscription.Key.METHOD), - autosave=ConfigManager.get_value(section, ConfigTranscription.Key.AUTOSAVE), - overwrite_files=ConfigManager.get_value( + method=ConfigManager.get_value( # type: ignore + section, ConfigTranscription.Key.METHOD + ), + autosave=ConfigManager.get_value( # type: ignore + section, ConfigTranscription.Key.AUTOSAVE + ), + overwrite_files=ConfigManager.get_value( # type: ignore section, ConfigTranscription.Key.OVERWRITE_FILES ), ) @@ -73,13 +85,13 @@ def get_config_whisper_api() -> ConfigWhisperApi: section = ConfigWhisperApi.Key.SECTION return ConfigWhisperApi( - response_format=ConfigManager.get_value( + response_format=ConfigManager.get_value( # type: ignore section, ConfigWhisperApi.Key.RESPONSE_FORMAT ), - temperature=ConfigManager.get_value( + temperature=ConfigManager.get_value( # type: ignore section, ConfigWhisperApi.Key.TEMPERATURE ), - timestamp_granularities=ConfigManager.get_value( + timestamp_granularities=ConfigManager.get_value( # type: ignore section, ConfigWhisperApi.Key.TIMESTAMP_GRANULARITIES ), ) @@ -89,16 +101,22 @@ def get_config_whisperx() -> ConfigWhisperX: section = ConfigWhisperX.Key.SECTION return ConfigWhisperX( - model_size=ConfigManager.get_value(section, ConfigWhisperX.Key.MODEL_SIZE), - batch_size=ConfigManager.get_value(section, ConfigWhisperX.Key.BATCH_SIZE), - compute_type=ConfigManager.get_value( + model_size=ConfigManager.get_value( # type: ignore + section, ConfigWhisperX.Key.MODEL_SIZE + ), + batch_size=ConfigManager.get_value( # type: ignore + section, ConfigWhisperX.Key.BATCH_SIZE + ), + compute_type=ConfigManager.get_value( # type: ignore section, ConfigWhisperX.Key.COMPUTE_TYPE ), - 
use_cpu=ConfigManager.get_value(section, ConfigWhisperX.Key.USE_CPU), - can_use_gpu=ConfigManager.get_value( + use_cpu=ConfigManager.get_value( # type: ignore + section, ConfigWhisperX.Key.USE_CPU + ), + can_use_gpu=ConfigManager.get_value( # type: ignore section, ConfigWhisperX.Key.CAN_USE_GPU ), - output_file_types=ConfigManager.get_value( + output_file_types=ConfigManager.get_value( # type: ignore section, ConfigWhisperX.Key.OUTPUT_FILE_TYPES ), ) @@ -108,11 +126,31 @@ def get_value( section: KeyType, key: KeyType, file_path: Path = _CONFIG_FILE_PATH, - ) -> Optional[Union[str, bool, int, float]]: + ) -> Union[str, bool, int, float, list[Any]]: + """ + Retrieve the value of a specified key within a section of a configuration file. + + This method reads a configuration file, checks if the given section and key exist, + and if they do, returns the value of the key in its appropriate type. If the + section or key does not exist, it raises a ValueError. + + :param section: The section in the configuration file where the key is located. + :type section: KeyType + :param key: The key within the section whose value is to be retrieved. + :type key: KeyType + :param file_path: The path to the configuration file. Defaults to + _CONFIG_FILE_PATH. + :type file_path: Path + :raises FileNotFoundError: If the specified configuration file does not exist. + :raises ValueError: If the section or key is not found in the config. file. + :return: The value of the specified key in its appropriate type (str, bool, int, + float, or list). 
+ :rtype: Union[str, bool, int, float, list[Any]] + """ config = ConfigManager.read_config(file_path) - section_name = section.value - key_name = key.value + section_name = str(section.value) + key_name = str(key.value) key_value_type = key.value_type() # Check if the section and key exist before getting the value @@ -125,11 +163,12 @@ def get_value( return config.getint(section_name, key_name) elif key_value_type == "float": return config.getfloat(section_name, key_name) - else: - print( - f"Section [{section_name}] or Key [{key_name}] not found in the config" - ) - return None + elif key_value_type == "list": + return config.getlist(section_name, key_name) # type: ignore + + raise ValueError( + f"Section [{section}] or Key [{key_name}] not found in the config" + ) @staticmethod def modify_value( @@ -137,11 +176,31 @@ def modify_value( key: KeyType, new_value: str, file_path: Path = _CONFIG_FILE_PATH, - ): + ) -> None: + """ + Modify the value of a specified key within a section of a configuration file. + + This method reads a configuration file, checks if the given section and key + exist, and if they do, updates the value of the key to the new value provided. + If the section or key does not exist, it prints an error message. + + :param section: The section in the configuration file where the key is located. + :type section: KeyType + :param key: The key within the section whose value is to be modified. + :type key: KeyType + :param new_value: The new value to be set for the specified key. + :type new_value: str + :param file_path: The path to the configuration file. Defaults to + _CONFIG_FILE_PATH. + :type file_path: Path + :raises FileNotFoundError: If the specified configuration file does not exist. + :raises ValueError: If the section or key is not found in the config. file. 
+ :return: None + """ config = ConfigManager.read_config(file_path) - section_name = section.value - key_name = key.value + section_name = str(section.value) + key_name = str(key.value) # Check if the section and option exist before modifying if section_name in config and key_name in config[section_name]: @@ -150,6 +209,8 @@ def modify_value( with open(file_path, "w") as config_file: config.write(config_file) - print(f"Value for [{section}][{key_name}] modified to {new_value}") + print(f"Value for [{section_name}][{key_name}] modified to {new_value}") else: - print(f"Section [{section}] or Key [{key_name}] not found in the config") + print( + f"Section [{section_name}] or Key [{key_name}] not found in the config" + ) diff --git a/src/utils/dict_utils.py b/src/utils/dict_utils.py index e975902..4fd3ffb 100644 --- a/src/utils/dict_utils.py +++ b/src/utils/dict_utils.py @@ -1,5 +1,19 @@ -def find_key_by_value(dictionary, target_value): +from typing import Any, Optional + + +def find_key_by_value(dictionary: dict[Any, Any], target_value: Any) -> Optional[Any]: + """ + Searches for the first key in the dictionary that has the specified target value. + + :param dictionary: The dictionary to search through. + :type dictionary: Dict[Any, Any] + :param target_value: The value to search for. + :type target_value: Any + :return: The key associated with the target value, or None if not found. 
+ :rtype: Optional[Any] + """ for key, value in dictionary.items(): if value == target_value: return key + return None diff --git a/src/utils/enums.py b/src/utils/enums.py index 9fcf650..ce0b7da 100644 --- a/src/utils/enums.py +++ b/src/utils/enums.py @@ -59,6 +59,6 @@ class WhisperXFileTypes(Enum): AUD = "aud" JSON = "json" SRT = "srt" - TEXT = "txt" + TXT = "txt" TSV = "tsv" VTT = "vtt" diff --git a/src/views/custom_widgets/ctk_input_dialog.py b/src/views/custom_widgets/ctk_input_dialog.py index 132ed4e..b45020a 100644 --- a/src/views/custom_widgets/ctk_input_dialog.py +++ b/src/views/custom_widgets/ctk_input_dialog.py @@ -4,7 +4,7 @@ from utils.enums import Color -class CTkInputDialog(ctk.CTkToplevel): +class CTkInputDialog(ctk.CTkToplevel): # type: ignore """ Dialog with extra window, message, entry widget, cancel and ok button. For detailed information check out the documentation. @@ -21,7 +21,7 @@ def __init__( entry_border_color: Optional[Union[str, Tuple[str, str]]] = None, entry_text_color: Optional[Union[str, Tuple[str, str]]] = None, title: str = "CTkDialog", - font: Optional[Union[tuple, ctk.CTkFont]] = None, + font: Optional[Union[Tuple[int, str], ctk.CTkFont]] = None, label_text: str = "CTkDialog", entry_text: Optional[str] = None, ): @@ -85,7 +85,7 @@ def __init__( self.resizable(False, False) self.grab_set() # make other windows not clickable - def _create_widgets(self): + def _create_widgets(self) -> None: self.grid_columnconfigure((0, 1), weight=1) self.rowconfigure(0, weight=1) @@ -150,19 +150,19 @@ def _create_widgets(self): self.after(150, lambda: self._entry.focus()) self._entry.bind("", self._ok_event) - def _ok_event(self): + def _ok_event(self) -> None: self._user_input = self._entry.get() self.grab_release() self.destroy() - def _on_closing(self): + def _on_closing(self) -> None: self.grab_release() self.destroy() - def _cancel_event(self): + def _cancel_event(self) -> None: self.grab_release() self.destroy() - def get_input(self): + def 
get_input(self) -> Union[str, None]: self.master.wait_window(self) return self._user_input diff --git a/src/views/custom_widgets/ctk_scrollable_dropdown/__init__.py b/src/views/custom_widgets/ctk_scrollable_dropdown/__init__.py index eb90a08..4afed52 100644 --- a/src/views/custom_widgets/ctk_scrollable_dropdown/__init__.py +++ b/src/views/custom_widgets/ctk_scrollable_dropdown/__init__.py @@ -6,7 +6,9 @@ Homepage: https://github.com/Akascape/CTkScrollableDropdown """ -__version__ = "1.0" +__version__ = "1.2" from .ctk_scrollable_dropdown import CTkScrollableDropdown from .ctk_scrollable_dropdown_frame import CTkScrollableDropdownFrame + +__all__ = ["CTkScrollableDropdown", "CTkScrollableDropdownFrame"] diff --git a/src/views/custom_widgets/ctk_scrollable_dropdown/ctk_scrollable_dropdown.py b/src/views/custom_widgets/ctk_scrollable_dropdown/ctk_scrollable_dropdown.py index c5acf97..0756a5f 100644 --- a/src/views/custom_widgets/ctk_scrollable_dropdown/ctk_scrollable_dropdown.py +++ b/src/views/custom_widgets/ctk_scrollable_dropdown/ctk_scrollable_dropdown.py @@ -37,9 +37,9 @@ def __init__( text_color=None, autocomplete=False, hover_color=None, - **button_kwargs + **button_kwargs, ): - super().__init__(takefocus=1) + super().__init__(master=attach.winfo_toplevel(), takefocus=1) self.focus() self.lift() @@ -83,6 +83,11 @@ def __init__( lambda e: self._withdraw() if not self.disable else None, add="+", ) + self.bind( + "", + lambda e: self._withdraw() if not self.disable else None, + add="+", + ) self.attributes("-alpha", 0) self.disable = False @@ -173,14 +178,14 @@ def __init__( # Add binding for different ctk widgets if ( double_click - or self.attach.winfo_name().startswith("!ctkentry") - or self.attach.winfo_name().startswith("!ctkcombobox") + or type(self.attach) is customtkinter.CTkEntry + or type(self.attach) is customtkinter.CTkComboBox ): self.attach.bind("", lambda e: self._iconify(), add="+") else: self.attach.bind("", lambda e: self._iconify(), add="+") - 
if self.attach.winfo_name().startswith("!ctkcombobox"): + if type(self.attach) is customtkinter.CTkComboBox: self.attach._canvas.tag_bind( "right_parts", "", lambda e: self._iconify() ) @@ -190,7 +195,7 @@ def __init__( if self.command is None: self.command = self.attach.set - if self.attach.winfo_name().startswith("!ctkoptionmenu"): + if type(self.attach) is customtkinter.CTkOptionMenu: self.attach._canvas.bind("", lambda e: self._iconify()) self.attach._text_label.bind("", lambda e: self._iconify()) if self.command is None: @@ -205,7 +210,6 @@ def __init__( if self.autocomplete: self.bind_autocomplete() - self.deiconify() self.withdraw() self.attributes("-alpha", self.alpha) @@ -214,6 +218,8 @@ def _destroy(self): self.after(500, self.destroy_popup) def _withdraw(self): + if not self.winfo_exists(): + return if self.winfo_viewable() and self.hide: self.withdraw() @@ -229,13 +235,13 @@ def bind_autocomplete( def appear(x): self.appear = True - if self.attach.winfo_name().startswith("!ctkcombobox"): + if type(self.attach) is customtkinter.CTkComboBox: self.attach._entry.configure(textvariable=self.var_update) self.attach._entry.bind("", appear) self.attach.set(self.values[0]) self.var_update.trace_add("write", self._update) - if self.attach.winfo_name().startswith("!ctkentry"): + if type(self.attach) is customtkinter.CTkEntry: self.attach.configure(textvariable=self.var_update) self.attach.bind("", appear) self.var_update.trace_add("write", self._update) @@ -270,8 +276,9 @@ def _init_buttons(self, **button_kwargs): if self.image_values is not None else None, anchor=self.justify, + hover_color=self.hover_color, command=lambda k=row: self._attach_key_press(k), - **button_kwargs + **button_kwargs, ) self.widgets[self.i].pack(fill="x", pady=2, padx=(self.padding, 0)) self.i += 1 @@ -317,12 +324,14 @@ def _iconify(self): return if self.disable: return + if self.winfo_ismapped(): + self.hide = False if self.hide: self.event_generate("<>") - self._deiconify() self.focus() 
self.hide = False self.place_dropdown() + self._deiconify() if self.focus_something: self.dummy_entry.pack() self.dummy_entry.focus_set() @@ -389,9 +398,10 @@ def insert(self, value, **kwargs): height=self.button_height, fg_color=self.button_color, text_color=self.text_color, + hover_color=self.hover_color, anchor=self.justify, command=lambda k=value: self._attach_key_press(k), - **kwargs + **kwargs, ) self.widgets[self.i].pack(fill="x", pady=2, padx=(self.padding, 0)) self.i += 1 @@ -407,6 +417,9 @@ def popup(self, x=None, y=None): self.hide = True self._iconify() + def hide(self): + self._withdraw() + def configure(self, **kwargs): if "height" in kwargs: self.height = kwargs.pop("height") @@ -443,8 +456,14 @@ def configure(self, **kwargs): i += 1 if "button_color" in kwargs: + button_color = kwargs.pop("button_color") + for key in self.widgets.keys(): + self.widgets[key].configure(fg_color=button_color) + + if "font" in kwargs: + font = kwargs.pop("font") for key in self.widgets.keys(): - self.widgets[key].configure(fg_color=kwargs.pop("button_color")) + self.widgets[key].configure(font=font) if "hover_color" not in kwargs: kwargs["hover_color"] = self.hover_color diff --git a/src/views/custom_widgets/ctk_scrollable_dropdown/ctk_scrollable_dropdown_frame.py b/src/views/custom_widgets/ctk_scrollable_dropdown/ctk_scrollable_dropdown_frame.py index 0dcaff5..6e77cc4 100644 --- a/src/views/custom_widgets/ctk_scrollable_dropdown/ctk_scrollable_dropdown_frame.py +++ b/src/views/custom_widgets/ctk_scrollable_dropdown/ctk_scrollable_dropdown_frame.py @@ -34,7 +34,7 @@ def __init__( frame_border_color=None, text_color=None, autocomplete=False, - **button_kwargs + **button_kwargs, ): super().__init__( master=attach.winfo_toplevel(), bg_color=attach.cget("bg_color") @@ -56,6 +56,11 @@ def __init__( lambda e: self._withdraw() if not self.disable else None, add="+", ) + self.bind( + "", + lambda e: self._withdraw() if not self.disable else None, + add="+", + ) self.disable = 
False self.fg_color = ( @@ -192,7 +197,9 @@ def _withdraw(self): def _update(self, a, b, c): self.live_update(self.attach._entry.get()) - def bind_autocomplete(self): + def bind_autocomplete( + self, + ): def appear(x): self.appear = True @@ -222,7 +229,7 @@ def _init_buttons(self, **button_kwargs): else None, anchor=self.justify, command=lambda k=row: self._attach_key_press(k), - **button_kwargs + **button_kwargs, ) self.widgets[self.i].pack(fill="x", pady=2, padx=(self.padding, 0)) self.i += 1 @@ -259,6 +266,7 @@ def place_dropdown(self): self.height_new = self.height self.frame.configure(width=self.width_new, height=self.height_new) + self.frame._scrollbar.configure(height=self.height_new) self.place(x=self.x_pos, y=self.y_pos) if sys.platform.startswith("darwin"): @@ -340,7 +348,7 @@ def insert(self, value, **kwargs): text_color=self.text_color, anchor=self.justify, command=lambda k=value: self._attach_key_press(k), - **kwargs + **kwargs, ) self.widgets[self.i].pack(fill="x", pady=2, padx=(self.padding, 0)) self.i += 1 @@ -392,8 +400,14 @@ def configure(self, **kwargs): i += 1 if "button_color" in kwargs: + button_color = kwargs.pop("button_color") + for key in self.widgets.keys(): + self.widgets[key].configure(fg_color=button_color) + + if "font" in kwargs: + font = kwargs.pop("font") for key in self.widgets.keys(): - self.widgets[key].configure(fg_color=kwargs.pop("button_color")) + self.widgets[key].configure(font=font) for key in self.widgets.keys(): self.widgets[key].configure(**kwargs) diff --git a/src/views/main_window.py b/src/views/main_window.py index 8078284..7603e44 100644 --- a/src/views/main_window.py +++ b/src/views/main_window.py @@ -1,8 +1,7 @@ from pathlib import Path -from typing import Any +from typing import Any, Callable, Union import customtkinter as ctk -import utils.config_manager as cm import utils.constants as c import utils.dict_utils as du import utils.path_helper as ph @@ -10,10 +9,14 @@ from models.config.config_subtitles import 
ConfigSubtitles from models.config.config_system import ConfigSystem from models.config.config_transcription import ConfigTranscription -from models.config.config_whisper_api import ConfigWhisperApi -from models.config.config_whisperx import ConfigWhisperX +from models.config.config_whisper_api import ( + ConfigWhisperApi, + TimestampGranularitiesType, +) +from models.config.config_whisperx import ConfigWhisperX, OutputFileTypes from models.transcription import Transcription from PIL import Image +from utils.config_manager import ConfigManager from utils.enums import ( AudioSource, Color, @@ -22,6 +25,7 @@ TimestampGranularities, TranscriptionMethod, WhisperApiResponseFormats, + WhisperXFileTypes, ) from utils.env_keys import EnvKeys @@ -29,10 +33,10 @@ from .custom_widgets.ctk_scrollable_dropdown import CTkScrollableDropdown -class MainWindow(ctk.CTkFrame): +class MainWindow(ctk.CTkFrame): # type: ignore[misc] def __init__( self, - parent, + parent: Any, config_subtitles: ConfigSubtitles, config_system: ConfigSystem, config_transcription: ConfigTranscription, @@ -53,7 +57,7 @@ def __init__( self._config_whisperx = config_whisperx # Init the controller - self._controller = None + self._controller: Union[MainController, None] = None # Init the components of the window self._init_sidebar() @@ -71,55 +75,24 @@ def __init__( # GETTERS AND SETTERS - def set_controller(self, controller: MainController): + def set_controller(self, controller: MainController) -> None: """ Set the controller of the window. :param controller: View controller :type controller: MainController + :return: None """ self._controller = controller - def _get_transcription_properties(self) -> dict[str, Any]: - """ - Checks the current state of user interface elements to determine the - transcription properties. + # WIDGETS INITIALIZATION - :return: A dictionary containing the transcription properties. 
- :rtype: dict + def _init_sidebar(self) -> None: """ - language_code = du.find_key_by_value( - dictionary=c.AUDIO_LANGUAGES, - target_value=self.omn_transcription_language.get(), - ) - - properties = { - "audio_source": self._audio_source, - "language_code": language_code, - "method": TranscriptionMethod(self.omn_transcription_method.get()), - "should_autosave": self.chk_autosave.get() == 1, - "should_overwrite": self.chk_overwrite_files.get() == 1, - } - - if self.omn_transcription_method.get() == TranscriptionMethod.GOOGLE_API.value: - properties["should_translate"] = False - properties["output_file_types"] = ["txt"] - if self.omn_transcription_method.get() == TranscriptionMethod.WHISPER_API.value: - properties["should_translate"] = False - properties["output_file_types"] = [self.omn_response_format.get()] - if self.omn_transcription_method.get() == TranscriptionMethod.WHISPERX.value: - properties["should_translate"] = bool( - self.chk_whisper_options_translate.get() - ) - properties[ - "output_file_types" - ] = self._config_whisperx.output_file_types.split(",") - - return properties + Initializes the sidebar widgets. 
- # WIDGETS INITIALIZATION - - def _init_sidebar(self): + :return: None + """ # Sidebar frame self.frm_sidebar = ctk.CTkScrollableFrame( master=self, width=230, corner_radius=0 @@ -252,11 +225,7 @@ def _init_sidebar(self): self.chk_highlight_words = ctk.CTkCheckBox( master=self.frm_subtitle_options, text="Highlight words", - command=lambda: self._on_config_change( - section=ConfigSubtitles.Key.SECTION, - key=ConfigSubtitles.Key.HIGHLIGHT_WORDS, - new_value="True" if self.chk_highlight_words.get() else "False", - ), + command=self._on_highlight_words_change, ) self.chk_highlight_words.grid(row=1, column=0, padx=20, pady=10, sticky=ctk.W) @@ -622,7 +591,7 @@ def _init_sidebar(self): command=lambda *args: self._on_config_change( section=ConfigWhisperX.Key.SECTION, key=ConfigWhisperX.Key.MODEL_SIZE, - new_value=self.omn_model_size.get(), + new_value=args[0], ), ) self.omn_model_size.grid(row=2, column=0, padx=20, pady=(3, 10), sticky=ctk.EW) @@ -643,7 +612,7 @@ def _init_sidebar(self): command=lambda *args: self._on_config_change( section=ConfigWhisperX.Key.SECTION, key=ConfigWhisperX.Key.COMPUTE_TYPE, - new_value=self.omn_compute_type.get(), + new_value=args[0], ), ) self.omn_compute_type.grid( @@ -677,11 +646,7 @@ def _init_sidebar(self): self.chk_use_cpu = ctk.CTkCheckBox( master=self.frm_whisperx_advanced_options, text="Use CPU", - command=lambda: self._on_config_change( - section=ConfigWhisperX.Key.SECTION, - key=ConfigWhisperX.Key.USE_CPU, - new_value="True" if self.chk_use_cpu.get() else "False", - ), + command=self._on_use_cpu_change, ) self.chk_use_cpu.grid(row=6, column=0, padx=20, pady=(10, 16), sticky=ctk.W) @@ -719,7 +684,12 @@ def _init_sidebar(self): ) self.lbl_info.grid(row=8, column=0, padx=20, pady=(5, 10)) - def _init_main_content(self): + def _init_main_content(self) -> None: + """ + Initializes the widgets on the right side of the main window. 
+ + :return: None + """ # Main entry frame self.frm_main_entry = ctk.CTkFrame(master=self, fg_color="transparent") self.frm_main_entry.grid(row=0, column=1, padx=20, pady=(20, 0), sticky=ctk.EW) @@ -793,11 +763,7 @@ def _init_main_content(self): self.chk_overwrite_files = ctk.CTkCheckBox( master=self.frm_save_options, text="Overwrite existing files", - command=lambda: self._on_config_change( - section=ConfigTranscription.Key.SECTION, - key=ConfigTranscription.Key.OVERWRITE_FILES, - new_value=str(bool(self.chk_overwrite_files.get())), - ), + command=self._on_overwrite_files_change, ) self.chk_overwrite_files.grid(row=0, column=2, padx=0, pady=0) @@ -809,19 +775,22 @@ def _init_main_content(self): # PUBLIC METHODS (called by the controller) - def on_select_path_success(self, filepath: str): + def on_select_path_success(self, path: str) -> None: """ Handles the successful selection of a file or directory path by updating the entry field with the selected file or directory path. - :param filepath: The selected file or directory path. - :type filepath: str + :param path: The selected file or directory path. + :type path: str + :return: None """ - self.ent_path.configure(textvariable=ctk.StringVar(self, filepath)) + self.ent_path.configure(textvariable=ctk.StringVar(self, path)) - def on_processed_transcription(self): + def on_processed_transcription(self) -> None: """ Re-enables disabled widgets after transcription processing is complete. + + :return: None """ self.ent_path.configure(state=ctk.NORMAL) self.omn_transcription_language.configure(state=ctk.NORMAL) @@ -831,13 +800,15 @@ def on_processed_transcription(self): self._toggle_progress_bar_visibility(should_show=False) - def on_stop_recording_from_mic(self): + def on_stop_recording_from_mic(self) -> None: """ Updates the state to indicate that recording from the microphone has stopped, notified by the controller. It also updates the button appearance to indicate that recording can be started again. 
Additionally, it delegates the task of stopping the recording to the controller. + + :return: None """ self._is_transcribing_from_mic = False @@ -848,27 +819,63 @@ def on_stop_recording_from_mic(self): state=ctk.DISABLED, ) - def display_text(self, text): + def display_text(self, text: str) -> None: """ Clears any existing text in the transcription text box to display the provided text. :param text: The text to be displayed in the transcription text box. :type text: str + :return: None """ self.tbx_transcription.delete("1.0", ctk.END) self.tbx_transcription.insert("0.0", text) # PRIVATE METHODS + def _get_transcription_properties(self) -> dict[str, Any]: + """ + Checks the current state of user interface elements to determine the + transcription properties. + + :return: A dictionary containing the transcription properties. + :rtype: dict[str, Any] + """ + language_code = du.find_key_by_value( + dictionary=c.AUDIO_LANGUAGES, + target_value=self.omn_transcription_language.get(), + ) + + properties = { + "audio_source": self._audio_source, + "language_code": language_code, + "method": TranscriptionMethod(self.omn_transcription_method.get()), + "should_autosave": self.chk_autosave.get() == 1, + "should_overwrite": self.chk_overwrite_files.get() == 1, + } + + if self.omn_transcription_method.get() == TranscriptionMethod.GOOGLE_API.value: + properties["should_translate"] = False + properties["output_file_types"] = ["txt"] + if self.omn_transcription_method.get() == TranscriptionMethod.WHISPER_API.value: + properties["should_translate"] = False + properties["output_file_types"] = [self.omn_response_format.get()] + if self.omn_transcription_method.get() == TranscriptionMethod.WHISPERX.value: + properties["should_translate"] = bool( + self.chk_whisper_options_translate.get() + ) + properties["output_file_types"] = self._config_whisperx.output_file_types + + return properties + def _setup_debounced_change( self, - section: str, - key: str, + section: ConfigManager.KeyType, + 
key: ConfigManager.KeyType, variable: ctk.Variable, - callback: callable, - *unused: tuple, - ): + callback: Callable[[ConfigManager.KeyType, ConfigManager.KeyType, str], None], + *unused: tuple, # type: ignore[type-arg] + ) -> None: """ Sets up a debounced callback for a variable change. @@ -883,10 +890,11 @@ def _setup_debounced_change( :param variable: The tkinter variable to watch for changes. :type variable: tkinter.Variable :param callback: The callback function to be executed after the debounce delay. - :type callback: function + :type callback: Callable[[cm.ConfigManager.KeyType, cm.ConfigManager.KeyType, str], None] :param unused: Additional unused arguments that must be kept to prevent exceptions. :type unused: tuple + :return: None """ variable.trace_add( mode="write", @@ -897,12 +905,12 @@ def _setup_debounced_change( def _on_change_debounced( self, - section: str, - key: str, + section: ConfigManager.KeyType, + key: ConfigManager.KeyType, variable: ctk.Variable, - callback: callable, + callback: Callable[[ConfigManager.KeyType, ConfigManager.KeyType, str], None], delay: int = 600, - ): + ) -> None: """ Handles debounced changes to a variable. @@ -918,9 +926,10 @@ def _on_change_debounced( :param variable: The tkinter variable being monitored for changes. :type variable: tkinter.Variable :param callback: The function to be executed after the debounce delay. - :type callback: callable - :param delay: The debounce delay in milliseconds before executing the callback. + :type callback: Callable[[cm.ConfigManager.KeyType, cm.ConfigManager.KeyType, str], None] + :param delay: Debounce delay in milliseconds before executing the callback. 
:type delay: int, optional + :return: None """ # Cancel the previously scheduled after call if self._after_id is not None: @@ -931,7 +940,17 @@ def _on_change_debounced( delay, lambda: callback(section, key, variable.get()) ) - def _on_transcription_language_change(self, option: str): + def _on_transcription_language_change(self, option: str) -> None: + """ + Handles changes to `omn_transcription_language`. + + Updates the configuration entry for the transcription language and sets the + value in the `omn_transcription_language`. + + :param option: The selected transcription language. + :type option: str + :return: None + """ self._on_config_change( section=ConfigTranscription.Key.SECTION, key=ConfigTranscription.Key.LANGUAGE, @@ -939,9 +958,9 @@ def _on_transcription_language_change(self, option: str): ) self.omn_transcription_language.set(option) - def _on_audio_source_change(self, option: str): + def _on_audio_source_change(self, option: str) -> None: """ - Handles changes to `omn_transcribe_from`. + Handles changes to `omn_audio_source`. Updates the transcription source based on the selected option. It also adjusts the GUI elements accordingly, such as configuring buttons, labels, and entry @@ -985,18 +1004,31 @@ def _on_audio_source_change(self, option: str): self.btn_file_explorer.grid_remove() self.frm_main_entry.grid() - def _on_select_path(self): + def _on_select_path(self) -> None: """ Triggers when `btn_file_explorer` is clicked to select the path of the file or directory to transcribe. + + :return: None """ + assert self._controller + if self._audio_source == AudioSource.FILE: self._controller.select_file() elif self._audio_source == AudioSource.DIRECTORY: self._controller.select_directory() @staticmethod - def _validate_temperature(temperature): + def _validate_temperature(temperature: str) -> bool: + """ + Validates the input value of temperature to ensure that it is within the correct + range (0.0 and 1.0). 
+ + :param temperature: The input temperature to validate. + :type temperature: str + :return: True if the temperature is valid or False if it is not. + :rtype: bool + """ if temperature == "": return True @@ -1006,10 +1038,21 @@ def _validate_temperature(temperature): except ValueError: return False - def _on_start_recording_from_mic(self): + def _on_highlight_words_change(self) -> None: + new_value = "True" if self.chk_highlight_words.get() else "False" + + self._on_config_change( + section=ConfigSubtitles.Key.SECTION, + key=ConfigSubtitles.Key.HIGHLIGHT_WORDS, + new_value=new_value, + ) + + def _on_start_recording_from_mic(self) -> None: """ Updates the UI when the user has clicked the `btn_main_action` with the audio source set to Microphone. + + :return: None """ self._is_transcribing_from_mic = True @@ -1023,10 +1066,12 @@ def _on_start_recording_from_mic(self): state=ctk.NORMAL, ) - def _prepare_ui_for_transcription(self): + def _prepare_ui_for_transcription(self) -> None: """ Disables fields, shows the progress bar and removes the text of the previous transcription. + + :return: None """ self.ent_path.configure(state=ctk.DISABLED) self.omn_transcription_language.configure(state=ctk.DISABLED) @@ -1040,7 +1085,7 @@ def _prepare_ui_for_transcription(self): self.display_text("") - def _on_main_action(self): + def _on_main_action(self) -> None: """ Triggers when `btn_main_action` is clicked. @@ -1048,7 +1093,11 @@ def _on_main_action(self): selections in the user interface. It disables certain UI elements during the transcription process to prevent further user input until the transcription is complete. 
+ + :return: None """ + assert self._controller + self._prepare_ui_for_transcription() transcription = Transcription(**self._get_transcription_properties()) @@ -1066,24 +1115,32 @@ def _on_main_action(self): self._controller.prepare_for_transcription(transcription) - def _on_save_transcription(self): + def _on_save_transcription(self) -> None: """ Triggers when `btn_save` is clicked. Prompts the user with the file explorer to select a directory and enter the name of the transcription file. + + :return: None """ + assert self._controller + self._controller.save_transcription( file_path=Path(self.ent_path.get()), should_autosave=False, should_overwrite=False, ) - def _on_transcription_method_change(self, option: str): + def _on_transcription_method_change(self, option: str) -> None: """ - Handles changes to the radio buttons of the "Transcribe using" option. + Handles changes to `omn_transcription_method`. Updates the user interface based on the chosen transcription method. It displays or hides specific options depending on whether WhisperX or Google API transcription method is selected. + + :param option: Selected transcription method. + :type option: str + :return: None """ self._on_config_change( section=ConfigTranscription.Key.SECTION, @@ -1118,13 +1175,16 @@ def _on_transcription_method_change(self, option: str): self.frm_whisper_api_options.grid() @staticmethod - def _on_set_api_key(env_key: EnvKeys, title: str): + def _on_set_api_key(env_key: EnvKeys, title: str) -> None: """ - Handles the setting of an API key depending on . - - Prompts the user to input a new Google API key through a dialog window. If a new - API key is provided, and it differs from the existing one, it updates the - configuration with the new API key. + Opens a dialog window to store the API key in the environment variables, if + applicable. + + :param env_key: Environment key to set the value. + :type env_key: EnvKeys + :param title: Title of the dialog window. 
+ :type title: str + :return: None """ old_api_key = env_key.get_value() @@ -1139,7 +1199,7 @@ def _on_set_api_key(env_key: EnvKeys, title: str): if new_api_key and old_api_key != new_api_key: env_key.set_value(new_api_key.strip()) - def _on_show_advanced_options(self): + def _on_show_advanced_options(self) -> None: """ Handle clicks on `btn_whisperx_show_advanced_options`. @@ -1148,6 +1208,8 @@ def _on_show_advanced_options(self): the button text to "Show advanced options". If the advanced options frame is currently hidden, it displays the frame and updates the button text to "Hide advanced options". + + :return: None """ if self.frm_whisperx_advanced_options.winfo_ismapped(): self.frm_whisperx_advanced_options.grid_remove() @@ -1160,7 +1222,7 @@ def _on_show_advanced_options(self): text="Hide advanced options" ) - def _on_autosave_change(self): + def _on_autosave_change(self) -> None: """ Handles changes to `chk_autosave`. @@ -1168,6 +1230,8 @@ def _on_autosave_change(self): autosave option is selected or deselected. If `chk_autosave` is selected, it enables `chk_overwrite_files`. If `chk_autosave` is deselected, it deselects and disables `chk_overwrite_files`. + + :return: None """ self._on_config_change( section=ConfigTranscription.Key.SECTION, @@ -1188,47 +1252,61 @@ def _on_autosave_change(self): self.chk_overwrite_files.deselect() self.chk_overwrite_files.configure(state=ctk.DISABLED) - def _on_output_file_types_change(self): + def _on_overwrite_files_change(self) -> None: + new_value = "True" if self.chk_overwrite_files.get() else "False" + + self._on_config_change( + section=ConfigTranscription.Key.SECTION, + key=ConfigTranscription.Key.OVERWRITE_FILES, + new_value=new_value, + ) + + def _on_output_file_types_change(self) -> None: """ Handles changes to the output file types by updating the configuration and - displaying the appropriate subtitle options. 
+ displaying the appropriate subtitle options if any of the selected output file + types is a subtitle file type. + + :return: None """ # Dictionary mapping checkboxes to their corresponding file types - checkbox_to_file_type = { - self.chk_output_file_vtt: "vtt", - self.chk_output_file_srt: "srt", - self.chk_output_file_txt: "txt", - self.chk_output_file_json: "json", - self.chk_output_file_tsv: "tsv", - self.chk_output_file_aud: "aud", + chk_to_output_file_type: dict[ctk.CTkCheckBox, OutputFileTypes] = { + self.chk_output_file_aud: WhisperXFileTypes.AUD.value, + self.chk_output_file_json: WhisperXFileTypes.JSON.value, + self.chk_output_file_srt: WhisperXFileTypes.SRT.value, + self.chk_output_file_tsv: WhisperXFileTypes.TSV.value, + self.chk_output_file_txt: WhisperXFileTypes.TXT.value, + self.chk_output_file_vtt: WhisperXFileTypes.VTT.value, } # List comprehension to gather selected file types - output_file_types = [ - file_type for chk, file_type in checkbox_to_file_type.items() if chk.get() + selected_output_file_types = [ + file_type for chk, file_type in chk_to_output_file_type.items() if chk.get() ] # Show or hide the subtitle options frame based on the selected subtitle file types - if any(file_type in output_file_types for file_type in {"vtt", "srt"}): + if any(file_type in selected_output_file_types for file_type in {"vtt", "srt"}): self.frm_subtitle_options.grid() else: self.frm_subtitle_options.grid_remove() # Convert the list to a comma-separated string and update the configuration - output_file_types_str = ",".join(output_file_types) - self._config_whisperx.output_file_types = output_file_types_str + selected_output_file_types_str = ",".join(selected_output_file_types) + self._config_whisperx.output_file_types = selected_output_file_types # Notify the config change self._on_config_change( section=ConfigWhisperX.Key.SECTION, key=ConfigWhisperX.Key.OUTPUT_FILE_TYPES, - new_value=output_file_types_str, + new_value=selected_output_file_types_str, ) - def 
_toggle_chk_timestamp_granularities(self): + def _toggle_chk_timestamp_granularities(self) -> None: """ Toggles timestamp granularities checkboxes visibility depending on the selected response format. + + :return: None """ if self.omn_response_format.get() != "verbose_json": self.chk_timestamp_granularities_segment.configure(state=ctk.DISABLED) @@ -1252,10 +1330,14 @@ def _toggle_chk_timestamp_granularities(self): ): self.chk_timestamp_granularities_word.select() - def _on_response_format_change(self, option: str): + def _on_response_format_change(self, option: str) -> None: """ Handles changes to the response format by updating the configuration and toggling the timestamp granularities checkboxes. + + :param option: Selected response format. + :type option: str + :return: None """ self._on_config_change( section=ConfigWhisperApi.Key.SECTION, @@ -1265,12 +1347,16 @@ def _on_response_format_change(self, option: str): self._toggle_chk_timestamp_granularities() - def _on_timestamp_granularities_change(self): + def _on_timestamp_granularities_change(self) -> None: """ Handles changes to the timestamp granularities by updating the configuration. 
+ + :return: None """ # Dictionary mapping checkboxes to their corresponding file types - chk_to_timestamp_granularity = { + chk_to_timestamp_granularity: dict[ + ctk.CTkCheckBox, TimestampGranularitiesType + ] = { self.chk_timestamp_granularities_segment: TimestampGranularities.SEGMENT.value, self.chk_timestamp_granularities_word: TimestampGranularities.WORD.value, } @@ -1287,7 +1373,7 @@ def _on_timestamp_granularities_change(self): selected_timestamp_granularities ) self._config_whisper_api.timestamp_granularities = ( - selected_timestamp_granularities_str + selected_timestamp_granularities ) # Notify the config change @@ -1297,11 +1383,13 @@ def _on_timestamp_granularities_change(self): new_value=selected_timestamp_granularities_str, ) - def _toggle_progress_bar_visibility(self, should_show): + def _toggle_progress_bar_visibility(self, should_show: bool) -> None: """ Toggles the visibility of the progress bar based on the specified parameter. :param should_show: A boolean indicating whether to show or hide the bar. + :type should_show: bool + :return: None """ if should_show: self.progress_bar.grid(row=2, column=1, padx=40, pady=0, sticky=ctk.EW) @@ -1309,13 +1397,17 @@ def _toggle_progress_bar_visibility(self, should_show): else: self.progress_bar.grid_forget() - def _toggle_frm_subtitle_options_visibility(self): - if ( - self._config_transcription.method == TranscriptionMethod.WHISPERX.value - and ( - "srt" in self._config_whisperx.output_file_types - or "vtt" in self._config_whisperx.output_file_types - ) + def _toggle_frm_subtitle_options_visibility(self) -> None: + """ + Toggle the visibility of `frm_subtitle_options` depending on whether the + transcription method allows to configure subtitle generation and whether + any of the selected output file types is a subtitle file type. 
+ + :return: None + """ + if self._config_transcription.method == TranscriptionMethod.WHISPERX.value and ( + "srt" in self._config_whisperx.output_file_types + or "vtt" in self._config_whisperx.output_file_types ): if "srt" in self._config_whisperx.output_file_types: self.chk_output_file_srt.select() @@ -1326,7 +1418,16 @@ def _toggle_frm_subtitle_options_visibility(self): else: self.frm_subtitle_options.grid_remove() - def _change_appearance_mode_event(self, new_appearance_mode: str): + def _on_use_cpu_change(self) -> None: + new_value = "True" if self.chk_use_cpu.get() else "False" + + self._on_config_change( + section=ConfigWhisperX.Key.SECTION, + key=ConfigWhisperX.Key.USE_CPU, + new_value=new_value, + ) + + def _change_appearance_mode_event(self, new_appearance_mode: str) -> None: """ Changes the appearance mode of the application and stores it in the configuration file. @@ -1334,6 +1435,7 @@ def _change_appearance_mode_event(self, new_appearance_mode: str): :param new_appearance_mode: The new appearance mode to set for the application. It can be "Light", "Dark" or "System". :type new_appearance_mode: str + :return: None """ ctk.set_appearance_mode(new_appearance_mode) @@ -1344,7 +1446,9 @@ def _change_appearance_mode_event(self, new_appearance_mode: str): ) @staticmethod - def _on_config_change(section: str, key: str, new_value: str): + def _on_config_change( + section: ConfigManager.KeyType, key: ConfigManager.KeyType, new_value: str + ) -> None: """ Updates a configuration value. It modifies the specified value in the configuration file using the `ConfigManager.modify_value` method. @@ -1355,5 +1459,6 @@ def _on_config_change(section: str, key: str, new_value: str): :type key: str :param new_value: The new value to replace the existing one. :type new_value: str + :return: None """ - cm.ConfigManager.modify_value(section, key, new_value) + ConfigManager.modify_value(section, key, new_value)