Initial Commit

This commit is contained in:
2025-11-20 15:49:45 +00:00
commit b6dd8b8fe2
1530 changed files with 602744 additions and 0 deletions

41
.gitignore vendored Normal file
View File

@@ -0,0 +1,41 @@
# Virtual environment
.venv/
env/
venv/
# Project virtualenv created in-tree as ./buffteks (holds bin/activate, pip, etc.);
# anchored to the repository root so only that directory is matched.
/buffteks/
# Python bytecode and cache
__pycache__/
*.pyc
*.pyo
*.pyd
# Flask specific
instance/
.webassets-cache/
# Testing
.pytest_cache/
.coverage
htmlcov/
# Build artifacts
dist/
build/
*.egg-info/
# Editor specific files (optional, uncomment if applicable)
# .vscode/
# .idea/
# *.sublime-project
# *.sublime-workspace
# Database files (if using a local SQLite database)
*.db
*.sqlite
*.sqlite3
# Log files
*.log
# Sensitive files (e.g., environment variables)
.env

1214
app.py Normal file

File diff suppressed because it is too large Load Diff

247
buffteks/bin/Activate.ps1 Normal file
View File

@@ -0,0 +1,247 @@
<#
.Synopsis
Activate a Python virtual environment for the current PowerShell session.
.Description
Pushes the python executable for a virtual environment to the front of the
$Env:PATH environment variable and sets the prompt to signify that you are
in a Python virtual environment. Makes use of the command line switches as
well as the `pyvenv.cfg` file values present in the virtual environment.
.Parameter VenvDir
Path to the directory that contains the virtual environment to activate. The
default value for this is the parent of the directory that the Activate.ps1
script is located within.
.Parameter Prompt
The prompt prefix to display when this virtual environment is activated. By
default, this prompt is the name of the virtual environment folder (VenvDir)
surrounded by parentheses and followed by a single space (ie. '(.venv) ').
.Example
Activate.ps1
Activates the Python virtual environment that contains the Activate.ps1 script.
.Example
Activate.ps1 -Verbose
Activates the Python virtual environment that contains the Activate.ps1 script,
and shows extra information about the activation as it executes.
.Example
Activate.ps1 -VenvDir C:\Users\MyUser\Common\.venv
Activates the Python virtual environment located in the specified location.
.Example
Activate.ps1 -Prompt "MyPython"
Activates the Python virtual environment that contains the Activate.ps1 script,
and prefixes the current prompt with the specified string (surrounded in
parentheses) while the virtual environment is active.
.Notes
On Windows, it may be required to enable this Activate.ps1 script by setting the
execution policy for the user. You can do this by issuing the following PowerShell
command:
PS C:\> Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
For more information on Execution Policies:
https://go.microsoft.com/fwlink/?LinkID=135170
#>
Param(
[Parameter(Mandatory = $false)]
[String]
$VenvDir,
[Parameter(Mandatory = $false)]
[String]
$Prompt
)
<# Function declarations --------------------------------------------------- #>
<#
.Synopsis
Remove all shell session elements added by the Activate script, including the
addition of the virtual environment's Python executable from the beginning of
the PATH variable.
.Parameter NonDestructive
If present, do not remove this function from the global namespace for the
session.
#>
function global:deactivate ([switch]$NonDestructive) {
    # Put back the prompt function saved at activation time, if one was saved.
    if (Test-Path -Path Function:_OLD_VIRTUAL_PROMPT) {
        Copy-Item -Path Function:_OLD_VIRTUAL_PROMPT -Destination Function:prompt
        Remove-Item -Path Function:_OLD_VIRTUAL_PROMPT
    }

    # Restore PYTHONHOME and then PATH from their _OLD_VIRTUAL_* backups,
    # dropping each backup once it has been copied back.
    foreach ($savedName in 'PYTHONHOME', 'PATH') {
        $backup = "Env:_OLD_VIRTUAL_${savedName}"
        if (Test-Path -Path $backup) {
            Copy-Item -Path $backup -Destination "Env:${savedName}"
            Remove-Item -Path $backup
        }
    }

    # These two only exist while a venv is active, so they are simply removed.
    foreach ($venvOnly in 'VIRTUAL_ENV', 'VIRTUAL_ENV_PROMPT') {
        if (Test-Path -Path "Env:${venvOnly}") {
            Remove-Item -Path "Env:${venvOnly}"
        }
    }

    # The prompt prefix variable is created ReadOnly on activation, hence -Force.
    if (Get-Variable -Name "_PYTHON_VENV_PROMPT_PREFIX" -ErrorAction SilentlyContinue) {
        Remove-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Scope Global -Force
    }

    # Unless asked to stay, remove this function from the session as well.
    if (-not $NonDestructive) {
        Remove-Item -Path function:deactivate
    }
}
<#
.Description
Get-PyVenvConfig parses the values from the pyvenv.cfg file located in the
given folder, and returns them in a map.
For each line in the pyvenv.cfg file, if that line can be parsed into exactly
two strings separated by `=` (with any amount of whitespace surrounding the =)
then it is considered a `key = value` line. The left hand string is the key,
the right hand is the value.
If the value starts with a `'` or a `"` then the first and last character is
stripped from the value before being captured.
.Parameter ConfigDir
Path to the directory that contains the `pyvenv.cfg` file.
#>
# Reads `pyvenv.cfg` from $ConfigDir and returns its `key = value` pairs as a
# hashtable; the hashtable is empty when the file cannot be resolved.
function Get-PyVenvConfig(
[String]
$ConfigDir
) {
Write-Verbose "Given ConfigDir=$ConfigDir, obtain values in pyvenv.cfg"
# Ensure the file exists, and issue a warning if it doesn't (but still allow the function to continue).
# -Resolve makes Join-Path fail for a missing file; -ErrorAction Continue reports
# that error without stopping, leaving $pyvenvConfigPath empty.
$pyvenvConfigPath = Join-Path -Resolve -Path $ConfigDir -ChildPath 'pyvenv.cfg' -ErrorAction Continue
# An empty map will be returned if no config file is found.
$pyvenvConfig = @{ }
if ($pyvenvConfigPath) {
Write-Verbose "File exists, parse `key = value` lines"
$pyvenvConfigContent = Get-Content -Path $pyvenvConfigPath
$pyvenvConfigContent | ForEach-Object {
# Split on the first '=' only (limit of 2 parts), swallowing whitespace around it.
$keyval = $PSItem -split "\s*=\s*", 2
# Lines without both a non-empty key and a non-empty value are skipped.
if ($keyval[0] -and $keyval[1]) {
$val = $keyval[1]
# Remove extraneous quotations around a string value.
# The literal below is the two characters ' and "; when the value starts with
# either, its first AND last characters are dropped (the last character is not
# checked to be a matching quote).
if ("'""".Contains($val.Substring(0, 1))) {
$val = $val.Substring(1, $val.Length - 2)
}
$pyvenvConfig[$keyval[0]] = $val
Write-Verbose "Adding Key: '$($keyval[0])'='$val'"
}
}
}
return $pyvenvConfig
}
<# Begin Activate script --------------------------------------------------- #>
# Determine the containing directory of this script
$VenvExecPath = Split-Path -Parent $MyInvocation.MyCommand.Definition
$VenvExecDir = Get-Item -Path $VenvExecPath

# Verbose diagnostics; each value is wrapped in a matching pair of single quotes.
Write-Verbose "Activation script is located in path: '$VenvExecPath'"
Write-Verbose "VenvExecDir Fullname: '$($VenvExecDir.FullName)'"
Write-Verbose "VenvExecDir Name: '$($VenvExecDir.Name)'"

# Set values required in priority: CmdLine, ConfigFile, Default
# First, get the location of the virtual environment, it might not be
# VenvExecDir if specified on the command line.
if ($VenvDir) {
    Write-Verbose "VenvDir given as parameter, using '$VenvDir' to determine values"
}
else {
    Write-Verbose "VenvDir not given as a parameter, using parent directory name as VenvDir."
    # The venv root is the parent of the directory holding this script;
    # trailing path separators are trimmed.
    $VenvDir = $VenvExecDir.Parent.FullName.TrimEnd("\\/")
    Write-Verbose "VenvDir=$VenvDir"
}
# Next, read the `pyvenv.cfg` file to determine any required value such
# as `prompt`.
$pyvenvCfg = Get-PyVenvConfig -ConfigDir $VenvDir
# Next, set the prompt from the command line, or the config file, or
# just use the name of the virtual environment folder.
if ($Prompt) {
Write-Verbose "Prompt specified as argument, using '$Prompt'"
}
else {
Write-Verbose "Prompt not specified as argument to script, checking pyvenv.cfg value"
if ($pyvenvCfg -and $pyvenvCfg['prompt']) {
Write-Verbose " Setting based on value in pyvenv.cfg='$($pyvenvCfg['prompt'])'"
$Prompt = $pyvenvCfg['prompt'];
}
else {
Write-Verbose " Setting prompt based on parent's directory's name. (Is the directory name passed to venv module when creating the virtual environment)"
Write-Verbose " Got leaf-name of $VenvDir='$(Split-Path -Path $venvDir -Leaf)'"
$Prompt = Split-Path -Path $venvDir -Leaf
}
}
Write-Verbose "Prompt = '$Prompt'"
Write-Verbose "VenvDir='$VenvDir'"
# Deactivate any currently active virtual environment, but leave the
# deactivate function in place.
deactivate -nondestructive
# Now set the environment variable VIRTUAL_ENV, used by many tools to determine
# that there is an activated venv.
$env:VIRTUAL_ENV = $VenvDir
if (-not $Env:VIRTUAL_ENV_DISABLE_PROMPT) {
Write-Verbose "Setting prompt to '$Prompt'"
# Set the prompt to include the env name
# Make sure _OLD_VIRTUAL_PROMPT is global
function global:_OLD_VIRTUAL_PROMPT { "" }
Copy-Item -Path function:prompt -Destination function:_OLD_VIRTUAL_PROMPT
New-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Description "Python virtual environment prompt prefix" -Scope Global -Option ReadOnly -Visibility Public -Value $Prompt
function global:prompt {
Write-Host -NoNewline -ForegroundColor Green "($_PYTHON_VENV_PROMPT_PREFIX) "
_OLD_VIRTUAL_PROMPT
}
$env:VIRTUAL_ENV_PROMPT = $Prompt
}
# Clear PYTHONHOME
if (Test-Path -Path Env:PYTHONHOME) {
Copy-Item -Path Env:PYTHONHOME -Destination Env:_OLD_VIRTUAL_PYTHONHOME
Remove-Item -Path Env:PYTHONHOME
}
# Add the venv to the PATH
Copy-Item -Path Env:PATH -Destination Env:_OLD_VIRTUAL_PATH
$Env:PATH = "$VenvExecDir$([System.IO.Path]::PathSeparator)$Env:PATH"

69
buffteks/bin/activate Normal file
View File

@@ -0,0 +1,69 @@
# This file must be used with "source bin/activate" *from bash*
# you cannot run it directly
#######################################
# Undo the changes made by sourcing this activate script.
# Must stay POSIX-sh compatible: the file is sourced into the user's shell.
# Globals:
#   _OLD_VIRTUAL_PATH, _OLD_VIRTUAL_PYTHONHOME, _OLD_VIRTUAL_PS1 (read, unset)
#   PATH, PYTHONHOME, PS1 (restored from the saved values when non-empty)
#   VIRTUAL_ENV, VIRTUAL_ENV_PROMPT (unset)
# Arguments:
#   $1 - "nondestructive" keeps this function defined; anything else
#        (or nothing) removes the function after it has run.
#######################################
deactivate () {
    # reset old environment variables
    if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then
        PATH="${_OLD_VIRTUAL_PATH:-}"
        export PATH
        unset _OLD_VIRTUAL_PATH
    fi
    if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then
        PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}"
        export PYTHONHOME
        unset _OLD_VIRTUAL_PYTHONHOME
    fi

    # This should detect bash and zsh, which have a hash command that must
    # be called to get it to forget past commands.  Without forgetting
    # past commands the $PATH changes we made may not be respected.
    # Two separate tests joined with || replace the obsolescent `[ ... -o ... ]`.
    if [ -n "${BASH:-}" ] || [ -n "${ZSH_VERSION:-}" ] ; then
        hash -r 2> /dev/null
    fi

    if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then
        PS1="${_OLD_VIRTUAL_PS1:-}"
        export PS1
        unset _OLD_VIRTUAL_PS1
    fi

    unset VIRTUAL_ENV
    unset VIRTUAL_ENV_PROMPT
    if [ "${1:-}" != "nondestructive" ] ; then
        # Self destruct!
        unset -f deactivate
    fi
}
# Activation steps, run when this file is sourced.
# Kept POSIX-sh compatible because the file is sourced into the user's shell.

# unset irrelevant variables
deactivate nondestructive

VIRTUAL_ENV="/var/www/buffteks/buffteks"
export VIRTUAL_ENV

# Remember the caller's PATH so deactivate can restore it, then put the
# venv's bin directory first.
_OLD_VIRTUAL_PATH="$PATH"
PATH="$VIRTUAL_ENV/bin:$PATH"
export PATH

# unset PYTHONHOME if set
# this will fail if PYTHONHOME is set to the empty string (which is bad anyway)
# could use `if (set -u; : $PYTHONHOME) ;` in bash
if [ -n "${PYTHONHOME:-}" ] ; then
    _OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}"
    unset PYTHONHOME
fi

# Prefix the prompt unless the user opted out via VIRTUAL_ENV_DISABLE_PROMPT.
if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then
    _OLD_VIRTUAL_PS1="${PS1:-}"
    PS1='(buffteks) '"${PS1:-}"
    export PS1
    VIRTUAL_ENV_PROMPT='(buffteks) '
    export VIRTUAL_ENV_PROMPT
fi

# This should detect bash and zsh, which have a hash command that must
# be called to get it to forget past commands.  Without forgetting
# past commands the $PATH changes we made may not be respected.
# Two separate tests joined with || replace the obsolescent `[ ... -o ... ]`.
if [ -n "${BASH:-}" ] || [ -n "${ZSH_VERSION:-}" ] ; then
    hash -r 2> /dev/null
fi

26
buffteks/bin/activate.csh Normal file
View File

@@ -0,0 +1,26 @@
# This file must be used with "source bin/activate.csh" *from csh*.
# You cannot run it directly.
# Created by Davide Di Blasi <davidedb@gmail.com>.
# Ported to Python 3.3 venv by Andrew Svetlov <andrew.svetlov@gmail.com>
# `deactivate` is an alias (csh has no functions). In order, it:
#   - restores PATH from _OLD_VIRTUAL_PATH when that variable is set, then rehashes;
#   - restores the prompt from _OLD_VIRTUAL_PROMPT when that variable is set;
#   - unsets VIRTUAL_ENV and VIRTUAL_ENV_PROMPT;
#   - removes the alias itself unless its argument is "nondestructive".
alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PATH" && unset _OLD_VIRTUAL_PATH; rehash; test $?_OLD_VIRTUAL_PROMPT != 0 && set prompt="$_OLD_VIRTUAL_PROMPT" && unset _OLD_VIRTUAL_PROMPT; unsetenv VIRTUAL_ENV; unsetenv VIRTUAL_ENV_PROMPT; test "\!:*" != "nondestructive" && unalias deactivate'
# Unset irrelevant variables.
deactivate nondestructive
# Absolute location of this virtual environment (fixed when the venv was created).
setenv VIRTUAL_ENV /var/www/buffteks/buffteks
# Save the current PATH for deactivate, then put the venv's bin directory first.
set _OLD_VIRTUAL_PATH="$PATH"
setenv PATH "$VIRTUAL_ENV/"bin":$PATH"
# Save the current prompt for deactivate.
set _OLD_VIRTUAL_PROMPT="$prompt"
# Prefix the prompt only when VIRTUAL_ENV_DISABLE_PROMPT is not set.
if (! "$?VIRTUAL_ENV_DISABLE_PROMPT") then
set prompt = '(buffteks) '"$prompt"
setenv VIRTUAL_ENV_PROMPT '(buffteks) '
endif
alias pydoc python -m pydoc
# Rebuild the command hash table so the new PATH takes effect.
rehash

View File

@@ -0,0 +1,69 @@
# This file must be used with "source <venv>/bin/activate.fish" *from fish*
# (https://fishshell.com/); you cannot run it directly.
# Undo what sourcing activate.fish changed. Passing "nondestructive" as the
# first argument keeps this function defined afterwards.
function deactivate -d "Exit virtual environment and return to normal shell environment"
# reset old environment variables
if test -n "$_OLD_VIRTUAL_PATH"
set -gx PATH $_OLD_VIRTUAL_PATH
set -e _OLD_VIRTUAL_PATH
end
if test -n "$_OLD_VIRTUAL_PYTHONHOME"
set -gx PYTHONHOME $_OLD_VIRTUAL_PYTHONHOME
set -e _OLD_VIRTUAL_PYTHONHOME
end
# _OLD_FISH_PROMPT_OVERRIDE is set on activation only when the prompt was
# overridden; in that case put the saved fish_prompt function back.
if test -n "$_OLD_FISH_PROMPT_OVERRIDE"
set -e _OLD_FISH_PROMPT_OVERRIDE
# prevents error when using nested fish instances (Issue #93858)
if functions -q _old_fish_prompt
# Erase the venv prompt, copy the saved one back, then drop the saved copy.
functions -e fish_prompt
functions -c _old_fish_prompt fish_prompt
functions -e _old_fish_prompt
end
end
set -e VIRTUAL_ENV
set -e VIRTUAL_ENV_PROMPT
if test "$argv[1]" != "nondestructive"
# Self-destruct!
functions -e deactivate
end
end
# Activation steps, run when this file is sourced from fish.

# Unset irrelevant variables.
deactivate nondestructive

set -gx VIRTUAL_ENV /var/www/buffteks/buffteks

# Save the current PATH for deactivate, then put the venv's bin directory first.
set -gx _OLD_VIRTUAL_PATH $PATH
set -gx PATH "$VIRTUAL_ENV/"bin $PATH

# Unset PYTHONHOME if set.
if set -q PYTHONHOME
    set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME
    set -e PYTHONHOME
end

if test -z "$VIRTUAL_ENV_DISABLE_PROMPT"
    # fish uses a function instead of an env var to generate the prompt.

    # Save the current fish_prompt function as the function _old_fish_prompt.
    functions -c fish_prompt _old_fish_prompt

    # With the original prompt function renamed, we can override with our own.
    function fish_prompt
        # Save the return status of the last command.
        set -l old_status $status

        # Output the venv prompt; color taken from the blue of the Python logo.
        printf "%s%s%s" (set_color 4B8BBE) '(buffteks) ' (set_color normal)

        # Restore the return status of the previous command.
        # `source` is used instead of its deprecated one-character alias `.`.
        echo "exit $old_status" | source
        # Output the original/"old" prompt.
        _old_fish_prompt
    end

    # Non-empty marker that tells deactivate the prompt was overridden.
    set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV"
    set -gx VIRTUAL_ENV_PROMPT '(buffteks) '
end

8
buffteks/bin/dotenv Executable file
View File

@@ -0,0 +1,8 @@
#!/var/www/buffteks/buffteks/bin/python3
# -*- coding: utf-8 -*-
"""Console-script launcher that hands control to ``dotenv.__main__.cli``."""
import re
import sys

from dotenv.__main__ import cli

if __name__ == '__main__':
    # Strip a trailing "-script.pyw" or ".exe" so argv[0] is the bare command name.
    launcher_suffix = re.compile(r'(-script\.pyw|\.exe)?$')
    sys.argv[0] = launcher_suffix.sub('', sys.argv[0])
    sys.exit(cli())

8
buffteks/bin/flask Executable file
View File

@@ -0,0 +1,8 @@
#!/var/www/buffteks/buffteks/bin/python3
# -*- coding: utf-8 -*-
"""Console-script launcher that hands control to ``flask.cli.main``."""
import re
import sys

from flask.cli import main

if __name__ == '__main__':
    # Strip a trailing "-script.pyw" or ".exe" so argv[0] is the bare command name.
    launcher_suffix = re.compile(r'(-script\.pyw|\.exe)?$')
    sys.argv[0] = launcher_suffix.sub('', sys.argv[0])
    sys.exit(main())

8
buffteks/bin/gunicorn Executable file
View File

@@ -0,0 +1,8 @@
#!/var/www/buffteks/buffteks/bin/python3
# -*- coding: utf-8 -*-
"""Console-script launcher that hands control to ``gunicorn.app.wsgiapp.run``."""
import re
import sys

from gunicorn.app.wsgiapp import run

if __name__ == '__main__':
    # Strip a trailing "-script.pyw" or ".exe" so argv[0] is the bare command name.
    launcher_suffix = re.compile(r'(-script\.pyw|\.exe)?$')
    sys.argv[0] = launcher_suffix.sub('', sys.argv[0])
    sys.exit(run())

8
buffteks/bin/normalizer Executable file
View File

@@ -0,0 +1,8 @@
#!/var/www/buffteks/buffteks/bin/python3
# -*- coding: utf-8 -*-
"""Console-script launcher that hands control to ``charset_normalizer.cli.cli_detect``."""
import re
import sys

from charset_normalizer.cli import cli_detect

if __name__ == '__main__':
    # Strip a trailing "-script.pyw" or ".exe" so argv[0] is the bare command name.
    launcher_suffix = re.compile(r'(-script\.pyw|\.exe)?$')
    sys.argv[0] = launcher_suffix.sub('', sys.argv[0])
    sys.exit(cli_detect())

8
buffteks/bin/pip Executable file
View File

@@ -0,0 +1,8 @@
#!/var/www/buffteks/buffteks/bin/python3
# -*- coding: utf-8 -*-
"""Console-script launcher that hands control to ``pip._internal.cli.main.main``."""
import re
import sys

from pip._internal.cli.main import main

if __name__ == '__main__':
    # Strip a trailing "-script.pyw" or ".exe" so argv[0] is the bare command name.
    launcher_suffix = re.compile(r'(-script\.pyw|\.exe)?$')
    sys.argv[0] = launcher_suffix.sub('', sys.argv[0])
    sys.exit(main())

8
buffteks/bin/pip3 Executable file
View File

@@ -0,0 +1,8 @@
#!/var/www/buffteks/buffteks/bin/python3
# -*- coding: utf-8 -*-
"""Console-script launcher (``pip3``) that hands control to ``pip._internal.cli.main.main``."""
import re
import sys

from pip._internal.cli.main import main

if __name__ == '__main__':
    # Strip a trailing "-script.pyw" or ".exe" so argv[0] is the bare command name.
    launcher_suffix = re.compile(r'(-script\.pyw|\.exe)?$')
    sys.argv[0] = launcher_suffix.sub('', sys.argv[0])
    sys.exit(main())

8
buffteks/bin/pip3.11 Executable file
View File

@@ -0,0 +1,8 @@
#!/var/www/buffteks/buffteks/bin/python3
# -*- coding: utf-8 -*-
"""Console-script launcher (``pip3.11``) that hands control to ``pip._internal.cli.main.main``."""
import re
import sys

from pip._internal.cli.main import main

if __name__ == '__main__':
    # Strip a trailing "-script.pyw" or ".exe" so argv[0] is the bare command name.
    launcher_suffix = re.compile(r'(-script\.pyw|\.exe)?$')
    sys.argv[0] = launcher_suffix.sub('', sys.argv[0])
    sys.exit(main())

1
buffteks/bin/python Symbolic link
View File

@@ -0,0 +1 @@
python3

1
buffteks/bin/python3 Symbolic link
View File

@@ -0,0 +1 @@
/usr/bin/python3

1
buffteks/bin/python3.11 Symbolic link
View File

@@ -0,0 +1 @@
python3

View File

@@ -0,0 +1,164 @@
/* -*- indent-tabs-mode: nil; tab-width: 4; -*- */
/* Greenlet object interface */
#ifndef Py_GREENLETOBJECT_H
#define Py_GREENLETOBJECT_H
#include <Python.h>
#ifdef __cplusplus
extern "C" {
#endif
/* This is deprecated and undocumented. It does not change. */
#define GREENLET_VERSION "1.0.0"
#ifndef GREENLET_MODULE
#define implementation_ptr_t void*
#endif
/*
 * Object layout of a greenlet as seen through this header.
 * `pimpl` has type implementation_ptr_t, which this header defines as void*
 * unless GREENLET_MODULE is defined.
 */
typedef struct _greenlet {
PyObject_HEAD
PyObject* weakreflist;
PyObject* dict;
implementation_ptr_t pimpl;
} PyGreenlet;
/*
 * True when `op` is non-NULL and PyObject_TypeCheck accepts it against
 * PyGreenlet_Type. Note: `op` is expanded twice by this macro.
 */
#define PyGreenlet_Check(op) (op && PyObject_TypeCheck(op, &PyGreenlet_Type))
/* C API functions */
/* Total number of symbols that are exported */
#define PyGreenlet_API_pointers 12
#define PyGreenlet_Type_NUM 0
#define PyExc_GreenletError_NUM 1
#define PyExc_GreenletExit_NUM 2
#define PyGreenlet_New_NUM 3
#define PyGreenlet_GetCurrent_NUM 4
#define PyGreenlet_Throw_NUM 5
#define PyGreenlet_Switch_NUM 6
#define PyGreenlet_SetParent_NUM 7
#define PyGreenlet_MAIN_NUM 8
#define PyGreenlet_STARTED_NUM 9
#define PyGreenlet_ACTIVE_NUM 10
#define PyGreenlet_GET_PARENT_NUM 11
#ifndef GREENLET_MODULE
/* This section is used by modules that use the greenlet C API */
/*
 * Table of exported pointers, indexed by the *_NUM constants defined above.
 * It is NULL until PyGreenlet_Import() has been run; every macro below
 * indexes into it. Being `static`, each translation unit that includes this
 * header gets its own copy.
 */
static void** _PyGreenlet_API = NULL;
# define PyGreenlet_Type \
(*(PyTypeObject*)_PyGreenlet_API[PyGreenlet_Type_NUM])
# define PyExc_GreenletError \
((PyObject*)_PyGreenlet_API[PyExc_GreenletError_NUM])
# define PyExc_GreenletExit \
((PyObject*)_PyGreenlet_API[PyExc_GreenletExit_NUM])
/*
* PyGreenlet* PyGreenlet_New(PyObject *run, PyGreenlet *parent)
*
* greenlet.greenlet(run, parent=None)
*/
# define PyGreenlet_New \
(*(PyGreenlet * (*)(PyObject * run, PyGreenlet * parent)) \
_PyGreenlet_API[PyGreenlet_New_NUM])
/*
* PyGreenlet* PyGreenlet_GetCurrent(void)
*
* greenlet.getcurrent()
*/
# define PyGreenlet_GetCurrent \
(*(PyGreenlet * (*)(void)) _PyGreenlet_API[PyGreenlet_GetCurrent_NUM])
/*
* PyObject* PyGreenlet_Throw(
* PyGreenlet *greenlet,
* PyObject *typ,
* PyObject *val,
* PyObject *tb)
*
* g.throw(...)
*/
# define PyGreenlet_Throw \
(*(PyObject * (*)(PyGreenlet * self, \
PyObject * typ, \
PyObject * val, \
PyObject * tb)) \
_PyGreenlet_API[PyGreenlet_Throw_NUM])
/*
* PyObject* PyGreenlet_Switch(PyGreenlet *greenlet, PyObject *args,
*                             PyObject *kwargs)
*
* g.switch(*args, **kwargs)
*/
# define PyGreenlet_Switch \
(*(PyObject * \
(*)(PyGreenlet * greenlet, PyObject * args, PyObject * kwargs)) \
_PyGreenlet_API[PyGreenlet_Switch_NUM])
/*
* int PyGreenlet_SetParent(PyGreenlet *greenlet, PyGreenlet *nparent)
*
* g.parent = new_parent
*/
# define PyGreenlet_SetParent \
(*(int (*)(PyGreenlet * greenlet, PyGreenlet * nparent)) \
_PyGreenlet_API[PyGreenlet_SetParent_NUM])
/*
* PyGreenlet* PyGreenlet_GetParent(PyGreenlet* greenlet)
*
* return greenlet.parent;
*
* This could return NULL even if there is no exception active.
* If it does not return NULL, you are responsible for decrementing the
* reference count.
*/
# define PyGreenlet_GetParent \
(*(PyGreenlet* (*)(PyGreenlet*)) \
_PyGreenlet_API[PyGreenlet_GET_PARENT_NUM])
/*
* deprecated, undocumented alias.
*/
# define PyGreenlet_GET_PARENT PyGreenlet_GetParent
/*
* PyGreenlet_MAIN, PyGreenlet_STARTED and PyGreenlet_ACTIVE each take a
* PyGreenlet* and return an int (per the casts below).
*/
# define PyGreenlet_MAIN \
(*(int (*)(PyGreenlet*)) \
_PyGreenlet_API[PyGreenlet_MAIN_NUM])
# define PyGreenlet_STARTED \
(*(int (*)(PyGreenlet*)) \
_PyGreenlet_API[PyGreenlet_STARTED_NUM])
# define PyGreenlet_ACTIVE \
(*(int (*)(PyGreenlet*)) \
_PyGreenlet_API[PyGreenlet_ACTIVE_NUM])
/* Macro that imports greenlet and initializes C API */
/* NOTE: This has actually moved to ``greenlet._greenlet._C_API``, but we
keep the older definition to be sure older code that might have a copy of
the header still works. */
/* The result of PyCapsule_Import is stored without being checked here, and
the macro expands to a braced block rather than an expression. */
# define PyGreenlet_Import() \
{ \
_PyGreenlet_API = (void**)PyCapsule_Import("greenlet._C_API", 0); \
}
#endif /* GREENLET_MODULE */
#ifdef __cplusplus
}
#endif
#endif /* !Py_GREENLETOBJECT_H */

View File

@@ -0,0 +1,222 @@
# don't import any costly modules
import sys
import os
is_pypy = '__pypy__' in sys.builtin_module_names
def warn_distutils_present():
    """Emit a warning if ``distutils`` was imported before this hook ran.

    Nothing happens when ``distutils`` is absent from ``sys.modules``, or on
    PyPy older than 3.7.
    """
    if 'distutils' not in sys.modules:
        return

    # PyPy for 3.6 unconditionally imports distutils, so bypass the warning
    # https://foss.heptapod.net/pypy/pypy/-/blob/be829135bc0d758997b3566062999ee8b23872b4/lib-python/3/site.py#L250
    old_pypy = is_pypy and sys.version_info < (3, 7)
    if old_pypy:
        return

    import warnings

    message = (
        "Distutils was imported before Setuptools, but importing Setuptools "
        "also replaces the `distutils` module in `sys.modules`. This may lead "
        "to undesirable behaviors or errors. To avoid these issues, avoid "
        "using distutils directly, ensure that setuptools is installed in the "
        "traditional way (e.g. not an editable install), and/or make sure "
        "that setuptools is always imported before distutils."
    )
    warnings.warn(message)
def clear_distutils():
    """Drop ``distutils`` and all of its submodules from ``sys.modules``.

    Returns immediately when ``distutils`` has not been imported; otherwise a
    warning is issued before the entries are removed.
    """
    if 'distutils' not in sys.modules:
        return

    import warnings

    warnings.warn("Setuptools is replacing distutils.")
    # Snapshot the names first: sys.modules must not change size while iterated.
    stale = [
        modname
        for modname in sys.modules
        if modname.partition('.')[0] == "distutils"
    ]
    for modname in stale:
        del sys.modules[modname]
def enabled():
    """
    Report whether the ``SETUPTOOLS_USE_DISTUTILS`` environment variable
    selects the local distutils (it defaults to ``'local'`` when unset).
    """
    return os.environ.get('SETUPTOOLS_USE_DISTUTILS', 'local') == 'local'
def ensure_local_distutils():
    """Re-import ``distutils`` through the shim and sanity-check the result."""
    from importlib import import_module

    clear_distutils()

    # With the DistutilsMetaFinder in place,
    # perform an import to cause distutils to be
    # loaded from setuptools._distutils. Ref #2906.
    with shim():
        import_module('distutils')

    # check that submodules load as expected
    core = import_module('distutils.core')
    core_path = core.__file__
    assert '_distutils' in core_path, core_path
    assert 'setuptools._distutils.log' not in sys.modules
def do_override():
    """
    Make the local copy of distutils take precedence over the stdlib one,
    but only when :func:`enabled` says so.

    See https://github.com/pypa/setuptools/issues/417#issuecomment-392298401
    for more motivation.
    """
    if not enabled():
        return
    warn_distutils_present()
    ensure_local_distutils()
class _TrivialRe:
    """Minimal regex stand-in: "matches" when every pattern is a substring."""

    def __init__(self, *patterns):
        self._patterns = patterns

    def match(self, string):
        """Return True only if each stored pattern occurs in ``string``."""
        for needle in self._patterns:
            if needle not in string:
                return False
        return True
class DistutilsMetaFinder:
    """Meta-path finder that dispatches to ``spec_for_<fullname>`` methods."""

    def find_spec(self, fullname, path, target=None):
        """Look up and call ``spec_for_<fullname>``; return None if there is none."""
        # optimization: only consider top level modules and those
        # found in the CPython test suite.
        is_top_level = path is None
        if not (is_top_level or fullname.startswith('test.')):
            return

        handler = getattr(self, f'spec_for_{fullname}', lambda: None)
        return handler()

    def spec_for_distutils(self):
        """Build a spec that serves ``setuptools._distutils`` as ``distutils``."""
        if self.is_cpython():
            return

        import importlib
        import importlib.abc
        import importlib.util

        try:
            local_distutils = importlib.import_module('setuptools._distutils')
        except Exception:
            # There are a couple of cases where setuptools._distutils
            # may not be present:
            # - An older Setuptools without a local distutils is
            #   taking precedence. Ref #2957.
            # - Path manipulation during sitecustomize removes
            #   setuptools from the path but only after the hook
            #   has been loaded. Ref #2980.
            # In either case, fall back to stdlib behavior.
            return

        class DistutilsLoader(importlib.abc.Loader):
            def create_module(self, spec):
                # Hand back the already-imported module under its new name.
                local_distutils.__name__ = 'distutils'
                return local_distutils

            def exec_module(self, module):
                pass

        loader = DistutilsLoader()
        return importlib.util.spec_from_loader(
            'distutils', loader, origin=local_distutils.__file__
        )

    @staticmethod
    def is_cpython():
        """
        Return True when a ``pybuilddir.txt`` file exists in the current
        directory, which suppresses supplying distutils for CPython
        (build and tests). Ref #2965 and #3007.
        """
        return os.path.isfile('pybuilddir.txt')

    def spec_for_pip(self):
        """
        Keep stdlib distutils when running under pip, unless pip is being
        imported from a build script.
        See pypa/pip#8761 for rationale.
        """
        if self.pip_imported_during_build():
            return
        clear_distutils()
        # Shadow the method on this instance so later lookups yield no spec.
        self.spec_for_distutils = lambda: None

    @classmethod
    def pip_imported_during_build(cls):
        """
        Report whether any frame on the current stack belongs to a
        ``setup.py`` file. Ref #2355.
        """
        import traceback

        return any(
            cls.frame_file_is_setup(frame) for frame, line in traceback.walk_stack(None)
        )

    @staticmethod
    def frame_file_is_setup(frame):
        """
        Return True if the frame's ``__file__`` global ends with ``setup.py``.
        """
        # some frames may not have __file__ (#2940)
        frame_file = frame.f_globals.get('__file__', '')
        return frame_file.endswith('setup.py')

    def spec_for_sensitive_tests(self):
        """
        Keep stdlib distutils when running select tests under CPython.

        python/cpython#91169
        """
        clear_distutils()
        self.spec_for_distutils = lambda: None

    # Test packages for which stdlib distutils must be kept.
    sensitive_tests = ['test.test_distutils']
    if sys.version_info < (3, 10):
        sensitive_tests += [
            'test.test_peg_generator',
            'test.test_importlib',
        ]
# Register ``spec_for_<test package>`` on the class for every sensitive test
# package, each pointing at the shared spec_for_sensitive_tests handler.
for name in DistutilsMetaFinder.sensitive_tests:
    setattr(DistutilsMetaFinder, f'spec_for_{name}', DistutilsMetaFinder.spec_for_sensitive_tests)

# Single finder instance that the shim helpers insert into / remove from
# sys.meta_path.
DISTUTILS_FINDER = DistutilsMetaFinder()
def add_shim():
    """Install the finder on ``sys.meta_path`` unless it is already there."""
    if DISTUTILS_FINDER not in sys.meta_path:
        insert_shim()
class shim:
    """Context manager that installs the finder on entry and removes it on exit."""

    def __enter__(self):
        insert_shim()

    def __exit__(self, exc, value, tb):
        remove_shim()
def insert_shim():
    """Put the finder at the front of ``sys.meta_path``."""
    front = 0
    sys.meta_path.insert(front, DISTUTILS_FINDER)
def remove_shim():
    """Take the finder off ``sys.meta_path``; do nothing if it is not present."""
    try:
        sys.meta_path.remove(DISTUTILS_FINDER)
    except ValueError:
        # list.remove raises ValueError when the finder was never inserted.
        return

View File

@@ -0,0 +1 @@
# Import _distutils_hack and run its do_override() hook as a single expression.
__import__('_distutils_hack').do_override()

View File

@@ -0,0 +1,20 @@
Copyright 2010 Jason Kirtland
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View File

@@ -0,0 +1,60 @@
Metadata-Version: 2.3
Name: blinker
Version: 1.9.0
Summary: Fast, simple object-to-object and broadcast signaling
Author: Jason Kirtland
Maintainer-email: Pallets Ecosystem <contact@palletsprojects.com>
Requires-Python: >=3.9
Description-Content-Type: text/markdown
Classifier: Development Status :: 5 - Production/Stable
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python
Classifier: Typing :: Typed
Project-URL: Chat, https://discord.gg/pallets
Project-URL: Documentation, https://blinker.readthedocs.io
Project-URL: Source, https://github.com/pallets-eco/blinker/
# Blinker
Blinker provides a fast dispatching system that allows any number of
interested parties to subscribe to events, or "signals".
## Pallets Community Ecosystem
> [!IMPORTANT]\
> This project is part of the Pallets Community Ecosystem. Pallets is the open
> source organization that maintains Flask; Pallets-Eco enables community
> maintenance of related projects. If you are interested in helping maintain
> this project, please reach out on [the Pallets Discord server][discord].
>
> [discord]: https://discord.gg/pallets
## Example
Signal receivers can subscribe to specific senders or receive signals
sent by any sender.
```pycon
>>> from blinker import signal
>>> started = signal('round-started')
>>> def each(round):
...     print(f"Round {round}!")
...
>>> started.connect(each)
>>> def round_two(round):
... print("This is round two.")
...
>>> started.connect(round_two, sender=2)
>>> for round in range(1, 4):
... started.send(round)
...
Round 1!
Round 2!
This is round two.
Round 3!
```

View File

@@ -0,0 +1,12 @@
blinker-1.9.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
blinker-1.9.0.dist-info/LICENSE.txt,sha256=nrc6HzhZekqhcCXSrhvjg5Ykx5XphdTw6Xac4p-spGc,1054
blinker-1.9.0.dist-info/METADATA,sha256=uIRiM8wjjbHkCtbCyTvctU37IAZk0kEe5kxAld1dvzA,1633
blinker-1.9.0.dist-info/RECORD,,
blinker-1.9.0.dist-info/WHEEL,sha256=CpUCUxeHQbRN5UGRQHYRJorO5Af-Qy_fHMctcQ8DSGI,82
blinker/__init__.py,sha256=I2EdZqpy4LyjX17Hn1yzJGWCjeLaVaPzsMgHkLfj_cQ,317
blinker/__pycache__/__init__.cpython-311.pyc,,
blinker/__pycache__/_utilities.cpython-311.pyc,,
blinker/__pycache__/base.cpython-311.pyc,,
blinker/_utilities.py,sha256=0J7eeXXTUx0Ivf8asfpx0ycVkp0Eqfqnj117x2mYX9E,1675
blinker/base.py,sha256=QpDuvXXcwJF49lUBcH5BiST46Rz9wSG7VW_p7N_027M,19132
blinker/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0

View File

@@ -0,0 +1,4 @@
Wheel-Version: 1.0
Generator: flit 3.10.1
Root-Is-Purelib: true
Tag: py3-none-any

View File

@@ -0,0 +1,17 @@
from __future__ import annotations
from .base import ANY
from .base import default_namespace
from .base import NamedSignal
from .base import Namespace
from .base import Signal
from .base import signal
__all__ = [
"ANY",
"default_namespace",
"NamedSignal",
"Namespace",
"Signal",
"signal",
]

View File

@@ -0,0 +1,64 @@
from __future__ import annotations
import collections.abc as c
import inspect
import typing as t
from weakref import ref
from weakref import WeakMethod
T = t.TypeVar("T")
class Symbol:
    """An interned, named constant; a friendlier sentinel than ``object()``.

    Constructing a symbol with a name that was used before returns the very
    same instance, and the ``repr`` is the bare name.

    >>> Symbol('foo') is Symbol('foo')
    True
    >>> Symbol('foo')
    foo
    """

    # Registry of every symbol created so far, keyed by name.
    symbols: t.ClassVar[dict[str, Symbol]] = {}

    def __new__(cls, name: str) -> Symbol:
        existing = cls.symbols.get(name)
        if existing is not None:
            return existing

        created = super().__new__(cls)
        cls.symbols[name] = created
        return created

    def __init__(self, name: str) -> None:
        self.name = name

    def __repr__(self) -> str:
        return self.name

    def __getnewargs__(self) -> tuple[t.Any, ...]:
        # Supplies the name to ``__new__`` when the object is reconstructed,
        # so the interned instance is looked up again.
        return (self.name,)
def make_id(obj: object) -> c.Hashable:
    """Return a stable, hashable key for a receiver or sender, suitable for
    use as a dict key or set member.
    """
    if isinstance(obj, (str, int)):
        # Instances with the same value always compare equal and have the same
        # hash, even if the id may change.
        return obj

    if not inspect.ismethod(obj):
        # Assume other types are not hashable but will always be the same
        # instance.
        return id(obj)

    # The id of a bound method is not stable, but the id of the unbound
    # function and instance are.
    func_id = id(obj.__func__)
    self_id = id(obj.__self__)
    return func_id, self_id
def make_ref(obj: T, callback: c.Callable[[ref[T]], None] | None = None) -> ref[T]:
    """Create a weak reference to ``obj``, using ``WeakMethod`` for bound
    methods and a plain ``ref`` for everything else.
    """
    ref_factory = WeakMethod if inspect.ismethod(obj) else ref
    return ref_factory(obj, callback)  # type: ignore[arg-type, return-value]

View File

@@ -0,0 +1,512 @@
from __future__ import annotations
import collections.abc as c
import sys
import typing as t
import weakref
from collections import defaultdict
from contextlib import contextmanager
from functools import cached_property
from inspect import iscoroutinefunction
from ._utilities import make_id
from ._utilities import make_ref
from ._utilities import Symbol
F = t.TypeVar("F", bound=c.Callable[..., t.Any])
ANY = Symbol("ANY")
"""Symbol for "any sender"."""
ANY_ID = 0
class Signal:
"""A notification emitter.
:param doc: The docstring for the signal.
"""
ANY = ANY
"""An alias for the :data:`~blinker.ANY` sender symbol."""
set_class: type[set[t.Any]] = set
"""The set class to use for tracking connected receivers and senders.
Python's ``set`` is unordered. If receivers must be dispatched in the order
they were connected, an ordered set implementation can be used.
.. versionadded:: 1.7
"""
@cached_property
def receiver_connected(self) -> Signal:
"""Emitted at the end of each :meth:`connect` call.
The signal sender is the signal instance, and the :meth:`connect`
arguments are passed through: ``receiver``, ``sender``, and ``weak``.
.. versionadded:: 1.2
"""
return Signal(doc="Emitted after a receiver connects.")
@cached_property
def receiver_disconnected(self) -> Signal:
    """Emitted at the end of each :meth:`disconnect` call.

    The sender is the signal instance, and the :meth:`disconnect` arguments
    are passed through: ``receiver`` and ``sender``.

    This signal is emitted **only** when :meth:`disconnect` is called
    explicitly. This signal cannot be emitted by an automatic disconnect
    when a weakly referenced receiver or sender goes out of scope, as the
    instance is no longer available to be used as the sender for this
    signal.

    An alternative approach is available by subscribing to
    :attr:`receiver_connected` and setting up a custom weakref cleanup
    callback on weak receivers and senders.

    .. versionadded:: 1.2
    """
    return Signal(doc="Emitted after a receiver disconnects.")
def __init__(self, doc: str | None = None) -> None:
if doc:
self.__doc__ = doc
self.receivers: dict[
t.Any, weakref.ref[c.Callable[..., t.Any]] | c.Callable[..., t.Any]
] = {}
"""The map of connected receivers. Useful to quickly check if any
receivers are connected to the signal: ``if s.receivers:``. The
structure and data is not part of the public API, but checking its
boolean value is.
"""
self.is_muted: bool = False
self._by_receiver: dict[t.Any, set[t.Any]] = defaultdict(self.set_class)
self._by_sender: dict[t.Any, set[t.Any]] = defaultdict(self.set_class)
self._weak_senders: dict[t.Any, weakref.ref[t.Any]] = {}
def connect(self, receiver: F, sender: t.Any = ANY, weak: bool = True) -> F:
"""Connect ``receiver`` to be called when the signal is sent by
``sender``.
:param receiver: The callable to call when :meth:`send` is called with
the given ``sender``, passing ``sender`` as a positional argument
along with any extra keyword arguments.
:param sender: Any object or :data:`ANY`. ``receiver`` will only be
called when :meth:`send` is called with this sender. If ``ANY``, the
receiver will be called for any sender. A receiver may be connected
to multiple senders by calling :meth:`connect` multiple times.
:param weak: Track the receiver with a :mod:`weakref`. The receiver will
be automatically disconnected when it is garbage collected. When
connecting a receiver defined within a function, set to ``False``,
otherwise it will be disconnected when the function scope ends.
"""
receiver_id = make_id(receiver)
sender_id = ANY_ID if sender is ANY else make_id(sender)
if weak:
self.receivers[receiver_id] = make_ref(
receiver, self._make_cleanup_receiver(receiver_id)
)
else:
self.receivers[receiver_id] = receiver
self._by_sender[sender_id].add(receiver_id)
self._by_receiver[receiver_id].add(sender_id)
if sender is not ANY and sender_id not in self._weak_senders:
# store a cleanup for weakref-able senders
try:
self._weak_senders[sender_id] = make_ref(
sender, self._make_cleanup_sender(sender_id)
)
except TypeError:
pass
if "receiver_connected" in self.__dict__ and self.receiver_connected.receivers:
try:
self.receiver_connected.send(
self, receiver=receiver, sender=sender, weak=weak
)
except TypeError:
# TODO no explanation or test for this
self.disconnect(receiver, sender)
raise
return receiver
def connect_via(self, sender: t.Any, weak: bool = False) -> c.Callable[[F], F]:
"""Connect the decorated function to be called when the signal is sent
by ``sender``.
The decorated function will be called when :meth:`send` is called with
the given ``sender``, passing ``sender`` as a positional argument along
with any extra keyword arguments.
:param sender: Any object or :data:`ANY`. ``receiver`` will only be
called when :meth:`send` is called with this sender. If ``ANY``, the
receiver will be called for any sender. A receiver may be connected
to multiple senders by calling :meth:`connect` multiple times.
:param weak: Track the receiver with a :mod:`weakref`. The receiver will
be automatically disconnected when it is garbage collected. When
connecting a receiver defined within a function, set to ``False``,
otherwise it will be disconnected when the function scope ends.=
.. versionadded:: 1.1
"""
def decorator(fn: F) -> F:
self.connect(fn, sender, weak)
return fn
return decorator
@contextmanager
def connected_to(
self, receiver: c.Callable[..., t.Any], sender: t.Any = ANY
) -> c.Generator[None, None, None]:
"""A context manager that temporarily connects ``receiver`` to the
signal while a ``with`` block executes. When the block exits, the
receiver is disconnected. Useful for tests.
:param receiver: The callable to call when :meth:`send` is called with
the given ``sender``, passing ``sender`` as a positional argument
along with any extra keyword arguments.
:param sender: Any object or :data:`ANY`. ``receiver`` will only be
called when :meth:`send` is called with this sender. If ``ANY``, the
receiver will be called for any sender.
.. versionadded:: 1.1
"""
self.connect(receiver, sender=sender, weak=False)
try:
yield None
finally:
self.disconnect(receiver)
@contextmanager
def muted(self) -> c.Generator[None, None, None]:
"""A context manager that temporarily disables the signal. No receivers
will be called if the signal is sent, until the ``with`` block exits.
Useful for tests.
"""
self.is_muted = True
try:
yield None
finally:
self.is_muted = False
def send(
self,
sender: t.Any | None = None,
/,
*,
_async_wrapper: c.Callable[
[c.Callable[..., c.Coroutine[t.Any, t.Any, t.Any]]], c.Callable[..., t.Any]
]
| None = None,
**kwargs: t.Any,
) -> list[tuple[c.Callable[..., t.Any], t.Any]]:
"""Call all receivers that are connected to the given ``sender``
or :data:`ANY`. Each receiver is called with ``sender`` as a positional
argument along with any extra keyword arguments. Return a list of
``(receiver, return value)`` tuples.
The order receivers are called is undefined, but can be influenced by
setting :attr:`set_class`.
If a receiver raises an exception, that exception will propagate up.
This makes debugging straightforward, with an assumption that correctly
implemented receivers will not raise.
:param sender: Call receivers connected to this sender, in addition to
those connected to :data:`ANY`.
:param _async_wrapper: Will be called on any receivers that are async
coroutines to turn them into sync callables. For example, could run
the receiver with an event loop.
:param kwargs: Extra keyword arguments to pass to each receiver.
.. versionchanged:: 1.7
Added the ``_async_wrapper`` argument.
"""
if self.is_muted:
return []
results = []
for receiver in self.receivers_for(sender):
if iscoroutinefunction(receiver):
if _async_wrapper is None:
raise RuntimeError("Cannot send to a coroutine function.")
result = _async_wrapper(receiver)(sender, **kwargs)
else:
result = receiver(sender, **kwargs)
results.append((receiver, result))
return results
async def send_async(
self,
sender: t.Any | None = None,
/,
*,
_sync_wrapper: c.Callable[
[c.Callable[..., t.Any]], c.Callable[..., c.Coroutine[t.Any, t.Any, t.Any]]
]
| None = None,
**kwargs: t.Any,
) -> list[tuple[c.Callable[..., t.Any], t.Any]]:
"""Await all receivers that are connected to the given ``sender``
or :data:`ANY`. Each receiver is called with ``sender`` as a positional
argument along with any extra keyword arguments. Return a list of
``(receiver, return value)`` tuples.
The order receivers are called is undefined, but can be influenced by
setting :attr:`set_class`.
If a receiver raises an exception, that exception will propagate up.
This makes debugging straightforward, with an assumption that correctly
implemented receivers will not raise.
:param sender: Call receivers connected to this sender, in addition to
those connected to :data:`ANY`.
:param _sync_wrapper: Will be called on any receivers that are sync
callables to turn them into async coroutines. For example,
could call the receiver in a thread.
:param kwargs: Extra keyword arguments to pass to each receiver.
.. versionadded:: 1.7
"""
if self.is_muted:
return []
results = []
for receiver in self.receivers_for(sender):
if not iscoroutinefunction(receiver):
if _sync_wrapper is None:
raise RuntimeError("Cannot send to a non-coroutine function.")
result = await _sync_wrapper(receiver)(sender, **kwargs)
else:
result = await receiver(sender, **kwargs)
results.append((receiver, result))
return results
def has_receivers_for(self, sender: t.Any) -> bool:
"""Check if there is at least one receiver that will be called with the
given ``sender``. A receiver connected to :data:`ANY` will always be
called, regardless of sender. Does not check if weakly referenced
receivers are still live. See :meth:`receivers_for` for a stronger
search.
:param sender: Check for receivers connected to this sender, in addition
to those connected to :data:`ANY`.
"""
if not self.receivers:
return False
if self._by_sender[ANY_ID]:
return True
if sender is ANY:
return False
return make_id(sender) in self._by_sender
def receivers_for(
self, sender: t.Any
) -> c.Generator[c.Callable[..., t.Any], None, None]:
"""Yield each receiver to be called for ``sender``, in addition to those
to be called for :data:`ANY`. Weakly referenced receivers that are not
live will be disconnected and skipped.
:param sender: Yield receivers connected to this sender, in addition
to those connected to :data:`ANY`.
"""
# TODO: test receivers_for(ANY)
if not self.receivers:
return
sender_id = make_id(sender)
if sender_id in self._by_sender:
ids = self._by_sender[ANY_ID] | self._by_sender[sender_id]
else:
ids = self._by_sender[ANY_ID].copy()
for receiver_id in ids:
receiver = self.receivers.get(receiver_id)
if receiver is None:
continue
if isinstance(receiver, weakref.ref):
strong = receiver()
if strong is None:
self._disconnect(receiver_id, ANY_ID)
continue
yield strong
else:
yield receiver
def disconnect(self, receiver: c.Callable[..., t.Any], sender: t.Any = ANY) -> None:
"""Disconnect ``receiver`` from being called when the signal is sent by
``sender``.
:param receiver: A connected receiver callable.
:param sender: Disconnect from only this sender. By default, disconnect
from all senders.
"""
sender_id: c.Hashable
if sender is ANY:
sender_id = ANY_ID
else:
sender_id = make_id(sender)
receiver_id = make_id(receiver)
self._disconnect(receiver_id, sender_id)
if (
"receiver_disconnected" in self.__dict__
and self.receiver_disconnected.receivers
):
self.receiver_disconnected.send(self, receiver=receiver, sender=sender)
def _disconnect(self, receiver_id: c.Hashable, sender_id: c.Hashable) -> None:
if sender_id == ANY_ID:
if self._by_receiver.pop(receiver_id, None) is not None:
for bucket in self._by_sender.values():
bucket.discard(receiver_id)
self.receivers.pop(receiver_id, None)
else:
self._by_sender[sender_id].discard(receiver_id)
self._by_receiver[receiver_id].discard(sender_id)
def _make_cleanup_receiver(
self, receiver_id: c.Hashable
) -> c.Callable[[weakref.ref[c.Callable[..., t.Any]]], None]:
"""Create a callback function to disconnect a weakly referenced
receiver when it is garbage collected.
"""
def cleanup(ref: weakref.ref[c.Callable[..., t.Any]]) -> None:
# If the interpreter is shutting down, disconnecting can result in a
# weird ignored exception. Don't call it in that case.
if not sys.is_finalizing():
self._disconnect(receiver_id, ANY_ID)
return cleanup
def _make_cleanup_sender(
self, sender_id: c.Hashable
) -> c.Callable[[weakref.ref[t.Any]], None]:
"""Create a callback function to disconnect all receivers for a weakly
referenced sender when it is garbage collected.
"""
assert sender_id != ANY_ID
def cleanup(ref: weakref.ref[t.Any]) -> None:
self._weak_senders.pop(sender_id, None)
for receiver_id in self._by_sender.pop(sender_id, ()):
self._by_receiver[receiver_id].discard(sender_id)
return cleanup
def _cleanup_bookkeeping(self) -> None:
"""Prune unused sender/receiver bookkeeping. Not threadsafe.
Connecting & disconnecting leaves behind a small amount of bookkeeping
data. Typical workloads using Blinker, for example in most web apps,
Flask, CLI scripts, etc., are not adversely affected by this
bookkeeping.
With a long-running process performing dynamic signal routing with high
volume, e.g. connecting to function closures, senders are all unique
object instances. Doing all of this over and over may cause memory usage
to grow due to extraneous bookkeeping. (An empty ``set`` for each stale
sender/receiver pair.)
This method will prune that bookkeeping away, with the caveat that such
pruning is not threadsafe. The risk is that cleanup of a fully
disconnected receiver/sender pair occurs while another thread is
connecting that same pair. If you are in the highly dynamic, unique
receiver/sender situation that has lead you to this method, that failure
mode is perhaps not a big deal for you.
"""
for mapping in (self._by_sender, self._by_receiver):
for ident, bucket in list(mapping.items()):
if not bucket:
mapping.pop(ident, None)
def _clear_state(self) -> None:
"""Disconnect all receivers and senders. Useful for tests."""
self._weak_senders.clear()
self.receivers.clear()
self._by_sender.clear()
self._by_receiver.clear()
class NamedSignal(Signal):
    """A named generic notification emitter. The name is not used by the signal
    itself, but matches the key in the :class:`Namespace` that it belongs to.

    :param name: The name of the signal within the namespace.
    :param doc: The docstring for the signal.
    """

    def __init__(self, name: str, doc: str | None = None) -> None:
        super().__init__(doc)

        #: The name of this signal.
        self.name: str = name

    def __repr__(self) -> str:
        # Drop the closing ">" of the inherited repr and append the name.
        base = super().__repr__()
        return f"{base[:-1]}; {self.name!r}>"  # noqa: E702
class Namespace(dict[str, NamedSignal]):
    """A dict mapping names to signals."""

    def signal(self, name: str, doc: str | None = None) -> NamedSignal:
        """Return the :class:`NamedSignal` for the given ``name``, creating it
        if required. Repeated calls with the same name return the same signal.

        :param name: The name of the signal.
        :param doc: The docstring of the signal. Only used when the signal is
            created by this call.
        """
        if name not in self:
            self[name] = NamedSignal(name, doc)

        return self[name]
class _PNamespaceSignal(t.Protocol):
def __call__(self, name: str, doc: str | None = None) -> NamedSignal: ...
default_namespace: Namespace = Namespace()
"""A default :class:`Namespace` for creating named signals. :func:`signal`
creates a :class:`NamedSignal` in this namespace.
"""
# Bound method of default_namespace, exposed as a module-level callable.
signal: _PNamespaceSignal = default_namespace.signal
"""Return a :class:`NamedSignal` in :data:`default_namespace` with the given
``name``, creating it if required. Repeated calls with the same name return the
same signal.
"""

View File

@@ -0,0 +1,78 @@
Metadata-Version: 2.4
Name: certifi
Version: 2025.10.5
Summary: Python package for providing Mozilla's CA Bundle.
Home-page: https://github.com/certifi/python-certifi
Author: Kenneth Reitz
Author-email: me@kennethreitz.com
License: MPL-2.0
Project-URL: Source, https://github.com/certifi/python-certifi
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)
Classifier: Natural Language :: English
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: 3.14
Requires-Python: >=3.7
License-File: LICENSE
Dynamic: author
Dynamic: author-email
Dynamic: classifier
Dynamic: description
Dynamic: home-page
Dynamic: license
Dynamic: license-file
Dynamic: project-url
Dynamic: requires-python
Dynamic: summary
Certifi: Python SSL Certificates
================================
Certifi provides Mozilla's carefully curated collection of Root Certificates for
validating the trustworthiness of SSL certificates while verifying the identity
of TLS hosts. It has been extracted from the `Requests`_ project.
Installation
------------
``certifi`` is available on PyPI. Simply install it with ``pip``::
$ pip install certifi
Usage
-----
To reference the installed certificate authority (CA) bundle, you can use the
built-in function::
>>> import certifi
>>> certifi.where()
'/usr/local/lib/python3.7/site-packages/certifi/cacert.pem'
Or from the command line::
$ python -m certifi
/usr/local/lib/python3.7/site-packages/certifi/cacert.pem
Enjoy!
.. _`Requests`: https://requests.readthedocs.io/en/master/
Addition/Removal of Certificates
--------------------------------
Certifi does not support any addition/removal or other modification of the
CA trust store content. This project is intended to provide a reliable and
highly portable root of trust to python deployments. Look to upstream projects
for methods to use alternate trust.

View File

@@ -0,0 +1,14 @@
certifi-2025.10.5.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
certifi-2025.10.5.dist-info/METADATA,sha256=RzyR4sT6xRN1pNNy24IHVOlZuDJh1BNfaMa04zEadtk,2474
certifi-2025.10.5.dist-info/RECORD,,
certifi-2025.10.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
certifi-2025.10.5.dist-info/licenses/LICENSE,sha256=6TcW2mucDVpKHfYP5pWzcPBpVgPSH2-D8FPkLPwQyvc,989
certifi-2025.10.5.dist-info/top_level.txt,sha256=KMu4vUCfsjLrkPbSNdgdekS-pVJzBAJFO__nI8NF6-U,8
certifi/__init__.py,sha256=jWkaYHMk4oIPSSBEK5bLMbO_qrkyNm_cRFx-D16-3Ks,94
certifi/__main__.py,sha256=xBBoj905TUWBLRGANOcf7oi6e-3dMP4cEoG9OyMs11g,243
certifi/__pycache__/__init__.cpython-311.pyc,,
certifi/__pycache__/__main__.cpython-311.pyc,,
certifi/__pycache__/core.cpython-311.pyc,,
certifi/cacert.pem,sha256=IIn8WiWDZAH67pn3IkYLAbOTmZdGoPuBeUNmbW7MBFg,291366
certifi/core.py,sha256=XFXycndG5pf37ayeF8N32HUuDafsyhkVMbO4BAPWHa0,3394
certifi/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0

View File

@@ -0,0 +1,5 @@
Wheel-Version: 1.0
Generator: setuptools (80.9.0)
Root-Is-Purelib: true
Tag: py3-none-any

View File

@@ -0,0 +1,20 @@
This package contains a modified version of ca-bundle.crt:
ca-bundle.crt -- Bundle of CA Root Certificates
This is a bundle of X.509 certificates of public Certificate Authorities
(CA). These were automatically extracted from Mozilla's root certificates
file (certdata.txt). This file can be found in the mozilla source tree:
https://hg.mozilla.org/mozilla-central/file/tip/security/nss/lib/ckfw/builtins/certdata.txt
It contains the certificates in PEM format and therefore
can be directly used with curl / libcurl / php_curl, or with
an Apache+mod_ssl webserver for SSL client authentication.
Just configure this file as the SSLCACertificateFile.#
***** BEGIN LICENSE BLOCK *****
This Source Code Form is subject to the terms of the Mozilla Public License,
v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain
one at http://mozilla.org/MPL/2.0/.
***** END LICENSE BLOCK *****
@(#) $RCSfile: certdata.txt,v $ $Revision: 1.80 $ $Date: 2011/11/03 15:11:58 $

View File

@@ -0,0 +1,4 @@
from .core import contents, where
# Names imported from .core above that make up the package's public API.
__all__ = ["contents", "where"]
# Package version string.
__version__ = "2025.10.05"

View File

@@ -0,0 +1,12 @@
import argparse
from certifi import contents, where
# Command-line entry point: with -c/--contents print the result of
# contents(), otherwise print the result of where().
parser = argparse.ArgumentParser()
parser.add_argument("-c", "--contents", action="store_true")
args = parser.parse_args()

if args.contents:
    print(contents())
else:
    print(where())

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,83 @@
"""
certifi.py
~~~~~~~~~~
This module returns the installation location of cacert.pem or its contents.
"""
import sys
import atexit
def exit_cacert_ctx() -> None:
    """Exit the context manager stored in ``_CACERT_CTX``.

    Registered with :mod:`atexit` by ``where()`` after it has entered the
    context manager.
    """
    _CACERT_CTX.__exit__(None, None, None)  # type: ignore[union-attr]
if sys.version_info >= (3, 11):

    from importlib.resources import as_file, files

    _CACERT_CTX = None
    _CACERT_PATH = None

    def where() -> str:
        """Return the filesystem path of ``cacert.pem``.

        The path is resolved on the first call and cached in a module global
        for later calls.
        """
        # This is slightly terrible, but we want to delay extracting the file
        # in cases where we're inside of a zipimport situation until someone
        # actually calls where(), but we don't want to re-extract the file
        # on every call of where(), so we'll do it once then store it in a
        # global variable.
        global _CACERT_CTX
        global _CACERT_PATH
        if _CACERT_PATH is None:
            # This is slightly janky, the importlib.resources API wants you to
            # manage the cleanup of this file, so it doesn't actually return a
            # path, it returns a context manager that will give you the path
            # when you enter it and will do any cleanup when you leave it. In
            # the common case of not needing a temporary file, it will just
            # return the file system location and the __exit__() is a no-op.
            #
            # We also have to hold onto the actual context manager, because
            # it will do the cleanup whenever it gets garbage collected, so
            # we will also store that at the global level as well.
            _CACERT_CTX = as_file(files("certifi").joinpath("cacert.pem"))
            _CACERT_PATH = str(_CACERT_CTX.__enter__())
            atexit.register(exit_cacert_ctx)

        return _CACERT_PATH

    def contents() -> str:
        """Return the text of ``cacert.pem``, decoded as ASCII."""
        return files("certifi").joinpath("cacert.pem").read_text(encoding="ascii")

else:

    from importlib.resources import path as get_path, read_text

    _CACERT_CTX = None
    _CACERT_PATH = None

    def where() -> str:
        """Return the filesystem path of ``cacert.pem``.

        The path is resolved on the first call and cached in a module global
        for later calls.
        """
        # This is slightly terrible, but we want to delay extracting the
        # file in cases where we're inside of a zipimport situation until
        # someone actually calls where(), but we don't want to re-extract
        # the file on every call of where(), so we'll do it once then store
        # it in a global variable.
        global _CACERT_CTX
        global _CACERT_PATH
        if _CACERT_PATH is None:
            # This is slightly janky, the importlib.resources API wants you
            # to manage the cleanup of this file, so it doesn't actually
            # return a path, it returns a context manager that will give
            # you the path when you enter it and will do any cleanup when
            # you leave it. In the common case of not needing a temporary
            # file, it will just return the file system location and the
            # __exit__() is a no-op.
            #
            # We also have to hold onto the actual context manager, because
            # it will do the cleanup whenever it gets garbage collected, so
            # we will also store that at the global level as well.
            _CACERT_CTX = get_path("certifi", "cacert.pem")
            _CACERT_PATH = str(_CACERT_CTX.__enter__())
            atexit.register(exit_cacert_ctx)

        return _CACERT_PATH

    def contents() -> str:
        """Return the text of ``cacert.pem``, decoded as ASCII."""
        return read_text("certifi", "cacert.pem", encoding="ascii")

View File

@@ -0,0 +1,764 @@
Metadata-Version: 2.4
Name: charset-normalizer
Version: 3.4.4
Summary: The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.
Author-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
Maintainer-email: "Ahmed R. TAHRI" <tahri.ahmed@proton.me>
License: MIT
Project-URL: Changelog, https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md
Project-URL: Documentation, https://charset-normalizer.readthedocs.io/
Project-URL: Code, https://github.com/jawah/charset_normalizer
Project-URL: Issue tracker, https://github.com/jawah/charset_normalizer/issues
Keywords: encoding,charset,charset-detector,detector,normalization,unicode,chardet,detect
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Programming Language :: Python :: 3.14
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Classifier: Topic :: Text Processing :: Linguistic
Classifier: Topic :: Utilities
Classifier: Typing :: Typed
Requires-Python: >=3.7
Description-Content-Type: text/markdown
License-File: LICENSE
Provides-Extra: unicode-backport
Dynamic: license-file
<h1 align="center">Charset Detection, for Everyone 👋</h1>
<p align="center">
<sup>The Real First Universal Charset Detector</sup><br>
<a href="https://pypi.org/project/charset-normalizer">
<img src="https://img.shields.io/pypi/pyversions/charset_normalizer.svg?orange=blue" />
</a>
<a href="https://pepy.tech/project/charset-normalizer/">
<img alt="Download Count Total" src="https://static.pepy.tech/badge/charset-normalizer/month" />
</a>
<a href="https://bestpractices.coreinfrastructure.org/projects/7297">
<img src="https://bestpractices.coreinfrastructure.org/projects/7297/badge">
</a>
</p>
<p align="center">
<sup><i>Featured Packages</i></sup><br>
<a href="https://github.com/jawah/niquests">
<img alt="Static Badge" src="https://img.shields.io/badge/Niquests-Most_Advanced_HTTP_Client-cyan">
</a>
<a href="https://github.com/jawah/wassima">
<img alt="Static Badge" src="https://img.shields.io/badge/Wassima-Certifi_Replacement-cyan">
</a>
</p>
<p align="center">
<sup><i>In other language (unofficial port - by the community)</i></sup><br>
<a href="https://github.com/nickspring/charset-normalizer-rs">
<img alt="Static Badge" src="https://img.shields.io/badge/Rust-red">
</a>
</p>
> A library that helps you read text from an unknown charset encoding.<br /> Motivated by `chardet`,
> I'm trying to resolve the issue by taking a new approach.
> All IANA character set names for which the Python core library provides codecs are supported.
<p align="center">
>>>>> <a href="https://charsetnormalizerweb.ousret.now.sh" target="_blank">👉 Try Me Online Now, Then Adopt Me 👈 </a> <<<<<
</p>
This project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**.
| Feature | [Chardet](https://github.com/chardet/chardet) | Charset Normalizer | [cChardet](https://github.com/PyYoshi/cChardet) |
|--------------------------------------------------|:---------------------------------------------:|:--------------------------------------------------------------------------------------------------:|:-----------------------------------------------:|
| `Fast` | ❌ | ✅ | ✅ |
| `Universal**` | ❌ | ✅ | ❌ |
| `Reliable` **without** distinguishable standards | ❌ | ✅ | ✅ |
| `Reliable` **with** distinguishable standards | ✅ | ✅ | ✅ |
| `License` | LGPL-2.1<br>_restrictive_ | MIT | MPL-1.1<br>_restrictive_ |
| `Native Python` | ✅ | ✅ | ❌ |
| `Detect spoken language` | ❌ | ✅ | N/A |
| `UnicodeDecodeError Safety` | ❌ | ✅ | ❌ |
| `Whl Size (min)` | 193.6 kB | 42 kB | ~200 kB |
| `Supported Encoding` | 33 | 🎉 [99](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) | 40 |
<p align="center">
<img src="https://i.imgflip.com/373iay.gif" alt="Reading Normalized Text" width="226"/><img src="https://media.tenor.com/images/c0180f70732a18b4965448d33adba3d0/tenor.gif" alt="Cat Reading Text" width="200"/>
</p>
*\*\* : They are clearly using specific code for a specific encoding even if covering most of used one*<br>
## ⚡ Performance
This package offers better performance than its counterpart Chardet. Here are some numbers.
| Package | Accuracy | Mean per file (ms) | File per sec (est) |
|-----------------------------------------------|:--------:|:------------------:|:------------------:|
| [chardet](https://github.com/chardet/chardet) | 86 % | 63 ms | 16 file/sec |
| charset-normalizer | **98 %** | **10 ms** | 100 file/sec |
| Package | 99th percentile | 95th percentile | 50th percentile |
|-----------------------------------------------|:---------------:|:---------------:|:---------------:|
| [chardet](https://github.com/chardet/chardet) | 265 ms | 71 ms | 7 ms |
| charset-normalizer | 100 ms | 50 ms | 5 ms |
_updated as of december 2024 using CPython 3.12_
Chardet's performance on larger files (1MB+) is very poor. Expect a huge difference on large payloads.
> Stats are generated using 400+ files using default parameters. More details on used files, see GHA workflows.
> And yes, these results might change at any time. The dataset can be updated to include more files.
> The actual delays heavily depend on your CPU capabilities. The factors should remain the same.
> Keep in mind that the stats are generous and that Chardet accuracy vs our is measured using Chardet initial capability
> (e.g. Supported Encoding) Challenge-them if you want.
## ✨ Installation
Using pip:
```sh
pip install charset-normalizer -U
```
## 🚀 Basic Usage
### CLI
This package comes with a CLI.
```
usage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD]
file [file ...]
The Real First Universal Charset Detector. Discover originating encoding used
on text file. Normalize text to unicode.
positional arguments:
files File(s) to be analysed
optional arguments:
-h, --help show this help message and exit
-v, --verbose Display complementary information about file if any.
Stdout will contain logs about the detection process.
-a, --with-alternative
Output complementary possibilities if any. Top-level
JSON WILL be a list.
-n, --normalize Permit to normalize input file. If not set, program
does not write anything.
-m, --minimal Only output the charset detected to STDOUT. Disabling
JSON output.
-r, --replace Replace file when trying to normalize it instead of
creating a new one.
-f, --force Replace file without asking if you are sure, use this
flag with caution.
-t THRESHOLD, --threshold THRESHOLD
Define a custom maximum amount of chaos allowed in
decoded content. 0. <= chaos <= 1.
--version Show version information and exit.
```
```bash
normalizer ./data/sample.1.fr.srt
```
or
```bash
python -m charset_normalizer ./data/sample.1.fr.srt
```
🎉 Since version 1.4.0 the CLI produces easily usable stdout results in JSON format.
```json
{
"path": "/home/default/projects/charset_normalizer/data/sample.1.fr.srt",
"encoding": "cp1252",
"encoding_aliases": [
"1252",
"windows_1252"
],
"alternative_encodings": [
"cp1254",
"cp1256",
"cp1258",
"iso8859_14",
"iso8859_15",
"iso8859_16",
"iso8859_3",
"iso8859_9",
"latin_1",
"mbcs"
],
"language": "French",
"alphabets": [
"Basic Latin",
"Latin-1 Supplement"
],
"has_sig_or_bom": false,
"chaos": 0.149,
"coherence": 97.152,
"unicode_path": null,
"is_preferred": true
}
```
### Python
*Just print out normalized text*
```python
from charset_normalizer import from_path
results = from_path('./my_subtitle.srt')
print(str(results.best()))
```
*Upgrade your code without effort*
```python
from charset_normalizer import detect
```
The above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) BC result possible.
See the docs for advanced usage : [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/)
## 😇 Why
When I started using Chardet, I noticed that it was not suited to my expectations, and I wanted to propose a
reliable alternative using a completely different method. Also! I never back down on a good challenge!
I **don't care** about the **originating charset** encoding, because **two different tables** can
produce **two identical rendered strings.**
What I want is to get readable text, the best I can.
In a way, **I'm brute forcing text decoding.** How cool is that ? 😎
Don't confuse package **ftfy** with charset-normalizer or chardet. ftfy's goal is to repair broken Unicode strings, whereas charset-normalizer's goal is to convert raw files in an unknown encoding to Unicode.
## 🍰 How
- Discard all charset encoding tables that could not fit the binary content.
- Measure noise, or the mess once opened (by chunks) with a corresponding charset encoding.
- Extract matches with the lowest mess detected.
- Additionally, we measure coherence / probe for a language.
**Wait a minute**, what is noise/mess and coherence according to **YOU ?**
*Noise :* I opened hundreds of text files, **written by humans**, with the wrong encoding table. **I observed**, then
**I established** some ground rules about **what is obvious** when **it seems like** a mess (aka. defining noise in rendered text).
I know that my interpretation of what is noise is probably incomplete, feel free to contribute in order to
improve or rewrite it.
*Coherence :* For each language there is on earth, we have computed ranked letter appearance occurrences (the best we can). So I thought
that intel is worth something here. So I use those records against decoded text to check if I can detect intelligent design.
## ⚡ Known limitations
- Language detection is unreliable when text contains two or more languages sharing identical letters. (eg. HTML (english tags) + Turkish content (Sharing Latin characters))
- Every charset detector heavily depends on sufficient content. In common cases, do not bother running detection on very tiny content.
## ⚠️ About Python EOLs
**If you are running:**
- Python >=2.7,<3.5: Unsupported
- Python 3.5: charset-normalizer < 2.1
- Python 3.6: charset-normalizer < 3.1
- Python 3.7: charset-normalizer < 4.0
Upgrade your Python interpreter as soon as possible.
## 👤 Contributing
Contributions, issues and feature requests are very much welcome.<br />
Feel free to check [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute.
## 📝 License
Copyright © [Ahmed TAHRI @Ousret](https://github.com/Ousret).<br />
This project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed.
Characters frequencies used in this project © 2012 [Denny Vrandečić](http://simia.net/letters/)
## 💼 For Enterprise
Professional support for charset-normalizer is available as part of the [Tidelift
Subscription][1]. Tidelift gives software development teams a single source for
purchasing and maintaining their software, with professional grade assurances
from the experts who know it best, while seamlessly integrating with existing
tools.
[1]: https://tidelift.com/subscription/pkg/pypi-charset-normalizer?utm_source=pypi-charset-normalizer&utm_medium=readme
[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/7297/badge)](https://www.bestpractices.dev/projects/7297)
# Changelog
All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
## [3.4.4](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.4) (2025-10-13)
### Changed
- Bound `setuptools` to a specific constraint `setuptools>=68,<=81`.
- Raised upper bound of mypyc for the optional pre-built extension to v1.18.2
### Removed
- `setuptools-scm` as a build dependency.
### Misc
- Enforced hashes in `dev-requirements.txt` and created `ci-requirements.txt` for security purposes.
- Additional pre-built wheels for riscv64, s390x, and armv7l architectures.
- Restore `multiple.intoto.jsonl` in GitHub releases in addition to the individual attestation file per wheel.
## [3.4.3](https://github.com/Ousret/charset_normalizer/compare/3.4.2...3.4.3) (2025-08-09)
### Changed
- mypy(c) is no longer a required dependency at build time if `CHARSET_NORMALIZER_USE_MYPYC` isn't set to `1`. (#595) (#583)
- automatically lower confidence on small bytes samples that are not Unicode in `detect` output legacy function. (#391)
### Added
- Custom build backend to overcome inability to mark mypy as an optional dependency in the build phase.
- Support for Python 3.14
### Fixed
- sdist archive contained useless directories.
- automatically fallback on valid UTF-16 or UTF-32 even if the md says it's noisy. (#633)
### Misc
- SBOM are automatically published to the relevant GitHub release to comply with regulatory changes.
Each published wheel comes with its SBOM. We choose CycloneDX as the format.
- Prebuilt optimized wheels are no longer distributed by default for CPython 3.7 due to a change in cibuildwheel.
## [3.4.2](https://github.com/Ousret/charset_normalizer/compare/3.4.1...3.4.2) (2025-05-02)
### Fixed
- Addressed the DeprecationWarning in our CLI regarding `argparse.FileType` by backporting the target class into the package. (#591)
- Improved the overall reliability of the detector with CJK Ideographs. (#605) (#587)
### Changed
- Optional mypyc compilation upgraded to version 1.15 for Python >= 3.8
## [3.4.1](https://github.com/Ousret/charset_normalizer/compare/3.4.0...3.4.1) (2024-12-24)
### Changed
- Project metadata are now stored using `pyproject.toml` instead of `setup.cfg` using setuptools as the build backend.
- Enforce annotation delayed loading for a simpler and consistent types in the project.
- Optional mypyc compilation upgraded to version 1.14 for Python >= 3.8
### Added
- pre-commit configuration.
- noxfile.
### Removed
- `build-requirements.txt` as per using `pyproject.toml` native build configuration.
- `bin/integration.py` and `bin/serve.py` in favor of downstream integration test (see noxfile).
- `setup.cfg` in favor of `pyproject.toml` metadata configuration.
- Unused `utils.range_scan` function.
### Fixed
- Converting content to Unicode bytes may insert `utf_8` instead of preferred `utf-8`. (#572)
- Deprecation warning "'count' is passed as positional argument" when converting to Unicode bytes on Python 3.13+
## [3.4.0](https://github.com/Ousret/charset_normalizer/compare/3.3.2...3.4.0) (2024-10-08)
### Added
- Argument `--no-preemptive` in the CLI to prevent the detector from searching for hints.
- Support for Python 3.13 (#512)
### Fixed
- Relax the TypeError exception thrown when trying to compare a CharsetMatch with anything else than a CharsetMatch.
- Improved the general reliability of the detector based on user feedbacks. (#520) (#509) (#498) (#407) (#537)
- Declared charset in content (preemptive detection) not changed when converting to utf-8 bytes. (#381)
## [3.3.2](https://github.com/Ousret/charset_normalizer/compare/3.3.1...3.3.2) (2023-10-31)
### Fixed
- Unintentional memory usage regression when using large payloads that match several encodings (#376)
- Regression on some detection case showcased in the documentation (#371)
### Added
- Noise (md) probe that identifies malformed Arabic representation due to the presence of letters in isolated form (credit to my wife)
## [3.3.1](https://github.com/Ousret/charset_normalizer/compare/3.3.0...3.3.1) (2023-10-22)
### Changed
- Optional mypyc compilation upgraded to version 1.6.1 for Python >= 3.8
- Improved the general detection reliability based on reports from the community
## [3.3.0](https://github.com/Ousret/charset_normalizer/compare/3.2.0...3.3.0) (2023-09-30)
### Added
- Allow to execute the CLI (e.g. normalizer) through `python -m charset_normalizer.cli` or `python -m charset_normalizer`
- Support for 9 forgotten encoding that are supported by Python but unlisted in `encoding.aliases` as they have no alias (#323)
### Removed
- (internal) Redundant utils.is_ascii function and unused function is_private_use_only
- (internal) charset_normalizer.assets is moved inside charset_normalizer.constant
### Changed
- (internal) Unicode code blocks in constants are updated using the latest v15.0.0 definition to improve detection
- Optional mypyc compilation upgraded to version 1.5.1 for Python >= 3.8
### Fixed
- Unable to properly sort CharsetMatch when both chaos/noise and coherence were close due to an unreachable condition in \_\_lt\_\_ (#350)
## [3.2.0](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.2.0) (2023-06-07)
### Changed
- Typehint for function `from_path` no longer enforce `PathLike` as its first argument
- Minor improvement over the global detection reliability
### Added
- Introduce function `is_binary` that relies on main capabilities, and optimized to detect binaries
- Propagate `enable_fallback` argument throughout `from_bytes`, `from_path`, and `from_fp` that allow a deeper control over the detection (default True)
- Explicit support for Python 3.12
### Fixed
- Edge case detection failure where a file would contain 'very-long' camel cased word (Issue #289)
## [3.1.0](https://github.com/Ousret/charset_normalizer/compare/3.0.1...3.1.0) (2023-03-06)
### Added
- Argument `should_rename_legacy` for legacy function `detect` and disregard any new arguments without errors (PR #262)
### Removed
- Support for Python 3.6 (PR #260)
### Changed
- Optional speedup provided by mypy/c 1.0.1
## [3.0.1](https://github.com/Ousret/charset_normalizer/compare/3.0.0...3.0.1) (2022-11-18)
### Fixed
- Multi-bytes cutter/chunk generator did not always cut correctly (PR #233)
### Changed
- Speedup provided by mypy/c 0.990 on Python >= 3.7
## [3.0.0](https://github.com/Ousret/charset_normalizer/compare/2.1.1...3.0.0) (2022-10-20)
### Added
- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results
- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)
### Changed
- Build with static metadata using 'build' frontend
- Make the language detection stricter
- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1
### Fixed
- CLI with opt --normalize fail when using full path for files
- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it
- Sphinx warnings when generating the documentation
### Removed
- Coherence detector no longer return 'Simple English' instead return 'English'
- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'
- Breaking: Method `first()` and `best()` from CharsetMatch
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (is unreliable/conflict with ASCII)
- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
- Breaking: Top-level function `normalize`
- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
- Support for the backport `unicodedata2`
## [3.0.0rc1](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0rc1) (2022-10-18)
### Added
- Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results
- Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
- Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
### Changed
- Build with static metadata using 'build' frontend
- Make the language detection stricter
### Fixed
- CLI with opt --normalize fail when using full path for files
- TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it
### Removed
- Coherence detector no longer return 'Simple English' instead return 'English'
- Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'
## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21)
### Added
- `normalizer --version` now specify if current version provide extra speedup (meaning mypyc compilation whl)
### Removed
- Breaking: Method `first()` and `best()` from CharsetMatch
- UTF-7 will no longer appear as "detected" without a recognized SIG/mark (is unreliable/conflict with ASCII)
### Fixed
- Sphinx warnings when generating the documentation
## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)
### Changed
- Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1
### Removed
- Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
- Breaking: Top-level function `normalize`
- Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
- Support for the backport `unicodedata2`
## [2.1.1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...2.1.1) (2022-08-19)
### Deprecated
- Function `normalize` scheduled for removal in 3.0
### Changed
- Removed useless call to decode in fn is_unprintable (#206)
### Fixed
- Third-party library (i18n xgettext) crashing not recognizing utf_8 (PEP 263) with underscore from [@aleksandernovikov](https://github.com/aleksandernovikov) (#204)
## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19)
### Added
- Output the Unicode table version when running the CLI with `--version` (PR #194)
### Changed
- Re-use decoded buffer for single byte character sets from [@nijel](https://github.com/nijel) (PR #175)
- Fixing some performance bottlenecks from [@deedy5](https://github.com/deedy5) (PR #183)
### Fixed
- Workaround potential bug in cpython with Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space (PR #175)
- CLI default threshold aligned with the API threshold from [@oleksandr-kuzmenko](https://github.com/oleksandr-kuzmenko) (PR #181)
### Removed
- Support for Python 3.5 (PR #192)
### Deprecated
- Use of backport unicodedata from `unicodedata2` as Python is quickly catching up, scheduled for removal in 3.0 (PR #194)
## [2.0.12](https://github.com/Ousret/charset_normalizer/compare/2.0.11...2.0.12) (2022-02-12)
### Fixed
- ASCII miss-detection on rare cases (PR #170)
## [2.0.11](https://github.com/Ousret/charset_normalizer/compare/2.0.10...2.0.11) (2022-01-30)
### Added
- Explicit support for Python 3.11 (PR #164)
### Changed
- The logging behavior has been completely reviewed, now using only TRACE and DEBUG levels (PR #163 #165)
## [2.0.10](https://github.com/Ousret/charset_normalizer/compare/2.0.9...2.0.10) (2022-01-04)
### Fixed
- Fallback match entries might lead to UnicodeDecodeError for large bytes sequence (PR #154)
### Changed
- Skipping the language-detection (CD) on ASCII (PR #155)
## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03)
### Changed
- Moderating the logging impact (since 2.0.8) for specific environments (PR #147)
### Fixed
- Wrong logging level applied when setting kwarg `explain` to True (PR #146)
## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24)
### Changed
- Improvement over Vietnamese detection (PR #126)
- MD improvement on trailing data and long foreign (non-pure latin) data (PR #124)
- Efficiency improvements in cd/alphabet_languages from [@adbar](https://github.com/adbar) (PR #122)
- call sum() without an intermediary list following PEP 289 recommendations from [@adbar](https://github.com/adbar) (PR #129)
- Code style as refactored by Sourcery-AI (PR #131)
- Minor adjustment on the MD around european words (PR #133)
- Remove and replace SRTs from assets / tests (PR #139)
- Initialize the library logger with a `NullHandler` by default from [@nmaynes](https://github.com/nmaynes) (PR #135)
- Setting kwarg `explain` to True will add provisionally (bounded to function lifespan) a specific stream handler (PR #135)
### Fixed
- Fix large (misleading) sequence giving UnicodeDecodeError (PR #137)
- Avoid using too insignificant chunk (PR #137)
### Added
- Add and expose function `set_logging_handler` to configure a specific StreamHandler from [@nmaynes](https://github.com/nmaynes) (PR #135)
- Add `CHANGELOG.md` entries, format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) (PR #141)
## [2.0.7](https://github.com/Ousret/charset_normalizer/compare/2.0.6...2.0.7) (2021-10-11)
### Added
- Add support for Kazakh (Cyrillic) language detection (PR #109)
### Changed
- Further, improve inferring the language from a given single-byte code page (PR #112)
- Vainly trying to leverage PEP263 when PEP3120 is not supported (PR #116)
- Refactoring for potential performance improvements in loops from [@adbar](https://github.com/adbar) (PR #113)
- Various detection improvement (MD+CD) (PR #117)
### Removed
- Remove redundant logging entry about detected language(s) (PR #115)
### Fixed
- Fix a minor inconsistency between Python 3.5 and other versions regarding language detection (PR #117 #102)
## [2.0.6](https://github.com/Ousret/charset_normalizer/compare/2.0.5...2.0.6) (2021-09-18)
### Fixed
- Unforeseen regression with the loss of the backward-compatibility with some older minor of Python 3.5.x (PR #100)
- Fix CLI crash when using --minimal output in certain cases (PR #103)
### Changed
- Minor improvement to the detection efficiency (less than 1%) (PR #106 #101)
## [2.0.5](https://github.com/Ousret/charset_normalizer/compare/2.0.4...2.0.5) (2021-09-14)
### Changed
- The project now complies with: flake8, mypy, isort and black to ensure a better overall quality (PR #81)
- The BC-support with v1.x was improved, the old staticmethods are restored (PR #82)
- The Unicode detection is slightly improved (PR #93)
- Add syntax sugar \_\_bool\_\_ for results CharsetMatches list-container (PR #91)
### Removed
- The project no longer raise warning on tiny content given for detection, will be simply logged as warning instead (PR #92)
### Fixed
- In some rare case, the chunks extractor could cut in the middle of a multi-byte character and could mislead the mess detection (PR #95)
- Some rare 'space' characters could trip up the UnprintablePlugin/Mess detection (PR #96)
- The MANIFEST.in was not exhaustive (PR #78)
## [2.0.4](https://github.com/Ousret/charset_normalizer/compare/2.0.3...2.0.4) (2021-07-30)
### Fixed
- The CLI no longer raise an unexpected exception when no encoding has been found (PR #70)
- Fix accessing the 'alphabets' property when the payload contains surrogate characters (PR #68)
- The logger could mislead (explain=True) on detected languages and the impact of one MBCS match (PR #72)
- Submatch factoring could be wrong in rare edge cases (PR #72)
- Multiple files given to the CLI were ignored when publishing results to STDOUT. (After the first path) (PR #72)
- Fix line endings from CRLF to LF for certain project files (PR #67)
### Changed
- Adjust the MD to lower the sensitivity, thus improving the global detection reliability (PR #69 #76)
- Allow fallback on specified encoding if any (PR #71)
## [2.0.3](https://github.com/Ousret/charset_normalizer/compare/2.0.2...2.0.3) (2021-07-16)
### Changed
- Part of the detection mechanism has been improved to be less sensitive, resulting in more accurate detection results. Especially ASCII. (PR #63)
- According to the community wishes, the detection will fall back on ASCII or UTF-8 in a last-resort case. (PR #64)
## [2.0.2](https://github.com/Ousret/charset_normalizer/compare/2.0.1...2.0.2) (2021-07-15)
### Fixed
- Empty/Too small JSON payload miss-detection fixed. Report from [@tseaver](https://github.com/tseaver) (PR #59)
### Changed
- Don't inject unicodedata2 into sys.modules from [@akx](https://github.com/akx) (PR #57)
## [2.0.1](https://github.com/Ousret/charset_normalizer/compare/2.0.0...2.0.1) (2021-07-13)
### Fixed
- Make it work where there isn't a filesystem available, dropping assets frequencies.json. Report from [@sethmlarson](https://github.com/sethmlarson). (PR #55)
- Using explain=False permanently disable the verbose output in the current runtime (PR #47)
- One log entry (language target preemptive) was not show in logs when using explain=True (PR #47)
- Fix undesired exception (ValueError) on getitem of instance CharsetMatches (PR #52)
### Changed
- Public function normalize default args values were not aligned with from_bytes (PR #53)
### Added
- You may now use charset aliases in cp_isolation and cp_exclusion arguments (PR #47)
## [2.0.0](https://github.com/Ousret/charset_normalizer/compare/1.4.1...2.0.0) (2021-07-02)
### Changed
- 4 to 5 times faster than the previous 1.4.0 release. At least 2x faster than Chardet.
- Emphasis has been put on UTF-8 detection, which should now be nearly instantaneous.
- The backward compatibility with Chardet has been greatly improved. The legacy detect function returns an identical charset name whenever possible.
- The detection mechanism has been slightly improved, now Turkish content is detected correctly (most of the time)
- The program has been rewritten to ease the readability and maintainability. (+Using static typing)
- utf_7 detection has been reinstated.
### Removed
- This package no longer require anything when used with Python 3.5 (Dropped cached_property)
- Removed support for these languages: Catalan, Esperanto, Kazakh, Basque, Volapük, Azeri, Galician, Nynorsk, Macedonian, and Serbocroatian.
- The exception hook on UnicodeDecodeError has been removed.
### Deprecated
- Methods coherence_non_latin, w_counter, chaos_secondary_pass of the class CharsetMatch are now deprecated and scheduled for removal in v3.0
### Fixed
- The CLI output used the relative path of the file(s). Should be absolute.
## [1.4.1](https://github.com/Ousret/charset_normalizer/compare/1.4.0...1.4.1) (2021-05-28)
### Fixed
- Logger configuration/usage no longer conflict with others (PR #44)
## [1.4.0](https://github.com/Ousret/charset_normalizer/compare/1.3.9...1.4.0) (2021-05-21)
### Removed
- Using standard logging instead of using the package loguru.
- Dropping nose test framework in favor of the maintained pytest.
- Choose to not use dragonmapper package to help with gibberish Chinese/CJK text.
- Require cached_property only for Python 3.5 due to constraint. Dropping for every other interpreter version.
- Stop support for UTF-7 that does not contain a SIG.
- Dropping PrettyTable, replaced with pure JSON output in CLI.
### Fixed
- BOM marker in a CharsetNormalizerMatch instance could be False in rare cases even if obviously present. Due to the sub-match factoring process.
- Not searching properly for the BOM when trying utf32/16 parent codec.
### Changed
- Improving the package final size by compressing frequencies.json.
- Huge improvement on the largest payloads.
### Added
- CLI now produces JSON consumable output.
- Return ASCII if given sequences fit. Given reasonable confidence.
## [1.3.9](https://github.com/Ousret/charset_normalizer/compare/1.3.8...1.3.9) (2021-05-13)
### Fixed
- In some very rare cases, you may end up getting encode/decode errors due to a bad bytes payload (PR #40)
## [1.3.8](https://github.com/Ousret/charset_normalizer/compare/1.3.7...1.3.8) (2021-05-12)
### Fixed
- Empty given payload for detection may cause an exception if trying to access the `alphabets` property. (PR #39)
## [1.3.7](https://github.com/Ousret/charset_normalizer/compare/1.3.6...1.3.7) (2021-05-12)
### Fixed
- The legacy detect function should return UTF-8-SIG if sig is present in the payload. (PR #38)
## [1.3.6](https://github.com/Ousret/charset_normalizer/compare/1.3.5...1.3.6) (2021-02-09)
### Changed
- Amend the previous release to allow prettytable 2.0 (PR #35)
## [1.3.5](https://github.com/Ousret/charset_normalizer/compare/1.3.4...1.3.5) (2021-02-08)
### Fixed
- Fix error while using the package with a python pre-release interpreter (PR #33)
### Changed
- Dependencies refactoring, constraints revised.
### Added
- Add python 3.9 and 3.10 to the supported interpreters
MIT License
Copyright (c) 2025 TAHRI Ahmed R.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -0,0 +1,35 @@
../../../bin/normalizer,sha256=iciEOZNt7wnV8PJfe_-sqy9t7GZ5QJUhe_u65Zxlaes,255
charset_normalizer-3.4.4.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
charset_normalizer-3.4.4.dist-info/METADATA,sha256=jVuUFBti8dav19YLvWissTihVdF2ozUY4KKMw7jdkBQ,37303
charset_normalizer-3.4.4.dist-info/RECORD,,
charset_normalizer-3.4.4.dist-info/WHEEL,sha256=BvA_i88wcFUl5ehXLgmhwyDL4XPGrCKn6CTUA9axFDE,190
charset_normalizer-3.4.4.dist-info/entry_points.txt,sha256=ADSTKrkXZ3hhdOVFi6DcUEHQRS0xfxDIE_pEz4wLIXA,65
charset_normalizer-3.4.4.dist-info/licenses/LICENSE,sha256=bQ1Bv-FwrGx9wkjJpj4lTQ-0WmDVCoJX0K-SxuJJuIc,1071
charset_normalizer-3.4.4.dist-info/top_level.txt,sha256=7ASyzePr8_xuZWJsnqJjIBtyV8vhEo0wBCv1MPRRi3Q,19
charset_normalizer/__init__.py,sha256=OKRxRv2Zhnqk00tqkN0c1BtJjm165fWXLydE52IKuHc,1590
charset_normalizer/__main__.py,sha256=yzYxMR-IhKRHYwcSlavEv8oGdwxsR89mr2X09qXGdps,109
charset_normalizer/__pycache__/__init__.cpython-311.pyc,,
charset_normalizer/__pycache__/__main__.cpython-311.pyc,,
charset_normalizer/__pycache__/api.cpython-311.pyc,,
charset_normalizer/__pycache__/cd.cpython-311.pyc,,
charset_normalizer/__pycache__/constant.cpython-311.pyc,,
charset_normalizer/__pycache__/legacy.cpython-311.pyc,,
charset_normalizer/__pycache__/md.cpython-311.pyc,,
charset_normalizer/__pycache__/models.cpython-311.pyc,,
charset_normalizer/__pycache__/utils.cpython-311.pyc,,
charset_normalizer/__pycache__/version.cpython-311.pyc,,
charset_normalizer/api.py,sha256=V07i8aVeCD8T2fSia3C-fn0i9t8qQguEBhsqszg32Ns,22668
charset_normalizer/cd.py,sha256=WKTo1HDb-H9HfCDc3Bfwq5jzS25Ziy9SE2a74SgTq88,12522
charset_normalizer/cli/__init__.py,sha256=D8I86lFk2-py45JvqxniTirSj_sFyE6sjaY_0-G1shc,136
charset_normalizer/cli/__main__.py,sha256=dMaXG6IJXRvqq8z2tig7Qb83-BpWTln55ooiku5_uvg,12646
charset_normalizer/cli/__pycache__/__init__.cpython-311.pyc,,
charset_normalizer/cli/__pycache__/__main__.cpython-311.pyc,,
charset_normalizer/constant.py,sha256=7UVY4ldYhmQMHUdgQ_sgZmzcQ0xxYxpBunqSZ-XJZ8U,42713
charset_normalizer/legacy.py,sha256=sYBzSpzsRrg_wF4LP536pG64BItw7Tqtc3SMQAHvFLM,2731
charset_normalizer/md.cpython-311-x86_64-linux-gnu.so,sha256=vmQPHNPc6Z0rbXxiiMeoCQxfjjh5z8umtRgKpBckk7E,15912
charset_normalizer/md.py,sha256=-_oN3h3_X99nkFfqamD3yu45DC_wfk5odH0Tr_CQiXs,20145
charset_normalizer/md__mypyc.cpython-311-x86_64-linux-gnu.so,sha256=83X9F2ayDPRojhPPi7Dpm2LS0plQBG4B12R1GSri2lw,282232
charset_normalizer/models.py,sha256=lKXhOnIPtiakbK3i__J9wpOfzx3JDTKj7Dn3Rg0VaRI,12394
charset_normalizer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
charset_normalizer/utils.py,sha256=sTejPgrdlNsKNucZfJCxJ95lMTLA0ShHLLE3n5wpT9Q,12170
charset_normalizer/version.py,sha256=nKE4qBNk5WA4LIJ_yIH_aSDfvtsyizkWMg-PUG-UZVk,115

View File

@@ -0,0 +1,7 @@
Wheel-Version: 1.0
Generator: setuptools (80.9.0)
Root-Is-Purelib: false
Tag: cp311-cp311-manylinux_2_17_x86_64
Tag: cp311-cp311-manylinux2014_x86_64
Tag: cp311-cp311-manylinux_2_28_x86_64

View File

@@ -0,0 +1,2 @@
[console_scripts]
normalizer = charset_normalizer.cli:cli_detect

View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2025 TAHRI Ahmed R.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -0,0 +1 @@
charset_normalizer

View File

@@ -0,0 +1,48 @@
"""
Charset-Normalizer
~~~~~~~~~~~~~~
The Real First Universal Charset Detector.
A library that helps you read text from an unknown charset encoding.
Motivated by chardet, This package is trying to resolve the issue by taking a new approach.
All IANA character set names for which the Python core library provides codecs are supported.
Basic usage:
>>> from charset_normalizer import from_bytes
>>> results = from_bytes('Bсеки човек има право на образование. Oбразованието!'.encode('utf_8'))
>>> best_guess = results.best()
>>> str(best_guess)
'Bсеки човек има право на образование. Oбразованието!'
Other methods and usages are available - see the full documentation
at <https://github.com/Ousret/charset_normalizer>.
:copyright: (c) 2021 by Ahmed TAHRI
:license: MIT, see LICENSE for more details.
"""
from __future__ import annotations
import logging
from .api import from_bytes, from_fp, from_path, is_binary
from .legacy import detect
from .models import CharsetMatch, CharsetMatches
from .utils import set_logging_handler
from .version import VERSION, __version__
# Names exported by `from charset_normalizer import *`; each one is imported
# above from a submodule of this package.
__all__ = (
"from_fp",
"from_path",
"from_bytes",
"is_binary",
"detect",
"CharsetMatch",
"CharsetMatches",
"__version__",
"VERSION",
"set_logging_handler",
)
# Attach a NullHandler to the top level logger by default, so that importing
# the library does not configure any log output on its own.
# https://docs.python.org/3.3/howto/logging.html#configuring-logging-for-a-library
logging.getLogger("charset_normalizer").addHandler(logging.NullHandler())

View File

@@ -0,0 +1,6 @@
# Script entry point for the package: when this module is executed as
# `__main__`, control is handed to the CLI function `cli_detect`.
from __future__ import annotations
from .cli import cli_detect
if __name__ == "__main__":
cli_detect()

View File

@@ -0,0 +1,669 @@
from __future__ import annotations
import logging
from os import PathLike
from typing import BinaryIO
from .cd import (
coherence_ratio,
encoding_languages,
mb_encoding_languages,
merge_coherence_ratios,
)
from .constant import IANA_SUPPORTED, TOO_BIG_SEQUENCE, TOO_SMALL_SEQUENCE, TRACE
from .md import mess_ratio
from .models import CharsetMatch, CharsetMatches
from .utils import (
any_specified_encoding,
cut_sequence_chunks,
iana_name,
identify_sig_or_bom,
is_cp_similar,
is_multi_byte_encoding,
should_strip_sig_or_bom,
)
# Logger shared by the detection functions in this module.
logger = logging.getLogger("charset_normalizer")
# Stream handler that from_bytes() attaches to `logger` only while it runs with
# explain=True, and removes again before returning.
explain_handler = logging.StreamHandler()
explain_handler.setFormatter(
    logging.Formatter("%(asctime)s | %(levelname)s | %(message)s")
)
def from_bytes(
    sequences: bytes | bytearray,
    steps: int = 5,
    chunk_size: int = 512,
    threshold: float = 0.2,
    cp_isolation: list[str] | None = None,
    cp_exclusion: list[str] | None = None,
    preemptive_behaviour: bool = True,
    explain: bool = False,
    language_threshold: float = 0.1,
    enable_fallback: bool = True,
) -> CharsetMatches:
    """
    Given a raw bytes sequence, return the best possible charsets usable to render str objects.
    If there are no results, it is a strong indicator that the source is binary/not text.
    By default, the process will extract 5 blocks of 512 bytes each to assess the mess and coherence of a given
    sequence, and will give up a particular code page after 20% of measured mess. Those criteria are customizable
    at will.

    The preemptive behavior DOES NOT replace the traditional detection workflow, it prioritizes a particular code page
    but never takes it for granted. Can improve the performance.

    You may want to focus your attention on some code pages and/or not others, use cp_isolation and cp_exclusion for
    that purpose.

    This function will strip the SIG in the payload/sequence every time except on UTF-16, UTF-32.

    By default the library does not set up any handler other than the NullHandler, if you choose to set the 'explain'
    toggle to True it will alter the logger configuration to add a StreamHandler that is suitable for debugging.
    Custom logging format and handler can be set manually.

    :param sequences: payload to analyse; must be bytes or bytearray.
    :param steps: number of chunks sampled from the payload (forced to 1 when the payload is not larger than
        chunk_size * steps).
    :param chunk_size: size of each sampled chunk (forced to the payload length in the same situation).
    :param threshold: mess ratio at or above which a chunk, or the mean over chunks, disqualifies a code page.
    :param cp_isolation: when given and non-empty, only these encodings (normalised through iana_name) are tried.
    :param cp_exclusion: when given and non-empty, these encodings (normalised through iana_name) are skipped.
    :param preemptive_behaviour: when True, an encoding returned by any_specified_encoding() is tried first.
    :param explain: when True, attach explain_handler and lower the logger level to TRACE for the duration of the call.
    :param language_threshold: threshold forwarded to coherence_ratio().
    :param enable_fallback: when True, keep ascii / utf_8 / specified-encoding candidates that failed the mess probe
        so that one of them can be returned if nothing else matched.
    :return: a CharsetMatches container, possibly empty.
    :raises TypeError: if sequences is neither bytes nor bytearray.
    """
    if not isinstance(sequences, (bytearray, bytes)):
        raise TypeError(
            "Expected object of type bytes or bytearray, got: {}".format(
                type(sequences)
            )
        )

    if explain:
        # Remember the current level so every exit path below can restore it.
        previous_logger_level: int = logger.level
        logger.addHandler(explain_handler)
        logger.setLevel(TRACE)

    length: int = len(sequences)

    if length == 0:
        logger.debug("Encoding detection on empty bytes, assuming utf_8 intention.")
        if explain:  # Defensive: ensure exit path clean handler
            logger.removeHandler(explain_handler)
            # NOTE(review): this is the only exit path that substitutes logging.WARNING
            # when the saved level is 0 (NOTSET); the others restore the saved value as-is.
            logger.setLevel(previous_logger_level or logging.WARNING)
        return CharsetMatches([CharsetMatch(sequences, "utf_8", 0.0, False, [], "")])

    # Normalise the allow-list; an empty list means "no restriction" in the loop below.
    if cp_isolation is not None:
        logger.log(
            TRACE,
            "cp_isolation is set. use this flag for debugging purpose. "
            "limited list of encoding allowed : %s.",
            ", ".join(cp_isolation),
        )
        cp_isolation = [iana_name(cp, False) for cp in cp_isolation]
    else:
        cp_isolation = []

    # Normalise the deny-list the same way.
    if cp_exclusion is not None:
        logger.log(
            TRACE,
            "cp_exclusion is set. use this flag for debugging purpose. "
            "limited list of encoding excluded : %s.",
            ", ".join(cp_exclusion),
        )
        cp_exclusion = [iana_name(cp, False) for cp in cp_exclusion]
    else:
        cp_exclusion = []

    # Payload not larger than the requested sampling window: analyse it as one single chunk.
    if length <= (chunk_size * steps):
        logger.log(
            TRACE,
            "override steps (%i) and chunk_size (%i) as content does not fit (%i byte(s) given) parameters.",
            steps,
            chunk_size,
            length,
        )
        steps = 1
        chunk_size = length

    # Shrink the chunk size when one step's share of the payload is smaller than a chunk.
    if steps > 1 and length / steps < chunk_size:
        chunk_size = int(length / steps)

    is_too_small_sequence: bool = len(sequences) < TOO_SMALL_SEQUENCE
    is_too_large_sequence: bool = len(sequences) >= TOO_BIG_SEQUENCE

    if is_too_small_sequence:
        logger.log(
            TRACE,
            "Trying to detect encoding from a tiny portion of ({}) byte(s).".format(
                length
            ),
        )
    elif is_too_large_sequence:
        logger.log(
            TRACE,
            "Using lazy str decoding because the payload is quite large, ({}) byte(s).".format(
                length
            ),
        )

    # Encodings tried before the generic IANA_SUPPORTED list, in insertion order:
    # declared encoding, BOM/SIG encoding, ascii, utf_8.
    prioritized_encodings: list[str] = []

    specified_encoding: str | None = (
        any_specified_encoding(sequences) if preemptive_behaviour else None
    )

    if specified_encoding is not None:
        prioritized_encodings.append(specified_encoding)
        logger.log(
            TRACE,
            "Detected declarative mark in sequence. Priority +1 given for %s.",
            specified_encoding,
        )

    tested: set[str] = set()
    # Encodings that could not decode the payload at all.
    tested_but_hard_failure: list[str] = []
    # Encodings that decoded but were rejected by the mess probe.
    tested_but_soft_failure: list[str] = []

    fallback_ascii: CharsetMatch | None = None
    fallback_u8: CharsetMatch | None = None
    fallback_specified: CharsetMatch | None = None

    results: CharsetMatches = CharsetMatches()

    # Low-mess matches among the prioritized encodings, used for the early exit below.
    early_stop_results: CharsetMatches = CharsetMatches()

    sig_encoding, sig_payload = identify_sig_or_bom(sequences)

    if sig_encoding is not None:
        prioritized_encodings.append(sig_encoding)
        logger.log(
            TRACE,
            "Detected a SIG or BOM mark on first %i byte(s). Priority +1 given for %s.",
            len(sig_payload),
            sig_encoding,
        )

    prioritized_encodings.append("ascii")

    if "utf_8" not in prioritized_encodings:
        prioritized_encodings.append("utf_8")

    for encoding_iana in prioritized_encodings + IANA_SUPPORTED:
        if cp_isolation and encoding_iana not in cp_isolation:
            continue

        if cp_exclusion and encoding_iana in cp_exclusion:
            continue

        # Prioritized encodings also appear in IANA_SUPPORTED; never test one twice.
        if encoding_iana in tested:
            continue

        tested.add(encoding_iana)

        decoded_payload: str | None = None
        bom_or_sig_available: bool = sig_encoding == encoding_iana
        strip_sig_or_bom: bool = bom_or_sig_available and should_strip_sig_or_bom(
            encoding_iana
        )

        if encoding_iana in {"utf_16", "utf_32"} and not bom_or_sig_available:
            logger.log(
                TRACE,
                "Encoding %s won't be tested as-is because it require a BOM. Will try some sub-encoder LE/BE.",
                encoding_iana,
            )
            continue
        if encoding_iana in {"utf_7"} and not bom_or_sig_available:
            logger.log(
                TRACE,
                "Encoding %s won't be tested as-is because detection is unreliable without BOM/SIG.",
                encoding_iana,
            )
            continue

        try:
            is_multi_byte_decoder: bool = is_multi_byte_encoding(encoding_iana)
        except (ModuleNotFoundError, ImportError):
            logger.log(
                TRACE,
                "Encoding %s does not provide an IncrementalDecoder",
                encoding_iana,
            )
            continue

        # First gate: can the payload be decoded strictly with this code page?
        try:
            if is_too_large_sequence and is_multi_byte_decoder is False:
                # Large payload and single-byte decoder: only probe the head of the payload.
                # The decoded text is discarded, so decoded_payload stays None here.
                str(
                    (
                        sequences[: int(50e4)]
                        if strip_sig_or_bom is False
                        else sequences[len(sig_payload) : int(50e4)]
                    ),
                    encoding=encoding_iana,
                )
            else:
                decoded_payload = str(
                    (
                        sequences
                        if strip_sig_or_bom is False
                        else sequences[len(sig_payload) :]
                    ),
                    encoding=encoding_iana,
                )
        except (UnicodeDecodeError, LookupError) as e:
            # LookupError is recorded as a hard failure too, but without a trace message.
            if not isinstance(e, LookupError):
                logger.log(
                    TRACE,
                    "Code page %s does not fit given bytes sequence at ALL. %s",
                    encoding_iana,
                    str(e),
                )
            tested_but_hard_failure.append(encoding_iana)
            continue

        # Skip code pages that is_cp_similar() pairs with one already rejected by the mess probe.
        similar_soft_failure_test: bool = False

        for encoding_soft_failed in tested_but_soft_failure:
            if is_cp_similar(encoding_iana, encoding_soft_failed):
                similar_soft_failure_test = True
                break

        if similar_soft_failure_test:
            logger.log(
                TRACE,
                "%s is deemed too similar to code page %s and was consider unsuited already. Continuing!",
                encoding_iana,
                encoding_soft_failed,
            )
            continue

        # Chunk start offsets: begin after the BOM/SIG when one matched this encoding.
        r_ = range(
            0 if not bom_or_sig_available else len(sig_payload),
            length,
            int(length / steps),
        )

        # Fewer characters than bytes: at least one character used several bytes.
        multi_byte_bonus: bool = (
            is_multi_byte_decoder
            and decoded_payload is not None
            and len(decoded_payload) < length
        )

        if multi_byte_bonus:
            logger.log(
                TRACE,
                "Code page %s is a multi byte encoding table and it appear that at least one character "
                "was encoded using n-bytes.",
                encoding_iana,
            )

        # Give up once a quarter of the chunks (and at least two) reach the threshold.
        max_chunk_gave_up: int = int(len(r_) / 4)

        max_chunk_gave_up = max(max_chunk_gave_up, 2)
        early_stop_count: int = 0
        lazy_str_hard_failure = False

        md_chunks: list[str] = []
        md_ratios = []

        # Second gate: mess detection on each sampled chunk.
        try:
            for chunk in cut_sequence_chunks(
                sequences,
                encoding_iana,
                r_,
                chunk_size,
                bom_or_sig_available,
                strip_sig_or_bom,
                sig_payload,
                is_multi_byte_decoder,
                decoded_payload,
            ):
                md_chunks.append(chunk)

                md_ratios.append(
                    mess_ratio(
                        chunk,
                        threshold,
                        explain is True and 1 <= len(cp_isolation) <= 2,
                    )
                )

                if md_ratios[-1] >= threshold:
                    early_stop_count += 1

                # Stop once too many chunks were messy, or after the first chunk when a
                # BOM/SIG matched this encoding but is not stripped.
                if (early_stop_count >= max_chunk_gave_up) or (
                    bom_or_sig_available and strip_sig_or_bom is False
                ):
                    break
        except (
            UnicodeDecodeError
        ) as e:  # Lazy str loading may have missed something there
            logger.log(
                TRACE,
                "LazyStr Loading: After MD chunk decode, code page %s does not fit given bytes sequence at ALL. %s",
                encoding_iana,
                str(e),
            )
            # Force the soft-failure branch below and forbid using this encoding as a fallback.
            early_stop_count = max_chunk_gave_up
            lazy_str_hard_failure = True

        # We might want to check the sequence again with the whole content
        # Only if initial MD tests passes
        if (
            not lazy_str_hard_failure
            and is_too_large_sequence
            and not is_multi_byte_decoder
        ):
            try:
                # NOTE(review): the head probe above decoded up to int(50e4) bytes, while this
                # check starts at int(50e3) - confirm whether the overlap is intended.
                sequences[int(50e3) :].decode(encoding_iana, errors="strict")
            except UnicodeDecodeError as e:
                logger.log(
                    TRACE,
                    "LazyStr Loading: After final lookup, code page %s does not fit given bytes sequence at ALL. %s",
                    encoding_iana,
                    str(e),
                )
                tested_but_hard_failure.append(encoding_iana)
                continue

        mean_mess_ratio: float = sum(md_ratios) / len(md_ratios) if md_ratios else 0.0
        if mean_mess_ratio >= threshold or early_stop_count >= max_chunk_gave_up:
            tested_but_soft_failure.append(encoding_iana)
            logger.log(
                TRACE,
                "%s was excluded because of initial chaos probing. Gave up %i time(s). "
                "Computed mean chaos is %f %%.",
                encoding_iana,
                early_stop_count,
                round(mean_mess_ratio * 100, ndigits=3),
            )
            # Preparing those fallbacks in case we got nothing.
            if (
                enable_fallback
                and encoding_iana
                in ["ascii", "utf_8", specified_encoding, "utf_16", "utf_32"]
                and not lazy_str_hard_failure
            ):
                fallback_entry = CharsetMatch(
                    sequences,
                    encoding_iana,
                    threshold,
                    bom_or_sig_available,
                    [],
                    decoded_payload,
                    preemptive_declaration=specified_encoding,
                )
                if encoding_iana == specified_encoding:
                    fallback_specified = fallback_entry
                elif encoding_iana == "ascii":
                    fallback_ascii = fallback_entry
                else:
                    # Reached for utf_8, and also for utf_16 / utf_32 when they are not
                    # the specified encoding.
                    fallback_u8 = fallback_entry
            continue

        logger.log(
            TRACE,
            "%s passed initial chaos probing. Mean measured chaos is %f %%",
            encoding_iana,
            round(mean_mess_ratio * 100, ndigits=3),
        )

        # Third stage: coherence (language) detection on the chunks kept by the mess probe.
        if not is_multi_byte_decoder:
            target_languages: list[str] = encoding_languages(encoding_iana)
        else:
            target_languages = mb_encoding_languages(encoding_iana)

        if target_languages:
            logger.log(
                TRACE,
                "{} should target any language(s) of {}".format(
                    encoding_iana, str(target_languages)
                ),
            )

        cd_ratios = []

        # We shall skip the CD when its about ASCII
        # Most of the time its not relevant to run "language-detection" on it.
        if encoding_iana != "ascii":
            for chunk in md_chunks:
                chunk_languages = coherence_ratio(
                    chunk,
                    language_threshold,
                    ",".join(target_languages) if target_languages else None,
                )

                cd_ratios.append(chunk_languages)

        cd_ratios_merged = merge_coherence_ratios(cd_ratios)

        if cd_ratios_merged:
            logger.log(
                TRACE,
                "We detected language {} using {}".format(
                    cd_ratios_merged, encoding_iana
                ),
            )

        current_match = CharsetMatch(
            sequences,
            encoding_iana,
            mean_mess_ratio,
            bom_or_sig_available,
            cd_ratios_merged,
            # For large payloads the decoded text is only handed over for the
            # specified encoding, ascii and utf_8; other matches get None.
            (
                decoded_payload
                if (
                    is_too_large_sequence is False
                    or encoding_iana in [specified_encoding, "ascii", "utf_8"]
                )
                else None
            ),
            preemptive_declaration=specified_encoding,
        )

        results.append(current_match)

        if (
            encoding_iana in [specified_encoding, "ascii", "utf_8"]
            and mean_mess_ratio < 0.1
        ):
            # If md says nothing to worry about, then... stop immediately!
            if mean_mess_ratio == 0.0:
                logger.debug(
                    "Encoding detection: %s is most likely the one.",
                    current_match.encoding,
                )
                if explain:  # Defensive: ensure exit path clean handler
                    logger.removeHandler(explain_handler)
                    logger.setLevel(previous_logger_level)
                return CharsetMatches([current_match])

            early_stop_results.append(current_match)

        # Once every prioritized encoding has been tried, return the best low-mess candidate.
        if (
            len(early_stop_results)
            and (specified_encoding is None or specified_encoding in tested)
            and "ascii" in tested
            and "utf_8" in tested
        ):
            probable_result: CharsetMatch = early_stop_results.best()  # type: ignore[assignment]
            logger.debug(
                "Encoding detection: %s is most likely the one.",
                probable_result.encoding,
            )
            if explain:  # Defensive: ensure exit path clean handler
                logger.removeHandler(explain_handler)
                logger.setLevel(previous_logger_level)

            return CharsetMatches([probable_result])

        # An encoding announced by a BOM/SIG that survived every probe is returned immediately.
        if encoding_iana == sig_encoding:
            logger.debug(
                "Encoding detection: %s is most likely the one as we detected a BOM or SIG within "
                "the beginning of the sequence.",
                encoding_iana,
            )
            if explain:  # Defensive: ensure exit path clean handler
                logger.removeHandler(explain_handler)
                logger.setLevel(previous_logger_level)
            return CharsetMatches([results[encoding_iana]])

    # No code page passed: use a recorded fallback, preferring specified, then utf_8, then ascii.
    if len(results) == 0:
        if fallback_u8 or fallback_ascii or fallback_specified:
            logger.log(
                TRACE,
                "Nothing got out of the detection process. Using ASCII/UTF-8/Specified fallback.",
            )

        if fallback_specified:
            logger.debug(
                "Encoding detection: %s will be used as a fallback match",
                fallback_specified.encoding,
            )
            results.append(fallback_specified)
        # NOTE(review): the last operand (fallback_u8 is not None) makes this branch run
        # whenever a utf_8 fallback exists, regardless of the two operands before it.
        elif (
            (fallback_u8 and fallback_ascii is None)
            or (
                fallback_u8
                and fallback_ascii
                and fallback_u8.fingerprint != fallback_ascii.fingerprint
            )
            or (fallback_u8 is not None)
        ):
            logger.debug("Encoding detection: utf_8 will be used as a fallback match")
            results.append(fallback_u8)
        elif fallback_ascii:
            logger.debug("Encoding detection: ascii will be used as a fallback match")
            results.append(fallback_ascii)

    if results:
        logger.debug(
            "Encoding detection: Found %s as plausible (best-candidate) for content. With %i alternatives.",
            results.best().encoding,  # type: ignore
            len(results) - 1,
        )
    else:
        logger.debug("Encoding detection: Unable to determine any suitable charset.")

    if explain:
        logger.removeHandler(explain_handler)
        logger.setLevel(previous_logger_level)

    return results
def from_fp(
    fp: BinaryIO,
    steps: int = 5,
    chunk_size: int = 512,
    threshold: float = 0.20,
    cp_isolation: list[str] | None = None,
    cp_exclusion: list[str] | None = None,
    preemptive_behaviour: bool = True,
    explain: bool = False,
    language_threshold: float = 0.1,
    enable_fallback: bool = True,
) -> CharsetMatches:
    """
    Run from_bytes() on everything that can be read from an already opened binary file object.

    The whole remaining content is read in one call and every option is forwarded
    unchanged. The file object is left open for the caller to close.
    """
    payload = fp.read()

    return from_bytes(
        payload,
        steps=steps,
        chunk_size=chunk_size,
        threshold=threshold,
        cp_isolation=cp_isolation,
        cp_exclusion=cp_exclusion,
        preemptive_behaviour=preemptive_behaviour,
        explain=explain,
        language_threshold=language_threshold,
        enable_fallback=enable_fallback,
    )
def from_path(
    path: str | bytes | PathLike,  # type: ignore[type-arg]
    steps: int = 5,
    chunk_size: int = 512,
    threshold: float = 0.20,
    cp_isolation: list[str] | None = None,
    cp_exclusion: list[str] | None = None,
    preemptive_behaviour: bool = True,
    explain: bool = False,
    language_threshold: float = 0.1,
    enable_fallback: bool = True,
) -> CharsetMatches:
    """
    Open the file at `path` in binary mode and delegate the detection to from_fp().

    The file is closed before the result is handed back. Errors raised by open()
    (for example IOError) are not caught here.
    """
    with open(path, "rb") as handle:
        matches = from_fp(
            handle,
            steps=steps,
            chunk_size=chunk_size,
            threshold=threshold,
            cp_isolation=cp_isolation,
            cp_exclusion=cp_exclusion,
            preemptive_behaviour=preemptive_behaviour,
            explain=explain,
            language_threshold=language_threshold,
            enable_fallback=enable_fallback,
        )

    return matches
def is_binary(
    fp_or_path_or_payload: PathLike | str | BinaryIO | bytes,  # type: ignore[type-arg]
    steps: int = 5,
    chunk_size: int = 512,
    threshold: float = 0.20,
    cp_isolation: list[str] | None = None,
    cp_exclusion: list[str] | None = None,
    preemptive_behaviour: bool = True,
    explain: bool = False,
    language_threshold: float = 0.1,
    enable_fallback: bool = False,
) -> bool:
    """
    Tell whether the given input (path, raw bytes or binary file object) looks binary, i.e. not text.

    The input is dispatched to from_path(), from_bytes() or from_fp() depending on its
    type, with the same options for all three. The answer is True when the detection
    yields no match at all. Fallback matches are disabled by default here
    (enable_fallback=False) so the check is stricter than a plain detection.
    """
    detection_options = {
        "steps": steps,
        "chunk_size": chunk_size,
        "threshold": threshold,
        "cp_isolation": cp_isolation,
        "cp_exclusion": cp_exclusion,
        "preemptive_behaviour": preemptive_behaviour,
        "explain": explain,
        "language_threshold": language_threshold,
        "enable_fallback": enable_fallback,
    }

    # Pick the entry point matching the kind of input received.
    if isinstance(fp_or_path_or_payload, (str, PathLike)):
        detector = from_path
    elif isinstance(fp_or_path_or_payload, (bytes, bytearray)):
        detector = from_bytes
    else:
        detector = from_fp

    guesses = detector(fp_or_path_or_payload, **detection_options)

    return not guesses

View File

@@ -0,0 +1,395 @@
from __future__ import annotations
import importlib
from codecs import IncrementalDecoder
from collections import Counter
from functools import lru_cache
from typing import Counter as TypeCounter
from .constant import (
FREQUENCIES,
KO_NAMES,
LANGUAGE_SUPPORTED_COUNT,
TOO_SMALL_SEQUENCE,
ZH_NAMES,
)
from .md import is_suspiciously_successive_range
from .models import CoherenceMatches
from .utils import (
is_accentuated,
is_latin,
is_multi_byte_encoding,
is_unicode_range_secondary,
unicode_range,
)
def encoding_unicode_range(iana_name: str) -> list[str]:
    """
    List the unicode ranges a single-byte code page mostly maps to.

    Bytes 0x40 to 0xFE are decoded one at a time; each resulting character is
    assigned to its unicode range, and ranges flagged as secondary are ignored.
    A range is kept when it holds at least 15% of the counted characters.
    The result is sorted alphabetically.

    Raises OSError when the encoding is a multi-byte one.
    """
    if is_multi_byte_encoding(iana_name):
        raise OSError("Function not supported on multi-byte code page")

    codec_module = importlib.import_module(f"encodings.{iana_name}")
    byte_decoder: IncrementalDecoder = codec_module.IncrementalDecoder(errors="ignore")

    hits_per_range: dict[str, int] = {}
    counted_characters: int = 0

    for byte_value in range(0x40, 0xFF):
        decoded: str = byte_decoder.decode(bytes([byte_value]))

        if not decoded:
            continue

        range_name: str | None = unicode_range(decoded)

        if range_name is None:
            continue

        if is_unicode_range_secondary(range_name) is False:
            hits_per_range[range_name] = hits_per_range.get(range_name, 0) + 1
            counted_characters += 1

    return sorted(
        range_name
        for range_name, hits in hits_per_range.items()
        if hits / counted_characters >= 0.15
    )
def unicode_range_languages(primary_range: str) -> list[str]:
    """
    List the languages of FREQUENCIES having at least one character in the given unicode range.

    Languages come out in the iteration order of FREQUENCIES.
    """
    return [
        language
        for language, characters in FREQUENCIES.items()
        if any(unicode_range(character) == primary_range for character in characters)
    ]
@lru_cache()
def encoding_languages(iana_name: str) -> list[str]:
    """
    Associate a single-byte encoding with the language(s) it is most likely used for.

    The first unicode range of the code page whose name does not contain "Latin"
    decides the languages. When every range is a Latin one, the generic
    ["Latin Based"] marker is returned instead.
    """
    primary_range: str | None = next(
        (
            candidate
            for candidate in encoding_unicode_range(iana_name)
            if "Latin" not in candidate
        ),
        None,
    )

    if primary_range is None:
        return ["Latin Based"]

    return unicode_range_languages(primary_range)
@lru_cache()
def mb_encoding_languages(iana_name: str) -> list[str]:
    """
    Associate a multi-byte encoding with the language it is tied to.

    Returns ["Japanese"], ["Chinese"] or ["Korean"] based on the encoding name,
    or an empty list when the name matches none of the known families.
    """
    japanese_prefixes = ("shift_", "iso2022_jp", "euc_j")

    if iana_name.startswith(japanese_prefixes) or iana_name == "cp932":
        return ["Japanese"]

    if iana_name.startswith("gb") or iana_name in ZH_NAMES:
        return ["Chinese"]

    if iana_name.startswith("iso2022_kr") or iana_name in KO_NAMES:
        return ["Korean"]

    return []
@lru_cache(maxsize=LANGUAGE_SUPPORTED_COUNT)
def get_target_features(language: str) -> tuple[bool, bool]:
    """
    Summarise the alphabet of a supported language as (has accentuated characters, is purely Latin).

    The first flag is True as soon as one character of FREQUENCIES[language] is
    accentuated. The second is False as soon as one character is not Latin.
    """
    alphabet = FREQUENCIES[language]

    have_accents: bool = any(is_accentuated(character) for character in alphabet)
    pure_latin: bool = not any(is_latin(character) is False for character in alphabet)

    return have_accents, pure_latin
def alphabet_languages(
    characters: list[str], ignore_non_latin: bool = False
) -> list[str]:
    """
    Rank the languages whose alphabet overlaps the given characters.

    A language is kept when at least 20% of its FREQUENCIES characters appear in
    `characters`. Languages without accents are skipped when the source contains
    accentuated characters, and non purely Latin languages are skipped when
    `ignore_non_latin` is set. The best overlap comes first.
    """
    source_have_accents = any(map(is_accentuated, characters))
    scored: list[tuple[str, float]] = []

    for language, language_characters in FREQUENCIES.items():
        target_have_accents, target_pure_latin = get_target_features(language)

        rejected_as_non_latin = ignore_non_latin and target_pure_latin is False
        rejected_for_accents = target_have_accents is False and source_have_accents

        if rejected_as_non_latin or rejected_for_accents:
            continue

        matching = sum(1 for c in language_characters if c in characters)
        ratio: float = matching / len(language_characters)

        if ratio >= 0.2:
            scored.append((language, ratio))

    scored.sort(key=lambda entry: entry[1], reverse=True)

    return [language for language, _ in scored]
def characters_popularity_compare(
    language: str, ordered_characters: list[str]
) -> float:
    """
    Determine if an ordered characters list (by occurrence, from most frequent to rarest) matches a particular language.
    The result is a ratio between 0. (absolutely no correspondence) and 1. (near perfect fit).
    Beware that this function is not strict on the match in order to ease the detection. (Meaning close match is 1.)

    :param language: a key of FREQUENCIES.
    :param ordered_characters: characters observed in the text, most frequent first.
    :return: approved characters divided by the number of given characters; 0.0 when no character is given.
    :raises ValueError: if the language is not a key of FREQUENCIES.
    """
    if language not in FREQUENCIES:
        raise ValueError(f"{language} not available")

    ordered_characters_count: int = len(ordered_characters)

    # Nothing to compare: report "no correspondence" instead of dividing by zero below.
    if ordered_characters_count == 0:
        return 0.0

    character_approved_count: int = 0
    language_characters = FREQUENCIES[language]
    FREQUENCIES_language_set = set(language_characters)

    target_language_characters_count: int = len(language_characters)

    large_alphabet: bool = target_language_characters_count > 26

    # Scale factor projecting a rank in the observed list onto the language's list.
    # It does not depend on the character, so it is computed once.
    expected_projection_ratio: float = (
        target_language_characters_count / ordered_characters_count
    )

    for character_rank, character in enumerate(ordered_characters):
        if character not in FREQUENCIES_language_set:
            continue

        character_rank_in_language: int = language_characters.index(character)
        character_rank_projection: int = int(character_rank * expected_projection_ratio)

        # Small alphabet: a rank more than 4 positions off is rejected outright.
        if (
            large_alphabet is False
            and abs(character_rank_projection - character_rank_in_language) > 4
        ):
            continue

        # Large alphabet: a rank within a third of the alphabet size is accepted outright.
        if (
            large_alphabet is True
            and abs(character_rank_projection - character_rank_in_language)
            < target_language_characters_count / 3
        ):
            character_approved_count += 1
            continue

        # Otherwise compare what surrounds the character in both orderings.
        characters_before_source: list[str] = language_characters[
            0:character_rank_in_language
        ]
        characters_after_source: list[str] = language_characters[
            character_rank_in_language:
        ]
        characters_before: list[str] = ordered_characters[0:character_rank]
        characters_after: list[str] = ordered_characters[character_rank:]

        before_match_count: int = len(
            set(characters_before) & set(characters_before_source)
        )

        after_match_count: int = len(
            set(characters_after) & set(characters_after_source)
        )

        # The two emptiness checks also protect the divisions in the last test.
        if len(characters_before_source) == 0 and before_match_count <= 4:
            character_approved_count += 1
            continue

        if len(characters_after_source) == 0 and after_match_count <= 4:
            character_approved_count += 1
            continue

        if (
            before_match_count / len(characters_before_source) >= 0.4
            or after_match_count / len(characters_after_source) >= 0.4
        ):
            character_approved_count += 1
            continue

    return character_approved_count / ordered_characters_count
def alpha_unicode_split(decoded_sequence: str) -> list[str]:
    """
    Split a decoded text into layers, one lower-cased string per alphabet / unicode range.

    Only alphabetic characters with a known unicode range are kept. A character joins the
    first existing layer whose range is not suspicious next to its own range; otherwise it
    goes to the layer named after its own range.
    Ex. a text containing English/Latin with a bit of Hebrew will return two items in the
    resulting list; one containing the Latin letters and the other the Hebrew ones.
    """
    layers: dict[str, str] = {}

    for character in decoded_sequence:
        if not character.isalpha():
            continue

        character_range: str | None = unicode_range(character)

        if character_range is None:
            continue

        # First compatible layer already discovered, else the character's own range.
        layer_target_range: str = next(
            (
                discovered_range
                for discovered_range in layers
                if is_suspiciously_successive_range(discovered_range, character_range)
                is False
            ),
            character_range,
        )

        layers[layer_target_range] = (
            layers.get(layer_target_range, "") + character.lower()
        )

    return list(layers.values())
def merge_coherence_ratios(results: list[CoherenceMatches]) -> CoherenceMatches:
    """
    Merge several coherence_ratio() results into a single one of the same shape.

    Ratios are grouped per language and averaged (rounded to 4 decimals).
    The merged list is ordered from the highest ratio to the lowest.
    """
    ratios_by_language: dict[str, list[float]] = {}

    for chunk_matches in results:
        for language, ratio in chunk_matches:
            ratios_by_language.setdefault(language, []).append(ratio)

    merged = [
        (language, round(sum(ratios) / len(ratios), 4))
        for language, ratios in ratios_by_language.items()
    ]
    merged.sort(key=lambda entry: entry[1], reverse=True)

    return merged
def filter_alt_coherence_matches(results: CoherenceMatches) -> CoherenceMatches:
    """
    We shall NOT return "English—" in CoherenceMatches because it is an alternative
    of "English". This function only keeps the best match and removes the em-dash in it.

    Entries are grouped under their name with every em-dash (U+2014) removed. When at
    least one group holds several entries, a new list is returned with one entry per
    group carrying the highest ratio of that group. Otherwise the input is returned
    untouched.

    :param results: (language, ratio) pairs.
    :return: (language, ratio) pairs without duplicated alternative languages.
    """
    index_results: dict[str, list[float]] = {}

    for language, ratio in results:
        # Written as an escape so the em-dash cannot be lost to an editor or encoding mishap.
        no_em_name: str = language.replace("\u2014", "")
        index_results.setdefault(no_em_name, []).append(ratio)

    if any(len(ratios) > 1 for ratios in index_results.values()):
        return [(language, max(ratios)) for language, ratios in index_results.items()]

    return results
@lru_cache(maxsize=2048)
def coherence_ratio(
    decoded_sequence: str, threshold: float = 0.1, lg_inclusion: str | None = None
) -> CoherenceMatches:
    """
    Detect ANY language that can be identified in the given sequence, analysing it layer by layer.
    A layer = characters extracted per alphabet/range.

    `lg_inclusion` is an optional comma separated list of languages to test. The special
    entry "Latin Based" is removed from that list and restricts the automatic language
    lookup to purely Latin languages. Matches below `threshold` are dropped.
    The result is ordered from the best ratio to the worst.
    """
    matches: list[tuple[str, float]] = []
    strong_match_count: int = 0

    requested_languages = [] if lg_inclusion is None else lg_inclusion.split(",")

    ignore_non_latin: bool = "Latin Based" in requested_languages
    if ignore_non_latin:
        requested_languages.remove("Latin Based")

    for layer in alpha_unicode_split(decoded_sequence):
        ranked = Counter(layer).most_common()

        occurrences: int = sum(count for _, count in ranked)

        if occurrences <= TOO_SMALL_SEQUENCE:
            continue

        popular_character_ordered: list[str] = [character for character, _ in ranked]

        candidates = requested_languages or alphabet_languages(
            popular_character_ordered, ignore_non_latin
        )

        for language in candidates:
            ratio: float = characters_popularity_compare(
                language, popular_character_ordered
            )

            if ratio < threshold:
                continue

            if ratio >= 0.8:
                strong_match_count += 1

            matches.append((language, round(ratio, 4)))

            # Three strong matches are enough; stop testing languages for this layer.
            if strong_match_count >= 3:
                break

    return sorted(
        filter_alt_coherence_matches(matches), key=lambda entry: entry[1], reverse=True
    )

View File

@@ -0,0 +1,8 @@
from __future__ import annotations
from .__main__ import cli_detect, query_yes_no
# Names re-exported from .__main__ as the public surface of this package.
__all__ = (
    "cli_detect",
    "query_yes_no",
)

View File

@@ -0,0 +1,381 @@
from __future__ import annotations
import argparse
import sys
import typing
from json import dumps
from os.path import abspath, basename, dirname, join, realpath
from platform import python_version
from unicodedata import unidata_version
import charset_normalizer.md as md_module
from charset_normalizer import from_fp
from charset_normalizer.models import CliDetectionResult
from charset_normalizer.version import __version__
def query_yes_no(question: str, default: str = "yes") -> bool:
    """Prompt the user with a yes/no question until a usable answer is given.

    "question" is the text written to stdout before the prompt.
    "default" is the answer assumed when the user just hits <Enter>.
        It must be "yes" (the default), "no" or None (meaning
        an answer is required of the user).

    Returns True for "yes" and False for "no".
    Raises ValueError when "default" is none of the accepted values.

    Credit goes to (c) https://stackoverflow.com/questions/3041986/apt-command-line-interface-like-yes-no-input
    """
    answers = {"yes": True, "y": True, "ye": True, "no": False, "n": False}

    if default is not None and default not in ("yes", "no"):
        raise ValueError("invalid default answer: '%s'" % default)

    # The capital letter in the prompt shows which answer <Enter> selects.
    if default is None:
        prompt = " [y/n] "
    else:
        prompt = " [Y/n] " if default == "yes" else " [y/N] "

    while True:
        sys.stdout.write(question + prompt)
        reply = input().lower()

        if reply == "" and default is not None:
            return answers[default]

        if reply in answers:
            return answers[reply]

        sys.stdout.write("Please respond with 'yes' or 'no' (or 'y' or 'n').\n")
class FileType:
    """Callable that turns a command line string into an opened file object.

    Instances are meant to be passed as the type= argument of
    ArgumentParser.add_argument().

    Keyword Arguments:
        - mode -- how the file is to be opened; same values as the builtin open().
        - bufsize -- desired buffer size; same values as the builtin open().
        - encoding -- the file's encoding; same values as the builtin open().
        - errors -- how encoding and decoding errors are handled; same values
            as the builtin open().

    Backported from CPython 3.12
    """

    def __init__(
        self,
        mode: str = "r",
        bufsize: int = -1,
        encoding: str | None = None,
        errors: str | None = None,
    ):
        self._mode, self._bufsize = mode, bufsize
        self._encoding, self._errors = encoding, errors

    def __call__(self, string: str) -> typing.IO:  # type: ignore[type-arg]
        """Open `string` as a file, or map "-" to the standard streams.

        Raises argparse.ArgumentTypeError when the file cannot be opened, and
        ValueError when "-" is combined with a mode that neither reads nor writes.
        """
        # any argument other than "-" is used as a file name
        if string != "-":
            try:
                return open(
                    string, self._mode, self._bufsize, self._encoding, self._errors
                )
            except OSError as e:
                message = f"can't open '{string}': {e}"
                raise argparse.ArgumentTypeError(message)

        # the special argument "-" means sys.std{in,out}
        wants_binary = "b" in self._mode

        if "r" in self._mode:
            return sys.stdin.buffer if wants_binary else sys.stdin

        if any(c in self._mode for c in "wax"):
            return sys.stdout.buffer if wants_binary else sys.stdout

        msg = f'argument "-" with mode {self._mode}'
        raise ValueError(msg)

    def __repr__(self) -> str:
        # A bufsize left at -1 is omitted, as are encoding/errors left at None.
        positional = [
            repr(value) for value in (self._mode, self._bufsize) if value != -1
        ]
        keyword = [
            f"{kw}={value!r}"
            for kw, value in (("encoding", self._encoding), ("errors", self._errors))
            if value is not None
        ]
        args_str = ", ".join(positional + keyword)
        return f"{type(self).__name__}({args_str})"
def cli_detect(argv: list[str] | None = None) -> int:
    """
    CLI assistant using ARGV and ArgumentParser

    Runs the detector on every given file, optionally writes a normalized
    copy (or replaces the file), then prints either a JSON report or, with
    --minimal, only the detected encoding name(s).

    :param argv: Arguments to parse; ``None`` lets argparse read ``sys.argv``.
    :return: 0 if everything is fine, anything else equal trouble
        (1: invalid option combination or threshold out of range,
        2: the normalized output could not be written).
    """
    parser = argparse.ArgumentParser(
        description="The Real First Universal Charset Detector. "
        "Discover originating encoding used on text file. "
        "Normalize text to unicode."
    )

    parser.add_argument(
        "files", type=FileType("rb"), nargs="+", help="File(s) to be analysed"
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        default=False,
        dest="verbose",
        help="Display complementary information about file if any. "
        "Stdout will contain logs about the detection process.",
    )
    parser.add_argument(
        "-a",
        "--with-alternative",
        action="store_true",
        default=False,
        dest="alternatives",
        help="Output complementary possibilities if any. Top-level JSON WILL be a list.",
    )
    parser.add_argument(
        "-n",
        "--normalize",
        action="store_true",
        default=False,
        dest="normalize",
        help="Permit to normalize input file. If not set, program does not write anything.",
    )
    parser.add_argument(
        "-m",
        "--minimal",
        action="store_true",
        default=False,
        dest="minimal",
        help="Only output the charset detected to STDOUT. Disabling JSON output.",
    )
    parser.add_argument(
        "-r",
        "--replace",
        action="store_true",
        default=False,
        dest="replace",
        help="Replace file when trying to normalize it instead of creating a new one.",
    )
    parser.add_argument(
        "-f",
        "--force",
        action="store_true",
        default=False,
        dest="force",
        help="Replace file without asking if you are sure, use this flag with caution.",
    )
    parser.add_argument(
        "-i",
        "--no-preemptive",
        action="store_true",
        default=False,
        dest="no_preemptive",
        help="Disable looking at a charset declaration to hint the detector.",
    )
    parser.add_argument(
        "-t",
        "--threshold",
        action="store",
        default=0.2,
        type=float,
        dest="threshold",
        help="Define a custom maximum amount of noise allowed in decoded content. 0. <= noise <= 1.",
    )
    parser.add_argument(
        "--version",
        action="version",
        version="Charset-Normalizer {} - Python {} - Unicode {} - SpeedUp {}".format(
            __version__,
            python_version(),
            unidata_version,
            "OFF" if md_module.__file__.lower().endswith(".py") else "ON",
        ),
        help="Show version information and exit.",
    )

    args = parser.parse_args(argv)

    # Option sanity checks. argparse has already opened every input file, so
    # each rejection path closes them before returning.
    if args.replace is True and args.normalize is False:
        if args.files:
            for my_file in args.files:
                my_file.close()
        print("Use --replace in addition of --normalize only.", file=sys.stderr)
        return 1

    if args.force is True and args.replace is False:
        if args.files:
            for my_file in args.files:
                my_file.close()
        print("Use --force in addition of --replace only.", file=sys.stderr)
        return 1

    if args.threshold < 0.0 or args.threshold > 1.0:
        if args.files:
            for my_file in args.files:
                my_file.close()
        print("--threshold VALUE should be between 0. AND 1.", file=sys.stderr)
        return 1

    # One entry per reported match, across all files.
    x_ = []

    for my_file in args.files:
        matches = from_fp(
            my_file,
            threshold=args.threshold,
            explain=args.verbose,
            preemptive_behaviour=args.no_preemptive is False,
        )

        best_guess = matches.best()

        if best_guess is None:
            print(
                'Unable to identify originating encoding for "{}". {}'.format(
                    my_file.name,
                    (
                        "Maybe try increasing maximum amount of chaos."
                        if args.threshold < 1.0
                        else ""
                    ),
                ),
                file=sys.stderr,
            )
            x_.append(
                CliDetectionResult(
                    abspath(my_file.name),
                    None,
                    [],
                    [],
                    "Unknown",
                    [],
                    False,
                    1.0,
                    0.0,
                    None,
                    True,
                )
            )
        else:
            # Keep a handle on THIS file's preferred result: the normalized
            # path must be recorded on it, not on x_[0], which belongs to the
            # first file as soon as several files are processed.
            preferred_result = CliDetectionResult(
                abspath(my_file.name),
                best_guess.encoding,
                best_guess.encoding_aliases,
                [
                    cp
                    for cp in best_guess.could_be_from_charset
                    if cp != best_guess.encoding
                ],
                best_guess.language,
                best_guess.alphabets,
                best_guess.bom,
                best_guess.percent_chaos,
                best_guess.percent_coherence,
                None,
                True,
            )
            x_.append(preferred_result)

            if len(matches) > 1 and args.alternatives:
                for el in matches:
                    if el != best_guess:
                        x_.append(
                            CliDetectionResult(
                                abspath(my_file.name),
                                el.encoding,
                                el.encoding_aliases,
                                [
                                    cp
                                    for cp in el.could_be_from_charset
                                    if cp != el.encoding
                                ],
                                el.language,
                                el.alphabets,
                                el.bom,
                                el.percent_chaos,
                                el.percent_coherence,
                                None,
                                False,
                            )
                        )

            if args.normalize is True:
                if best_guess.encoding.startswith("utf") is True:
                    print(
                        '"{}" file does not need to be normalized, as it already came from unicode.'.format(
                            my_file.name
                        ),
                        file=sys.stderr,
                    )
                    if my_file.closed is False:
                        my_file.close()
                    continue

                dir_path = dirname(realpath(my_file.name))
                file_name = basename(realpath(my_file.name))

                o_: list[str] = file_name.split(".")

                if args.replace is False:
                    # Write next to the original: the detected encoding is
                    # inserted before the last dot-separated component.
                    o_.insert(-1, best_guess.encoding)
                    if my_file.closed is False:
                        my_file.close()
                elif (
                    args.force is False
                    and query_yes_no(
                        'Are you sure to normalize "{}" by replacing it ?'.format(
                            my_file.name
                        ),
                        "no",
                    )
                    is False
                ):
                    # The user declined the in-place replacement.
                    if my_file.closed is False:
                        my_file.close()
                    continue

                try:
                    preferred_result.unicode_path = join(dir_path, ".".join(o_))

                    with open(preferred_result.unicode_path, "wb") as fp:
                        fp.write(best_guess.output())
                except OSError as e:
                    print(str(e), file=sys.stderr)
                    if my_file.closed is False:
                        my_file.close()
                    return 2

        if my_file.closed is False:
            my_file.close()

    if args.minimal is False:
        # A single result is printed as an object, several as a list.
        print(
            dumps(
                [el.__dict__ for el in x_] if len(x_) > 1 else x_[0].__dict__,
                ensure_ascii=True,
                indent=4,
            )
        )
    else:
        for my_file in args.files:
            print(
                ", ".join(
                    [
                        el.encoding or "undefined"
                        for el in x_
                        if el.path == abspath(my_file.name)
                    ]
                )
            )

    return 0
if __name__ == "__main__":
    # Hand cli_detect()'s status (0, 1 or 2) to the shell; calling it bare
    # discarded the value, so a direct run of this module always exited 0.
    sys.exit(cli_detect())

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,80 @@
from __future__ import annotations
from typing import TYPE_CHECKING, Any
from warnings import warn
from .api import from_bytes
from .constant import CHARDET_CORRESPONDENCE, TOO_SMALL_SEQUENCE
# TODO: remove this check when dropping Python 3.7 support
# Only evaluated by static type checkers: typing_extensions is not imported
# at runtime.
if TYPE_CHECKING:
    from typing_extensions import TypedDict

    # Shape of the dictionary returned by detect() below.
    class ResultDict(TypedDict):
        # Detected encoding name, or None when nothing could be identified.
        encoding: str | None
        # Detected language; detect() uses "" when it is unknown.
        language: str
        # Confidence derived from the chaos ratio, or None without a match.
        confidence: float | None
def detect(
    byte_str: bytes, should_rename_legacy: bool = False, **kwargs: Any
) -> ResultDict:
    """
    chardet legacy method
    Guess the encoding of a byte string and report it in a chardet-like
    dictionary. Meant to be mostly backward-compatible: encoding names follow
    chardet's own spelling whenever a correspondence exists.
    This function is deprecated and exists to ease migration, consult the
    documentation for further information. Not planned for removal.

    :param byte_str:     The byte sequence to examine.
    :param should_rename_legacy:  Should we rename legacy encodings
                                  to their more modern equivalents?
    :raises TypeError: ``byte_str`` is neither ``bytes`` nor ``bytearray``.
    """
    if kwargs:
        ignored = ",".join(list(kwargs.keys()))
        warn(
            f"charset-normalizer disregard arguments '{ignored}' in legacy function detect()"
        )

    if not isinstance(byte_str, (bytearray, bytes)):
        raise TypeError(  # pragma: nocover
            f"Expected object of type bytes or bytearray, got: {type(byte_str)}"
        )

    if isinstance(byte_str, bytearray):
        byte_str = bytes(byte_str)

    best_match = from_bytes(byte_str).best()

    if best_match is None:
        encoding, language, confidence = None, "", None
    else:
        encoding = best_match.encoding
        language = "" if best_match.language == "Unknown" else best_match.language
        confidence = 1.0 - best_match.chaos

        # automatically lower confidence
        # on small bytes samples.
        # https://github.com/jawah/charset_normalizer/issues/391
        if (
            confidence >= 0.9
            and encoding not in {"utf_8", "ascii"}
            and best_match.bom is False
            and len(byte_str) < TOO_SMALL_SEQUENCE
        ):
            confidence -= 0.2

        # Note: CharsetNormalizer does not return 'UTF-8-SIG' as the sig get stripped in the detection/normalization process
        # but chardet does return 'utf-8-sig' and it is a valid codec name.
        if encoding == "utf_8" and best_match.bom:
            encoding += "_sig"

    if should_rename_legacy is False and encoding in CHARDET_CORRESPONDENCE:
        encoding = CHARDET_CORRESPONDENCE[encoding]

    return {
        "encoding": encoding,
        "language": language,
        "confidence": confidence,
    }

View File

@@ -0,0 +1,635 @@
from __future__ import annotations
from functools import lru_cache
from logging import getLogger
from .constant import (
COMMON_SAFE_ASCII_CHARACTERS,
TRACE,
UNICODE_SECONDARY_RANGE_KEYWORD,
)
from .utils import (
is_accentuated,
is_arabic,
is_arabic_isolated_form,
is_case_variable,
is_cjk,
is_emoticon,
is_hangul,
is_hiragana,
is_katakana,
is_latin,
is_punctuation,
is_separator,
is_symbol,
is_thai,
is_unprintable,
remove_accent,
unicode_range,
is_cjk_uncommon,
)
class MessDetectorPlugin:
    """
    Abstract base for mess detection plugins.
    Every detector MUST subclass it and implement the methods below; each
    default implementation only raises NotImplementedError.
    """

    def eligible(self, character: str) -> bool:
        """
        Tell whether the given character should be passed to feed().
        """
        raise NotImplementedError  # pragma: nocover

    def feed(self, character: str) -> None:
        """
        Main routine, executed for a character.
        Holds the logic by which the text would be considered chaotic.
        """
        raise NotImplementedError  # pragma: nocover

    def reset(self) -> None:  # pragma: no cover
        """
        Bring the plugin back to its initial state.
        """
        raise NotImplementedError

    @property
    def ratio(self) -> float:
        """
        Chaos ratio computed from what feed() has seen so far.
        Must NOT be lower than 0.; there is no upper bound.
        """
        raise NotImplementedError  # pragma: nocover
class TooManySymbolOrPunctuationPlugin(MessDetectorPlugin):
    """
    Measure how much of the printable text is punctuation or symbols.
    """

    def __init__(self) -> None:
        self._punctuation_count: int = 0
        self._symbol_count: int = 0
        self._character_count: int = 0

        self._last_printable_char: str | None = None
        # Not read anywhere in this class; kept so the attribute set is unchanged.
        self._frenzy_symbol_in_word: bool = False

    def eligible(self, character: str) -> bool:
        """Only printable characters are analysed."""
        return character.isprintable()

    def feed(self, character: str) -> None:
        """
        Count the character; an immediate repetition of the previous one or a
        common safe ASCII character is never scored. Punctuation weighs 1,
        a non-digit, non-emoticon symbol weighs 2.
        """
        self._character_count += 1

        if (
            character != self._last_printable_char
            and character not in COMMON_SAFE_ASCII_CHARACTERS
        ):
            if is_punctuation(character):
                self._punctuation_count += 1
            elif (
                character.isdigit() is False
                and is_symbol(character)
                and is_emoticon(character) is False
            ):
                self._symbol_count += 2

        self._last_printable_char = character

    def reset(self) -> None:  # Abstract
        """Restore every field to the value assigned in __init__."""
        self._punctuation_count = 0
        self._character_count = 0
        self._symbol_count = 0
        # Also forget the previous character: a stale value would make the
        # first character after a reset look like a repetition.
        self._last_printable_char = None
        self._frenzy_symbol_in_word = False

    @property
    def ratio(self) -> float:
        """Weighted share of punctuation/symbols; 0.0 when below 0.3."""
        if self._character_count == 0:
            return 0.0

        ratio_of_punctuation: float = (
            self._punctuation_count + self._symbol_count
        ) / self._character_count

        return ratio_of_punctuation if ratio_of_punctuation >= 0.3 else 0.0
class TooManyAccentuatedPlugin(MessDetectorPlugin):
    """
    Measure the share of accentuated letters among alphabetic characters.
    """

    def __init__(self) -> None:
        self._character_count: int = 0
        self._accentuated_count: int = 0

    def eligible(self, character: str) -> bool:
        """Only alphabetic characters are analysed."""
        return character.isalpha()

    def feed(self, character: str) -> None:
        """Count the letter, and count it again when it carries an accent."""
        self._character_count += 1
        if is_accentuated(character):
            self._accentuated_count += 1

    def reset(self) -> None:  # Abstract
        """Zero both counters."""
        self._character_count = self._accentuated_count = 0

    @property
    def ratio(self) -> float:
        """
        Accentuated share, reported only from 8 letters seen and a share of
        at least 0.35; otherwise 0.0.
        """
        if self._character_count >= 8:
            share: float = self._accentuated_count / self._character_count
            if share >= 0.35:
                return share
        return 0.0
class UnprintablePlugin(MessDetectorPlugin):
    """
    Measure the share of unprintable characters, weighted by a factor 8.
    """

    def __init__(self) -> None:
        self._unprintable_count: int = 0
        self._character_count: int = 0

    def eligible(self, character: str) -> bool:
        """Every character is analysed."""
        return True

    def feed(self, character: str) -> None:
        """Count the character and flag it when is_unprintable() says so."""
        if is_unprintable(character):
            self._unprintable_count += 1
        self._character_count += 1

    def reset(self) -> None:  # Abstract
        """Restore both counters to the values assigned in __init__."""
        self._unprintable_count = 0
        # The total must be cleared too, otherwise characters seen before the
        # reset keep diluting the ratio computed afterwards.
        self._character_count = 0

    @property
    def ratio(self) -> float:
        """Eight times the unprintable share; 0.0 when nothing was fed."""
        if self._character_count == 0:
            return 0.0

        return (self._unprintable_count * 8) / self._character_count
class SuspiciousDuplicateAccentPlugin(MessDetectorPlugin):
    """
    Spot accentuated Latin letters that directly follow one another.
    """

    def __init__(self) -> None:
        self._successive_count: int = 0
        self._character_count: int = 0

        self._last_latin_character: str | None = None

    def eligible(self, character: str) -> bool:
        """Only alphabetic Latin characters are analysed."""
        return character.isalpha() and is_latin(character)

    def feed(self, character: str) -> None:
        """
        Compare the letter with the previous eligible one: when both are
        accentuated, score one point if both are upper case and one more
        point if they share the same base letter.
        """
        previous = self._last_latin_character

        self._character_count += 1
        self._last_latin_character = character

        if (
            previous is None
            or not is_accentuated(character)
            or not is_accentuated(previous)
        ):
            return

        if character.isupper() and previous.isupper():
            self._successive_count += 1
        # Worse if its the same char duplicated with different accent.
        if remove_accent(character) == remove_accent(previous):
            self._successive_count += 1

    def reset(self) -> None:  # Abstract
        """Zero the counters and forget the previous letter."""
        self._successive_count = self._character_count = 0
        self._last_latin_character = None

    @property
    def ratio(self) -> float:
        """Twice the score divided by the letters seen; 0.0 when none."""
        if self._character_count == 0:
            return 0.0

        return (self._successive_count * 2) / self._character_count
class SuspiciousRange(MessDetectorPlugin):
    """
    Spot adjacent characters whose Unicode ranges look suspicious together.
    """

    def __init__(self) -> None:
        self._suspicious_successive_range_count: int = 0
        self._character_count: int = 0
        self._last_printable_seen: str | None = None

    def eligible(self, character: str) -> bool:
        """Only printable characters are analysed."""
        return character.isprintable()

    def feed(self, character: str) -> None:
        """
        Count the character. Whitespace, punctuation and common safe ASCII
        characters break the pairing; otherwise the character is compared
        with the previously remembered one.
        """
        self._character_count += 1

        breaks_pairing = (
            character.isspace()
            or is_punctuation(character)
            or character in COMMON_SAFE_ASCII_CHARACTERS
        )
        if breaks_pairing:
            self._last_printable_seen = None
            return

        previous = self._last_printable_seen
        self._last_printable_seen = character

        if previous is None:
            return

        range_of_previous: str | None = unicode_range(previous)
        range_of_current: str | None = unicode_range(character)

        if is_suspiciously_successive_range(range_of_previous, range_of_current):
            self._suspicious_successive_range_count += 1

    def reset(self) -> None:  # Abstract
        """Zero the counters and forget the previous character."""
        self._character_count = self._suspicious_successive_range_count = 0
        self._last_printable_seen = None

    @property
    def ratio(self) -> float:
        """
        Twice the suspicious pair count divided by the characters seen;
        0.0 while 13 characters or fewer were fed.
        """
        if self._character_count <= 13:
            return 0.0

        return (
            self._suspicious_successive_range_count * 2
        ) / self._character_count
class SuperWeirdWordPlugin(MessDetectorPlugin):
    """
    Accumulate letters into words and measure how many characters belong to
    words judged suspicious.
    """

    def __init__(self) -> None:
        self._word_count: int = 0
        self._bad_word_count: int = 0
        self._foreign_long_count: int = 0

        self._is_current_word_bad: bool = False
        self._foreign_long_watch: bool = False

        self._character_count: int = 0
        self._bad_character_count: int = 0

        # State of the word currently being accumulated.
        self._buffer: str = ""
        self._buffer_accent_count: int = 0
        self._buffer_glyph_count: int = 0

    def eligible(self, character: str) -> bool:
        """Every character is analysed."""
        return True

    def feed(self, character: str) -> None:
        """
        Letters extend the current word; whitespace, punctuation or a
        separator closes it and triggers its evaluation; most other symbols
        mark the current word as bad.
        """
        if character.isalpha():
            self._buffer += character
            if is_accentuated(character):
                self._buffer_accent_count += 1
            # Watch the word when it holds a letter that is either non-Latin
            # or accentuated, and none of CJK/Hangul/Katakana/Hiragana/Thai.
            if (
                self._foreign_long_watch is False
                and (is_latin(character) is False or is_accentuated(character))
                and is_cjk(character) is False
                and is_hangul(character) is False
                and is_katakana(character) is False
                and is_hiragana(character) is False
                and is_thai(character) is False
            ):
                self._foreign_long_watch = True
            if (
                is_cjk(character)
                or is_hangul(character)
                or is_katakana(character)
                or is_hiragana(character)
                or is_thai(character)
            ):
                self._buffer_glyph_count += 1
            return
        if not self._buffer:
            return
        if (
            character.isspace() or is_punctuation(character) or is_separator(character)
        ) and self._buffer:
            self._word_count += 1
            buffer_length: int = len(self._buffer)

            self._character_count += buffer_length

            if buffer_length >= 4:
                if self._buffer_accent_count / buffer_length >= 0.5:
                    self._is_current_word_bad = True
                # Word/Buffer ending with an upper case accentuated letter are so rare,
                # that we will consider them all as suspicious. Same weight as foreign_long suspicious.
                elif (
                    is_accentuated(self._buffer[-1])
                    and self._buffer[-1].isupper()
                    and all(_.isupper() for _ in self._buffer) is False
                ):
                    self._foreign_long_count += 1
                    self._is_current_word_bad = True
                elif self._buffer_glyph_count == 1:
                    self._is_current_word_bad = True
                    self._foreign_long_count += 1
            if buffer_length >= 24 and self._foreign_long_watch:
                # Positions of the upper case letters in the word.
                camel_case_dst = [
                    i
                    for c, i in zip(self._buffer, range(0, buffer_length))
                    if c.isupper()
                ]
                probable_camel_cased: bool = False

                if camel_case_dst and (len(camel_case_dst) / buffer_length <= 0.3):
                    probable_camel_cased = True

                if not probable_camel_cased:
                    self._foreign_long_count += 1
                    self._is_current_word_bad = True

            if self._is_current_word_bad:
                self._bad_word_count += 1
                self._bad_character_count += len(self._buffer)
                self._is_current_word_bad = False

            # Start a fresh word.
            self._foreign_long_watch = False
            self._buffer = ""
            self._buffer_accent_count = 0
            self._buffer_glyph_count = 0
        elif (
            character not in {"<", ">", "-", "=", "~", "|", "_"}
            and character.isdigit() is False
            and is_symbol(character)
        ):
            self._is_current_word_bad = True
            self._buffer += character

    def reset(self) -> None:  # Abstract
        """Restore every field to the value assigned in __init__."""
        self._buffer = ""
        # The per-word counters must be cleared together with the buffer;
        # otherwise accents/glyphs of a discarded word would be attributed
        # to the first word fed after the reset.
        self._buffer_accent_count = 0
        self._buffer_glyph_count = 0
        self._is_current_word_bad = False
        self._foreign_long_watch = False
        self._bad_word_count = 0
        self._word_count = 0
        self._character_count = 0
        self._bad_character_count = 0
        self._foreign_long_count = 0

    @property
    def ratio(self) -> float:
        """
        Share of characters belonging to bad words; 0.0 while at most 10
        words were closed and no foreign-long suspicion was recorded.
        """
        if self._word_count <= 10 and self._foreign_long_count == 0:
            return 0.0

        return self._bad_character_count / self._character_count
class CjkUncommonPlugin(MessDetectorPlugin):
    """
    Detect messy CJK text that probably means nothing.
    """

    def __init__(self) -> None:
        self._character_count: int = 0
        self._uncommon_count: int = 0

    def eligible(self, character: str) -> bool:
        """Only CJK characters are analysed."""
        return is_cjk(character)

    def feed(self, character: str) -> None:
        """Count the character, and count it again when it is uncommon."""
        self._character_count += 1
        if is_cjk_uncommon(character):
            self._uncommon_count += 1

    def reset(self) -> None:  # Abstract
        """Zero both counters."""
        self._character_count = self._uncommon_count = 0

    @property
    def ratio(self) -> float:
        """
        A tenth of the uncommon share, reported only from 8 characters seen
        and a share above 0.5; otherwise 0.0.
        """
        if self._character_count < 8:
            return 0.0

        uncommon_share: float = self._uncommon_count / self._character_count

        # we can be pretty sure it's garbage when uncommon characters are widely
        # used. otherwise it could just be traditional chinese for example.
        if uncommon_share > 0.5:
            return uncommon_share / 10
        return 0.0
class ArchaicUpperLowerPlugin(MessDetectorPlugin):
    """
    Spot chunks of letters whose case keeps alternating.
    """

    def __init__(self) -> None:
        self._buf: bool = False

        self._character_count_since_last_sep: int = 0

        self._successive_upper_lower_count: int = 0
        self._successive_upper_lower_count_final: int = 0

        self._character_count: int = 0

        self._last_alpha_seen: str | None = None
        self._current_ascii_only: bool = True

    def eligible(self, character: str) -> bool:
        """Every character is analysed."""
        return True

    def feed(self, character: str) -> None:
        """
        A character that is not a case-variable letter ends the current
        chunk; the chunk's score is kept only if the chunk is at most 64
        characters long, is not ended by a digit and is not ASCII-only.
        Inside a chunk, a second consecutive case flip scores 2 points.
        """
        is_concerned = character.isalpha() and is_case_variable(character)

        if is_concerned is False and self._character_count_since_last_sep > 0:
            keep_chunk_score = (
                self._character_count_since_last_sep <= 64
                and character.isdigit() is False
                and self._current_ascii_only is False
            )
            if keep_chunk_score:
                self._successive_upper_lower_count_final += (
                    self._successive_upper_lower_count
                )

            # Start over for the next chunk.
            self._successive_upper_lower_count = 0
            self._character_count_since_last_sep = 0
            self._last_alpha_seen = None
            self._buf = False
            self._current_ascii_only = True
            self._character_count += 1
            return

        if self._current_ascii_only is True and character.isascii() is False:
            self._current_ascii_only = False

        previous = self._last_alpha_seen
        if previous is not None:
            case_flipped = (character.isupper() and previous.islower()) or (
                character.islower() and previous.isupper()
            )
            if case_flipped and self._buf is True:
                self._successive_upper_lower_count += 2
                self._buf = False
            else:
                # First flip arms the flag; no flip disarms it.
                self._buf = case_flipped

        self._character_count += 1
        self._character_count_since_last_sep += 1
        self._last_alpha_seen = character

    def reset(self) -> None:  # Abstract
        """Restore every field to the value assigned in __init__."""
        self._character_count = 0
        self._character_count_since_last_sep = 0
        self._successive_upper_lower_count = 0
        self._successive_upper_lower_count_final = 0
        self._last_alpha_seen = None
        self._buf = False
        self._current_ascii_only = True

    @property
    def ratio(self) -> float:
        """Retained score divided by the characters seen; 0.0 when none."""
        if self._character_count == 0:
            return 0.0

        return self._successive_upper_lower_count_final / self._character_count
class ArabicIsolatedFormPlugin(MessDetectorPlugin):
    """
    Measure the share of isolated forms among Arabic characters.
    """

    def __init__(self) -> None:
        self._character_count: int = 0
        self._isolated_form_count: int = 0

    def reset(self) -> None:  # Abstract
        """Zero both counters."""
        self._character_count = self._isolated_form_count = 0

    def eligible(self, character: str) -> bool:
        """Only Arabic characters are analysed."""
        return is_arabic(character)

    def feed(self, character: str) -> None:
        """Count the character, and count it again when it is an isolated form."""
        self._character_count += 1
        if is_arabic_isolated_form(character):
            self._isolated_form_count += 1

    @property
    def ratio(self) -> float:
        """Isolated-form share; 0.0 while fewer than 8 characters were fed."""
        if self._character_count < 8:
            return 0.0

        return self._isolated_form_count / self._character_count
@lru_cache(maxsize=1024)
def is_suspiciously_successive_range(
unicode_range_a: str | None, unicode_range_b: str | None
) -> bool:
"""
Determine if two Unicode range seen next to each other can be considered as suspicious.
"""
if unicode_range_a is None or unicode_range_b is None:
return True
if unicode_range_a == unicode_range_b:
return False
if "Latin" in unicode_range_a and "Latin" in unicode_range_b:
return False
if "Emoticons" in unicode_range_a or "Emoticons" in unicode_range_b:
return False
# Latin characters can be accompanied with a combining diacritical mark
# eg. Vietnamese.
if ("Latin" in unicode_range_a or "Latin" in unicode_range_b) and (
"Combining" in unicode_range_a or "Combining" in unicode_range_b
):
return False
keywords_range_a, keywords_range_b = (
unicode_range_a.split(" "),
unicode_range_b.split(" "),
)
for el in keywords_range_a:
if el in UNICODE_SECONDARY_RANGE_KEYWORD:
continue
if el in keywords_range_b:
return False
# Japanese Exception
range_a_jp_chars, range_b_jp_chars = (
unicode_range_a
in (
"Hiragana",
"Katakana",
),
unicode_range_b in ("Hiragana", "Katakana"),
)
if (range_a_jp_chars or range_b_jp_chars) and (
"CJK" in unicode_range_a or "CJK" in unicode_range_b
):
return False
if range_a_jp_chars and range_b_jp_chars:
return False
if "Hangul" in unicode_range_a or "Hangul" in unicode_range_b:
if "CJK" in unicode_range_a or "CJK" in unicode_range_b:
return False
if unicode_range_a == "Basic Latin" or unicode_range_b == "Basic Latin":
return False
# Chinese/Japanese use dedicated range for punctuation and/or separators.
if ("CJK" in unicode_range_a or "CJK" in unicode_range_b) or (
unicode_range_a in ["Katakana", "Hiragana"]
and unicode_range_b in ["Katakana", "Hiragana"]
):
if "Punctuation" in unicode_range_a or "Punctuation" in unicode_range_b:
return False
if "Forms" in unicode_range_a or "Forms" in unicode_range_b:
return False
if unicode_range_a == "Basic Latin" or unicode_range_b == "Basic Latin":
return False
return True
@lru_cache(maxsize=2048)
def mess_ratio(
    decoded_sequence: str, maximum_threshold: float = 0.2, debug: bool = False
) -> float:
    """
    Compute a mess ratio for a decoded sequence by feeding it, followed by a
    final newline, to one instance of every MessDetectorPlugin subclass.
    The summed ratio is refreshed periodically and the scan stops as soon as
    it reaches the maximum threshold. The result is rounded to 3 decimals.
    """
    detectors: list[MessDetectorPlugin] = [
        plugin_class() for plugin_class in MessDetectorPlugin.__subclasses__()
    ]

    length: int = len(decoded_sequence) + 1
    final_index: int = length - 1

    # How often (in characters) the summed ratio is re-evaluated.
    if length < 512:
        checkpoint_step: int = 32
    elif length <= 1024:
        checkpoint_step = 64
    else:
        checkpoint_step = 128

    mean_mess_ratio: float = 0.0

    for index, character in enumerate(decoded_sequence + "\n"):
        for detector in detectors:
            if detector.eligible(character):
                detector.feed(character)

        at_checkpoint = index > 0 and index % checkpoint_step == 0
        if at_checkpoint or index == final_index:
            mean_mess_ratio = sum(dt.ratio for dt in detectors)

            if mean_mess_ratio >= maximum_threshold:
                break

    if debug:
        logger = getLogger("charset_normalizer")

        logger.log(
            TRACE,
            "Mess-detector extended-analysis start. "
            f"intermediary_mean_mess_ratio_calc={checkpoint_step} mean_mess_ratio={mean_mess_ratio} "
            f"maximum_threshold={maximum_threshold}",
        )

        if len(decoded_sequence) > 16:
            logger.log(TRACE, f"Starting with: {decoded_sequence[:16]}")
            logger.log(TRACE, f"Ending with: {decoded_sequence[-16::]}")

        for dt in detectors:
            logger.log(TRACE, f"{dt.__class__}: {dt.ratio}")

    return round(mean_mess_ratio, 3)

View File

@@ -0,0 +1,360 @@
from __future__ import annotations
from encodings.aliases import aliases
from hashlib import sha256
from json import dumps
from re import sub
from typing import Any, Iterator, List, Tuple
from .constant import RE_POSSIBLE_ENCODING_INDICATION, TOO_BIG_SEQUENCE
from .utils import iana_name, is_multi_byte_encoding, unicode_range
class CharsetMatch:
    """
    One candidate encoding for a payload, with its mess ratio, the language
    coherence matches and helpers to decode or re-encode the content.
    """

    def __init__(
        self,
        payload: bytes,
        guessed_encoding: str,
        mean_mess_ratio: float,
        has_sig_or_bom: bool,
        languages: CoherenceMatches,
        decoded_payload: str | None = None,
        preemptive_declaration: str | None = None,
    ):
        self._payload: bytes = payload

        self._encoding: str = guessed_encoding
        self._mean_mess_ratio: float = mean_mess_ratio
        self._languages: CoherenceMatches = languages
        self._has_sig_or_bom: bool = has_sig_or_bom
        # Lazily computed by the 'alphabets' property.
        self._unicode_ranges: list[str] | None = None

        # Other matches that decode to the same result (see add_submatch).
        self._leaves: list[CharsetMatch] = []
        self._mean_coherence_ratio: float = 0.0

        # Cache for output(): last produced payload and its target encoding.
        self._output_payload: bytes | None = None
        self._output_encoding: str | None = None

        # Decoded text; filled lazily by __str__ when not provided.
        self._string: str | None = decoded_payload

        self._preemptive_declaration: str | None = preemptive_declaration

    def __eq__(self, other: object) -> bool:
        """
        Two matches are equal when encoding and fingerprint agree. A string
        is compared, through iana_name(), with this match's encoding.
        """
        if not isinstance(other, CharsetMatch):
            if isinstance(other, str):
                return iana_name(other) == self.encoding
            return False
        return self.encoding == other.encoding and self.fingerprint == other.fingerprint

    def __lt__(self, other: object) -> bool:
        """
        Implemented to make sorted available upon CharsetMatches items.

        :raises ValueError: ``other`` is not a CharsetMatch.
        """
        if not isinstance(other, CharsetMatch):
            raise ValueError

        chaos_difference: float = abs(self.chaos - other.chaos)
        coherence_difference: float = abs(self.coherence - other.coherence)

        # Below 1% difference --> Use Coherence
        if chaos_difference < 0.01 and coherence_difference > 0.02:
            return self.coherence > other.coherence
        elif chaos_difference < 0.01 and coherence_difference <= 0.02:
            # When having a difficult decision, use the result that decoded as many multi-byte as possible.
            # preserve RAM usage!
            if len(self._payload) >= TOO_BIG_SEQUENCE:
                return self.chaos < other.chaos
            return self.multi_byte_usage > other.multi_byte_usage

        return self.chaos < other.chaos

    @property
    def multi_byte_usage(self) -> float:
        """
        1.0 minus the ratio of decoded characters to raw bytes.
        An empty payload yields 0.0 instead of dividing by zero.
        """
        raw_length: int = len(self.raw)
        if raw_length == 0:
            return 0.0
        return 1.0 - (len(str(self)) / raw_length)

    def __str__(self) -> str:
        """Decoded payload, decoded strictly on first use and then cached."""
        # Lazy Str Loading
        if self._string is None:
            self._string = str(self._payload, self._encoding, "strict")
        return self._string

    def __repr__(self) -> str:
        return f"<CharsetMatch '{self.encoding}' bytes({self.fingerprint})>"

    def add_submatch(self, other: CharsetMatch) -> None:
        """
        Register ``other`` as a submatch of this match.

        :raises ValueError: ``other`` is not a CharsetMatch or equals self.
        """
        if not isinstance(other, CharsetMatch) or other == self:
            raise ValueError(
                "Unable to add instance <{}> as a submatch of a CharsetMatch".format(
                    other.__class__
                )
            )

        other._string = None  # Unload RAM usage; dirty trick.
        self._leaves.append(other)

    @property
    def encoding(self) -> str:
        return self._encoding

    @property
    def encoding_aliases(self) -> list[str]:
        """
        Encoding name are known by many name, using this could help when searching for IBM855 when it's listed as CP855.
        """
        also_known_as: list[str] = []
        for u, p in aliases.items():
            if self.encoding == u:
                also_known_as.append(p)
            elif self.encoding == p:
                also_known_as.append(u)
        return also_known_as

    @property
    def bom(self) -> bool:
        return self._has_sig_or_bom

    @property
    def byte_order_mark(self) -> bool:
        """Same value as the 'bom' property."""
        return self._has_sig_or_bom

    @property
    def languages(self) -> list[str]:
        """
        Return the complete list of possible languages found in decoded sequence.
        Usually not really useful. Returned list may be empty even if 'language' property return something != 'Unknown'.
        """
        return [e[0] for e in self._languages]

    @property
    def language(self) -> str:
        """
        Most probable language found in decoded sequence. If none were detected or inferred, the property will return
        "Unknown".
        """
        if not self._languages:
            # Trying to infer the language based on the given encoding
            # Its either English or we should not pronounce ourselves in certain cases.
            if "ascii" in self.could_be_from_charset:
                return "English"

            # doing it there to avoid circular import
            from charset_normalizer.cd import encoding_languages, mb_encoding_languages

            languages = (
                mb_encoding_languages(self.encoding)
                if is_multi_byte_encoding(self.encoding)
                else encoding_languages(self.encoding)
            )

            if len(languages) == 0 or "Latin Based" in languages:
                return "Unknown"

            return languages[0]

        return self._languages[0][0]

    @property
    def chaos(self) -> float:
        return self._mean_mess_ratio

    @property
    def coherence(self) -> float:
        """Ratio of the first language match, or 0.0 when there is none."""
        if not self._languages:
            return 0.0
        return self._languages[0][1]

    @property
    def percent_chaos(self) -> float:
        return round(self.chaos * 100, ndigits=3)

    @property
    def percent_coherence(self) -> float:
        return round(self.coherence * 100, ndigits=3)

    @property
    def raw(self) -> bytes:
        """
        Original untouched bytes.
        """
        return self._payload

    @property
    def submatch(self) -> list[CharsetMatch]:
        return self._leaves

    @property
    def has_submatch(self) -> bool:
        return len(self._leaves) > 0

    @property
    def alphabets(self) -> list[str]:
        """Sorted, de-duplicated Unicode range names of the decoded text (cached)."""
        if self._unicode_ranges is not None:
            return self._unicode_ranges
        # list detected ranges
        detected_ranges: list[str | None] = [unicode_range(char) for char in str(self)]
        # filter and sort
        self._unicode_ranges = sorted(list({r for r in detected_ranges if r}))
        return self._unicode_ranges

    @property
    def could_be_from_charset(self) -> list[str]:
        """
        The complete list of encoding that output the exact SAME str result and therefore could be the originating
        encoding.
        This list does include the encoding available in property 'encoding'.
        """
        return [self._encoding] + [m.encoding for m in self._leaves]

    def output(self, encoding: str = "utf_8") -> bytes:
        """
        Method to get re-encoded bytes payload using given target encoding. Default to UTF-8.
        The encoder runs with the "replace" error handler: characters the
        target encoding cannot represent are substituted, not dropped.
        The result is cached until a different target encoding is requested.
        """
        if self._output_encoding is None or self._output_encoding != encoding:
            self._output_encoding = encoding
            decoded_string = str(self)
            if (
                self._preemptive_declaration is not None
                and self._preemptive_declaration.lower()
                not in ["utf-8", "utf8", "utf_8"]
            ):
                # Rewrite the first encoding declaration found in the first
                # 8192 characters so that it names the output encoding.
                patched_header = sub(
                    RE_POSSIBLE_ENCODING_INDICATION,
                    lambda m: m.string[m.span()[0] : m.span()[1]].replace(
                        m.groups()[0],
                        iana_name(self._output_encoding).replace("_", "-"),  # type: ignore[arg-type]
                    ),
                    decoded_string[:8192],
                    count=1,
                )

                decoded_string = patched_header + decoded_string[8192:]

            self._output_payload = decoded_string.encode(encoding, "replace")

        return self._output_payload  # type: ignore

    @property
    def fingerprint(self) -> str:
        """
        Retrieve the unique SHA256 computed using the transformed (re-encoded) payload. Not the original one.
        """
        return sha256(self.output()).hexdigest()
class CharsetMatches:
    """
    Container of CharsetMatch items, kept sorted from the most probable to
    the least probable one.
    Behaves like a list(iterable) but does not implement every list method.
    """

    def __init__(self, results: list[CharsetMatch] | None = None):
        self._results: list[CharsetMatch] = sorted(results) if results else []

    def __iter__(self) -> Iterator[CharsetMatch]:
        for match in self._results:
            yield match

    def __getitem__(self, item: int | str) -> CharsetMatch:
        """
        Retrieve a single item either by its position or encoding name (alias may be used here).
        An out-of-range position raises IndexError; an encoding absent from
        the results, or a key of any other type, raises KeyError.
        """
        if isinstance(item, int):
            return self._results[item]

        if not isinstance(item, str):
            raise KeyError

        wanted = iana_name(item, False)
        for candidate in self._results:
            if wanted in candidate.could_be_from_charset:
                return candidate

        raise KeyError

    def __len__(self) -> int:
        return len(self._results)

    def __bool__(self) -> bool:
        return bool(self._results)

    def append(self, item: CharsetMatch) -> None:
        """
        Insert a single match; the container is re-sorted afterwards.
        When an existing match has the same fingerprint and chaos, the item
        is attached to it as a submatch instead.

        :raises ValueError: ``item`` is not a CharsetMatch.
        """
        if not isinstance(item, CharsetMatch):
            raise ValueError(
                f"Cannot append instance '{item.__class__}' to CharsetMatches"
            )

        # We should disable the submatch factoring when the input file is too heavy (conserve RAM usage)
        if len(item.raw) < TOO_BIG_SEQUENCE:
            for existing in self._results:
                same_content = existing.fingerprint == item.fingerprint
                if same_content and existing.chaos == item.chaos:
                    existing.add_submatch(item)
                    return

        self._results.append(item)
        self._results = sorted(self._results)

    def best(self) -> CharsetMatch | None:
        """
        Return the first (most probable) match, or None when empty.
        """
        return self._results[0] if self._results else None

    def first(self) -> CharsetMatch | None:
        """
        Redundant method, call the method best(). Kept for BC reasons.
        """
        return self.best()
# A single language match: CharsetMatch reads item [0] as the language name
# and item [1] as its coherence ratio.
CoherenceMatch = Tuple[str, float]
# List of language matches; CharsetMatch treats the first entry as the most
# probable language.
CoherenceMatches = List[CoherenceMatch]
class CliDetectionResult:
    """Plain record of one detection result that can be serialized to JSON."""

    def __init__(
        self,
        path: str,
        encoding: str | None,
        encoding_aliases: list[str],
        alternative_encodings: list[str],
        language: str,
        alphabets: list[str],
        has_sig_or_bom: bool,
        chaos: float,
        coherence: float,
        unicode_path: str | None,
        is_preferred: bool,
    ):
        self.path: str = path
        self.unicode_path: str | None = unicode_path
        self.encoding: str | None = encoding
        self.encoding_aliases: list[str] = encoding_aliases
        self.alternative_encodings: list[str] = alternative_encodings
        self.language: str = language
        self.alphabets: list[str] = alphabets
        self.has_sig_or_bom: bool = has_sig_or_bom
        self.chaos: float = chaos
        self.coherence: float = coherence
        self.is_preferred: bool = is_preferred

    @property
    def __dict__(self) -> dict[str, Any]:  # type: ignore
        # Overrides the default attribute mapping so that the exported keys
        # come out in a fixed order ("unicode_path" near the end).
        exported_fields = (
            "path",
            "encoding",
            "encoding_aliases",
            "alternative_encodings",
            "language",
            "alphabets",
            "has_sig_or_bom",
            "chaos",
            "coherence",
            "unicode_path",
            "is_preferred",
        )
        return {field: getattr(self, field) for field in exported_fields}

    def to_json(self) -> str:
        """Serialize the record as ASCII-only JSON indented by four spaces."""
        payload = self.__dict__
        return dumps(payload, ensure_ascii=True, indent=4)

View File

@@ -0,0 +1,414 @@
from __future__ import annotations
import importlib
import logging
import unicodedata
from codecs import IncrementalDecoder
from encodings.aliases import aliases
from functools import lru_cache
from re import findall
from typing import Generator
from _multibytecodec import ( # type: ignore[import-not-found,import]
MultibyteIncrementalDecoder,
)
from .constant import (
ENCODING_MARKS,
IANA_SUPPORTED_SIMILAR,
RE_POSSIBLE_ENCODING_INDICATION,
UNICODE_RANGES_COMBINED,
UNICODE_SECONDARY_RANGE_KEYWORD,
UTF8_MAXIMAL_ALLOCATION,
COMMON_CJK_CHARACTERS,
)
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_accentuated(character: str) -> bool:
    """
    Tell whether the Unicode name of ``character`` mentions one of the
    diacritics listed below. Characters without a Unicode name yield False.
    """
    try:
        unicode_name: str = unicodedata.name(character)
    except ValueError:  # Defensive: unicode database outdated?
        return False

    accent_markers = (
        "WITH GRAVE",
        "WITH ACUTE",
        "WITH CEDILLA",
        "WITH DIAERESIS",
        "WITH CIRCUMFLEX",
        "WITH TILDE",
        "WITH MACRON",
        "WITH RING ABOVE",
    )
    return any(marker in unicode_name for marker in accent_markers)
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def remove_accent(character: str) -> str:
    """
    Return the first code point of the decomposition mapping of ``character``
    (the base letter for an accentuated character), or ``character`` itself
    when it has no decomposition.

    Compatibility mappings start with a formatting tag such as ``<compat>`` or
    ``<super>``. The tag is not a hexadecimal code point, so it is skipped
    rather than passed to ``int(..., 16)``, which would raise ValueError.
    """
    decomposed: str = unicodedata.decomposition(character)
    if not decomposed:
        return character

    # Keep only the hexadecimal code points, dropping an optional leading tag.
    codes: list[str] = [
        code for code in decomposed.split(" ") if code and not code.startswith("<")
    ]
    if not codes:
        return character

    return chr(int(codes[0], 16))
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def unicode_range(character: str) -> str | None:
    """
    Look up the name of the first entry in UNICODE_RANGES_COMBINED whose range
    contains the code point of ``character``; None when no entry matches.
    """
    code_point: int = ord(character)
    return next(
        (
            block_name
            for block_name, block_span in UNICODE_RANGES_COMBINED.items()
            if code_point in block_span
        ),
        None,
    )
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_latin(character: str) -> bool:
    """True when the Unicode name of ``character`` contains "LATIN"."""
    try:
        return "LATIN" in unicodedata.name(character)
    except ValueError:  # Defensive: unicode database outdated?
        return False
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_punctuation(character: str) -> bool:
    """
    True when the general category of ``character`` contains "P", or when the
    name of its Unicode range contains "Punctuation".
    """
    if "P" in unicodedata.category(character):
        return True

    block_name: str | None = unicode_range(character)
    return block_name is not None and "Punctuation" in block_name
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_symbol(character: str) -> bool:
    """
    True when the general category of ``character`` contains "S" or "N", or
    when its Unicode range name contains "Forms" and the category is not "Lo".
    """
    category: str = unicodedata.category(character)
    if "S" in category or "N" in category:
        return True

    block_name: str | None = unicode_range(character)
    return block_name is not None and "Forms" in block_name and category != "Lo"
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_emoticon(character: str) -> bool:
    """True when the Unicode range name contains "Emoticons" or "Pictographs"."""
    block_name: str | None = unicode_range(character)
    if block_name is None:
        return False
    return any(keyword in block_name for keyword in ("Emoticons", "Pictographs"))
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_separator(character: str) -> bool:
    """
    True when ``character`` is whitespace, one of the explicitly listed
    separators, or belongs to a separator-like general category
    (any "Z*" category, or "Po", "Pd", "Pc").
    """
    # The first entry is U+FF5C FULLWIDTH VERTICAL LINE, written as an escape so
    # it survives ASCII-only tooling. An empty-string entry could never match a
    # one-character input; the "S*" and "P*" category checks below do not cover
    # this character either, so it has to be listed explicitly.
    if character.isspace() or character in {"\uff5c", "+", "<", ">"}:
        return True

    character_category: str = unicodedata.category(character)

    return "Z" in character_category or character_category in {"Po", "Pd", "Pc"}
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_case_variable(character: str) -> bool:
    """True when exactly one of ``islower()`` / ``isupper()`` holds."""
    lowercase: bool = character.islower()
    uppercase: bool = character.isupper()
    return lowercase != uppercase
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_cjk(character: str) -> bool:
    """True when the Unicode name of ``character`` contains "CJK"."""
    try:
        return "CJK" in unicodedata.name(character)
    except ValueError:  # Defensive: unicode database outdated?
        return False
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_hiragana(character: str) -> bool:
    """True when the Unicode name of ``character`` contains "HIRAGANA"."""
    try:
        return "HIRAGANA" in unicodedata.name(character)
    except ValueError:  # Defensive: unicode database outdated?
        return False
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_katakana(character: str) -> bool:
    """True when the Unicode name of ``character`` contains "KATAKANA"."""
    try:
        return "KATAKANA" in unicodedata.name(character)
    except ValueError:  # Defensive: unicode database outdated?
        return False
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_hangul(character: str) -> bool:
    """True when the Unicode name of ``character`` contains "HANGUL"."""
    try:
        return "HANGUL" in unicodedata.name(character)
    except ValueError:  # Defensive: unicode database outdated?
        return False
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_thai(character: str) -> bool:
    """True when the Unicode name of ``character`` contains "THAI"."""
    try:
        return "THAI" in unicodedata.name(character)
    except ValueError:  # Defensive: unicode database outdated?
        return False
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_arabic(character: str) -> bool:
    """True when the Unicode name of ``character`` contains "ARABIC"."""
    try:
        return "ARABIC" in unicodedata.name(character)
    except ValueError:  # Defensive: unicode database outdated?
        return False
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_arabic_isolated_form(character: str) -> bool:
    """
    True when the Unicode name of ``character`` contains both "ARABIC" and
    "ISOLATED FORM".
    """
    try:
        unicode_name = unicodedata.name(character)
    except ValueError:  # Defensive: unicode database outdated?
        return False
    if "ARABIC" not in unicode_name:
        return False
    return "ISOLATED FORM" in unicode_name
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_cjk_uncommon(character: str) -> bool:
    """True when ``character`` is absent from COMMON_CJK_CHARACTERS."""
    is_common = character in COMMON_CJK_CHARACTERS
    return not is_common
@lru_cache(maxsize=len(UNICODE_RANGES_COMBINED))
def is_unicode_range_secondary(range_name: str) -> bool:
    """
    True when ``range_name`` contains at least one keyword from
    UNICODE_SECONDARY_RANGE_KEYWORD.
    """
    for keyword in UNICODE_SECONDARY_RANGE_KEYWORD:
        if keyword in range_name:
            return True
    return False
@lru_cache(maxsize=UTF8_MAXIMAL_ALLOCATION)
def is_unprintable(character: str) -> bool:
    """
    True when ``character`` is neither whitespace nor printable, with two
    explicit exceptions that are never reported as unprintable.
    """
    # Whitespace includes \n \t \r \v.
    if character.isspace():
        return False
    if character.isprintable():
        return False
    # "\x1a" is the ASCII substitute character.
    # "\ufeff" (Zero Width No-Break Space, located in Arabic Presentation
    # Forms-B) is not acknowledged as a space by Python.
    return character not in {"\x1a", "\ufeff"}
def any_specified_encoding(sequence: bytes, search_zone: int = 8192) -> str | None:
    """
    Search the first ``search_zone`` bytes (decoded as ASCII, ignoring errors)
    for an encoding declaration and return the matching Python codec name.

    Returns None when no declaration is found or when none of the declared
    names is a known alias or codec name. Raises TypeError when ``sequence``
    is not a ``bytes`` instance.
    """
    if not isinstance(sequence, bytes):
        raise TypeError

    head: bytes = sequence[: min(len(sequence), search_zone)]
    declared_names: list[str] = findall(
        RE_POSSIBLE_ENCODING_INDICATION,
        head.decode("ascii", errors="ignore"),
    )

    for declared in declared_names:
        normalized = declared.lower().replace("-", "_")
        for alias, codec_name in aliases.items():
            # Accept either an alias or the codec name itself.
            if normalized == alias or normalized == codec_name:
                return codec_name

    return None
@lru_cache(maxsize=128)
def is_multi_byte_encoding(name: str) -> bool:
    """
    Tell whether the codec called ``name`` (Python normalized name) is a
    multi-byte one.

    The Unicode transformation formats listed below are answered directly; any
    other codec is imported from the ``encodings`` package and counts as
    multi-byte when its IncrementalDecoder derives from
    MultibyteIncrementalDecoder.
    """
    unicode_transformation_formats = {
        "utf_8",
        "utf_8_sig",
        "utf_16",
        "utf_16_be",
        "utf_16_le",
        "utf_32",
        "utf_32_le",
        "utf_32_be",
        "utf_7",
    }
    if name in unicode_transformation_formats:
        return True

    codec_module = importlib.import_module(f"encodings.{name}")
    return issubclass(codec_module.IncrementalDecoder, MultibyteIncrementalDecoder)
def identify_sig_or_bom(sequence: bytes) -> tuple[str | None, bytes]:
    """
    Find the first entry of ENCODING_MARKS whose mark prefixes ``sequence``.

    Returns ``(encoding name, matched mark)``, or ``(None, b"")`` when the
    sequence starts with none of the known marks.
    """
    for encoding_name, declared_marks in ENCODING_MARKS.items():
        # An entry holds either a single mark or a list of marks.
        candidates: list[bytes] = (
            [declared_marks] if isinstance(declared_marks, bytes) else declared_marks
        )
        for candidate in candidates:
            if sequence.startswith(candidate):
                return encoding_name, candidate

    return None, b""
def should_strip_sig_or_bom(iana_encoding: str) -> bool:
    """False for "utf_16" and "utf_32", True for every other encoding name."""
    encodings_keeping_mark = {"utf_16", "utf_32"}
    return iana_encoding not in encodings_keeping_mark
def iana_name(cp_name: str, strict: bool = True) -> str:
    """
    Return the Python normalized encoding name (not the official IANA name).

    The input is lower-cased and dashes become underscores before it is
    compared with the stdlib alias table. When nothing matches, ValueError is
    raised if ``strict`` is true; otherwise the normalized input is returned.
    """
    normalized = cp_name.lower().replace("-", "_")

    for alias, codec_name in aliases.items():
        if normalized == alias or normalized == codec_name:
            return codec_name

    if not strict:
        return normalized

    raise ValueError(f"Unable to retrieve IANA for '{normalized}'")
def cp_similarity(iana_name_a: str, iana_name_b: str) -> float:
    """
    Ratio, in [0.0, 1.0], of single-byte values that both code pages decode to
    the same text.

    Returns 0.0 as soon as one of the two encodings is a multi-byte one.
    All 256 byte values are compared and the count is divided by 256. Probing
    only 255 values while dividing by 254 would leave 0xFF untested and let
    two identical code pages score above 1.0.
    """
    if is_multi_byte_encoding(iana_name_a) or is_multi_byte_encoding(iana_name_b):
        return 0.0

    decoder_a = importlib.import_module(f"encodings.{iana_name_a}").IncrementalDecoder
    decoder_b = importlib.import_module(f"encodings.{iana_name_b}").IncrementalDecoder

    # errors="ignore": an undefined byte decodes to "" instead of raising.
    id_a: IncrementalDecoder = decoder_a(errors="ignore")
    id_b: IncrementalDecoder = decoder_b(errors="ignore")

    byte_values = range(256)
    character_match_count: int = 0

    for value in byte_values:
        to_be_decoded: bytes = bytes([value])
        if id_a.decode(to_be_decoded) == id_b.decode(to_be_decoded):
            character_match_count += 1

    return character_match_count / len(byte_values)
def is_cp_similar(iana_name_a: str, iana_name_b: str) -> bool:
    """
    Determine whether two code pages are at least 80% similar, according to the
    precomputed IANA_SUPPORTED_SIMILAR table (generated with cp_similarity).
    """
    if iana_name_a not in IANA_SUPPORTED_SIMILAR:
        return False
    return iana_name_b in IANA_SUPPORTED_SIMILAR[iana_name_a]
def set_logging_handler(
    name: str = "charset_normalizer",
    level: int = logging.INFO,
    format_string: str = "%(asctime)s | %(levelname)s | %(message)s",
) -> None:
    """
    Set ``level`` on the logger called ``name`` and attach a new StreamHandler
    that formats records with ``format_string``.

    Each call adds one more handler to the logger.
    """
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(logging.Formatter(format_string))

    target_logger = logging.getLogger(name)
    target_logger.setLevel(level)
    target_logger.addHandler(stream_handler)
def cut_sequence_chunks(
    sequences: bytes,
    encoding_iana: str,
    offsets: range,
    chunk_size: int,
    bom_or_sig_available: bool,
    strip_sig_or_bom: bool,
    sig_payload: bytes,
    is_multi_byte_decoder: bool,
    decoded_payload: str | None = None,
) -> Generator[str, None, None]:
    """
    Yield decoded text chunks of ``chunk_size`` taken at each of ``offsets``.

    For a single-byte decoder with an already decoded payload, the chunks are
    sliced straight from ``decoded_payload`` and iteration stops at the first
    empty slice. Otherwise each chunk is cut from the raw bytes (with the
    SIG/BOM prepended when one exists and must not be stripped) and decoded.
    A multi-byte chunk that cannot be found in ``decoded_payload`` is re-cut
    starting up to three bytes earlier.
    """
    if decoded_payload and is_multi_byte_decoder is False:
        for start in offsets:
            piece = decoded_payload[start : start + chunk_size]
            if not piece:
                return
            yield piece
        return

    prepend_sig = bom_or_sig_available and strip_sig_or_bom is False
    error_mode = "ignore" if is_multi_byte_decoder else "strict"

    for start in offsets:
        stop = start + chunk_size
        # Skip offsets whose chunk would end more than 8 bytes past the data.
        if stop > len(sequences) + 8:
            continue

        raw_slice = sequences[start:stop]
        if prepend_sig:
            raw_slice = sig_payload + raw_slice

        piece = raw_slice.decode(encoding_iana, errors=error_mode)

        # multi-byte bad cutting detector and adjustment
        # not the cleanest way to perform that fix but clever enough for now.
        if is_multi_byte_decoder and start > 0:
            probe_length: int = min(chunk_size, 16)

            if decoded_payload and piece[:probe_length] not in decoded_payload:
                # Walk the start back one byte at a time until the head of the
                # decoded chunk shows up in the reference payload.
                for shifted_start in range(start, start - 4, -1):
                    raw_slice = sequences[shifted_start:stop]
                    if prepend_sig:
                        raw_slice = sig_payload + raw_slice

                    piece = raw_slice.decode(encoding_iana, errors="ignore")
                    if piece[:probe_length] in decoded_payload:
                        break

        yield piece

View File

@@ -0,0 +1,8 @@
"""
Expose version
"""
from __future__ import annotations
# Package version string.
__version__ = "3.4.4"
# Version components as a list of strings (split on "."), not integers.
VERSION = __version__.split(".")

View File

@@ -0,0 +1,84 @@
Metadata-Version: 2.4
Name: click
Version: 8.3.0
Summary: Composable command line interface toolkit
Maintainer-email: Pallets <contact@palletsprojects.com>
Requires-Python: >=3.10
Description-Content-Type: text/markdown
License-Expression: BSD-3-Clause
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Typing :: Typed
License-File: LICENSE.txt
Requires-Dist: colorama; platform_system == 'Windows'
Project-URL: Changes, https://click.palletsprojects.com/page/changes/
Project-URL: Chat, https://discord.gg/pallets
Project-URL: Documentation, https://click.palletsprojects.com/
Project-URL: Donate, https://palletsprojects.com/donate
Project-URL: Source, https://github.com/pallets/click/
<div align="center"><img src="https://raw.githubusercontent.com/pallets/click/refs/heads/stable/docs/_static/click-name.svg" alt="" height="150"></div>
# Click
Click is a Python package for creating beautiful command line interfaces
in a composable way with as little code as necessary. It's the "Command
Line Interface Creation Kit". It's highly configurable but comes with
sensible defaults out of the box.
It aims to make the process of writing command line tools quick and fun
while also preventing any frustration caused by the inability to
implement an intended CLI API.
Click in three points:
- Arbitrary nesting of commands
- Automatic help page generation
- Supports lazy loading of subcommands at runtime
## A Simple Example
```python
import click
@click.command()
@click.option("--count", default=1, help="Number of greetings.")
@click.option("--name", prompt="Your name", help="The person to greet.")
def hello(count, name):
"""Simple program that greets NAME for a total of COUNT times."""
for _ in range(count):
click.echo(f"Hello, {name}!")
if __name__ == '__main__':
hello()
```
```
$ python hello.py --count=3
Your name: Click
Hello, Click!
Hello, Click!
Hello, Click!
```
## Donate
The Pallets organization develops and supports Click and other popular
packages. In order to grow the community of contributors and users, and
allow the maintainers to devote more time to the projects, [please
donate today][].
[please donate today]: https://palletsprojects.com/donate
## Contributing
See our [detailed contributing documentation][contrib] for many ways to
contribute, including reporting issues, requesting features, asking or answering
questions, and making PRs.
[contrib]: https://palletsprojects.com/contributing/

View File

@@ -0,0 +1,40 @@
click-8.3.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
click-8.3.0.dist-info/METADATA,sha256=P6vpEHZ_MLBt4SO2eB-QaadcOdiznkzaZtJImRo7_V4,2621
click-8.3.0.dist-info/RECORD,,
click-8.3.0.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
click-8.3.0.dist-info/licenses/LICENSE.txt,sha256=morRBqOU6FO_4h9C9OctWSgZoigF2ZG18ydQKSkrZY0,1475
click/__init__.py,sha256=6YyS1aeyknZ0LYweWozNZy0A9nZ_11wmYIhv3cbQrYo,4473
click/__pycache__/__init__.cpython-311.pyc,,
click/__pycache__/_compat.cpython-311.pyc,,
click/__pycache__/_termui_impl.cpython-311.pyc,,
click/__pycache__/_textwrap.cpython-311.pyc,,
click/__pycache__/_utils.cpython-311.pyc,,
click/__pycache__/_winconsole.cpython-311.pyc,,
click/__pycache__/core.cpython-311.pyc,,
click/__pycache__/decorators.cpython-311.pyc,,
click/__pycache__/exceptions.cpython-311.pyc,,
click/__pycache__/formatting.cpython-311.pyc,,
click/__pycache__/globals.cpython-311.pyc,,
click/__pycache__/parser.cpython-311.pyc,,
click/__pycache__/shell_completion.cpython-311.pyc,,
click/__pycache__/termui.cpython-311.pyc,,
click/__pycache__/testing.cpython-311.pyc,,
click/__pycache__/types.cpython-311.pyc,,
click/__pycache__/utils.cpython-311.pyc,,
click/_compat.py,sha256=v3xBZkFbvA1BXPRkFfBJc6-pIwPI7345m-kQEnpVAs4,18693
click/_termui_impl.py,sha256=ktpAHyJtNkhyR-x64CQFD6xJQI11fTA3qg2AV3iCToU,26799
click/_textwrap.py,sha256=BOae0RQ6vg3FkNgSJyOoGzG1meGMxJ_ukWVZKx_v-0o,1400
click/_utils.py,sha256=kZwtTf5gMuCilJJceS2iTCvRvCY-0aN5rJq8gKw7p8g,943
click/_winconsole.py,sha256=_vxUuUaxwBhoR0vUWCNuHY8VUefiMdCIyU2SXPqoF-A,8465
click/core.py,sha256=1A5T8UoAXklIGPTJ83_DJbVi35ehtJS2FTkP_wQ7es0,128855
click/decorators.py,sha256=5P7abhJtAQYp_KHgjUvhMv464ERwOzrv2enNknlwHyQ,18461
click/exceptions.py,sha256=8utf8w6V5hJXMnO_ic1FNrtbwuEn1NUu1aDwV8UqnG4,9954
click/formatting.py,sha256=RVfwwr0rwWNpgGr8NaHodPzkIr7_tUyVh_nDdanLMNc,9730
click/globals.py,sha256=gM-Nh6A4M0HB_SgkaF5M4ncGGMDHc_flHXu9_oh4GEU,1923
click/parser.py,sha256=Q31pH0FlQZEq-UXE_ABRzlygEfvxPTuZbWNh4xfXmzw,19010
click/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
click/shell_completion.py,sha256=Cc4GQUFuWpfQBa9sF5qXeeYI7n3tI_1k6ZdSn4BZbT0,20994
click/termui.py,sha256=vAYrKC2a7f_NfEIhAThEVYfa__ib5XQbTSCGtJlABRA,30847
click/testing.py,sha256=EERbzcl1br0mW0qBS9EqkknfNfXB9WQEW0ELIpkvuSs,19102
click/types.py,sha256=ek54BNSFwPKsqtfT7jsqcc4WHui8AIFVMKM4oVZIXhc,39927
click/utils.py,sha256=gCUoewdAhA-QLBUUHxrLh4uj6m7T1WjZZMNPvR0I7YA,20257

View File

@@ -0,0 +1,4 @@
Wheel-Version: 1.0
Generator: flit 3.12.0
Root-Is-Purelib: true
Tag: py3-none-any

View File

@@ -0,0 +1,28 @@
Copyright 2014 Pallets
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@@ -0,0 +1,123 @@
"""
Click is a simple Python module inspired by the stdlib optparse to make
writing command line scripts fun. Unlike other modules, it's based
around a simple API that does not come with too much magic and is
composable.
"""
from __future__ import annotations
from .core import Argument as Argument
from .core import Command as Command
from .core import CommandCollection as CommandCollection
from .core import Context as Context
from .core import Group as Group
from .core import Option as Option
from .core import Parameter as Parameter
from .decorators import argument as argument
from .decorators import command as command
from .decorators import confirmation_option as confirmation_option
from .decorators import group as group
from .decorators import help_option as help_option
from .decorators import make_pass_decorator as make_pass_decorator
from .decorators import option as option
from .decorators import pass_context as pass_context
from .decorators import pass_obj as pass_obj
from .decorators import password_option as password_option
from .decorators import version_option as version_option
from .exceptions import Abort as Abort
from .exceptions import BadArgumentUsage as BadArgumentUsage
from .exceptions import BadOptionUsage as BadOptionUsage
from .exceptions import BadParameter as BadParameter
from .exceptions import ClickException as ClickException
from .exceptions import FileError as FileError
from .exceptions import MissingParameter as MissingParameter
from .exceptions import NoSuchOption as NoSuchOption
from .exceptions import UsageError as UsageError
from .formatting import HelpFormatter as HelpFormatter
from .formatting import wrap_text as wrap_text
from .globals import get_current_context as get_current_context
from .termui import clear as clear
from .termui import confirm as confirm
from .termui import echo_via_pager as echo_via_pager
from .termui import edit as edit
from .termui import getchar as getchar
from .termui import launch as launch
from .termui import pause as pause
from .termui import progressbar as progressbar
from .termui import prompt as prompt
from .termui import secho as secho
from .termui import style as style
from .termui import unstyle as unstyle
from .types import BOOL as BOOL
from .types import Choice as Choice
from .types import DateTime as DateTime
from .types import File as File
from .types import FLOAT as FLOAT
from .types import FloatRange as FloatRange
from .types import INT as INT
from .types import IntRange as IntRange
from .types import ParamType as ParamType
from .types import Path as Path
from .types import STRING as STRING
from .types import Tuple as Tuple
from .types import UNPROCESSED as UNPROCESSED
from .types import UUID as UUID
from .utils import echo as echo
from .utils import format_filename as format_filename
from .utils import get_app_dir as get_app_dir
from .utils import get_binary_stream as get_binary_stream
from .utils import get_text_stream as get_text_stream
from .utils import open_file as open_file
def __getattr__(name: str) -> object:
import warnings
if name == "BaseCommand":
from .core import _BaseCommand
warnings.warn(
"'BaseCommand' is deprecated and will be removed in Click 9.0. Use"
" 'Command' instead.",
DeprecationWarning,
stacklevel=2,
)
return _BaseCommand
if name == "MultiCommand":
from .core import _MultiCommand
warnings.warn(
"'MultiCommand' is deprecated and will be removed in Click 9.0. Use"
" 'Group' instead.",
DeprecationWarning,
stacklevel=2,
)
return _MultiCommand
if name == "OptionParser":
from .parser import _OptionParser
warnings.warn(
"'OptionParser' is deprecated and will be removed in Click 9.0. The"
" old parser is available in 'optparse'.",
DeprecationWarning,
stacklevel=2,
)
return _OptionParser
if name == "__version__":
import importlib.metadata
import warnings
warnings.warn(
"The '__version__' attribute is deprecated and will be removed in"
" Click 9.1. Use feature detection or"
" 'importlib.metadata.version(\"click\")' instead.",
DeprecationWarning,
stacklevel=2,
)
return importlib.metadata.version("click")
raise AttributeError(name)

View File

@@ -0,0 +1,622 @@
from __future__ import annotations
import codecs
import collections.abc as cabc
import io
import os
import re
import sys
import typing as t
from types import TracebackType
from weakref import WeakKeyDictionary
# True when sys.platform starts with "cygwin".
CYGWIN = sys.platform.startswith("cygwin")
# True when sys.platform starts with "win" (e.g. "win32").
WIN = sys.platform.startswith("win")
# Optional hook mapping a text stream to a text stream; unset (None) at this point.
# NOTE(review): presumably assigned elsewhere to add ANSI handling — confirm.
auto_wrap_for_ansi: t.Callable[[t.TextIO], t.TextIO] | None = None
# Matches ESC "[" followed by any run of digits, ";" or "?" and one ASCII letter.
_ansi_re = re.compile(r"\033\[[;?0-9]*[a-zA-Z]")
def _make_text_stream(
    stream: t.BinaryIO,
    encoding: str | None,
    errors: str | None,
    force_readable: bool = False,
    force_writable: bool = False,
) -> t.TextIO:
    """Wrap a binary stream in a line-buffered, non-closing text wrapper.

    A missing ``encoding`` is derived from the stream through
    ``get_best_encoding``; a missing ``errors`` policy defaults to
    ``"replace"``.
    """
    resolved_encoding = get_best_encoding(stream) if encoding is None else encoding
    resolved_errors = "replace" if errors is None else errors

    return _NonClosingTextIOWrapper(
        stream,
        resolved_encoding,
        resolved_errors,
        line_buffering=True,
        force_readable=force_readable,
        force_writable=force_writable,
    )
def is_ascii_encoding(encoding: str) -> bool:
    """Tell whether ``encoding`` resolves to the ASCII codec.

    Unknown encoding names yield ``False``.
    """
    try:
        codec_info = codecs.lookup(encoding)
    except LookupError:
        return False
    return codec_info.name == "ascii"
def get_best_encoding(stream: t.IO[t.Any]) -> str:
    """Return the encoding of ``stream``, or the interpreter default when the
    stream has none; an ASCII result is replaced by ``"utf-8"``.
    """
    detected = getattr(stream, "encoding", None) or sys.getdefaultencoding()
    return "utf-8" if is_ascii_encoding(detected) else detected
class _NonClosingTextIOWrapper(io.TextIOWrapper):
    """Text wrapper that detaches from its underlying stream on finalization
    (``__del__``) and reads/writes through a ``_FixupStream`` adapter.
    """

    def __init__(
        self,
        stream: t.BinaryIO,
        encoding: str | None,
        errors: str | None,
        force_readable: bool = False,
        force_writable: bool = False,
        **extra: t.Any,
    ) -> None:
        fixed_stream = t.cast(
            t.BinaryIO, _FixupStream(stream, force_readable, force_writable)
        )
        self._stream = fixed_stream
        super().__init__(fixed_stream, encoding, errors, **extra)

    def __del__(self) -> None:
        # Best effort: failures while detaching are deliberately ignored.
        try:
            self.detach()
        except Exception:
            pass

    def isatty(self) -> bool:
        # Delegate to the wrapped stream.
        # https://bitbucket.org/pypy/pypy/issue/1803
        return self._stream.isatty()
class _FixupStream:
"""The new io interface needs more from streams than streams
traditionally implement. As such, this fix-up code is necessary in
some circumstances.
The forcing of readable and writable flags are there because some tools
put badly patched objects on sys (one such offender are certain version
of jupyter notebook).
"""
def __init__(
self,
stream: t.BinaryIO,
force_readable: bool = False,
force_writable: bool = False,
):
self._stream = stream
self._force_readable = force_readable
self._force_writable = force_writable
def __getattr__(self, name: str) -> t.Any:
return getattr(self._stream, name)
def read1(self, size: int) -> bytes:
f = getattr(self._stream, "read1", None)
if f is not None:
return t.cast(bytes, f(size))
return self._stream.read(size)
def readable(self) -> bool:
if self._force_readable:
return True
x = getattr(self._stream, "readable", None)
if x is not None:
return t.cast(bool, x())
try:
self._stream.read(0)
except Exception:
return False
return True
def writable(self) -> bool:
if self._force_writable:
return True
x = getattr(self._stream, "writable", None)
if x is not None:
return t.cast(bool, x())
try:
self._stream.write(b"")
except Exception:
try:
self._stream.write(b"")
except Exception:
return False
return True
def seekable(self) -> bool:
x = getattr(self._stream, "seekable", None)
if x is not None:
return t.cast(bool, x())
try:
self._stream.seek(self._stream.tell())
except Exception:
return False
return True
def _is_binary_reader(stream: t.IO[t.Any], default: bool = False) -> bool:
try:
return isinstance(stream.read(0), bytes)
except Exception:
return default
# This happens in some cases where the stream was already
# closed. In this case, we assume the default.
def _is_binary_writer(stream: t.IO[t.Any], default: bool = False) -> bool:
try:
stream.write(b"")
except Exception:
try:
stream.write("")
return False
except Exception:
pass
return default
return True
def _find_binary_reader(stream: t.IO[t.Any]) -> t.BinaryIO | None:
    """Return a binary reader for ``stream``: the stream itself when it is
    already binary, else its ``buffer`` attribute when that is binary, else
    ``None``.
    """
    # The stream may already be binary: the official docs recommend
    # detaching the standard streams to get binary ones, and some code
    # does exactly that.
    if _is_binary_reader(stream, False):
        return t.cast(t.BinaryIO, stream)

    underlying = getattr(stream, "buffer", None)

    # A buffer that cannot be probed (for instance because it is closed)
    # is assumed to be binary.
    if underlying is None or not _is_binary_reader(underlying, True):
        return None

    return t.cast(t.BinaryIO, underlying)
def _find_binary_writer(stream: t.IO[t.Any]) -> t.BinaryIO | None:
    """Return a binary writer for ``stream``: the stream itself when it is
    already binary, else its ``buffer`` attribute when that is binary, else
    ``None``.
    """
    # The stream may already be binary: the official docs recommend
    # detaching the standard streams to get binary ones, and some code
    # does exactly that.
    if _is_binary_writer(stream, False):
        return t.cast(t.BinaryIO, stream)

    underlying = getattr(stream, "buffer", None)

    # A buffer that cannot be probed (for instance because it is closed)
    # is assumed to be binary.
    if underlying is None or not _is_binary_writer(underlying, True):
        return None

    return t.cast(t.BinaryIO, underlying)
def _stream_is_misconfigured(stream: t.TextIO) -> bool:
    """A stream counts as misconfigured when its encoding is ASCII."""
    # A stream without an encoding is treated as ASCII. This appears to
    # happen in certain unittest environments. The correct behavior is not
    # entirely clear, but this at least forces Click to recover somehow.
    declared_encoding = getattr(stream, "encoding", None)
    return is_ascii_encoding(declared_encoding or "ascii")
def _is_compat_stream_attr(stream: t.TextIO, attr: str, value: str | None) -> bool:
"""A stream attribute is compatible if it is equal to the
desired value or the desired value is unset and the attribute
has a value.
"""
stream_value = getattr(stream, attr, None)
return stream_value == value or (value is None and stream_value is not None)
def _is_compatible_text_stream(
    stream: t.TextIO, encoding: str | None, errors: str | None
) -> bool:
    """Tell whether both the ``encoding`` and ``errors`` attributes of
    ``stream`` are compatible with the requested values.
    """
    if not _is_compat_stream_attr(stream, "encoding", encoding):
        return False
    return _is_compat_stream_attr(stream, "errors", errors)
def _force_correct_text_stream(
    text_stream: t.IO[t.Any],
    encoding: str | None,
    errors: str | None,
    is_binary: t.Callable[[t.IO[t.Any], bool], bool],
    find_binary: t.Callable[[t.IO[t.Any]], t.BinaryIO | None],
    force_readable: bool = False,
    force_writable: bool = False,
) -> t.TextIO:
    """Return a text stream honoring ``encoding`` and ``errors``.

    A compatible text stream is returned unchanged. Otherwise its underlying
    binary stream (located with ``find_binary``) is wrapped again; when no
    binary stream can be found the original stream is returned as-is.
    ``is_binary`` tells whether the given stream is already binary.
    """
    if is_binary(text_stream, False):
        binary_stream = t.cast(t.BinaryIO, text_stream)
    else:
        candidate = t.cast(t.TextIO, text_stream)

        # A stream that looks compatible, and that would not fall back to a
        # misconfigured ASCII encoding, is used as-is.
        looks_compatible = _is_compatible_text_stream(candidate, encoding, errors)
        if looks_compatible and not (
            encoding is None and _stream_is_misconfigured(candidate)
        ):
            return candidate

        # Otherwise, get the underlying binary stream.
        underlying = find_binary(candidate)

        # If that's not possible, silently use the original stream
        # and get mojibake instead of exceptions.
        if underlying is None:
            return candidate

        binary_stream = underlying

    # Default errors to "replace" instead of "strict" in order to get
    # something that works.
    effective_errors = "replace" if errors is None else errors

    # Wrap the binary stream in a text stream with the correct
    # encoding parameters.
    return _make_text_stream(
        binary_stream,
        encoding,
        effective_errors,
        force_readable=force_readable,
        force_writable=force_writable,
    )
def _force_correct_text_reader(
    text_reader: t.IO[t.Any],
    encoding: str | None,
    errors: str | None,
    force_readable: bool = False,
) -> t.TextIO:
    """Reader flavor of ``_force_correct_text_stream`` (uses the binary
    *reader* probes).
    """
    return _force_correct_text_stream(
        text_reader,
        encoding,
        errors,
        is_binary=_is_binary_reader,
        find_binary=_find_binary_reader,
        force_readable=force_readable,
    )
def _force_correct_text_writer(
    text_writer: t.IO[t.Any],
    encoding: str | None,
    errors: str | None,
    force_writable: bool = False,
) -> t.TextIO:
    """Writer flavor of ``_force_correct_text_stream`` (uses the binary
    *writer* probes).
    """
    return _force_correct_text_stream(
        text_writer,
        encoding,
        errors,
        is_binary=_is_binary_writer,
        find_binary=_find_binary_writer,
        force_writable=force_writable,
    )
def get_binary_stdin() -> t.BinaryIO:
    """Return the binary stream behind ``sys.stdin``.

    Raises RuntimeError when no binary stream can be determined.
    """
    binary_stdin = _find_binary_reader(sys.stdin)
    if binary_stdin is not None:
        return binary_stdin
    raise RuntimeError("Was not able to determine binary stream for sys.stdin.")
def get_binary_stdout() -> t.BinaryIO:
    """Return the binary stream behind ``sys.stdout``.

    Raises RuntimeError when no binary stream can be determined.
    """
    binary_stdout = _find_binary_writer(sys.stdout)
    if binary_stdout is not None:
        return binary_stdout
    raise RuntimeError("Was not able to determine binary stream for sys.stdout.")
def get_binary_stderr() -> t.BinaryIO:
    """Return the binary stream behind ``sys.stderr``.

    Raises RuntimeError when no binary stream can be determined.
    """
    binary_stderr = _find_binary_writer(sys.stderr)
    if binary_stderr is not None:
        return binary_stderr
    raise RuntimeError("Was not able to determine binary stream for sys.stderr.")
def get_text_stdin(encoding: str | None = None, errors: str | None = None) -> t.TextIO:
    """Return a text stream for ``sys.stdin``.

    The Windows console stream is used when ``_get_windows_console_stream``
    provides one; otherwise ``sys.stdin`` is adapted to the requested
    ``encoding`` and ``errors``.
    """
    console_stream = _get_windows_console_stream(sys.stdin, encoding, errors)
    if console_stream is None:
        return _force_correct_text_reader(
            sys.stdin, encoding, errors, force_readable=True
        )
    return console_stream
def get_text_stdout(encoding: str | None = None, errors: str | None = None) -> t.TextIO:
    """Return a text stream for ``sys.stdout``.

    The Windows console stream is used when ``_get_windows_console_stream``
    provides one; otherwise ``sys.stdout`` is adapted to the requested
    ``encoding`` and ``errors``.
    """
    console_stream = _get_windows_console_stream(sys.stdout, encoding, errors)
    if console_stream is None:
        return _force_correct_text_writer(
            sys.stdout, encoding, errors, force_writable=True
        )
    return console_stream
def get_text_stderr(encoding: str | None = None, errors: str | None = None) -> t.TextIO:
    """Return a text stream for ``sys.stderr``.

    The Windows console stream is used when ``_get_windows_console_stream``
    provides one; otherwise ``sys.stderr`` is adapted to the requested
    ``encoding`` and ``errors``.
    """
    console_stream = _get_windows_console_stream(sys.stderr, encoding, errors)
    if console_stream is None:
        return _force_correct_text_writer(
            sys.stderr, encoding, errors, force_writable=True
        )
    return console_stream
def _wrap_io_open(
file: str | os.PathLike[str] | int,
mode: str,
encoding: str | None,
errors: str | None,
) -> t.IO[t.Any]:
"""Handles not passing ``encoding`` and ``errors`` in binary mode."""
if "b" in mode:
return open(file, mode)
return open(file, mode, encoding=encoding, errors=errors)
def open_stream(
    filename: str | os.PathLike[str],
    mode: str = "r",
    encoding: str | None = None,
    errors: str | None = "strict",
    atomic: bool = False,
) -> tuple[t.IO[t.Any], bool]:
    """Open a file or standard stream and return ``(stream, flag)``.

    ``"-"`` selects a standard stream: stdout for modes containing ``w``,
    ``a`` or ``x``, stdin otherwise, binary or text depending on ``mode``.
    For those the returned flag is ``False``; for files opened here it is
    ``True`` (presumably "caller should close it" -- confirm against callers).

    :param filename: path to open, or ``"-"`` for a standard stream.
    :param mode: ``open()``-style mode string.
    :param encoding: text encoding; unused for binary modes.
    :param errors: text error handler; unused for binary modes.
    :param atomic: write to a temporary file in the same directory, wrapped
        in ``_AtomicFile``. Only ``w`` modes are accepted.
    :raises ValueError: if ``atomic`` is combined with ``a``, ``x`` or a
        mode without ``w``.
    """
    binary = "b" in mode
    filename = os.fspath(filename)

    # Standard streams first. These are simple because they ignore the
    # atomic flag. Use fsdecode to handle Path("-").
    if os.fsdecode(filename) == "-":
        if any(m in mode for m in ["w", "a", "x"]):
            if binary:
                return get_binary_stdout(), False
            return get_text_stdout(encoding=encoding, errors=errors), False
        if binary:
            return get_binary_stdin(), False
        return get_text_stdin(encoding=encoding, errors=errors), False

    # Non-atomic writes directly go out through the regular open functions.
    if not atomic:
        return _wrap_io_open(filename, mode, encoding, errors), True

    # Some usability stuff for atomic writes
    if "a" in mode:
        raise ValueError(
            "Appending to an existing file is not supported, because that"
            " would involve an expensive `copy`-operation to a temporary"
            " file. Open the file in normal `w`-mode and copy explicitly"
            " if that's what you're after."
        )
    if "x" in mode:
        raise ValueError("Use the `overwrite`-parameter instead.")
    if "w" not in mode:
        raise ValueError("Atomic writes only make sense with `w`-mode.")

    # Atomic writes are more complicated.  They work by opening a file
    # as a proxy in the same folder and then using the fdopen
    # functionality to wrap it in a Python file.  Then we wrap it in an
    # atomic file that moves the file over on close.
    import errno
    import random

    # Remember the mode bits of an existing target so the temporary file
    # can be created with the same permissions.
    try:
        perm: int | None = os.stat(filename).st_mode
    except OSError:
        perm = None

    # O_EXCL makes creation fail if the random name is already taken.
    flags = os.O_RDWR | os.O_CREAT | os.O_EXCL

    if binary:
        flags |= getattr(os, "O_BINARY", 0)

    # Keep picking random names until one can be created exclusively.
    while True:
        tmp_filename = os.path.join(
            os.path.dirname(filename),
            f".__atomic-write{random.randrange(1 << 32):08x}",
        )
        try:
            fd = os.open(tmp_filename, flags, 0o666 if perm is None else perm)
            break
        except OSError as e:
            # Retry on a name collision, and on Windows also when EACCES is
            # reported for a path that is a writable directory.
            if e.errno == errno.EEXIST or (
                os.name == "nt"
                and e.errno == errno.EACCES
                and os.path.isdir(e.filename)
                and os.access(e.filename, os.W_OK)
            ):
                continue
            raise

    if perm is not None:
        os.chmod(tmp_filename, perm)  # in case perm includes bits in umask

    f = _wrap_io_open(fd, mode, encoding, errors)
    af = _AtomicFile(f, tmp_filename, os.path.realpath(filename))
    return t.cast(t.IO[t.Any], af), True
class _AtomicFile:
def __init__(self, f: t.IO[t.Any], tmp_filename: str, real_filename: str) -> None:
self._f = f
self._tmp_filename = tmp_filename
self._real_filename = real_filename
self.closed = False
@property
def name(self) -> str:
return self._real_filename
def close(self, delete: bool = False) -> None:
if self.closed:
return
self._f.close()
os.replace(self._tmp_filename, self._real_filename)
self.closed = True
def __getattr__(self, name: str) -> t.Any:
return getattr(self._f, name)
def __enter__(self) -> _AtomicFile:
return self
def __exit__(
self,
exc_type: type[BaseException] | None,
exc_value: BaseException | None,
tb: TracebackType | None,
) -> None:
self.close(delete=exc_type is not None)
def __repr__(self) -> str:
return repr(self._f)
def strip_ansi(value: str) -> str:
    """Return ``value`` with every match of ``_ansi_re`` removed."""
    cleaned = _ansi_re.sub("", value)
    return cleaned
def _is_jupyter_kernel_output(stream: t.IO[t.Any]) -> bool:
    """Tell whether ``stream``, once unwrapped, comes from an ``ipykernel`` module.

    ``_FixupStream`` and ``_NonClosingTextIOWrapper`` layers are peeled off
    through their ``_stream`` attribute before the class's module is checked.
    """
    wrapper_types = (_FixupStream, _NonClosingTextIOWrapper)
    inner = stream
    while isinstance(inner, wrapper_types):
        inner = inner._stream
    module_name = inner.__class__.__module__
    return module_name.startswith("ipykernel.")
def should_strip_ansi(
    stream: t.IO[t.Any] | None = None, color: bool | None = None
) -> bool:
    """Decide whether ANSI codes should be stripped.

    An explicit ``color`` wins: the result is simply ``not color``. When
    ``color`` is ``None`` the stream is inspected (``sys.stdin`` if no
    stream is given): codes are stripped unless it is a TTY or Jupyter
    kernel output.
    """
    if color is not None:
        return not color
    target = sys.stdin if stream is None else stream
    return not (isatty(target) or _is_jupyter_kernel_output(target))
# On Windows, wrap the output streams with colorama to support ANSI
# color codes.
# NOTE: double check is needed so mypy does not analyze this on Linux
if sys.platform.startswith("win") and WIN:
    from ._winconsole import _get_windows_console_stream

    def _get_argv_encoding() -> str:
        """Return the locale's preferred encoding."""
        import locale

        return locale.getpreferredencoding()

    # Cache of already wrapped streams, keyed weakly by the original stream.
    _ansi_stream_wrappers: cabc.MutableMapping[t.TextIO, t.TextIO] = WeakKeyDictionary()

    def auto_wrap_for_ansi(stream: t.TextIO, color: bool | None = None) -> t.TextIO:
        """Support ANSI color and style codes on Windows by wrapping a
        stream with colorama.
        """
        # The lookup may fail (e.g. for streams that cannot be used as
        # weak keys); in that case simply wrap again.
        try:
            cached = _ansi_stream_wrappers.get(stream)
        except Exception:
            cached = None

        if cached is not None:
            return cached

        import colorama

        strip = should_strip_ansi(stream, color)
        ansi_wrapper = colorama.AnsiToWin32(stream, strip=strip)
        rv = t.cast(t.TextIO, ansi_wrapper.stream)
        _write = rv.write

        def _safe_write(s: str) -> int:
            # If a write is interrupted, reset the console styling before
            # propagating the exception.
            try:
                return _write(s)
            except BaseException:
                ansi_wrapper.reset_all()
                raise

        rv.write = _safe_write  # type: ignore[method-assign]

        # Caching is best effort; failure to store is ignored.
        try:
            _ansi_stream_wrappers[stream] = rv
        except Exception:
            pass

        return rv

else:

    def _get_argv_encoding() -> str:
        """Return stdin's encoding, falling back to the filesystem encoding."""
        return getattr(sys.stdin, "encoding", None) or sys.getfilesystemencoding()

    def _get_windows_console_stream(
        f: t.TextIO, encoding: str | None, errors: str | None
    ) -> t.TextIO | None:
        """Non-Windows stub: there is never a Windows console stream."""
        return None
def term_len(x: str) -> int:
    """Return the length of ``x`` after ``strip_ansi`` has been applied."""
    visible = strip_ansi(x)
    return len(visible)
def isatty(stream: t.IO[t.Any]) -> bool:
    """Return ``stream.isatty()``, or ``False`` if that call raises.

    Any ``Exception`` (including a missing ``isatty`` attribute) is
    treated as "not a TTY".
    """
    try:
        answer = stream.isatty()
    except Exception:
        answer = False
    return answer
def _make_cached_stream_func(
src_func: t.Callable[[], t.TextIO | None],
wrapper_func: t.Callable[[], t.TextIO],
) -> t.Callable[[], t.TextIO | None]:
cache: cabc.MutableMapping[t.TextIO, t.TextIO] = WeakKeyDictionary()
def func() -> t.TextIO | None:
stream = src_func()
if stream is None:
return None
try:
rv = cache.get(stream)
except Exception:
rv = None
if rv is not None:
return rv
rv = wrapper_func()
try:
cache[stream] = rv
except Exception:
pass
return rv
return func
# Cached accessors for the default text streams. Each returns ``None`` when
# the corresponding ``sys`` stream is ``None``.
_default_text_stdin = _make_cached_stream_func(lambda: sys.stdin, get_text_stdin)
_default_text_stdout = _make_cached_stream_func(lambda: sys.stdout, get_text_stdout)
_default_text_stderr = _make_cached_stream_func(lambda: sys.stderr, get_text_stderr)

# Lookup tables from a standard stream name to the function that opens it.
binary_streams: cabc.Mapping[str, t.Callable[[], t.BinaryIO]] = {
    "stdin": get_binary_stdin,
    "stdout": get_binary_stdout,
    "stderr": get_binary_stderr,
}

# The text variants take ``(encoding, errors)``.
text_streams: cabc.Mapping[str, t.Callable[[str | None, str | None], t.TextIO]] = {
    "stdin": get_text_stdin,
    "stdout": get_text_stdout,
    "stderr": get_text_stderr,
}

View File

@@ -0,0 +1,847 @@
"""
This module contains implementations for the termui module. To keep the
import time of Click down, some infrequently used functionality is
placed in this module and only imported as needed.
"""
from __future__ import annotations
import collections.abc as cabc
import contextlib
import math
import os
import shlex
import sys
import time
import typing as t
from gettext import gettext as _
from io import StringIO
from pathlib import Path
from types import TracebackType
from ._compat import _default_text_stdout
from ._compat import CYGWIN
from ._compat import get_best_encoding
from ._compat import isatty
from ._compat import open_stream
from ._compat import strip_ansi
from ._compat import term_len
from ._compat import WIN
from .exceptions import ClickException
from .utils import echo
# Item type yielded by a ProgressBar's iterable.
V = t.TypeVar("V")

# Strings written before and after the progress bar line. "\r" returns to
# the start of the line; outside of Windows ("nt") the ANSI sequences
# "\033[?25l" / "\033[?25h" additionally hide and re-show the cursor.
if os.name == "nt":
    BEFORE_BAR = "\r"
    AFTER_BAR = "\n"
else:
    BEFORE_BAR = "\r\033[?25l"
    AFTER_BAR = "\033[?25h\n"
class ProgressBar(t.Generic[V]):
    """Text progress bar driven by iteration or by manual ``update`` calls.

    The bar must be used as a context manager. It only draws when its
    output file is a TTY; otherwise just the label is echoed once.

    :param iterable: items to iterate over; may be ``None`` if ``length``
        is given, in which case ``range(length)`` is used.
    :param length: total number of steps; if omitted it is taken from
        ``operator.length_hint(iterable)`` when available.
    :param fill_char: character for the completed part of the bar.
    :param empty_char: character for the remaining part of the bar.
    :param bar_template: ``%``-format template with the keys ``label``,
        ``bar`` and ``info``.
    :param info_sep: separator between the info items.
    :param hidden: if true, nothing is rendered at all.
    :param show_eta: show the estimated remaining time once known.
    :param show_percent: show the percentage; ``None`` means "show it when
        the length is known and ``show_pos`` is off".
    :param show_pos: show the absolute position.
    :param item_show_func: called with the current item, returns extra
        text (or ``None``) to display.
    :param label: text substituted for ``label`` in the template.
    :param file: output stream; defaults to the default text stdout, or an
        in-memory buffer if no stdout is attached.
    :param color: forwarded to ``echo``.
    :param update_min_steps: only re-render after this many accumulated steps.
    :param width: bar width in characters; ``0`` means fit to the terminal.
    :raises TypeError: if neither ``iterable`` nor ``length`` is provided.
    """

    def __init__(
        self,
        iterable: cabc.Iterable[V] | None,
        length: int | None = None,
        fill_char: str = "#",
        empty_char: str = " ",
        bar_template: str = "%(bar)s",
        info_sep: str = "  ",
        hidden: bool = False,
        show_eta: bool = True,
        show_percent: bool | None = None,
        show_pos: bool = False,
        item_show_func: t.Callable[[V | None], str | None] | None = None,
        label: str | None = None,
        file: t.TextIO | None = None,
        color: bool | None = None,
        update_min_steps: int = 1,
        width: int = 30,
    ) -> None:
        self.fill_char = fill_char
        self.empty_char = empty_char
        self.bar_template = bar_template
        self.info_sep = info_sep
        self.hidden = hidden
        self.show_eta = show_eta
        self.show_percent = show_percent
        self.show_pos = show_pos
        self.item_show_func = item_show_func
        self.label: str = label or ""

        if file is None:
            file = _default_text_stdout()

            # There are no standard streams attached to write to. For example,
            # pythonw on Windows.
            if file is None:
                file = StringIO()

        self.file = file
        self.color = color
        self.update_min_steps = update_min_steps
        # Steps accumulated since the last render.
        self._completed_intervals = 0
        self.width: int = width
        self.autowidth: bool = width == 0

        if length is None:
            from operator import length_hint

            length = length_hint(iterable, -1)

            if length == -1:
                length = None
        if iterable is None:
            if length is None:
                raise TypeError("iterable or length is required")
            iterable = t.cast("cabc.Iterable[V]", range(length))
        self.iter: cabc.Iterable[V] = iter(iterable)
        self.length = length
        self.pos: int = 0
        # Recent per-step time estimates (see make_step).
        self.avg: list[float] = []
        self.last_eta: float
        self.start: float
        self.start = self.last_eta = time.time()
        self.eta_known: bool = False
        self.finished: bool = False
        # Widest line rendered so far; used to blank out leftovers.
        self.max_width: int | None = None
        self.entered: bool = False
        self.current_item: V | None = None
        self._is_atty = isatty(self.file)
        self._last_line: str | None = None

    def __enter__(self) -> ProgressBar[V]:
        self.entered = True
        self.render_progress()
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_value: BaseException | None,
        tb: TracebackType | None,
    ) -> None:
        self.render_finish()

    def __iter__(self) -> cabc.Iterator[V]:
        if not self.entered:
            raise RuntimeError("You need to use progress bars in a with block.")
        self.render_progress()
        return self.generator()

    def __next__(self) -> V:
        # Iteration is defined in terms of a generator function,
        # returned by iter(self); use that to define next(). This works
        # because `self.iter` is an iterable consumed by that generator,
        # so it is re-entry safe. Calling `next(self.generator())`
        # twice works and does "what you want".
        return next(iter(self))

    def render_finish(self) -> None:
        """Write the closing sequence, unless hidden or not on a TTY."""
        if self.hidden or not self._is_atty:
            return
        self.file.write(AFTER_BAR)
        self.file.flush()

    @property
    def pct(self) -> float:
        """Completed fraction in ``[0, 1]``; ``1.0`` once finished."""
        if self.finished:
            return 1.0
        return min(self.pos / (float(self.length or 1) or 1), 1.0)

    @property
    def time_per_iteration(self) -> float:
        """Mean of the recorded step-time estimates, ``0.0`` if none yet."""
        if not self.avg:
            return 0.0
        return sum(self.avg) / float(len(self.avg))

    @property
    def eta(self) -> float:
        """Estimated remaining seconds; ``0.0`` if unknown or finished."""
        if self.length is not None and not self.finished:
            return self.time_per_iteration * (self.length - self.pos)
        return 0.0

    def format_eta(self) -> str:
        """Format the ETA as ``[Nd ]HH:MM:SS``, or ``""`` if not known."""
        if self.eta_known:
            remaining = int(self.eta)
            seconds = remaining % 60
            remaining //= 60
            minutes = remaining % 60
            remaining //= 60
            hours = remaining % 24
            remaining //= 24
            if remaining > 0:
                return f"{remaining}d {hours:02}:{minutes:02}:{seconds:02}"
            else:
                return f"{hours:02}:{minutes:02}:{seconds:02}"
        return ""

    def format_pos(self) -> str:
        """Format the position as ``pos`` or ``pos/length``."""
        pos = str(self.pos)
        if self.length is not None:
            pos += f"/{self.length}"
        return pos

    def format_pct(self) -> str:
        """Format the percentage right-aligned in four characters."""
        return f"{int(self.pct * 100): 4}%"[1:]

    def format_bar(self) -> str:
        """Build the bar itself.

        With a known length the bar is filled proportionally. Without one,
        a single ``fill_char`` moves back and forth across the bar until
        the bar is finished, at which point it is completely filled.
        """
        if self.length is not None:
            bar_length = int(self.pct * self.width)
            bar = self.fill_char * bar_length
            bar += self.empty_char * (self.width - bar_length)
        elif self.finished:
            bar = self.fill_char * self.width
        else:
            chars = list(self.empty_char * (self.width or 1))
            if self.time_per_iteration != 0:
                marker = int(
                    (math.cos(self.pos * self.time_per_iteration) / 2.0 + 0.5)
                    * self.width
                )
                # cos() can be exactly 1.0 (e.g. pos == 0), which would map
                # to index == width, one past the end. Clamp to the last cell.
                chars[min(marker, len(chars) - 1)] = self.fill_char
            bar = "".join(chars)
        return bar

    def format_progress_line(self) -> str:
        """Render the full line from ``bar_template`` (trailing space stripped)."""
        show_percent = self.show_percent

        info_bits = []
        if self.length is not None and show_percent is None:
            show_percent = not self.show_pos

        if self.show_pos:
            info_bits.append(self.format_pos())
        if show_percent:
            info_bits.append(self.format_pct())
        if self.show_eta and self.eta_known and not self.finished:
            info_bits.append(self.format_eta())
        if self.item_show_func is not None:
            item_info = self.item_show_func(self.current_item)
            if item_info is not None:
                info_bits.append(item_info)

        return (
            self.bar_template
            % {
                "label": self.label,
                "bar": self.format_bar(),
                "info": self.info_sep.join(info_bits),
            }
        ).rstrip()

    def render_progress(self) -> None:
        """Draw the current state to ``self.file`` if anything changed."""
        if self.hidden:
            return

        if not self._is_atty:
            # Only output the label once if the output is not a TTY.
            if self._last_line != self.label:
                self._last_line = self.label
                echo(self.label, file=self.file, color=self.color)
            return

        buf = []
        # Update width in case the terminal has been resized
        if self.autowidth:
            import shutil

            old_width = self.width
            # Measure everything except the bar by rendering with width 0.
            self.width = 0
            clutter_length = term_len(self.format_progress_line())
            new_width = max(0, shutil.get_terminal_size().columns - clutter_length)
            if new_width < old_width and self.max_width is not None:
                # The terminal shrank: blank out the previously drawn line.
                buf.append(BEFORE_BAR)
                buf.append(" " * self.max_width)
                self.max_width = new_width
            self.width = new_width

        clear_width = self.width
        if self.max_width is not None:
            clear_width = self.max_width

        buf.append(BEFORE_BAR)
        line = self.format_progress_line()
        line_len = term_len(line)
        if self.max_width is None or self.max_width < line_len:
            self.max_width = line_len

        buf.append(line)
        # Pad with spaces so leftovers of a longer previous line disappear.
        buf.append(" " * (clear_width - line_len))
        line = "".join(buf)
        # Render the line only if it changed.

        if line != self._last_line:
            self._last_line = line
            echo(line, file=self.file, color=self.color, nl=False)
            self.file.flush()

    def make_step(self, n_steps: int) -> None:
        """Advance the position and refresh the ETA estimate at most once a second."""
        self.pos += n_steps
        if self.length is not None and self.pos >= self.length:
            self.finished = True

        if (time.time() - self.last_eta) < 1.0:
            return

        self.last_eta = time.time()

        # self.avg is a rolling list of length <= 7 of steps where steps are
        # defined as time elapsed divided by the total progress through
        # self.length.
        if self.pos:
            step = (time.time() - self.start) / self.pos
        else:
            step = time.time() - self.start

        self.avg = self.avg[-6:] + [step]

        self.eta_known = self.length is not None

    def update(self, n_steps: int, current_item: V | None = None) -> None:
        """Update the progress bar by advancing a specified number of
        steps, and optionally set the ``current_item`` for this new
        position.

        :param n_steps: Number of steps to advance.
        :param current_item: Optional item to set as ``current_item``
            for the updated position.

        .. versionchanged:: 8.0
            Added the ``current_item`` optional parameter.

        .. versionchanged:: 8.0
            Only render when the number of steps meets the
            ``update_min_steps`` threshold.
        """
        if current_item is not None:
            self.current_item = current_item

        self._completed_intervals += n_steps

        if self._completed_intervals >= self.update_min_steps:
            self.make_step(self._completed_intervals)
            self.render_progress()
            self._completed_intervals = 0

    def finish(self) -> None:
        """Mark the bar as finished and clear the ETA and current item."""
        self.eta_known = False
        self.current_item = None
        self.finished = True

    def generator(self) -> cabc.Iterator[V]:
        """Return a generator which yields the items added to the bar
        during construction, and updates the progress bar *after* the
        yielded block returns.
        """
        # WARNING: the iterator interface for `ProgressBar` relies on
        # this and only works because this is a simple generator which
        # doesn't create or manage additional state. If this function
        # changes, the impact should be evaluated both against
        # `iter(bar)` and `next(bar)`. `next()` in particular may call
        # `self.generator()` repeatedly, and this must remain safe in
        # order for that interface to work.
        if not self.entered:
            raise RuntimeError("You need to use progress bars in a with block.")

        if not self._is_atty:
            yield from self.iter
        else:
            for rv in self.iter:
                self.current_item = rv

                # This allows show_item_func to be updated before the
                # item is processed. Only trigger at the beginning of
                # the update interval.
                if self._completed_intervals == 0:
                    self.render_progress()

                yield rv
                self.update(1)

            self.finish()
            self.render_progress()
def pager(generator: cabc.Iterable[str], color: bool | None = None) -> None:
    """Decide what method to use for paging through text.

    The candidates are tried in this order, stopping at the first that
    reports success:

    1. no paging at all if stdin or stdout is not a TTY;
    2. the command in the ``PAGER`` environment variable (through a
       temporary file on Windows, through a pipe elsewhere);
    3. no paging if ``TERM`` is ``dumb`` or ``emacs``;
    4. ``more`` on a temporary file on Windows / OS/2;
    5. ``less`` through a pipe, then ``more`` through a pipe;
    6. plain output as the final fallback.

    :param generator: iterable of text chunks to display.
    :param color: forwarded to the selected pager implementation.
    """
    stdout = _default_text_stdout()

    # There are no standard streams attached to write to. For example,
    # pythonw on Windows.
    if stdout is None:
        stdout = StringIO()

    if not isatty(sys.stdin) or not isatty(stdout):
        return _nullpager(stdout, generator, color)

    # Split and normalize the pager command into parts.
    pager_cmd_parts = shlex.split(os.environ.get("PAGER", ""), posix=False)
    if pager_cmd_parts:
        if WIN:
            if _tempfilepager(generator, pager_cmd_parts, color):
                return
        elif _pipepager(generator, pager_cmd_parts, color):
            return

    if os.environ.get("TERM") in ("dumb", "emacs"):
        return _nullpager(stdout, generator, color)
    if (WIN or sys.platform.startswith("os2")) and _tempfilepager(
        generator, ["more"], color
    ):
        return
    if _pipepager(generator, ["less"], color):
        return

    # No scratch file is needed for the piped ``more`` attempt, so none is
    # created; a failure to create one must not prevent the fallbacks below.
    if _pipepager(generator, ["more"], color):
        return
    return _nullpager(stdout, generator, color)
def _pipepager(
generator: cabc.Iterable[str], cmd_parts: list[str], color: bool | None
) -> bool:
"""Page through text by feeding it to another program. Invoking a
pager through this might support colors.
Returns `True` if the command was found, `False` otherwise and thus another
pager should be attempted.
"""
# Split the command into the invoked CLI and its parameters.
if not cmd_parts:
return False
import shutil
cmd = cmd_parts[0]
cmd_params = cmd_parts[1:]
cmd_filepath = shutil.which(cmd)
if not cmd_filepath:
return False
# Resolves symlinks and produces a normalized absolute path string.
cmd_path = Path(cmd_filepath).resolve()
cmd_name = cmd_path.name
import subprocess
# Make a local copy of the environment to not affect the global one.
env = dict(os.environ)
# If we're piping to less and the user hasn't decided on colors, we enable
# them by default we find the -R flag in the command line arguments.
if color is None and cmd_name == "less":
less_flags = f"{os.environ.get('LESS', '')}{' '.join(cmd_params)}"
if not less_flags:
env["LESS"] = "-R"
color = True
elif "r" in less_flags or "R" in less_flags:
color = True
c = subprocess.Popen(
[str(cmd_path)] + cmd_params,
shell=True,
stdin=subprocess.PIPE,
env=env,
errors="replace",
text=True,
)
assert c.stdin is not None
try:
for text in generator:
if not color:
text = strip_ansi(text)
c.stdin.write(text)
except BrokenPipeError:
# In case the pager exited unexpectedly, ignore the broken pipe error.
pass
except Exception as e:
# In case there is an exception we want to close the pager immediately
# and let the caller handle it.
# Otherwise the pager will keep running, and the user may not notice
# the error message, or worse yet it may leave the terminal in a broken state.
c.terminate()
raise e
finally:
# We must close stdin and wait for the pager to exit before we continue
try:
c.stdin.close()
# Close implies flush, so it might throw a BrokenPipeError if the pager
# process exited already.
except BrokenPipeError:
pass
# Less doesn't respect ^C, but catches it for its own UI purposes (aborting
# search or other commands inside less).
#
# That means when the user hits ^C, the parent process (click) terminates,
# but less is still alive, paging the output and messing up the terminal.
#
# If the user wants to make the pager exit on ^C, they should set
# `LESS='-K'`. It's not our decision to make.
while True:
try:
c.wait()
except KeyboardInterrupt:
pass
else:
break
return True
def _tempfilepager(
    generator: cabc.Iterable[str], cmd_parts: list[str], color: bool | None
) -> bool:
    """Page through text by invoking a program on a temporary file.

    The whole text is collected, written to a temporary file and the
    program is called with that file's name. The temporary file is always
    removed again, including when collecting or writing the text fails.

    :param generator: iterable of text chunks to display.
    :param cmd_parts: the pager command; only the first element (the
        executable) is used.
    :param color: if falsy, ANSI codes are stripped from the text.

    Returns `True` if the command was found, `False` otherwise and thus another
    pager should be attempted.
    """
    # Split the command into the invoked CLI and its parameters.
    if not cmd_parts:
        return False

    import shutil

    cmd = cmd_parts[0]

    cmd_filepath = shutil.which(cmd)
    if not cmd_filepath:
        return False
    # Resolves symlinks and produces a normalized absolute path string.
    cmd_path = Path(cmd_filepath).resolve()

    import subprocess
    import tempfile

    fd, filename = tempfile.mkstemp()
    try:
        # TODO: This never terminates if the passed generator never terminates.
        text = "".join(generator)
        if not color:
            text = strip_ansi(text)
        encoding = get_best_encoding(sys.stdout)
        with open_stream(filename, "wb")[0] as f:
            f.write(text.encode(encoding))
        try:
            subprocess.call([str(cmd_path), filename])
        except OSError:
            # Command not found
            pass
    finally:
        # Runs for every exit path so the descriptor and file never leak.
        os.close(fd)
        os.unlink(filename)

    return True
def _nullpager(
stream: t.TextIO, generator: cabc.Iterable[str], color: bool | None
) -> None:
"""Simply print unformatted text. This is the ultimate fallback."""
for text in generator:
if not color:
text = strip_ansi(text)
stream.write(text)
class Editor:
    """Launch an external editor on files or on a piece of text.

    :param editor: editor command to use; if ``None`` one is looked up by
        :meth:`get_editor`.
    :param env: extra environment variables for the editor process.
    :param require_save: if true, :meth:`edit` returns ``None`` when the
        file's modification time did not change.
    :param extension: suffix of the temporary file used by :meth:`edit`.
    """

    def __init__(
        self,
        editor: str | None = None,
        env: cabc.Mapping[str, str] | None = None,
        require_save: bool = True,
        extension: str = ".txt",
    ) -> None:
        self.editor = editor
        self.env = env
        self.require_save = require_save
        self.extension = extension

    def get_editor(self) -> str:
        """Return the editor command to run.

        Order of preference: the explicit ``editor``, the ``VISUAL`` then
        ``EDITOR`` environment variables, ``notepad`` on Windows, the first
        of ``sensible-editor``/``vim``/``nano`` found on the path, and
        finally ``vi``.
        """
        if self.editor is not None:
            return self.editor
        for key in "VISUAL", "EDITOR":
            rv = os.environ.get(key)
            if rv:
                return rv
        if WIN:
            return "notepad"

        from shutil import which

        for editor in "sensible-editor", "vim", "nano":
            if which(editor) is not None:
                return editor
        return "vi"

    def edit_files(self, filenames: cabc.Iterable[str]) -> None:
        """Run the editor on ``filenames`` and wait for it to exit.

        :raises ClickException: if the editor exits with a non-zero status
            or cannot be started.
        """
        import subprocess

        editor = self.get_editor()
        environ: dict[str, str] | None = None

        if self.env:
            environ = os.environ.copy()
            environ.update(self.env)

        # NOTE(review): the names are only wrapped in double quotes before
        # being handed to the shell; a name containing a double quote is
        # not escaped.
        exc_filename = " ".join(f'"{filename}"' for filename in filenames)

        try:
            c = subprocess.Popen(
                args=f"{editor} {exc_filename}", env=environ, shell=True
            )
            exit_code = c.wait()
            if exit_code != 0:
                raise ClickException(
                    _("{editor}: Editing failed").format(editor=editor)
                )
        except OSError as e:
            raise ClickException(
                _("{editor}: Editing failed: {e}").format(editor=editor, e=e)
            ) from e

    @t.overload
    def edit(self, text: bytes | bytearray) -> bytes | None: ...

    # We cannot know whether or not the type expected is str or bytes when None
    # is passed, so str is returned as that was what was done before.
    @t.overload
    def edit(self, text: str | None) -> str | None: ...

    def edit(self, text: str | bytes | bytearray | None) -> str | bytes | None:
        """Edit ``text`` in a temporary file and return the result.

        Bytes input is written and returned unchanged as bytes. String
        input gets a trailing newline if missing, is encoded as UTF-8
        (with BOM and ``\\r\\n`` line endings on Windows) and is decoded
        back with ``\\r\\n`` normalised to ``\\n``.

        :return: the edited content, or ``None`` if ``require_save`` is set
            and the file's modification time did not change.
        """
        import tempfile

        if text is None:
            data: bytes | bytearray = b""
        elif isinstance(text, (bytes, bytearray)):
            data = text
        else:
            if text and not text.endswith("\n"):
                text += "\n"

            if WIN:
                data = text.replace("\n", "\r\n").encode("utf-8-sig")
            else:
                data = text.encode("utf-8")

        fd, name = tempfile.mkstemp(prefix="editor-", suffix=self.extension)
        f: t.BinaryIO

        try:
            with os.fdopen(fd, "wb") as f:
                f.write(data)

            # If the filesystem resolution is 1 second, like Mac OS
            # 10.12 Extended, or 2 seconds, like FAT32, and the editor
            # closes very fast, require_save can fail. Set the modified
            # time to be 2 seconds in the past to work around this.
            os.utime(name, (os.path.getatime(name), os.path.getmtime(name) - 2))
            # Depending on the resolution, the exact value might not be
            # recorded, so get the new recorded value.
            timestamp = os.path.getmtime(name)

            self.edit_files((name,))

            # An unchanged mtime is taken to mean "not saved".
            if self.require_save and os.path.getmtime(name) == timestamp:
                return None

            with open(name, "rb") as f:
                rv = f.read()

            if isinstance(text, (bytes, bytearray)):
                return rv

            return rv.decode("utf-8-sig").replace("\r\n", "\n")
        finally:
            # The temporary file is removed on every exit path.
            os.unlink(name)
def open_url(url: str, wait: bool = False, locate: bool = False) -> int:
    """Open ``url`` with the platform's default handler.

    :param url: URL or file path to open.
    :param wait: ask the launcher to wait until the opened program exits.
    :param locate: show the file's location (reveal it or open its
        directory) instead of opening the file itself.
    :return: the launcher's exit status; ``127`` on Windows/Cygwin if the
        launcher cannot be started; on other platforms ``0`` when not
        waiting or after the ``webbrowser`` fallback, and ``1`` if nothing
        could be started.
    """
    import subprocess

    def _unquote_file(url: str) -> str:
        # Turn a "file://" URL into a plain, percent-decoded path; other
        # values are returned unchanged.
        from urllib.parse import unquote

        if url.startswith("file://"):
            url = unquote(url[7:])

        return url

    if sys.platform == "darwin":
        args = ["open"]
        if wait:
            args.append("-W")
        if locate:
            args.append("-R")
        args.append(_unquote_file(url))
        # stderr of ``open`` is discarded.
        null = open("/dev/null", "w")
        try:
            return subprocess.Popen(args, stderr=null).wait()
        finally:
            null.close()
    elif WIN:
        if locate:
            url = _unquote_file(url)
            args = ["explorer", f"/select,{url}"]
        else:
            # NOTE(review): ``start`` is normally a cmd.exe built-in rather
            # than an executable; confirm this works without a shell.
            args = ["start"]
            if wait:
                args.append("/WAIT")
            # Empty argument: presumably the window title that ``start``
            # expects first -- TODO confirm.
            args.append("")
            args.append(url)
        try:
            return subprocess.call(args)
        except OSError:
            # Command not found
            return 127
    elif CYGWIN:
        if locate:
            url = _unquote_file(url)
            args = ["cygstart", os.path.dirname(url)]
        else:
            args = ["cygstart"]
            if wait:
                args.append("-w")
            args.append(url)
        try:
            return subprocess.call(args)
        except OSError:
            # Command not found
            return 127

    # Every other platform: try xdg-open.
    try:
        if locate:
            url = os.path.dirname(_unquote_file(url)) or "."
        else:
            url = _unquote_file(url)
        c = subprocess.Popen(["xdg-open", url])
        if wait:
            return c.wait()
        return 0
    except OSError:
        # xdg-open could not be started; for plain web URLs fall back to
        # the standard library's webbrowser module.
        if url.startswith(("http://", "https://")) and not locate and not wait:
            import webbrowser

            webbrowser.open(url)
            return 0
        return 1
def _translate_ch_to_exc(ch: str) -> None:
if ch == "\x03":
raise KeyboardInterrupt()
if ch == "\x04" and not WIN: # Unix-like, Ctrl+D
raise EOFError()
if ch == "\x1a" and WIN: # Windows, Ctrl+Z
raise EOFError()
return None
# Platform-specific implementations of ``raw_terminal`` and ``getchar``.
if sys.platform == "win32":
    import msvcrt

    @contextlib.contextmanager
    def raw_terminal() -> cabc.Iterator[int]:
        """Windows placeholder: yields ``-1`` and changes nothing."""
        yield -1

    def getchar(echo: bool) -> str:
        """Read one key press from the console.

        :param echo: if true, use the echoing ``msvcrt`` reader.
        :return: the character, or a two-character string for special keys.
        :raises KeyboardInterrupt, EOFError: for the control characters
            handled by ``_translate_ch_to_exc``.
        """
        # The function `getch` will return a bytes object corresponding to
        # the pressed character. Since Windows 10 build 1803, it will also
        # return \x00 when called a second time after pressing a regular key.
        #
        # `getwch` does not share this probably-bugged behavior. Moreover, it
        # returns a Unicode object by default, which is what we want.
        #
        # Either of these functions will return \x00 or \xe0 to indicate
        # a special key, and you need to call the same function again to get
        # the "rest" of the code. The fun part is that \u00e0 is
        # "latin small letter a with grave", so if you type that on a French
        # keyboard, you _also_ get a \xe0.
        # E.g., consider the Up arrow. This returns \xe0 and then \x48. The
        # resulting Unicode string reads as "a with grave" + "capital H".
        # This is indistinguishable from when the user actually types
        # "a with grave" and then "capital H".
        #
        # When \xe0 is returned, we assume it's part of a special-key sequence
        # and call `getwch` again, but that means that when the user types
        # the \u00e0 character, `getchar` doesn't return until a second
        # character is typed.
        # The alternative is returning immediately, but that would mess up
        # cross-platform handling of arrow keys and others that start with
        # \xe0. Another option is using `getch`, but then we can't reliably
        # read non-ASCII characters, because return values of `getch` are
        # limited to the current 8-bit codepage.
        #
        # Anyway, Click doesn't claim to do this Right(tm), and using `getwch`
        # is doing the right thing in more situations than with `getch`.

        if echo:
            func = t.cast(t.Callable[[], str], msvcrt.getwche)
        else:
            func = t.cast(t.Callable[[], str], msvcrt.getwch)

        rv = func()

        if rv in ("\x00", "\xe0"):
            # \x00 and \xe0 are control characters that indicate special key,
            # see above.
            rv += func()

        _translate_ch_to_exc(rv)
        return rv

else:
    import termios
    import tty

    @contextlib.contextmanager
    def raw_terminal() -> cabc.Iterator[int]:
        """Put the terminal into raw mode and yield its file descriptor.

        Uses stdin if it is a TTY, otherwise opens ``/dev/tty``. The
        previous terminal settings are restored on exit. ``termios.error``
        is swallowed.
        """
        f: t.TextIO | None
        fd: int

        if not isatty(sys.stdin):
            f = open("/dev/tty")
            fd = f.fileno()
        else:
            fd = sys.stdin.fileno()
            f = None

        try:
            old_settings = termios.tcgetattr(fd)

            try:
                tty.setraw(fd)
                yield fd
            finally:
                # Always restore the saved settings, even if the body raised.
                termios.tcsetattr(fd, termios.TCSADRAIN, old_settings)
                sys.stdout.flush()

                if f is not None:
                    f.close()
        except termios.error:
            pass

    def getchar(echo: bool) -> str:
        """Read one key press (up to 32 bytes) from the terminal in raw mode.

        :param echo: if true and stdout is a TTY, write the input back out.
        :raises KeyboardInterrupt, EOFError: for the control characters
            handled by ``_translate_ch_to_exc``.
        """
        with raw_terminal() as fd:
            # Undecodable bytes are replaced rather than raising.
            ch = os.read(fd, 32).decode(get_best_encoding(sys.stdin), "replace")

            if echo and isatty(sys.stdout):
                sys.stdout.write(ch)

            _translate_ch_to_exc(ch)
            return ch

View File

@@ -0,0 +1,51 @@
from __future__ import annotations
import collections.abc as cabc
import textwrap
from contextlib import contextmanager
class TextWrapper(textwrap.TextWrapper):
    """``textwrap.TextWrapper`` with custom long-word handling and indent helpers."""

    def _handle_long_word(
        self,
        reversed_chunks: list[str],
        cur_line: list[str],
        cur_len: int,
        width: int,
    ) -> None:
        """Deal with a chunk that does not fit on any line.

        If ``break_long_words`` is set, as much of the chunk as fits in the
        remaining space (at least one character) is moved to the current
        line. Otherwise the whole chunk is placed on the line, but only if
        the line is still empty.
        """
        room = max(1, width - cur_len)

        if not self.break_long_words:
            if not cur_line:
                cur_line.append(reversed_chunks.pop())
            return

        word = reversed_chunks[-1]
        head, tail = word[:room], word[room:]
        cur_line.append(head)
        reversed_chunks[-1] = tail

    @contextmanager
    def extra_indent(self, indent: str) -> cabc.Iterator[None]:
        """Temporarily append ``indent`` to both indent settings."""
        saved_indents = (self.initial_indent, self.subsequent_indent)
        self.initial_indent += indent
        self.subsequent_indent += indent

        try:
            yield
        finally:
            self.initial_indent, self.subsequent_indent = saved_indents

    def indent_only(self, text: str) -> str:
        """Indent each line of ``text`` without wrapping it.

        The first line gets ``initial_indent``, every other line
        ``subsequent_indent``.
        """
        return "\n".join(
            f"{self.initial_indent if idx == 0 else self.subsequent_indent}{line}"
            for idx, line in enumerate(text.splitlines())
        )

View File

@@ -0,0 +1,36 @@
from __future__ import annotations
import enum
import typing as t
class Sentinel(enum.Enum):
    """Enum used to define sentinel values.

    .. seealso::

        `PEP 661 - Sentinel Values <https://peps.python.org/pep-0661/>`_.
    """

    UNSET = object()
    FLAG_NEEDS_VALUE = object()

    def __repr__(self) -> str:
        # Compact ``ClassName.MEMBER`` form, without the member's value.
        owner = type(self).__name__
        return ".".join((owner, self.name))
UNSET = Sentinel.UNSET
"""Sentinel used to indicate that a value is not set."""
FLAG_NEEDS_VALUE = Sentinel.FLAG_NEEDS_VALUE
"""Sentinel used to indicate an option was passed as a flag without a
value but is not a flag option.
``Option.consume_value`` uses this to prompt or use the ``flag_value``.
"""
T_UNSET = t.Literal[UNSET] # type: ignore[valid-type]
"""Type hint for the :data:`UNSET` sentinel value."""
T_FLAG_NEEDS_VALUE = t.Literal[FLAG_NEEDS_VALUE] # type: ignore[valid-type]
"""Type hint for the :data:`FLAG_NEEDS_VALUE` sentinel value."""

View File

@@ -0,0 +1,296 @@
# This module is based on the excellent work by Adam Bartoš who
# provided a lot of what went into the implementation here in
# the discussion to issue1602 in the Python bug tracker.
#
# There are some general differences in regards to how this works
# compared to the original patches as we do not need to patch
# the entire interpreter but just work in our little world of
# echo and prompt.
from __future__ import annotations
import collections.abc as cabc
import io
import sys
import time
import typing as t
from ctypes import Array
from ctypes import byref
from ctypes import c_char
from ctypes import c_char_p
from ctypes import c_int
from ctypes import c_ssize_t
from ctypes import c_ulong
from ctypes import c_void_p
from ctypes import POINTER
from ctypes import py_object
from ctypes import Structure
from ctypes.wintypes import DWORD
from ctypes.wintypes import HANDLE
from ctypes.wintypes import LPCWSTR
from ctypes.wintypes import LPWSTR
from ._compat import _NonClosingTextIOWrapper
assert sys.platform == "win32"
import msvcrt # noqa: E402
from ctypes import windll # noqa: E402
from ctypes import WINFUNCTYPE # noqa: E402
c_ssize_p = POINTER(c_ssize_t)

# Win32 console functions, looked up on kernel32.
kernel32 = windll.kernel32
GetStdHandle = kernel32.GetStdHandle
ReadConsoleW = kernel32.ReadConsoleW
WriteConsoleW = kernel32.WriteConsoleW
GetConsoleMode = kernel32.GetConsoleMode
GetLastError = kernel32.GetLastError
# Prototyped bindings with explicit return and argument types.
GetCommandLineW = WINFUNCTYPE(LPWSTR)(("GetCommandLineW", windll.kernel32))
CommandLineToArgvW = WINFUNCTYPE(POINTER(LPWSTR), LPCWSTR, POINTER(c_int))(
    ("CommandLineToArgvW", windll.shell32)
)
LocalFree = WINFUNCTYPE(c_void_p, c_void_p)(("LocalFree", windll.kernel32))

# -10, -11 and -12 are the Win32 identifiers for the standard input,
# output and error devices.
STDIN_HANDLE = GetStdHandle(-10)
STDOUT_HANDLE = GetStdHandle(-11)
STDERR_HANDLE = GetStdHandle(-12)

# Flags passed to PyObject_GetBuffer.
PyBUF_SIMPLE = 0
PyBUF_WRITABLE = 1

# Windows error codes checked after console calls.
ERROR_SUCCESS = 0
ERROR_NOT_ENOUGH_MEMORY = 8
ERROR_OPERATION_ABORTED = 995

STDIN_FILENO = 0
STDOUT_FILENO = 1
STDERR_FILENO = 2

# Ctrl+Z; a console read whose buffer starts with it is reported as EOF.
EOF = b"\x1a"
# Cap on the number of bytes handed to a single WriteConsoleW call.
MAX_BYTES_WRITTEN = 32767

# ``Buffer`` is only needed for annotations.
if t.TYPE_CHECKING:
    try:
        # Using `typing_extensions.Buffer` instead of `collections.abc`
        # on Windows for some reason does not have `Sized` implemented.
        from collections.abc import Buffer  # type: ignore
    except ImportError:
        from typing_extensions import Buffer
try:
    from ctypes import pythonapi
except ImportError:
    # On PyPy we cannot get buffers so our ability to operate here is
    # severely limited.
    get_buffer = None
else:

    class Py_buffer(Structure):
        # ctypes layout used to receive the result of PyObject_GetBuffer.
        _fields_ = [  # noqa: RUF012
            ("buf", c_void_p),
            ("obj", py_object),
            ("len", c_ssize_t),
            ("itemsize", c_ssize_t),
            ("readonly", c_int),
            ("ndim", c_int),
            ("format", c_char_p),
            ("shape", c_ssize_p),
            ("strides", c_ssize_p),
            ("suboffsets", c_ssize_p),
            ("internal", c_void_p),
        ]

    PyObject_GetBuffer = pythonapi.PyObject_GetBuffer
    PyBuffer_Release = pythonapi.PyBuffer_Release

    def get_buffer(obj: Buffer, writable: bool = False) -> Array[c_char]:
        """Return a ctypes ``c_char`` array mapped onto the memory of ``obj``.

        :param obj: object exposing the buffer protocol.
        :param writable: request a writable buffer (``PyBUF_WRITABLE``)
            instead of a simple one.
        """
        buf = Py_buffer()
        flags: int = PyBUF_WRITABLE if writable else PyBUF_SIMPLE
        PyObject_GetBuffer(py_object(obj), byref(buf), flags)

        try:
            # The array is created at the buffer's address, not copied.
            buffer_type = c_char * buf.len
            out: Array[c_char] = buffer_type.from_address(buf.buf)
            return out
        finally:
            # The buffer view is released before the array reaches the
            # caller. NOTE(review): the array presumably stays valid only
            # while ``obj`` itself is kept alive -- confirm.
            PyBuffer_Release(byref(buf))
class _WindowsConsoleRawIOBase(io.RawIOBase):
def __init__(self, handle: int | None) -> None:
self.handle = handle
def isatty(self) -> t.Literal[True]:
super().isatty()
return True
class _WindowsConsoleReader(_WindowsConsoleRawIOBase):
    """Raw readable stream that fills buffers via ``ReadConsoleW``."""

    def readable(self) -> t.Literal[True]:
        return True

    def readinto(self, b: Buffer) -> int:
        """Read UTF-16-LE data from the console into ``b``.

        :param b: writable buffer; its length must be even because every
            code unit occupies two bytes.
        :return: number of bytes stored in ``b``; ``0`` for an empty
            buffer or when the first byte read is the EOF marker.
        :raises ValueError: if ``len(b)`` is odd.
        :raises OSError: if ``ReadConsoleW`` reports failure.
        """
        requested_bytes = len(b)

        if requested_bytes == 0:
            return 0

        if requested_bytes % 2 != 0:
            raise ValueError(
                "cannot read odd number of bytes from UTF-16-LE encoded console"
            )

        target = get_buffer(b, writable=True)
        units_read = c_ulong()
        succeeded = ReadConsoleW(
            HANDLE(self.handle),
            target,
            requested_bytes // 2,
            byref(units_read),
            None,
        )

        if GetLastError() == ERROR_OPERATION_ABORTED:
            # wait for KeyboardInterrupt
            time.sleep(0.1)

        if not succeeded:
            raise OSError(f"Windows error: {GetLastError()}")

        if target[0] == EOF:
            return 0

        return units_read.value * 2
class _WindowsConsoleWriter(_WindowsConsoleRawIOBase):
    """Raw writable stream that emits data via ``WriteConsoleW``."""

    def writable(self) -> t.Literal[True]:
        return True

    @staticmethod
    def _get_error_message(errno: int) -> str:
        """Map a Windows error code to a short text.

        The two codes known to this module map to their symbolic names;
        any other code is rendered as ``"Windows error <code>"``.
        """
        known_codes = {
            ERROR_SUCCESS: "ERROR_SUCCESS",
            ERROR_NOT_ENOUGH_MEMORY: "ERROR_NOT_ENOUGH_MEMORY",
        }
        return known_codes.get(errno, f"Windows error {errno}")

    def write(self, b: Buffer) -> int:
        """Write UTF-16-LE encoded bytes from ``b`` to the console.

        At most ``MAX_BYTES_WRITTEN`` bytes are submitted per call.

        :param b: buffer holding the encoded data.
        :return: number of bytes written (twice the code units written).
        :raises OSError: if nothing was written although ``b`` is not
            empty.
        """
        total_bytes = len(b)
        source = get_buffer(b)
        units_to_write = min(total_bytes, MAX_BYTES_WRITTEN) // 2
        units_written = c_ulong()

        WriteConsoleW(
            HANDLE(self.handle),
            source,
            units_to_write,
            byref(units_written),
            None,
        )
        written_bytes = units_written.value * 2

        if total_bytes > 0 and written_bytes == 0:
            raise OSError(self._get_error_message(GetLastError()))

        return written_bytes
class ConsoleStream:
    """Facade that routes ``str`` writes to a text stream and everything
    else to a binary stream.

    Attributes not defined here are looked up on the text stream.

    :param text_stream: stream receiving ``str`` data.
    :param byte_stream: stream receiving non-``str`` data; exposed as
        ``buffer``.
    """

    def __init__(self, text_stream: t.TextIO, byte_stream: t.BinaryIO) -> None:
        self.buffer = byte_stream
        self._text_stream = text_stream

    @property
    def name(self) -> str:
        """Name of the underlying binary stream."""
        return self.buffer.name

    def write(self, x: t.AnyStr) -> int:
        """Write ``x`` to the stream matching its type and return the
        value reported by that stream's ``write``.
        """
        if not isinstance(x, str):
            # Best effort: flush the text side before writing to the binary
            # side; a failing flush is ignored on purpose.
            try:
                self.flush()
            except Exception:
                pass
            return self.buffer.write(x)

        return self._text_stream.write(x)

    def writelines(self, lines: cabc.Iterable[t.AnyStr]) -> None:
        """Write every item of ``lines`` through :meth:`write`."""
        for chunk in lines:
            self.write(chunk)

    def __getattr__(self, name: str) -> t.Any:
        # Only reached for attributes missing on this object.
        return getattr(self._text_stream, name)

    def isatty(self) -> bool:
        """Delegate to the binary stream's ``isatty``."""
        return self.buffer.isatty()

    def __repr__(self) -> str:
        return f"<ConsoleStream name={self.name!r} encoding={self.encoding!r}>"
def _get_text_stdin(buffer_stream: t.BinaryIO) -> t.TextIO:
    """Build the console-backed text stream for standard input.

    :param buffer_stream: binary stream exposed as ``buffer`` on the
        returned :class:`ConsoleStream`.
    """
    raw_reader = _WindowsConsoleReader(STDIN_HANDLE)
    decoded = _NonClosingTextIOWrapper(
        io.BufferedReader(raw_reader),
        "utf-16-le",
        "strict",
        line_buffering=True,
    )
    console = ConsoleStream(decoded, buffer_stream)
    return t.cast(t.TextIO, console)
def _get_text_stdout(buffer_stream: t.BinaryIO) -> t.TextIO:
    """Build the console-backed text stream for standard output.

    :param buffer_stream: binary stream exposed as ``buffer`` on the
        returned :class:`ConsoleStream`.
    """
    raw_writer = _WindowsConsoleWriter(STDOUT_HANDLE)
    encoded = _NonClosingTextIOWrapper(
        io.BufferedWriter(raw_writer),
        "utf-16-le",
        "strict",
        line_buffering=True,
    )
    console = ConsoleStream(encoded, buffer_stream)
    return t.cast(t.TextIO, console)
def _get_text_stderr(buffer_stream: t.BinaryIO) -> t.TextIO:
    """Build the console-backed text stream for standard error.

    :param buffer_stream: binary stream exposed as ``buffer`` on the
        returned :class:`ConsoleStream`.
    """
    raw_writer = _WindowsConsoleWriter(STDERR_HANDLE)
    encoded = _NonClosingTextIOWrapper(
        io.BufferedWriter(raw_writer),
        "utf-16-le",
        "strict",
        line_buffering=True,
    )
    console = ConsoleStream(encoded, buffer_stream)
    return t.cast(t.TextIO, console)
# Maps the file descriptor of a standard stream (0 = stdin, 1 = stdout,
# 2 = stderr) to the factory that builds its console-backed text stream.
# Looked up by ``_get_windows_console_stream`` with ``f.fileno()``.
_stream_factories: cabc.Mapping[int, t.Callable[[t.BinaryIO], t.TextIO]] = {
0: _get_text_stdin,
1: _get_text_stdout,
2: _get_text_stderr,
}
def _is_console(f: t.TextIO) -> bool:
    """Tell whether ``f`` is backed by a console handle.

    :param f: stream to inspect.
    :return: ``False`` when ``f`` has no ``fileno`` or calling it raises
        :exc:`OSError` / :exc:`io.UnsupportedOperation`; otherwise the
        truthiness of ``GetConsoleMode`` for the stream's OS handle.
    """
    if hasattr(f, "fileno"):
        try:
            descriptor = f.fileno()
        except (OSError, io.UnsupportedOperation):
            return False

        os_handle = msvcrt.get_osfhandle(descriptor)
        mode = DWORD()
        return bool(GetConsoleMode(os_handle, byref(mode)))

    return False
def _get_windows_console_stream(
    f: t.TextIO, encoding: str | None, errors: str | None
) -> t.TextIO | None:
    """Return a console-backed replacement for ``f`` when one applies.

    :param f: candidate standard stream.
    :param encoding: requested encoding; only ``"utf-16-le"`` or ``None``
        qualify.
    :param errors: requested error handler; only ``"strict"`` or ``None``
        qualify.
    :return: the replacement stream, or ``None`` when buffers are not
        available, the encoding/errors do not qualify, ``f`` is not a
        console, its descriptor has no factory, or it has no ``buffer``.
    """
    # The checks run in this exact order so that the console probe only
    # happens after the cheap tests have passed.
    if get_buffer is None:
        return None

    if encoding not in {"utf-16-le", None}:
        return None

    if errors not in {"strict", None}:
        return None

    if not _is_console(f):
        return None

    factory = _stream_factories.get(f.fileno())

    if factory is None:
        return None

    binary = getattr(f, "buffer", None)
    return None if binary is None else factory(binary)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,551 @@
from __future__ import annotations
import inspect
import typing as t
from functools import update_wrapper
from gettext import gettext as _
from .core import Argument
from .core import Command
from .core import Context
from .core import Group
from .core import Option
from .core import Parameter
from .globals import get_current_context
from .utils import echo
if t.TYPE_CHECKING:
import typing_extensions as te
P = te.ParamSpec("P")
# Return type of a decorated callback.
R = t.TypeVar("R")
# Type of the object injected as the first argument of a callback.
T = t.TypeVar("T")
# Any callable, whatever its signature.
_AnyCallable = t.Callable[..., t.Any]
# Target of a parameter decorator: a plain callable or a Command. The
# parameter decorators in this module return their target unchanged.
FC = t.TypeVar("FC", bound="_AnyCallable | Command")
def pass_context(f: t.Callable[te.Concatenate[Context, P], R]) -> t.Callable[P, R]:
    """Decorate a callback so that it receives the current context as
    its first positional argument.

    :param f: callback expecting the context first.
    :return: wrapper that looks up the current context on every call and
        forwards all other arguments unchanged.
    """

    def new_func(*args: P.args, **kwargs: P.kwargs) -> R:
        current = get_current_context()
        return f(current, *args, **kwargs)

    return update_wrapper(new_func, f)
def pass_obj(f: t.Callable[te.Concatenate[T, P], R]) -> t.Callable[P, R]:
    """Like :func:`pass_context`, but hand over only the object stored
    on the current context (:attr:`Context.obj`). Handy when that
    object carries the state of a nested system.

    :param f: callback expecting the context object first.
    :return: wrapper that injects ``get_current_context().obj``.
    """

    def new_func(*args: P.args, **kwargs: P.kwargs) -> R:
        context_obj = get_current_context().obj
        return f(context_obj, *args, **kwargs)

    return update_wrapper(new_func, f)
def make_pass_decorator(
    object_type: type[T], ensure: bool = False
) -> t.Callable[[t.Callable[te.Concatenate[T, P], R]], t.Callable[P, R]]:
    """Build a decorator comparable to :func:`pass_obj` that, instead of
    passing the current context's object, passes the object of type
    ``object_type`` found through the context.

    The generated decorator behaves roughly like this::

        from functools import update_wrapper

        def decorator(f):
            @pass_context
            def new_func(ctx, *args, **kwargs):
                obj = ctx.find_object(object_type)
                return ctx.invoke(f, obj, *args, **kwargs)
            return update_wrapper(new_func, f)
        return decorator

    :param object_type: the type of the object to pass.
    :param ensure: if set to `True`, the object is obtained with
        ``ensure_object`` (created and remembered on the context if it
        is not there yet) instead of ``find_object``.
    :raises RuntimeError: when the wrapped callback is invoked and no
        object of ``object_type`` is available.
    """

    def decorator(f: t.Callable[te.Concatenate[T, P], R]) -> t.Callable[P, R]:
        def new_func(*args: P.args, **kwargs: P.kwargs) -> R:
            ctx = get_current_context()
            lookup = ctx.ensure_object if ensure else ctx.find_object
            obj: T | None = lookup(object_type)

            if obj is None:
                raise RuntimeError(
                    "Managed to invoke callback without a context"
                    f" object of type {object_type.__name__!r}"
                    " existing."
                )

            return ctx.invoke(f, obj, *args, **kwargs)

        return update_wrapper(new_func, f)

    return decorator
def pass_meta_key(
    key: str, *, doc_description: str | None = None
) -> t.Callable[[t.Callable[te.Concatenate[T, P], R]], t.Callable[P, R]]:
    """Build a decorator that passes the value stored under ``key`` in
    :attr:`click.Context.meta` as the first argument of the decorated
    function.

    :param key: Key in ``Context.meta`` to pass.
    :param doc_description: Description of the object being passed,
        inserted into the decorator's docstring. Defaults to "the 'key'
        key from Context.meta".

    .. versionadded:: 8.0
    """

    def decorator(f: t.Callable[te.Concatenate[T, P], R]) -> t.Callable[P, R]:
        def new_func(*args: P.args, **kwargs: P.kwargs) -> R:
            ctx = get_current_context()
            value = ctx.meta[key]
            return ctx.invoke(f, value, *args, **kwargs)

        return update_wrapper(new_func, f)

    description = (
        f"the {key!r} key from :attr:`click.Context.meta`"
        if doc_description is None
        else doc_description
    )
    decorator.__doc__ = (
        f"Decorator that passes {description} as the first argument"
        " to the decorated function."
    )
    return decorator
# Type variable for the concrete Command subclass passed as ``cls``; it lets
# the overloads below return that exact class.
CmdType = t.TypeVar("CmdType", bound=Command)
# Typing-only overloads of ``command``; the implementation follows them.
# variant: no call, used directly as a decorator on a function: @command
@t.overload
def command(name: _AnyCallable) -> Command: ...
# variant: with positional name and with positional or keyword cls argument:
# @command(namearg, CommandCls, ...) or @command(namearg, cls=CommandCls, ...)
@t.overload
def command(
name: str | None,
cls: type[CmdType],
**attrs: t.Any,
) -> t.Callable[[_AnyCallable], CmdType]: ...
# variant: name omitted, cls _must_ be a keyword argument: @command(cls=CommandCls, ...)
@t.overload
def command(
name: None = None,
*,
cls: type[CmdType],
**attrs: t.Any,
) -> t.Callable[[_AnyCallable], CmdType]: ...
# variant: with optional string name, no cls argument provided; the result
# is a plain Command.
@t.overload
def command(
name: str | None = ..., cls: None = None, **attrs: t.Any
) -> t.Callable[[_AnyCallable], Command]: ...
def command(
    name: str | _AnyCallable | None = None,
    cls: type[CmdType] | None = None,
    **attrs: t.Any,
) -> Command | t.Callable[[_AnyCallable], Command | CmdType]:
    r"""Creates a new :class:`Command` and uses the decorated function as
    callback. This will also automatically attach all decorated
    :func:`option`\s and :func:`argument`\s as parameters to the command.

    The name of the command defaults to the name of the function, converted to
    lowercase, with underscores ``_`` replaced by dashes ``-``, and the suffixes
    ``_command``, ``_cmd``, ``_group``, and ``_grp`` are removed. For example,
    ``init_data_command`` becomes ``init-data``.

    All keyword arguments are forwarded to the underlying command class.
    For the ``params`` argument, any decorated params are appended to
    the end of the list. The keyword arguments and the ``params`` list
    are copied for every decorated function, so the returned decorator
    can be applied to several functions without one leaking ``params``
    or ``help`` into the next.

    Once decorated the function turns into a :class:`Command` instance
    that can be invoked as a command line utility or be attached to a
    command :class:`Group`.

    :param name: The name of the command. Defaults to modifying the function's
        name as described above.
    :param cls: The command class to create. Defaults to :class:`Command`.
    :raises TypeError: if the decorated object already is a
        :class:`Command`.

    .. versionchanged:: 8.2
        The suffixes ``_command``, ``_cmd``, ``_group``, and ``_grp`` are
        removed when generating the name.

    .. versionchanged:: 8.1
        This decorator can be applied without parentheses.

    .. versionchanged:: 8.1
        The ``params`` argument can be used. Decorated params are
        appended to the end of the list.
    """
    func: t.Callable[[_AnyCallable], t.Any] | None = None

    if callable(name):
        # Used as a bare ``@command``: ``name`` actually is the callback.
        func = name
        name = None
        assert cls is None, "Use 'command(cls=cls)(callable)' to specify a class."
        assert not attrs, "Use 'command(**kwargs)(callable)' to provide arguments."

    if cls is None:
        cls = t.cast("type[CmdType]", Command)

    def decorator(f: _AnyCallable) -> CmdType:
        if isinstance(f, Command):
            raise TypeError("Attempted to convert a callback into a command twice.")

        # Work on per-call copies. Mutating the shared ``attrs`` closure (or
        # the caller's ``params`` list) would make a second use of this
        # decorator lose ``params`` and inherit the first callback's help
        # text and decorated parameters.
        cmd_attrs = dict(attrs)
        attr_params = cmd_attrs.pop("params", None)
        params = list(attr_params) if attr_params is not None else []

        try:
            decorator_params = f.__click_params__  # type: ignore
        except AttributeError:
            pass
        else:
            del f.__click_params__  # type: ignore
            # Param decorators are recorded innermost-first; reversing
            # restores the order in which they are written in the source.
            params.extend(reversed(decorator_params))

        if cmd_attrs.get("help") is None:
            cmd_attrs["help"] = f.__doc__

        if t.TYPE_CHECKING:
            assert cls is not None
            assert not callable(name)

        if name is not None:
            cmd_name = name
        else:
            cmd_name = f.__name__.lower().replace("_", "-")
            cmd_left, sep, suffix = cmd_name.rpartition("-")

            if sep and suffix in {"command", "cmd", "group", "grp"}:
                cmd_name = cmd_left

        cmd = cls(name=cmd_name, callback=f, params=params, **cmd_attrs)
        cmd.__doc__ = f.__doc__
        return cmd

    if func is not None:
        return decorator(func)

    return decorator
# Type variable for the concrete Group subclass passed as ``cls``; it lets
# the overloads below return that exact class.
GrpType = t.TypeVar("GrpType", bound=Group)
# Typing-only overloads of ``group``; the implementation follows them.
# variant: no call, used directly as a decorator on a function: @group
@t.overload
def group(name: _AnyCallable) -> Group: ...
# variant: with positional name and with positional or keyword cls argument:
# @group(namearg, GroupCls, ...) or @group(namearg, cls=GroupCls, ...)
@t.overload
def group(
name: str | None,
cls: type[GrpType],
**attrs: t.Any,
) -> t.Callable[[_AnyCallable], GrpType]: ...
# variant: name omitted, cls _must_ be a keyword argument: @group(cls=GroupCls, ...)
@t.overload
def group(
name: None = None,
*,
cls: type[GrpType],
**attrs: t.Any,
) -> t.Callable[[_AnyCallable], GrpType]: ...
# variant: with optional string name, no cls argument provided; the result
# is a plain Group.
@t.overload
def group(
name: str | None = ..., cls: None = None, **attrs: t.Any
) -> t.Callable[[_AnyCallable], Group]: ...
def group(
    name: str | _AnyCallable | None = None,
    cls: type[GrpType] | None = None,
    **attrs: t.Any,
) -> Group | t.Callable[[_AnyCallable], Group | GrpType]:
    """Create a new :class:`Group` with a function as callback. Apart
    from ``cls`` defaulting to :class:`Group`, this behaves exactly
    like :func:`command`, to which it delegates.

    .. versionchanged:: 8.1
        This decorator can be applied without parentheses.
    """
    group_cls = t.cast("type[GrpType]", Group) if cls is None else cls

    if not callable(name):
        return command(name, group_cls, **attrs)

    # Bare ``@group`` usage: ``name`` is the callback to decorate.
    return command(cls=group_cls, **attrs)(name)
def _param_memo(f: t.Callable[..., t.Any], param: Parameter) -> None:
    """Record ``param`` on ``f``.

    A :class:`Command` gets the parameter appended to its ``params``
    list. Any other callable collects it in a ``__click_params__`` list
    attribute, which is created on first use.
    """
    if isinstance(f, Command):
        f.params.append(param)
        return

    if not hasattr(f, "__click_params__"):
        f.__click_params__ = []  # type: ignore

    f.__click_params__.append(param)  # type: ignore
def argument(
    *param_decls: str, cls: type[Argument] | None = None, **attrs: t.Any
) -> t.Callable[[FC], FC]:
    """Attach an argument to the command. The positional arguments are
    handed to :class:`Argument` as parameter declarations, and every
    keyword argument except ``cls`` is forwarded unchanged.

    This has the same effect as creating an :class:`Argument` instance
    by hand and adding it to the :attr:`Command.params` list.

    See :class:`Argument` and :class:`Parameter` for the parameters
    understood by the default argument class.

    :param cls: the argument class to instantiate. This defaults to
        :class:`Argument`.
    :param param_decls: Passed as positional arguments to the constructor of
        ``cls``.
    :param attrs: Passed as keyword arguments to the constructor of ``cls``.
    """
    param_cls = Argument if cls is None else cls

    def decorator(f: FC) -> FC:
        # A new parameter instance is built for every decorated target.
        param = param_cls(param_decls, **attrs)
        _param_memo(f, param)
        return f

    return decorator
def option(
    *param_decls: str, cls: type[Option] | None = None, **attrs: t.Any
) -> t.Callable[[FC], FC]:
    """Attach an option to the command. The positional arguments are
    handed to :class:`Option` as parameter declarations, and every
    keyword argument except ``cls`` is forwarded unchanged.

    This has the same effect as creating an :class:`Option` instance by
    hand and adding it to the :attr:`Command.params` list.

    See :class:`Option` and :class:`Parameter` for the parameters
    understood by the default option class.

    :param cls: the option class to instantiate. This defaults to
        :class:`Option`.
    :param param_decls: Passed as positional arguments to the constructor of
        ``cls``.
    :param attrs: Passed as keyword arguments to the constructor of ``cls``.
    """
    param_cls = Option if cls is None else cls

    def decorator(f: FC) -> FC:
        # A new parameter instance is built for every decorated target.
        param = param_cls(param_decls, **attrs)
        _param_memo(f, param)
        return f

    return decorator
def confirmation_option(*param_decls: str, **kwargs: t.Any) -> t.Callable[[FC], FC]:
    """Add a ``--yes`` option that prompts for confirmation unless it is
    passed. Declining the prompt aborts the program.

    :param param_decls: One or more option names. Defaults to the single
        value ``"--yes"``.
    :param kwargs: Extra arguments are passed to :func:`option`. Values
        given here take precedence over the defaults set by this
        function.
    """

    def callback(ctx: Context, param: Parameter, value: bool) -> None:
        if not value:
            ctx.abort()

    decls = param_decls or ("--yes",)
    defaults: dict[str, t.Any] = {
        "is_flag": True,
        "callback": callback,
        "expose_value": False,
        "prompt": "Do you want to continue?",
        "help": "Confirm the action without prompting.",
    }

    for key, default in defaults.items():
        kwargs.setdefault(key, default)

    return option(*decls, **kwargs)
def password_option(*param_decls: str, **kwargs: t.Any) -> t.Callable[[FC], FC]:
    """Add a ``--password`` option that prompts for the value with
    hidden input and asks for it a second time as confirmation.

    :param param_decls: One or more option names. Defaults to the single
        value ``"--password"``.
    :param kwargs: Extra arguments are passed to :func:`option`. Values
        given here take precedence over the defaults set by this
        function.
    """
    decls = param_decls or ("--password",)

    for key in ("prompt", "confirmation_prompt", "hide_input"):
        kwargs.setdefault(key, True)

    return option(*decls, **kwargs)
def version_option(
    version: str | None = None,
    *param_decls: str,
    package_name: str | None = None,
    prog_name: str | None = None,
    message: str | None = None,
    **kwargs: t.Any,
) -> t.Callable[[FC], FC]:
    """Add a ``--version`` option which immediately prints the version
    number and exits the program.

    When ``version`` is omitted, Click tries to look it up with
    :func:`importlib.metadata.version` using ``package_name``.

    When ``package_name`` is omitted too, Click tries to derive it from
    the caller's stack frame. Since the derived name is used for the
    version lookup, it must match the name of the installed package.

    :param version: The version number to show. If not provided, Click
        will try to detect it.
    :param param_decls: One or more option names. Defaults to the single
        value ``"--version"``.
    :param package_name: The package name to detect the version from. If
        not provided, Click will try to detect it.
    :param prog_name: The name of the CLI to show in the message. If not
        provided, it will be detected from the command.
    :param message: The message to show. The values ``%(prog)s``,
        ``%(package)s``, and ``%(version)s`` are available. Defaults to
        ``"%(prog)s, version %(version)s"``.
    :param kwargs: Extra arguments are passed to :func:`option`. A
        ``callback`` given here is replaced by the version callback.
    :raise RuntimeError: ``version`` could not be detected.

    .. versionchanged:: 8.0
        Add the ``package_name`` parameter, and the ``%(package)s``
        value for messages.

    .. versionchanged:: 8.0
        Use :mod:`importlib.metadata` instead of ``pkg_resources``. The
        version is detected based on the package name, not the entry
        point name. The Python package name must match the installed
        package name, or be passed with ``package_name=``.
    """
    if message is None:
        message = _("%(prog)s, version %(version)s")

    if version is None and package_name is None:
        # Must stay inline: the caller is identified as the frame directly
        # above this function.
        caller_globals = None
        frame = inspect.currentframe()

        if frame is not None and frame.f_back is not None:
            caller_globals = frame.f_back.f_globals

        # break reference cycle
        # https://docs.python.org/3/library/inspect.html#the-interpreter-stack
        del frame

        if caller_globals is not None:
            package_name = caller_globals.get("__name__")

            if package_name == "__main__":
                package_name = caller_globals.get("__package__")

            if package_name:
                # Keep only the top-level package of a dotted module path.
                package_name = package_name.partition(".")[0]

    def callback(ctx: Context, param: Parameter, value: bool) -> None:
        # Resolved values are written back to the enclosing scope, so the
        # detection runs at most once per decorator.
        nonlocal prog_name
        nonlocal version

        if not value or ctx.resilient_parsing:
            return

        if prog_name is None:
            prog_name = ctx.find_root().info_name

        if version is None and package_name is not None:
            import importlib.metadata

            try:
                version = importlib.metadata.version(package_name)
            except importlib.metadata.PackageNotFoundError:
                raise RuntimeError(
                    f"{package_name!r} is not installed. Try passing"
                    " 'package_name' instead."
                ) from None

        if version is None:
            raise RuntimeError(
                f"Could not determine the version for {package_name!r} automatically."
            )

        values = {"prog": prog_name, "package": package_name, "version": version}
        echo(message % values, color=ctx.color)
        ctx.exit()

    decls = param_decls or ("--version",)
    kwargs.setdefault("is_flag", True)
    kwargs.setdefault("expose_value", False)
    kwargs.setdefault("is_eager", True)
    kwargs.setdefault("help", _("Show the version and exit."))
    # Unlike the defaults above, the callback is always overridden.
    kwargs["callback"] = callback
    return option(*decls, **kwargs)
def help_option(*param_decls: str, **kwargs: t.Any) -> t.Callable[[FC], FC]:
    """Pre-configured ``--help`` option which immediately prints the help
    page and exits the program.

    :param param_decls: One or more option names. Defaults to the single
        value ``"--help"``.
    :param kwargs: Extra arguments are passed to :func:`option`. Values
        given here take precedence over the defaults set by this
        function.
    """

    def show_help(ctx: Context, param: Parameter, value: bool) -> None:
        """Callback that echoes the help page and exits."""
        if not value or ctx.resilient_parsing:
            return

        echo(ctx.get_help(), color=ctx.color)
        ctx.exit()

    decls = param_decls or ("--help",)
    kwargs.setdefault("is_flag", True)
    kwargs.setdefault("expose_value", False)
    kwargs.setdefault("is_eager", True)
    kwargs.setdefault("help", _("Show this message and exit."))
    kwargs.setdefault("callback", show_help)
    return option(*decls, **kwargs)

View File

@@ -0,0 +1,308 @@
from __future__ import annotations
import collections.abc as cabc
import typing as t
from gettext import gettext as _
from gettext import ngettext
from ._compat import get_text_stderr
from .globals import resolve_color_default
from .utils import echo
from .utils import format_filename
if t.TYPE_CHECKING:
from .core import Command
from .core import Context
from .core import Parameter
def _join_param_hints(param_hint: cabc.Sequence[str] | str | None) -> str | None:
if param_hint is not None and not isinstance(param_hint, str):
return " / ".join(repr(x) for x in param_hint)
return param_hint
class ClickException(Exception):
    """An exception that Click can handle and show to the user.

    :param message: the error message to display.
    """

    #: The exit code for this exception.
    exit_code = 1

    def __init__(self, message: str) -> None:
        super().__init__(message)
        # The context is gone by the time the message gets printed, so the
        # color setting is captured now and reused later by `show`.
        self.show_color: bool | None = resolve_color_default()
        self.message = message

    def format_message(self) -> str:
        """Return the text shown to the user; subclasses override this."""
        return self.message

    def __str__(self) -> str:
        return self.message

    def show(self, file: t.IO[t.Any] | None = None) -> None:
        """Echo the formatted message prefixed with ``Error:``.

        :param file: stream to write to; defaults to the text stderr
            stream.
        """
        target = get_text_stderr() if file is None else file
        rendered = _("Error: {message}").format(message=self.format_message())
        echo(rendered, file=target, color=self.show_color)
class UsageError(ClickException):
    """An internal exception that signals a usage error. This typically
    aborts any further handling.

    :param message: the error message to display.
    :param ctx: optionally the context that caused this error. Click will
        fill in the context automatically in some situations.
    """

    exit_code = 2

    def __init__(self, message: str, ctx: Context | None = None) -> None:
        super().__init__(message)
        self.ctx = ctx
        self.cmd: Command | None = self.ctx.command if self.ctx else None

    def show(self, file: t.IO[t.Any] | None = None) -> None:
        """Echo the usage line (plus a help hint when the command has a
        help option) followed by the ``Error:`` message.

        Without a context only the error message is printed.

        :param file: stream to write to; defaults to the text stderr
            stream.
        """
        if file is None:
            file = get_text_stderr()

        ctx = self.ctx
        color = None

        if ctx is not None:
            hint = ""

            if ctx.command.get_help_option(ctx) is not None:
                hint = _("Try '{command} {option}' for help.").format(
                    command=ctx.command_path, option=ctx.help_option_names[0]
                )
                hint = f"{hint}\n"

            color = ctx.color
            echo(f"{ctx.get_usage()}\n{hint}", file=file, color=color)

        echo(
            _("Error: {message}").format(message=self.format_message()),
            file=file,
            color=color,
        )
class BadParameter(UsageError):
    """An exception that formats out a standardized error message for a
    bad parameter. This is useful when thrown from a callback or type as
    Click will attach contextual information to it (for instance, which
    parameter it is).

    .. versionadded:: 2.0

    :param param: the parameter object that caused this error. This can
        be left out, and Click will attach this info itself if possible.
    :param param_hint: a string that shows up as parameter name. This
        can be used as alternative to `param` in cases where custom
        validation should happen. If it is a string it's used as such,
        if it's a list then each item is quoted and separated.
    """

    def __init__(
        self,
        message: str,
        ctx: Context | None = None,
        param: Parameter | None = None,
        param_hint: cabc.Sequence[str] | str | None = None,
    ) -> None:
        super().__init__(message, ctx)
        self.param = param
        self.param_hint = param_hint

    def format_message(self) -> str:
        """Build the ``Invalid value ...`` text.

        An explicit ``param_hint`` wins over the hint derived from
        ``param``; with neither, the message carries no parameter name.
        """
        hint = self.param_hint

        if hint is None:
            if self.param is None:
                return _("Invalid value: {message}").format(message=self.message)

            hint = self.param.get_error_hint(self.ctx)  # type: ignore

        return _("Invalid value for {param_hint}: {message}").format(
            param_hint=_join_param_hints(hint), message=self.message
        )
class MissingParameter(BadParameter):
    """Raised if click required an option or argument but it was not
    provided when invoking the script.

    .. versionadded:: 4.0

    :param param_type: a string that indicates the type of the parameter.
        The default is to inherit the parameter type from the given
        `param`. Valid values are ``'parameter'``, ``'option'`` or
        ``'argument'``.
    """

    def __init__(
        self,
        message: str | None = None,
        ctx: Context | None = None,
        param: Parameter | None = None,
        param_hint: cabc.Sequence[str] | str | None = None,
        param_type: str | None = None,
    ) -> None:
        # A missing message is stored as the empty string.
        super().__init__(message or "", ctx, param, param_hint)
        self.param_type = param_type

    def format_message(self) -> str:
        """Build ``Missing <type> <hint>. <details>``.

        The hint and the details are left out when empty. The details
        combine the stored message with the missing-message supplied by
        the parameter's type, if any.
        """
        param = self.param

        hint: cabc.Sequence[str] | str | None = self.param_hint
        if hint is None and param is not None:
            hint = param.get_error_hint(self.ctx)  # type: ignore

        hint = _join_param_hints(hint)
        hint_part = f" {hint}" if hint else ""

        param_type = self.param_type
        if param_type is None and param is not None:
            param_type = param.param_type_name

        msg = self.message
        if param is not None:
            msg_extra = param.type.get_missing_message(param=param, ctx=self.ctx)

            if msg_extra:
                msg = f"{msg}. {msg_extra}" if msg else msg_extra

        msg_part = f" {msg}" if msg else ""

        # Translate param_type for known types.
        if param_type == "argument":
            missing = _("Missing argument")
        elif param_type == "option":
            missing = _("Missing option")
        elif param_type == "parameter":
            missing = _("Missing parameter")
        else:
            missing = _("Missing {param_type}").format(param_type=param_type)

        return f"{missing}{hint_part}.{msg_part}"

    def __str__(self) -> str:
        if self.message:
            return self.message

        param_name = self.param.name if self.param else None
        return _("Missing parameter: {param_name}").format(param_name=param_name)
class NoSuchOption(UsageError):
    """Raised if click attempted to handle an option that does not
    exist.

    .. versionadded:: 4.0

    :param option_name: the name of the unknown option.
    :param message: custom message; defaults to
        ``"No such option: <name>"``.
    :param possibilities: similar option names to suggest.
    """

    def __init__(
        self,
        option_name: str,
        message: str | None = None,
        possibilities: cabc.Sequence[str] | None = None,
        ctx: Context | None = None,
    ) -> None:
        if message is None:
            message = _("No such option: {name}").format(name=option_name)

        super().__init__(message, ctx)
        self.option_name = option_name
        self.possibilities = possibilities

    def format_message(self) -> str:
        """Return the message, followed by the sorted suggestions when
        any possibilities are known.
        """
        candidates = self.possibilities

        if not candidates:
            return self.message

        joined = ", ".join(sorted(candidates))
        template = ngettext(
            "Did you mean {possibility}?",
            "(Possible options: {possibilities})",
            len(candidates),
        )
        suggestion = template.format(possibility=joined, possibilities=joined)
        return f"{self.message} {suggestion}"
class BadOptionUsage(UsageError):
    """Raised when an option is known but was used incorrectly, for
    instance when it received the wrong number of arguments.

    .. versionadded:: 4.0

    :param option_name: the name of the option being used incorrectly.
    :param message: the error message to display.
    :param ctx: optionally the context that caused this error.
    """

    def __init__(
        self, option_name: str, message: str, ctx: Context | None = None
    ) -> None:
        super().__init__(message, ctx)
        self.option_name = option_name
class BadArgumentUsage(UsageError):
    """Raised when an argument is known but was used incorrectly, for
    instance when it received the wrong number of values.

    Adds no behavior of its own on top of :class:`UsageError`.

    .. versionadded:: 6.0
    """
class NoArgsIsHelpError(UsageError):
    """Usage error whose message is the help page of ``ctx``.

    :param ctx: the context whose help text becomes the message.
    """

    def __init__(self, ctx: Context) -> None:
        # Annotation only: narrows ``ctx`` to non-optional for this class.
        self.ctx: Context
        super().__init__(ctx.get_help(), ctx=ctx)

    def show(self, file: t.IO[t.Any] | None = None) -> None:
        """Echo the formatted message as is, with ``err=True``; no usage
        line or ``Error:`` prefix is added.
        """
        help_text = self.format_message()
        echo(help_text, file=file, err=True, color=self.ctx.color)
class FileError(ClickException):
    """Raised if a file cannot be opened.

    :param filename: the file that could not be opened.
    :param hint: reason shown to the user; defaults to the translated
        text ``"unknown error"``.
    """

    def __init__(self, filename: str, hint: str | None = None) -> None:
        reason = _("unknown error") if hint is None else hint
        super().__init__(reason)
        # Display form of the name, used when formatting the message.
        self.ui_filename: str = format_filename(filename)
        self.filename = filename

    def format_message(self) -> str:
        return _("Could not open file {filename!r}: {message}").format(
            filename=self.ui_filename, message=self.message
        )
class Abort(RuntimeError):
    """Internal signalling exception telling Click to abort.

    Adds nothing on top of :class:`RuntimeError`.
    """
class Exit(RuntimeError):
    """Signals that the application should exit with a given status
    code.

    :param code: the status code to exit with.
    """

    __slots__ = ("exit_code",)

    def __init__(self, code: int = 0) -> None:
        # Only the exit code is stored; the base initializer is not called.
        self.exit_code: int = code

View File

@@ -0,0 +1,301 @@
from __future__ import annotations
import collections.abc as cabc
from contextlib import contextmanager
from gettext import gettext as _
from ._compat import term_len
from .parser import _split_opt
# Overrides the automatically detected output width: when set,
# ``HelpFormatter`` uses this value whenever no explicit ``width`` is passed
# to it. This is used by the test system.
FORCED_WIDTH: int | None = None
def measure_table(rows: cabc.Iterable[tuple[str, str]]) -> tuple[int, ...]:
    """Compute the width of every column of ``rows``.

    :param rows: rows of cell texts; rows may differ in length.
    :return: for each column index, the largest ``term_len`` of any cell
        in that column; an empty tuple when there are no cells.
    """
    column_widths: dict[int, int] = {}

    for row in rows:
        for position, cell in enumerate(row):
            widest_so_far = column_widths.get(position, 0)
            column_widths[position] = max(widest_so_far, term_len(cell))

    return tuple(column_widths[position] for position in sorted(column_widths))
def iter_rows(
    rows: cabc.Iterable[tuple[str, str]], col_count: int
) -> cabc.Iterator[tuple[str, ...]]:
    """Yield every row padded on the right with empty strings.

    :param rows: rows to pad.
    :param col_count: desired number of columns; rows that already have
        at least this many cells are yielded unchanged.
    """
    for row in rows:
        missing = col_count - len(row)
        # A non-positive count multiplies to an empty tuple.
        padding = ("",) * missing
        yield row + padding
def wrap_text(
    text: str,
    width: int = 78,
    initial_indent: str = "",
    subsequent_indent: str = "",
    preserve_paragraphs: bool = False,
) -> str:
    """Wrap ``text`` to ``width`` columns. By default the whole text is
    treated as one paragraph. With ``preserve_paragraphs`` the text is
    split into paragraphs at empty lines and each paragraph is wrapped
    on its own, keeping the indentation of its first line.

    When paragraphs are handled, a paragraph whose first line contains
    only the ``\\b`` character (``\\x08``) is not rewrapped: its
    remaining lines are kept and merely indented.

    :param text: the text that should be rewrapped.
    :param width: the maximum width for the text.
    :param initial_indent: the initial indent that should be placed on the
        first line as a string.
    :param subsequent_indent: the indent string that should be placed on
        each consecutive line.
    :param preserve_paragraphs: if this flag is set then the wrapping will
        handle paragraphs individually.
    :return: the wrapped text; paragraphs are joined by a blank line.
    """
    from ._textwrap import TextWrapper

    text = text.expandtabs()
    wrapper = TextWrapper(
        width,
        initial_indent=initial_indent,
        subsequent_indent=subsequent_indent,
        replace_whitespace=False,
    )

    if not preserve_paragraphs:
        return wrapper.fill(text)

    # Collected paragraphs as (indent width, keep verbatim?, body).
    paragraphs: list[tuple[int, bool, str]] = []
    # Lines of the paragraph currently being read.
    pending: list[str] = []
    # Indent of the current paragraph; ``None`` until its first line is seen.
    par_indent = None

    def _emit() -> None:
        if not pending:
            return

        is_raw = pending[0].strip() == "\b"
        # Verbatim paragraphs drop the marker line and keep their line
        # breaks; regular ones are joined into a single line for filling.
        body = "\n".join(pending[1:]) if is_raw else " ".join(pending)
        paragraphs.append((par_indent or 0, is_raw, body))
        pending.clear()

    for line in text.splitlines():
        if line:
            if par_indent is None:
                # Only a paragraph's first line is stripped and measured.
                full_len = term_len(line)
                line = line.lstrip()
                par_indent = full_len - term_len(line)

            pending.append(line)
        else:
            _emit()
            par_indent = None

    _emit()

    rendered = []

    for indent_width, is_raw, body in paragraphs:
        with wrapper.extra_indent(" " * indent_width):
            render = wrapper.indent_only if is_raw else wrapper.fill
            rendered.append(render(body))

    return "\n\n".join(rendered)
class HelpFormatter:
    """Builds text help pages piece by piece in an in-memory buffer.

    Mostly an internal tool, but public so that applications can produce
    their own custom help output. Everything written is collected in
    :attr:`buffer` and returned by :meth:`getvalue`.

    :param indent_increment: how many columns each indentation level adds.
    :param width: total width available for text. When omitted, the
        module-level ``FORCED_WIDTH`` is used if set; otherwise the
        terminal width capped at ``max_width``, minus 2, but never less
        than 50.
    :param max_width: upper bound applied to the detected terminal width.
        Defaults to 80.
    """

    def __init__(
        self,
        indent_increment: int = 2,
        width: int | None = None,
        max_width: int | None = None,
    ) -> None:
        self.indent_increment = indent_increment
        self.current_indent: int = 0
        self.buffer: list[str] = []

        if max_width is None:
            max_width = 80

        if width is None:
            import shutil

            width = FORCED_WIDTH

            if width is None:
                terminal_columns = shutil.get_terminal_size().columns
                width = max(min(terminal_columns, max_width) - 2, 50)

        self.width = width

    def write(self, string: str) -> None:
        """Append ``string`` to the internal buffer unchanged."""
        self.buffer.append(string)

    def indent(self) -> None:
        """Move one indentation level deeper."""
        self.current_indent += self.indent_increment

    def dedent(self) -> None:
        """Move one indentation level back out."""
        self.current_indent -= self.indent_increment

    def write_usage(self, prog: str, args: str = "", prefix: str | None = None) -> None:
        """Write a usage line to the buffer.

        :param prog: the program name.
        :param args: whitespace separated list of arguments.
        :param prefix: text placed before the program name on the first
            line. Defaults to ``"Usage: "``.
        """
        if prefix is None:
            prefix = f"{_('Usage:')} "

        usage_prefix = f"{prefix:>{self.current_indent}}{prog} "
        prefix_width = term_len(usage_prefix)
        text_width = self.width - self.current_indent

        if text_width < (prefix_width + 20):
            # Not enough room beside the prefix: the arguments start on
            # their own line with a fixed extra indent.
            self.write(usage_prefix)
            self.write("\n")
            pad = " " * (max(self.current_indent, term_len(prefix)) + 4)
            wrapped = wrap_text(
                args, text_width, initial_indent=pad, subsequent_indent=pad
            )
        else:
            # The arguments fit to the right of the prefix; continuation
            # lines are aligned under the first argument.
            pad = " " * prefix_width
            wrapped = wrap_text(
                args,
                text_width,
                initial_indent=usage_prefix,
                subsequent_indent=pad,
            )

        self.write(wrapped)
        self.write("\n")

    def write_heading(self, heading: str) -> None:
        """Write ``heading`` followed by a colon at the current indent."""
        margin = f"{'':>{self.current_indent}}"
        self.write(f"{margin}{heading}:\n")

    def write_paragraph(self) -> None:
        """Write a blank separator line, unless nothing was written yet."""
        if not self.buffer:
            return

        self.write("\n")

    def write_text(self, text: str) -> None:
        """Write ``text`` re-wrapped to the formatter width and indented
        to the current level. Paragraphs are preserved.
        """
        margin = " " * self.current_indent
        wrapped = wrap_text(
            text,
            self.width,
            initial_indent=margin,
            subsequent_indent=margin,
            preserve_paragraphs=True,
        )
        self.write(wrapped)
        self.write("\n")

    def write_dl(
        self,
        rows: cabc.Sequence[tuple[str, str]],
        col_max: int = 30,
        col_spacing: int = 2,
    ) -> None:
        """Write a two-column definition list, the layout used for
        options and commands.

        :param rows: a list of two item tuples for the terms and values.
        :param col_max: the maximum width of the first column.
        :param col_spacing: the number of spaces between the first and
            second column.
        :raises TypeError: if the measured table does not have exactly
            two columns.
        """
        rows = list(rows)
        widths = measure_table(rows)

        if len(widths) != 2:
            raise TypeError("Expected two columns for definition list")

        first_col = min(widths[0], col_max) + col_spacing

        for term, description in iter_rows(rows, len(widths)):
            self.write(f"{'':>{self.current_indent}}{term}")

            if not description:
                self.write("\n")
                continue

            if term_len(term) > first_col - col_spacing:
                # The term overflows its column, so the description
                # starts on the following line.
                self.write("\n")
                self.write(" " * (first_col + self.current_indent))
            else:
                self.write(" " * (first_col - term_len(term)))

            text_width = max(self.width - first_col - 2, 10)
            wrapped_lines = wrap_text(
                description, text_width, preserve_paragraphs=True
            ).splitlines()

            if not wrapped_lines:
                self.write("\n")
                continue

            head, *rest = wrapped_lines
            self.write(f"{head}\n")

            for continuation in rest:
                self.write(
                    f"{'':>{first_col + self.current_indent}}{continuation}\n"
                )

    @contextmanager
    def section(self, name: str) -> cabc.Iterator[None]:
        """Context manager that writes a paragraph break and a heading,
        then indents for the duration of the ``with`` body.

        :param name: the section name that is written as heading.
        """
        self.write_paragraph()
        self.write_heading(name)
        self.indent()

        try:
            yield
        finally:
            self.dedent()

    @contextmanager
    def indentation(self) -> cabc.Iterator[None]:
        """Context manager that indents for the duration of the
        ``with`` body.
        """
        self.indent()

        try:
            yield
        finally:
            self.dedent()

    def getvalue(self) -> str:
        """Return everything written so far as one string."""
        return "".join(self.buffer)
def join_options(options: cabc.Sequence[str]) -> tuple[str, bool]:
    """Join option strings into one comma separated string, ordered by
    the length of each option's prefix (shortest prefix first; ties keep
    their input order).

    Returns ``(formatted_string, any_prefix_is_slash)``. The second item
    is ``True`` when at least one option uses ``/`` as its prefix.
    """
    keyed = []
    any_prefix_is_slash = False

    for opt in options:
        prefix = _split_opt(opt)[0]
        any_prefix_is_slash = any_prefix_is_slash or prefix == "/"
        keyed.append((len(prefix), opt))

    # sorted() is stable, like list.sort(), so equal prefix lengths keep
    # the order in which the options were given.
    ordered = sorted(keyed, key=lambda pair: pair[0])
    return ", ".join(opt for _length, opt in ordered), any_prefix_is_slash

View File

@@ -0,0 +1,67 @@
from __future__ import annotations
import typing as t
from threading import local
if t.TYPE_CHECKING:
from .core import Context
_local = local()


@t.overload
def get_current_context(silent: t.Literal[False] = False) -> Context: ...


@t.overload
def get_current_context(silent: bool = ...) -> Context | None: ...


def get_current_context(silent: bool = False) -> Context | None:
    """Return the click context on top of the current thread's stack.

    This offers implicit access to the active context from anywhere, as
    an alternative to the :func:`pass_context` decorator. It is mainly
    useful for helpers such as :func:`echo` that may adapt their
    behavior to the current context.

    To push the current context, :meth:`Context.scope` can be used.

    .. versionadded:: 5.0

    :param silent: if set to `True` the return value is `None` if no
        context is available. The default behavior is to raise a
        :exc:`RuntimeError`.
    """
    try:
        # AttributeError: no stack was ever created on this thread.
        # IndexError: the stack exists but is empty.
        top = _local.stack[-1]
    except (AttributeError, IndexError) as exc:
        if silent:
            return None

        raise RuntimeError("There is no active click context.") from exc

    return t.cast("Context", top)
def push_context(ctx: Context) -> None:
    """Put ``ctx`` on top of the current thread's context stack,
    creating the stack on first use.
    """
    stack = _local.__dict__.setdefault("stack", [])
    stack.append(ctx)
def pop_context() -> None:
    """Discard the context on top of the current thread's stack."""
    stack = _local.stack
    stack.pop()
def resolve_color_default(color: bool | None = None) -> bool | None:
    """Internal helper that resolves the effective color flag.

    An explicit ``color`` value is returned as-is. Otherwise the
    ``color`` attribute of the current context is returned, or ``None``
    when there is no active context.
    """
    if color is None:
        ctx = get_current_context(silent=True)
        return None if ctx is None else ctx.color

    return color

View File

@@ -0,0 +1,532 @@
"""
This module started out as largely a copy paste from the stdlib's
optparse module with the features removed that we do not need from
optparse because we implement them in Click on a higher level (for
instance type handling, help formatting and a lot more).
The plan is to remove more and more from here over time.
The reason this is a different module and not optparse from the stdlib
is that there are differences in 2.x and 3.x about the error messages
generated and optparse in the stdlib uses gettext for no good reason
and might cause us issues.
Click uses parts of optparse written by Gregory P. Ward and maintained
by the Python Software Foundation. This is limited to code in parser.py.
Copyright 2001-2006 Gregory P. Ward. All rights reserved.
Copyright 2002-2006 Python Software Foundation. All rights reserved.
"""
# This code uses parts of optparse written by Gregory P. Ward and
# maintained by the Python Software Foundation.
# Copyright 2001-2006 Gregory P. Ward
# Copyright 2002-2006 Python Software Foundation
from __future__ import annotations
import collections.abc as cabc
import typing as t
from collections import deque
from gettext import gettext as _
from gettext import ngettext
from ._utils import FLAG_NEEDS_VALUE
from ._utils import UNSET
from .exceptions import BadArgumentUsage
from .exceptions import BadOptionUsage
from .exceptions import NoSuchOption
from .exceptions import UsageError
if t.TYPE_CHECKING:
from ._utils import T_FLAG_NEEDS_VALUE
from ._utils import T_UNSET
from .core import Argument as CoreArgument
from .core import Context
from .core import Option as CoreOption
from .core import Parameter as CoreParameter
V = t.TypeVar("V")
def _unpack_args(
args: cabc.Sequence[str], nargs_spec: cabc.Sequence[int]
) -> tuple[cabc.Sequence[str | cabc.Sequence[str | None] | None], list[str]]:
"""Given an iterable of arguments and an iterable of nargs specifications,
it returns a tuple with all the unpacked arguments at the first index
and all remaining arguments as the second.
The nargs specification is the number of arguments that should be consumed
or `-1` to indicate that this position should eat up all the remainders.
Missing items are filled with ``UNSET``.
"""
args = deque(args)
nargs_spec = deque(nargs_spec)
rv: list[str | tuple[str | T_UNSET, ...] | T_UNSET] = []
spos: int | None = None
def _fetch(c: deque[V]) -> V | T_UNSET:
try:
if spos is None:
return c.popleft()
else:
return c.pop()
except IndexError:
return UNSET
while nargs_spec:
nargs = _fetch(nargs_spec)
if nargs is None:
continue
if nargs == 1:
rv.append(_fetch(args)) # type: ignore[arg-type]
elif nargs > 1:
x = [_fetch(args) for _ in range(nargs)]
# If we're reversed, we're pulling in the arguments in reverse,
# so we need to turn them around.
if spos is not None:
x.reverse()
rv.append(tuple(x))
elif nargs < 0:
if spos is not None:
raise TypeError("Cannot have two nargs < 0")
spos = len(rv)
rv.append(UNSET)
# spos is the position of the wildcard (star). If it's not `None`,
# we fill it with the remainder.
if spos is not None:
rv[spos] = tuple(args)
args = []
rv[spos + 1 :] = reversed(rv[spos + 1 :])
return tuple(rv), list(args)
def _split_opt(opt: str) -> tuple[str, str]:
first = opt[:1]
if first.isalnum():
return "", opt
if opt[1:2] == first:
return opt[:2], opt[2:]
return first, opt[1:]
def _normalize_opt(opt: str, ctx: Context | None) -> str:
if ctx is None or ctx.token_normalize_func is None:
return opt
prefix, opt = _split_opt(opt)
return f"{prefix}{ctx.token_normalize_func(opt)}"
class _Option:
def __init__(
self,
obj: CoreOption,
opts: cabc.Sequence[str],
dest: str | None,
action: str | None = None,
nargs: int = 1,
const: t.Any | None = None,
):
self._short_opts = []
self._long_opts = []
self.prefixes: set[str] = set()
for opt in opts:
prefix, value = _split_opt(opt)
if not prefix:
raise ValueError(f"Invalid start character for option ({opt})")
self.prefixes.add(prefix[0])
if len(prefix) == 1 and len(value) == 1:
self._short_opts.append(opt)
else:
self._long_opts.append(opt)
self.prefixes.add(prefix)
if action is None:
action = "store"
self.dest = dest
self.action = action
self.nargs = nargs
self.const = const
self.obj = obj
@property
def takes_value(self) -> bool:
return self.action in ("store", "append")
def process(self, value: t.Any, state: _ParsingState) -> None:
if self.action == "store":
state.opts[self.dest] = value # type: ignore
elif self.action == "store_const":
state.opts[self.dest] = self.const # type: ignore
elif self.action == "append":
state.opts.setdefault(self.dest, []).append(value) # type: ignore
elif self.action == "append_const":
state.opts.setdefault(self.dest, []).append(self.const) # type: ignore
elif self.action == "count":
state.opts[self.dest] = state.opts.get(self.dest, 0) + 1 # type: ignore
else:
raise ValueError(f"unknown action '{self.action}'")
state.order.append(self.obj)
class _Argument:
def __init__(self, obj: CoreArgument, dest: str | None, nargs: int = 1):
self.dest = dest
self.nargs = nargs
self.obj = obj
def process(
self,
value: str | cabc.Sequence[str | None] | None | T_UNSET,
state: _ParsingState,
) -> None:
if self.nargs > 1:
assert isinstance(value, cabc.Sequence)
holes = sum(1 for x in value if x is UNSET)
if holes == len(value):
value = UNSET
elif holes != 0:
raise BadArgumentUsage(
_("Argument {name!r} takes {nargs} values.").format(
name=self.dest, nargs=self.nargs
)
)
# We failed to collect any argument value so we consider the argument as unset.
if value == ():
value = UNSET
state.opts[self.dest] = value # type: ignore
state.order.append(self.obj)
class _ParsingState:
def __init__(self, rargs: list[str]) -> None:
self.opts: dict[str, t.Any] = {}
self.largs: list[str] = []
self.rargs = rargs
self.order: list[CoreParameter] = []
class _OptionParser:
    """The option parser is an internal class that is ultimately used to
    parse options and arguments. It's modelled after optparse and brings
    a similar but vastly simplified API. It should generally not be used
    directly as the high level Click classes wrap it for you.

    It's not nearly as extensible as optparse or argparse as it does not
    implement features that are implemented on a higher level (such as
    types or defaults).

    :param ctx: optionally the :class:`~click.Context` where this parser
        should go with.

    .. deprecated:: 8.2
        Will be removed in Click 9.0.
    """

    def __init__(self, ctx: Context | None = None) -> None:
        #: The :class:`~click.Context` for this parser. This might be
        #: `None` for some advanced use cases.
        self.ctx = ctx
        #: This controls how the parser deals with interspersed arguments.
        #: If this is set to `False`, the parser will stop on the first
        #: non-option. Click uses this to implement nested subcommands
        #: safely.
        self.allow_interspersed_args: bool = True
        #: This tells the parser how to deal with unknown options. By
        #: default it will error out (which is sensible), but there is a
        #: second mode where it will ignore it and continue processing
        #: after shifting all the unknown options into the resulting args.
        self.ignore_unknown_options: bool = False

        # A context, when given, overrides both defaults above.
        if ctx is not None:
            self.allow_interspersed_args = ctx.allow_interspersed_args
            self.ignore_unknown_options = ctx.ignore_unknown_options

        # Registered options keyed by their (normalized) spelling.
        self._short_opt: dict[str, _Option] = {}
        self._long_opt: dict[str, _Option] = {}
        # Prefixes that mark a token as an option; extended by add_option.
        self._opt_prefixes = {"-", "--"}
        # Registered positional arguments, in declaration order.
        self._args: list[_Argument] = []

    def add_option(
        self,
        obj: CoreOption,
        opts: cabc.Sequence[str],
        dest: str | None,
        action: str | None = None,
        nargs: int = 1,
        const: t.Any | None = None,
    ) -> None:
        """Adds a new option named `dest` to the parser. The destination
        is not inferred (unlike with optparse) and needs to be explicitly
        provided. Action can be any of ``store``, ``store_const``,
        ``append``, ``append_const`` or ``count``.

        The `obj` can be used to identify the option in the order list
        that is returned from the parser.
        """
        # Spellings are normalized once at registration so lookups made
        # with normalized command line tokens match.
        opts = [_normalize_opt(opt, self.ctx) for opt in opts]
        option = _Option(obj, opts, dest, action=action, nargs=nargs, const=const)
        self._opt_prefixes.update(option.prefixes)
        for opt in option._short_opts:
            self._short_opt[opt] = option
        for opt in option._long_opts:
            self._long_opt[opt] = option

    def add_argument(self, obj: CoreArgument, dest: str | None, nargs: int = 1) -> None:
        """Adds a positional argument named `dest` to the parser.

        The `obj` can be used to identify the option in the order list
        that is returned from the parser.
        """
        self._args.append(_Argument(obj, dest=dest, nargs=nargs))

    def parse_args(
        self, args: list[str]
    ) -> tuple[dict[str, t.Any], list[str], list[CoreParameter]]:
        """Parses positional arguments and returns ``(values, args, order)``
        for the parsed options and arguments as well as the leftover
        arguments if there are any. The order is a list of objects as they
        appear on the command line. If arguments appear multiple times they
        will be memorized multiple times as well.
        """
        # ``args`` itself becomes ``state.rargs`` and is consumed in
        # place while parsing.
        state = _ParsingState(args)
        try:
            self._process_args_for_options(state)
            self._process_args_for_args(state)
        except UsageError:
            # Usage errors are swallowed only when a context is present
            # and it requests resilient parsing; whatever was collected
            # before the error is still returned.
            if self.ctx is None or not self.ctx.resilient_parsing:
                raise
        return state.opts, state.largs, state.order

    def _process_args_for_args(self, state: _ParsingState) -> None:
        """Distribute the remaining tokens (``largs`` followed by
        ``rargs``) over the registered positional arguments. Whatever is
        not consumed becomes the new ``largs``; ``rargs`` is emptied.
        """
        pargs, args = _unpack_args(
            state.largs + state.rargs, [x.nargs for x in self._args]
        )

        for idx, arg in enumerate(self._args):
            arg.process(pargs[idx], state)

        state.largs = args
        state.rargs = []

    def _process_args_for_options(self, state: _ParsingState) -> None:
        """Consume ``state.rargs`` from the front, dispatching option-like
        tokens to :meth:`_process_opts` and setting other tokens aside in
        ``state.largs``. Stops at ``--``, or at the first non-option when
        interspersed arguments are not allowed.
        """
        while state.rargs:
            arg = state.rargs.pop(0)
            arglen = len(arg)
            # Double dashes always handled explicitly regardless of what
            # prefixes are valid.
            if arg == "--":
                return
            elif arg[:1] in self._opt_prefixes and arglen > 1:
                self._process_opts(arg, state)
            elif self.allow_interspersed_args:
                state.largs.append(arg)
            else:
                # Put the token back so it is still available to the
                # positional argument pass.
                state.rargs.insert(0, arg)
                return

        # Say this is the original argument list:
        # [arg0, arg1, ..., arg(i-1), arg(i), arg(i+1), ..., arg(N-1)]
        #                            ^
        # (we are about to process arg(i)).
        #
        # Then rargs is [arg(i), ..., arg(N-1)] and largs is a *subset* of
        # [arg0, ..., arg(i-1)] (any options and their arguments will have
        # been removed from largs).
        #
        # The while loop will usually consume 1 or more arguments per pass.
        # If it consumes 1 (eg. arg is an option that takes no arguments),
        # then after _process_arg() is done the situation is:
        #
        #   largs = subset of [arg0, ..., arg(i)]
        #   rargs = [arg(i+1), ..., arg(N-1)]
        #
        # If allow_interspersed_args is false, largs will always be
        # *empty* -- still a subset of [arg0, ..., arg(i-1)], but
        # not a very interesting subset!

    def _match_long_opt(
        self, opt: str, explicit_value: str | None, state: _ParsingState
    ) -> None:
        """Process ``opt`` as a registered long option.

        ``explicit_value`` is the text after ``=`` when the token was
        written as ``--name=value``, otherwise ``None``.

        :raises NoSuchOption: if ``opt`` is not a registered long option.
        :raises BadOptionUsage: if an explicit value is given to an
            option that does not take one.
        """
        if opt not in self._long_opt:
            from difflib import get_close_matches

            possibilities = get_close_matches(opt, self._long_opt)
            raise NoSuchOption(opt, possibilities=possibilities, ctx=self.ctx)

        option = self._long_opt[opt]
        if option.takes_value:
            # At this point it's safe to modify rargs by injecting the
            # explicit value, because no exception is raised in this
            # branch. This means that the inserted value will be fully
            # consumed.
            if explicit_value is not None:
                state.rargs.insert(0, explicit_value)

            value = self._get_value_from_state(opt, option, state)

        elif explicit_value is not None:
            raise BadOptionUsage(
                opt, _("Option {name!r} does not take a value.").format(name=opt)
            )

        else:
            value = UNSET

        option.process(value, state)

    def _match_short_opt(self, arg: str, state: _ParsingState) -> None:
        """Process ``arg`` as a cluster of single-character options that
        share the prefix ``arg[0]``.

        :raises NoSuchOption: for an unregistered character, unless
            unknown options are being ignored.
        """
        stop = False
        # Index in ``arg`` just past the character being examined.
        i = 1
        prefix = arg[0]
        unknown_options = []

        for ch in arg[1:]:
            opt = _normalize_opt(f"{prefix}{ch}", self.ctx)
            option = self._short_opt.get(opt)
            i += 1

            if not option:
                if self.ignore_unknown_options:
                    unknown_options.append(ch)
                    continue
                raise NoSuchOption(opt, ctx=self.ctx)
            if option.takes_value:
                # Any characters left in arg?  Pretend they're the
                # next arg, and stop consuming characters of arg.
                if i < len(arg):
                    state.rargs.insert(0, arg[i:])
                    stop = True

                value = self._get_value_from_state(opt, option, state)

            else:
                value = UNSET

            option.process(value, state)

            if stop:
                break

        # If we got any unknown options we recombine the string of the
        # remaining options and re-attach the prefix, then report that
        # to the state as new larg.  This way there is basic combinatorics
        # that can be achieved while still ignoring unknown arguments.
        if self.ignore_unknown_options and unknown_options:
            state.largs.append(f"{prefix}{''.join(unknown_options)}")

    def _get_value_from_state(
        self, option_name: str, option: _Option, state: _ParsingState
    ) -> str | cabc.Sequence[str] | T_FLAG_NEEDS_VALUE:
        """Take the value(s) for ``option`` from the front of
        ``state.rargs``.

        Returns one string when ``option.nargs`` is 1, a tuple of
        ``nargs`` strings otherwise, or ``FLAG_NEEDS_VALUE`` when the
        option permits omitting its value and no usable value follows.

        :raises BadOptionUsage: if fewer than ``nargs`` tokens remain and
            the option does not permit omitting its value.
        """
        nargs = option.nargs

        value: str | cabc.Sequence[str] | T_FLAG_NEEDS_VALUE

        if len(state.rargs) < nargs:
            if option.obj._flag_needs_value:
                # Option allows omitting the value.
                value = FLAG_NEEDS_VALUE
            else:
                raise BadOptionUsage(
                    option_name,
                    ngettext(
                        "Option {name!r} requires an argument.",
                        "Option {name!r} requires {nargs} arguments.",
                        nargs,
                    ).format(name=option_name, nargs=nargs),
                )
        elif nargs == 1:
            next_rarg = state.rargs[0]

            if (
                option.obj._flag_needs_value
                and isinstance(next_rarg, str)
                and next_rarg[:1] in self._opt_prefixes
                and len(next_rarg) > 1
            ):
                # The next arg looks like the start of an option, don't
                # use it as the value if omitting the value is allowed.
                value = FLAG_NEEDS_VALUE
            else:
                value = state.rargs.pop(0)
        else:
            value = tuple(state.rargs[:nargs])
            del state.rargs[:nargs]

        return value

    def _process_opts(self, arg: str, state: _ParsingState) -> None:
        """Handle one option-like token: try it as a long option first,
        then as a cluster of short options. If neither matches and
        unknown options are ignored, the token is appended to
        ``state.largs``; otherwise ``NoSuchOption`` propagates.
        """
        explicit_value = None
        # Long option handling happens in two parts.  The first part is
        # supporting explicitly attached values.  In any case, we will try
        # to long match the option first.
        if "=" in arg:
            long_opt, explicit_value = arg.split("=", 1)
        else:
            long_opt = arg
        norm_long_opt = _normalize_opt(long_opt, self.ctx)

        # At this point we will match the (assumed) long option through
        # the long option matching code.  Note that this allows options
        # like "-foo" to be matched as long options.
        try:
            self._match_long_opt(norm_long_opt, explicit_value, state)
        except NoSuchOption:
            # At this point the long option matching failed, and we need
            # to try with short options.  However there is a special rule
            # which says, that if we have a two character options prefix
            # (applies to "--foo" for instance), we do not dispatch to the
            # short option code and will instead raise the no option
            # error.
            if arg[:2] not in self._opt_prefixes:
                self._match_short_opt(arg, state)
                return

            if not self.ignore_unknown_options:
                raise

            state.largs.append(arg)
def __getattr__(name: str) -> object:
import warnings
if name in {
"OptionParser",
"Argument",
"Option",
"split_opt",
"normalize_opt",
"ParsingState",
}:
warnings.warn(
f"'parser.{name}' is deprecated and will be removed in Click 9.0."
" The old parser is available in 'optparse'.",
DeprecationWarning,
stacklevel=2,
)
return globals()[f"_{name}"]
if name == "split_arg_string":
from .shell_completion import split_arg_string
warnings.warn(
"Importing 'parser.split_arg_string' is deprecated, it will only be"
" available in 'shell_completion' in Click 9.0.",
DeprecationWarning,
stacklevel=2,
)
return split_arg_string
raise AttributeError(name)

View File

@@ -0,0 +1,667 @@
from __future__ import annotations
import collections.abc as cabc
import os
import re
import typing as t
from gettext import gettext as _
from .core import Argument
from .core import Command
from .core import Context
from .core import Group
from .core import Option
from .core import Parameter
from .core import ParameterSource
from .utils import echo
def shell_complete(
    cli: Command,
    ctx_args: cabc.MutableMapping[str, t.Any],
    prog_name: str,
    complete_var: str,
    instruction: str,
) -> int:
    """Perform shell completion for the given CLI program.

    :param cli: Command being called.
    :param ctx_args: Extra arguments to pass to
        ``cli.make_context``.
    :param prog_name: Name of the executable in the shell.
    :param complete_var: Name of the environment variable that holds
        the completion instruction.
    :param instruction: Value of ``complete_var`` with the completion
        instruction and shell, in the form ``instruction_shell``.
    :return: Status code to exit with: ``0`` when output was produced,
        ``1`` when the shell is not registered or the instruction is
        not ``source`` or ``complete``.
    """
    # The text before the first underscore selects the completion
    # class; the remainder selects what to emit.
    shell, _sep, action = instruction.partition("_")
    comp_cls = get_completion_class(shell)

    if comp_cls is None:
        return 1

    comp = comp_cls(cli, ctx_args, prog_name, complete_var)

    if action == "source":
        produce = comp.source
    elif action == "complete":
        produce = comp.complete
    else:
        return 1

    echo(produce())
    return 0
class CompletionItem:
    """Represents a completion value and metadata about the value. The
    default metadata is ``type`` to indicate special shell handling,
    and ``help`` if a shell supports showing a help string next to the
    value.

    Arbitrary parameters can be passed when creating the object, and
    accessed using ``item.attr``. If an attribute wasn't passed,
    accessing it returns ``None``. Double-underscore ("dunder") names
    that were not passed raise :exc:`AttributeError` instead, so that
    protocol probes such as ``hasattr(item, "__setstate__")`` made by
    :mod:`copy` and :mod:`pickle` behave normally.

    :param value: The completion suggestion.
    :param type: Tells the shell script to provide special completion
        support for the type. Click uses ``"dir"`` and ``"file"``.
    :param help: String shown next to the value if supported.
    :param kwargs: Arbitrary metadata. The built-in implementations
        don't use this, but custom type completions paired with custom
        shell support could use it.
    """

    __slots__ = ("value", "type", "help", "_info")

    def __init__(
        self,
        value: t.Any,
        type: str = "plain",
        help: str | None = None,
        **kwargs: t.Any,
    ) -> None:
        self.value: t.Any = value
        self.type: str = type
        self.help: str | None = help
        self._info = kwargs

    def __getattr__(self, name: str) -> t.Any:
        """Look up extra metadata passed as keyword arguments.

        Only called when normal attribute lookup fails.

        :raises AttributeError: if the instance is not initialized yet
            (its ``_info`` slot is unset), or if ``name`` is a dunder
            name that was not supplied as metadata.
        """
        # Read the slot through object.__getattribute__ so that a
        # missing ``_info`` (e.g. an instance freshly created by copy or
        # pickle via ``__new__``) raises AttributeError instead of
        # re-entering this method and recursing without bound.
        try:
            info = object.__getattribute__(self, "_info")
        except AttributeError:
            raise AttributeError(name) from None

        if name in info:
            return info[name]

        # Never pretend to implement special methods: returning None
        # for them makes callers that probe for a protocol hook try to
        # call None.
        if name.startswith("__") and name.endswith("__"):
            raise AttributeError(name)

        return None
# Bash completion script template. It is assigned to
# ``BashComplete.source_template`` and ``%``-formatted by
# ``ShellComplete.source`` with the ``complete_func``, ``complete_var``
# and ``prog_name`` values from ``source_vars``. The script body is
# emitted verbatim, so any change here changes what users source.
#
# Only Bash >= 4.4 has the nosort option.
_SOURCE_BASH = """\
%(complete_func)s() {
local IFS=$'\\n'
local response
response=$(env COMP_WORDS="${COMP_WORDS[*]}" COMP_CWORD=$COMP_CWORD \
%(complete_var)s=bash_complete $1)
for completion in $response; do
IFS=',' read type value <<< "$completion"
if [[ $type == 'dir' ]]; then
COMPREPLY=()
compopt -o dirnames
elif [[ $type == 'file' ]]; then
COMPREPLY=()
compopt -o default
elif [[ $type == 'plain' ]]; then
COMPREPLY+=($value)
fi
done
return 0
}
%(complete_func)s_setup() {
complete -o nosort -F %(complete_func)s %(prog_name)s
}
%(complete_func)s_setup;
"""
# Zsh completion script template. It is assigned to
# ``ZshComplete.source_template`` and ``%``-formatted by
# ``ShellComplete.source`` with the ``complete_func``, ``complete_var``
# and ``prog_name`` values from ``source_vars``.
#
# See ZshComplete.format_completion below, and issue #2703, before
# changing this script.
#
# (TL;DR: _describe is picky about the format, but this Zsh script snippet
# is already widely deployed. So freeze this script, and use clever-ish
# handling of colons in ZshComplete.format_completion.)
_SOURCE_ZSH = """\
#compdef %(prog_name)s
%(complete_func)s() {
local -a completions
local -a completions_with_descriptions
local -a response
(( ! $+commands[%(prog_name)s] )) && return 1
response=("${(@f)$(env COMP_WORDS="${words[*]}" COMP_CWORD=$((CURRENT-1)) \
%(complete_var)s=zsh_complete %(prog_name)s)}")
for type key descr in ${response}; do
if [[ "$type" == "plain" ]]; then
if [[ "$descr" == "_" ]]; then
completions+=("$key")
else
completions_with_descriptions+=("$key":"$descr")
fi
elif [[ "$type" == "dir" ]]; then
_path_files -/
elif [[ "$type" == "file" ]]; then
_path_files -f
fi
done
if [ -n "$completions_with_descriptions" ]; then
_describe -V unsorted completions_with_descriptions -U
fi
if [ -n "$completions" ]; then
compadd -U -V unsorted -a completions
fi
}
if [[ $zsh_eval_context[-1] == loadautofunc ]]; then
# autoload from fpath, call function directly
%(complete_func)s "$@"
else
# eval/source/. command, register function for later
compdef %(complete_func)s %(prog_name)s
fi
"""
# Fish completion script template. It is assigned to
# ``FishComplete.source_template`` and ``%``-formatted by
# ``ShellComplete.source`` with the ``complete_func``, ``complete_var``
# and ``prog_name`` values from ``source_vars``. Each response line is
# ``type,value`` as produced by ``FishComplete.format_completion``.
_SOURCE_FISH = """\
function %(complete_func)s;
set -l response (env %(complete_var)s=fish_complete COMP_WORDS=(commandline -cp) \
COMP_CWORD=(commandline -t) %(prog_name)s);
for completion in $response;
set -l metadata (string split "," $completion);
if test $metadata[1] = "dir";
__fish_complete_directories $metadata[2];
else if test $metadata[1] = "file";
__fish_complete_path $metadata[2];
else if test $metadata[1] = "plain";
echo $metadata[2];
end;
end;
end;
complete --no-files --command %(prog_name)s --arguments \
"(%(complete_func)s)";
"""
class ShellComplete:
    """Base class for shell completion support. A subclass for a
    specific shell overrides attributes and methods to implement the
    two completion instructions, ``source`` and ``complete``.

    :param cli: Command being called.
    :param ctx_args: Mapping stored on the instance and handed to the
        context resolution step in :meth:`get_completions`.
    :param prog_name: Name of the executable in the shell.
    :param complete_var: Name of the environment variable that holds
        the completion instruction.

    .. versionadded:: 8.0
    """

    name: t.ClassVar[str]
    """Name to register the shell as with :func:`add_completion_class`.
    This is used in completion instructions (``{name}_source`` and
    ``{name}_complete``).
    """

    source_template: t.ClassVar[str]
    """Completion script template formatted by :meth:`source`. This must
    be provided by subclasses.
    """

    def __init__(
        self,
        cli: Command,
        ctx_args: cabc.MutableMapping[str, t.Any],
        prog_name: str,
        complete_var: str,
    ) -> None:
        self.complete_var = complete_var
        self.prog_name = prog_name
        self.ctx_args = ctx_args
        self.cli = cli

    @property
    def func_name(self) -> str:
        """Name of the shell function that the completion script
        defines, derived from the program name with dashes turned into
        underscores and other non-word characters removed.
        """
        underscored = self.prog_name.replace("-", "_")
        safe_name = re.sub(r"\W*", "", underscored, flags=re.ASCII)
        return f"_{safe_name}_completion"

    def source_vars(self) -> dict[str, t.Any]:
        """Values substituted into :attr:`source_template`.

        The base implementation supplies ``complete_func``,
        ``complete_var``, and ``prog_name``.
        """
        template_vars: dict[str, t.Any] = {}
        template_vars["complete_func"] = self.func_name
        template_vars["complete_var"] = self.complete_var
        template_vars["prog_name"] = self.prog_name
        return template_vars

    def source(self) -> str:
        """Return the shell script that defines the completion
        function: :attr:`source_template` ``%``-formatted with the dict
        from :meth:`source_vars`.
        """
        values = self.source_vars()
        return self.source_template % values

    def get_completion_args(self) -> tuple[list[str], str]:
        """Read the env vars set by the shell script and return
        ``args, incomplete``. Subclasses must implement this.
        """
        raise NotImplementedError

    def get_completions(self, args: list[str], incomplete: str) -> list[CompletionItem]:
        """Resolve the context and the command or parameter being
        completed from the complete args, then ask that object's
        ``shell_complete`` method for completions of the incomplete
        value.

        :param args: List of complete args before the incomplete value.
        :param incomplete: Value being completed. May be empty.
        """
        ctx = _resolve_context(self.cli, self.ctx_args, self.prog_name, args)
        target, remaining = _resolve_incomplete(ctx, args, incomplete)
        return target.shell_complete(ctx, remaining)

    def format_completion(self, item: CompletionItem) -> str:
        """Render one completion item in the form the shell script
        expects. Subclasses must implement this.

        :param item: Completion item to format.
        """
        raise NotImplementedError

    def complete(self) -> str:
        """Return the completion data to send back to the shell.

        The base implementation calls :meth:`get_completion_args`,
        fetches the completions, formats each one with
        :meth:`format_completion`, and joins them with newlines.
        """
        args, incomplete = self.get_completion_args()
        formatted = []

        for item in self.get_completions(args, incomplete):
            formatted.append(self.format_completion(item))

        return "\n".join(formatted)
class BashComplete(ShellComplete):
    """Shell completion for Bash."""

    name = "bash"
    source_template = _SOURCE_BASH

    @staticmethod
    def _check_version() -> None:
        """Print a message to stderr when the ``bash`` found on ``PATH``
        is older than 4.4, or when its version cannot be determined.

        This only reports; it never raises because of the version.
        """
        import shutil
        import subprocess

        bash_exe = shutil.which("bash")

        if bash_exe is None:
            match = None
        else:
            output = subprocess.run(
                [bash_exe, "--norc", "-c", 'echo "${BASH_VERSION}"'],
                stdout=subprocess.PIPE,
            )
            match = re.search(r"^(\d+)\.(\d+)\.\d+", output.stdout.decode())

        if match is not None:
            major, minor = match.groups()

            # Compare numerically. Comparing the captured strings would
            # order them lexicographically, so e.g. "10" would sort
            # before "4" and a newer Bash would be reported as too old.
            if (int(major), int(minor)) < (4, 4):
                echo(
                    _(
                        "Shell completion is not supported for Bash"
                        " versions older than 4.4."
                    ),
                    err=True,
                )
        else:
            echo(
                _("Couldn't detect Bash version, shell completion is not supported."),
                err=True,
            )

    def source(self) -> str:
        """Check the installed Bash version, then return the completion
        script produced by the base class.
        """
        self._check_version()
        return super().source()

    def get_completion_args(self) -> tuple[list[str], str]:
        """Build ``args, incomplete`` from the ``COMP_WORDS`` and
        ``COMP_CWORD`` environment variables.

        ``args`` excludes the first word and stops before the word at
        index ``COMP_CWORD``; ``incomplete`` is that word, or ``""``
        when the index is past the end.
        """
        cwords = split_arg_string(os.environ["COMP_WORDS"])
        cword = int(os.environ["COMP_CWORD"])
        args = cwords[1:cword]

        try:
            incomplete = cwords[cword]
        except IndexError:
            incomplete = ""

        return args, incomplete

    def format_completion(self, item: CompletionItem) -> str:
        """Format ``item`` as ``type,value``, the form split by the
        Bash script's ``read type value`` with ``IFS=','``.
        """
        return f"{item.type},{item.value}"
class ZshComplete(ShellComplete):
    """Shell completion for Zsh."""

    name = "zsh"
    source_template = _SOURCE_ZSH

    def get_completion_args(self) -> tuple[list[str], str]:
        """Build ``args, incomplete`` from the ``COMP_WORDS`` and
        ``COMP_CWORD`` environment variables.

        ``args`` excludes the first word and stops before the word at
        index ``COMP_CWORD``; ``incomplete`` is that word, or ``""``
        when the index is past the end.
        """
        words = split_arg_string(os.environ["COMP_WORDS"])
        index = int(os.environ["COMP_CWORD"])

        try:
            incomplete = words[index]
        except IndexError:
            incomplete = ""

        return words[1:index], incomplete

    def format_completion(self, item: CompletionItem) -> str:
        """Format ``item`` as three newline separated fields: type,
        value, and help (``"_"`` when there is no help text).
        """
        help_ = item.help or "_"

        # The zsh completion script uses `_describe` on items with help
        # texts (which splits the item help from the item value at the
        # first unescaped colon) and `compadd` on items without help
        # text (which uses the item value as-is and does not support
        # colon escaping). So colons in the value are escaped if and
        # only if the help is not the "_" sentinel used by the script.
        #
        # (The zsh completion script is potentially widely deployed,
        # and thus harder to fix than this method.)
        #
        # See issue #1812 and issue #2703 for further context.
        if help_ != "_":
            value = item.value.replace(":", r"\:")
        else:
            value = item.value

        return f"{item.type}\n{value}\n{help_}"
class FishComplete(ShellComplete):
    """Shell completion for Fish."""

    name = "fish"
    source_template = _SOURCE_FISH

    def get_completion_args(self) -> tuple[list[str], str]:
        """Build ``args, incomplete`` from the ``COMP_WORDS`` and
        ``COMP_CWORD`` environment variables. For Fish, ``COMP_CWORD``
        carries the partial word itself rather than an index.
        """
        words = split_arg_string(os.environ["COMP_WORDS"])
        partial = os.environ["COMP_CWORD"]

        if partial:
            partial = split_arg_string(partial)[0]

        args = words[1:]

        # Fish stores the partial word in both COMP_WORDS and
        # COMP_CWORD, remove it from complete args.
        if partial and args and args[-1] == partial:
            del args[-1]

        return args, partial

    def format_completion(self, item: CompletionItem) -> str:
        """Format ``item`` as ``type,value``, followed by a tab and the
        help text when the item has one.
        """
        line = f"{item.type},{item.value}"

        if item.help:
            line = f"{line}\t{item.help}"

        return line
# Type variable bound to ``type[ShellComplete]``; it lets
# ``add_completion_class`` be annotated as returning the same class
# type it receives.
ShellCompleteType = t.TypeVar("ShellCompleteType", bound="type[ShellComplete]")

# Registry mapping a shell name to its completion class. It is read by
# ``get_completion_class`` and extended by ``add_completion_class``.
_available_shells: dict[str, type[ShellComplete]] = {
    "bash": BashComplete,
    "fish": FishComplete,
    "zsh": ZshComplete,
}
def add_completion_class(
    cls: ShellCompleteType, name: str | None = None
) -> ShellCompleteType:
    """Register a :class:`ShellComplete` subclass under a shell name.

    The completion instruction environment variable supplies this name
    during completion. An existing registration with the same name is
    replaced.

    :param cls: The completion class that will handle completion for
        the shell.
    :param name: Name to register the class under. Defaults to the
        class's ``name`` attribute.
    :return: ``cls`` unchanged.
    """
    key = cls.name if name is None else name
    _available_shells[key] = cls
    return cls
def get_completion_class(shell: str) -> type[ShellComplete] | None:
    """Return the :class:`ShellComplete` subclass registered under the
    name given by the completion instruction environment variable.

    :param shell: Name the class is registered under.
    :return: The registered class, or ``None`` if the name is unknown.
    """
    try:
        return _available_shells[shell]
    except KeyError:
        return None
def split_arg_string(string: str) -> list[str]:
    """Split an argument string like :func:`shlex.split`, tolerating
    incomplete input. A missing closing quote or a dangling escape
    character does not raise; the partial token is kept as-is.

    .. code-block:: python

        split_arg_string("example 'my file")
        ["example", "my file"]

        split_arg_string("example my\\")
        ["example", "my"]

    :param string: String to split.

    .. versionchanged:: 8.2
        Moved to ``shell_completion`` from ``parser``.
    """
    import shlex

    lexer = shlex.shlex(string, posix=True)
    lexer.whitespace_split = True
    lexer.commenters = ""

    tokens: list[str] = []
    stream = iter(lexer)

    try:
        while True:
            tokens.append(next(stream))
    except StopIteration:
        # Normal end of input.
        pass
    except ValueError:
        # End-of-string was reached in an invalid state. Keep the
        # partial token; the pending quote or escape character lives in
        # lexer.state, not lexer.token.
        tokens.append(lexer.token)

    return tokens
def _is_incomplete_argument(ctx: Context, param: Parameter) -> bool:
    """Tell whether ``param`` is an argument that can still take values.

    That is the case when it accepts unlimited values (``nargs == -1``),
    when its value did not come from the command line, or when it takes
    several values and fewer than ``nargs`` have been collected.

    :param ctx: Invocation context for the command represented by the
        parsed complete args.
    :param param: Argument object being checked.
    """
    if not isinstance(param, Argument):
        return False

    assert param.name is not None
    # Will be None if expose_value is False.
    current = ctx.params.get(param.name)

    if param.nargs == -1:
        return True

    if ctx.get_parameter_source(param.name) is not ParameterSource.COMMANDLINE:
        return True

    if param.nargs > 1 and isinstance(current, (tuple, list)):
        return len(current) < param.nargs

    return False
def _start_of_option(ctx: Context, value: str) -> bool:
"""Check if the value looks like the start of an option."""
if not value:
return False
c = value[0]
return c in ctx._opt_prefixes
def _is_incomplete_option(ctx: Context, args: list[str], param: Parameter) -> bool:
    """Tell whether ``param`` is an option that is still waiting for a
    value.

    Flags and counting options never take a value. Otherwise the last
    ``nargs`` complete args are scanned from the end; the first one that
    looks like an option must be one of this option's names.

    :param args: List of complete args before the incomplete value.
    :param param: Option object being checked.
    """
    if not isinstance(param, Option) or param.is_flag or param.count:
        return False

    candidate = None

    for distance, arg in enumerate(reversed(args), start=1):
        if distance > param.nargs:
            break

        if _start_of_option(ctx, arg):
            candidate = arg
            break

    return candidate is not None and candidate in param.opts
def _resolve_context(
    cli: Command,
    ctx_args: cabc.MutableMapping[str, t.Any],
    prog_name: str,
    args: list[str],
) -> Context:
    """Produce the context hierarchy starting with the command and
    traversing the complete arguments. This only follows the commands,
    it doesn't trigger input prompts or callbacks.

    :param cli: Command being called.
    :param ctx_args: Extra keyword arguments for the top-level
        ``make_context`` call. Note that ``"resilient_parsing"`` is set
        to ``True`` on the mapping that is passed in.
    :param prog_name: Name of the executable in the shell.
    :param args: List of complete args before the incomplete value. A
        copy is handed to the parser, the caller's list is not modified.
    :return: The innermost context that could be resolved.
    """
    ctx_args["resilient_parsing"] = True

    with cli.make_context(prog_name, args.copy(), **ctx_args) as ctx:
        # What is left over after parsing the top-level command.
        args = ctx._protected_args + ctx.args

        while args:
            command = ctx.command

            if isinstance(command, Group):
                if not command.chain:
                    # Plain group: the next arg names a single
                    # subcommand, descend into it.
                    name, cmd, args = command.resolve_command(ctx, args)

                    if cmd is None:
                        return ctx

                    with cmd.make_context(
                        name, args, parent=ctx, resilient_parsing=True
                    ) as sub_ctx:
                        ctx = sub_ctx
                        args = ctx._protected_args + ctx.args
                else:
                    # Chained group: consume subcommands one after the
                    # other. Every one of them is created with the
                    # group's context as parent; the last one resolved
                    # becomes the current context.
                    sub_ctx = ctx

                    while args:
                        name, cmd, args = command.resolve_command(ctx, args)

                        if cmd is None:
                            return ctx

                        with cmd.make_context(
                            name,
                            args,
                            parent=ctx,
                            allow_extra_args=True,
                            allow_interspersed_args=False,
                            resilient_parsing=True,
                        ) as sub_sub_ctx:
                            sub_ctx = sub_sub_ctx
                            args = sub_ctx.args

                    ctx = sub_ctx
                    args = [*sub_ctx._protected_args, *sub_ctx.args]
            else:
                # Not a group, there is nothing further to descend into.
                break

    return ctx
def _resolve_incomplete(
    ctx: Context, args: list[str], incomplete: str
) -> tuple[Command | Parameter, str]:
    """Find the Click object that will handle the completion of the
    incomplete value, and return it together with the (possibly
    adjusted) incomplete value.

    :param ctx: Invocation context for the command represented by
        the parsed complete args.
    :param args: List of complete args before the incomplete value. An
        option name split off a ``name=value`` word is appended to it.
    :param incomplete: Value being completed. May be empty.
    """
    # Shells disagree on how to report "=" between a long option name
    # and its value: the word may stay joined, the "=" may arrive as its
    # own item, or name and value may already be split. Normalize by
    # always splitting and dropping the "=".
    if incomplete == "=":
        incomplete = ""
    elif "=" in incomplete and _start_of_option(ctx, incomplete):
        option_name, _sep, incomplete = incomplete.partition("=")
        args.append(option_name)

    command = ctx.command

    # "--" tells Click to stop treating values as options even if they
    # start with an option character. Without it, a word that looks like
    # an option is completed by the current command (option names).
    if "--" not in args and _start_of_option(ctx, incomplete):
        return command, incomplete

    params = command.get_params(ctx)

    # The last complete arg is an option name that still needs a value:
    # that option provides value completions.
    pending_option = next(
        (p for p in params if _is_incomplete_option(ctx, args, p)), None
    )

    if pending_option is not None:
        return pending_option, incomplete

    # Neither an option name nor an option value: the first argument
    # that can still take a value provides completions.
    pending_argument = next(
        (p for p in params if _is_incomplete_argument(ctx, p)), None
    )

    if pending_argument is not None:
        return pending_argument, incomplete

    # No unparsed arguments are left. The command may be a group that
    # will provide command name completions.
    return command, incomplete

View File

@@ -0,0 +1,877 @@
from __future__ import annotations
import collections.abc as cabc
import inspect
import io
import itertools
import sys
import typing as t
from contextlib import AbstractContextManager
from gettext import gettext as _
from ._compat import isatty
from ._compat import strip_ansi
from .exceptions import Abort
from .exceptions import UsageError
from .globals import resolve_color_default
from .types import Choice
from .types import convert_type
from .types import ParamType
from .utils import echo
from .utils import LazyFile
if t.TYPE_CHECKING:
from ._termui_impl import ProgressBar
V = t.TypeVar("V")
# The prompt functions to use. The doc tools currently override these
# functions to customize how they work.
visible_prompt_func: t.Callable[[str], str] = input

# Color name -> base ANSI color number. ``_interpret_color`` adds its
# ``offset`` argument to these values; ``style`` calls it with the
# default offset (0) for the foreground and with 10 for the background.
_ansi_colors = {
    "black": 30,
    "red": 31,
    "green": 32,
    "yellow": 33,
    "blue": 34,
    "magenta": 35,
    "cyan": 36,
    "white": 37,
    "reset": 39,
    "bright_black": 90,
    "bright_red": 91,
    "bright_green": 92,
    "bright_yellow": 93,
    "bright_blue": 94,
    "bright_magenta": 95,
    "bright_cyan": 96,
    "bright_white": 97,
}

# Escape sequence that ``style`` appends when ``reset`` is true.
_ansi_reset_all = "\033[0m"
def hidden_prompt_func(prompt: str) -> str:
    """Read a line through :func:`getpass.getpass`, showing ``prompt``."""
    from getpass import getpass

    return getpass(prompt)
def _build_prompt(
text: str,
suffix: str,
show_default: bool = False,
default: t.Any | None = None,
show_choices: bool = True,
type: ParamType | None = None,
) -> str:
prompt = text
if type is not None and show_choices and isinstance(type, Choice):
prompt += f" ({', '.join(map(str, type.choices))})"
if default is not None and show_default:
prompt = f"{prompt} [{_format_default(default)}]"
return f"{prompt}{suffix}"
def _format_default(default: t.Any) -> t.Any:
if isinstance(default, (io.IOBase, LazyFile)) and hasattr(default, "name"):
return default.name
return default
def prompt(
    text: str,
    default: t.Any | None = None,
    hide_input: bool = False,
    confirmation_prompt: bool | str = False,
    type: ParamType | t.Any | None = None,
    value_proc: t.Callable[[str], t.Any] | None = None,
    prompt_suffix: str = ": ",
    show_default: bool = True,
    err: bool = False,
    show_choices: bool = True,
) -> t.Any:
    """Prompts a user for input and returns the converted value. The
    prompt is repeated until a value is entered that converts without a
    :exc:`UsageError` (and, if requested, is confirmed).

    If the user aborts the input by sending an interrupt signal or
    end-of-file, this function will catch it and raise a :exc:`Abort`
    exception.

    :param text: the text to show for the prompt.
    :param default: the default value to use if no input happens. If this
                    is not given it will prompt until it's aborted.
    :param hide_input: if this is set to true then the input value will
                       be hidden.
    :param confirmation_prompt: Prompt a second time to confirm the
        value. Can be set to a string instead of ``True`` to customize
        the message.
    :param type: the type to use to check the value against.
    :param value_proc: if this parameter is provided it's a function that
                       is invoked instead of the type conversion to
                       convert a value.
    :param prompt_suffix: a suffix that should be added to the prompt.
    :param show_default: shows or hides the default value in the prompt.
    :param err: if set to true the file defaults to ``stderr`` instead of
                ``stdout``, the same as with echo.
    :param show_choices: Show or hide choices if the passed type is a Choice.
                         For example if type is a Choice of either day or week,
                         show_choices is true and text is "Group by" then the
                         prompt will be "Group by (day, week): ".

    .. versionadded:: 8.0
        ``confirmation_prompt`` can be a custom string.

    .. versionadded:: 7.0
        Added the ``show_choices`` parameter.

    .. versionadded:: 6.0
        Added unicode support for cmd.exe on Windows.

    .. versionadded:: 4.0
        Added the `err` parameter.
    """

    def prompt_func(text: str) -> str:
        # Show ``text`` and read one line, hidden or visible depending
        # on ``hide_input``. Interrupt and EOF become ``Abort``.
        f = hidden_prompt_func if hide_input else visible_prompt_func
        try:
            # Write the prompt separately so that we get nice
            # coloring through colorama on Windows
            echo(text.rstrip(" "), nl=False, err=err)
            # Echo a space to stdout to work around an issue where
            # readline causes backspace to clear the whole line.
            return f(" ")
        except (KeyboardInterrupt, EOFError):
            # getpass doesn't print a newline if the user aborts input with ^C.
            # Allegedly this behavior is inherited from getpass(3).
            # A doc bug has been filed at https://bugs.python.org/issue24711
            if hide_input:
                echo(None, err=err)
            raise Abort() from None

    if value_proc is None:
        value_proc = convert_type(type, default)

    prompt = _build_prompt(
        text, prompt_suffix, show_default, default, show_choices, type
    )

    if confirmation_prompt:
        if confirmation_prompt is True:
            confirmation_prompt = _("Repeat for confirmation")

        # The confirmation prompt shows neither default nor choices.
        confirmation_prompt = _build_prompt(confirmation_prompt, prompt_suffix)

    while True:
        # Ask until something was typed; an empty answer is only
        # accepted when a default exists, which is then used instead.
        while True:
            value = prompt_func(prompt)
            if value:
                break
            elif default is not None:
                value = default
                break
        try:
            result = value_proc(value)
        except UsageError as e:
            # Don't echo details about a hidden value back to the
            # terminal.
            if hide_input:
                echo(_("Error: The value you entered was invalid."), err=err)
            else:
                echo(_("Error: {e.message}").format(e=e), err=err)
            continue
        if not confirmation_prompt:
            return result
        # Ask for the confirmation. An empty confirmation is only
        # accepted when the first value is empty (falsy) as well.
        while True:
            value2 = prompt_func(confirmation_prompt)
            is_empty = not value and not value2
            if value2 or is_empty:
                break
        if value == value2:
            return result
        # Mismatch: report it and start over with the first prompt.
        echo(_("Error: The two entered values do not match."), err=err)
def confirm(
    text: str,
    default: bool | None = False,
    abort: bool = False,
    prompt_suffix: str = ": ",
    show_default: bool = True,
    err: bool = False,
) -> bool:
    """Asks a yes/no question and returns the answer.

    ``y``/``yes`` and ``n``/``no`` are accepted in any letter case and
    with surrounding whitespace. Anything else is reported as invalid
    and the question is asked again.

    If the user aborts the input by sending an interrupt signal or
    end-of-file, this function will catch it and raise an :exc:`Abort`
    exception.

    :param text: the question to ask.
    :param default: The default value to use when no input is given. If
        ``None``, repeat until input is given.
    :param abort: if this is set to `True` a negative answer aborts the
                  execution by raising :exc:`Abort`.
    :param prompt_suffix: a suffix that should be added to the prompt.
    :param show_default: shows or hides the default value in the prompt.
    :param err: if set to true the file defaults to ``stderr`` instead of
                ``stdout``, the same as with echo.

    .. versionchanged:: 8.0
        Repeat until input is given if ``default`` is ``None``.

    .. versionadded:: 4.0
        Added the ``err`` parameter.
    """
    # The capital letter in the hint marks the default answer.
    if default is None:
        hint = "y/n"
    elif default:
        hint = "Y/n"
    else:
        hint = "y/N"

    prompt = _build_prompt(text, prompt_suffix, show_default, hint)
    answers = {"y": True, "yes": True, "n": False, "no": False}

    while True:
        try:
            # Write the prompt separately so that we get nice
            # coloring through colorama on Windows
            echo(prompt.rstrip(" "), nl=False, err=err)
            # Echo a space to stdout to work around an issue where
            # readline causes backspace to clear the whole line.
            reply = visible_prompt_func(" ").lower().strip()
        except (KeyboardInterrupt, EOFError):
            raise Abort() from None

        if reply in answers:
            rv = answers[reply]
            break

        if reply == "" and default is not None:
            rv = default
            break

        echo(_("Error: invalid input"), err=err)

    if abort and not rv:
        raise Abort()

    return rv
def echo_via_pager(
    text_or_generator: cabc.Iterable[str] | t.Callable[[], cabc.Iterable[str]] | str,
    color: bool | None = None,
) -> None:
    """Shows text through an environment specific pager on stdout.

    .. versionchanged:: 3.0
       Added the `color` flag.

    :param text_or_generator: the text to page, or alternatively, a
                              generator emitting the text to page. A
                              generator function is called to obtain
                              the generator.
    :param color: controls if the pager supports ANSI colors or not.  The
                  default is autodetection.
    """
    color = resolve_color_default(color)

    if inspect.isgeneratorfunction(text_or_generator):
        chunks = t.cast("t.Callable[[], cabc.Iterable[str]]", text_or_generator)()
    elif isinstance(text_or_generator, str):
        chunks = [text_or_generator]
    else:
        chunks = iter(t.cast("cabc.Iterable[str]", text_or_generator))

    # Anything that is not already a string is converted with str().
    as_text = map(lambda el: el if isinstance(el, str) else str(el), chunks)

    from ._termui_impl import pager

    # A final newline is always appended to the paged text.
    return pager(itertools.chain(as_text, "\n"), color)
@t.overload
def progressbar(
    *,
    length: int,
    label: str | None = None,
    hidden: bool = False,
    show_eta: bool = True,
    show_percent: bool | None = None,
    show_pos: bool = False,
    fill_char: str = "#",
    empty_char: str = "-",
    bar_template: str = "%(label)s  [%(bar)s]  %(info)s",
    info_sep: str = "  ",
    width: int = 36,
    file: t.TextIO | None = None,
    color: bool | None = None,
    update_min_steps: int = 1,
) -> ProgressBar[int]: ...


@t.overload
def progressbar(
    iterable: cabc.Iterable[V] | None = None,
    length: int | None = None,
    label: str | None = None,
    hidden: bool = False,
    show_eta: bool = True,
    show_percent: bool | None = None,
    show_pos: bool = False,
    item_show_func: t.Callable[[V | None], str | None] | None = None,
    fill_char: str = "#",
    empty_char: str = "-",
    bar_template: str = "%(label)s  [%(bar)s]  %(info)s",
    info_sep: str = "  ",
    width: int = 36,
    file: t.TextIO | None = None,
    color: bool | None = None,
    update_min_steps: int = 1,
) -> ProgressBar[V]: ...


def progressbar(
    iterable: cabc.Iterable[V] | None = None,
    length: int | None = None,
    label: str | None = None,
    hidden: bool = False,
    show_eta: bool = True,
    show_percent: bool | None = None,
    show_pos: bool = False,
    item_show_func: t.Callable[[V | None], str | None] | None = None,
    fill_char: str = "#",
    empty_char: str = "-",
    bar_template: str = "%(label)s  [%(bar)s]  %(info)s",
    info_sep: str = "  ",
    width: int = 36,
    file: t.TextIO | None = None,
    color: bool | None = None,
    update_min_steps: int = 1,
) -> ProgressBar[V]:
    """Creates an iterable context manager that shows a progress bar
    while something is being iterated over. It iterates either over the
    `iterable` or over `length` items (that are counted up). During
    iteration a rendered progress bar is printed to the given `file`
    (defaults to stdout), including an estimate of the remaining time
    and more. By default, the progress bar is not rendered if the file
    is not a terminal.

    The progress bar already exists when the context manager is
    entered. Every iteration over the progress bar advances the
    iterable passed to it and updates the bar. When the context manager
    exits, a newline is printed and the progress bar is finalized on
    screen.

    Note: The progress bar is currently designed for use cases where the
    total progress can be expected to take at least several seconds.
    Because of this, the ProgressBar class object won't display
    progress that is considered too fast, and progress where the time
    between steps is less than a second.

    Nothing else must be printed while the bar is active, or the
    progress bar will be unintentionally destroyed.

    Example usage::

        with progressbar(items) as bar:
            for item in bar:
                do_something_with(item)

    Without an iterable, the bar can be advanced manually through its
    `update()` method instead of iterating over it. The method takes
    the number of steps to advance the bar by::

        with progressbar(length=chunks.total_bytes) as bar:
            for chunk in chunks:
                process_chunk(chunk)
                bar.update(chunks.bytes)

    ``update()`` also takes an optional value specifying the
    ``current_item`` at the new position. Together with
    ``item_show_func`` this customizes the output of each manual
    step::

        with click.progressbar(
            length=total_size,
            label='Unzipping archive',
            item_show_func=lambda a: a.filename
        ) as bar:
            for archive in zip_file:
                archive.extract()
                bar.update(archive.size, archive)

    :param iterable: an iterable to iterate over.  If not provided the length
                     is required.
    :param length: the number of items to iterate over.  By default the
                   progressbar will attempt to ask the iterator about its
                   length, which might or might not work.  If an iterable is
                   also provided this parameter can be used to override the
                   length.  If an iterable is not provided the progress bar
                   will iterate over a range of that length.
    :param label: the label to show next to the progress bar.
    :param hidden: hide the progressbar. Defaults to ``False``. When no tty is
        detected, only the progressbar label is printed; hiding the bar is
        documented to disable that as well. (NOTE(review): the previous
        wording said "setting this to ``False``", which looks like a typo
        for ``True`` -- confirm against ``ProgressBar``.)
    :param show_eta: enables or disables the estimated time display.  This is
                     automatically disabled if the length cannot be
                     determined.
    :param show_percent: enables or disables the percentage display.  The
                         default is `True` if the iterable has a length or
                         `False` if not.
    :param show_pos: enables or disables the absolute position display.  The
                     default is `False`.
    :param item_show_func: A function called with the current item which
        can return a string to show next to the progress bar. If the
        function returns ``None`` nothing is shown. The current item can
        be ``None``, such as when entering and exiting the bar.
    :param fill_char: the character to use to show the filled part of the
                      progress bar.
    :param empty_char: the character to use to show the non-filled part of
                       the progress bar.
    :param bar_template: the format string to use as template for the bar.
                         The parameters in it are ``label`` for the label,
                         ``bar`` for the progress bar and ``info`` for the
                         info section.
    :param info_sep: the separator between multiple info items (eta etc.)
    :param width: the width of the progress bar in characters, 0 means full
                  terminal width
    :param file: The file to write to. If this is not a terminal then
        only the label is printed.
    :param color: controls if the terminal supports ANSI colors or not.  The
                  default is autodetection.  This is only needed if ANSI
                  codes are included anywhere in the progress bar output
                  which is not the case by default.
    :param update_min_steps: Render only when this many updates have
        completed. This allows tuning for very fast iterators.

    .. versionadded:: 8.2
        The ``hidden`` argument.

    .. versionchanged:: 8.0
        Output is shown even if execution time is less than 0.5 seconds.

    .. versionchanged:: 8.0
        ``item_show_func`` shows the current item, not the previous one.

    .. versionchanged:: 8.0
        Labels are echoed if the output is not a TTY. Reverts a change
        in 7.0 that removed all output.

    .. versionadded:: 8.0
       The ``update_min_steps`` parameter.

    .. versionadded:: 4.0
        The ``color`` parameter and ``update`` method.

    .. versionadded:: 2.0
    """
    from ._termui_impl import ProgressBar

    # Every option is handed through unchanged, except ``color`` which
    # is resolved against the current default first.
    return ProgressBar(
        iterable=iterable,
        length=length,
        hidden=hidden,
        show_eta=show_eta,
        show_percent=show_percent,
        show_pos=show_pos,
        item_show_func=item_show_func,
        fill_char=fill_char,
        empty_char=empty_char,
        bar_template=bar_template,
        info_sep=info_sep,
        file=file,
        label=label,
        width=width,
        color=resolve_color_default(color),
        update_min_steps=update_min_steps,
    )
def clear() -> None:
    """Clears the terminal screen: wipes the whole visible space of the
    terminal and moves the cursor to the top left. Nothing happens when
    stdout is not connected to a terminal.

    .. versionadded:: 2.0
    """
    if isatty(sys.stdout):
        # ANSI escape \033[2J clears the screen, \033[1;1H moves the cursor
        echo("\033[2J\033[1;1H", nl=False)
def _interpret_color(color: int | tuple[int, int, int] | str, offset: int = 0) -> str:
if isinstance(color, int):
return f"{38 + offset};5;{color:d}"
if isinstance(color, (tuple, list)):
r, g, b = color
return f"{38 + offset};2;{r:d};{g:d};{b:d}"
return str(_ansi_colors[color] + offset)
def style(
    text: t.Any,
    fg: int | tuple[int, int, int] | str | None = None,
    bg: int | tuple[int, int, int] | str | None = None,
    bold: bool | None = None,
    dim: bool | None = None,
    underline: bool | None = None,
    overline: bool | None = None,
    italic: bool | None = None,
    blink: bool | None = None,
    reverse: bool | None = None,
    strikethrough: bool | None = None,
    reset: bool = True,
) -> str:
    """Styles a text with ANSI styles and returns the new string.  By
    default the styling is self contained which means that at the end
    of the string a reset code is issued.  This can be prevented by
    passing ``reset=False``.

    Examples::

        click.echo(click.style('Hello World!', fg='green'))
        click.echo(click.style('ATTENTION!', blink=True))
        click.echo(click.style('Some things', reverse=True, fg='cyan'))
        click.echo(click.style('More colors', fg=(255, 12, 128), bg=117))

    Supported color names:

    * ``black`` (might be a gray)
    * ``red``
    * ``green``
    * ``yellow`` (might be an orange)
    * ``blue``
    * ``magenta``
    * ``cyan``
    * ``white`` (might be light gray)
    * ``bright_black``
    * ``bright_red``
    * ``bright_green``
    * ``bright_yellow``
    * ``bright_blue``
    * ``bright_magenta``
    * ``bright_cyan``
    * ``bright_white``
    * ``reset`` (reset the color code only)

    If the terminal supports it, color may also be specified as:

    -   An integer in the interval [0, 255], including ``0``. The
        terminal must support 8-bit/256-color mode.
    -   An RGB tuple of three integers in [0, 255]. The terminal must
        support 24-bit/true-color mode.

    See https://en.wikipedia.org/wiki/ANSI_color and
    https://gist.github.com/XVilka/8346728 for more information.

    :param text: the string to style with ansi codes.
    :param fg: if provided this will become the foreground color.
    :param bg: if provided this will become the background color.
    :param bold: if provided this will enable or disable bold mode.
    :param dim: if provided this will enable or disable dim mode.  This is
                badly supported.
    :param underline: if provided this will enable or disable underline.
    :param overline: if provided this will enable or disable overline.
    :param italic: if provided this will enable or disable italic.
    :param blink: if provided this will enable or disable blinking.
    :param reverse: if provided this will enable or disable inverse
                    rendering (foreground becomes background and the
                    other way round).
    :param strikethrough: if provided this will enable or disable
        striking through text.
    :param reset: by default a reset-all code is added at the end of the
                  string which means that styles do not carry over.  This
                  can be disabled to compose styles.
    :raises TypeError: if ``fg`` or ``bg`` is an unknown color name.

    .. versionchanged:: 8.0
        A non-string ``message`` is converted to a string.

    .. versionchanged:: 8.0
       Added support for 256 and RGB color codes.

    .. versionchanged:: 8.0
        Added the ``strikethrough``, ``italic``, and ``overline``
        parameters.

    .. versionchanged:: 7.0
        Added support for bright colors.

    .. versionadded:: 2.0
    """
    if not isinstance(text, str):
        text = str(text)

    bits = []

    for color, offset in ((fg, 0), (bg, 10)):
        # A plain truth test would drop the valid palette index 0, so
        # integers are always accepted. ``None``, ``""``, ``()`` and
        # ``False`` keep meaning "no color" (``type(...) is int``
        # deliberately excludes ``bool``).
        if not (color or type(color) is int):
            continue

        try:
            bits.append(f"\033[{_interpret_color(color, offset)}m")
        except KeyError:
            raise TypeError(f"Unknown color {color!r}") from None

    # (flag, code that enables the attribute, code that disables it).
    # ``None`` means "leave the attribute alone". The order is the
    # order in which the escape sequences are emitted.
    toggles = (
        (bold, 1, 22),
        (dim, 2, 22),
        (underline, 4, 24),
        (overline, 53, 55),
        (italic, 3, 23),
        (blink, 5, 25),
        (reverse, 7, 27),
        (strikethrough, 9, 29),
    )

    for flag, on_code, off_code in toggles:
        if flag is not None:
            bits.append(f"\033[{on_code if flag else off_code}m")

    bits.append(text)

    if reset:
        bits.append(_ansi_reset_all)

    return "".join(bits)
def unstyle(text: str) -> str:
    """Removes ANSI styling information from a string.  Calling this is
    usually unnecessary because Click's echo function removes styling
    automatically when needed.

    .. versionadded:: 2.0

    :param text: the text to remove style information from.
    """
    plain = strip_ansi(text)
    return plain
def secho(
    message: t.Any | None = None,
    file: t.IO[t.AnyStr] | None = None,
    nl: bool = True,
    err: bool = False,
    color: bool | None = None,
    **styles: t.Any,
) -> None:
    """Combines :func:`echo` and :func:`style` into one call, so these
    two calls are the same::

        click.secho('Hello World!', fg='green')
        click.echo(click.style('Hello World!', fg='green'))

    ``file``, ``nl``, ``err`` and ``color`` go to :func:`echo`, every
    other keyword argument goes to :func:`style`.

    Non-string types will be converted to :class:`str`. However,
    :class:`bytes` are passed directly to :meth:`echo` without applying
    style. If you want to style bytes that represent text, call
    :meth:`bytes.decode` first.

    .. versionchanged:: 8.0
        A non-string ``message`` is converted to a string. Bytes are
        passed through without style applied.

    .. versionadded:: 2.0
    """
    is_binary = isinstance(message, (bytes, bytearray))

    if not (message is None or is_binary):
        message = style(message, **styles)

    return echo(message, file=file, nl=nl, err=err, color=color)
@t.overload
def edit(
    text: bytes | bytearray,
    editor: str | None = None,
    env: cabc.Mapping[str, str] | None = None,
    require_save: bool = False,
    extension: str = ".txt",
) -> bytes | None: ...


@t.overload
def edit(
    text: str,
    editor: str | None = None,
    env: cabc.Mapping[str, str] | None = None,
    require_save: bool = True,
    extension: str = ".txt",
) -> str | None: ...


@t.overload
def edit(
    text: None = None,
    editor: str | None = None,
    env: cabc.Mapping[str, str] | None = None,
    require_save: bool = True,
    extension: str = ".txt",
    filename: str | cabc.Iterable[str] | None = None,
) -> None: ...


def edit(
    text: str | bytes | bytearray | None = None,
    editor: str | None = None,
    env: cabc.Mapping[str, str] | None = None,
    require_save: bool = True,
    extension: str = ".txt",
    filename: str | cabc.Iterable[str] | None = None,
) -> str | bytes | bytearray | None:
    r"""Edits the given text in the defined editor.  If an editor is given
    (should be the full path to the executable but the regular operating
    system search path is used for finding the executable) it overrides
    the detected editor.  Optionally, some environment variables can be
    used.  If the editor is closed without changes, `None` is returned.  In
    case a file is edited directly the return value is always `None` and
    `require_save` and `extension` are ignored.

    If the editor cannot be opened a :exc:`UsageError` is raised.

    Note for Windows: to simplify cross-platform usage, the newlines are
    automatically converted from POSIX to Windows and vice versa.  As such,
    the message here will have ``\n`` as newline markers.

    :param text: the text to edit.
    :param editor: optionally the editor to use.  Defaults to automatic
                   detection.
    :param env: environment variables to forward to the editor.
    :param require_save: if this is true, then not saving in the editor
                         will make the return value become `None`.
    :param extension: the extension to tell the editor about.  This defaults
                      to `.txt` but changing this might change syntax
                      highlighting.
    :param filename: if provided it will edit this file instead of the
                     provided text contents.  It will not use a temporary
                     file as an indirection in that case.  If the editor supports
                     editing multiple files at once, a sequence of files may be
                     passed as well.  Call this function once per file instead
                     if multiple files cannot be managed at once or editing the
                     files serially is desired.

    .. versionchanged:: 8.2.0
        ``filename`` now accepts any ``Iterable[str]`` in addition to a ``str``
        if the ``editor`` supports editing multiple files at once.

    """
    from ._termui_impl import Editor

    ed = Editor(editor=editor, env=env, require_save=require_save, extension=extension)

    if filename is not None:
        # A single name is wrapped so the editor always gets an iterable.
        targets = (filename,) if isinstance(filename, str) else filename
        ed.edit_files(filenames=targets)
        return None

    return ed.edit(text)
def launch(url: str, wait: bool = False, locate: bool = False) -> int:
    """Launches the given URL (or filename) in the default viewer
    application for this file type.  If this is an executable, it might
    launch the executable in a new session.  The return value is the
    exit code of the launched application.  Usually, ``0`` indicates
    success.

    Examples::

        click.launch('https://click.palletsprojects.com/')
        click.launch('/my/downloaded/file', locate=True)

    .. versionadded:: 2.0

    :param url: URL or filename of the thing to launch.
    :param wait: Wait for the program to exit before returning. This
        only works if the launched program blocks. In particular,
        ``xdg-open`` on Linux does not block.
    :param locate: if this is set to `True` then instead of launching the
                   application associated with the URL it will attempt to
                   launch a file manager with the file located.  This
                   might have weird effects if the URL does not point to
                   the filesystem.
    """
    from ._termui_impl import open_url

    exit_code = open_url(url, wait=wait, locate=locate)
    return exit_code
# Override hook: when this is set, getchar() calls it instead of the
# platform implementation. Used for unit testing.
_getchar: t.Callable[[bool], str] | None = None


def getchar(echo: bool = False) -> str:
    """Fetches a single character from the terminal and returns it.  The
    result is always a unicode string. Under certain rare circumstances
    it holds more than one character: when for whatever reason multiple
    characters end up in the terminal buffer, or when standard input is
    not actually a terminal.

    Note that this will always read from the terminal, even if something
    is piped into the standard input.

    Note for Windows: in rare cases when typing non-ASCII characters, this
    function might wait for a second character and then return both at once.
    This is because certain Unicode characters look like special-key markers.

    .. versionadded:: 2.0

    :param echo: if set to `True`, the character read will also show up on
                 the terminal.  The default is to not show it.
    """
    global _getchar

    reader = _getchar

    if reader is None:
        # First call: load the implementation and remember it for
        # later calls.
        from ._termui_impl import getchar as reader

        _getchar = reader

    return reader(echo)
def raw_terminal() -> AbstractContextManager[int]:
    """Return the context manager produced by
    ``_termui_impl.raw_terminal``; the implementation is only imported
    when this function is called.
    """
    from ._termui_impl import raw_terminal as _impl

    return _impl()
def pause(info: str | None = None, err: bool = False) -> None:
    """Stop execution and wait for the user to press any key.

    This is similar to the Windows batch "pause" command. If standard
    input or standard output is not a terminal, this does nothing.

    .. versionadded:: 2.0

    .. versionadded:: 4.0
       Added the `err` parameter.

    :param info: The message to print before pausing. Defaults to
        ``"Press any key to continue..."``. An empty string suppresses
        the message and the trailing newline.
    :param err: if set to ``True`` the message goes to ``stderr`` instead
        of ``stdout``, the same as with echo.
    """
    interactive = isatty(sys.stdin) and isatty(sys.stdout)
    if not interactive:
        return

    message = _("Press any key to continue...") if info is None else info

    try:
        if message:
            echo(message, nl=False, err=err)
        try:
            getchar()
        except (KeyboardInterrupt, EOFError):
            # Ctrl-C / end of input counts as "a key was pressed".
            pass
    finally:
        # Finish the line the prompt was written on, even on errors.
        if message:
            echo(err=err)

View File

@@ -0,0 +1,577 @@
from __future__ import annotations
import collections.abc as cabc
import contextlib
import io
import os
import shlex
import sys
import tempfile
import typing as t
from types import TracebackType
from . import _compat
from . import formatting
from . import termui
from . import utils
from ._compat import _find_binary_reader
if t.TYPE_CHECKING:
from _typeshed import ReadableBuffer
from .core import Command
class EchoingStdin:
    """Binary input wrapper that copies everything it reads to ``output``.

    Echoing is skipped while ``_paused`` is true. Attributes not defined
    here are looked up on the wrapped input stream.
    """

    def __init__(self, input: t.BinaryIO, output: t.BinaryIO) -> None:
        self._input = input
        self._output = output
        self._paused = False

    def __getattr__(self, x: str) -> t.Any:
        # Only reached for names missing on this object: proxy to the input.
        return getattr(self._input, x)

    def _echo(self, rv: bytes) -> bytes:
        """Write ``rv`` to the output unless paused, then hand it back."""
        if self._paused:
            return rv
        self._output.write(rv)
        return rv

    def read(self, n: int = -1) -> bytes:
        chunk = self._input.read(n)
        return self._echo(chunk)

    def read1(self, n: int = -1) -> bytes:
        chunk = self._input.read1(n)  # type: ignore
        return self._echo(chunk)

    def readline(self, n: int = -1) -> bytes:
        line = self._input.readline(n)
        return self._echo(line)

    def readlines(self) -> list[bytes]:
        echoed = []
        for line in self._input.readlines():
            echoed.append(self._echo(line))
        return echoed

    def __iter__(self) -> cabc.Iterator[bytes]:
        # Lazy: each line is echoed only when the consumer pulls it.
        return (self._echo(line) for line in self._input)

    def __repr__(self) -> str:
        return repr(self._input)
@contextlib.contextmanager
def _pause_echo(stream: EchoingStdin | None) -> cabc.Iterator[None]:
    """Temporarily stop ``stream`` from echoing what is read through it.

    Sets ``stream._paused`` to ``True`` for the duration of the ``with``
    block (or decorated call) and back to ``False`` afterwards.

    :param stream: the echoing input wrapper to pause, or ``None`` to make
        this a no-op.
    """
    if stream is None:
        yield
        return

    stream._paused = True
    try:
        yield
    finally:
        # Always resume echoing, even when the wrapped code raises.
        # Without this an exception would leave the stream paused.
        stream._paused = False
class BytesIOCopy(io.BytesIO):
    """An ``io.BytesIO`` that mirrors every write into a second buffer.

    .. versionadded:: 8.2
    """

    def __init__(self, copy_to: io.BytesIO) -> None:
        super().__init__()
        # Buffer that receives a copy of everything written here.
        self.copy_to = copy_to

    def flush(self) -> None:
        """Flush this buffer, then the mirror."""
        super().flush()
        mirror = self.copy_to
        mirror.flush()

    def write(self, b: ReadableBuffer) -> int:
        """Write ``b`` to the mirror first, then to this buffer.

        Returns the byte count reported by this buffer's own write.
        """
        mirror = self.copy_to
        mirror.write(b)
        written = super().write(b)
        return written
class StreamMixer:
    """Mixes `<stdout>` and `<stderr>` streams.

    ``stdout`` and ``stderr`` are separate buffers that both copy their
    writes into ``output``, which holds the combined result.

    .. versionadded:: 8.2
    """

    def __init__(self) -> None:
        mixed = io.BytesIO()
        self.output: io.BytesIO = mixed
        self.stdout: io.BytesIO = BytesIOCopy(copy_to=mixed)
        self.stderr: io.BytesIO = BytesIOCopy(copy_to=mixed)

    def __del__(self) -> None:
        """Close the embedded buffers in a fixed order.

        ``stderr`` and ``stdout`` are closed before ``output``, which
        protects against races between ``output`` being closed and the
        other streams being flushed on close.

        .. versionadded:: 8.2.2
        """
        for buffer in (self.stderr, self.stdout, self.output):
            buffer.close()
class _NamedTextIOWrapper(io.TextIOWrapper):
    """``io.TextIOWrapper`` whose ``name`` and ``mode`` are fixed by the caller.

    :param buffer: the binary stream to wrap.
    :param name: value reported by the ``name`` property.
    :param mode: value reported by the ``mode`` property.
    :param kwargs: forwarded unchanged to ``io.TextIOWrapper``.
    """

    def __init__(
        self, buffer: t.BinaryIO, name: str, mode: str, **kwargs: t.Any
    ) -> None:
        super().__init__(buffer, **kwargs)
        self._name, self._mode = name, mode

    @property
    def name(self) -> str:
        """The name supplied at construction time."""
        return self._name

    @property
    def mode(self) -> str:
        """The mode string supplied at construction time."""
        return self._mode
def make_input_stream(
    input: str | bytes | t.IO[t.Any] | None, charset: str
) -> t.BinaryIO:
    """Turn ``input`` into a binary stream.

    :param input: an object with a ``read`` attribute (treated as a
        stream), text, bytes, or ``None`` for empty input.
    :param charset: encoding used when ``input`` is text.
    :raises TypeError: if ``input`` is a stream but no binary reader can
        be found for it.
    """
    # Is already an input stream.
    if hasattr(input, "read"):
        reader = _find_binary_reader(t.cast("t.IO[t.Any]", input))
        if reader is None:
            raise TypeError("Could not find binary reader for input stream.")
        return reader

    if isinstance(input, str):
        payload = input.encode(charset)
    elif input is None:
        payload = b""
    else:
        payload = input

    return io.BytesIO(payload)
class Result:
    """Holds the captured result of an invoked CLI script.

    :param runner: The runner that created the result
    :param stdout_bytes: The standard output as bytes.
    :param stderr_bytes: The standard error as bytes.
    :param output_bytes: A mix of ``stdout_bytes`` and ``stderr_bytes``, as the
        user would see it in its terminal.
    :param return_value: The value returned from the invoked command.
    :param exit_code: The exit code as integer.
    :param exception: The exception that happened if one did.
    :param exc_info: Exception information (exception type, exception instance,
        traceback type).

    .. versionchanged:: 8.2
        ``stderr_bytes`` no longer optional, ``output_bytes`` introduced and
        ``mix_stderr`` has been removed.

    .. versionadded:: 8.0
        Added ``return_value``.
    """

    def __init__(
        self,
        runner: CliRunner,
        stdout_bytes: bytes,
        stderr_bytes: bytes,
        output_bytes: bytes,
        return_value: t.Any,
        exit_code: int,
        exception: BaseException | None,
        exc_info: tuple[type[BaseException], BaseException, TracebackType]
        | None = None,
    ):
        self.runner = runner
        self.stdout_bytes = stdout_bytes
        self.stderr_bytes = stderr_bytes
        self.output_bytes = output_bytes
        self.return_value = return_value
        self.exit_code = exit_code
        self.exception = exception
        self.exc_info = exc_info

    def _as_text(self, raw: bytes) -> str:
        """Decode captured bytes with the runner's charset.

        Undecodable bytes are replaced and ``\\r\\n`` is normalised to
        ``\\n``.
        """
        text = raw.decode(self.runner.charset, "replace")
        return text.replace("\r\n", "\n")

    @property
    def output(self) -> str:
        """The terminal output as unicode string, as the user would see it.

        .. versionchanged:: 8.2
            No longer a proxy for ``self.stdout``. Now has its own independent stream
            that is mixing `<stdout>` and `<stderr>`, in the order they were written.
        """
        return self._as_text(self.output_bytes)

    @property
    def stdout(self) -> str:
        """The standard output as unicode string."""
        return self._as_text(self.stdout_bytes)

    @property
    def stderr(self) -> str:
        """The standard error as unicode string.

        .. versionchanged:: 8.2
            No longer raise an exception, always returns the `<stderr>` string.
        """
        return self._as_text(self.stderr_bytes)

    def __repr__(self) -> str:
        status = repr(self.exception) if self.exception else "okay"
        return f"<{type(self).__name__} {status}>"
class CliRunner:
    """The CLI runner provides functionality to invoke a Click command line
    script for unittesting purposes in a isolated environment. This only
    works in single-threaded systems without any concurrency as it changes the
    global interpreter state.

    :param charset: the character set for the input and output data.
    :param env: a dictionary with environment variables for overriding.
    :param echo_stdin: if this is set to `True`, then reading from `<stdin>` writes
                       to `<stdout>`.  This is useful for showing examples in
                       some circumstances.  Note that regular prompts
                       will automatically echo the input.
    :param catch_exceptions: Whether to catch any exceptions other than
                             ``SystemExit`` when running :meth:`~CliRunner.invoke`.

    .. versionchanged:: 8.2
        Added the ``catch_exceptions`` parameter.

    .. versionchanged:: 8.2
        ``mix_stderr`` parameter has been removed.
    """

    def __init__(
        self,
        charset: str = "utf-8",
        env: cabc.Mapping[str, str | None] | None = None,
        echo_stdin: bool = False,
        catch_exceptions: bool = True,
    ) -> None:
        self.charset = charset
        self.env: cabc.Mapping[str, str | None] = env or {}
        self.echo_stdin = echo_stdin
        self.catch_exceptions = catch_exceptions

    def get_default_prog_name(self, cli: Command) -> str:
        """Given a command object it will return the default program name
        for it.  The default is the `name` attribute or ``"root"`` if not
        set.
        """
        return cli.name or "root"

    def make_env(
        self, overrides: cabc.Mapping[str, str | None] | None = None
    ) -> cabc.Mapping[str, str | None]:
        """Returns the environment overrides for invoking a script.

        The runner's own ``env`` is copied and then updated with
        ``overrides``; ``self.env`` itself is not modified.
        """
        merged = dict(self.env)
        if overrides:
            merged.update(overrides)
        return merged

    @contextlib.contextmanager
    def isolation(
        self,
        input: str | bytes | t.IO[t.Any] | None = None,
        env: cabc.Mapping[str, str | None] | None = None,
        color: bool = False,
    ) -> cabc.Iterator[tuple[io.BytesIO, io.BytesIO, io.BytesIO]]:
        """A context manager that sets up the isolation for invoking of a
        command line tool.  This sets up `<stdin>` with the given input data
        and `os.environ` with the overrides from the given dictionary.
        This also rebinds some internals in Click to be mocked (like the
        prompt functionality).

        This is automatically done in the :meth:`invoke` method.

        :param input: the input stream to put into `sys.stdin`.
        :param env: the environment overrides as dictionary.
        :param color: whether the output should contain color codes. The
                      application can still override this explicitly.

        .. versionadded:: 8.2
            An additional output stream is returned, which is a mix of
            `<stdout>` and `<stderr>` streams.

        .. versionchanged:: 8.2
            Always returns the `<stderr>` stream.

        .. versionchanged:: 8.0
            `<stderr>` is opened with ``errors="backslashreplace"``
            instead of the default ``"strict"``.

        .. versionchanged:: 4.0
            Added the ``color`` parameter.
        """
        bytes_input = make_input_stream(input, self.charset)
        echo_input = None

        # Remember the global state that is replaced below so it can be
        # put back when the context exits.
        saved_stdin, saved_stdout, saved_stderr = sys.stdin, sys.stdout, sys.stderr
        saved_width = formatting.FORCED_WIDTH
        formatting.FORCED_WIDTH = 80

        env = self.make_env(env)

        stream_mixer = StreamMixer()

        if self.echo_stdin:
            bytes_input = echo_input = t.cast(
                t.BinaryIO, EchoingStdin(bytes_input, stream_mixer.stdout)
            )

        sys.stdin = text_input = _NamedTextIOWrapper(
            bytes_input, encoding=self.charset, name="<stdin>", mode="r"
        )

        if self.echo_stdin:
            # Force unbuffered reads, otherwise TextIOWrapper reads a
            # large chunk which is echoed early.
            text_input._CHUNK_SIZE = 1  # type: ignore

        sys.stdout = _NamedTextIOWrapper(
            stream_mixer.stdout, encoding=self.charset, name="<stdout>", mode="w"
        )

        sys.stderr = _NamedTextIOWrapper(
            stream_mixer.stderr,
            encoding=self.charset,
            name="<stderr>",
            mode="w",
            errors="backslashreplace",
        )

        @_pause_echo(echo_input)  # type: ignore
        def visible_input(prompt: str | None = None) -> str:
            # Replacement for the visible prompt: the next input line is
            # written back to stdout after the prompt text.
            sys.stdout.write(prompt or "")
            try:
                line = next(text_input)
            except StopIteration as e:
                raise EOFError() from e
            val = line.rstrip("\r\n")
            sys.stdout.write(f"{val}\n")
            sys.stdout.flush()
            return val

        @_pause_echo(echo_input)  # type: ignore
        def hidden_input(prompt: str | None = None) -> str:
            # Replacement for the hidden prompt: the typed value is not
            # written to stdout, only the prompt and a newline.
            sys.stdout.write(f"{prompt or ''}\n")
            sys.stdout.flush()
            try:
                line = next(text_input)
            except StopIteration as e:
                raise EOFError() from e
            return line.rstrip("\r\n")

        @_pause_echo(echo_input)  # type: ignore
        def _getchar(echo: bool) -> str:
            char = sys.stdin.read(1)

            if echo:
                sys.stdout.write(char)

            sys.stdout.flush()
            return char

        default_color = color

        def should_strip_ansi(
            stream: t.IO[t.Any] | None = None, color: bool | None = None
        ) -> bool:
            # An explicit ``color`` wins; otherwise fall back to the value
            # passed to ``isolation``.
            effective = default_color if color is None else color
            return not effective

        def _set_or_unset(name: str, value: str | None) -> None:
            # ``None`` means "make sure the variable is absent"; failures
            # while deleting are ignored.
            if value is not None:
                os.environ[name] = value
                return
            try:
                del os.environ[name]
            except Exception:
                pass

        saved_hooks = (
            termui.visible_prompt_func,
            termui.hidden_prompt_func,
            termui._getchar,
            utils.should_strip_ansi,  # type: ignore
            _compat.should_strip_ansi,
        )
        termui.visible_prompt_func = visible_input
        termui.hidden_prompt_func = hidden_input
        termui._getchar = _getchar
        utils.should_strip_ansi = should_strip_ansi  # type: ignore
        _compat.should_strip_ansi = should_strip_ansi

        previous_env = {}
        try:
            for key, value in env.items():
                previous_env[key] = os.environ.get(key)
                _set_or_unset(key, value)
            yield (stream_mixer.stdout, stream_mixer.stderr, stream_mixer.output)
        finally:
            # Only the variables recorded above are restored.
            for key, value in previous_env.items():
                _set_or_unset(key, value)
            sys.stdout = saved_stdout
            sys.stderr = saved_stderr
            sys.stdin = saved_stdin
            (
                termui.visible_prompt_func,
                termui.hidden_prompt_func,
                termui._getchar,
                utils.should_strip_ansi,  # type: ignore
                _compat.should_strip_ansi,
            ) = saved_hooks
            formatting.FORCED_WIDTH = saved_width

    def invoke(
        self,
        cli: Command,
        args: str | cabc.Sequence[str] | None = None,
        input: str | bytes | t.IO[t.Any] | None = None,
        env: cabc.Mapping[str, str | None] | None = None,
        catch_exceptions: bool | None = None,
        color: bool = False,
        **extra: t.Any,
    ) -> Result:
        """Invokes a command in an isolated environment.  The arguments are
        forwarded directly to the command line script, the `extra` keyword
        arguments are passed to the :meth:`~clickpkg.Command.main` function of
        the command.

        This returns a :class:`Result` object.

        :param cli: the command to invoke
        :param args: the arguments to invoke. It may be given as an iterable
                     or a string. When given as string it will be interpreted
                     as a Unix shell command. More details at
                     :func:`shlex.split`.
        :param input: the input data for `sys.stdin`.
        :param env: the environment overrides.
        :param catch_exceptions: Whether to catch any other exceptions than
                                 ``SystemExit``. If :data:`None`, the value
                                 from :class:`CliRunner` is used.
        :param extra: the keyword arguments to pass to :meth:`main`.
        :param color: whether the output should contain color codes. The
                      application can still override this explicitly.

        .. versionadded:: 8.2
            The result object has the ``output_bytes`` attribute with
            the mix of ``stdout_bytes`` and ``stderr_bytes``, as the user would
            see it in its terminal.

        .. versionchanged:: 8.2
            The result object always returns the ``stderr_bytes`` stream.

        .. versionchanged:: 8.0
            The result object has the ``return_value`` attribute with
            the value returned from the invoked command.

        .. versionchanged:: 4.0
            Added the ``color`` parameter.

        .. versionchanged:: 3.0
            Added the ``catch_exceptions`` parameter.

        .. versionchanged:: 3.0
            The result object has the ``exc_info`` attribute with the
            traceback if available.
        """
        exc_info = None
        if catch_exceptions is None:
            catch_exceptions = self.catch_exceptions

        with self.isolation(input=input, env=env, color=color) as outstreams:
            return_value = None
            exception: BaseException | None = None
            exit_code = 0

            if isinstance(args, str):
                args = shlex.split(args)

            # An explicit ``prog_name`` in ``extra`` wins over the default.
            if "prog_name" in extra:
                prog_name = extra.pop("prog_name")
            else:
                prog_name = self.get_default_prog_name(cli)

            try:
                return_value = cli.main(args=args or (), prog_name=prog_name, **extra)
            except SystemExit as e:
                exc_info = sys.exc_info()
                e_code = t.cast("int | t.Any | None", e.code)

                if e_code is None:
                    e_code = 0

                if e_code != 0:
                    exception = e

                if isinstance(e_code, int):
                    exit_code = e_code
                else:
                    # A non-integer exit code is written to stdout and
                    # reported as exit code 1.
                    sys.stdout.write(str(e_code))
                    sys.stdout.write("\n")
                    exit_code = 1

            except Exception as e:
                if not catch_exceptions:
                    raise
                exception = e
                exit_code = 1
                exc_info = sys.exc_info()
            finally:
                # Flush the text wrappers so the byte buffers are complete
                # before they are read.
                sys.stdout.flush()
                sys.stderr.flush()
                stdout = outstreams[0].getvalue()
                stderr = outstreams[1].getvalue()
                output = outstreams[2].getvalue()

        return Result(
            runner=self,
            stdout_bytes=stdout,
            stderr_bytes=stderr,
            output_bytes=output,
            return_value=return_value,
            exit_code=exit_code,
            exception=exception,
            exc_info=exc_info,  # type: ignore
        )

    @contextlib.contextmanager
    def isolated_filesystem(
        self, temp_dir: str | os.PathLike[str] | None = None
    ) -> cabc.Iterator[str]:
        """A context manager that creates a temporary directory and
        changes the current working directory to it. This isolates tests
        that affect the contents of the CWD to prevent them from
        interfering with each other.

        :param temp_dir: Create the temporary directory under this
            directory. If given, the created directory is not removed
            when exiting.

        .. versionchanged:: 8.0
            Added the ``temp_dir`` parameter.
        """
        original_cwd = os.getcwd()
        sandbox = tempfile.mkdtemp(dir=temp_dir)
        os.chdir(sandbox)

        try:
            yield sandbox
        finally:
            os.chdir(original_cwd)

            if temp_dir is None:
                import shutil

                # Best-effort cleanup: an OSError while removing is ignored.
                with contextlib.suppress(OSError):
                    shutil.rmtree(sandbox)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,627 @@
from __future__ import annotations
import collections.abc as cabc
import os
import re
import sys
import typing as t
from functools import update_wrapper
from types import ModuleType
from types import TracebackType
from ._compat import _default_text_stderr
from ._compat import _default_text_stdout
from ._compat import _find_binary_writer
from ._compat import auto_wrap_for_ansi
from ._compat import binary_streams
from ._compat import open_stream
from ._compat import should_strip_ansi
from ._compat import strip_ansi
from ._compat import text_streams
from ._compat import WIN
from .globals import resolve_color_default
if t.TYPE_CHECKING:
import typing_extensions as te
P = te.ParamSpec("P")
R = t.TypeVar("R")
def _posixify(name: str) -> str:
    """Lower-case ``name`` and join its whitespace-separated words with ``-``."""
    words = name.split()
    hyphenated = "-".join(words)
    return hyphenated.lower()
def safecall(func: t.Callable[P, R]) -> t.Callable[P, R | None]:
    """Wrap ``func`` so that any ``Exception`` it raises is swallowed.

    The wrapper returns ``func``'s result, or ``None`` when an
    ``Exception`` was raised. Exceptions that do not derive from
    ``Exception`` (such as ``KeyboardInterrupt``) still propagate.
    """

    def wrapper(*args: P.args, **kwargs: P.kwargs) -> R | None:
        try:
            result = func(*args, **kwargs)
        except Exception:
            return None
        return result

    return update_wrapper(wrapper, func)
def make_str(value: t.Any) -> str:
    """Converts a value into a valid string.

    Bytes are decoded with the filesystem encoding, falling back to UTF-8
    with replacement characters if that fails. Anything else goes through
    ``str()``.
    """
    if not isinstance(value, bytes):
        return str(value)

    try:
        return value.decode(sys.getfilesystemencoding())
    except UnicodeError:
        return value.decode("utf-8", "replace")
def make_default_short_help(help: str, max_length: int = 45) -> str:
    """Returns a condensed version of help string.

    Only the first paragraph is considered. The text is cut after the
    first word ending in ``"."`` if that fits, returned unchanged (with
    whitespace collapsed) if it fits entirely, and otherwise shortened to
    whole words followed by ``"..."``.

    :param help: the full help text.
    :param max_length: maximum length of the result, including the
        ``"..."`` suffix when one is added.
    """
    # Consider only the first paragraph.
    first_paragraph = help.partition("\n\n")[0]

    # Collapse newlines, tabs, and spaces.
    tokens = first_paragraph.split()

    if not tokens:
        return ""

    # The first paragraph started with a "no rewrap" marker, ignore it.
    if tokens[0] == "\b":
        tokens = tokens[1:]

    final_index = len(tokens) - 1
    used = 0
    # Index of the word at which the text has to be truncated, or None
    # when everything fits.
    cut = None

    for pos, token in enumerate(tokens):
        used += len(token) + (1 if pos > 0 else 0)

        if used > max_length:  # too long, truncate
            cut = pos
            break

        if token.endswith("."):  # sentence end, truncate without "..."
            return " ".join(tokens[: pos + 1])

        if used == max_length and pos != final_index:
            cut = pos  # not at sentence end, truncate with "..."
            break

    if cut is None:
        return " ".join(tokens)  # no truncation needed

    # Account for the length of the suffix.
    used += len("...")

    # Remove words until the length is short enough.
    while cut > 0:
        # Each dropped word also frees the space that preceded it.
        used -= len(tokens[cut]) + 1

        if used <= max_length:
            break

        cut -= 1

    return " ".join(tokens[:cut]) + "..."
class LazyFile:
    """A lazy file works like a regular file but it does not fully open
    the file but it does perform some basic checks early to see if the
    filename parameter does make sense.  This is useful for safely opening
    files for writing.

    :param filename: path of the file, or ``"-"`` for a standard stream
        (which is opened immediately).
    :param mode: mode the file will be opened in.
    :param encoding: text encoding passed on when opening.
    :param errors: error handling mode passed on when opening.
    :param atomic: passed on when the file is actually opened.
    """

    def __init__(
        self,
        filename: str | os.PathLike[str],
        mode: str = "r",
        encoding: str | None = None,
        errors: str | None = "strict",
        atomic: bool = False,
    ):
        self.name: str = os.fspath(filename)
        self.mode = mode
        self.encoding = encoding
        self.errors = errors
        self.atomic = atomic
        self._f: t.IO[t.Any] | None
        self.should_close: bool

        if self.name != "-":
            if "r" in mode:
                # Open and close the file in case we're opening it for
                # reading so that we can catch at least some errors in
                # some cases early.
                open(filename, mode).close()
            self._f = None
            self.should_close = True
            return

        self._f, self.should_close = open_stream(filename, mode, encoding, errors)

    def __getattr__(self, name: str) -> t.Any:
        # Any attribute not defined here forces the file open and is
        # looked up on the real file object.
        return getattr(self.open(), name)

    def __repr__(self) -> str:
        handle = self._f
        if handle is None:
            return f"<unopened file '{format_filename(self.name)}' {self.mode}>"
        return repr(handle)

    def open(self) -> t.IO[t.Any]:
        """Opens the file if it's not yet open.  This call might fail with
        a :exc:`FileError`.  Not handling this error will produce an error
        that Click shows.
        """
        if self._f is not None:
            return self._f

        try:
            handle, self.should_close = open_stream(
                self.name, self.mode, self.encoding, self.errors, atomic=self.atomic
            )
        except OSError as e:
            from .exceptions import FileError

            raise FileError(self.name, hint=e.strerror) from e

        self._f = handle
        return handle

    def close(self) -> None:
        """Closes the underlying file, no matter what."""
        handle = self._f
        if handle is not None:
            handle.close()

    def close_intelligently(self) -> None:
        """This function only closes the file if it was opened by the lazy
        file wrapper.  For instance this will never close stdin.
        """
        if not self.should_close:
            return
        self.close()

    def __enter__(self) -> LazyFile:
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_value: BaseException | None,
        tb: TracebackType | None,
    ) -> None:
        self.close_intelligently()

    def __iter__(self) -> cabc.Iterator[t.AnyStr]:
        # open() returns the stored handle, opening the file first if needed.
        return iter(self.open())  # type: ignore
class KeepOpenFile:
    """File proxy whose context manager exit does not close the file.

    Attribute access, iteration and ``repr`` are forwarded to the wrapped
    file object.
    """

    def __init__(self, file: t.IO[t.Any]) -> None:
        self._file: t.IO[t.Any] = file

    def __getattr__(self, name: str) -> t.Any:
        wrapped = self._file
        return getattr(wrapped, name)

    def __enter__(self) -> KeepOpenFile:
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_value: BaseException | None,
        tb: TracebackType | None,
    ) -> None:
        # Deliberately a no-op: leaving the ``with`` block keeps the file open.
        return None

    def __repr__(self) -> str:
        wrapped = self._file
        return repr(wrapped)

    def __iter__(self) -> cabc.Iterator[t.AnyStr]:
        wrapped = self._file
        return iter(wrapped)
def echo(
    message: t.Any | None = None,
    file: t.IO[t.Any] | None = None,
    nl: bool = True,
    err: bool = False,
    color: bool | None = None,
) -> None:
    """Print a message and newline to stdout or a file. This should be
    used instead of :func:`print` because it provides better support
    for different data, files, and environments.

    Compared to :func:`print`, this does the following:

    -   Ensures that the output encoding is not misconfigured on Linux.
    -   Supports Unicode in the Windows console.
    -   Supports writing to binary outputs, and supports writing bytes
        to text outputs.
    -   Supports colors and styles on Windows.
    -   Removes ANSI color and style codes if the output does not look
        like an interactive terminal.
    -   Always flushes the output.

    The ``message`` object itself is never modified; when a newline is
    appended to a ``bytearray`` a new object is written instead.

    :param message: The string or bytes to output. Other objects are
        converted to strings.
    :param file: The file to write to. Defaults to ``stdout``.
    :param err: Write to ``stderr`` instead of ``stdout``.
    :param nl: Print a newline after the message. Enabled by default.
    :param color: Force showing or hiding colors and other styles. By
        default Click will remove color if the output does not look like
        an interactive terminal.

    .. versionchanged:: 6.0
        Support Unicode output on the Windows console. Click does not
        modify ``sys.stdout``, so ``sys.stdout.write()`` and ``print()``
        will still not support Unicode.

    .. versionchanged:: 4.0
        Added the ``color`` parameter.

    .. versionadded:: 3.0
        Added the ``err`` parameter.

    .. versionchanged:: 2.0
        Support colors on Windows if colorama is installed.
    """
    if file is None:
        if err:
            file = _default_text_stderr()
        else:
            file = _default_text_stdout()

        # There are no standard streams attached to write to. For example,
        # pythonw on Windows.
        if file is None:
            return

    # Convert non bytes/text into the native string type.
    if message is not None and not isinstance(message, (str, bytes, bytearray)):
        out: str | bytes | bytearray | None = str(message)
    else:
        out = message

    if nl:
        out = out or ""
        # Build a new object instead of using ``+=``: for a bytearray the
        # in-place form would append the newline to the caller's buffer.
        if isinstance(out, str):
            out = out + "\n"
        else:
            out = out + b"\n"

    if not out:
        file.flush()
        return

    # If there is a message and the value looks like bytes, we manually
    # need to find the binary stream and write the message in there.
    # This is done separately so that most stream types will work as you
    # would expect. Eg: you can write to StringIO for other cases.
    if isinstance(out, (bytes, bytearray)):
        binary_file = _find_binary_writer(file)

        if binary_file is not None:
            file.flush()
            binary_file.write(out)
            binary_file.flush()
            return

    # ANSI style code support. For no message or bytes, nothing happens.
    # When outputting to a file instead of a terminal, strip codes.
    else:
        color = resolve_color_default(color)

        if should_strip_ansi(file, color):
            out = strip_ansi(out)
        elif WIN:
            if auto_wrap_for_ansi is not None:
                file = auto_wrap_for_ansi(file, color)  # type: ignore
            elif not color:
                out = strip_ansi(out)

    file.write(out)  # type: ignore
    file.flush()
def get_binary_stream(name: t.Literal["stdin", "stdout", "stderr"]) -> t.BinaryIO:
    """Returns a system stream for byte processing.

    :param name: the name of the stream to open.  Valid names are ``'stdin'``,
                 ``'stdout'`` and ``'stderr'``
    :raises TypeError: if ``name`` is not a known stream name.
    """
    opener = binary_streams.get(name)
    if opener is not None:
        return opener()
    raise TypeError(f"Unknown standard stream '{name}'")
def get_text_stream(
    name: t.Literal["stdin", "stdout", "stderr"],
    encoding: str | None = None,
    errors: str | None = "strict",
) -> t.TextIO:
    """Returns a system stream for text processing.  This usually returns
    a wrapped stream around a binary stream returned from
    :func:`get_binary_stream` but it also can take shortcuts for already
    correctly configured streams.

    :param name: the name of the stream to open.  Valid names are ``'stdin'``,
                 ``'stdout'`` and ``'stderr'``
    :param encoding: overrides the detected default encoding.
    :param errors: overrides the default error mode.
    :raises TypeError: if ``name`` is not a known stream name.
    """
    opener = text_streams.get(name)
    if opener is not None:
        return opener(encoding, errors)
    raise TypeError(f"Unknown standard stream '{name}'")
def open_file(
    filename: str | os.PathLike[str],
    mode: str = "r",
    encoding: str | None = None,
    errors: str | None = "strict",
    lazy: bool = False,
    atomic: bool = False,
) -> t.IO[t.Any]:
    """Open a file, with extra behavior to handle ``'-'`` to indicate
    a standard stream, lazy open on write, and atomic write. Similar to
    the behavior of the :class:`~click.File` param type.

    If ``'-'`` is given to open ``stdout`` or ``stdin``, the stream is
    wrapped so that using it in a context manager will not close it.
    This makes it possible to use the function without accidentally
    closing a standard stream:

    .. code-block:: python

        with open_file(filename) as f:
            ...

    :param filename: The name or Path of the file to open, or ``'-'`` for
        ``stdin``/``stdout``.
    :param mode: The mode in which to open the file.
    :param encoding: The encoding to decode or encode a file opened in
        text mode.
    :param errors: The error handling mode.
    :param lazy: Wait to open the file until it is accessed. For read
        mode, the file is temporarily opened to raise access errors
        early, then closed until it is read again.
    :param atomic: Write to a temporary file and replace the given file
        on close.

    .. versionadded:: 3.0
    """
    if lazy:
        deferred = LazyFile(filename, mode, encoding, errors, atomic=atomic)
        return t.cast("t.IO[t.Any]", deferred)

    stream, should_close = open_stream(filename, mode, encoding, errors, atomic=atomic)

    if should_close:
        return stream

    # The caller must not close this stream, so wrap it in a proxy whose
    # context manager exit leaves it open.
    return t.cast("t.IO[t.Any]", KeepOpenFile(stream))
def format_filename(
    filename: str | bytes | os.PathLike[str] | os.PathLike[bytes],
    shorten: bool = False,
) -> str:
    """Format a filename as a string for display. Ensures the filename can be
    displayed by replacing any invalid bytes or surrogate escapes in the name
    with the replacement character (U+FFFD).

    Invalid bytes or surrogate escapes will raise an error when written to a
    stream with ``errors="strict"``. This will typically happen with ``stdout``
    when the locale is something like ``en_GB.UTF-8``.

    Many scenarios *are* safe to write surrogates though, due to PEP 538 and
    PEP 540, including:

    -   Writing to ``stderr``, which uses ``errors="backslashreplace"``.
    -   The system has ``LANG=C.UTF-8``, ``C``, or ``POSIX``. Python opens
        stdout and stderr with ``errors="surrogateescape"``.
    -   None of ``LANG/LC_*`` are set. Python assumes ``LANG=C.UTF-8``.
    -   Python is started in UTF-8 mode with ``PYTHONUTF8=1`` or ``-X utf8``.
        Python opens stdout and stderr with ``errors="surrogateescape"``.

    :param filename: formats a filename for UI display. This will also convert
        the filename into unicode without failing.
    :param shorten: this optionally shortens the filename to strip of the
        path that leads up to it.
    """
    raw = os.path.basename(filename) if shorten else os.fspath(filename)

    if isinstance(raw, bytes):
        return raw.decode(sys.getfilesystemencoding(), "replace")

    # Round-trip through UTF-8: surrogate escapes become the original
    # undecodable bytes, which are then decoded as U+FFFD.
    as_bytes = raw.encode("utf-8", "surrogateescape")
    return as_bytes.decode("utf-8", "replace")
def get_app_dir(app_name: str, roaming: bool = True, force_posix: bool = False) -> str:
    r"""Returns the config folder for the application.  The default behavior
    is to return whatever is most appropriate for the operating system.

    To give you an idea, for an app called ``"Foo Bar"``, something like
    the following folders could be returned:

    Mac OS X:
      ``~/Library/Application Support/Foo Bar``
    Mac OS X (POSIX):
      ``~/.foo-bar``
    Unix:
      ``~/.config/foo-bar``
    Unix (POSIX):
      ``~/.foo-bar``
    Windows (roaming):
      ``C:\Users\<user>\AppData\Roaming\Foo Bar``
    Windows (not roaming):
      ``C:\Users\<user>\AppData\Local\Foo Bar``

    .. versionadded:: 2.0

    :param app_name: the application name.  This should be properly capitalized
                     and can contain whitespace.
    :param roaming: controls if the folder should be roaming or not on Windows.
                    Has no effect otherwise.
    :param force_posix: if this is set to `True` then on any POSIX system the
                        folder will be stored in the home folder with a leading
                        dot instead of the XDG config home or darwin's
                        application support folder.
    """
    if WIN:
        variable = "APPDATA" if roaming else "LOCALAPPDATA"
        base = os.environ.get(variable)
        if base is None:
            # Fall back to the home directory when the variable is unset.
            base = os.path.expanduser("~")
        return os.path.join(base, app_name)

    if force_posix:
        dotted = os.path.expanduser(f"~/.{_posixify(app_name)}")
        return os.path.join(dotted)

    if sys.platform == "darwin":
        support_dir = os.path.expanduser("~/Library/Application Support")
        return os.path.join(support_dir, app_name)

    config_home = os.environ.get("XDG_CONFIG_HOME", os.path.expanduser("~/.config"))
    return os.path.join(config_home, _posixify(app_name))
class PacifyFlushWrapper:
    """This wrapper is used to catch and suppress BrokenPipeErrors resulting
    from ``.flush()`` being called on broken pipe during the shutdown/final-GC
    of the Python interpreter. Notably ``.flush()`` is always called on
    ``sys.stdout`` and ``sys.stderr``. So as to have minimal impact on any
    other cleanup code, and the case where the underlying file is not a broken
    pipe, all calls and attributes are proxied.
    """

    def __init__(self, wrapped: t.IO[t.Any]) -> None:
        self.wrapped = wrapped

    def flush(self) -> None:
        """Flush the wrapped stream, ignoring only ``EPIPE`` failures."""
        try:
            self.wrapped.flush()
        except OSError as e:
            import errno

            if e.errno == errno.EPIPE:
                return
            raise

    def __getattr__(self, attr: str) -> t.Any:
        target = self.wrapped
        return getattr(target, attr)
def _detect_program_name(
    path: str | None = None, _main: ModuleType | None = None
) -> str:
    """Determine the command used to run the program, for use in help
    text. If a file or entry point was executed, the file name is
    returned. If ``python -m`` was used to execute a module or package,
    ``python -m name`` is returned.

    This doesn't try to be too precise, the goal is to give a concise
    name for help text. Files are only shown as their name without the
    path. ``python`` is only shown for modules, and the full path to
    ``sys.executable`` is not shown.

    :param path: The Python file being executed. Python puts this in
        ``sys.argv[0]``, which is used by default.
    :param _main: The ``__main__`` module. This should only be passed
        during internal testing.

    .. versionadded:: 8.0
        Based on command args detection in the Werkzeug reloader.

    :meta private:
    """
    if _main is None:
        _main = sys.modules["__main__"]

    if not path:
        path = sys.argv[0]

    # The value of __package__ indicates how Python was called. It may
    # not exist if a setuptools script is installed as an egg. It may be
    # set incorrectly for entry points created with pip on Windows.
    # It is set to "" inside a Shiv or PEX zipapp.
    executed_as_file = getattr(_main, "__package__", None) in {None, ""} or (
        os.name == "nt"
        and _main.__package__ == ""
        and not os.path.exists(path)
        and os.path.exists(f"{path}.exe")
    )

    if executed_as_file:
        # Executed a file, like "python app.py".
        return os.path.basename(path)

    # Executed a module, like "python -m example".
    # Rewritten by Python from "-m script" to "/path/to/script.py".
    # Need to look at main module to determine how it was executed.
    package = t.cast(str, _main.__package__)
    stem, _extension = os.path.splitext(os.path.basename(path))

    # A submodule like "example.cli" gets its own name appended; a
    # package's "__main__" does not.
    dotted = package if stem == "__main__" else f"{package}.{stem}"

    return f"python -m {dotted.lstrip('.')}"
def _expand_args(
args: cabc.Iterable[str],
*,
user: bool = True,
env: bool = True,
glob_recursive: bool = True,
) -> list[str]:
"""Simulate Unix shell expansion with Python functions.
See :func:`glob.glob`, :func:`os.path.expanduser`, and
:func:`os.path.expandvars`.
This is intended for use on Windows, where the shell does not do any
expansion. It may not exactly match what a Unix shell would do.
:param args: List of command line arguments to expand.
:param user: Expand user home directory.
:param env: Expand environment variables.
:param glob_recursive: ``**`` matches directories recursively.
.. versionchanged:: 8.1
Invalid glob patterns are treated as empty expansions rather
than raising an error.
.. versionadded:: 8.0
:meta private:
"""
from glob import glob
out = []
for arg in args:
if user:
arg = os.path.expanduser(arg)
if env:
arg = os.path.expandvars(arg)
try:
matches = glob(arg, recursive=glob_recursive)
except re.error:
matches = []
if not matches:
out.append(arg)
else:
out.extend(matches)
return out

View File

@@ -0,0 +1 @@
import os; var = 'SETUPTOOLS_USE_DISTUTILS'; enabled = os.environ.get(var, 'local') == 'local'; enabled and __import__('_distutils_hack').add_shim();

View File

@@ -0,0 +1,49 @@
from typing import Any, Optional
from .main import (dotenv_values, find_dotenv, get_key, load_dotenv, set_key,
unset_key)
def load_ipython_extension(ipython: Any) -> None:
    """Forward to ``.ipython.load_ipython_extension``.

    The import is done inside the function, so the ``.ipython`` module is
    only loaded when this hook is actually called.
    """
    from .ipython import load_ipython_extension as _register

    _register(ipython)
def get_cli_string(
    path: Optional[str] = None,
    action: Optional[str] = None,
    key: Optional[str] = None,
    value: Optional[str] = None,
    quote: Optional[str] = None,
):
    """Build a ``dotenv ...`` command line as a single string.

    Handy for turning the arguments passed to a fabric task into
    something that can be handed to a `local` or `run` command.

    ``-q``/``-f`` are emitted first; ``key`` is only added when ``action``
    is given, and ``value`` only when ``key`` is given. A value containing
    a space is wrapped in double quotes.
    """
    parts = ['dotenv']
    if quote:
        parts.append(f'-q {quote}')
    if path:
        parts.append(f'-f {path}')
    if action:
        parts.append(action)
        if key:
            parts.append(key)
            if value:
                parts.append(f'"{value}"' if ' ' in value else value)

    return ' '.join(parts).strip()
__all__ = ['get_cli_string',
'load_dotenv',
'dotenv_values',
'get_key',
'set_key',
'unset_key',
'find_dotenv',
'load_ipython_extension']

View File

@@ -0,0 +1,6 @@
"""Entry point for cli, enables execution with `python -m dotenv`"""
from .cli import cli
# Only start the command line interface when this file is the program
# being run; importing the module merely imports ``cli``.
if __name__ == "__main__":
    cli()

View File

@@ -0,0 +1,205 @@
import json
import os
import shlex
import sys
from contextlib import contextmanager
from typing import Any, Dict, IO, Iterator, List, Optional
if sys.platform == 'win32':
from subprocess import Popen
try:
import click
except ImportError:
sys.stderr.write('It seems python-dotenv is not installed with cli option. \n'
'Run pip install "python-dotenv[cli]" to fix this.')
sys.exit(1)
from .main import dotenv_values, set_key, unset_key
from .version import __version__
def enumerate_env() -> Optional[str]:
    """
    Build the path of the ``.env`` file in the current working directory.

    Returns None when ``os.getcwd()`` raises ``FileNotFoundError``. The
    returned path is not checked for existence.
    """
    try:
        return os.path.join(os.getcwd(), '.env')
    except FileNotFoundError:
        return None
# NOTE: the default for --file is computed once, when this module is imported
# (``enumerate_env()`` is called in the decorator), not on each invocation.
@click.group()
@click.option('-f', '--file', default=enumerate_env(),
              type=click.Path(file_okay=True),
              help="Location of the .env file, defaults to .env file in current working directory.")
@click.option('-q', '--quote', default='always',
              type=click.Choice(['always', 'never', 'auto']),
              help="Whether to quote or not the variable values. Default mode is always. This does not affect parsing.")
@click.option('-e', '--export', default=False,
              type=click.BOOL,
              help="Whether to write the dot file as an executable bash script.")
@click.version_option(version=__version__)
@click.pass_context
def cli(ctx: click.Context, file: Any, quote: Any, export: Any) -> None:
    """This script is used to set, get or unset values from a .env file."""
    # Stash the group-level options where the sub-commands read them back.
    ctx.obj = dict(QUOTE=quote, EXPORT=export, FILE=file)
@contextmanager
def stream_file(path: os.PathLike) -> Iterator[IO[str]]:
    """
    Open a file and yield the corresponding (decoded) stream.

    Exits with error code 2 if the file cannot be opened.

    Because the ``yield`` sits inside the ``try``, an ``OSError`` raised by
    the caller's ``with`` body is reported and turned into exit status 2 in
    the same way as a failure to open the file.
    """
    try:
        with open(path) as stream:
            yield stream
    except OSError as exc:
        print(f"Error opening env file: {exc}", file=sys.stderr)
        # Use sys.exit(): the bare exit() helper is injected by the `site`
        # module and is not guaranteed to exist (e.g. `python -S`).
        sys.exit(2)
@cli.command()
@click.pass_context
@click.option('--format', default='simple',
              type=click.Choice(['simple', 'json', 'shell', 'export']),
              help="The format in which to display the list. Default format is simple, "
                   "which displays name=value without quotes.")
def list(ctx: click.Context, format: str) -> None:
    """Display all the stored key/value."""
    # `format` is one of the Choice strings above (it was mis-annotated as bool).
    file = ctx.obj['FILE']

    with stream_file(file) as stream:
        values = dotenv_values(stream=stream)

    if format == 'json':
        click.echo(json.dumps(values, indent=2, sort_keys=True))
    else:
        prefix = 'export ' if format == 'export' else ''
        for k in sorted(values):
            v = values[k]
            # Keys without a value (None) are not printed at all.
            if v is not None:
                if format in ('export', 'shell'):
                    v = shlex.quote(v)
                click.echo(f'{prefix}{k}={v}')
@cli.command()
@click.pass_context
@click.argument('key', required=True)
@click.argument('value', required=True)
def set(ctx: click.Context, key: Any, value: Any) -> None:
    """Store the given key/value."""
    file = ctx.obj['FILE']
    quote = ctx.obj['QUOTE']
    export = ctx.obj['EXPORT']
    success, key, value = set_key(file, key, value, quote, export)
    if success:
        click.echo(f'{key}={value}')
    else:
        # sys.exit() instead of the `site`-provided exit() helper, which is
        # not guaranteed to be defined.
        sys.exit(1)
@cli.command()
@click.pass_context
@click.argument('key', required=True)
def get(ctx: click.Context, key: Any) -> None:
    """Retrieve the value for the given key."""
    file = ctx.obj['FILE']

    with stream_file(file) as stream:
        values = dotenv_values(stream=stream)

    stored_value = values.get(key)
    # Truthiness test: a missing key, a key without a value (None) and a key
    # set to the empty string all end with exit status 1 and no output.
    if stored_value:
        click.echo(stored_value)
    else:
        # sys.exit() instead of the `site`-provided exit() helper, which is
        # not guaranteed to be defined.
        sys.exit(1)
@cli.command()
@click.pass_context
@click.argument('key', required=True)
def unset(ctx: click.Context, key: Any) -> None:
    """Removes the given key."""
    file = ctx.obj['FILE']
    quote = ctx.obj['QUOTE']
    success, key = unset_key(file, key, quote)
    if success:
        click.echo(f"Successfully removed {key}")
    else:
        # sys.exit() instead of the `site`-provided exit() helper, which is
        # not guaranteed to be defined.
        sys.exit(1)
@cli.command(context_settings={'ignore_unknown_options': True})
@click.pass_context
@click.option(
    "--override/--no-override",
    default=True,
    help="Override variables from the environment file with those from the .env file.",
)
@click.argument('commandline', nargs=-1, type=click.UNPROCESSED)
def run(ctx: click.Context, override: bool, commandline: List[str]) -> None:
    """Run command with environment variables present."""
    file = ctx.obj['FILE']
    # `file` is None when no -f was given and no default could be computed;
    # os.path.isfile(None) would raise TypeError, so treat it as "missing".
    if not file or not os.path.isfile(file):
        raise click.BadParameter(
            f'Invalid value for \'-f\' "{file}" does not exist.',
            ctx=ctx
        )
    # Drop keys without a value; with --no-override also drop keys that are
    # already present in the current environment.
    dotenv_as_dict = {
        k: v
        for (k, v) in dotenv_values(file).items()
        if v is not None and (override or k not in os.environ)
    }

    if not commandline:
        click.echo('No command given.')
        # sys.exit() instead of the `site`-provided exit() helper, which is
        # not guaranteed to be defined.
        sys.exit(1)
    run_command(commandline, dotenv_as_dict)
def run_command(command: List[str], env: Dict[str, str]) -> None:
    """Replace the current process with the specified command.

    Replaces the current process with the specified command, run with the
    current environment variables plus the variables from `env`.

    Parameters
    ----------
    command: List[str]
        The command and its parameters
    env: Dict
        The additional environment variables

    Returns
    -------
    None
        This function does not return any value. On non-Windows platforms
        it replaces the current process with the new one; on Windows it
        waits for the child process and exits with its return code.
    """
    # copy the current environment variables and add the values from
    # `env`
    cmd_env = os.environ.copy()
    cmd_env.update(env)

    if sys.platform == 'win32':
        # execvpe on Windows returns control immediately
        # rather than once the command has finished.
        p = Popen(command,
                  universal_newlines=True,
                  bufsize=0,
                  shell=False,
                  env=cmd_env)
        p.communicate()
        # sys.exit() instead of the `site`-provided exit() helper, which is
        # not guaranteed to be defined.
        sys.exit(p.returncode)
    else:
        os.execvpe(command[0], args=command, env=cmd_env)

View File

@@ -0,0 +1,39 @@
from IPython.core.magic import Magics, line_magic, magics_class # type: ignore
from IPython.core.magic_arguments import (argument, magic_arguments, # type: ignore
parse_argstring) # type: ignore
from .main import find_dotenv, load_dotenv
# Provides the ``dotenv`` line magic. No docstring is added to the magic
# method itself, because its help text is derived from the decorators.
@magics_class
class IPythonDotEnv(Magics):

    @magic_arguments()
    @argument(
        '-o', '--override', action='store_true',
        help="Indicate to override existing variables"
    )
    @argument(
        '-v', '--verbose', action='store_true',
        help="Indicate function calls to be verbose"
    )
    @argument('dotenv_path', nargs='?', type=str, default='.env',
              help='Search in increasingly higher folders for the `dotenv_path`')
    @line_magic
    def dotenv(self, line):
        options = parse_argstring(self.dotenv, line)

        # Find the file first: raise_error_if_not_found=True, usecwd=True.
        try:
            located = find_dotenv(options.dotenv_path, True, True)
        except IOError:
            print("cannot find .env file")
            return

        # Then load it into the environment.
        load_dotenv(located, verbose=options.verbose, override=options.override)
def load_ipython_extension(ipython):
    """Register the ``IPythonDotEnv`` magics class (the %dotenv magic) on ``ipython``."""
    magics_class_to_register = IPythonDotEnv
    ipython.register_magics(magics_class_to_register)

View File

@@ -0,0 +1,400 @@
import io
import logging
import os
import pathlib
import shutil
import sys
import tempfile
from collections import OrderedDict
from contextlib import contextmanager
from typing import IO, Dict, Iterable, Iterator, Mapping, Optional, Tuple, Union
from .parser import Binding, parse_stream
from .variables import parse_variables
# A type alias for a string path to be used for the paths in this file.
# These paths may flow to `open()` and `shutil.move()`; `shutil.move()`
# only accepts string paths, not byte paths or file descriptors. See
# https://github.com/python/typeshed/pull/6832.
StrPath = Union[str, "os.PathLike[str]"]
logger = logging.getLogger(__name__)
def with_warn_for_invalid_lines(mappings: Iterator[Binding]) -> Iterator[Binding]:
    """Yield every binding unchanged, logging a warning for those flagged as errors."""
    for binding in mappings:
        if binding.error:
            first_line = binding.original.line
            logger.warning(
                "python-dotenv could not parse statement starting at line %s",
                first_line,
            )
        yield binding
class DotEnv:
    """Parsed view of one .env source (a file path or an open text stream)."""

    def __init__(
        self,
        dotenv_path: Optional[StrPath],
        stream: Optional[IO[str]] = None,
        verbose: bool = False,
        encoding: Optional[str] = None,
        interpolate: bool = True,
        override: bool = True,
    ) -> None:
        self.dotenv_path: Optional[StrPath] = dotenv_path
        self.stream: Optional[IO[str]] = stream
        # Cache for dict(); None means "not parsed yet".
        self._dict: Optional[Dict[str, Optional[str]]] = None
        self.verbose: bool = verbose
        self.encoding: Optional[str] = encoding
        self.interpolate: bool = interpolate
        self.override: bool = override

    @contextmanager
    def _get_stream(self) -> Iterator[IO[str]]:
        """Yield the text stream to parse.

        An existing file at ``dotenv_path`` wins over ``stream``; when
        neither is usable an empty stream is yielded.
        """
        if self.dotenv_path and os.path.isfile(self.dotenv_path):
            with open(self.dotenv_path, encoding=self.encoding) as stream:
                yield stream
        elif self.stream is not None:
            yield self.stream
        else:
            if self.verbose:
                logger.info(
                    "python-dotenv could not find configuration file %s.",
                    self.dotenv_path or ".env",
                )
            yield io.StringIO("")

    def dict(self) -> Dict[str, Optional[str]]:
        """Return dotenv as dict"""
        # Compare against None: an empty result is a valid cached value and
        # must not trigger a re-parse on every call.
        if self._dict is not None:
            return self._dict

        raw_values = self.parse()

        if self.interpolate:
            self._dict = OrderedDict(
                resolve_variables(raw_values, override=self.override)
            )
        else:
            self._dict = OrderedDict(raw_values)

        return self._dict

    def parse(self) -> Iterator[Tuple[str, Optional[str]]]:
        """Yield ``(key, value)`` for every parsed binding that has a key."""
        with self._get_stream() as stream:
            for mapping in with_warn_for_invalid_lines(parse_stream(stream)):
                if mapping.key is not None:
                    yield mapping.key, mapping.value

    def set_as_environment_variables(self) -> bool:
        """
        Load the current dotenv as system environment variable.

        Returns False when nothing was parsed, True otherwise. Keys without
        a value are skipped, as are keys already in ``os.environ`` unless
        ``override`` is set.
        """
        values = self.dict()
        if not values:
            return False

        for k, v in values.items():
            if k in os.environ and not self.override:
                continue
            if v is not None:
                os.environ[k] = v

        return True

    def get(self, key: str) -> Optional[str]:
        """Return the value stored for ``key``, or None if the key is absent.

        A warning is logged for an absent key when ``verbose`` is set.
        """
        data = self.dict()

        if key in data:
            return data[key]

        if self.verbose:
            logger.warning("Key %s not found in %s.", key, self.dotenv_path)

        return None
def get_key(
    dotenv_path: StrPath,
    key_to_get: str,
    encoding: Optional[str] = "utf-8",
) -> Optional[str]:
    """
    Look up a single key in the given .env file.

    Returns `None` if the key isn't found or doesn't have a value.
    """
    dotenv = DotEnv(dotenv_path, verbose=True, encoding=encoding)
    return dotenv.get(key_to_get)
@contextmanager
def rewrite(
    path: StrPath,
    encoding: Optional[str],
) -> Iterator[Tuple[IO[str], IO[str]]]:
    """Yield ``(source, dest)`` for rewriting the file at ``path``.

    ``source`` is ``path`` opened for reading and ``dest`` is a named
    temporary file opened for writing. If the ``with`` body finishes
    without an exception, the temporary file is moved over ``path``;
    otherwise it is deleted and the exception is re-raised.
    """
    # Creates the file if it does not exist yet, so open() below succeeds.
    pathlib.Path(path).touch()

    with tempfile.NamedTemporaryFile(mode="w", encoding=encoding, delete=False) as dest:
        error = None
        try:
            with open(path, encoding=encoding) as source:
                yield (source, dest)
        except BaseException as err:
            # Remember the failure; cleanup happens after dest is closed.
            error = err

    if error is None:
        shutil.move(dest.name, path)
    else:
        os.unlink(dest.name)
        raise error from None
def set_key(
    dotenv_path: StrPath,
    key_to_set: str,
    value_to_set: str,
    quote_mode: str = "always",
    export: bool = False,
    encoding: Optional[str] = "utf-8",
) -> Tuple[Optional[bool], str, str]:
    """
    Adds or Updates a key/value to the given .env

    The file is rewritten through ``rewrite()``, which touches the path
    first, so a missing file is created rather than reported as an error.

    Raises ``ValueError`` for an unknown ``quote_mode``. Returns
    ``(True, key_to_set, value_to_set)``.
    """
    if quote_mode not in ("always", "auto", "never"):
        raise ValueError(f"Unknown quote_mode: {quote_mode}")

    # "auto" quotes only values that are not purely alphanumeric.
    quote = quote_mode == "always" or (
        quote_mode == "auto" and not value_to_set.isalnum()
    )

    if quote:
        value_out = "'{}'".format(value_to_set.replace("'", "\\'"))
    else:
        value_out = value_to_set
    if export:
        line_out = f"export {key_to_set}={value_out}\n"
    else:
        line_out = f"{key_to_set}={value_out}\n"

    with rewrite(dotenv_path, encoding=encoding) as (source, dest):
        replaced = False
        missing_newline = False
        for mapping in with_warn_for_invalid_lines(parse_stream(source)):
            if mapping.key == key_to_set:
                dest.write(line_out)
                replaced = True
            else:
                dest.write(mapping.original.string)
                # Tracks whether the text copied last ends without a newline.
                missing_newline = not mapping.original.string.endswith("\n")
        if not replaced:
            # Appending: make sure the new line starts on a line of its own.
            if missing_newline:
                dest.write("\n")
            dest.write(line_out)

    return True, key_to_set, value_to_set
def unset_key(
    dotenv_path: StrPath,
    key_to_unset: str,
    quote_mode: str = "always",
    encoding: Optional[str] = "utf-8",
) -> Tuple[Optional[bool], str]:
    """
    Delete every binding for a key from the given `.env` file.

    Returns ``(None, key)`` with a logged warning when the file does not
    exist or the key is not present, and ``(True, key)`` once the key has
    been removed. ``quote_mode`` is accepted but not used here.
    """
    if not os.path.exists(dotenv_path):
        logger.warning("Can't delete from %s - it doesn't exist.", dotenv_path)
        return None, key_to_unset

    found = False
    with rewrite(dotenv_path, encoding=encoding) as (source, dest):
        for binding in with_warn_for_invalid_lines(parse_stream(source)):
            if binding.key != key_to_unset:
                # Everything else is copied through verbatim.
                dest.write(binding.original.string)
            else:
                found = True

    if found:
        return found, key_to_unset

    logger.warning(
        "Key %s not removed from %s - key doesn't exist.", key_to_unset, dotenv_path
    )
    return None, key_to_unset
def resolve_variables(
    values: Iterable[Tuple[str, Optional[str]]],
    override: bool,
) -> Mapping[str, Optional[str]]:
    """Expand variable references in each value, in input order.

    The lookup table for a value combines ``os.environ`` with the values
    resolved so far; with ``override`` the already-resolved values take
    precedence, otherwise ``os.environ`` does. ``None`` values stay None.
    """
    resolved: Dict[str, Optional[str]] = {}

    for name, value in values:
        if value is None:
            resolved[name] = None
            continue

        atoms = parse_variables(value)
        # Later layers overwrite earlier ones.
        layers = (os.environ, resolved) if override else (resolved, os.environ)
        env: Dict[str, Optional[str]] = {}
        for layer in layers:
            env.update(layer)  # type: ignore
        resolved[name] = "".join(atom.resolve(env) for atom in atoms)

    return resolved
def _walk_to_root(path: str) -> Iterator[str]:
"""
Yield directories starting from the given directory up to the root
"""
if not os.path.exists(path):
raise IOError("Starting path not found")
if os.path.isfile(path):
path = os.path.dirname(path)
last_dir = None
current_dir = os.path.abspath(path)
while last_dir != current_dir:
yield current_dir
parent_dir = os.path.abspath(os.path.join(current_dir, os.path.pardir))
last_dir, current_dir = current_dir, parent_dir
def find_dotenv(
    filename: str = ".env",
    raise_error_if_not_found: bool = False,
    usecwd: bool = False,
) -> str:
    """
    Search in increasingly higher folders for the given file

    Returns path to the file if found, or an empty string otherwise

    Parameters:
        filename: Name of the file to look for in each directory.
        raise_error_if_not_found: Raise ``IOError`` instead of returning
            an empty string when no file is found.
        usecwd: Start the search at the current working directory rather
            than at the directory of the calling file.
    """

    def _is_interactive():
        """Decide whether this is running in a REPL or IPython notebook"""
        if hasattr(sys, "ps1") or hasattr(sys, "ps2"):
            return True
        try:
            main = __import__("__main__", None, None, fromlist=["__file__"])
        except ModuleNotFoundError:
            return False
        return not hasattr(main, "__file__")

    def _is_debugger():
        # A trace function is installed.
        return sys.gettrace() is not None

    if usecwd or _is_interactive() or _is_debugger() or getattr(sys, "frozen", False):
        # Should work without __file__, e.g. in REPL or IPython notebook.
        path = os.getcwd()
    else:
        # will work for .py files
        frame = sys._getframe()
        current_file = __file__

        # Walk up the call stack past frames that belong to this module or
        # whose code file does not exist on disk.
        while frame.f_code.co_filename == current_file or not os.path.exists(
            frame.f_code.co_filename
        ):
            assert frame.f_back is not None
            frame = frame.f_back
        frame_filename = frame.f_code.co_filename
        path = os.path.dirname(os.path.abspath(frame_filename))

    for dirname in _walk_to_root(path):
        check_path = os.path.join(dirname, filename)
        if os.path.isfile(check_path):
            return check_path

    if raise_error_if_not_found:
        raise IOError("File not found")

    return ""
def load_dotenv(
    dotenv_path: Optional[StrPath] = None,
    stream: Optional[IO[str]] = None,
    verbose: bool = False,
    override: bool = False,
    interpolate: bool = True,
    encoding: Optional[str] = "utf-8",
) -> bool:
    """Parse a .env file and load the variables found into the environment.

    Parameters:
        dotenv_path: Absolute or relative path to .env file.
        stream: Text stream (such as `io.StringIO`) with .env content, used if
            `dotenv_path` is `None`.
        verbose: Whether to log a message when the .env file is missing.
        override: Whether to override the system environment variables with the
            variables from the `.env` file.
        interpolate: Passed through to `DotEnv`, which resolves variable
            references in values when it is true.
        encoding: Encoding to be used to read the file.
    Returns:
        Bool: The result of `DotEnv.set_as_environment_variables()`, which is
        False when nothing was parsed and True otherwise.

    If both `dotenv_path` and `stream` are `None`, `find_dotenv()` is used to find
    the .env file with its default parameters. If you need to change the default
    parameters of `find_dotenv()`, you can explicitly call `find_dotenv()` and pass
    the result to this function as `dotenv_path`.
    """
    nothing_given = dotenv_path is None and stream is None
    if nothing_given:
        dotenv_path = find_dotenv()

    return DotEnv(
        dotenv_path=dotenv_path,
        stream=stream,
        verbose=verbose,
        interpolate=interpolate,
        override=override,
        encoding=encoding,
    ).set_as_environment_variables()
def dotenv_values(
    dotenv_path: Optional[StrPath] = None,
    stream: Optional[IO[str]] = None,
    verbose: bool = False,
    interpolate: bool = True,
    encoding: Optional[str] = "utf-8",
) -> Dict[str, Optional[str]]:
    """
    Parse a .env file and return its content as a dict, without touching
    the process environment.

    Keys that have no value in the .env file map to `None`. For example,
    `foo=bar` gives `{"foo": "bar"}` while a bare `foo` gives `{"foo": None}`.

    Parameters:
        dotenv_path: Absolute or relative path to the .env file.
        stream: `StringIO` object with .env content, used if `dotenv_path` is `None`.
        verbose: Whether to log a message if the .env file is missing.
        interpolate: Passed through to `DotEnv`, which resolves variable
            references in values when it is true.
        encoding: Encoding to be used to read the file.

    If both `dotenv_path` and `stream` are `None`, `find_dotenv()` is used to find
    the .env file.
    """
    nothing_given = dotenv_path is None and stream is None
    if nothing_given:
        dotenv_path = find_dotenv()

    dotenv = DotEnv(
        dotenv_path=dotenv_path,
        stream=stream,
        verbose=verbose,
        interpolate=interpolate,
        override=True,
        encoding=encoding,
    )
    return dotenv.dict()

View File

@@ -0,0 +1,175 @@
import codecs
import re
from typing import (IO, Iterator, Match, NamedTuple, Optional, # noqa:F401
Pattern, Sequence, Tuple)
def make_regex(string: str, extra_flags: int = 0) -> Pattern[str]:
return re.compile(string, re.UNICODE | extra_flags)
_newline = make_regex(r"(\r\n|\n|\r)")
_multiline_whitespace = make_regex(r"\s*", extra_flags=re.MULTILINE)
_whitespace = make_regex(r"[^\S\r\n]*")
_export = make_regex(r"(?:export[^\S\r\n]+)?")
_single_quoted_key = make_regex(r"'([^']+)'")
_unquoted_key = make_regex(r"([^=\#\s]+)")
_equal_sign = make_regex(r"(=[^\S\r\n]*)")
_single_quoted_value = make_regex(r"'((?:\\'|[^'])*)'")
_double_quoted_value = make_regex(r'"((?:\\"|[^"])*)"')
_unquoted_value = make_regex(r"([^\r\n]*)")
_comment = make_regex(r"(?:[^\S\r\n]*#[^\r\n]*)?")
_end_of_line = make_regex(r"[^\S\r\n]*(?:\r\n|\n|\r|$)")
_rest_of_line = make_regex(r"[^\r\n]*(?:\r|\n|\r\n)?")
_double_quote_escapes = make_regex(r"\\[\\'\"abfnrtv]")
_single_quote_escapes = make_regex(r"\\[\\']")
# The raw source text behind one parsed binding.
class Original(NamedTuple):
    # Exact text between the reader's mark and its current position.
    string: str
    # Line number recorded at the mark (numbering starts at 1).
    line: int
# The result of parsing one statement of a .env stream.
class Binding(NamedTuple):
    # Parsed key; None for comments, trailing whitespace and parse errors.
    key: Optional[str]
    # Parsed value; None when the statement has no "=" part.
    value: Optional[str]
    # The source text and starting line this binding was parsed from.
    original: Original
    # True when the statement could not be parsed.
    error: bool
class Position:
    """A mutable cursor: character offset plus line number."""

    def __init__(self, chars: int, line: int) -> None:
        self.chars = chars
        self.line = line

    @classmethod
    def start(cls) -> "Position":
        """Return a cursor at offset 0 on line 1."""
        return cls(chars=0, line=1)

    def set(self, other: "Position") -> None:
        """Copy the offset and line number of ``other`` into this cursor."""
        self.chars, self.line = other.chars, other.line

    def advance(self, string: str) -> None:
        """Move past ``string``, counting the line breaks it contains."""
        line_breaks = _newline.findall(string)
        self.chars += len(string)
        self.line += len(line_breaks)
class Error(Exception):
    # Raised by Reader.read / Reader.read_regex when the input does not match;
    # parse_binding catches it and reports the statement as an error binding.
    pass
class Reader:
    """Cursor-based reader over the full text of a stream.

    The whole stream is read up front. ``position`` is the current cursor
    and ``mark`` remembers where the current statement began.
    """

    def __init__(self, stream: IO[str]) -> None:
        self.string = stream.read()
        self.position = Position.start()
        self.mark = Position.start()

    def has_next(self) -> bool:
        """Return True while there is unread text left."""
        return len(self.string) > self.position.chars

    def set_mark(self) -> None:
        """Remember the current position as the start of a statement."""
        self.mark.set(self.position)

    def get_marked(self) -> Original:
        """Return the text consumed since the mark, with the mark's line."""
        begin = self.mark.chars
        end = self.position.chars
        return Original(string=self.string[begin:end], line=self.mark.line)

    def peek(self, count: int) -> str:
        """Return up to ``count`` upcoming characters without consuming them."""
        offset = self.position.chars
        return self.string[offset:offset + count]

    def read(self, count: int) -> str:
        """Consume exactly ``count`` characters; raise ``Error`` if too few remain."""
        offset = self.position.chars
        chunk = self.string[offset:offset + count]
        if len(chunk) < count:
            raise Error("read: End of string")
        self.position.advance(chunk)
        return chunk

    def read_regex(self, regex: Pattern[str]) -> Sequence[str]:
        """Match ``regex`` at the cursor, consume the match, return its groups.

        Raises ``Error`` when the pattern does not match at the cursor.
        """
        found = regex.match(self.string, self.position.chars)
        if found is None:
            raise Error("read_regex: Pattern not found")
        self.position.advance(found.group(0))
        return found.groups()
def decode_escapes(regex: Pattern[str], string: str) -> str:
    """Replace every match of ``regex`` in ``string`` by its ``unicode-escape`` decoding."""
    return regex.sub(
        lambda found: codecs.decode(found.group(0), 'unicode-escape'),  # type: ignore
        string,
    )
def parse_key(reader: Reader) -> Optional[str]:
    """Read a key at the cursor.

    Returns None, consuming nothing, when the next character starts a
    comment. A leading single quote selects the quoted-key pattern;
    anything else is read as an unquoted key.
    """
    first = reader.peek(1)
    if first == "#":
        return None
    pattern = _single_quoted_key if first == "'" else _unquoted_key
    (key,) = reader.read_regex(pattern)
    return key
def parse_unquoted_value(reader: Reader) -> str:
    """Read the rest of the line as a value.

    A trailing comment (whitespace followed by ``#``) and trailing
    whitespace are removed.
    """
    (raw,) = reader.read_regex(_unquoted_value)
    without_comment = re.sub(r"\s+#.*", "", raw)
    return without_comment.rstrip()
def parse_value(reader: Reader) -> str:
    """Read a value at the cursor, choosing the form from its first character.

    End of input or a line break gives an empty value. Single- and
    double-quoted values have their escape sequences decoded; anything
    else is read as an unquoted value.
    """
    lead = reader.peek(1)
    if lead in ("", "\n", "\r"):
        return ""
    if lead == "'":
        (quoted,) = reader.read_regex(_single_quoted_value)
        return decode_escapes(_single_quote_escapes, quoted)
    if lead == '"':
        (quoted,) = reader.read_regex(_double_quoted_value)
        return decode_escapes(_double_quote_escapes, quoted)
    return parse_unquoted_value(reader)
def parse_binding(reader: Reader) -> Binding:
    """Parse one statement at the reader's cursor and return it as a Binding.

    Any ``Error`` raised while reading is turned into an error binding that
    covers the rest of the current line.
    """
    # Everything consumed from here on becomes this binding's `original`.
    reader.set_mark()
    try:
        # Skip leading whitespace, including blank lines.
        reader.read_regex(_multiline_whitespace)
        if not reader.has_next():
            # Only trailing whitespace was left: a key-less, non-error binding.
            return Binding(
                key=None,
                value=None,
                original=reader.get_marked(),
                error=False,
            )
        reader.read_regex(_export)
        key = parse_key(reader)
        reader.read_regex(_whitespace)
        if reader.peek(1) == "=":
            reader.read_regex(_equal_sign)
            value: Optional[str] = parse_value(reader)
        else:
            # No "=": the key has no value at all.
            value = None
        reader.read_regex(_comment)
        reader.read_regex(_end_of_line)
        return Binding(
            key=key,
            value=value,
            original=reader.get_marked(),
            error=False,
        )
    except Error:
        # Skip to the end of the line so parsing can resume afterwards.
        reader.read_regex(_rest_of_line)
        return Binding(
            key=None,
            value=None,
            original=reader.get_marked(),
            error=True,
        )
def parse_stream(stream: IO[str]) -> Iterator[Binding]:
    """Yield one Binding per statement until the stream's text is used up."""
    source = Reader(stream)
    while source.has_next():
        binding = parse_binding(source)
        yield binding

View File

@@ -0,0 +1 @@
# Marker file for PEP 561

View File

@@ -0,0 +1,86 @@
import re
from abc import ABCMeta, abstractmethod
from typing import Iterator, Mapping, Optional, Pattern
# Matches ``${name}`` and ``${name:-default}``. ``name`` may not contain "}"
# or ":"; ``default`` may not contain "}" and is None when ":-" is absent.
_posix_variable: Pattern[str] = re.compile(
r"""
\$\{
(?P<name>[^\}:]*)
(?::-
(?P<default>[^\}]*)
)?
\}
""",
re.VERBOSE,
)
class Atom(metaclass=ABCMeta):
    """Abstract piece of a value that can be resolved to a string."""

    def __ne__(self, other: object) -> bool:
        """Negate ``__eq__``, passing ``NotImplemented`` through untouched."""
        equal = self.__eq__(other)
        return NotImplemented if equal is NotImplemented else not equal

    @abstractmethod
    def resolve(self, env: Mapping[str, Optional[str]]) -> str: ...
class Literal(Atom):
    """An atom holding fixed text; resolving it returns that text unchanged."""

    def __init__(self, value: str) -> None:
        self.value = value

    def __repr__(self) -> str:
        return f"Literal(value={self.value})"

    def __eq__(self, other: object) -> bool:
        if isinstance(other, self.__class__):
            return self.value == other.value
        return NotImplemented

    def __hash__(self) -> int:
        identity = (self.__class__, self.value)
        return hash(identity)

    def resolve(self, env: Mapping[str, Optional[str]]) -> str:
        # ``env`` is not consulted for a literal.
        return self.value
class Variable(Atom):
    """An atom referring to a named variable, with an optional default."""

    def __init__(self, name: str, default: Optional[str]) -> None:
        self.name = name
        self.default = default

    def __repr__(self) -> str:
        return f"Variable(name={self.name}, default={self.default})"

    def __eq__(self, other: object) -> bool:
        if isinstance(other, self.__class__):
            return (self.name, self.default) == (other.name, other.default)
        return NotImplemented

    def __hash__(self) -> int:
        identity = (self.__class__, self.name, self.default)
        return hash(identity)

    def resolve(self, env: Mapping[str, Optional[str]]) -> str:
        """Look the variable up in ``env``.

        A name missing from ``env`` gives the default (or ""); a name
        present with a ``None`` value gives "".
        """
        fallback = "" if self.default is None else self.default
        found = env.get(self.name, fallback)
        if found is None:
            return ""
        return found
def parse_variables(value: str) -> Iterator[Atom]:
    """Split ``value`` into Literal and Variable atoms, in order of appearance.

    Text between variable references becomes ``Literal`` atoms and each
    reference becomes a ``Variable``; empty literals are not emitted.
    """
    position = 0

    for found in _posix_variable.finditer(value):
        begin, finish = found.span()
        if position < begin:
            yield Literal(value=value[position:begin])
        yield Variable(name=found["name"], default=found["default"])
        position = finish

    # Whatever follows the last reference (or the whole string) is literal.
    if position < len(value):
        yield Literal(value=value[position:])

View File

@@ -0,0 +1 @@
__version__ = "1.1.1"

View File

@@ -0,0 +1,91 @@
Metadata-Version: 2.4
Name: Flask
Version: 3.1.2
Summary: A simple framework for building complex web applications.
Maintainer-email: Pallets <contact@palletsprojects.com>
Requires-Python: >=3.9
Description-Content-Type: text/markdown
License-Expression: BSD-3-Clause
Classifier: Development Status :: 5 - Production/Stable
Classifier: Environment :: Web Environment
Classifier: Framework :: Flask
Classifier: Intended Audience :: Developers
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python
Classifier: Topic :: Internet :: WWW/HTTP :: Dynamic Content
Classifier: Topic :: Internet :: WWW/HTTP :: WSGI
Classifier: Topic :: Internet :: WWW/HTTP :: WSGI :: Application
Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
Classifier: Typing :: Typed
License-File: LICENSE.txt
Requires-Dist: blinker>=1.9.0
Requires-Dist: click>=8.1.3
Requires-Dist: importlib-metadata>=3.6.0; python_version < '3.10'
Requires-Dist: itsdangerous>=2.2.0
Requires-Dist: jinja2>=3.1.2
Requires-Dist: markupsafe>=2.1.1
Requires-Dist: werkzeug>=3.1.0
Requires-Dist: asgiref>=3.2 ; extra == "async"
Requires-Dist: python-dotenv ; extra == "dotenv"
Project-URL: Changes, https://flask.palletsprojects.com/page/changes/
Project-URL: Chat, https://discord.gg/pallets
Project-URL: Documentation, https://flask.palletsprojects.com/
Project-URL: Donate, https://palletsprojects.com/donate
Project-URL: Source, https://github.com/pallets/flask/
Provides-Extra: async
Provides-Extra: dotenv
<div align="center"><img src="https://raw.githubusercontent.com/pallets/flask/refs/heads/stable/docs/_static/flask-name.svg" alt="" height="150"></div>
# Flask
Flask is a lightweight [WSGI] web application framework. It is designed
to make getting started quick and easy, with the ability to scale up to
complex applications. It began as a simple wrapper around [Werkzeug]
and [Jinja], and has become one of the most popular Python web
application frameworks.
Flask offers suggestions, but doesn't enforce any dependencies or
project layout. It is up to the developer to choose the tools and
libraries they want to use. There are many extensions provided by the
community that make adding new functionality easy.
[WSGI]: https://wsgi.readthedocs.io/
[Werkzeug]: https://werkzeug.palletsprojects.com/
[Jinja]: https://jinja.palletsprojects.com/
## A Simple Example
```python
# save this as app.py
from flask import Flask
app = Flask(__name__)
@app.route("/")
def hello():
return "Hello, World!"
```
```
$ flask run
* Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
```
## Donate
The Pallets organization develops and supports Flask and the libraries
it uses. In order to grow the community of contributors and users, and
allow the maintainers to devote more time to the projects, [please
donate today].
[please donate today]: https://palletsprojects.com/donate
## Contributing
See our [detailed contributing documentation][contrib] for many ways to
contribute, including reporting issues, requesting features, asking or answering
questions, and making PRs.
[contrib]: https://palletsprojects.com/contributing/

View File

@@ -0,0 +1,58 @@
../../../bin/flask,sha256=2EceiA1-gYGDcMRKhlhtGeRi4TEjBYdBG47J5LZrmdg,230
flask-3.1.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
flask-3.1.2.dist-info/METADATA,sha256=oRg63DAAIcoLAr7kzTgIEKfm8_4HMTRpmWmIptdY_js,3167
flask-3.1.2.dist-info/RECORD,,
flask-3.1.2.dist-info/REQUESTED,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
flask-3.1.2.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
flask-3.1.2.dist-info/entry_points.txt,sha256=bBP7hTOS5fz9zLtC7sPofBZAlMkEvBxu7KqS6l5lvc4,40
flask-3.1.2.dist-info/licenses/LICENSE.txt,sha256=SJqOEQhQntmKN7uYPhHg9-HTHwvY-Zp5yESOf_N9B-o,1475
flask/__init__.py,sha256=mHvJN9Swtl1RDtjCqCIYyIniK_SZ_l_hqUynOzgpJ9o,2701
flask/__main__.py,sha256=bYt9eEaoRQWdejEHFD8REx9jxVEdZptECFsV7F49Ink,30
flask/__pycache__/__init__.cpython-311.pyc,,
flask/__pycache__/__main__.cpython-311.pyc,,
flask/__pycache__/app.cpython-311.pyc,,
flask/__pycache__/blueprints.cpython-311.pyc,,
flask/__pycache__/cli.cpython-311.pyc,,
flask/__pycache__/config.cpython-311.pyc,,
flask/__pycache__/ctx.cpython-311.pyc,,
flask/__pycache__/debughelpers.cpython-311.pyc,,
flask/__pycache__/globals.cpython-311.pyc,,
flask/__pycache__/helpers.cpython-311.pyc,,
flask/__pycache__/logging.cpython-311.pyc,,
flask/__pycache__/sessions.cpython-311.pyc,,
flask/__pycache__/signals.cpython-311.pyc,,
flask/__pycache__/templating.cpython-311.pyc,,
flask/__pycache__/testing.cpython-311.pyc,,
flask/__pycache__/typing.cpython-311.pyc,,
flask/__pycache__/views.cpython-311.pyc,,
flask/__pycache__/wrappers.cpython-311.pyc,,
flask/app.py,sha256=XGqgFRsLgBhzIoB2HSftoMTIM3hjDiH6rdV7c3g3IKc,61744
flask/blueprints.py,sha256=p5QE2lY18GItbdr_RKRpZ8Do17g0PvQGIgZkSUDhX2k,4541
flask/cli.py,sha256=Pfh72-BxlvoH0QHCDOc1HvXG7Kq5Xetf3zzNz2kNSHk,37184
flask/config.py,sha256=PiqF0DPam6HW0FH4CH1hpXTBe30NSzjPEOwrz1b6kt0,13219
flask/ctx.py,sha256=sPKzahqtgxaS7O0y9E_NzUJNUDyTD6M4GkDrVu2fU3Y,15064
flask/debughelpers.py,sha256=PGIDhStW_efRjpaa3zHIpo-htStJOR41Ip3OJWPYBwo,6080
flask/globals.py,sha256=XdQZmStBmPIs8t93tjx6pO7Bm3gobAaONWkFcUHaGas,1713
flask/helpers.py,sha256=rJZge7_J288J1UQv5-kNf4oEaw332PP8NTW0QRIBbXE,23517
flask/json/__init__.py,sha256=hLNR898paqoefdeAhraa5wyJy-bmRB2k2dV4EgVy2Z8,5602
flask/json/__pycache__/__init__.cpython-311.pyc,,
flask/json/__pycache__/provider.cpython-311.pyc,,
flask/json/__pycache__/tag.cpython-311.pyc,,
flask/json/provider.py,sha256=5imEzY5HjV2HoUVrQbJLqXCzMNpZXfD0Y1XqdLV2XBA,7672
flask/json/tag.py,sha256=DhaNwuIOhdt2R74oOC9Y4Z8ZprxFYiRb5dUP5byyINw,9281
flask/logging.py,sha256=8sM3WMTubi1cBb2c_lPkWpN0J8dMAqrgKRYLLi1dCVI,2377
flask/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
flask/sansio/README.md,sha256=-0X1tECnilmz1cogx-YhNw5d7guK7GKrq_DEV2OzlU0,228
flask/sansio/__pycache__/app.cpython-311.pyc,,
flask/sansio/__pycache__/blueprints.cpython-311.pyc,,
flask/sansio/__pycache__/scaffold.cpython-311.pyc,,
flask/sansio/app.py,sha256=5EbxwHOchgcpZqQyalA9vyDBopknOvDg6BVwXFyFD2s,38099
flask/sansio/blueprints.py,sha256=Tqe-7EkZ-tbWchm8iDoCfD848f0_3nLv6NNjeIPvHwM,24637
flask/sansio/scaffold.py,sha256=wSASXYdFRWJmqcL0Xq-T7N-PDVUSiFGvjO9kPZg58bk,30371
flask/sessions.py,sha256=duvYGmCGh_H3cgMuy2oeSjrCsCvLylF4CBKOXpN0Qms,15480
flask/signals.py,sha256=V7lMUww7CqgJ2ThUBn1PiatZtQanOyt7OZpu2GZI-34,750
flask/templating.py,sha256=IHsdsF-eBJPCJE0AJLCi1VhhnytOGdzHCn3yThz87c4,7536
flask/testing.py,sha256=zzC7XxhBWOP9H697IV_4SG7Lg3Lzb5PWiyEP93_KQXE,10117
flask/typing.py,sha256=L-L5t2jKgS0aOmVhioQ_ylqcgiVFnA6yxO-RLNhq-GU,3293
flask/views.py,sha256=xzJx6oJqGElThtEghZN7ZQGMw5TDFyuRxUkecwRuAoA,6962
flask/wrappers.py,sha256=jUkv4mVek2Iq4hwxd4RvqrIMb69Bv0PElDgWLmd5ORo,9406

View File

@@ -0,0 +1,4 @@
Wheel-Version: 1.0
Generator: flit 3.12.0
Root-Is-Purelib: true
Tag: py3-none-any

View File

@@ -0,0 +1,3 @@
[console_scripts]
flask=flask.cli:main

Some files were not shown because too many files have changed in this diff Show More