diff --git a/client/client_test.go b/client/client_test.go index 2fd68cc..8b4dfe9 100644 --- a/client/client_test.go +++ b/client/client_test.go @@ -5,37 +5,11 @@ import ( "time" "github.com/stretchr/testify/require" "fmt" - "crypto/cipher" - "crypto/aes" ) -type setivable interface { - cipher.BlockMode - SetIV([]byte) -} - - -func Test_Poop(t *testing.T) { - var k []byte = make([]byte, 16) - var iv []byte = k - b, e := aes.NewCipher(k) - if e != nil { - fmt.Println(e) - return - } - var c = cipher.NewCBCEncrypter(b, iv) - switch cp := c.(type) { - case setivable: - fmt.Println("Haha!") - cp.SetIV(k) - case cipher.BlockMode: - fmt.Println("Hoho!") - } -} - func initTest(t *testing.T) Client { var config HttpConfig = HttpConfig{ - Addr: "http://127.0.0.1:8080", + Addr: "http://172.17.0.2:8080", Username: "admin", Password: "admin", Timeout: time.Second * 5, @@ -59,7 +33,7 @@ func TestClient_getToken(t *testing.T) { func TestClient_GetRealms(t *testing.T) { var client Client = initTest(t) - var realms []map[string]interface{} + var realms []RealmRepresentation { var err error realms, err = client.GetRealms() diff --git a/client/common.go b/client/common.go index 6af83c8..22dd5b0 100644 --- a/client/common.go +++ b/client/common.go @@ -7,13 +7,14 @@ import ( "time" "net/http" "gopkg.in/h2non/gentleman.v2" + "gopkg.in/h2non/gentleman.v2/plugin" "gopkg.in/h2non/gentleman.v2/plugins/timeout" //"gopkg.in/h2non/gentleman.v2/plugins/multipart" ) type Client interface { - GetRealms() ([]map[string]interface{}, error) + GetRealms() ([]RealmRepresentation, error) GetUsers(realm string) ([]UserRepresentation, error) } @@ -43,7 +44,7 @@ func NewHttpClient(config HttpConfig) (Client, error) { } if u.Scheme != "http" { - var m string = fmt.Sprint("Unsupported protocol %s. Your address must start with http://", u.Scheme) + var m string = fmt.Sprintf("Unsupported protocol %s. Your address must start with http://", u.Scheme) return nil, errors.New(m) } @@ -112,11 +113,14 @@ func (c *client) getToken() error { return nil } -func (c *client) do(path string) (*gentleman.Response, error) { +func (c *client) do(path string, plugins ...plugin.Plugin) (*gentleman.Response, error) { var req *gentleman.Request = c.httpClient.Get() { req = req.Path(path) req = req.SetHeader("Authorization", fmt.Sprintf("Bearer %s", c.accessToken)) + for _, p := range plugins { + req = req.Use(p) + } } var resp *gentleman.Response { diff --git a/client/realms.go b/client/realms.go index 0c95250..b24c5ad 100644 --- a/client/realms.go +++ b/client/realms.go @@ -7,7 +7,7 @@ import ( "encoding/json" ) -func (c *client) GetRealms() ([]map[string]interface{}, error) { +func (c *client) GetRealms() ([]RealmRepresentation, error) { var getRealms_Path string = "/auth/admin/realms" var resp *gentleman.Response { @@ -17,7 +17,7 @@ func (c *client) GetRealms() ([]map[string]interface{}, error) { return nil, errors.Wrap(err, "Get Realms failed.") } } - var result []map[string]interface{} + var result []RealmRepresentation { var err error err = json.Unmarshal(resp.Bytes(), &result) diff --git a/utils/.idea/misc.xml b/utils/.idea/misc.xml deleted file mode 100644 index d8e6b49..0000000 --- a/utils/.idea/misc.xml +++ /dev/null @@ -1,4 +0,0 @@ - - - - \ No newline at end of file diff --git a/utils/.idea/modules.xml b/utils/.idea/modules.xml deleted file mode 100644 index 2213c52..0000000 --- a/utils/.idea/modules.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/utils/.idea/utils.iml b/utils/.idea/utils.iml deleted file mode 100644 index 2e70106..0000000 --- a/utils/.idea/utils.iml +++ /dev/null @@ -1,11 +0,0 @@ - - - - - - - - - - \ No newline at end of file diff --git a/utils/.idea/workspace.xml b/utils/.idea/workspace.xml deleted file mode 100644 index fad9dca..0000000 --- a/utils/.idea/workspace.xml +++ /dev/null @@ -1,313 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - print - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 1500550169408 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/utils/bin/activate b/utils/bin/activate deleted file mode 100644 index f4e7699..0000000 --- a/utils/bin/activate +++ /dev/null @@ -1,78 +0,0 @@ -# This file must be used with "source bin/activate" *from bash* -# you cannot run it directly - -deactivate () { - unset -f pydoc >/dev/null 2>&1 - - # reset old environment variables - # ! [ -z ${VAR+_} ] returns true if VAR is declared at all - if ! [ -z "${_OLD_VIRTUAL_PATH+_}" ] ; then - PATH="$_OLD_VIRTUAL_PATH" - export PATH - unset _OLD_VIRTUAL_PATH - fi - if ! [ -z "${_OLD_VIRTUAL_PYTHONHOME+_}" ] ; then - PYTHONHOME="$_OLD_VIRTUAL_PYTHONHOME" - export PYTHONHOME - unset _OLD_VIRTUAL_PYTHONHOME - fi - - # This should detect bash and zsh, which have a hash command that must - # be called to get it to forget past commands. Without forgetting - # past commands the $PATH changes we made may not be respected - if [ -n "${BASH-}" ] || [ -n "${ZSH_VERSION-}" ] ; then - hash -r 2>/dev/null - fi - - if ! [ -z "${_OLD_VIRTUAL_PS1+_}" ] ; then - PS1="$_OLD_VIRTUAL_PS1" - export PS1 - unset _OLD_VIRTUAL_PS1 - fi - - unset VIRTUAL_ENV - if [ ! "${1-}" = "nondestructive" ] ; then - # Self destruct! - unset -f deactivate - fi -} - -# unset irrelevant variables -deactivate nondestructive - -VIRTUAL_ENV="/root/cloudtrust/go/src/github.com/elca-kairos-py/keycloak/utils" -export VIRTUAL_ENV - -_OLD_VIRTUAL_PATH="$PATH" -PATH="$VIRTUAL_ENV/bin:$PATH" -export PATH - -# unset PYTHONHOME if set -if ! [ -z "${PYTHONHOME+_}" ] ; then - _OLD_VIRTUAL_PYTHONHOME="$PYTHONHOME" - unset PYTHONHOME -fi - -if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT-}" ] ; then - _OLD_VIRTUAL_PS1="$PS1" - if [ "x" != x ] ; then - PS1="$PS1" - else - PS1="(`basename \"$VIRTUAL_ENV\"`) $PS1" - fi - export PS1 -fi - -# Make sure to unalias pydoc if it's already there -alias pydoc 2>/dev/null >/dev/null && unalias pydoc - -pydoc () { - python -m pydoc "$@" -} - -# This should detect bash and zsh, which have a hash command that must -# be called to get it to forget past commands. Without forgetting -# past commands the $PATH changes we made may not be respected -if [ -n "${BASH-}" ] || [ -n "${ZSH_VERSION-}" ] ; then - hash -r 2>/dev/null -fi diff --git a/utils/bin/activate.csh b/utils/bin/activate.csh deleted file mode 100644 index ac3fc9e..0000000 --- a/utils/bin/activate.csh +++ /dev/null @@ -1,36 +0,0 @@ -# This file must be used with "source bin/activate.csh" *from csh*. -# You cannot run it directly. -# Created by Davide Di Blasi . - -alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PATH" && unset _OLD_VIRTUAL_PATH; rehash; test $?_OLD_VIRTUAL_PROMPT != 0 && set prompt="$_OLD_VIRTUAL_PROMPT" && unset _OLD_VIRTUAL_PROMPT; unsetenv VIRTUAL_ENV; test "\!:*" != "nondestructive" && unalias deactivate && unalias pydoc' - -# Unset irrelevant variables. -deactivate nondestructive - -setenv VIRTUAL_ENV "/root/cloudtrust/go/src/github.com/elca-kairos-py/keycloak/utils" - -set _OLD_VIRTUAL_PATH="$PATH" -setenv PATH "$VIRTUAL_ENV/bin:$PATH" - - - -if ("" != "") then - set env_name = "" -else - set env_name = `basename "$VIRTUAL_ENV"` -endif - -# Could be in a non-interactive environment, -# in which case, $prompt is undefined and we wouldn't -# care about the prompt anyway. -if ( $?prompt ) then - set _OLD_VIRTUAL_PROMPT="$prompt" - set prompt = "[$env_name] $prompt" -endif - -unset env_name - -alias pydoc python -m pydoc - -rehash - diff --git a/utils/bin/activate.fish b/utils/bin/activate.fish deleted file mode 100644 index 048f66c..0000000 --- a/utils/bin/activate.fish +++ /dev/null @@ -1,76 +0,0 @@ -# This file must be used using `. bin/activate.fish` *within a running fish ( http://fishshell.com ) session*. -# Do not run it directly. - -function deactivate -d 'Exit virtualenv mode and return to the normal environment.' - # reset old environment variables - if test -n "$_OLD_VIRTUAL_PATH" - set -gx PATH $_OLD_VIRTUAL_PATH - set -e _OLD_VIRTUAL_PATH - end - - if test -n "$_OLD_VIRTUAL_PYTHONHOME" - set -gx PYTHONHOME $_OLD_VIRTUAL_PYTHONHOME - set -e _OLD_VIRTUAL_PYTHONHOME - end - - if test -n "$_OLD_FISH_PROMPT_OVERRIDE" - # Set an empty local `$fish_function_path` to allow the removal of `fish_prompt` using `functions -e`. - set -l fish_function_path - - # Erase virtualenv's `fish_prompt` and restore the original. - functions -e fish_prompt - functions -c _old_fish_prompt fish_prompt - functions -e _old_fish_prompt - set -e _OLD_FISH_PROMPT_OVERRIDE - end - - set -e VIRTUAL_ENV - - if test "$argv[1]" != 'nondestructive' - # Self-destruct! - functions -e pydoc - functions -e deactivate - end -end - -# Unset irrelevant variables. -deactivate nondestructive - -set -gx VIRTUAL_ENV "/root/cloudtrust/go/src/github.com/elca-kairos-py/keycloak/utils" - -set -gx _OLD_VIRTUAL_PATH $PATH -set -gx PATH "$VIRTUAL_ENV/bin" $PATH - -# Unset `$PYTHONHOME` if set. -if set -q PYTHONHOME - set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME - set -e PYTHONHOME -end - -function pydoc - python -m pydoc $argv -end - -if test -z "$VIRTUAL_ENV_DISABLE_PROMPT" - # Copy the current `fish_prompt` function as `_old_fish_prompt`. - functions -c fish_prompt _old_fish_prompt - - function fish_prompt - # Save the current $status, for fish_prompts that display it. - set -l old_status $status - - # Prompt override provided? - # If not, just prepend the environment name. - if test -n "" - printf '%s%s' "" (set_color normal) - else - printf '%s(%s) ' (set_color normal) (basename "$VIRTUAL_ENV") - end - - # Restore the original $status - echo "exit $old_status" | source - _old_fish_prompt - end - - set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV" -end diff --git a/utils/bin/activate_this.py b/utils/bin/activate_this.py deleted file mode 100644 index f18193b..0000000 --- a/utils/bin/activate_this.py +++ /dev/null @@ -1,34 +0,0 @@ -"""By using execfile(this_file, dict(__file__=this_file)) you will -activate this virtualenv environment. - -This can be used when you must use an existing Python interpreter, not -the virtualenv bin/python -""" - -try: - __file__ -except NameError: - raise AssertionError( - "You must run this like execfile('path/to/activate_this.py', dict(__file__='path/to/activate_this.py'))") -import sys -import os - -old_os_path = os.environ.get('PATH', '') -os.environ['PATH'] = os.path.dirname(os.path.abspath(__file__)) + os.pathsep + old_os_path -base = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -if sys.platform == 'win32': - site_packages = os.path.join(base, 'Lib', 'site-packages') -else: - site_packages = os.path.join(base, 'lib', 'python%s' % sys.version[:3], 'site-packages') -prev_sys_path = list(sys.path) -import site -site.addsitedir(site_packages) -sys.real_prefix = sys.prefix -sys.prefix = base -# Move the added items to the front of the path: -new_sys_path = [] -for item in list(sys.path): - if item not in prev_sys_path: - new_sys_path.append(item) - sys.path.remove(item) -sys.path[:0] = new_sys_path diff --git a/utils/bin/chardetect b/utils/bin/chardetect deleted file mode 100755 index e45a6d4..0000000 --- a/utils/bin/chardetect +++ /dev/null @@ -1,11 +0,0 @@ -#!/root/cloudtrust/go/src/github.com/elca-kairos-py/keycloak/utils/bin/python3 - -# -*- coding: utf-8 -*- -import re -import sys - -from chardet.cli.chardetect import main - -if __name__ == '__main__': - sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0]) - sys.exit(main()) diff --git a/utils/bin/easy_install b/utils/bin/easy_install deleted file mode 100755 index 0bb770d..0000000 --- a/utils/bin/easy_install +++ /dev/null @@ -1,11 +0,0 @@ -#!/root/cloudtrust/go/src/github.com/elca-kairos-py/keycloak/utils/bin/python3 - -# -*- coding: utf-8 -*- -import re -import sys - -from setuptools.command.easy_install import main - -if __name__ == '__main__': - sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) - sys.exit(main()) diff --git a/utils/bin/easy_install-3.5 b/utils/bin/easy_install-3.5 deleted file mode 100755 index 0bb770d..0000000 --- a/utils/bin/easy_install-3.5 +++ /dev/null @@ -1,11 +0,0 @@ -#!/root/cloudtrust/go/src/github.com/elca-kairos-py/keycloak/utils/bin/python3 - -# -*- coding: utf-8 -*- -import re -import sys - -from setuptools.command.easy_install import main - -if __name__ == '__main__': - sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) - sys.exit(main()) diff --git a/utils/bin/pip b/utils/bin/pip deleted file mode 100755 index 2a0f09d..0000000 --- a/utils/bin/pip +++ /dev/null @@ -1,11 +0,0 @@ -#!/root/cloudtrust/go/src/github.com/elca-kairos-py/keycloak/utils/bin/python3 - -# -*- coding: utf-8 -*- -import re -import sys - -from pip import main - -if __name__ == '__main__': - sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) - sys.exit(main()) diff --git a/utils/bin/pip3 b/utils/bin/pip3 deleted file mode 100755 index 2a0f09d..0000000 --- a/utils/bin/pip3 +++ /dev/null @@ -1,11 +0,0 @@ -#!/root/cloudtrust/go/src/github.com/elca-kairos-py/keycloak/utils/bin/python3 - -# -*- coding: utf-8 -*- -import re -import sys - -from pip import main - -if __name__ == '__main__': - sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) - sys.exit(main()) diff --git a/utils/bin/pip3.5 b/utils/bin/pip3.5 deleted file mode 100755 index 2a0f09d..0000000 --- a/utils/bin/pip3.5 +++ /dev/null @@ -1,11 +0,0 @@ -#!/root/cloudtrust/go/src/github.com/elca-kairos-py/keycloak/utils/bin/python3 - -# -*- coding: utf-8 -*- -import re -import sys - -from pip import main - -if __name__ == '__main__': - sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) - sys.exit(main()) diff --git a/utils/bin/python b/utils/bin/python deleted file mode 120000 index b8a0adb..0000000 --- a/utils/bin/python +++ /dev/null @@ -1 +0,0 @@ -python3 \ No newline at end of file diff --git a/utils/bin/python-config b/utils/bin/python-config deleted file mode 100755 index 4449dab..0000000 --- a/utils/bin/python-config +++ /dev/null @@ -1,78 +0,0 @@ -#!/root/cloudtrust/go/src/github.com/elca-kairos-py/keycloak/utils/bin/python - -import sys -import getopt -import sysconfig - -valid_opts = ['prefix', 'exec-prefix', 'includes', 'libs', 'cflags', - 'ldflags', 'help'] - -if sys.version_info >= (3, 2): - valid_opts.insert(-1, 'extension-suffix') - valid_opts.append('abiflags') -if sys.version_info >= (3, 3): - valid_opts.append('configdir') - - -def exit_with_usage(code=1): - sys.stderr.write("Usage: {0} [{1}]\n".format( - sys.argv[0], '|'.join('--'+opt for opt in valid_opts))) - sys.exit(code) - -try: - opts, args = getopt.getopt(sys.argv[1:], '', valid_opts) -except getopt.error: - exit_with_usage() - -if not opts: - exit_with_usage() - -pyver = sysconfig.get_config_var('VERSION') -getvar = sysconfig.get_config_var - -opt_flags = [flag for (flag, val) in opts] - -if '--help' in opt_flags: - exit_with_usage(code=0) - -for opt in opt_flags: - if opt == '--prefix': - print(sysconfig.get_config_var('prefix')) - - elif opt == '--exec-prefix': - print(sysconfig.get_config_var('exec_prefix')) - - elif opt in ('--includes', '--cflags'): - flags = ['-I' + sysconfig.get_path('include'), - '-I' + sysconfig.get_path('platinclude')] - if opt == '--cflags': - flags.extend(getvar('CFLAGS').split()) - print(' '.join(flags)) - - elif opt in ('--libs', '--ldflags'): - abiflags = getattr(sys, 'abiflags', '') - libs = ['-lpython' + pyver + abiflags] - libs += getvar('LIBS').split() - libs += getvar('SYSLIBS').split() - # add the prefix/lib/pythonX.Y/config dir, but only if there is no - # shared library in prefix/lib/. - if opt == '--ldflags': - if not getvar('Py_ENABLE_SHARED'): - libs.insert(0, '-L' + getvar('LIBPL')) - if not getvar('PYTHONFRAMEWORK'): - libs.extend(getvar('LINKFORSHARED').split()) - print(' '.join(libs)) - - elif opt == '--extension-suffix': - ext_suffix = sysconfig.get_config_var('EXT_SUFFIX') - if ext_suffix is None: - ext_suffix = sysconfig.get_config_var('SO') - print(ext_suffix) - - elif opt == '--abiflags': - if not getattr(sys, 'abiflags', None): - exit_with_usage() - print(sys.abiflags) - - elif opt == '--configdir': - print(sysconfig.get_config_var('LIBPL')) diff --git a/utils/bin/python3 b/utils/bin/python3 deleted file mode 100755 index b6eb71c..0000000 Binary files a/utils/bin/python3 and /dev/null differ diff --git a/utils/bin/python3.5 b/utils/bin/python3.5 deleted file mode 120000 index b8a0adb..0000000 --- a/utils/bin/python3.5 +++ /dev/null @@ -1 +0,0 @@ -python3 \ No newline at end of file diff --git a/utils/bin/wheel b/utils/bin/wheel deleted file mode 100755 index 7d68eb3..0000000 --- a/utils/bin/wheel +++ /dev/null @@ -1,11 +0,0 @@ -#!/root/cloudtrust/go/src/github.com/elca-kairos-py/keycloak/utils/bin/python3 - -# -*- coding: utf-8 -*- -import re -import sys - -from wheel.tool import main - -if __name__ == '__main__': - sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) - sys.exit(main()) diff --git a/utils/file_parser.py b/utils/file_parser.py deleted file mode 100755 index abd72d9..0000000 --- a/utils/file_parser.py +++ /dev/null @@ -1,91 +0,0 @@ -#!/usr/bin/env python3 - -import sys -import re - -#Parameter required or optional -OPT_TOKEN = 'optional' -REQ_TOKEN = 'required' -REQ_TOKENS = set([OPT_TOKEN,REQ_TOKEN]) - -#Direct Type tokens (directly mapped to golang) -DIRECT_INT_TYPE_TOKENS = set(['int16','int32','int64']) - -#Typed Arrays -STL_OPEN_TOKEN = '<' -STL_CLOSE_TOKEN = '>' -ARRAY_TOKEN = 'array' - -#Type tokens -MAP_TYPE_TOKEN = 'Map' -INT_TYPE_TOKEN = 'integer' -BOOL_TYPE_TOKEN = 'boolean' -STRING_TYPE_TOKEN = 'string' -ENUM_TYPE_TOKEN = 'enum' -TYPE_TOKENS = set([MAP_TYPE_TOKEN,INT_TYPE_TOKEN,BOOL_TYPE_TOKEN,STRING_TYPE_TOKEN, ENUM_TYPE_TOKEN]) - -MAP_TOKEN_TO_GO = { - MAP_TYPE_TOKEN:"map[string]interface{}", - BOOL_TYPE_TOKEN:"bool", - STRING_TYPE_TOKEN:"string", - ENUM_TYPE_TOKEN:"string", -} - -def panic(*args): - eprint(*args) - sys.exit(1) - -def eprint(*args): - print(*args, file=sys.stderr) - -def to_go_array(go_type): - return "[]%s" % go_type - -def is_req(split_line): - req = split_line.pop(0) - if req not in REQ_TOKENS: - panic("Token %s does not match requirement tokens" % req) - if req == REQ_TOKEN: - return True - return False - -def get_type(split_line): - token = split_line.pop(0) - if token in TYPE_TOKENS: - if token == INT_TYPE_TOKEN: - token = split_line.pop() - if token not in DIRECT_INT_TYPE_TOKENS: - panic("%s is wrong int type!" % token) - return token - return MAP_TOKEN_TO_GO[token] - if token == STL_OPEN_TOKEN: - return to_go_array(get_type(split_line)) - return token - -def parse_line(split_line): - field_name = split_line.pop(0) - if field_name == type: - field_name = "typ" - exported_field_name = field_name[0].upper() + field_name[1:] - required = is_req(split_line) - go_type = get_type(split_line) - if required: - res = """\t%s *%s `json:"%s"`\n""" % (exported_field_name,go_type,field_name) - else: - res = """\t%s *%s `json:"%s,omitempty"`\n""" % (exported_field_name,go_type,field_name) - return res - - -def main(): - if len(sys.argv) < 2: - eprint("Usage : %s file" % sys.argv[0]) - return - res = "type %s struct { \n" % sys.argv[1] - with open(sys.argv[1], 'r') as table: - for line in table.readlines(): - res += parse_line([x for x in re.split('\s+|\(|\)',line) if x]) - res += "}\n" - print(res) - -if __name__ == "__main__": - main() diff --git a/utils/html_parser.py b/utils/html_parser.py deleted file mode 100755 index 633be1f..0000000 --- a/utils/html_parser.py +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env python3 - -from bs4 import BeautifulSoup -import requests - -keycloak_doc = requests.get("http://www.keycloak.org/docs-api/3.2/rest-api/index.html").text - -soup = BeautifulSoup(keycloak_doc, "lxml") - -for div in soup.body.find_all('div'): - if div.h2: - if div.h2.string == "Definitions": - definitions=div.div - break - -for div in definitions.find_all('div'): - name = div.h3.string - print("\n",name,"\n") - #print(div) - with open("./resources/{}".format(name), "w") as f : - try: - for tr in div.table.tbody.find_all('tr'): - tds = tr.find_all('td') - field_name = tds[0].p.strong.string - field_req = tds[0].p.em.string - if tds[1].p.string: - field_type = tds[1].p.string - else: - field_type = tds[1].p.a.string - if field_req not in set(['optional','required']): - field_req = 'optional' - print(field_name + ' ' + field_req + ' ' + field_type) - f.write(field_name + ' ' + field_req + ' ' + field_type + '\n') - - except : - print("WARNING!!!", name) - -#print(definitions.find_all('div')[-1]) diff --git a/utils/include/python3.5m b/utils/include/python3.5m deleted file mode 120000 index f39ee13..0000000 --- a/utils/include/python3.5m +++ /dev/null @@ -1 +0,0 @@ -/usr/include/python3.5m \ No newline at end of file diff --git a/utils/lib/python3.5/__future__.py b/utils/lib/python3.5/__future__.py deleted file mode 120000 index b6ab033..0000000 --- a/utils/lib/python3.5/__future__.py +++ /dev/null @@ -1 +0,0 @@ -/usr/lib64/python3.5/__future__.py \ No newline at end of file diff --git a/utils/lib/python3.5/__pycache__/__future__.cpython-35.pyc b/utils/lib/python3.5/__pycache__/__future__.cpython-35.pyc deleted file mode 100644 index 2bfbb52..0000000 Binary files a/utils/lib/python3.5/__pycache__/__future__.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/_bootlocale.cpython-35.pyc b/utils/lib/python3.5/__pycache__/_bootlocale.cpython-35.pyc deleted file mode 100644 index 680f53f..0000000 Binary files a/utils/lib/python3.5/__pycache__/_bootlocale.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/_collections_abc.cpython-35.pyc b/utils/lib/python3.5/__pycache__/_collections_abc.cpython-35.pyc deleted file mode 100644 index 5c091f2..0000000 Binary files a/utils/lib/python3.5/__pycache__/_collections_abc.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/_weakrefset.cpython-35.pyc b/utils/lib/python3.5/__pycache__/_weakrefset.cpython-35.pyc deleted file mode 100644 index 7453704..0000000 Binary files a/utils/lib/python3.5/__pycache__/_weakrefset.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/abc.cpython-35.pyc b/utils/lib/python3.5/__pycache__/abc.cpython-35.pyc deleted file mode 100644 index 66cf942..0000000 Binary files a/utils/lib/python3.5/__pycache__/abc.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/base64.cpython-35.pyc b/utils/lib/python3.5/__pycache__/base64.cpython-35.pyc deleted file mode 100644 index ada3267..0000000 Binary files a/utils/lib/python3.5/__pycache__/base64.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/bisect.cpython-35.pyc b/utils/lib/python3.5/__pycache__/bisect.cpython-35.pyc deleted file mode 100644 index 3873f2a..0000000 Binary files a/utils/lib/python3.5/__pycache__/bisect.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/codecs.cpython-35.pyc b/utils/lib/python3.5/__pycache__/codecs.cpython-35.pyc deleted file mode 100644 index a4bbe49..0000000 Binary files a/utils/lib/python3.5/__pycache__/codecs.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/copy.cpython-35.pyc b/utils/lib/python3.5/__pycache__/copy.cpython-35.pyc deleted file mode 100644 index ab5c172..0000000 Binary files a/utils/lib/python3.5/__pycache__/copy.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/copyreg.cpython-35.pyc b/utils/lib/python3.5/__pycache__/copyreg.cpython-35.pyc deleted file mode 100644 index 350a0ad..0000000 Binary files a/utils/lib/python3.5/__pycache__/copyreg.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/fnmatch.cpython-35.pyc b/utils/lib/python3.5/__pycache__/fnmatch.cpython-35.pyc deleted file mode 100644 index 70fc0d8..0000000 Binary files a/utils/lib/python3.5/__pycache__/fnmatch.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/functools.cpython-35.pyc b/utils/lib/python3.5/__pycache__/functools.cpython-35.pyc deleted file mode 100644 index 8c325e9..0000000 Binary files a/utils/lib/python3.5/__pycache__/functools.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/genericpath.cpython-35.pyc b/utils/lib/python3.5/__pycache__/genericpath.cpython-35.pyc deleted file mode 100644 index 826dcfd..0000000 Binary files a/utils/lib/python3.5/__pycache__/genericpath.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/hashlib.cpython-35.pyc b/utils/lib/python3.5/__pycache__/hashlib.cpython-35.pyc deleted file mode 100644 index 0205496..0000000 Binary files a/utils/lib/python3.5/__pycache__/hashlib.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/heapq.cpython-35.pyc b/utils/lib/python3.5/__pycache__/heapq.cpython-35.pyc deleted file mode 100644 index 35e8b35..0000000 Binary files a/utils/lib/python3.5/__pycache__/heapq.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/hmac.cpython-35.pyc b/utils/lib/python3.5/__pycache__/hmac.cpython-35.pyc deleted file mode 100644 index ca588c1..0000000 Binary files a/utils/lib/python3.5/__pycache__/hmac.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/imp.cpython-35.pyc b/utils/lib/python3.5/__pycache__/imp.cpython-35.pyc deleted file mode 100644 index e70fd9d..0000000 Binary files a/utils/lib/python3.5/__pycache__/imp.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/io.cpython-35.pyc b/utils/lib/python3.5/__pycache__/io.cpython-35.pyc deleted file mode 100644 index 8a05615..0000000 Binary files a/utils/lib/python3.5/__pycache__/io.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/keyword.cpython-35.pyc b/utils/lib/python3.5/__pycache__/keyword.cpython-35.pyc deleted file mode 100644 index 6ded8f0..0000000 Binary files a/utils/lib/python3.5/__pycache__/keyword.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/linecache.cpython-35.pyc b/utils/lib/python3.5/__pycache__/linecache.cpython-35.pyc deleted file mode 100644 index 35f20a8..0000000 Binary files a/utils/lib/python3.5/__pycache__/linecache.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/locale.cpython-35.pyc b/utils/lib/python3.5/__pycache__/locale.cpython-35.pyc deleted file mode 100644 index 44f107e..0000000 Binary files a/utils/lib/python3.5/__pycache__/locale.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/operator.cpython-35.pyc b/utils/lib/python3.5/__pycache__/operator.cpython-35.pyc deleted file mode 100644 index 6b836c9..0000000 Binary files a/utils/lib/python3.5/__pycache__/operator.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/os.cpython-35.pyc b/utils/lib/python3.5/__pycache__/os.cpython-35.pyc deleted file mode 100644 index d36dc21..0000000 Binary files a/utils/lib/python3.5/__pycache__/os.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/posixpath.cpython-35.pyc b/utils/lib/python3.5/__pycache__/posixpath.cpython-35.pyc deleted file mode 100644 index 262a076..0000000 Binary files a/utils/lib/python3.5/__pycache__/posixpath.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/random.cpython-35.pyc b/utils/lib/python3.5/__pycache__/random.cpython-35.pyc deleted file mode 100644 index 75e7822..0000000 Binary files a/utils/lib/python3.5/__pycache__/random.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/re.cpython-35.pyc b/utils/lib/python3.5/__pycache__/re.cpython-35.pyc deleted file mode 100644 index b0f2d0b..0000000 Binary files a/utils/lib/python3.5/__pycache__/re.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/reprlib.cpython-35.pyc b/utils/lib/python3.5/__pycache__/reprlib.cpython-35.pyc deleted file mode 100644 index 73c34e3..0000000 Binary files a/utils/lib/python3.5/__pycache__/reprlib.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/shutil.cpython-35.pyc b/utils/lib/python3.5/__pycache__/shutil.cpython-35.pyc deleted file mode 100644 index 878d9d9..0000000 Binary files a/utils/lib/python3.5/__pycache__/shutil.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/site.cpython-35.pyc b/utils/lib/python3.5/__pycache__/site.cpython-35.pyc deleted file mode 100644 index e8f2304..0000000 Binary files a/utils/lib/python3.5/__pycache__/site.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/sre_compile.cpython-35.pyc b/utils/lib/python3.5/__pycache__/sre_compile.cpython-35.pyc deleted file mode 100644 index 246bb73..0000000 Binary files a/utils/lib/python3.5/__pycache__/sre_compile.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/sre_constants.cpython-35.pyc b/utils/lib/python3.5/__pycache__/sre_constants.cpython-35.pyc deleted file mode 100644 index a91c8a1..0000000 Binary files a/utils/lib/python3.5/__pycache__/sre_constants.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/sre_parse.cpython-35.pyc b/utils/lib/python3.5/__pycache__/sre_parse.cpython-35.pyc deleted file mode 100644 index 44bbeae..0000000 Binary files a/utils/lib/python3.5/__pycache__/sre_parse.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/stat.cpython-35.pyc b/utils/lib/python3.5/__pycache__/stat.cpython-35.pyc deleted file mode 100644 index 336e008..0000000 Binary files a/utils/lib/python3.5/__pycache__/stat.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/struct.cpython-35.pyc b/utils/lib/python3.5/__pycache__/struct.cpython-35.pyc deleted file mode 100644 index 5f84494..0000000 Binary files a/utils/lib/python3.5/__pycache__/struct.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/tarfile.cpython-35.pyc b/utils/lib/python3.5/__pycache__/tarfile.cpython-35.pyc deleted file mode 100644 index 5d09a9a..0000000 Binary files a/utils/lib/python3.5/__pycache__/tarfile.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/tempfile.cpython-35.pyc b/utils/lib/python3.5/__pycache__/tempfile.cpython-35.pyc deleted file mode 100644 index bb12580..0000000 Binary files a/utils/lib/python3.5/__pycache__/tempfile.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/token.cpython-35.pyc b/utils/lib/python3.5/__pycache__/token.cpython-35.pyc deleted file mode 100644 index 0f79772..0000000 Binary files a/utils/lib/python3.5/__pycache__/token.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/tokenize.cpython-35.pyc b/utils/lib/python3.5/__pycache__/tokenize.cpython-35.pyc deleted file mode 100644 index b13aa73..0000000 Binary files a/utils/lib/python3.5/__pycache__/tokenize.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/types.cpython-35.pyc b/utils/lib/python3.5/__pycache__/types.cpython-35.pyc deleted file mode 100644 index 5dc2459..0000000 Binary files a/utils/lib/python3.5/__pycache__/types.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/warnings.cpython-35.pyc b/utils/lib/python3.5/__pycache__/warnings.cpython-35.pyc deleted file mode 100644 index 8e19c6e..0000000 Binary files a/utils/lib/python3.5/__pycache__/warnings.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/__pycache__/weakref.cpython-35.pyc b/utils/lib/python3.5/__pycache__/weakref.cpython-35.pyc deleted file mode 100644 index 1f5733f..0000000 Binary files a/utils/lib/python3.5/__pycache__/weakref.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/_bootlocale.py b/utils/lib/python3.5/_bootlocale.py deleted file mode 120000 index 83c9424..0000000 --- a/utils/lib/python3.5/_bootlocale.py +++ /dev/null @@ -1 +0,0 @@ -/usr/lib64/python3.5/_bootlocale.py \ No newline at end of file diff --git a/utils/lib/python3.5/_collections_abc.py b/utils/lib/python3.5/_collections_abc.py deleted file mode 120000 index 0e4b41f..0000000 --- a/utils/lib/python3.5/_collections_abc.py +++ /dev/null @@ -1 +0,0 @@ -/usr/lib64/python3.5/_collections_abc.py \ No newline at end of file diff --git a/utils/lib/python3.5/_dummy_thread.py b/utils/lib/python3.5/_dummy_thread.py deleted file mode 120000 index 7917de0..0000000 --- a/utils/lib/python3.5/_dummy_thread.py +++ /dev/null @@ -1 +0,0 @@ -/usr/lib64/python3.5/_dummy_thread.py \ No newline at end of file diff --git a/utils/lib/python3.5/_weakrefset.py b/utils/lib/python3.5/_weakrefset.py deleted file mode 120000 index d1d5a57..0000000 --- a/utils/lib/python3.5/_weakrefset.py +++ /dev/null @@ -1 +0,0 @@ -/usr/lib64/python3.5/_weakrefset.py \ No newline at end of file diff --git a/utils/lib/python3.5/abc.py b/utils/lib/python3.5/abc.py deleted file mode 120000 index 1f42e72..0000000 --- a/utils/lib/python3.5/abc.py +++ /dev/null @@ -1 +0,0 @@ -/usr/lib64/python3.5/abc.py \ No newline at end of file diff --git a/utils/lib/python3.5/base64.py b/utils/lib/python3.5/base64.py deleted file mode 120000 index 1ddaaac..0000000 --- a/utils/lib/python3.5/base64.py +++ /dev/null @@ -1 +0,0 @@ -/usr/lib64/python3.5/base64.py \ No newline at end of file diff --git a/utils/lib/python3.5/bisect.py b/utils/lib/python3.5/bisect.py deleted file mode 120000 index ab10c0d..0000000 --- a/utils/lib/python3.5/bisect.py +++ /dev/null @@ -1 +0,0 @@ -/usr/lib64/python3.5/bisect.py \ No newline at end of file diff --git a/utils/lib/python3.5/codecs.py b/utils/lib/python3.5/codecs.py deleted file mode 120000 index 0ab8747..0000000 --- a/utils/lib/python3.5/codecs.py +++ /dev/null @@ -1 +0,0 @@ -/usr/lib64/python3.5/codecs.py \ No newline at end of file diff --git a/utils/lib/python3.5/collections b/utils/lib/python3.5/collections deleted file mode 120000 index 98877bb..0000000 --- a/utils/lib/python3.5/collections +++ /dev/null @@ -1 +0,0 @@ -/usr/lib64/python3.5/collections \ No newline at end of file diff --git a/utils/lib/python3.5/config-3.5m b/utils/lib/python3.5/config-3.5m deleted file mode 120000 index d2d1192..0000000 --- a/utils/lib/python3.5/config-3.5m +++ /dev/null @@ -1 +0,0 @@ -/usr/lib64/python3.5/config-3.5m \ No newline at end of file diff --git a/utils/lib/python3.5/copy.py b/utils/lib/python3.5/copy.py deleted file mode 120000 index 28a7fab..0000000 --- a/utils/lib/python3.5/copy.py +++ /dev/null @@ -1 +0,0 @@ -/usr/lib64/python3.5/copy.py \ No newline at end of file diff --git a/utils/lib/python3.5/copyreg.py b/utils/lib/python3.5/copyreg.py deleted file mode 120000 index af3fcc5..0000000 --- a/utils/lib/python3.5/copyreg.py +++ /dev/null @@ -1 +0,0 @@ -/usr/lib64/python3.5/copyreg.py \ No newline at end of file diff --git a/utils/lib/python3.5/distutils/__init__.py b/utils/lib/python3.5/distutils/__init__.py deleted file mode 100644 index 29fc1da..0000000 --- a/utils/lib/python3.5/distutils/__init__.py +++ /dev/null @@ -1,101 +0,0 @@ -import os -import sys -import warnings -import imp -import opcode # opcode is not a virtualenv module, so we can use it to find the stdlib - # Important! To work on pypy, this must be a module that resides in the - # lib-python/modified-x.y.z directory - -dirname = os.path.dirname - -distutils_path = os.path.join(os.path.dirname(opcode.__file__), 'distutils') -if os.path.normpath(distutils_path) == os.path.dirname(os.path.normpath(__file__)): - warnings.warn( - "The virtualenv distutils package at %s appears to be in the same location as the system distutils?") -else: - __path__.insert(0, distutils_path) - real_distutils = imp.load_module("_virtualenv_distutils", None, distutils_path, ('', '', imp.PKG_DIRECTORY)) - # Copy the relevant attributes - try: - __revision__ = real_distutils.__revision__ - except AttributeError: - pass - __version__ = real_distutils.__version__ - -from distutils import dist, sysconfig - -try: - basestring -except NameError: - basestring = str - -## patch build_ext (distutils doesn't know how to get the libs directory -## path on windows - it hardcodes the paths around the patched sys.prefix) - -if sys.platform == 'win32': - from distutils.command.build_ext import build_ext as old_build_ext - class build_ext(old_build_ext): - def finalize_options (self): - if self.library_dirs is None: - self.library_dirs = [] - elif isinstance(self.library_dirs, basestring): - self.library_dirs = self.library_dirs.split(os.pathsep) - - self.library_dirs.insert(0, os.path.join(sys.real_prefix, "Libs")) - old_build_ext.finalize_options(self) - - from distutils.command import build_ext as build_ext_module - build_ext_module.build_ext = build_ext - -## distutils.dist patches: - -old_find_config_files = dist.Distribution.find_config_files -def find_config_files(self): - found = old_find_config_files(self) - system_distutils = os.path.join(distutils_path, 'distutils.cfg') - #if os.path.exists(system_distutils): - # found.insert(0, system_distutils) - # What to call the per-user config file - if os.name == 'posix': - user_filename = ".pydistutils.cfg" - else: - user_filename = "pydistutils.cfg" - user_filename = os.path.join(sys.prefix, user_filename) - if os.path.isfile(user_filename): - for item in list(found): - if item.endswith('pydistutils.cfg'): - found.remove(item) - found.append(user_filename) - return found -dist.Distribution.find_config_files = find_config_files - -## distutils.sysconfig patches: - -old_get_python_inc = sysconfig.get_python_inc -def sysconfig_get_python_inc(plat_specific=0, prefix=None): - if prefix is None: - prefix = sys.real_prefix - return old_get_python_inc(plat_specific, prefix) -sysconfig_get_python_inc.__doc__ = old_get_python_inc.__doc__ -sysconfig.get_python_inc = sysconfig_get_python_inc - -old_get_python_lib = sysconfig.get_python_lib -def sysconfig_get_python_lib(plat_specific=0, standard_lib=0, prefix=None): - if standard_lib and prefix is None: - prefix = sys.real_prefix - return old_get_python_lib(plat_specific, standard_lib, prefix) -sysconfig_get_python_lib.__doc__ = old_get_python_lib.__doc__ -sysconfig.get_python_lib = sysconfig_get_python_lib - -old_get_config_vars = sysconfig.get_config_vars -def sysconfig_get_config_vars(*args): - real_vars = old_get_config_vars(*args) - if sys.platform == 'win32': - lib_dir = os.path.join(sys.real_prefix, "libs") - if isinstance(real_vars, dict) and 'LIBDIR' not in real_vars: - real_vars['LIBDIR'] = lib_dir # asked for all - elif isinstance(real_vars, list) and 'LIBDIR' in args: - real_vars = real_vars + [lib_dir] # asked for list - return real_vars -sysconfig_get_config_vars.__doc__ = old_get_config_vars.__doc__ -sysconfig.get_config_vars = sysconfig_get_config_vars diff --git a/utils/lib/python3.5/distutils/__pycache__/__init__.cpython-35.pyc b/utils/lib/python3.5/distutils/__pycache__/__init__.cpython-35.pyc deleted file mode 100644 index f8d326e..0000000 Binary files a/utils/lib/python3.5/distutils/__pycache__/__init__.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/distutils/distutils.cfg b/utils/lib/python3.5/distutils/distutils.cfg deleted file mode 100644 index 1af230e..0000000 --- a/utils/lib/python3.5/distutils/distutils.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# This is a config file local to this virtualenv installation -# You may include options that will be used by all distutils commands, -# and by easy_install. For instance: -# -# [easy_install] -# find_links = http://mylocalsite diff --git a/utils/lib/python3.5/encodings b/utils/lib/python3.5/encodings deleted file mode 120000 index 0518fe7..0000000 --- a/utils/lib/python3.5/encodings +++ /dev/null @@ -1 +0,0 @@ -/usr/lib64/python3.5/encodings \ No newline at end of file diff --git a/utils/lib/python3.5/fnmatch.py b/utils/lib/python3.5/fnmatch.py deleted file mode 120000 index b8da817..0000000 --- a/utils/lib/python3.5/fnmatch.py +++ /dev/null @@ -1 +0,0 @@ -/usr/lib64/python3.5/fnmatch.py \ No newline at end of file diff --git a/utils/lib/python3.5/functools.py b/utils/lib/python3.5/functools.py deleted file mode 120000 index b5ac78a..0000000 --- a/utils/lib/python3.5/functools.py +++ /dev/null @@ -1 +0,0 @@ -/usr/lib64/python3.5/functools.py \ No newline at end of file diff --git a/utils/lib/python3.5/genericpath.py b/utils/lib/python3.5/genericpath.py deleted file mode 120000 index 452efc7..0000000 --- a/utils/lib/python3.5/genericpath.py +++ /dev/null @@ -1 +0,0 @@ -/usr/lib64/python3.5/genericpath.py \ No newline at end of file diff --git a/utils/lib/python3.5/hashlib.py b/utils/lib/python3.5/hashlib.py deleted file mode 120000 index dc88e93..0000000 --- a/utils/lib/python3.5/hashlib.py +++ /dev/null @@ -1 +0,0 @@ -/usr/lib64/python3.5/hashlib.py \ No newline at end of file diff --git a/utils/lib/python3.5/heapq.py b/utils/lib/python3.5/heapq.py deleted file mode 120000 index 2587d92..0000000 --- a/utils/lib/python3.5/heapq.py +++ /dev/null @@ -1 +0,0 @@ -/usr/lib64/python3.5/heapq.py \ No newline at end of file diff --git a/utils/lib/python3.5/hmac.py b/utils/lib/python3.5/hmac.py deleted file mode 120000 index 25b5ae9..0000000 --- a/utils/lib/python3.5/hmac.py +++ /dev/null @@ -1 +0,0 @@ -/usr/lib64/python3.5/hmac.py \ No newline at end of file diff --git a/utils/lib/python3.5/imp.py b/utils/lib/python3.5/imp.py deleted file mode 120000 index 53931ea..0000000 --- a/utils/lib/python3.5/imp.py +++ /dev/null @@ -1 +0,0 @@ -/usr/lib64/python3.5/imp.py \ No newline at end of file diff --git a/utils/lib/python3.5/importlib b/utils/lib/python3.5/importlib deleted file mode 120000 index fc7b5ff..0000000 --- a/utils/lib/python3.5/importlib +++ /dev/null @@ -1 +0,0 @@ -/usr/lib64/python3.5/importlib \ No newline at end of file diff --git a/utils/lib/python3.5/io.py b/utils/lib/python3.5/io.py deleted file mode 120000 index 21a35fc..0000000 --- a/utils/lib/python3.5/io.py +++ /dev/null @@ -1 +0,0 @@ -/usr/lib64/python3.5/io.py \ No newline at end of file diff --git a/utils/lib/python3.5/keyword.py b/utils/lib/python3.5/keyword.py deleted file mode 120000 index 4a1f5a9..0000000 --- a/utils/lib/python3.5/keyword.py +++ /dev/null @@ -1 +0,0 @@ -/usr/lib64/python3.5/keyword.py \ No newline at end of file diff --git a/utils/lib/python3.5/lib-dynload b/utils/lib/python3.5/lib-dynload deleted file mode 120000 index 4f2fd98..0000000 --- a/utils/lib/python3.5/lib-dynload +++ /dev/null @@ -1 +0,0 @@ -/usr/lib64/python3.5/lib-dynload \ No newline at end of file diff --git a/utils/lib/python3.5/linecache.py b/utils/lib/python3.5/linecache.py deleted file mode 120000 index adac63f..0000000 --- a/utils/lib/python3.5/linecache.py +++ /dev/null @@ -1 +0,0 @@ -/usr/lib64/python3.5/linecache.py \ No newline at end of file diff --git a/utils/lib/python3.5/locale.py b/utils/lib/python3.5/locale.py deleted file mode 120000 index dc527c1..0000000 --- a/utils/lib/python3.5/locale.py +++ /dev/null @@ -1 +0,0 @@ -/usr/lib64/python3.5/locale.py \ No newline at end of file diff --git a/utils/lib/python3.5/no-global-site-packages.txt b/utils/lib/python3.5/no-global-site-packages.txt deleted file mode 100644 index e69de29..0000000 diff --git a/utils/lib/python3.5/ntpath.py b/utils/lib/python3.5/ntpath.py deleted file mode 120000 index a38b6d1..0000000 --- a/utils/lib/python3.5/ntpath.py +++ /dev/null @@ -1 +0,0 @@ -/usr/lib64/python3.5/ntpath.py \ No newline at end of file diff --git a/utils/lib/python3.5/operator.py b/utils/lib/python3.5/operator.py deleted file mode 120000 index 24a58c3..0000000 --- a/utils/lib/python3.5/operator.py +++ /dev/null @@ -1 +0,0 @@ -/usr/lib64/python3.5/operator.py \ No newline at end of file diff --git a/utils/lib/python3.5/orig-prefix.txt b/utils/lib/python3.5/orig-prefix.txt deleted file mode 100644 index e25db58..0000000 --- a/utils/lib/python3.5/orig-prefix.txt +++ /dev/null @@ -1 +0,0 @@ -/usr \ No newline at end of file diff --git a/utils/lib/python3.5/os.py b/utils/lib/python3.5/os.py deleted file mode 120000 index d097144..0000000 --- a/utils/lib/python3.5/os.py +++ /dev/null @@ -1 +0,0 @@ -/usr/lib64/python3.5/os.py \ No newline at end of file diff --git a/utils/lib/python3.5/plat-linux b/utils/lib/python3.5/plat-linux deleted file mode 120000 index 63b7b37..0000000 --- a/utils/lib/python3.5/plat-linux +++ /dev/null @@ -1 +0,0 @@ -/usr/lib64/python3.5/plat-linux \ No newline at end of file diff --git a/utils/lib/python3.5/posixpath.py b/utils/lib/python3.5/posixpath.py deleted file mode 120000 index 515963a..0000000 --- a/utils/lib/python3.5/posixpath.py +++ /dev/null @@ -1 +0,0 @@ -/usr/lib64/python3.5/posixpath.py \ No newline at end of file diff --git a/utils/lib/python3.5/random.py b/utils/lib/python3.5/random.py deleted file mode 120000 index 69482aa..0000000 --- a/utils/lib/python3.5/random.py +++ /dev/null @@ -1 +0,0 @@ -/usr/lib64/python3.5/random.py \ No newline at end of file diff --git a/utils/lib/python3.5/re.py b/utils/lib/python3.5/re.py deleted file mode 120000 index 8331492..0000000 --- a/utils/lib/python3.5/re.py +++ /dev/null @@ -1 +0,0 @@ -/usr/lib64/python3.5/re.py \ No newline at end of file diff --git a/utils/lib/python3.5/reprlib.py b/utils/lib/python3.5/reprlib.py deleted file mode 120000 index f9510d9..0000000 --- a/utils/lib/python3.5/reprlib.py +++ /dev/null @@ -1 +0,0 @@ -/usr/lib64/python3.5/reprlib.py \ No newline at end of file diff --git a/utils/lib/python3.5/rlcompleter.py b/utils/lib/python3.5/rlcompleter.py deleted file mode 120000 index 9c8187c..0000000 --- a/utils/lib/python3.5/rlcompleter.py +++ /dev/null @@ -1 +0,0 @@ -/usr/lib64/python3.5/rlcompleter.py \ No newline at end of file diff --git a/utils/lib/python3.5/shutil.py b/utils/lib/python3.5/shutil.py deleted file mode 120000 index d0ab9ab..0000000 --- a/utils/lib/python3.5/shutil.py +++ /dev/null @@ -1 +0,0 @@ -/usr/lib64/python3.5/shutil.py \ No newline at end of file diff --git a/utils/lib/python3.5/site-packages/__pycache__/easy_install.cpython-35.pyc b/utils/lib/python3.5/site-packages/__pycache__/easy_install.cpython-35.pyc deleted file mode 100644 index 1612d8c..0000000 Binary files a/utils/lib/python3.5/site-packages/__pycache__/easy_install.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/site-packages/beautifulsoup4-4.6.0.dist-info/DESCRIPTION.rst b/utils/lib/python3.5/site-packages/beautifulsoup4-4.6.0.dist-info/DESCRIPTION.rst deleted file mode 100644 index 30379a1..0000000 --- a/utils/lib/python3.5/site-packages/beautifulsoup4-4.6.0.dist-info/DESCRIPTION.rst +++ /dev/null @@ -1,3 +0,0 @@ -Beautiful Soup sits atop an HTML or XML parser, providing Pythonic idioms for iterating, searching, and modifying the parse tree. - - diff --git a/utils/lib/python3.5/site-packages/beautifulsoup4-4.6.0.dist-info/INSTALLER b/utils/lib/python3.5/site-packages/beautifulsoup4-4.6.0.dist-info/INSTALLER deleted file mode 100644 index a1b589e..0000000 --- a/utils/lib/python3.5/site-packages/beautifulsoup4-4.6.0.dist-info/INSTALLER +++ /dev/null @@ -1 +0,0 @@ -pip diff --git a/utils/lib/python3.5/site-packages/beautifulsoup4-4.6.0.dist-info/METADATA b/utils/lib/python3.5/site-packages/beautifulsoup4-4.6.0.dist-info/METADATA deleted file mode 100644 index cc2f64b..0000000 --- a/utils/lib/python3.5/site-packages/beautifulsoup4-4.6.0.dist-info/METADATA +++ /dev/null @@ -1,28 +0,0 @@ -Metadata-Version: 2.0 -Name: beautifulsoup4 -Version: 4.6.0 -Summary: Screen-scraping library -Home-page: http://www.crummy.com/software/BeautifulSoup/bs4/ -Author: Leonard Richardson -Author-email: leonardr@segfault.org -License: MIT -Download-URL: http://www.crummy.com/software/BeautifulSoup/bs4/download/ -Platform: UNKNOWN -Classifier: Development Status :: 5 - Production/Stable -Classifier: Intended Audience :: Developers -Classifier: License :: OSI Approved :: MIT License -Classifier: Programming Language :: Python -Classifier: Programming Language :: Python :: 2.7 -Classifier: Programming Language :: Python :: 3 -Classifier: Topic :: Text Processing :: Markup :: HTML -Classifier: Topic :: Text Processing :: Markup :: XML -Classifier: Topic :: Text Processing :: Markup :: SGML -Classifier: Topic :: Software Development :: Libraries :: Python Modules -Provides-Extra: html5lib -Requires-Dist: html5lib; extra == 'html5lib' -Provides-Extra: lxml -Requires-Dist: lxml; extra == 'lxml' - -Beautiful Soup sits atop an HTML or XML parser, providing Pythonic idioms for iterating, searching, and modifying the parse tree. - - diff --git a/utils/lib/python3.5/site-packages/beautifulsoup4-4.6.0.dist-info/RECORD b/utils/lib/python3.5/site-packages/beautifulsoup4-4.6.0.dist-info/RECORD deleted file mode 100644 index 2cd9fbd..0000000 --- a/utils/lib/python3.5/site-packages/beautifulsoup4-4.6.0.dist-info/RECORD +++ /dev/null @@ -1,41 +0,0 @@ -beautifulsoup4-4.6.0.dist-info/DESCRIPTION.rst,sha256=HReC3om4K1XIgRKEVjgzdTEKfTTE3F83hEHkQQJwuU0,132 -beautifulsoup4-4.6.0.dist-info/METADATA,sha256=MygZZNNKIcXfK60bDPVe3zpyJeyqRoTy1PBE2uqb4TY,1109 -beautifulsoup4-4.6.0.dist-info/RECORD,, -beautifulsoup4-4.6.0.dist-info/WHEEL,sha256=dXGL5yz26tu5uNsUy9EBoBYhrvMYqmFH9Vm82OQUT-8,95 -beautifulsoup4-4.6.0.dist-info/metadata.json,sha256=5165aEwyWzVfTK8Rs17tqfMof2bpgGLGbjGAo24oFNc,1110 -beautifulsoup4-4.6.0.dist-info/top_level.txt,sha256=H8VT-IuPWLzQqwG9_eChjXDJ1z0H9RRebdSR90Bjnkw,4 -bs4/__init__.py,sha256=tO59vxn6pDf_-5iy_a_rG65rDph3TyY7wKqRu9zNQ_4,20394 -bs4/dammit.py,sha256=T91drgzqXmIrH---Qm5tG_jvOqv1QaYdJPOt9lRJucw,29910 -bs4/diagnose.py,sha256=k_dyxYqq52gaikV1hfiwh0_PoWGbx2fsnjm5IAMS7PA,6773 -bs4/element.py,sha256=2EZ_aM5jWf7tREyxXjvziW23Q59tSijCacC8-hfyx5M,68798 -bs4/testing.py,sha256=GEdA91wNzzJ5XewPEgz1oCzDT1ihT-UYphgah2CyXDM,30800 -bs4/builder/__init__.py,sha256=ECq2riLT2cie_wig7cTgMQU6YAuZ6cjZTFKWJC1st7g,11552 -bs4/builder/_html5lib.py,sha256=LZeT3YMgTWWKzydl48fkbwdB6IQC3nV67Ej_nbmJrcs,16688 -bs4/builder/_htmlparser.py,sha256=7ytwx-cp8Ju4nVvZqsUtL7bGNv0c3KgT44lbnyqQvHk,11609 -bs4/builder/_lxml.py,sha256=xPBXWAVfqRvWlAYoykIGVmp8JhhADxriYrXfmEjSqNE,9470 -bs4/tests/__init__.py,sha256=bdUBDE750n7qNEfue7-3a1fBaUxJlvZMkvJvZa-lbYs,27 -bs4/tests/test_builder_registry.py,sha256=pllfRpArh9TYhjjRUiu1wITr9Ryyv4hiaAtRjij-k4E,5582 -bs4/tests/test_docs.py,sha256=FXfz2bGL4Xe0q6duwpmg9hmFiZuU4DVJPNZ0hTb6aH4,1067 -bs4/tests/test_html5lib.py,sha256=MYtpDf9mkYxHUNxBeRVmwqG5E0-R2b_NlpX3lOW30Zs,4907 -bs4/tests/test_htmlparser.py,sha256=22Ivw1wno80DD3j7NxZhc8rrBKSfgwBaAH4tzYIT7lM,1191 -bs4/tests/test_lxml.py,sha256=7ge3DMNPQIBibALPWvqn-hAdOBUCoQAVOHIu8rQhSFg,2379 -bs4/tests/test_soup.py,sha256=ugOafuY7DoCZFaxjMoBivnYNE_Iz0CRN2ZPkChLy6VY,20313 -bs4/tests/test_tree.py,sha256=psSal7EQIeDEoimz_DVloKKG1D_8CSAFJY-Yvov2bx0,78204 -beautifulsoup4-4.6.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 -bs4/__pycache__/testing.cpython-35.pyc,, -bs4/tests/__pycache__/test_builder_registry.cpython-35.pyc,, -bs4/tests/__pycache__/test_docs.cpython-35.pyc,, -bs4/builder/__pycache__/__init__.cpython-35.pyc,, -bs4/__pycache__/diagnose.cpython-35.pyc,, -bs4/builder/__pycache__/_htmlparser.cpython-35.pyc,, -bs4/builder/__pycache__/_html5lib.cpython-35.pyc,, -bs4/builder/__pycache__/_lxml.cpython-35.pyc,, -bs4/tests/__pycache__/__init__.cpython-35.pyc,, -bs4/tests/__pycache__/test_tree.cpython-35.pyc,, -bs4/tests/__pycache__/test_html5lib.cpython-35.pyc,, -bs4/__pycache__/__init__.cpython-35.pyc,, -bs4/__pycache__/element.cpython-35.pyc,, -bs4/__pycache__/dammit.cpython-35.pyc,, -bs4/tests/__pycache__/test_lxml.cpython-35.pyc,, -bs4/tests/__pycache__/test_soup.cpython-35.pyc,, -bs4/tests/__pycache__/test_htmlparser.cpython-35.pyc,, diff --git a/utils/lib/python3.5/site-packages/beautifulsoup4-4.6.0.dist-info/WHEEL b/utils/lib/python3.5/site-packages/beautifulsoup4-4.6.0.dist-info/WHEEL deleted file mode 100644 index a68f088..0000000 --- a/utils/lib/python3.5/site-packages/beautifulsoup4-4.6.0.dist-info/WHEEL +++ /dev/null @@ -1,5 +0,0 @@ -Wheel-Version: 1.0 -Generator: bdist_wheel (0.30.0.a0) -Root-Is-Purelib: true -Tag: py3-none-any - diff --git a/utils/lib/python3.5/site-packages/beautifulsoup4-4.6.0.dist-info/metadata.json b/utils/lib/python3.5/site-packages/beautifulsoup4-4.6.0.dist-info/metadata.json deleted file mode 100644 index 830af35..0000000 --- a/utils/lib/python3.5/site-packages/beautifulsoup4-4.6.0.dist-info/metadata.json +++ /dev/null @@ -1 +0,0 @@ -{"classifiers": ["Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Programming Language :: Python", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Topic :: Text Processing :: Markup :: HTML", "Topic :: Text Processing :: Markup :: XML", "Topic :: Text Processing :: Markup :: SGML", "Topic :: Software Development :: Libraries :: Python Modules"], "download_url": "http://www.crummy.com/software/BeautifulSoup/bs4/download/", "extensions": {"python.details": {"contacts": [{"email": "leonardr@segfault.org", "name": "Leonard Richardson", "role": "author"}], "document_names": {"description": "DESCRIPTION.rst"}, "project_urls": {"Home": "http://www.crummy.com/software/BeautifulSoup/bs4/"}}}, "extras": ["html5lib", "lxml"], "generator": "bdist_wheel (0.30.0.a0)", "license": "MIT", "metadata_version": "2.0", "name": "beautifulsoup4", "run_requires": [{"extra": "html5lib", "requires": ["html5lib"]}, {"extra": "lxml", "requires": ["lxml"]}], "summary": "Screen-scraping library", "version": "4.6.0"} \ No newline at end of file diff --git a/utils/lib/python3.5/site-packages/beautifulsoup4-4.6.0.dist-info/top_level.txt b/utils/lib/python3.5/site-packages/beautifulsoup4-4.6.0.dist-info/top_level.txt deleted file mode 100644 index 1315442..0000000 --- a/utils/lib/python3.5/site-packages/beautifulsoup4-4.6.0.dist-info/top_level.txt +++ /dev/null @@ -1 +0,0 @@ -bs4 diff --git a/utils/lib/python3.5/site-packages/bs4/__init__.py b/utils/lib/python3.5/site-packages/bs4/__init__.py deleted file mode 100644 index 62e9e5d..0000000 --- a/utils/lib/python3.5/site-packages/bs4/__init__.py +++ /dev/null @@ -1,529 +0,0 @@ -"""Beautiful Soup -Elixir and Tonic -"The Screen-Scraper's Friend" -http://www.crummy.com/software/BeautifulSoup/ - -Beautiful Soup uses a pluggable XML or HTML parser to parse a -(possibly invalid) document into a tree representation. Beautiful Soup -provides methods and Pythonic idioms that make it easy to navigate, -search, and modify the parse tree. - -Beautiful Soup works with Python 2.7 and up. It works better if lxml -and/or html5lib is installed. - -For more than you ever wanted to know about Beautiful Soup, see the -documentation: -http://www.crummy.com/software/BeautifulSoup/bs4/doc/ - -""" - -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. - -__author__ = "Leonard Richardson (leonardr@segfault.org)" -__version__ = "4.6.0" -__copyright__ = "Copyright (c) 2004-2017 Leonard Richardson" -__license__ = "MIT" - -__all__ = ['BeautifulSoup'] - -import os -import re -import traceback -import warnings - -from .builder import builder_registry, ParserRejectedMarkup -from .dammit import UnicodeDammit -from .element import ( - CData, - Comment, - DEFAULT_OUTPUT_ENCODING, - Declaration, - Doctype, - NavigableString, - PageElement, - ProcessingInstruction, - ResultSet, - SoupStrainer, - Tag, - ) - -# The very first thing we do is give a useful error if someone is -# running this code under Python 3 without converting it. -'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'!='You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).' - -class BeautifulSoup(Tag): - """ - This class defines the basic interface called by the tree builders. - - These methods will be called by the parser: - reset() - feed(markup) - - The tree builder may call these methods from its feed() implementation: - handle_starttag(name, attrs) # See note about return value - handle_endtag(name) - handle_data(data) # Appends to the current data node - endData(containerClass=NavigableString) # Ends the current data node - - No matter how complicated the underlying parser is, you should be - able to build a tree using 'start tag' events, 'end tag' events, - 'data' events, and "done with data" events. - - If you encounter an empty-element tag (aka a self-closing tag, - like HTML's
tag), call handle_starttag and then - handle_endtag. - """ - ROOT_TAG_NAME = '[document]' - - # If the end-user gives no indication which tree builder they - # want, look for one with these features. - DEFAULT_BUILDER_FEATURES = ['html', 'fast'] - - ASCII_SPACES = '\x20\x0a\x09\x0c\x0d' - - NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, change code that looks like this:\n\n BeautifulSoup(YOUR_MARKUP})\n\nto this:\n\n BeautifulSoup(YOUR_MARKUP, \"%(parser)s\")\n" - - def __init__(self, markup="", features=None, builder=None, - parse_only=None, from_encoding=None, exclude_encodings=None, - **kwargs): - """The Soup object is initialized as the 'root tag', and the - provided markup (which can be a string or a file-like object) - is fed into the underlying parser.""" - - if 'convertEntities' in kwargs: - warnings.warn( - "BS4 does not respect the convertEntities argument to the " - "BeautifulSoup constructor. Entities are always converted " - "to Unicode characters.") - - if 'markupMassage' in kwargs: - del kwargs['markupMassage'] - warnings.warn( - "BS4 does not respect the markupMassage argument to the " - "BeautifulSoup constructor. The tree builder is responsible " - "for any necessary markup massage.") - - if 'smartQuotesTo' in kwargs: - del kwargs['smartQuotesTo'] - warnings.warn( - "BS4 does not respect the smartQuotesTo argument to the " - "BeautifulSoup constructor. Smart quotes are always converted " - "to Unicode characters.") - - if 'selfClosingTags' in kwargs: - del kwargs['selfClosingTags'] - warnings.warn( - "BS4 does not respect the selfClosingTags argument to the " - "BeautifulSoup constructor. The tree builder is responsible " - "for understanding self-closing tags.") - - if 'isHTML' in kwargs: - del kwargs['isHTML'] - warnings.warn( - "BS4 does not respect the isHTML argument to the " - "BeautifulSoup constructor. Suggest you use " - "features='lxml' for HTML and features='lxml-xml' for " - "XML.") - - def deprecated_argument(old_name, new_name): - if old_name in kwargs: - warnings.warn( - 'The "%s" argument to the BeautifulSoup constructor ' - 'has been renamed to "%s."' % (old_name, new_name)) - value = kwargs[old_name] - del kwargs[old_name] - return value - return None - - parse_only = parse_only or deprecated_argument( - "parseOnlyThese", "parse_only") - - from_encoding = from_encoding or deprecated_argument( - "fromEncoding", "from_encoding") - - if from_encoding and isinstance(markup, str): - warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.") - from_encoding = None - - if len(kwargs) > 0: - arg = list(kwargs.keys()).pop() - raise TypeError( - "__init__() got an unexpected keyword argument '%s'" % arg) - - if builder is None: - original_features = features - if isinstance(features, str): - features = [features] - if features is None or len(features) == 0: - features = self.DEFAULT_BUILDER_FEATURES - builder_class = builder_registry.lookup(*features) - if builder_class is None: - raise FeatureNotFound( - "Couldn't find a tree builder with the features you " - "requested: %s. Do you need to install a parser library?" - % ",".join(features)) - builder = builder_class() - if not (original_features == builder.NAME or - original_features in builder.ALTERNATE_NAMES): - if builder.is_xml: - markup_type = "XML" - else: - markup_type = "HTML" - - caller = traceback.extract_stack()[0] - filename = caller[0] - line_number = caller[1] - warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % dict( - filename=filename, - line_number=line_number, - parser=builder.NAME, - markup_type=markup_type)) - - self.builder = builder - self.is_xml = builder.is_xml - self.known_xml = self.is_xml - self.builder.soup = self - - self.parse_only = parse_only - - if hasattr(markup, 'read'): # It's a file-type object. - markup = markup.read() - elif len(markup) <= 256 and ( - (isinstance(markup, bytes) and not b'<' in markup) - or (isinstance(markup, str) and not '<' in markup) - ): - # Print out warnings for a couple beginner problems - # involving passing non-markup to Beautiful Soup. - # Beautiful Soup will still parse the input as markup, - # just in case that's what the user really wants. - if (isinstance(markup, str) - and not os.path.supports_unicode_filenames): - possible_filename = markup.encode("utf8") - else: - possible_filename = markup - is_file = False - try: - is_file = os.path.exists(possible_filename) - except Exception as e: - # This is almost certainly a problem involving - # characters not valid in filenames on this - # system. Just let it go. - pass - if is_file: - if isinstance(markup, str): - markup = markup.encode("utf8") - warnings.warn( - '"%s" looks like a filename, not markup. You should' - ' probably open this file and pass the filehandle into' - ' Beautiful Soup.' % markup) - self._check_markup_is_url(markup) - - for (self.markup, self.original_encoding, self.declared_html_encoding, - self.contains_replacement_characters) in ( - self.builder.prepare_markup( - markup, from_encoding, exclude_encodings=exclude_encodings)): - self.reset() - try: - self._feed() - break - except ParserRejectedMarkup: - pass - - # Clear out the markup and remove the builder's circular - # reference to this object. - self.markup = None - self.builder.soup = None - - def __copy__(self): - copy = type(self)( - self.encode('utf-8'), builder=self.builder, from_encoding='utf-8' - ) - - # Although we encoded the tree to UTF-8, that may not have - # been the encoding of the original markup. Set the copy's - # .original_encoding to reflect the original object's - # .original_encoding. - copy.original_encoding = self.original_encoding - return copy - - def __getstate__(self): - # Frequently a tree builder can't be pickled. - d = dict(self.__dict__) - if 'builder' in d and not self.builder.picklable: - d['builder'] = None - return d - - @staticmethod - def _check_markup_is_url(markup): - """ - Check if markup looks like it's actually a url and raise a warning - if so. Markup can be unicode or str (py2) / bytes (py3). - """ - if isinstance(markup, bytes): - space = b' ' - cant_start_with = (b"http:", b"https:") - elif isinstance(markup, str): - space = ' ' - cant_start_with = ("http:", "https:") - else: - return - - if any(markup.startswith(prefix) for prefix in cant_start_with): - if not space in markup: - if isinstance(markup, bytes): - decoded_markup = markup.decode('utf-8', 'replace') - else: - decoded_markup = markup - warnings.warn( - '"%s" looks like a URL. Beautiful Soup is not an' - ' HTTP client. You should probably use an HTTP client like' - ' requests to get the document behind the URL, and feed' - ' that document to Beautiful Soup.' % decoded_markup - ) - - def _feed(self): - # Convert the document to Unicode. - self.builder.reset() - - self.builder.feed(self.markup) - # Close out any unfinished strings and close all the open tags. - self.endData() - while self.currentTag.name != self.ROOT_TAG_NAME: - self.popTag() - - def reset(self): - Tag.__init__(self, self, self.builder, self.ROOT_TAG_NAME) - self.hidden = 1 - self.builder.reset() - self.current_data = [] - self.currentTag = None - self.tagStack = [] - self.preserve_whitespace_tag_stack = [] - self.pushTag(self) - - def new_tag(self, name, namespace=None, nsprefix=None, **attrs): - """Create a new tag associated with this soup.""" - return Tag(None, self.builder, name, namespace, nsprefix, attrs) - - def new_string(self, s, subclass=NavigableString): - """Create a new NavigableString associated with this soup.""" - return subclass(s) - - def insert_before(self, successor): - raise NotImplementedError("BeautifulSoup objects don't support insert_before().") - - def insert_after(self, successor): - raise NotImplementedError("BeautifulSoup objects don't support insert_after().") - - def popTag(self): - tag = self.tagStack.pop() - if self.preserve_whitespace_tag_stack and tag == self.preserve_whitespace_tag_stack[-1]: - self.preserve_whitespace_tag_stack.pop() - #print "Pop", tag.name - if self.tagStack: - self.currentTag = self.tagStack[-1] - return self.currentTag - - def pushTag(self, tag): - #print "Push", tag.name - if self.currentTag: - self.currentTag.contents.append(tag) - self.tagStack.append(tag) - self.currentTag = self.tagStack[-1] - if tag.name in self.builder.preserve_whitespace_tags: - self.preserve_whitespace_tag_stack.append(tag) - - def endData(self, containerClass=NavigableString): - if self.current_data: - current_data = ''.join(self.current_data) - # If whitespace is not preserved, and this string contains - # nothing but ASCII spaces, replace it with a single space - # or newline. - if not self.preserve_whitespace_tag_stack: - strippable = True - for i in current_data: - if i not in self.ASCII_SPACES: - strippable = False - break - if strippable: - if '\n' in current_data: - current_data = '\n' - else: - current_data = ' ' - - # Reset the data collector. - self.current_data = [] - - # Should we add this string to the tree at all? - if self.parse_only and len(self.tagStack) <= 1 and \ - (not self.parse_only.text or \ - not self.parse_only.search(current_data)): - return - - o = containerClass(current_data) - self.object_was_parsed(o) - - def object_was_parsed(self, o, parent=None, most_recent_element=None): - """Add an object to the parse tree.""" - parent = parent or self.currentTag - previous_element = most_recent_element or self._most_recent_element - - next_element = previous_sibling = next_sibling = None - if isinstance(o, Tag): - next_element = o.next_element - next_sibling = o.next_sibling - previous_sibling = o.previous_sibling - if not previous_element: - previous_element = o.previous_element - - o.setup(parent, previous_element, next_element, previous_sibling, next_sibling) - - self._most_recent_element = o - parent.contents.append(o) - - if parent.next_sibling: - # This node is being inserted into an element that has - # already been parsed. Deal with any dangling references. - index = len(parent.contents)-1 - while index >= 0: - if parent.contents[index] is o: - break - index -= 1 - else: - raise ValueError( - "Error building tree: supposedly %r was inserted " - "into %r after the fact, but I don't see it!" % ( - o, parent - ) - ) - if index == 0: - previous_element = parent - previous_sibling = None - else: - previous_element = previous_sibling = parent.contents[index-1] - if index == len(parent.contents)-1: - next_element = parent.next_sibling - next_sibling = None - else: - next_element = next_sibling = parent.contents[index+1] - - o.previous_element = previous_element - if previous_element: - previous_element.next_element = o - o.next_element = next_element - if next_element: - next_element.previous_element = o - o.next_sibling = next_sibling - if next_sibling: - next_sibling.previous_sibling = o - o.previous_sibling = previous_sibling - if previous_sibling: - previous_sibling.next_sibling = o - - def _popToTag(self, name, nsprefix=None, inclusivePop=True): - """Pops the tag stack up to and including the most recent - instance of the given tag. If inclusivePop is false, pops the tag - stack up to but *not* including the most recent instqance of - the given tag.""" - #print "Popping to %s" % name - if name == self.ROOT_TAG_NAME: - # The BeautifulSoup object itself can never be popped. - return - - most_recently_popped = None - - stack_size = len(self.tagStack) - for i in range(stack_size - 1, 0, -1): - t = self.tagStack[i] - if (name == t.name and nsprefix == t.prefix): - if inclusivePop: - most_recently_popped = self.popTag() - break - most_recently_popped = self.popTag() - - return most_recently_popped - - def handle_starttag(self, name, namespace, nsprefix, attrs): - """Push a start tag on to the stack. - - If this method returns None, the tag was rejected by the - SoupStrainer. You should proceed as if the tag had not occurred - in the document. For instance, if this was a self-closing tag, - don't call handle_endtag. - """ - - # print "Start tag %s: %s" % (name, attrs) - self.endData() - - if (self.parse_only and len(self.tagStack) <= 1 - and (self.parse_only.text - or not self.parse_only.search_tag(name, attrs))): - return None - - tag = Tag(self, self.builder, name, namespace, nsprefix, attrs, - self.currentTag, self._most_recent_element) - if tag is None: - return tag - if self._most_recent_element: - self._most_recent_element.next_element = tag - self._most_recent_element = tag - self.pushTag(tag) - return tag - - def handle_endtag(self, name, nsprefix=None): - #print "End tag: " + name - self.endData() - self._popToTag(name, nsprefix) - - def handle_data(self, data): - self.current_data.append(data) - - def decode(self, pretty_print=False, - eventual_encoding=DEFAULT_OUTPUT_ENCODING, - formatter="minimal"): - """Returns a string or Unicode representation of this document. - To get Unicode, pass None for encoding.""" - - if self.is_xml: - # Print the XML declaration - encoding_part = '' - if eventual_encoding != None: - encoding_part = ' encoding="%s"' % eventual_encoding - prefix = '\n' % encoding_part - else: - prefix = '' - if not pretty_print: - indent_level = None - else: - indent_level = 0 - return prefix + super(BeautifulSoup, self).decode( - indent_level, eventual_encoding, formatter) - -# Alias to make it easier to type import: 'from bs4 import _soup' -_s = BeautifulSoup -_soup = BeautifulSoup - -class BeautifulStoneSoup(BeautifulSoup): - """Deprecated interface to an XML parser.""" - - def __init__(self, *args, **kwargs): - kwargs['features'] = 'xml' - warnings.warn( - 'The BeautifulStoneSoup class is deprecated. Instead of using ' - 'it, pass features="xml" into the BeautifulSoup constructor.') - super(BeautifulStoneSoup, self).__init__(*args, **kwargs) - - -class StopParsing(Exception): - pass - -class FeatureNotFound(ValueError): - pass - - -#By default, act as an HTML pretty-printer. -if __name__ == '__main__': - import sys - soup = BeautifulSoup(sys.stdin) - print(soup.prettify()) diff --git a/utils/lib/python3.5/site-packages/bs4/__pycache__/__init__.cpython-35.pyc b/utils/lib/python3.5/site-packages/bs4/__pycache__/__init__.cpython-35.pyc deleted file mode 100644 index 9b6f5c8..0000000 Binary files a/utils/lib/python3.5/site-packages/bs4/__pycache__/__init__.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/site-packages/bs4/__pycache__/dammit.cpython-35.pyc b/utils/lib/python3.5/site-packages/bs4/__pycache__/dammit.cpython-35.pyc deleted file mode 100644 index d0dcca2..0000000 Binary files a/utils/lib/python3.5/site-packages/bs4/__pycache__/dammit.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/site-packages/bs4/__pycache__/diagnose.cpython-35.pyc b/utils/lib/python3.5/site-packages/bs4/__pycache__/diagnose.cpython-35.pyc deleted file mode 100644 index a0928a7..0000000 Binary files a/utils/lib/python3.5/site-packages/bs4/__pycache__/diagnose.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/site-packages/bs4/__pycache__/element.cpython-35.pyc b/utils/lib/python3.5/site-packages/bs4/__pycache__/element.cpython-35.pyc deleted file mode 100644 index f7ad23b..0000000 Binary files a/utils/lib/python3.5/site-packages/bs4/__pycache__/element.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/site-packages/bs4/__pycache__/testing.cpython-35.pyc b/utils/lib/python3.5/site-packages/bs4/__pycache__/testing.cpython-35.pyc deleted file mode 100644 index 87b9298..0000000 Binary files a/utils/lib/python3.5/site-packages/bs4/__pycache__/testing.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/site-packages/bs4/builder/__init__.py b/utils/lib/python3.5/site-packages/bs4/builder/__init__.py deleted file mode 100644 index 4d6f96d..0000000 --- a/utils/lib/python3.5/site-packages/bs4/builder/__init__.py +++ /dev/null @@ -1,333 +0,0 @@ -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. - -from collections import defaultdict -import itertools -import sys -from bs4.element import ( - CharsetMetaAttributeValue, - ContentMetaAttributeValue, - HTMLAwareEntitySubstitution, - whitespace_re - ) - -__all__ = [ - 'HTMLTreeBuilder', - 'SAXTreeBuilder', - 'TreeBuilder', - 'TreeBuilderRegistry', - ] - -# Some useful features for a TreeBuilder to have. -FAST = 'fast' -PERMISSIVE = 'permissive' -STRICT = 'strict' -XML = 'xml' -HTML = 'html' -HTML_5 = 'html5' - - -class TreeBuilderRegistry(object): - - def __init__(self): - self.builders_for_feature = defaultdict(list) - self.builders = [] - - def register(self, treebuilder_class): - """Register a treebuilder based on its advertised features.""" - for feature in treebuilder_class.features: - self.builders_for_feature[feature].insert(0, treebuilder_class) - self.builders.insert(0, treebuilder_class) - - def lookup(self, *features): - if len(self.builders) == 0: - # There are no builders at all. - return None - - if len(features) == 0: - # They didn't ask for any features. Give them the most - # recently registered builder. - return self.builders[0] - - # Go down the list of features in order, and eliminate any builders - # that don't match every feature. - features = list(features) - features.reverse() - candidates = None - candidate_set = None - while len(features) > 0: - feature = features.pop() - we_have_the_feature = self.builders_for_feature.get(feature, []) - if len(we_have_the_feature) > 0: - if candidates is None: - candidates = we_have_the_feature - candidate_set = set(candidates) - else: - # Eliminate any candidates that don't have this feature. - candidate_set = candidate_set.intersection( - set(we_have_the_feature)) - - # The only valid candidates are the ones in candidate_set. - # Go through the original list of candidates and pick the first one - # that's in candidate_set. - if candidate_set is None: - return None - for candidate in candidates: - if candidate in candidate_set: - return candidate - return None - -# The BeautifulSoup class will take feature lists from developers and use them -# to look up builders in this registry. -builder_registry = TreeBuilderRegistry() - -class TreeBuilder(object): - """Turn a document into a Beautiful Soup object tree.""" - - NAME = "[Unknown tree builder]" - ALTERNATE_NAMES = [] - features = [] - - is_xml = False - picklable = False - preserve_whitespace_tags = set() - empty_element_tags = None # A tag will be considered an empty-element - # tag when and only when it has no contents. - - # A value for these tag/attribute combinations is a space- or - # comma-separated list of CDATA, rather than a single CDATA. - cdata_list_attributes = {} - - - def __init__(self): - self.soup = None - - def reset(self): - pass - - def can_be_empty_element(self, tag_name): - """Might a tag with this name be an empty-element tag? - - The final markup may or may not actually present this tag as - self-closing. - - For instance: an HTMLBuilder does not consider a

tag to be - an empty-element tag (it's not in - HTMLBuilder.empty_element_tags). This means an empty

tag - will be presented as "

", not "

". - - The default implementation has no opinion about which tags are - empty-element tags, so a tag will be presented as an - empty-element tag if and only if it has no contents. - "" will become "", and "bar" will - be left alone. - """ - if self.empty_element_tags is None: - return True - return tag_name in self.empty_element_tags - - def feed(self, markup): - raise NotImplementedError() - - def prepare_markup(self, markup, user_specified_encoding=None, - document_declared_encoding=None): - return markup, None, None, False - - def test_fragment_to_document(self, fragment): - """Wrap an HTML fragment to make it look like a document. - - Different parsers do this differently. For instance, lxml - introduces an empty tag, and html5lib - doesn't. Abstracting this away lets us write simple tests - which run HTML fragments through the parser and compare the - results against other HTML fragments. - - This method should not be used outside of tests. - """ - return fragment - - def set_up_substitutions(self, tag): - return False - - def _replace_cdata_list_attribute_values(self, tag_name, attrs): - """Replaces class="foo bar" with class=["foo", "bar"] - - Modifies its input in place. - """ - if not attrs: - return attrs - if self.cdata_list_attributes: - universal = self.cdata_list_attributes.get('*', []) - tag_specific = self.cdata_list_attributes.get( - tag_name.lower(), None) - for attr in list(attrs.keys()): - if attr in universal or (tag_specific and attr in tag_specific): - # We have a "class"-type attribute whose string - # value is a whitespace-separated list of - # values. Split it into a list. - value = attrs[attr] - if isinstance(value, str): - values = whitespace_re.split(value) - else: - # html5lib sometimes calls setAttributes twice - # for the same tag when rearranging the parse - # tree. On the second call the attribute value - # here is already a list. If this happens, - # leave the value alone rather than trying to - # split it again. - values = value - attrs[attr] = values - return attrs - -class SAXTreeBuilder(TreeBuilder): - """A Beautiful Soup treebuilder that listens for SAX events.""" - - def feed(self, markup): - raise NotImplementedError() - - def close(self): - pass - - def startElement(self, name, attrs): - attrs = dict((key[1], value) for key, value in list(attrs.items())) - #print "Start %s, %r" % (name, attrs) - self.soup.handle_starttag(name, attrs) - - def endElement(self, name): - #print "End %s" % name - self.soup.handle_endtag(name) - - def startElementNS(self, nsTuple, nodeName, attrs): - # Throw away (ns, nodeName) for now. - self.startElement(nodeName, attrs) - - def endElementNS(self, nsTuple, nodeName): - # Throw away (ns, nodeName) for now. - self.endElement(nodeName) - #handler.endElementNS((ns, node.nodeName), node.nodeName) - - def startPrefixMapping(self, prefix, nodeValue): - # Ignore the prefix for now. - pass - - def endPrefixMapping(self, prefix): - # Ignore the prefix for now. - # handler.endPrefixMapping(prefix) - pass - - def characters(self, content): - self.soup.handle_data(content) - - def startDocument(self): - pass - - def endDocument(self): - pass - - -class HTMLTreeBuilder(TreeBuilder): - """This TreeBuilder knows facts about HTML. - - Such as which tags are empty-element tags. - """ - - preserve_whitespace_tags = HTMLAwareEntitySubstitution.preserve_whitespace_tags - empty_element_tags = set([ - # These are from HTML5. - 'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr', - - # These are from HTML4, removed in HTML5. - 'spacer', 'frame' - ]) - - # The HTML standard defines these attributes as containing a - # space-separated list of values, not a single value. That is, - # class="foo bar" means that the 'class' attribute has two values, - # 'foo' and 'bar', not the single value 'foo bar'. When we - # encounter one of these attributes, we will parse its value into - # a list of values if possible. Upon output, the list will be - # converted back into a string. - cdata_list_attributes = { - "*" : ['class', 'accesskey', 'dropzone'], - "a" : ['rel', 'rev'], - "link" : ['rel', 'rev'], - "td" : ["headers"], - "th" : ["headers"], - "td" : ["headers"], - "form" : ["accept-charset"], - "object" : ["archive"], - - # These are HTML5 specific, as are *.accesskey and *.dropzone above. - "area" : ["rel"], - "icon" : ["sizes"], - "iframe" : ["sandbox"], - "output" : ["for"], - } - - def set_up_substitutions(self, tag): - # We are only interested in tags - if tag.name != 'meta': - return False - - http_equiv = tag.get('http-equiv') - content = tag.get('content') - charset = tag.get('charset') - - # We are interested in tags that say what encoding the - # document was originally in. This means HTML 5-style - # tags that provide the "charset" attribute. It also means - # HTML 4-style tags that provide the "content" - # attribute and have "http-equiv" set to "content-type". - # - # In both cases we will replace the value of the appropriate - # attribute with a standin object that can take on any - # encoding. - meta_encoding = None - if charset is not None: - # HTML 5 style: - # - meta_encoding = charset - tag['charset'] = CharsetMetaAttributeValue(charset) - - elif (content is not None and http_equiv is not None - and http_equiv.lower() == 'content-type'): - # HTML 4 style: - # - tag['content'] = ContentMetaAttributeValue(content) - - return (meta_encoding is not None) - -def register_treebuilders_from(module): - """Copy TreeBuilders from the given module into this module.""" - # I'm fairly sure this is not the best way to do this. - this_module = sys.modules['bs4.builder'] - for name in module.__all__: - obj = getattr(module, name) - - if issubclass(obj, TreeBuilder): - setattr(this_module, name, obj) - this_module.__all__.append(name) - # Register the builder while we're at it. - this_module.builder_registry.register(obj) - -class ParserRejectedMarkup(Exception): - pass - -# Builders are registered in reverse order of priority, so that custom -# builder registrations will take precedence. In general, we want lxml -# to take precedence over html5lib, because it's faster. And we only -# want to use HTMLParser as a last result. -from . import _htmlparser -register_treebuilders_from(_htmlparser) -try: - from . import _html5lib - register_treebuilders_from(_html5lib) -except ImportError: - # They don't have html5lib installed. - pass -try: - from . import _lxml - register_treebuilders_from(_lxml) -except ImportError: - # They don't have lxml installed. - pass diff --git a/utils/lib/python3.5/site-packages/bs4/builder/__pycache__/__init__.cpython-35.pyc b/utils/lib/python3.5/site-packages/bs4/builder/__pycache__/__init__.cpython-35.pyc deleted file mode 100644 index f129955..0000000 Binary files a/utils/lib/python3.5/site-packages/bs4/builder/__pycache__/__init__.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/site-packages/bs4/builder/__pycache__/_html5lib.cpython-35.pyc b/utils/lib/python3.5/site-packages/bs4/builder/__pycache__/_html5lib.cpython-35.pyc deleted file mode 100644 index db70002..0000000 Binary files a/utils/lib/python3.5/site-packages/bs4/builder/__pycache__/_html5lib.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/site-packages/bs4/builder/__pycache__/_htmlparser.cpython-35.pyc b/utils/lib/python3.5/site-packages/bs4/builder/__pycache__/_htmlparser.cpython-35.pyc deleted file mode 100644 index ba89d28..0000000 Binary files a/utils/lib/python3.5/site-packages/bs4/builder/__pycache__/_htmlparser.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/site-packages/bs4/builder/__pycache__/_lxml.cpython-35.pyc b/utils/lib/python3.5/site-packages/bs4/builder/__pycache__/_lxml.cpython-35.pyc deleted file mode 100644 index 19298f1..0000000 Binary files a/utils/lib/python3.5/site-packages/bs4/builder/__pycache__/_lxml.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/site-packages/bs4/builder/_html5lib.py b/utils/lib/python3.5/site-packages/bs4/builder/_html5lib.py deleted file mode 100644 index d9d468f..0000000 --- a/utils/lib/python3.5/site-packages/bs4/builder/_html5lib.py +++ /dev/null @@ -1,426 +0,0 @@ -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. - -__all__ = [ - 'HTML5TreeBuilder', - ] - -import warnings -import re -from bs4.builder import ( - PERMISSIVE, - HTML, - HTML_5, - HTMLTreeBuilder, - ) -from bs4.element import ( - NamespacedAttribute, - whitespace_re, -) -import html5lib -from html5lib.constants import ( - namespaces, - prefixes, - ) -from bs4.element import ( - Comment, - Doctype, - NavigableString, - Tag, - ) - -try: - # Pre-0.99999999 - from html5lib.treebuilders import _base as treebuilder_base - new_html5lib = False -except ImportError as e: - # 0.99999999 and up - from html5lib.treebuilders import base as treebuilder_base - new_html5lib = True - -class HTML5TreeBuilder(HTMLTreeBuilder): - """Use html5lib to build a tree.""" - - NAME = "html5lib" - - features = [NAME, PERMISSIVE, HTML_5, HTML] - - def prepare_markup(self, markup, user_specified_encoding, - document_declared_encoding=None, exclude_encodings=None): - # Store the user-specified encoding for use later on. - self.user_specified_encoding = user_specified_encoding - - # document_declared_encoding and exclude_encodings aren't used - # ATM because the html5lib TreeBuilder doesn't use - # UnicodeDammit. - if exclude_encodings: - warnings.warn("You provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.") - yield (markup, None, None, False) - - # These methods are defined by Beautiful Soup. - def feed(self, markup): - if self.soup.parse_only is not None: - warnings.warn("You provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.") - parser = html5lib.HTMLParser(tree=self.create_treebuilder) - - extra_kwargs = dict() - if not isinstance(markup, str): - if new_html5lib: - extra_kwargs['override_encoding'] = self.user_specified_encoding - else: - extra_kwargs['encoding'] = self.user_specified_encoding - doc = parser.parse(markup, **extra_kwargs) - - # Set the character encoding detected by the tokenizer. - if isinstance(markup, str): - # We need to special-case this because html5lib sets - # charEncoding to UTF-8 if it gets Unicode input. - doc.original_encoding = None - else: - original_encoding = parser.tokenizer.stream.charEncoding[0] - if not isinstance(original_encoding, str): - # In 0.99999999 and up, the encoding is an html5lib - # Encoding object. We want to use a string for compatibility - # with other tree builders. - original_encoding = original_encoding.name - doc.original_encoding = original_encoding - - def create_treebuilder(self, namespaceHTMLElements): - self.underlying_builder = TreeBuilderForHtml5lib( - namespaceHTMLElements, self.soup) - return self.underlying_builder - - def test_fragment_to_document(self, fragment): - """See `TreeBuilder`.""" - return '%s' % fragment - - -class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder): - - def __init__(self, namespaceHTMLElements, soup=None): - if soup: - self.soup = soup - else: - from bs4 import BeautifulSoup - self.soup = BeautifulSoup("", "html.parser") - super(TreeBuilderForHtml5lib, self).__init__(namespaceHTMLElements) - - def documentClass(self): - self.soup.reset() - return Element(self.soup, self.soup, None) - - def insertDoctype(self, token): - name = token["name"] - publicId = token["publicId"] - systemId = token["systemId"] - - doctype = Doctype.for_name_and_ids(name, publicId, systemId) - self.soup.object_was_parsed(doctype) - - def elementClass(self, name, namespace): - tag = self.soup.new_tag(name, namespace) - return Element(tag, self.soup, namespace) - - def commentClass(self, data): - return TextNode(Comment(data), self.soup) - - def fragmentClass(self): - from bs4 import BeautifulSoup - self.soup = BeautifulSoup("", "html.parser") - self.soup.name = "[document_fragment]" - return Element(self.soup, self.soup, None) - - def appendChild(self, node): - # XXX This code is not covered by the BS4 tests. - self.soup.append(node.element) - - def getDocument(self): - return self.soup - - def getFragment(self): - return treebuilder_base.TreeBuilder.getFragment(self).element - - def testSerializer(self, element): - from bs4 import BeautifulSoup - rv = [] - doctype_re = re.compile(r'^(.*?)(?: PUBLIC "(.*?)"(?: "(.*?)")?| SYSTEM "(.*?)")?$') - - def serializeElement(element, indent=0): - if isinstance(element, BeautifulSoup): - pass - if isinstance(element, Doctype): - m = doctype_re.match(element) - if m: - name = m.group(1) - if m.lastindex > 1: - publicId = m.group(2) or "" - systemId = m.group(3) or m.group(4) or "" - rv.append("""|%s""" % - (' ' * indent, name, publicId, systemId)) - else: - rv.append("|%s" % (' ' * indent, name)) - else: - rv.append("|%s" % (' ' * indent,)) - elif isinstance(element, Comment): - rv.append("|%s" % (' ' * indent, element)) - elif isinstance(element, NavigableString): - rv.append("|%s\"%s\"" % (' ' * indent, element)) - else: - if element.namespace: - name = "%s %s" % (prefixes[element.namespace], - element.name) - else: - name = element.name - rv.append("|%s<%s>" % (' ' * indent, name)) - if element.attrs: - attributes = [] - for name, value in list(element.attrs.items()): - if isinstance(name, NamespacedAttribute): - name = "%s %s" % (prefixes[name.namespace], name.name) - if isinstance(value, list): - value = " ".join(value) - attributes.append((name, value)) - - for name, value in sorted(attributes): - rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value)) - indent += 2 - for child in element.children: - serializeElement(child, indent) - serializeElement(element, 0) - - return "\n".join(rv) - -class AttrList(object): - def __init__(self, element): - self.element = element - self.attrs = dict(self.element.attrs) - def __iter__(self): - return list(self.attrs.items()).__iter__() - def __setitem__(self, name, value): - # If this attribute is a multi-valued attribute for this element, - # turn its value into a list. - list_attr = HTML5TreeBuilder.cdata_list_attributes - if (name in list_attr['*'] - or (self.element.name in list_attr - and name in list_attr[self.element.name])): - # A node that is being cloned may have already undergone - # this procedure. - if not isinstance(value, list): - value = whitespace_re.split(value) - self.element[name] = value - def items(self): - return list(self.attrs.items()) - def keys(self): - return list(self.attrs.keys()) - def __len__(self): - return len(self.attrs) - def __getitem__(self, name): - return self.attrs[name] - def __contains__(self, name): - return name in list(self.attrs.keys()) - - -class Element(treebuilder_base.Node): - def __init__(self, element, soup, namespace): - treebuilder_base.Node.__init__(self, element.name) - self.element = element - self.soup = soup - self.namespace = namespace - - def appendChild(self, node): - string_child = child = None - if isinstance(node, str): - # Some other piece of code decided to pass in a string - # instead of creating a TextElement object to contain the - # string. - string_child = child = node - elif isinstance(node, Tag): - # Some other piece of code decided to pass in a Tag - # instead of creating an Element object to contain the - # Tag. - child = node - elif node.element.__class__ == NavigableString: - string_child = child = node.element - node.parent = self - else: - child = node.element - node.parent = self - - if not isinstance(child, str) and child.parent is not None: - node.element.extract() - - if (string_child and self.element.contents - and self.element.contents[-1].__class__ == NavigableString): - # We are appending a string onto another string. - # TODO This has O(n^2) performance, for input like - # "aaa..." - old_element = self.element.contents[-1] - new_element = self.soup.new_string(old_element + string_child) - old_element.replace_with(new_element) - self.soup._most_recent_element = new_element - else: - if isinstance(node, str): - # Create a brand new NavigableString from this string. - child = self.soup.new_string(node) - - # Tell Beautiful Soup to act as if it parsed this element - # immediately after the parent's last descendant. (Or - # immediately after the parent, if it has no children.) - if self.element.contents: - most_recent_element = self.element._last_descendant(False) - elif self.element.next_element is not None: - # Something from further ahead in the parse tree is - # being inserted into this earlier element. This is - # very annoying because it means an expensive search - # for the last element in the tree. - most_recent_element = self.soup._last_descendant() - else: - most_recent_element = self.element - - self.soup.object_was_parsed( - child, parent=self.element, - most_recent_element=most_recent_element) - - def getAttributes(self): - if isinstance(self.element, Comment): - return {} - return AttrList(self.element) - - def setAttributes(self, attributes): - - if attributes is not None and len(attributes) > 0: - - converted_attributes = [] - for name, value in list(attributes.items()): - if isinstance(name, tuple): - new_name = NamespacedAttribute(*name) - del attributes[name] - attributes[new_name] = value - - self.soup.builder._replace_cdata_list_attribute_values( - self.name, attributes) - for name, value in list(attributes.items()): - self.element[name] = value - - # The attributes may contain variables that need substitution. - # Call set_up_substitutions manually. - # - # The Tag constructor called this method when the Tag was created, - # but we just set/changed the attributes, so call it again. - self.soup.builder.set_up_substitutions(self.element) - attributes = property(getAttributes, setAttributes) - - def insertText(self, data, insertBefore=None): - text = TextNode(self.soup.new_string(data), self.soup) - if insertBefore: - self.insertBefore(text, insertBefore) - else: - self.appendChild(text) - - def insertBefore(self, node, refNode): - index = self.element.index(refNode.element) - if (node.element.__class__ == NavigableString and self.element.contents - and self.element.contents[index-1].__class__ == NavigableString): - # (See comments in appendChild) - old_node = self.element.contents[index-1] - new_str = self.soup.new_string(old_node + node.element) - old_node.replace_with(new_str) - else: - self.element.insert(index, node.element) - node.parent = self - - def removeChild(self, node): - node.element.extract() - - def reparentChildren(self, new_parent): - """Move all of this tag's children into another tag.""" - # print "MOVE", self.element.contents - # print "FROM", self.element - # print "TO", new_parent.element - - element = self.element - new_parent_element = new_parent.element - # Determine what this tag's next_element will be once all the children - # are removed. - final_next_element = element.next_sibling - - new_parents_last_descendant = new_parent_element._last_descendant(False, False) - if len(new_parent_element.contents) > 0: - # The new parent already contains children. We will be - # appending this tag's children to the end. - new_parents_last_child = new_parent_element.contents[-1] - new_parents_last_descendant_next_element = new_parents_last_descendant.next_element - else: - # The new parent contains no children. - new_parents_last_child = None - new_parents_last_descendant_next_element = new_parent_element.next_element - - to_append = element.contents - if len(to_append) > 0: - # Set the first child's previous_element and previous_sibling - # to elements within the new parent - first_child = to_append[0] - if new_parents_last_descendant: - first_child.previous_element = new_parents_last_descendant - else: - first_child.previous_element = new_parent_element - first_child.previous_sibling = new_parents_last_child - if new_parents_last_descendant: - new_parents_last_descendant.next_element = first_child - else: - new_parent_element.next_element = first_child - if new_parents_last_child: - new_parents_last_child.next_sibling = first_child - - # Find the very last element being moved. It is now the - # parent's last descendant. It has no .next_sibling and - # its .next_element is whatever the previous last - # descendant had. - last_childs_last_descendant = to_append[-1]._last_descendant(False, True) - - last_childs_last_descendant.next_element = new_parents_last_descendant_next_element - if new_parents_last_descendant_next_element: - # TODO: This code has no test coverage and I'm not sure - # how to get html5lib to go through this path, but it's - # just the other side of the previous line. - new_parents_last_descendant_next_element.previous_element = last_childs_last_descendant - last_childs_last_descendant.next_sibling = None - - for child in to_append: - child.parent = new_parent_element - new_parent_element.contents.append(child) - - # Now that this element has no children, change its .next_element. - element.contents = [] - element.next_element = final_next_element - - # print "DONE WITH MOVE" - # print "FROM", self.element - # print "TO", new_parent_element - - def cloneNode(self): - tag = self.soup.new_tag(self.element.name, self.namespace) - node = Element(tag, self.soup, self.namespace) - for key,value in self.attributes: - node.attributes[key] = value - return node - - def hasContent(self): - return self.element.contents - - def getNameTuple(self): - if self.namespace == None: - return namespaces["html"], self.name - else: - return self.namespace, self.name - - nameTuple = property(getNameTuple) - -class TextNode(Element): - def __init__(self, element, soup): - treebuilder_base.Node.__init__(self, None) - self.element = element - self.soup = soup - - def cloneNode(self): - raise NotImplementedError diff --git a/utils/lib/python3.5/site-packages/bs4/builder/_htmlparser.py b/utils/lib/python3.5/site-packages/bs4/builder/_htmlparser.py deleted file mode 100644 index 907d355..0000000 --- a/utils/lib/python3.5/site-packages/bs4/builder/_htmlparser.py +++ /dev/null @@ -1,314 +0,0 @@ -"""Use the HTMLParser library to parse HTML files that aren't too bad.""" - -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. - -__all__ = [ - 'HTMLParserTreeBuilder', - ] - -from html.parser import HTMLParser - -try: - from html.parser import HTMLParseError -except ImportError as e: - # HTMLParseError is removed in Python 3.5. Since it can never be - # thrown in 3.5, we can just define our own class as a placeholder. - class HTMLParseError(Exception): - pass - -import sys -import warnings - -# Starting in Python 3.2, the HTMLParser constructor takes a 'strict' -# argument, which we'd like to set to False. Unfortunately, -# http://bugs.python.org/issue13273 makes strict=True a better bet -# before Python 3.2.3. -# -# At the end of this file, we monkeypatch HTMLParser so that -# strict=True works well on Python 3.2.2. -major, minor, release = sys.version_info[:3] -CONSTRUCTOR_TAKES_STRICT = major == 3 and minor == 2 and release >= 3 -CONSTRUCTOR_STRICT_IS_DEPRECATED = major == 3 and minor == 3 -CONSTRUCTOR_TAKES_CONVERT_CHARREFS = major == 3 and minor >= 4 - - -from bs4.element import ( - CData, - Comment, - Declaration, - Doctype, - ProcessingInstruction, - ) -from bs4.dammit import EntitySubstitution, UnicodeDammit - -from bs4.builder import ( - HTML, - HTMLTreeBuilder, - STRICT, - ) - - -HTMLPARSER = 'html.parser' - -class BeautifulSoupHTMLParser(HTMLParser): - - def __init__(self, *args, **kwargs): - HTMLParser.__init__(self, *args, **kwargs) - - # Keep a list of empty-element tags that were encountered - # without an explicit closing tag. If we encounter a closing tag - # of this type, we'll associate it with one of those entries. - # - # This isn't a stack because we don't care about the - # order. It's a list of closing tags we've already handled and - # will ignore, assuming they ever show up. - self.already_closed_empty_element = [] - - def handle_startendtag(self, name, attrs): - # This is only called when the markup looks like - # . - - # is_startend() tells handle_starttag not to close the tag - # just because its name matches a known empty-element tag. We - # know that this is an empty-element tag and we want to call - # handle_endtag ourselves. - tag = self.handle_starttag(name, attrs, handle_empty_element=False) - self.handle_endtag(name) - - def handle_starttag(self, name, attrs, handle_empty_element=True): - # XXX namespace - attr_dict = {} - for key, value in attrs: - # Change None attribute values to the empty string - # for consistency with the other tree builders. - if value is None: - value = '' - attr_dict[key] = value - attrvalue = '""' - #print "START", name - tag = self.soup.handle_starttag(name, None, None, attr_dict) - if tag and tag.is_empty_element and handle_empty_element: - # Unlike other parsers, html.parser doesn't send separate end tag - # events for empty-element tags. (It's handled in - # handle_startendtag, but only if the original markup looked like - # .) - # - # So we need to call handle_endtag() ourselves. Since we - # know the start event is identical to the end event, we - # don't want handle_endtag() to cross off any previous end - # events for tags of this name. - self.handle_endtag(name, check_already_closed=False) - - # But we might encounter an explicit closing tag for this tag - # later on. If so, we want to ignore it. - self.already_closed_empty_element.append(name) - - def handle_endtag(self, name, check_already_closed=True): - #print "END", name - if check_already_closed and name in self.already_closed_empty_element: - # This is a redundant end tag for an empty-element tag. - # We've already called handle_endtag() for it, so just - # check it off the list. - # print "ALREADY CLOSED", name - self.already_closed_empty_element.remove(name) - else: - self.soup.handle_endtag(name) - - def handle_data(self, data): - self.soup.handle_data(data) - - def handle_charref(self, name): - # XXX workaround for a bug in HTMLParser. Remove this once - # it's fixed in all supported versions. - # http://bugs.python.org/issue13633 - if name.startswith('x'): - real_name = int(name.lstrip('x'), 16) - elif name.startswith('X'): - real_name = int(name.lstrip('X'), 16) - else: - real_name = int(name) - - try: - data = chr(real_name) - except (ValueError, OverflowError) as e: - data = "\N{REPLACEMENT CHARACTER}" - - self.handle_data(data) - - def handle_entityref(self, name): - character = EntitySubstitution.HTML_ENTITY_TO_CHARACTER.get(name) - if character is not None: - data = character - else: - data = "&%s;" % name - self.handle_data(data) - - def handle_comment(self, data): - self.soup.endData() - self.soup.handle_data(data) - self.soup.endData(Comment) - - def handle_decl(self, data): - self.soup.endData() - if data.startswith("DOCTYPE "): - data = data[len("DOCTYPE "):] - elif data == 'DOCTYPE': - # i.e. "" - data = '' - self.soup.handle_data(data) - self.soup.endData(Doctype) - - def unknown_decl(self, data): - if data.upper().startswith('CDATA['): - cls = CData - data = data[len('CDATA['):] - else: - cls = Declaration - self.soup.endData() - self.soup.handle_data(data) - self.soup.endData(cls) - - def handle_pi(self, data): - self.soup.endData() - self.soup.handle_data(data) - self.soup.endData(ProcessingInstruction) - - -class HTMLParserTreeBuilder(HTMLTreeBuilder): - - is_xml = False - picklable = True - NAME = HTMLPARSER - features = [NAME, HTML, STRICT] - - def __init__(self, *args, **kwargs): - if CONSTRUCTOR_TAKES_STRICT and not CONSTRUCTOR_STRICT_IS_DEPRECATED: - kwargs['strict'] = False - if CONSTRUCTOR_TAKES_CONVERT_CHARREFS: - kwargs['convert_charrefs'] = False - self.parser_args = (args, kwargs) - - def prepare_markup(self, markup, user_specified_encoding=None, - document_declared_encoding=None, exclude_encodings=None): - """ - :return: A 4-tuple (markup, original encoding, encoding - declared within markup, whether any characters had to be - replaced with REPLACEMENT CHARACTER). - """ - if isinstance(markup, str): - yield (markup, None, None, False) - return - - try_encodings = [user_specified_encoding, document_declared_encoding] - dammit = UnicodeDammit(markup, try_encodings, is_html=True, - exclude_encodings=exclude_encodings) - yield (dammit.markup, dammit.original_encoding, - dammit.declared_html_encoding, - dammit.contains_replacement_characters) - - def feed(self, markup): - args, kwargs = self.parser_args - parser = BeautifulSoupHTMLParser(*args, **kwargs) - parser.soup = self.soup - try: - parser.feed(markup) - except HTMLParseError as e: - warnings.warn(RuntimeWarning( - "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help.")) - raise e - parser.already_closed_empty_element = [] - -# Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some -# 3.2.3 code. This ensures they don't treat markup like

as a -# string. -# -# XXX This code can be removed once most Python 3 users are on 3.2.3. -if major == 3 and minor == 2 and not CONSTRUCTOR_TAKES_STRICT: - import re - attrfind_tolerant = re.compile( - r'\s*((?<=[\'"\s])[^\s/>][^\s/=>]*)(\s*=+\s*' - r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?') - HTMLParserTreeBuilder.attrfind_tolerant = attrfind_tolerant - - locatestarttagend = re.compile(r""" - <[a-zA-Z][-.a-zA-Z0-9:_]* # tag name - (?:\s+ # whitespace before attribute name - (?:[a-zA-Z_][-.:a-zA-Z0-9_]* # attribute name - (?:\s*=\s* # value indicator - (?:'[^']*' # LITA-enclosed value - |\"[^\"]*\" # LIT-enclosed value - |[^'\">\s]+ # bare value - ) - )? - ) - )* - \s* # trailing whitespace -""", re.VERBOSE) - BeautifulSoupHTMLParser.locatestarttagend = locatestarttagend - - from html.parser import tagfind, attrfind - - def parse_starttag(self, i): - self.__starttag_text = None - endpos = self.check_for_whole_start_tag(i) - if endpos < 0: - return endpos - rawdata = self.rawdata - self.__starttag_text = rawdata[i:endpos] - - # Now parse the data between i+1 and j into a tag and attrs - attrs = [] - match = tagfind.match(rawdata, i+1) - assert match, 'unexpected call to parse_starttag()' - k = match.end() - self.lasttag = tag = rawdata[i+1:k].lower() - while k < endpos: - if self.strict: - m = attrfind.match(rawdata, k) - else: - m = attrfind_tolerant.match(rawdata, k) - if not m: - break - attrname, rest, attrvalue = m.group(1, 2, 3) - if not rest: - attrvalue = None - elif attrvalue[:1] == '\'' == attrvalue[-1:] or \ - attrvalue[:1] == '"' == attrvalue[-1:]: - attrvalue = attrvalue[1:-1] - if attrvalue: - attrvalue = self.unescape(attrvalue) - attrs.append((attrname.lower(), attrvalue)) - k = m.end() - - end = rawdata[k:endpos].strip() - if end not in (">", "/>"): - lineno, offset = self.getpos() - if "\n" in self.__starttag_text: - lineno = lineno + self.__starttag_text.count("\n") - offset = len(self.__starttag_text) \ - - self.__starttag_text.rfind("\n") - else: - offset = offset + len(self.__starttag_text) - if self.strict: - self.error("junk characters in start tag: %r" - % (rawdata[k:endpos][:20],)) - self.handle_data(rawdata[i:endpos]) - return endpos - if end.endswith('/>'): - # XHTML-style empty tag: - self.handle_startendtag(tag, attrs) - else: - self.handle_starttag(tag, attrs) - if tag in self.CDATA_CONTENT_ELEMENTS: - self.set_cdata_mode(tag) - return endpos - - def set_cdata_mode(self, elem): - self.cdata_elem = elem.lower() - self.interesting = re.compile(r'' % self.cdata_elem, re.I) - - BeautifulSoupHTMLParser.parse_starttag = parse_starttag - BeautifulSoupHTMLParser.set_cdata_mode = set_cdata_mode - - CONSTRUCTOR_TAKES_STRICT = True diff --git a/utils/lib/python3.5/site-packages/bs4/builder/_lxml.py b/utils/lib/python3.5/site-packages/bs4/builder/_lxml.py deleted file mode 100644 index 244d457..0000000 --- a/utils/lib/python3.5/site-packages/bs4/builder/_lxml.py +++ /dev/null @@ -1,258 +0,0 @@ -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. -__all__ = [ - 'LXMLTreeBuilderForXML', - 'LXMLTreeBuilder', - ] - -from io import BytesIO -from io import StringIO -import collections -from lxml import etree -from bs4.element import ( - Comment, - Doctype, - NamespacedAttribute, - ProcessingInstruction, - XMLProcessingInstruction, -) -from bs4.builder import ( - FAST, - HTML, - HTMLTreeBuilder, - PERMISSIVE, - ParserRejectedMarkup, - TreeBuilder, - XML) -from bs4.dammit import EncodingDetector - -LXML = 'lxml' - -class LXMLTreeBuilderForXML(TreeBuilder): - DEFAULT_PARSER_CLASS = etree.XMLParser - - is_xml = True - processing_instruction_class = XMLProcessingInstruction - - NAME = "lxml-xml" - ALTERNATE_NAMES = ["xml"] - - # Well, it's permissive by XML parser standards. - features = [NAME, LXML, XML, FAST, PERMISSIVE] - - CHUNK_SIZE = 512 - - # This namespace mapping is specified in the XML Namespace - # standard. - DEFAULT_NSMAPS = {'http://www.w3.org/XML/1998/namespace' : "xml"} - - def default_parser(self, encoding): - # This can either return a parser object or a class, which - # will be instantiated with default arguments. - if self._default_parser is not None: - return self._default_parser - return etree.XMLParser( - target=self, strip_cdata=False, recover=True, encoding=encoding) - - def parser_for(self, encoding): - # Use the default parser. - parser = self.default_parser(encoding) - - if isinstance(parser, collections.Callable): - # Instantiate the parser with default arguments - parser = parser(target=self, strip_cdata=False, encoding=encoding) - return parser - - def __init__(self, parser=None, empty_element_tags=None): - # TODO: Issue a warning if parser is present but not a - # callable, since that means there's no way to create new - # parsers for different encodings. - self._default_parser = parser - if empty_element_tags is not None: - self.empty_element_tags = set(empty_element_tags) - self.soup = None - self.nsmaps = [self.DEFAULT_NSMAPS] - - def _getNsTag(self, tag): - # Split the namespace URL out of a fully-qualified lxml tag - # name. Copied from lxml's src/lxml/sax.py. - if tag[0] == '{': - return tuple(tag[1:].split('}', 1)) - else: - return (None, tag) - - def prepare_markup(self, markup, user_specified_encoding=None, - exclude_encodings=None, - document_declared_encoding=None): - """ - :yield: A series of 4-tuples. - (markup, encoding, declared encoding, - has undergone character replacement) - - Each 4-tuple represents a strategy for parsing the document. - """ - # Instead of using UnicodeDammit to convert the bytestring to - # Unicode using different encodings, use EncodingDetector to - # iterate over the encodings, and tell lxml to try to parse - # the document as each one in turn. - is_html = not self.is_xml - if is_html: - self.processing_instruction_class = ProcessingInstruction - else: - self.processing_instruction_class = XMLProcessingInstruction - - if isinstance(markup, str): - # We were given Unicode. Maybe lxml can parse Unicode on - # this system? - yield markup, None, document_declared_encoding, False - - if isinstance(markup, str): - # No, apparently not. Convert the Unicode to UTF-8 and - # tell lxml to parse it as UTF-8. - yield (markup.encode("utf8"), "utf8", - document_declared_encoding, False) - - try_encodings = [user_specified_encoding, document_declared_encoding] - detector = EncodingDetector( - markup, try_encodings, is_html, exclude_encodings) - for encoding in detector.encodings: - yield (detector.markup, encoding, document_declared_encoding, False) - - def feed(self, markup): - if isinstance(markup, bytes): - markup = BytesIO(markup) - elif isinstance(markup, str): - markup = StringIO(markup) - - # Call feed() at least once, even if the markup is empty, - # or the parser won't be initialized. - data = markup.read(self.CHUNK_SIZE) - try: - self.parser = self.parser_for(self.soup.original_encoding) - self.parser.feed(data) - while len(data) != 0: - # Now call feed() on the rest of the data, chunk by chunk. - data = markup.read(self.CHUNK_SIZE) - if len(data) != 0: - self.parser.feed(data) - self.parser.close() - except (UnicodeDecodeError, LookupError, etree.ParserError) as e: - raise ParserRejectedMarkup(str(e)) - - def close(self): - self.nsmaps = [self.DEFAULT_NSMAPS] - - def start(self, name, attrs, nsmap={}): - # Make sure attrs is a mutable dict--lxml may send an immutable dictproxy. - attrs = dict(attrs) - nsprefix = None - # Invert each namespace map as it comes in. - if len(self.nsmaps) > 1: - # There are no new namespaces for this tag, but - # non-default namespaces are in play, so we need a - # separate tag stack to know when they end. - self.nsmaps.append(None) - elif len(nsmap) > 0: - # A new namespace mapping has come into play. - inverted_nsmap = dict((value, key) for key, value in list(nsmap.items())) - self.nsmaps.append(inverted_nsmap) - # Also treat the namespace mapping as a set of attributes on the - # tag, so we can recreate it later. - attrs = attrs.copy() - for prefix, namespace in list(nsmap.items()): - attribute = NamespacedAttribute( - "xmlns", prefix, "http://www.w3.org/2000/xmlns/") - attrs[attribute] = namespace - - # Namespaces are in play. Find any attributes that came in - # from lxml with namespaces attached to their names, and - # turn then into NamespacedAttribute objects. - new_attrs = {} - for attr, value in list(attrs.items()): - namespace, attr = self._getNsTag(attr) - if namespace is None: - new_attrs[attr] = value - else: - nsprefix = self._prefix_for_namespace(namespace) - attr = NamespacedAttribute(nsprefix, attr, namespace) - new_attrs[attr] = value - attrs = new_attrs - - namespace, name = self._getNsTag(name) - nsprefix = self._prefix_for_namespace(namespace) - self.soup.handle_starttag(name, namespace, nsprefix, attrs) - - def _prefix_for_namespace(self, namespace): - """Find the currently active prefix for the given namespace.""" - if namespace is None: - return None - for inverted_nsmap in reversed(self.nsmaps): - if inverted_nsmap is not None and namespace in inverted_nsmap: - return inverted_nsmap[namespace] - return None - - def end(self, name): - self.soup.endData() - completed_tag = self.soup.tagStack[-1] - namespace, name = self._getNsTag(name) - nsprefix = None - if namespace is not None: - for inverted_nsmap in reversed(self.nsmaps): - if inverted_nsmap is not None and namespace in inverted_nsmap: - nsprefix = inverted_nsmap[namespace] - break - self.soup.handle_endtag(name, nsprefix) - if len(self.nsmaps) > 1: - # This tag, or one of its parents, introduced a namespace - # mapping, so pop it off the stack. - self.nsmaps.pop() - - def pi(self, target, data): - self.soup.endData() - self.soup.handle_data(target + ' ' + data) - self.soup.endData(self.processing_instruction_class) - - def data(self, content): - self.soup.handle_data(content) - - def doctype(self, name, pubid, system): - self.soup.endData() - doctype = Doctype.for_name_and_ids(name, pubid, system) - self.soup.object_was_parsed(doctype) - - def comment(self, content): - "Handle comments as Comment objects." - self.soup.endData() - self.soup.handle_data(content) - self.soup.endData(Comment) - - def test_fragment_to_document(self, fragment): - """See `TreeBuilder`.""" - return '\n%s' % fragment - - -class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML): - - NAME = LXML - ALTERNATE_NAMES = ["lxml-html"] - - features = ALTERNATE_NAMES + [NAME, HTML, FAST, PERMISSIVE] - is_xml = False - processing_instruction_class = ProcessingInstruction - - def default_parser(self, encoding): - return etree.HTMLParser - - def feed(self, markup): - encoding = self.soup.original_encoding - try: - self.parser = self.parser_for(encoding) - self.parser.feed(markup) - self.parser.close() - except (UnicodeDecodeError, LookupError, etree.ParserError) as e: - raise ParserRejectedMarkup(str(e)) - - - def test_fragment_to_document(self, fragment): - """See `TreeBuilder`.""" - return '%s' % fragment diff --git a/utils/lib/python3.5/site-packages/bs4/dammit.py b/utils/lib/python3.5/site-packages/bs4/dammit.py deleted file mode 100644 index 8e399e0..0000000 --- a/utils/lib/python3.5/site-packages/bs4/dammit.py +++ /dev/null @@ -1,842 +0,0 @@ -# -*- coding: utf-8 -*- -"""Beautiful Soup bonus library: Unicode, Dammit - -This library converts a bytestream to Unicode through any means -necessary. It is heavily based on code from Mark Pilgrim's Universal -Feed Parser. It works best on XML and HTML, but it does not rewrite the -XML or HTML to reflect a new encoding; that's the tree builder's job. -""" -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. -__license__ = "MIT" - -import codecs -from html.entities import codepoint2name -import re -import logging -import string - -# Import a library to autodetect character encodings. -chardet_type = None -try: - # First try the fast C implementation. - # PyPI package: cchardet - import cchardet - def chardet_dammit(s): - return cchardet.detect(s)['encoding'] -except ImportError: - try: - # Fall back to the pure Python implementation - # Debian package: python-chardet - # PyPI package: chardet - import chardet - def chardet_dammit(s): - return chardet.detect(s)['encoding'] - #import chardet.constants - #chardet.constants._debug = 1 - except ImportError: - # No chardet available. - def chardet_dammit(s): - return None - -# Available from http://cjkpython.i18n.org/. -try: - import iconv_codec -except ImportError: - pass - -xml_encoding_re = re.compile( - '^<\?.*encoding=[\'"](.*?)[\'"].*\?>'.encode(), re.I) -html_meta_re = re.compile( - '<\s*meta[^>]+charset\s*=\s*["\']?([^>]*?)[ /;\'">]'.encode(), re.I) - -class EntitySubstitution(object): - - """Substitute XML or HTML entities for the corresponding characters.""" - - def _populate_class_variables(): - lookup = {} - reverse_lookup = {} - characters_for_re = [] - for codepoint, name in list(codepoint2name.items()): - character = chr(codepoint) - if codepoint != 34: - # There's no point in turning the quotation mark into - # ", unless it happens within an attribute value, which - # is handled elsewhere. - characters_for_re.append(character) - lookup[character] = name - # But we do want to turn " into the quotation mark. - reverse_lookup[name] = character - re_definition = "[%s]" % "".join(characters_for_re) - return lookup, reverse_lookup, re.compile(re_definition) - (CHARACTER_TO_HTML_ENTITY, HTML_ENTITY_TO_CHARACTER, - CHARACTER_TO_HTML_ENTITY_RE) = _populate_class_variables() - - CHARACTER_TO_XML_ENTITY = { - "'": "apos", - '"': "quot", - "&": "amp", - "<": "lt", - ">": "gt", - } - - BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|" - "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)" - ")") - - AMPERSAND_OR_BRACKET = re.compile("([<>&])") - - @classmethod - def _substitute_html_entity(cls, matchobj): - entity = cls.CHARACTER_TO_HTML_ENTITY.get(matchobj.group(0)) - return "&%s;" % entity - - @classmethod - def _substitute_xml_entity(cls, matchobj): - """Used with a regular expression to substitute the - appropriate XML entity for an XML special character.""" - entity = cls.CHARACTER_TO_XML_ENTITY[matchobj.group(0)] - return "&%s;" % entity - - @classmethod - def quoted_attribute_value(self, value): - """Make a value into a quoted XML attribute, possibly escaping it. - - Most strings will be quoted using double quotes. - - Bob's Bar -> "Bob's Bar" - - If a string contains double quotes, it will be quoted using - single quotes. - - Welcome to "my bar" -> 'Welcome to "my bar"' - - If a string contains both single and double quotes, the - double quotes will be escaped, and the string will be quoted - using double quotes. - - Welcome to "Bob's Bar" -> "Welcome to "Bob's bar" - """ - quote_with = '"' - if '"' in value: - if "'" in value: - # The string contains both single and double - # quotes. Turn the double quotes into - # entities. We quote the double quotes rather than - # the single quotes because the entity name is - # """ whether this is HTML or XML. If we - # quoted the single quotes, we'd have to decide - # between ' and &squot;. - replace_with = """ - value = value.replace('"', replace_with) - else: - # There are double quotes but no single quotes. - # We can use single quotes to quote the attribute. - quote_with = "'" - return quote_with + value + quote_with - - @classmethod - def substitute_xml(cls, value, make_quoted_attribute=False): - """Substitute XML entities for special XML characters. - - :param value: A string to be substituted. The less-than sign - will become <, the greater-than sign will become >, - and any ampersands will become &. If you want ampersands - that appear to be part of an entity definition to be left - alone, use substitute_xml_containing_entities() instead. - - :param make_quoted_attribute: If True, then the string will be - quoted, as befits an attribute value. - """ - # Escape angle brackets and ampersands. - value = cls.AMPERSAND_OR_BRACKET.sub( - cls._substitute_xml_entity, value) - - if make_quoted_attribute: - value = cls.quoted_attribute_value(value) - return value - - @classmethod - def substitute_xml_containing_entities( - cls, value, make_quoted_attribute=False): - """Substitute XML entities for special XML characters. - - :param value: A string to be substituted. The less-than sign will - become <, the greater-than sign will become >, and any - ampersands that are not part of an entity defition will - become &. - - :param make_quoted_attribute: If True, then the string will be - quoted, as befits an attribute value. - """ - # Escape angle brackets, and ampersands that aren't part of - # entities. - value = cls.BARE_AMPERSAND_OR_BRACKET.sub( - cls._substitute_xml_entity, value) - - if make_quoted_attribute: - value = cls.quoted_attribute_value(value) - return value - - @classmethod - def substitute_html(cls, s): - """Replace certain Unicode characters with named HTML entities. - - This differs from data.encode(encoding, 'xmlcharrefreplace') - in that the goal is to make the result more readable (to those - with ASCII displays) rather than to recover from - errors. There's absolutely nothing wrong with a UTF-8 string - containg a LATIN SMALL LETTER E WITH ACUTE, but replacing that - character with "é" will make it more readable to some - people. - """ - return cls.CHARACTER_TO_HTML_ENTITY_RE.sub( - cls._substitute_html_entity, s) - - -class EncodingDetector: - """Suggests a number of possible encodings for a bytestring. - - Order of precedence: - - 1. Encodings you specifically tell EncodingDetector to try first - (the override_encodings argument to the constructor). - - 2. An encoding declared within the bytestring itself, either in an - XML declaration (if the bytestring is to be interpreted as an XML - document), or in a tag (if the bytestring is to be - interpreted as an HTML document.) - - 3. An encoding detected through textual analysis by chardet, - cchardet, or a similar external library. - - 4. UTF-8. - - 5. Windows-1252. - """ - def __init__(self, markup, override_encodings=None, is_html=False, - exclude_encodings=None): - self.override_encodings = override_encodings or [] - exclude_encodings = exclude_encodings or [] - self.exclude_encodings = set([x.lower() for x in exclude_encodings]) - self.chardet_encoding = None - self.is_html = is_html - self.declared_encoding = None - - # First order of business: strip a byte-order mark. - self.markup, self.sniffed_encoding = self.strip_byte_order_mark(markup) - - def _usable(self, encoding, tried): - if encoding is not None: - encoding = encoding.lower() - if encoding in self.exclude_encodings: - return False - if encoding not in tried: - tried.add(encoding) - return True - return False - - @property - def encodings(self): - """Yield a number of encodings that might work for this markup.""" - tried = set() - for e in self.override_encodings: - if self._usable(e, tried): - yield e - - # Did the document originally start with a byte-order mark - # that indicated its encoding? - if self._usable(self.sniffed_encoding, tried): - yield self.sniffed_encoding - - # Look within the document for an XML or HTML encoding - # declaration. - if self.declared_encoding is None: - self.declared_encoding = self.find_declared_encoding( - self.markup, self.is_html) - if self._usable(self.declared_encoding, tried): - yield self.declared_encoding - - # Use third-party character set detection to guess at the - # encoding. - if self.chardet_encoding is None: - self.chardet_encoding = chardet_dammit(self.markup) - if self._usable(self.chardet_encoding, tried): - yield self.chardet_encoding - - # As a last-ditch effort, try utf-8 and windows-1252. - for e in ('utf-8', 'windows-1252'): - if self._usable(e, tried): - yield e - - @classmethod - def strip_byte_order_mark(cls, data): - """If a byte-order mark is present, strip it and return the encoding it implies.""" - encoding = None - if isinstance(data, str): - # Unicode data cannot have a byte-order mark. - return data, encoding - if (len(data) >= 4) and (data[:2] == b'\xfe\xff') \ - and (data[2:4] != '\x00\x00'): - encoding = 'utf-16be' - data = data[2:] - elif (len(data) >= 4) and (data[:2] == b'\xff\xfe') \ - and (data[2:4] != '\x00\x00'): - encoding = 'utf-16le' - data = data[2:] - elif data[:3] == b'\xef\xbb\xbf': - encoding = 'utf-8' - data = data[3:] - elif data[:4] == b'\x00\x00\xfe\xff': - encoding = 'utf-32be' - data = data[4:] - elif data[:4] == b'\xff\xfe\x00\x00': - encoding = 'utf-32le' - data = data[4:] - return data, encoding - - @classmethod - def find_declared_encoding(cls, markup, is_html=False, search_entire_document=False): - """Given a document, tries to find its declared encoding. - - An XML encoding is declared at the beginning of the document. - - An HTML encoding is declared in a tag, hopefully near the - beginning of the document. - """ - if search_entire_document: - xml_endpos = html_endpos = len(markup) - else: - xml_endpos = 1024 - html_endpos = max(2048, int(len(markup) * 0.05)) - - declared_encoding = None - declared_encoding_match = xml_encoding_re.search(markup, endpos=xml_endpos) - if not declared_encoding_match and is_html: - declared_encoding_match = html_meta_re.search(markup, endpos=html_endpos) - if declared_encoding_match is not None: - declared_encoding = declared_encoding_match.groups()[0].decode( - 'ascii', 'replace') - if declared_encoding: - return declared_encoding.lower() - return None - -class UnicodeDammit: - """A class for detecting the encoding of a *ML document and - converting it to a Unicode string. If the source encoding is - windows-1252, can replace MS smart quotes with their HTML or XML - equivalents.""" - - # This dictionary maps commonly seen values for "charset" in HTML - # meta tags to the corresponding Python codec names. It only covers - # values that aren't in Python's aliases and can't be determined - # by the heuristics in find_codec. - CHARSET_ALIASES = {"macintosh": "mac-roman", - "x-sjis": "shift-jis"} - - ENCODINGS_WITH_SMART_QUOTES = [ - "windows-1252", - "iso-8859-1", - "iso-8859-2", - ] - - def __init__(self, markup, override_encodings=[], - smart_quotes_to=None, is_html=False, exclude_encodings=[]): - self.smart_quotes_to = smart_quotes_to - self.tried_encodings = [] - self.contains_replacement_characters = False - self.is_html = is_html - self.log = logging.getLogger(__name__) - self.detector = EncodingDetector( - markup, override_encodings, is_html, exclude_encodings) - - # Short-circuit if the data is in Unicode to begin with. - if isinstance(markup, str) or markup == '': - self.markup = markup - self.unicode_markup = str(markup) - self.original_encoding = None - return - - # The encoding detector may have stripped a byte-order mark. - # Use the stripped markup from this point on. - self.markup = self.detector.markup - - u = None - for encoding in self.detector.encodings: - markup = self.detector.markup - u = self._convert_from(encoding) - if u is not None: - break - - if not u: - # None of the encodings worked. As an absolute last resort, - # try them again with character replacement. - - for encoding in self.detector.encodings: - if encoding != "ascii": - u = self._convert_from(encoding, "replace") - if u is not None: - self.log.warning( - "Some characters could not be decoded, and were " - "replaced with REPLACEMENT CHARACTER." - ) - self.contains_replacement_characters = True - break - - # If none of that worked, we could at this point force it to - # ASCII, but that would destroy so much data that I think - # giving up is better. - self.unicode_markup = u - if not u: - self.original_encoding = None - - def _sub_ms_char(self, match): - """Changes a MS smart quote character to an XML or HTML - entity, or an ASCII character.""" - orig = match.group(1) - if self.smart_quotes_to == 'ascii': - sub = self.MS_CHARS_TO_ASCII.get(orig).encode() - else: - sub = self.MS_CHARS.get(orig) - if type(sub) == tuple: - if self.smart_quotes_to == 'xml': - sub = '&#x'.encode() + sub[1].encode() + ';'.encode() - else: - sub = '&'.encode() + sub[0].encode() + ';'.encode() - else: - sub = sub.encode() - return sub - - def _convert_from(self, proposed, errors="strict"): - proposed = self.find_codec(proposed) - if not proposed or (proposed, errors) in self.tried_encodings: - return None - self.tried_encodings.append((proposed, errors)) - markup = self.markup - # Convert smart quotes to HTML if coming from an encoding - # that might have them. - if (self.smart_quotes_to is not None - and proposed in self.ENCODINGS_WITH_SMART_QUOTES): - smart_quotes_re = b"([\x80-\x9f])" - smart_quotes_compiled = re.compile(smart_quotes_re) - markup = smart_quotes_compiled.sub(self._sub_ms_char, markup) - - try: - #print "Trying to convert document to %s (errors=%s)" % ( - # proposed, errors) - u = self._to_unicode(markup, proposed, errors) - self.markup = u - self.original_encoding = proposed - except Exception as e: - #print "That didn't work!" - #print e - return None - #print "Correct encoding: %s" % proposed - return self.markup - - def _to_unicode(self, data, encoding, errors="strict"): - '''Given a string and its encoding, decodes the string into Unicode. - %encoding is a string recognized by encodings.aliases''' - return str(data, encoding, errors) - - @property - def declared_html_encoding(self): - if not self.is_html: - return None - return self.detector.declared_encoding - - def find_codec(self, charset): - value = (self._codec(self.CHARSET_ALIASES.get(charset, charset)) - or (charset and self._codec(charset.replace("-", ""))) - or (charset and self._codec(charset.replace("-", "_"))) - or (charset and charset.lower()) - or charset - ) - if value: - return value.lower() - return None - - def _codec(self, charset): - if not charset: - return charset - codec = None - try: - codecs.lookup(charset) - codec = charset - except (LookupError, ValueError): - pass - return codec - - - # A partial mapping of ISO-Latin-1 to HTML entities/XML numeric entities. - MS_CHARS = {b'\x80': ('euro', '20AC'), - b'\x81': ' ', - b'\x82': ('sbquo', '201A'), - b'\x83': ('fnof', '192'), - b'\x84': ('bdquo', '201E'), - b'\x85': ('hellip', '2026'), - b'\x86': ('dagger', '2020'), - b'\x87': ('Dagger', '2021'), - b'\x88': ('circ', '2C6'), - b'\x89': ('permil', '2030'), - b'\x8A': ('Scaron', '160'), - b'\x8B': ('lsaquo', '2039'), - b'\x8C': ('OElig', '152'), - b'\x8D': '?', - b'\x8E': ('#x17D', '17D'), - b'\x8F': '?', - b'\x90': '?', - b'\x91': ('lsquo', '2018'), - b'\x92': ('rsquo', '2019'), - b'\x93': ('ldquo', '201C'), - b'\x94': ('rdquo', '201D'), - b'\x95': ('bull', '2022'), - b'\x96': ('ndash', '2013'), - b'\x97': ('mdash', '2014'), - b'\x98': ('tilde', '2DC'), - b'\x99': ('trade', '2122'), - b'\x9a': ('scaron', '161'), - b'\x9b': ('rsaquo', '203A'), - b'\x9c': ('oelig', '153'), - b'\x9d': '?', - b'\x9e': ('#x17E', '17E'), - b'\x9f': ('Yuml', ''),} - - # A parochial partial mapping of ISO-Latin-1 to ASCII. Contains - # horrors like stripping diacritical marks to turn á into a, but also - # contains non-horrors like turning “ into ". - MS_CHARS_TO_ASCII = { - b'\x80' : 'EUR', - b'\x81' : ' ', - b'\x82' : ',', - b'\x83' : 'f', - b'\x84' : ',,', - b'\x85' : '...', - b'\x86' : '+', - b'\x87' : '++', - b'\x88' : '^', - b'\x89' : '%', - b'\x8a' : 'S', - b'\x8b' : '<', - b'\x8c' : 'OE', - b'\x8d' : '?', - b'\x8e' : 'Z', - b'\x8f' : '?', - b'\x90' : '?', - b'\x91' : "'", - b'\x92' : "'", - b'\x93' : '"', - b'\x94' : '"', - b'\x95' : '*', - b'\x96' : '-', - b'\x97' : '--', - b'\x98' : '~', - b'\x99' : '(TM)', - b'\x9a' : 's', - b'\x9b' : '>', - b'\x9c' : 'oe', - b'\x9d' : '?', - b'\x9e' : 'z', - b'\x9f' : 'Y', - b'\xa0' : ' ', - b'\xa1' : '!', - b'\xa2' : 'c', - b'\xa3' : 'GBP', - b'\xa4' : '$', #This approximation is especially parochial--this is the - #generic currency symbol. - b'\xa5' : 'YEN', - b'\xa6' : '|', - b'\xa7' : 'S', - b'\xa8' : '..', - b'\xa9' : '', - b'\xaa' : '(th)', - b'\xab' : '<<', - b'\xac' : '!', - b'\xad' : ' ', - b'\xae' : '(R)', - b'\xaf' : '-', - b'\xb0' : 'o', - b'\xb1' : '+-', - b'\xb2' : '2', - b'\xb3' : '3', - b'\xb4' : ("'", 'acute'), - b'\xb5' : 'u', - b'\xb6' : 'P', - b'\xb7' : '*', - b'\xb8' : ',', - b'\xb9' : '1', - b'\xba' : '(th)', - b'\xbb' : '>>', - b'\xbc' : '1/4', - b'\xbd' : '1/2', - b'\xbe' : '3/4', - b'\xbf' : '?', - b'\xc0' : 'A', - b'\xc1' : 'A', - b'\xc2' : 'A', - b'\xc3' : 'A', - b'\xc4' : 'A', - b'\xc5' : 'A', - b'\xc6' : 'AE', - b'\xc7' : 'C', - b'\xc8' : 'E', - b'\xc9' : 'E', - b'\xca' : 'E', - b'\xcb' : 'E', - b'\xcc' : 'I', - b'\xcd' : 'I', - b'\xce' : 'I', - b'\xcf' : 'I', - b'\xd0' : 'D', - b'\xd1' : 'N', - b'\xd2' : 'O', - b'\xd3' : 'O', - b'\xd4' : 'O', - b'\xd5' : 'O', - b'\xd6' : 'O', - b'\xd7' : '*', - b'\xd8' : 'O', - b'\xd9' : 'U', - b'\xda' : 'U', - b'\xdb' : 'U', - b'\xdc' : 'U', - b'\xdd' : 'Y', - b'\xde' : 'b', - b'\xdf' : 'B', - b'\xe0' : 'a', - b'\xe1' : 'a', - b'\xe2' : 'a', - b'\xe3' : 'a', - b'\xe4' : 'a', - b'\xe5' : 'a', - b'\xe6' : 'ae', - b'\xe7' : 'c', - b'\xe8' : 'e', - b'\xe9' : 'e', - b'\xea' : 'e', - b'\xeb' : 'e', - b'\xec' : 'i', - b'\xed' : 'i', - b'\xee' : 'i', - b'\xef' : 'i', - b'\xf0' : 'o', - b'\xf1' : 'n', - b'\xf2' : 'o', - b'\xf3' : 'o', - b'\xf4' : 'o', - b'\xf5' : 'o', - b'\xf6' : 'o', - b'\xf7' : '/', - b'\xf8' : 'o', - b'\xf9' : 'u', - b'\xfa' : 'u', - b'\xfb' : 'u', - b'\xfc' : 'u', - b'\xfd' : 'y', - b'\xfe' : 'b', - b'\xff' : 'y', - } - - # A map used when removing rogue Windows-1252/ISO-8859-1 - # characters in otherwise UTF-8 documents. - # - # Note that \x81, \x8d, \x8f, \x90, and \x9d are undefined in - # Windows-1252. - WINDOWS_1252_TO_UTF8 = { - 0x80 : b'\xe2\x82\xac', # € - 0x82 : b'\xe2\x80\x9a', # ‚ - 0x83 : b'\xc6\x92', # ƒ - 0x84 : b'\xe2\x80\x9e', # „ - 0x85 : b'\xe2\x80\xa6', # … - 0x86 : b'\xe2\x80\xa0', # † - 0x87 : b'\xe2\x80\xa1', # ‡ - 0x88 : b'\xcb\x86', # ˆ - 0x89 : b'\xe2\x80\xb0', # ‰ - 0x8a : b'\xc5\xa0', # Š - 0x8b : b'\xe2\x80\xb9', # ‹ - 0x8c : b'\xc5\x92', # Œ - 0x8e : b'\xc5\xbd', # Ž - 0x91 : b'\xe2\x80\x98', # ‘ - 0x92 : b'\xe2\x80\x99', # ’ - 0x93 : b'\xe2\x80\x9c', # “ - 0x94 : b'\xe2\x80\x9d', # ” - 0x95 : b'\xe2\x80\xa2', # • - 0x96 : b'\xe2\x80\x93', # – - 0x97 : b'\xe2\x80\x94', # — - 0x98 : b'\xcb\x9c', # ˜ - 0x99 : b'\xe2\x84\xa2', # ™ - 0x9a : b'\xc5\xa1', # š - 0x9b : b'\xe2\x80\xba', # › - 0x9c : b'\xc5\x93', # œ - 0x9e : b'\xc5\xbe', # ž - 0x9f : b'\xc5\xb8', # Ÿ - 0xa0 : b'\xc2\xa0', #   - 0xa1 : b'\xc2\xa1', # ¡ - 0xa2 : b'\xc2\xa2', # ¢ - 0xa3 : b'\xc2\xa3', # £ - 0xa4 : b'\xc2\xa4', # ¤ - 0xa5 : b'\xc2\xa5', # ¥ - 0xa6 : b'\xc2\xa6', # ¦ - 0xa7 : b'\xc2\xa7', # § - 0xa8 : b'\xc2\xa8', # ¨ - 0xa9 : b'\xc2\xa9', # © - 0xaa : b'\xc2\xaa', # ª - 0xab : b'\xc2\xab', # « - 0xac : b'\xc2\xac', # ¬ - 0xad : b'\xc2\xad', # ­ - 0xae : b'\xc2\xae', # ® - 0xaf : b'\xc2\xaf', # ¯ - 0xb0 : b'\xc2\xb0', # ° - 0xb1 : b'\xc2\xb1', # ± - 0xb2 : b'\xc2\xb2', # ² - 0xb3 : b'\xc2\xb3', # ³ - 0xb4 : b'\xc2\xb4', # ´ - 0xb5 : b'\xc2\xb5', # µ - 0xb6 : b'\xc2\xb6', # ¶ - 0xb7 : b'\xc2\xb7', # · - 0xb8 : b'\xc2\xb8', # ¸ - 0xb9 : b'\xc2\xb9', # ¹ - 0xba : b'\xc2\xba', # º - 0xbb : b'\xc2\xbb', # » - 0xbc : b'\xc2\xbc', # ¼ - 0xbd : b'\xc2\xbd', # ½ - 0xbe : b'\xc2\xbe', # ¾ - 0xbf : b'\xc2\xbf', # ¿ - 0xc0 : b'\xc3\x80', # À - 0xc1 : b'\xc3\x81', # Á - 0xc2 : b'\xc3\x82', #  - 0xc3 : b'\xc3\x83', # à - 0xc4 : b'\xc3\x84', # Ä - 0xc5 : b'\xc3\x85', # Å - 0xc6 : b'\xc3\x86', # Æ - 0xc7 : b'\xc3\x87', # Ç - 0xc8 : b'\xc3\x88', # È - 0xc9 : b'\xc3\x89', # É - 0xca : b'\xc3\x8a', # Ê - 0xcb : b'\xc3\x8b', # Ë - 0xcc : b'\xc3\x8c', # Ì - 0xcd : b'\xc3\x8d', # Í - 0xce : b'\xc3\x8e', # Î - 0xcf : b'\xc3\x8f', # Ï - 0xd0 : b'\xc3\x90', # Ð - 0xd1 : b'\xc3\x91', # Ñ - 0xd2 : b'\xc3\x92', # Ò - 0xd3 : b'\xc3\x93', # Ó - 0xd4 : b'\xc3\x94', # Ô - 0xd5 : b'\xc3\x95', # Õ - 0xd6 : b'\xc3\x96', # Ö - 0xd7 : b'\xc3\x97', # × - 0xd8 : b'\xc3\x98', # Ø - 0xd9 : b'\xc3\x99', # Ù - 0xda : b'\xc3\x9a', # Ú - 0xdb : b'\xc3\x9b', # Û - 0xdc : b'\xc3\x9c', # Ü - 0xdd : b'\xc3\x9d', # Ý - 0xde : b'\xc3\x9e', # Þ - 0xdf : b'\xc3\x9f', # ß - 0xe0 : b'\xc3\xa0', # à - 0xe1 : b'\xa1', # á - 0xe2 : b'\xc3\xa2', # â - 0xe3 : b'\xc3\xa3', # ã - 0xe4 : b'\xc3\xa4', # ä - 0xe5 : b'\xc3\xa5', # å - 0xe6 : b'\xc3\xa6', # æ - 0xe7 : b'\xc3\xa7', # ç - 0xe8 : b'\xc3\xa8', # è - 0xe9 : b'\xc3\xa9', # é - 0xea : b'\xc3\xaa', # ê - 0xeb : b'\xc3\xab', # ë - 0xec : b'\xc3\xac', # ì - 0xed : b'\xc3\xad', # í - 0xee : b'\xc3\xae', # î - 0xef : b'\xc3\xaf', # ï - 0xf0 : b'\xc3\xb0', # ð - 0xf1 : b'\xc3\xb1', # ñ - 0xf2 : b'\xc3\xb2', # ò - 0xf3 : b'\xc3\xb3', # ó - 0xf4 : b'\xc3\xb4', # ô - 0xf5 : b'\xc3\xb5', # õ - 0xf6 : b'\xc3\xb6', # ö - 0xf7 : b'\xc3\xb7', # ÷ - 0xf8 : b'\xc3\xb8', # ø - 0xf9 : b'\xc3\xb9', # ù - 0xfa : b'\xc3\xba', # ú - 0xfb : b'\xc3\xbb', # û - 0xfc : b'\xc3\xbc', # ü - 0xfd : b'\xc3\xbd', # ý - 0xfe : b'\xc3\xbe', # þ - } - - MULTIBYTE_MARKERS_AND_SIZES = [ - (0xc2, 0xdf, 2), # 2-byte characters start with a byte C2-DF - (0xe0, 0xef, 3), # 3-byte characters start with E0-EF - (0xf0, 0xf4, 4), # 4-byte characters start with F0-F4 - ] - - FIRST_MULTIBYTE_MARKER = MULTIBYTE_MARKERS_AND_SIZES[0][0] - LAST_MULTIBYTE_MARKER = MULTIBYTE_MARKERS_AND_SIZES[-1][1] - - @classmethod - def detwingle(cls, in_bytes, main_encoding="utf8", - embedded_encoding="windows-1252"): - """Fix characters from one encoding embedded in some other encoding. - - Currently the only situation supported is Windows-1252 (or its - subset ISO-8859-1), embedded in UTF-8. - - The input must be a bytestring. If you've already converted - the document to Unicode, you're too late. - - The output is a bytestring in which `embedded_encoding` - characters have been converted to their `main_encoding` - equivalents. - """ - if embedded_encoding.replace('_', '-').lower() not in ( - 'windows-1252', 'windows_1252'): - raise NotImplementedError( - "Windows-1252 and ISO-8859-1 are the only currently supported " - "embedded encodings.") - - if main_encoding.lower() not in ('utf8', 'utf-8'): - raise NotImplementedError( - "UTF-8 is the only currently supported main encoding.") - - byte_chunks = [] - - chunk_start = 0 - pos = 0 - while pos < len(in_bytes): - byte = in_bytes[pos] - if not isinstance(byte, int): - # Python 2.x - byte = ord(byte) - if (byte >= cls.FIRST_MULTIBYTE_MARKER - and byte <= cls.LAST_MULTIBYTE_MARKER): - # This is the start of a UTF-8 multibyte character. Skip - # to the end. - for start, end, size in cls.MULTIBYTE_MARKERS_AND_SIZES: - if byte >= start and byte <= end: - pos += size - break - elif byte >= 0x80 and byte in cls.WINDOWS_1252_TO_UTF8: - # We found a Windows-1252 character! - # Save the string up to this point as a chunk. - byte_chunks.append(in_bytes[chunk_start:pos]) - - # Now translate the Windows-1252 character into UTF-8 - # and add it as another, one-byte chunk. - byte_chunks.append(cls.WINDOWS_1252_TO_UTF8[byte]) - pos += 1 - chunk_start = pos - else: - # Go on to the next character. - pos += 1 - if chunk_start == 0: - # The string is unchanged. - return in_bytes - else: - # Store the final chunk. - byte_chunks.append(in_bytes[chunk_start:]) - return b''.join(byte_chunks) - diff --git a/utils/lib/python3.5/site-packages/bs4/diagnose.py b/utils/lib/python3.5/site-packages/bs4/diagnose.py deleted file mode 100644 index 1254861..0000000 --- a/utils/lib/python3.5/site-packages/bs4/diagnose.py +++ /dev/null @@ -1,219 +0,0 @@ -"""Diagnostic functions, mainly for use when doing tech support.""" - -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. -__license__ = "MIT" - -import cProfile -from io import StringIO -from html.parser import HTMLParser -import bs4 -from bs4 import BeautifulSoup, __version__ -from bs4.builder import builder_registry - -import os -import pstats -import random -import tempfile -import time -import traceback -import sys -import cProfile - -def diagnose(data): - """Diagnostic suite for isolating common problems.""" - print("Diagnostic running on Beautiful Soup %s" % __version__) - print("Python version %s" % sys.version) - - basic_parsers = ["html.parser", "html5lib", "lxml"] - for name in basic_parsers: - for builder in builder_registry.builders: - if name in builder.features: - break - else: - basic_parsers.remove(name) - print(( - "I noticed that %s is not installed. Installing it may help." % - name)) - - if 'lxml' in basic_parsers: - basic_parsers.append(["lxml", "xml"]) - try: - from lxml import etree - print("Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION))) - except ImportError as e: - print ( - "lxml is not installed or couldn't be imported.") - - - if 'html5lib' in basic_parsers: - try: - import html5lib - print("Found html5lib version %s" % html5lib.__version__) - except ImportError as e: - print ( - "html5lib is not installed or couldn't be imported.") - - if hasattr(data, 'read'): - data = data.read() - elif os.path.exists(data): - print('"%s" looks like a filename. Reading data from the file.' % data) - with open(data) as fp: - data = fp.read() - elif data.startswith("http:") or data.startswith("https:"): - print('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data) - print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.") - return - print() - - for parser in basic_parsers: - print("Trying to parse your markup with %s" % parser) - success = False - try: - soup = BeautifulSoup(data, parser) - success = True - except Exception as e: - print("%s could not parse the markup." % parser) - traceback.print_exc() - if success: - print("Here's what %s did with the markup:" % parser) - print(soup.prettify()) - - print("-" * 80) - -def lxml_trace(data, html=True, **kwargs): - """Print out the lxml events that occur during parsing. - - This lets you see how lxml parses a document when no Beautiful - Soup code is running. - """ - from lxml import etree - for event, element in etree.iterparse(StringIO(data), html=html, **kwargs): - print(("%s, %4s, %s" % (event, element.tag, element.text))) - -class AnnouncingParser(HTMLParser): - """Announces HTMLParser parse events, without doing anything else.""" - - def _p(self, s): - print(s) - - def handle_starttag(self, name, attrs): - self._p("%s START" % name) - - def handle_endtag(self, name): - self._p("%s END" % name) - - def handle_data(self, data): - self._p("%s DATA" % data) - - def handle_charref(self, name): - self._p("%s CHARREF" % name) - - def handle_entityref(self, name): - self._p("%s ENTITYREF" % name) - - def handle_comment(self, data): - self._p("%s COMMENT" % data) - - def handle_decl(self, data): - self._p("%s DECL" % data) - - def unknown_decl(self, data): - self._p("%s UNKNOWN-DECL" % data) - - def handle_pi(self, data): - self._p("%s PI" % data) - -def htmlparser_trace(data): - """Print out the HTMLParser events that occur during parsing. - - This lets you see how HTMLParser parses a document when no - Beautiful Soup code is running. - """ - parser = AnnouncingParser() - parser.feed(data) - -_vowels = "aeiou" -_consonants = "bcdfghjklmnpqrstvwxyz" - -def rword(length=5): - "Generate a random word-like string." - s = '' - for i in range(length): - if i % 2 == 0: - t = _consonants - else: - t = _vowels - s += random.choice(t) - return s - -def rsentence(length=4): - "Generate a random sentence-like string." - return " ".join(rword(random.randint(4,9)) for i in range(length)) - -def rdoc(num_elements=1000): - """Randomly generate an invalid HTML document.""" - tag_names = ['p', 'div', 'span', 'i', 'b', 'script', 'table'] - elements = [] - for i in range(num_elements): - choice = random.randint(0,3) - if choice == 0: - # New tag. - tag_name = random.choice(tag_names) - elements.append("<%s>" % tag_name) - elif choice == 1: - elements.append(rsentence(random.randint(1,4))) - elif choice == 2: - # Close a tag. - tag_name = random.choice(tag_names) - elements.append("" % tag_name) - return "" + "\n".join(elements) + "" - -def benchmark_parsers(num_elements=100000): - """Very basic head-to-head performance benchmark.""" - print("Comparative parser benchmark on Beautiful Soup %s" % __version__) - data = rdoc(num_elements) - print("Generated a large invalid HTML document (%d bytes)." % len(data)) - - for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]: - success = False - try: - a = time.time() - soup = BeautifulSoup(data, parser) - b = time.time() - success = True - except Exception as e: - print("%s could not parse the markup." % parser) - traceback.print_exc() - if success: - print("BS4+%s parsed the markup in %.2fs." % (parser, b-a)) - - from lxml import etree - a = time.time() - etree.HTML(data) - b = time.time() - print("Raw lxml parsed the markup in %.2fs." % (b-a)) - - import html5lib - parser = html5lib.HTMLParser() - a = time.time() - parser.parse(data) - b = time.time() - print("Raw html5lib parsed the markup in %.2fs." % (b-a)) - -def profile(num_elements=100000, parser="lxml"): - - filehandle = tempfile.NamedTemporaryFile() - filename = filehandle.name - - data = rdoc(num_elements) - vars = dict(bs4=bs4, data=data, parser=parser) - cProfile.runctx('bs4.BeautifulSoup(data, parser)' , vars, vars, filename) - - stats = pstats.Stats(filename) - # stats.strip_dirs() - stats.sort_stats("cumulative") - stats.print_stats('_html5lib|bs4', 50) - -if __name__ == '__main__': - diagnose(sys.stdin.read()) diff --git a/utils/lib/python3.5/site-packages/bs4/element.py b/utils/lib/python3.5/site-packages/bs4/element.py deleted file mode 100644 index a4a750d..0000000 --- a/utils/lib/python3.5/site-packages/bs4/element.py +++ /dev/null @@ -1,1808 +0,0 @@ -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. -__license__ = "MIT" - -import collections -import re -import shlex -import sys -import warnings -from bs4.dammit import EntitySubstitution - -DEFAULT_OUTPUT_ENCODING = "utf-8" -PY3K = (sys.version_info[0] > 2) - -whitespace_re = re.compile("\s+") - -def _alias(attr): - """Alias one attribute name to another for backward compatibility""" - @property - def alias(self): - return getattr(self, attr) - - @alias.setter - def alias(self): - return setattr(self, attr) - return alias - - -class NamespacedAttribute(str): - - def __new__(cls, prefix, name, namespace=None): - if name is None: - obj = str.__new__(cls, prefix) - elif prefix is None: - # Not really namespaced. - obj = str.__new__(cls, name) - else: - obj = str.__new__(cls, prefix + ":" + name) - obj.prefix = prefix - obj.name = name - obj.namespace = namespace - return obj - -class AttributeValueWithCharsetSubstitution(str): - """A stand-in object for a character encoding specified in HTML.""" - -class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution): - """A generic stand-in for the value of a meta tag's 'charset' attribute. - - When Beautiful Soup parses the markup '', the - value of the 'charset' attribute will be one of these objects. - """ - - def __new__(cls, original_value): - obj = str.__new__(cls, original_value) - obj.original_value = original_value - return obj - - def encode(self, encoding): - return encoding - - -class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution): - """A generic stand-in for the value of a meta tag's 'content' attribute. - - When Beautiful Soup parses the markup: - - - The value of the 'content' attribute will be one of these objects. - """ - - CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M) - - def __new__(cls, original_value): - match = cls.CHARSET_RE.search(original_value) - if match is None: - # No substitution necessary. - return str.__new__(str, original_value) - - obj = str.__new__(cls, original_value) - obj.original_value = original_value - return obj - - def encode(self, encoding): - def rewrite(match): - return match.group(1) + encoding - return self.CHARSET_RE.sub(rewrite, self.original_value) - -class HTMLAwareEntitySubstitution(EntitySubstitution): - - """Entity substitution rules that are aware of some HTML quirks. - - Specifically, the contents of - -Hello, world! - - -''' - soup = self.soup(html) - self.assertEqual("text/javascript", soup.find('script')['type']) - - def test_comment(self): - # Comments are represented as Comment objects. - markup = "

foobaz

" - self.assertSoupEquals(markup) - - soup = self.soup(markup) - comment = soup.find(text="foobar") - self.assertEqual(comment.__class__, Comment) - - # The comment is properly integrated into the tree. - foo = soup.find(text="foo") - self.assertEqual(comment, foo.next_element) - baz = soup.find(text="baz") - self.assertEqual(comment, baz.previous_element) - - def test_preserved_whitespace_in_pre_and_textarea(self): - """Whitespace must be preserved in
 and "
-        self.assertSoupEquals(pre_markup)
-        self.assertSoupEquals(textarea_markup)
-
-        soup = self.soup(pre_markup)
-        self.assertEqual(soup.pre.prettify(), pre_markup)
-
-        soup = self.soup(textarea_markup)
-        self.assertEqual(soup.textarea.prettify(), textarea_markup)
-
-        soup = self.soup("")
-        self.assertEqual(soup.textarea.prettify(), "")
-
-    def test_nested_inline_elements(self):
-        """Inline elements can be nested indefinitely."""
-        b_tag = "Inside a B tag"
-        self.assertSoupEquals(b_tag)
-
-        nested_b_tag = "

A nested tag

" - self.assertSoupEquals(nested_b_tag) - - double_nested_b_tag = "

A doubly nested tag

" - self.assertSoupEquals(nested_b_tag) - - def test_nested_block_level_elements(self): - """Block elements can be nested.""" - soup = self.soup('

Foo

') - blockquote = soup.blockquote - self.assertEqual(blockquote.p.b.string, 'Foo') - self.assertEqual(blockquote.b.string, 'Foo') - - def test_correctly_nested_tables(self): - """One table can go inside another one.""" - markup = ('' - '' - "') - - self.assertSoupEquals( - markup, - '
Here's another table:" - '' - '' - '
foo
Here\'s another table:' - '
foo
' - '
') - - self.assertSoupEquals( - "" - "" - "
Foo
Bar
Baz
") - - def test_deeply_nested_multivalued_attribute(self): - # html5lib can set the attributes of the same tag many times - # as it rearranges the tree. This has caused problems with - # multivalued attributes. - markup = '
' - soup = self.soup(markup) - self.assertEqual(["css"], soup.div.div['class']) - - def test_multivalued_attribute_on_html(self): - # html5lib uses a different API to set the attributes ot the - # tag. This has caused problems with multivalued - # attributes. - markup = '' - soup = self.soup(markup) - self.assertEqual(["a", "b"], soup.html['class']) - - def test_angle_brackets_in_attribute_values_are_escaped(self): - self.assertSoupEquals('', '') - - def test_entities_in_attributes_converted_to_unicode(self): - expect = '

' - self.assertSoupEquals('

', expect) - self.assertSoupEquals('

', expect) - self.assertSoupEquals('

', expect) - self.assertSoupEquals('

', expect) - - def test_entities_in_text_converted_to_unicode(self): - expect = '

pi\N{LATIN SMALL LETTER N WITH TILDE}ata

' - self.assertSoupEquals("

piñata

", expect) - self.assertSoupEquals("

piñata

", expect) - self.assertSoupEquals("

piñata

", expect) - self.assertSoupEquals("

piñata

", expect) - - def test_quot_entity_converted_to_quotation_mark(self): - self.assertSoupEquals("

I said "good day!"

", - '

I said "good day!"

') - - def test_out_of_range_entity(self): - expect = "\N{REPLACEMENT CHARACTER}" - self.assertSoupEquals("�", expect) - self.assertSoupEquals("�", expect) - self.assertSoupEquals("�", expect) - - def test_multipart_strings(self): - "Mostly to prevent a recurrence of a bug in the html5lib treebuilder." - soup = self.soup("

\nfoo

") - self.assertEqual("p", soup.h2.string.next_element.name) - self.assertEqual("p", soup.p.name) - self.assertConnectedness(soup) - - def test_empty_element_tags(self): - """Verify consistent handling of empty-element tags, - no matter how they come in through the markup. - """ - self.assertSoupEquals('


', "


") - self.assertSoupEquals('


', "


") - - def test_head_tag_between_head_and_body(self): - "Prevent recurrence of a bug in the html5lib treebuilder." - content = """ - - foo - -""" - soup = self.soup(content) - self.assertNotEqual(None, soup.html.body) - self.assertConnectedness(soup) - - def test_multiple_copies_of_a_tag(self): - "Prevent recurrence of a bug in the html5lib treebuilder." - content = """ - - - - - -""" - soup = self.soup(content) - self.assertConnectedness(soup.article) - - def test_basic_namespaces(self): - """Parsers don't need to *understand* namespaces, but at the - very least they should not choke on namespaces or lose - data.""" - - markup = b'4' - soup = self.soup(markup) - self.assertEqual(markup, soup.encode()) - html = soup.html - self.assertEqual('http://www.w3.org/1999/xhtml', soup.html['xmlns']) - self.assertEqual( - 'http://www.w3.org/1998/Math/MathML', soup.html['xmlns:mathml']) - self.assertEqual( - 'http://www.w3.org/2000/svg', soup.html['xmlns:svg']) - - def test_multivalued_attribute_value_becomes_list(self): - markup = b'' - soup = self.soup(markup) - self.assertEqual(['foo', 'bar'], soup.a['class']) - - # - # Generally speaking, tests below this point are more tests of - # Beautiful Soup than tests of the tree builders. But parsers are - # weird, so we run these tests separately for every tree builder - # to detect any differences between them. - # - - def test_can_parse_unicode_document(self): - # A seemingly innocuous document... but it's in Unicode! And - # it contains characters that can't be represented in the - # encoding found in the declaration! The horror! - markup = 'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!' - soup = self.soup(markup) - self.assertEqual('Sacr\xe9 bleu!', soup.body.string) - - def test_soupstrainer(self): - """Parsers should be able to work with SoupStrainers.""" - strainer = SoupStrainer("b") - soup = self.soup("A bold statement", - parse_only=strainer) - self.assertEqual(soup.decode(), "bold") - - def test_single_quote_attribute_values_become_double_quotes(self): - self.assertSoupEquals("", - '') - - def test_attribute_values_with_nested_quotes_are_left_alone(self): - text = """a""" - self.assertSoupEquals(text) - - def test_attribute_values_with_double_nested_quotes_get_quoted(self): - text = """a""" - soup = self.soup(text) - soup.foo['attr'] = 'Brawls happen at "Bob\'s Bar"' - self.assertSoupEquals( - soup.foo.decode(), - """a""") - - def test_ampersand_in_attribute_value_gets_escaped(self): - self.assertSoupEquals('', - '') - - self.assertSoupEquals( - 'foo', - 'foo') - - def test_escaped_ampersand_in_attribute_value_is_left_alone(self): - self.assertSoupEquals('') - - def test_entities_in_strings_converted_during_parsing(self): - # Both XML and HTML entities are converted to Unicode characters - # during parsing. - text = "

<<sacré bleu!>>

" - expected = "

<<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>

" - self.assertSoupEquals(text, expected) - - def test_smart_quotes_converted_on_the_way_in(self): - # Microsoft smart quotes are converted to Unicode characters during - # parsing. - quote = b"

\x91Foo\x92

" - soup = self.soup(quote) - self.assertEqual( - soup.p.string, - "\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}") - - def test_non_breaking_spaces_converted_on_the_way_in(self): - soup = self.soup("  ") - self.assertEqual(soup.a.string, "\N{NO-BREAK SPACE}" * 2) - - def test_entities_converted_on_the_way_out(self): - text = "

<<sacré bleu!>>

" - expected = "

<<sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>

".encode("utf-8") - soup = self.soup(text) - self.assertEqual(soup.p.encode("utf-8"), expected) - - def test_real_iso_latin_document(self): - # Smoke test of interrelated functionality, using an - # easy-to-understand document. - - # Here it is in Unicode. Note that it claims to be in ISO-Latin-1. - unicode_html = '

Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!

' - - # That's because we're going to encode it into ISO-Latin-1, and use - # that to test. - iso_latin_html = unicode_html.encode("iso-8859-1") - - # Parse the ISO-Latin-1 HTML. - soup = self.soup(iso_latin_html) - # Encode it to UTF-8. - result = soup.encode("utf-8") - - # What do we expect the result to look like? Well, it would - # look like unicode_html, except that the META tag would say - # UTF-8 instead of ISO-Latin-1. - expected = unicode_html.replace("ISO-Latin-1", "utf-8") - - # And, of course, it would be in UTF-8, not Unicode. - expected = expected.encode("utf-8") - - # Ta-da! - self.assertEqual(result, expected) - - def test_real_shift_jis_document(self): - # Smoke test to make sure the parser can handle a document in - # Shift-JIS encoding, without choking. - shift_jis_html = ( - b'
'
-            b'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
-            b'\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
-            b'\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B'
-            b'
') - unicode_html = shift_jis_html.decode("shift-jis") - soup = self.soup(unicode_html) - - # Make sure the parse tree is correctly encoded to various - # encodings. - self.assertEqual(soup.encode("utf-8"), unicode_html.encode("utf-8")) - self.assertEqual(soup.encode("euc_jp"), unicode_html.encode("euc_jp")) - - def test_real_hebrew_document(self): - # A real-world test to make sure we can convert ISO-8859-9 (a - # Hebrew encoding) to UTF-8. - hebrew_document = b'Hebrew (ISO 8859-8) in Visual Directionality

Hebrew (ISO 8859-8) in Visual Directionality

\xed\xe5\xec\xf9' - soup = self.soup( - hebrew_document, from_encoding="iso8859-8") - # Some tree builders call it iso8859-8, others call it iso-8859-9. - # That's not a difference we really care about. - assert soup.original_encoding in ('iso8859-8', 'iso-8859-8') - self.assertEqual( - soup.encode('utf-8'), - hebrew_document.decode("iso8859-8").encode("utf-8")) - - def test_meta_tag_reflects_current_encoding(self): - # Here's the tag saying that a document is - # encoded in Shift-JIS. - meta_tag = ('') - - # Here's a document incorporating that meta tag. - shift_jis_html = ( - '\n%s\n' - '' - 'Shift-JIS markup goes here.') % meta_tag - soup = self.soup(shift_jis_html) - - # Parse the document, and the charset is seemingly unaffected. - parsed_meta = soup.find('meta', {'http-equiv': 'Content-type'}) - content = parsed_meta['content'] - self.assertEqual('text/html; charset=x-sjis', content) - - # But that value is actually a ContentMetaAttributeValue object. - self.assertTrue(isinstance(content, ContentMetaAttributeValue)) - - # And it will take on a value that reflects its current - # encoding. - self.assertEqual('text/html; charset=utf8', content.encode("utf8")) - - # For the rest of the story, see TestSubstitutions in - # test_tree.py. - - def test_html5_style_meta_tag_reflects_current_encoding(self): - # Here's the tag saying that a document is - # encoded in Shift-JIS. - meta_tag = ('') - - # Here's a document incorporating that meta tag. - shift_jis_html = ( - '\n%s\n' - '' - 'Shift-JIS markup goes here.') % meta_tag - soup = self.soup(shift_jis_html) - - # Parse the document, and the charset is seemingly unaffected. - parsed_meta = soup.find('meta', id="encoding") - charset = parsed_meta['charset'] - self.assertEqual('x-sjis', charset) - - # But that value is actually a CharsetMetaAttributeValue object. - self.assertTrue(isinstance(charset, CharsetMetaAttributeValue)) - - # And it will take on a value that reflects its current - # encoding. - self.assertEqual('utf8', charset.encode("utf8")) - - def test_tag_with_no_attributes_can_have_attributes_added(self): - data = self.soup("text") - data.a['foo'] = 'bar' - self.assertEqual('text', data.a.decode()) - -class XMLTreeBuilderSmokeTest(object): - - def test_pickle_and_unpickle_identity(self): - # Pickling a tree, then unpickling it, yields a tree identical - # to the original. - tree = self.soup("foo") - dumped = pickle.dumps(tree, 2) - loaded = pickle.loads(dumped) - self.assertEqual(loaded.__class__, BeautifulSoup) - self.assertEqual(loaded.decode(), tree.decode()) - - def test_docstring_generated(self): - soup = self.soup("") - self.assertEqual( - soup.encode(), b'\n') - - def test_xml_declaration(self): - markup = b"""\n""" - soup = self.soup(markup) - self.assertEqual(markup, soup.encode("utf8")) - - def test_processing_instruction(self): - markup = b"""\n""" - soup = self.soup(markup) - self.assertEqual(markup, soup.encode("utf8")) - - def test_real_xhtml_document(self): - """A real XHTML document should come out *exactly* the same as it went in.""" - markup = b""" - - -Hello. -Goodbye. -""" - soup = self.soup(markup) - self.assertEqual( - soup.encode("utf-8"), markup) - - def test_formatter_processes_script_tag_for_xml_documents(self): - doc = """ - -""" - soup = BeautifulSoup(doc, "lxml-xml") - # lxml would have stripped this while parsing, but we can add - # it later. - soup.script.string = 'console.log("< < hey > > ");' - encoded = soup.encode() - self.assertTrue(b"< < hey > >" in encoded) - - def test_can_parse_unicode_document(self): - markup = 'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!' - soup = self.soup(markup) - self.assertEqual('Sacr\xe9 bleu!', soup.root.string) - - def test_popping_namespaced_tag(self): - markup = 'b2012-07-02T20:33:42Zcd' - soup = self.soup(markup) - self.assertEqual( - str(soup.rss), markup) - - def test_docstring_includes_correct_encoding(self): - soup = self.soup("") - self.assertEqual( - soup.encode("latin1"), - b'\n') - - def test_large_xml_document(self): - """A large XML document should come out the same as it went in.""" - markup = (b'\n' - + b'0' * (2**12) - + b'') - soup = self.soup(markup) - self.assertEqual(soup.encode("utf-8"), markup) - - - def test_tags_are_empty_element_if_and_only_if_they_are_empty(self): - self.assertSoupEquals("

", "

") - self.assertSoupEquals("

foo

") - - def test_namespaces_are_preserved(self): - markup = 'This tag is in the a namespaceThis tag is in the b namespace' - soup = self.soup(markup) - root = soup.root - self.assertEqual("http://example.com/", root['xmlns:a']) - self.assertEqual("http://example.net/", root['xmlns:b']) - - def test_closing_namespaced_tag(self): - markup = '

20010504

' - soup = self.soup(markup) - self.assertEqual(str(soup.p), markup) - - def test_namespaced_attributes(self): - markup = '' - soup = self.soup(markup) - self.assertEqual(str(soup.foo), markup) - - def test_namespaced_attributes_xml_namespace(self): - markup = 'bar' - soup = self.soup(markup) - self.assertEqual(str(soup.foo), markup) - - def test_find_by_prefixed_name(self): - doc = """ -foo - bar - baz - -""" - soup = self.soup(doc) - - # There are three tags. - self.assertEqual(3, len(soup.find_all('tag'))) - - # But two of them are ns1:tag and one of them is ns2:tag. - self.assertEqual(2, len(soup.find_all('ns1:tag'))) - self.assertEqual(1, len(soup.find_all('ns2:tag'))) - - self.assertEqual(1, len(soup.find_all('ns2:tag', key='value'))) - self.assertEqual(3, len(soup.find_all(['ns1:tag', 'ns2:tag']))) - - def test_copy_tag_preserves_namespace(self): - xml = """ -""" - - soup = self.soup(xml) - tag = soup.document - duplicate = copy.copy(tag) - - # The two tags have the same namespace prefix. - self.assertEqual(tag.prefix, duplicate.prefix) - - -class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest): - """Smoke test for a tree builder that supports HTML5.""" - - def test_real_xhtml_document(self): - # Since XHTML is not HTML5, HTML5 parsers are not tested to handle - # XHTML documents in any particular way. - pass - - def test_html_tags_have_namespace(self): - markup = "" - soup = self.soup(markup) - self.assertEqual("http://www.w3.org/1999/xhtml", soup.a.namespace) - - def test_svg_tags_have_namespace(self): - markup = '' - soup = self.soup(markup) - namespace = "http://www.w3.org/2000/svg" - self.assertEqual(namespace, soup.svg.namespace) - self.assertEqual(namespace, soup.circle.namespace) - - - def test_mathml_tags_have_namespace(self): - markup = '5' - soup = self.soup(markup) - namespace = 'http://www.w3.org/1998/Math/MathML' - self.assertEqual(namespace, soup.math.namespace) - self.assertEqual(namespace, soup.msqrt.namespace) - - def test_xml_declaration_becomes_comment(self): - markup = '' - soup = self.soup(markup) - self.assertTrue(isinstance(soup.contents[0], Comment)) - self.assertEqual(soup.contents[0], '?xml version="1.0" encoding="utf-8"?') - self.assertEqual("html", soup.contents[0].next_element.name) - -def skipIf(condition, reason): - def nothing(test, *args, **kwargs): - return None - - def decorator(test_item): - if condition: - return nothing - else: - return test_item - - return decorator diff --git a/utils/lib/python3.5/site-packages/bs4/tests/__init__.py b/utils/lib/python3.5/site-packages/bs4/tests/__init__.py deleted file mode 100644 index 142c8cc..0000000 --- a/utils/lib/python3.5/site-packages/bs4/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"The beautifulsoup tests." diff --git a/utils/lib/python3.5/site-packages/bs4/tests/__pycache__/__init__.cpython-35.pyc b/utils/lib/python3.5/site-packages/bs4/tests/__pycache__/__init__.cpython-35.pyc deleted file mode 100644 index 57552c1..0000000 Binary files a/utils/lib/python3.5/site-packages/bs4/tests/__pycache__/__init__.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/site-packages/bs4/tests/__pycache__/test_builder_registry.cpython-35.pyc b/utils/lib/python3.5/site-packages/bs4/tests/__pycache__/test_builder_registry.cpython-35.pyc deleted file mode 100644 index 9554b97..0000000 Binary files a/utils/lib/python3.5/site-packages/bs4/tests/__pycache__/test_builder_registry.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/site-packages/bs4/tests/__pycache__/test_docs.cpython-35.pyc b/utils/lib/python3.5/site-packages/bs4/tests/__pycache__/test_docs.cpython-35.pyc deleted file mode 100644 index 04a2a37..0000000 Binary files a/utils/lib/python3.5/site-packages/bs4/tests/__pycache__/test_docs.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/site-packages/bs4/tests/__pycache__/test_html5lib.cpython-35.pyc b/utils/lib/python3.5/site-packages/bs4/tests/__pycache__/test_html5lib.cpython-35.pyc deleted file mode 100644 index 3b0a07b..0000000 Binary files a/utils/lib/python3.5/site-packages/bs4/tests/__pycache__/test_html5lib.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/site-packages/bs4/tests/__pycache__/test_htmlparser.cpython-35.pyc b/utils/lib/python3.5/site-packages/bs4/tests/__pycache__/test_htmlparser.cpython-35.pyc deleted file mode 100644 index 8e822e3..0000000 Binary files a/utils/lib/python3.5/site-packages/bs4/tests/__pycache__/test_htmlparser.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/site-packages/bs4/tests/__pycache__/test_lxml.cpython-35.pyc b/utils/lib/python3.5/site-packages/bs4/tests/__pycache__/test_lxml.cpython-35.pyc deleted file mode 100644 index 8121e6f..0000000 Binary files a/utils/lib/python3.5/site-packages/bs4/tests/__pycache__/test_lxml.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/site-packages/bs4/tests/__pycache__/test_soup.cpython-35.pyc b/utils/lib/python3.5/site-packages/bs4/tests/__pycache__/test_soup.cpython-35.pyc deleted file mode 100644 index 8a41e13..0000000 Binary files a/utils/lib/python3.5/site-packages/bs4/tests/__pycache__/test_soup.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/site-packages/bs4/tests/__pycache__/test_tree.cpython-35.pyc b/utils/lib/python3.5/site-packages/bs4/tests/__pycache__/test_tree.cpython-35.pyc deleted file mode 100644 index c014abd..0000000 Binary files a/utils/lib/python3.5/site-packages/bs4/tests/__pycache__/test_tree.cpython-35.pyc and /dev/null differ diff --git a/utils/lib/python3.5/site-packages/bs4/tests/test_builder_registry.py b/utils/lib/python3.5/site-packages/bs4/tests/test_builder_registry.py deleted file mode 100644 index 90cad82..0000000 --- a/utils/lib/python3.5/site-packages/bs4/tests/test_builder_registry.py +++ /dev/null @@ -1,147 +0,0 @@ -"""Tests of the builder registry.""" - -import unittest -import warnings - -from bs4 import BeautifulSoup -from bs4.builder import ( - builder_registry as registry, - HTMLParserTreeBuilder, - TreeBuilderRegistry, -) - -try: - from bs4.builder import HTML5TreeBuilder - HTML5LIB_PRESENT = True -except ImportError: - HTML5LIB_PRESENT = False - -try: - from bs4.builder import ( - LXMLTreeBuilderForXML, - LXMLTreeBuilder, - ) - LXML_PRESENT = True -except ImportError: - LXML_PRESENT = False - - -class BuiltInRegistryTest(unittest.TestCase): - """Test the built-in registry with the default builders registered.""" - - def test_combination(self): - if LXML_PRESENT: - self.assertEqual(registry.lookup('fast', 'html'), - LXMLTreeBuilder) - - if LXML_PRESENT: - self.assertEqual(registry.lookup('permissive', 'xml'), - LXMLTreeBuilderForXML) - self.assertEqual(registry.lookup('strict', 'html'), - HTMLParserTreeBuilder) - if HTML5LIB_PRESENT: - self.assertEqual(registry.lookup('html5lib', 'html'), - HTML5TreeBuilder) - - def test_lookup_by_markup_type(self): - if LXML_PRESENT: - self.assertEqual(registry.lookup('html'), LXMLTreeBuilder) - self.assertEqual(registry.lookup('xml'), LXMLTreeBuilderForXML) - else: - self.assertEqual(registry.lookup('xml'), None) - if HTML5LIB_PRESENT: - self.assertEqual(registry.lookup('html'), HTML5TreeBuilder) - else: - self.assertEqual(registry.lookup('html'), HTMLParserTreeBuilder) - - def test_named_library(self): - if LXML_PRESENT: - self.assertEqual(registry.lookup('lxml', 'xml'), - LXMLTreeBuilderForXML) - self.assertEqual(registry.lookup('lxml', 'html'), - LXMLTreeBuilder) - if HTML5LIB_PRESENT: - self.assertEqual(registry.lookup('html5lib'), - HTML5TreeBuilder) - - self.assertEqual(registry.lookup('html.parser'), - HTMLParserTreeBuilder) - - def test_beautifulsoup_constructor_does_lookup(self): - - with warnings.catch_warnings(record=True) as w: - # This will create a warning about not explicitly - # specifying a parser, but we'll ignore it. - - # You can pass in a string. - BeautifulSoup("", features="html") - # Or a list of strings. - BeautifulSoup("", features=["html", "fast"]) - - # You'll get an exception if BS can't find an appropriate - # builder. - self.assertRaises(ValueError, BeautifulSoup, - "", features="no-such-feature") - -class RegistryTest(unittest.TestCase): - """Test the TreeBuilderRegistry class in general.""" - - def setUp(self): - self.registry = TreeBuilderRegistry() - - def builder_for_features(self, *feature_list): - cls = type('Builder_' + '_'.join(feature_list), - (object,), {'features' : feature_list}) - - self.registry.register(cls) - return cls - - def test_register_with_no_features(self): - builder = self.builder_for_features() - - # Since the builder advertises no features, you can't find it - # by looking up features. - self.assertEqual(self.registry.lookup('foo'), None) - - # But you can find it by doing a lookup with no features, if - # this happens to be the only registered builder. - self.assertEqual(self.registry.lookup(), builder) - - def test_register_with_features_makes_lookup_succeed(self): - builder = self.builder_for_features('foo', 'bar') - self.assertEqual(self.registry.lookup('foo'), builder) - self.assertEqual(self.registry.lookup('bar'), builder) - - def test_lookup_fails_when_no_builder_implements_feature(self): - builder = self.builder_for_features('foo', 'bar') - self.assertEqual(self.registry.lookup('baz'), None) - - def test_lookup_gets_most_recent_registration_when_no_feature_specified(self): - builder1 = self.builder_for_features('foo') - builder2 = self.builder_for_features('bar') - self.assertEqual(self.registry.lookup(), builder2) - - def test_lookup_fails_when_no_tree_builders_registered(self): - self.assertEqual(self.registry.lookup(), None) - - def test_lookup_gets_most_recent_builder_supporting_all_features(self): - has_one = self.builder_for_features('foo') - has_the_other = self.builder_for_features('bar') - has_both_early = self.builder_for_features('foo', 'bar', 'baz') - has_both_late = self.builder_for_features('foo', 'bar', 'quux') - lacks_one = self.builder_for_features('bar') - has_the_other = self.builder_for_features('foo') - - # There are two builders featuring 'foo' and 'bar', but - # the one that also features 'quux' was registered later. - self.assertEqual(self.registry.lookup('foo', 'bar'), - has_both_late) - - # There is only one builder featuring 'foo', 'bar', and 'baz'. - self.assertEqual(self.registry.lookup('foo', 'bar', 'baz'), - has_both_early) - - def test_lookup_fails_when_cannot_reconcile_requested_features(self): - builder1 = self.builder_for_features('foo', 'bar') - builder2 = self.builder_for_features('foo', 'baz') - self.assertEqual(self.registry.lookup('bar', 'baz'), None) diff --git a/utils/lib/python3.5/site-packages/bs4/tests/test_docs.py b/utils/lib/python3.5/site-packages/bs4/tests/test_docs.py deleted file mode 100644 index 5b9f677..0000000 --- a/utils/lib/python3.5/site-packages/bs4/tests/test_docs.py +++ /dev/null @@ -1,36 +0,0 @@ -"Test harness for doctests." - -# pylint: disable-msg=E0611,W0142 - -__metaclass__ = type -__all__ = [ - 'additional_tests', - ] - -import atexit -import doctest -import os -#from pkg_resources import ( -# resource_filename, resource_exists, resource_listdir, cleanup_resources) -import unittest - -DOCTEST_FLAGS = ( - doctest.ELLIPSIS | - doctest.NORMALIZE_WHITESPACE | - doctest.REPORT_NDIFF) - - -# def additional_tests(): -# "Run the doc tests (README.txt and docs/*, if any exist)" -# doctest_files = [ -# os.path.abspath(resource_filename('bs4', 'README.txt'))] -# if resource_exists('bs4', 'docs'): -# for name in resource_listdir('bs4', 'docs'): -# if name.endswith('.txt'): -# doctest_files.append( -# os.path.abspath( -# resource_filename('bs4', 'docs/%s' % name))) -# kwargs = dict(module_relative=False, optionflags=DOCTEST_FLAGS) -# atexit.register(cleanup_resources) -# return unittest.TestSuite(( -# doctest.DocFileSuite(*doctest_files, **kwargs))) diff --git a/utils/lib/python3.5/site-packages/bs4/tests/test_html5lib.py b/utils/lib/python3.5/site-packages/bs4/tests/test_html5lib.py deleted file mode 100644 index 81fb7d3..0000000 --- a/utils/lib/python3.5/site-packages/bs4/tests/test_html5lib.py +++ /dev/null @@ -1,130 +0,0 @@ -"""Tests to ensure that the html5lib tree builder generates good trees.""" - -import warnings - -try: - from bs4.builder import HTML5TreeBuilder - HTML5LIB_PRESENT = True -except ImportError as e: - HTML5LIB_PRESENT = False -from bs4.element import SoupStrainer -from bs4.testing import ( - HTML5TreeBuilderSmokeTest, - SoupTest, - skipIf, -) - -@skipIf( - not HTML5LIB_PRESENT, - "html5lib seems not to be present, not testing its tree builder.") -class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest): - """See ``HTML5TreeBuilderSmokeTest``.""" - - @property - def default_builder(self): - return HTML5TreeBuilder() - - def test_soupstrainer(self): - # The html5lib tree builder does not support SoupStrainers. - strainer = SoupStrainer("b") - markup = "

A bold statement.

" - with warnings.catch_warnings(record=True) as w: - soup = self.soup(markup, parse_only=strainer) - self.assertEqual( - soup.decode(), self.document_for(markup)) - - self.assertTrue( - "the html5lib tree builder doesn't support parse_only" in - str(w[0].message)) - - def test_correctly_nested_tables(self): - """html5lib inserts tags where other parsers don't.""" - markup = ('' - '' - "') - - self.assertSoupEquals( - markup, - '
Here's another table:" - '' - '' - '
foo
Here\'s another table:' - '
foo
' - '
') - - self.assertSoupEquals( - "" - "" - "
Foo
Bar
Baz
") - - def test_xml_declaration_followed_by_doctype(self): - markup = ''' - - - - - -

foo

- -''' - soup = self.soup(markup) - # Verify that we can reach the

tag; this means the tree is connected. - self.assertEqual(b"

foo

", soup.p.encode()) - - def test_reparented_markup(self): - markup = '

foo

\n

bar

' - soup = self.soup(markup) - self.assertEqual("

foo

\n

bar

", soup.body.decode()) - self.assertEqual(2, len(soup.find_all('p'))) - - - def test_reparented_markup_ends_with_whitespace(self): - markup = '

foo

\n

bar

\n' - soup = self.soup(markup) - self.assertEqual("

foo

\n

bar

\n", soup.body.decode()) - self.assertEqual(2, len(soup.find_all('p'))) - - def test_reparented_markup_containing_identical_whitespace_nodes(self): - """Verify that we keep the two whitespace nodes in this - document distinct when reparenting the adjacent tags. - """ - markup = '
' - soup = self.soup(markup) - space1, space2 = soup.find_all(string=' ') - tbody1, tbody2 = soup.find_all('tbody') - assert space1.next_element is tbody1 - assert tbody2.next_element is space2 - - def test_reparented_markup_containing_children(self): - markup = '' - soup = self.soup(markup) - noscript = soup.noscript - self.assertEqual("target", noscript.next_element) - target = soup.find(string='target') - - # The 'aftermath' string was duplicated; we want the second one. - final_aftermath = soup.find_all(string='aftermath')[-1] - - # The