Changed tweets, added eca

This commit is contained in:
2022-10-25 12:15:41 +02:00
parent 7597d7648a
commit 9860bee497
71 changed files with 7476 additions and 2036 deletions

326
eca/__init__.py Normal file
View File

@@ -0,0 +1,326 @@
import queue
import collections
import threading
import logging
import sys
import json
from contextlib import contextmanager
from . import util
from . import pubsub
# module-level logger for the eca package
logger = logging.getLogger(__name__)
# all exported names (the public API of ``from eca import *``)
__all__ = [
    'event',
    'condition',
    'rules',
    'Rules',
    'Context',
    'Event',
    'fire',
    'fire_global',
    'emit',
    'get_context',
    'spawn_context',
    'context_activate',
    'context_switch',
    'auxiliary',
    'register_auxiliary',
    'shutdown'
]
# The global event channel: every Context subscribes to it, so events
# published here reach all running contexts (see fire_global).
global_channel = pubsub.PubSubChannel()
# The thread local storage used to create a 'current context' with regards
# to the executing thread.
# (See https://docs.python.org/3/library/threading.html#thread-local-data)
thread_local = threading.local()
class Rules:
    """Registry that collects actions together with their events and conditions."""

    def __init__(self):
        self.rules = set()

    def prepare_action(self, fn):
        """
        Prepares a function to be usable as an action.
        Ensures the 'conditions' list and 'events' set attributes exist on the
        function, and registers the action with this rule set.
        """
        if not hasattr(fn, 'conditions'):
            logger.info("Defined action '{}'".format(fn.__name__))
        fn.conditions = getattr(fn, 'conditions', [])
        fn.events = getattr(fn, 'events', set())
        self.rules.add(fn)

    def condition(self, c):
        """
        Adds a condition callable to the action.
        The condition will be called with (context, event) and must return
        True or False. Returns a decorator so an argument can be given to
        the decorator itself.
        (See http://docs.python.org/3/glossary.html#term-decorator)
        """
        def decorate(action):
            self.prepare_action(action)
            logger.debug("With condition: {}".format(util.describe_function(c)))
            action.conditions.append(c)
            return action
        return decorate

    def event(self, eventname):
        """
        Attaches the action to an event name.
        Effectively the same as the 'event.name == eventname' condition.
        NOTE(review): the original docstring claimed that adding multiple
        event names prevents the rule from triggering, but dispatch fires a
        rule when the event name matches ANY registered name -- confirm the
        intended semantics.
        """
        def decorate(action):
            self.prepare_action(action)
            logger.debug("Attached to event: {}".format(eventname))
            action.events.add(eventname)
            return action
        return decorate
# The 'global' rules set: the default registry used by Context when no
# explicit rule set is passed in.
rules = Rules()
# convenience module-level decorators bound to the global rule set
event = rules.event
condition = rules.condition
class Event:
    """Abstract event with a name and attributes."""

    def __init__(self, name, data=None):
        """Constructs an event; the data payload is optional."""
        self.name = name
        self.data = data

    def get(self, *args, **kwargs):
        """Delegates lookup to the underlying data mapping."""
        return self.data.get(*args, **kwargs)

    def __str__(self):
        # mappings render as key=value pairs, anything else as-is
        if isinstance(self.data, collections.abc.Mapping):
            parts = ["{}={}".format(k, v) for k, v in self.data.items()]
        else:
            parts = [str(self.data)]
        return "'{}' with {{{}}}".format(self.name, ', '.join(parts))
class Context:
    """
    ECA Execution context to track scope and events.
    Each context maintains both a variables namespace and an event queue. The
    context itself provides a run method to allow threaded execution through
    starting a new thread targeted at the run method.
    Every context also contains a dictionary of auxiliaries which contains
    objects to support the context and its rule execution.
    """
    def __init__(self, init_data=None, name='<unnamed context>', rules=rules):
        """
        Creates a context.
        init_data -- payload for the initial 'init' event
        name      -- human-readable name for this context
        rules     -- the Rules registry consulted by the event loop
                     (defaults to the module-level registry)
        """
        self.event_queue = queue.Queue()
        self.scope = util.NamespaceDict()
        self.channel = pubsub.PubSubChannel()
        self.auxiliaries = {}
        self.name = name
        # done/daemon control event-loop shutdown behaviour (see stop())
        self.done = False
        self.daemon = True
        self.rules = rules
        # subscribe to own pubsub channel to receive events
        self.channel.subscribe(self._pubsub_receiver, 'event')
        # queue the 'init' event so rules can initialise the scope
        self.receive_event(Event('init', init_data))
        # subscribe to global pubsub channel to receive global eca events
        global_channel.subscribe(self._pubsub_receiver, 'event')
    def _trace(self, message):
        """Prints tracing statements if trace is enabled."""
        logging.getLogger('trace').info(message)
    def _pubsub_receiver(self, name, data):
        """Pubsub channel connector; the published data is the Event itself."""
        self.receive_event(data)
    def receive_event(self, event):
        """Receives an Event to handle (thread-safe via the queue)."""
        self._trace("Received event: {}".format(event))
        self.event_queue.put(event)
    def auxiliary(self, name):
        """Returns the auxiliary registered under name (KeyError if absent)."""
        return self.auxiliaries[name]
    def run(self):
        """Main event loop."""
        # switch context to this one and start working
        with context_switch(self):
            while not self.done:
                self._handle_event()
    def start(self, daemon=True):
        """Runs this context's event loop on a freshly started thread."""
        thread = threading.Thread(target=self.run)
        self.daemon = daemon
        thread.daemon = self.daemon
        thread.start()
    def stop(self):
        """Requests event-loop termination (non-daemon contexts only)."""
        global_channel.unsubscribe(self._pubsub_receiver, 'event')
        if not self.daemon:
            self.done = True
        else:
            logger.warning("Can't shutdown daemon context. The context is used in a server.")
    def _handle_event(self):
        """Handles a single event, or times out after receiving nothing."""
        try:
            # wait until we have an upcoming event
            # (but don't wait too long -- self.done could have been set to
            # true while we were waiting for an event)
            event = self.event_queue.get(timeout=1.0)
            self._trace("Working on event: {}".format(event))
            # Determine candidate rules and execute matches:
            # 1) Only rules that match the event name as one of the events
            candidates = [r for r in self.rules.rules if event.name in r.events]
            # 2) Only rules for which all conditions hold
            # (note: every condition is evaluated -- deliberately no
            #  short-circuiting, so condition side effects always run)
            for r in candidates:
                if not [c(self.scope, event) for c in r.conditions].count(False):
                    self._trace("Rule: {}".format(util.describe_function(r)))
                    # NOTE(review): the rule's return value is currently ignored
                    result = r(self.scope, event)
        except queue.Empty:
            # Timeout on waiting
            pass
@contextmanager
def context_switch(context):
    """
    Context manager to allow ad-hoc context switches. (The Python 'context' is
    different from the eca Context.)
    This function can be written without any regard for locking as the
    thread_local object will take care of that. Since everything here is done
    in the same thread, this effectively allows nesting of context switches.
    """
    # activate new context and store old
    old_context = context_activate(context)
    try:
        yield
    finally:
        # restore the old context even if the with-body raised; otherwise an
        # exception would leave the thread stuck in the wrong eca context
        context_activate(old_context)
def context_activate(context):
    """
    Activate an eca Context for the calling thread and return the previously
    active one. Passing None disables the current context.
    """
    previous = getattr(thread_local, 'context', None)
    thread_local.context = context
    return previous
def get_context():
    """Returns the eca Context active on the calling thread, or None."""
    return getattr(thread_local, 'context', None)
def auxiliary(name):
    """
    Returns the named auxiliary of the current context.
    Raises NotImplementedError when no context is active.
    """
    ctx = get_context()
    if ctx is None:
        raise NotImplementedError("Can not get an auxiliary without a current context.")
    return ctx.auxiliaries[name]
def register_auxiliary(name, aux):
    """
    Registers an auxiliary object under the given name on the current context.
    Raises NotImplementedError when no context is active.
    """
    ctx = get_context()
    if ctx is None:
        raise NotImplementedError("Can not get an auxiliary without a current context.")
    ctx.auxiliaries[name] = aux
def shutdown():
    """Requests shutdown of the current context's event loop."""
    ctx = get_context()
    if ctx is None:
        raise NotImplementedError("Can not invoke shutdown without a current context.")
    ctx.stop()
def fire(eventname, data=None, delay=None):
    """
    Fires an event on the current context's channel.
    This is the fire-and-forget method to create new events; delay (seconds)
    postpones delivery.
    """
    ctx = get_context()
    if ctx is None:
        raise NotImplementedError("Can't invoke fire without a current context.")
    ctx.channel.publish('event', Event(eventname, data), delay)
def fire_global(eventname, data=None, delay=None):
    """
    Fires a global event; it reaches every context subscribed to the
    process-wide channel.
    """
    global_channel.publish('event', Event(eventname, data), delay)
def emit(name, data, id=None):
    """
    Emits an event to whomever is listening (mostly HTTP clients).
    The data payload is serialised to JSON before publication.
    """
    ctx = get_context()
    if ctx is None:
        raise NotImplementedError("Can't invoke emit without a current context.")
    payload = Event(name, {
        'json': json.dumps(data),
        'id': id
    })
    ctx.channel.publish('emit', payload)
def spawn_context(init_data=None, name='<unnamed context>', rules=rules, daemon=False):
    """
    Spawns a new context, starts its event loop thread, and returns it.
    Returning the Context (the original dropped it) lets the caller stop the
    context or register auxiliaries on it later.
    """
    context = Context(init_data, name, rules)
    context.start(daemon)
    return context

View File

Binary file not shown.

View File

Binary file not shown.

View File

Binary file not shown.

View File

Binary file not shown.

View File

Binary file not shown.

View File

Binary file not shown.

View File

Binary file not shown.

384
eca/arff.py Normal file
View File

@@ -0,0 +1,384 @@
"""
ARFF format loading and saving module.
This module implements the book version [1] of the ARFF format. This means there
is no support for instance weights.
Known limitations:
- This implementation does not parse dates
[1]: http://weka.wikispaces.com/ARFF+%28book+version%29
"""
import re
from collections import namedtuple
# A single ARFF attribute: its name plus its type object (Numeric/Text/Nominal)
Field = namedtuple('Field',['name','type'])
# public API of this module
__all__ = ['load', 'save', 'Field', 'Numeric', 'Text', 'Nominal']
#
# Line type predicates and formatters
#
def is_empty(line):
    """True when the line contains only whitespace."""
    return not line.strip()


def is_comment(line):
    """True when the line is an ARFF comment ('%...')."""
    return line.startswith('%')


def format_comment(line):
    """Renders a comment line."""
    return '% ' + line


def is_relation(line):
    """True for the '@relation' header line (case-insensitive)."""
    return line.lower().startswith('@relation')


def format_relation(name):
    """Renders the '@relation <name>' header line."""
    return '@relation ' + format_identifier(name) + '\n'


def is_attribute(line):
    """True for '@attribute' declaration lines (case-insensitive)."""
    return line.lower().startswith('@attribute')


def format_attribute(field):
    """Renders one '@attribute <name> <type>' line."""
    return '@attribute ' + format_identifier(field.name) + ' ' + str(field.type) + '\n'


def format_attributes(fields):
    """Renders one '@attribute' line per field."""
    return ''.join(format_attribute(field) for field in fields)


def is_data(line):
    """True for the '@data' marker line (case-insensitive)."""
    return line.lower().startswith('@data')


def format_data():
    """Renders the '@data' marker line."""
    return '@data\n'
def format_row(row, fields, sparse=False):
    """
    Formats a data row based on the given fields.
    In sparse mode only values differing from the field default are written,
    as '{index value, ...}'.
    """
    if sparse:
        cells = []
        for index, field in enumerate(fields):
            value = row.get(field.name)
            if value != field.type.default():
                cells.append(format_numeric(index) + ' ' + field.type.format(value))
        return '{' + ','.join(cells) + '}\n'
    cells = [field.type.format(row.get(field.name)) for field in fields]
    return ','.join(cells) + '\n'
def safe_next(it):
    """Returns the next character from the iterator or ''."""
    try:
        return next(it)
    except StopIteration:
        return ''


def whitespace(rest):
    """Consumes whitespace at the beginning of the input."""
    return rest.lstrip()


# number literal: optional sign, digits with optional fraction, optional exponent
number_pattern = re.compile(r'[-+]?(\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?')


def numeric(rest):
    """Parses a number at the beginning of the input; returns (value, rest)."""
    match = number_pattern.match(rest)
    if not match:
        raise ValueError('Number not parsable')
    text = match.group(0)
    remainder = rest[len(text):]
    try:
        return int(text), remainder
    except ValueError:
        # not an integer literal, so it must be a float
        return float(text), remainder


def format_numeric(number):
    """Renders a number."""
    return str(number)


def expect(rest, string):
    """Expects the given string at the start of the input; returns (seen, rest)."""
    if rest.startswith(string):
        return True, rest[len(string):]
    return False, rest


# escape sequences valid inside quoted identifiers
identifier_escapes = {
    '\\': '\\',
    'n' : '\n',
    't' : '\t',
    'r' : '\r',
    '%' : '%',
    "'" : "'"
}


def identifier(rest):
    """Parses an optionally quoted identifier at the start of the input."""
    chars = iter(rest)
    c = safe_next(chars)
    if c != "'":
        # unquoted: read until a separator or the end of input
        name = ''
        while c and c not in [' ', '\t', ',']:
            name += c
            c = safe_next(chars)
        return name, c + ''.join(chars)
    # quoted form; the opening quote has already been consumed
    name = ''
    c = safe_next(chars)
    while c:
        if c == '\\':
            escaped = safe_next(chars)
            if not escaped:
                raise ValueError('Input end during escape.')
            # unknown escapes are kept verbatim (backslash included)
            name += identifier_escapes.get(escaped, '\\' + escaped)
        elif c == "'":
            break
        else:
            name += c
        c = safe_next(chars)
    return name, ''.join(chars)


def format_identifier(name):
    """Renders an identifier, quoting and escaping it when necessary."""
    reverse_escapes = {c: ec for (ec, c) in identifier_escapes.items()}
    needs_quoting = [' ', ','] + list(reverse_escapes.keys())
    if not any(ch in name for ch in needs_quoting):
        return name
    quoted = []
    for ch in name:
        if ch in reverse_escapes:
            quoted.append('\\' + reverse_escapes[ch])
        else:
            quoted.append(ch)
    return "'" + ''.join(quoted) + "'"
class Numeric:
    """Numeric field type."""

    def parse(self, rest):
        """Parses a numeric value; '?' means missing and yields None."""
        if rest.startswith('?'):
            return None, rest[1:]
        return numeric(rest)

    def format(self, number):
        """Renders a numeric value; missing values become '?'."""
        return '?' if number is None else format_numeric(number)

    def default(self):
        return 0

    def __repr__(self):
        return 'Numeric'

    def __str__(self):
        return 'numeric'
class Text:
    """Text field type."""

    def parse(self, rest):
        """Parses a string value; '?' means missing and yields None."""
        if rest.startswith('?'):
            return None, rest[1:]
        return identifier(rest)

    def format(self, name):
        """Renders a string value; missing values become '?'."""
        return '?' if name is None else format_identifier(name)

    def default(self):
        return ''

    def __repr__(self):
        return 'Text'

    def __str__(self):
        return 'string'
class Nominal:
    """Nominal field type: a closed set of allowed constants."""

    def __init__(self, names):
        self.values = names

    def parse(self, rest):
        """Parses one of the allowed constants; '?' yields None."""
        if rest.startswith('?'):
            return None, rest[1:]
        name, rest = identifier(rest)
        if name not in self.values:
            raise ValueError('Unknown nominal constant "{}" for {}.'.format(name, self.values))
        return name, rest

    def format(self, name):
        """Renders one of the allowed constants; missing values become '?'."""
        if name is None:
            return '?'
        if name not in self.values:
            raise ValueError('Unknown nominal constant "{}" for {}.'.format(name, self.values))
        return format_identifier(name)

    def default(self):
        # the first declared constant acts as the default
        return self.values[0]

    def __repr__(self):
        return 'Nominal in {}'.format(self.values)

    def __str__(self):
        return '{' + ', '.join(format_identifier(name) for name in self.values) + '}'
def attr_type(rest):
    """Parses a field type declaration. Consumes the whole rest of the line."""
    lowered = rest.lower()
    if lowered in ['numeric', 'integer', 'real']:
        return Numeric()
    if lowered in ['string']:
        return Text()
    if lowered.startswith('date'):
        raise NotImplementedError('date parsing is not implemented.')
    if rest.startswith('{') and rest.endswith('}'):
        # nominal: comma separated identifiers between braces
        names = []
        body = rest[1:-1]
        while body:
            body = whitespace(body)
            name, body = identifier(body)
            names.append(name)
            body = whitespace(body)
            seen, body = expect(body, ',')
            if not seen:
                break
        return Nominal(names)
    raise ValueError('Unknown attribute type "{}"'.format(rest))
def parse_attribute(line):
    """
    Parses an attribute line of the form: @attribute WS name WS type.
    Returns the (name, type) pair.
    """
    rest = whitespace(line[len('@attribute'):].strip())
    name, rest = identifier(rest)
    type = attr_type(whitespace(rest))
    return name, type
def parse_row(line, fields):
    """
    Parses a row. Row can be normal or sparse.
    A sparse row ('{index value, ...}') lists index/value pairs; fields
    that are not mentioned receive their type's default value.
    """
    line = line.strip()
    values = {}
    if not line.startswith('{'):
        # dense row: one value per field in declaration order, comma separated
        rest = line
        first = True
        for field in fields:
            if not first:
                rest = whitespace(rest)
                seen, rest = expect(rest, ',')
            first = False
            rest = whitespace(rest)
            value, rest = field.type.parse(rest)
            values[field.name] = value
        return values
    else:
        # sparse row: track which field indices still need a default
        todo = set(range(len(fields)))
        rest = line[1:-1].strip()
        first = True
        while rest:
            if not first:
                rest = whitespace(rest)
                seen, rest = expect(rest, ',')
                if not seen:
                    break
            first = False
            rest = whitespace(rest)
            # each sparse entry is: <field index> <value>
            index, rest = numeric(rest)
            field = fields[index]
            rest = whitespace(rest)
            value, rest = field.type.parse(rest)
            todo.remove(index)
            values[field.name] = value
        # fill the unmentioned fields with their type defaults
        for field in (fields[i] for i in todo):
            values[field.name] = field.type.default()
        return values
def load(fileish):
    """
    Loads a data set from an arff formatted file-like object.
    This generator function will parse the arff format's header to determine
    data shape. Each generated item is a single expanded row.
    fileish -- a file-like object
    """
    lines = iter(fileish)
    fields = []
    # header phase: collect @attribute declarations until @data is seen
    # (the relation name itself is ignored)
    for line in lines:
        if is_empty(line) or is_comment(line) or is_relation(line):
            continue
        if is_attribute(line):
            name, type = parse_attribute(line)
            fields.append(Field(name, type))
        elif is_data(line):
            break
    # data phase: one row per non-empty, non-comment line
    for line in lines:
        if is_empty(line) or is_comment(line):
            continue
        yield parse_row(line, fields)
def save(fileish, fields, rows, name='unnamed relation', sparse=False):
    """
    Saves an arff formatted data set to a file-like object.
    The rows parameter can be any iterable. The fields parameter must be a list
    of `Field` instances.
    fileish -- a file-like object to write to
    fields  -- a list of `Field` instances
    rows    -- an iterable containing one dictionary per data row
    name    -- the relation name, defaults to 'unnamed relation'
    sparse  -- whether the output should be in sparse format, defaults to False
    """
    # header: relation line, attribute declarations, then the data marker
    fileish.write(format_relation(name))
    fileish.write('\n')
    fileish.write(format_attributes(fields))
    fileish.write('\n')
    fileish.write(format_data())
    # body: one formatted line per row
    for row in rows:
        fileish.write(format_row(row, fields, sparse))

120
eca/generators.py Normal file
View File

@@ -0,0 +1,120 @@
import threading
import time
from datetime import datetime
import json
from . import fire, get_context, context_switch, register_auxiliary, auxiliary
from . import arff
import logging
import sys
logger = logging.getLogger(__name__)
class EventGenerator:
    """
    An event generator uses a generation function to generate events from
    any external source.
    """

    def __init__(self, context, generator, event_name='tweet', **kwargs):
        self.context = context
        self.event_name = event_name
        self.generator = generator
        # extra keyword arguments are forwarded to the generator function
        self.generator_args = kwargs
        self.stop_flag = threading.Event()

    def start(self):
        """Runs this generator on a freshly started thread."""
        worker = threading.Thread(target=self.run)
        worker.start()

    def stop(self):
        """Requests shutdown of the generator."""
        self.stop_flag.set()

    def run(self):
        """
        Invokes the generator and fires one event per produced item.
        The stop flag is handed to the generator so it can terminate when
        requested; immediate termination is not required.
        """
        logger.debug("Running event generator")
        with context_switch(self.context):
            for item in self.generator(self.stop_flag, **self.generator_args):
                fire(self.event_name, item)
def offline_tweets(stop, data_file, time_factor=1000, arff_file=None):
    """
    Offline tweet replay.
    Takes a datafile formatted with 1 tweet per line, and generates a sequence of
    scaled realtime items.
    stop        -- threading.Event-like object used to request termination
    data_file   -- path to a file with one JSON tweet per line
    time_factor -- divisor applied to inter-tweet delays; None disables delays
    arff_file   -- optional path to an arff file with one extra-data row per tweet
    """
    # timing functions return False if we need to abort
    def delayer(duration):
        # fix: use the duration parameter instead of accidentally closing
        # over the enclosing 'delay' variable
        logger.debug("Delay for next tweet {}s ({}s real)".format(duration, duration/time_factor))
        return not stop.wait(duration / time_factor)

    def immediate(duration):
        return not stop.is_set()

    # select timing function based on time_factor
    delayed = immediate if time_factor is None else delayer
    arff_data = None
    if arff_file:
        arff_file = open(arff_file, 'r', encoding='utf-8')
        arff_data = arff.load(arff_file)
    try:
        with open(data_file, encoding='utf-8') as data:
            last_time = None
            lines = 0
            for line in data:
                lines += 1
                try:
                    tweet = json.loads(line)
                    if arff_file:
                        # attach the matching arff row (None when exhausted/bad)
                        try:
                            extra_data = next(arff_data)
                        except StopIteration:
                            extra_data = None
                        except ValueError as e:
                            logger.error("Could not read arff line for tweet (reason: {})".format(e))
                            extra_data = None
                        tweet['extra'] = extra_data
                except ValueError as e:
                    logger.error("Could not read tweet on {}:{} (reason: {})".format(data_file,lines, e))
                    continue
                # time scale the tweet relative to the previous one
                tweet_time = datetime.strptime(tweet['created_at'], '%a %b %d %H:%M:%S %z %Y')
                if not last_time:
                    last_time = tweet_time
                wait = tweet_time - last_time
                delay = wait.total_seconds()
                # delay and yield, or stop when termination was requested
                if delayed(delay):
                    yield tweet
                    last_time = tweet_time
                else:
                    break
    finally:
        # fix: also close the arff file when the consumer abandons this
        # generator before it is exhausted
        if arff_file:
            arff_file.close()
def start_offline_tweets(data_file, event_name='tweet', aux_name='tweeter', **kwargs):
    """
    Registers an EventGenerator replaying tweets from data_file as an
    auxiliary of the current context and starts it.
    """
    context = get_context()
    if context is None:
        raise NotImplementedError("Can not start offline tweet replay outside of a context.")
    generator = EventGenerator(context, generator=offline_tweets,
                               data_file=data_file, event_name=event_name, **kwargs)
    register_auxiliary(aux_name, generator)
    auxiliary(aux_name).start()

135
eca/http.py Normal file
View File

@@ -0,0 +1,135 @@
import http.cookies
import logging
import json
import collections
from . import httpd
from . import sse
from . import fire, get_context
from . import sessions
# Logging
logger = logging.getLogger(__name__)
# bring some external handlers into this module's scope
# (now you can just import eca.http and have access to all standard handlers)
# re-exported convenience names:
StaticContent = httpd.StaticContent
SessionManager = sessions.SessionManager
class Cookies(httpd.Filter):
    """Filter that parses the Cookie header into request.cookies."""

    def handle(self):
        # process available cookies
        jar = http.cookies.SimpleCookie()
        if 'cookie' in self.request.headers:
            jar.load(self.request.headers['cookie'])
        # make the parsed cookies available to later filters and handlers
        self.request.cookies = jar
class HelloWorld(httpd.Handler):
    """The mandatory Hello World example."""

    def handle_GET(self):
        self.request.send_response(200)
        self.request.send_header('content-type','text/html; charset=utf-8')
        self.request.end_headers()
        template = "<!DOCTYPE html><html><body><h1>Hello world!</h1><p><i>eca-session:</i> {}</p></body></html>"
        # show the session cookie when the Cookies filter provided one
        try:
            cookie = self.request.cookies['eca-session'].value
        except (AttributeError, KeyError):
            cookie = '<i>no cookie</i>'
        self.request.wfile.write(template.format(cookie).encode('utf-8'))
def Redirect(realpath):
    """
    Factory for redirection handlers.
    """
    class RedirectHandler(httpd.Handler):
        """Replies 302 pointing at realpath (absolute URL or host-relative)."""

        def handle_GET(self):
            if realpath.startswith(("http://", "https://")):
                # already an absolute URL
                location = realpath
            else:
                # build an absolute URL from the server address, preferring
                # the Host header sent by the client
                address, port = self.request.server.server_address[0], self.request.server.server_address[1]
                host = address if port == 80 else "{}:{}".format(address, port)
                if 'host' in self.request.headers:
                    host = self.request.headers['host']
                location = "http://{}{}".format(host, realpath)
            self.request.send_response(302)
            self.request.send_header('content-type','text/html; charset=utf-8')
            self.request.send_header('location',location)
            self.request.end_headers()
            body = "<!DOCTYPE html><html><body><p>Redirect to <a href='{0}'>{0}</a></p></body></html>"
            self.request.wfile.write(body.format(location).encode('utf-8'))
    return RedirectHandler
def GenerateEvent(name):
    """
    This function returns a handler class that creates the named event based
    on the posted JSON data.
    """
    class EventGenerationHandler(httpd.Handler):
        """POST-only handler that fires `name` with the request's JSON body."""

        def handle_POST(self):
            # handle weirdness: a body cannot be read without its length
            if 'content-length' not in self.request.headers:
                self.request.send_error(411)
                return
            # read content-length header
            length = int(self.request.headers['content-length'])
            # grab data
            data = self.request.rfile.read(length)
            try:
                structured = json.loads(data.decode('utf-8'))
            except ValueError as e:
                self.request.send_error(400, "Bad request: "+str(e))
                return
            if not isinstance(structured, collections.abc.Mapping):
                self.request.send_error(400, "Bad request: expect a JSON object")
                return
            try:
                fire(name, structured)
            except NotImplementedError:
                # fix: logger.warn is a deprecated alias of logger.warning
                logger.warning("Event generated by HTTP request without active session. Do you have a SessionManager configured?")
                self.request.send_error(500, "No current context available.")
                return
            # 202 Accepted: the event was queued, not necessarily handled yet
            self.request.send_response(202)
            self.request.send_header('content-type', 'text/plain; charset=utf-8')
            self.request.send_header('content-length', 0)
            self.request.end_headers()
    return EventGenerationHandler
class EventStream(sse.ServerSideEvents):
    """Server-sent-events endpoint relaying the context's 'emit' channel."""

    def go_subscribe(self):
        # the closure forwards each emitted Event to the connected client
        def receiver(name, event):
            self.send_event(event.data.get('json'), event.name, event.data.get('id'))
        self.receiver = receiver
        ctx = get_context()
        ctx.channel.subscribe(self.receiver, 'emit')

    def go_unsubscribe(self):
        ctx = get_context()
        ctx.channel.unsubscribe(self.receiver, 'emit')

326
eca/httpd.py Normal file
View File

@@ -0,0 +1,326 @@
import http.server
import http.cookies
import socketserver
import logging
import os.path
import posixpath
import urllib
from collections import namedtuple
# Logging
logger = logging.getLogger(__name__)
# Hard-coded default error message
# (a %-template consumed by http.server: it fills %(code)d, %(message)s and
#  %(explain)s; literal CSS percent signs are escaped as '%%')
DEFAULT_ERROR_MESSAGE = """\
<!DOCTYPE html>
<html>
<head>
<style type="text/css">
* { /* Reset the worst style breakers */
padding: 0;
margin: 0;
}
html { /* We always want at least this height */
min-height: 100%%;
}
body#error {
font-family: sans-serif;
height: 100%%;
background: #3378c6;
background: -webkit-radial-gradient(center, ellipse cover, #3378c6 0%%,#23538a 100%%);
background: radial-gradient(ellipse at center, #3378c6 0%%,#23538a 100%%);
background-size: 100%% 100%%;
background-repeat: no-repeat;
}
#error #message {
position: absolute;
width: 34em;
height: 4em;
top: 50%%;
margin-top: -2em;
left: 50%%;
margin-left: -17em;
text-align: center;
color: #114;
text-shadow: 0 1px 0 #88d;
}
</style>
<title>%(code)d - %(message)s</title>
</head>
<body id='error'>
<div id='message'>
<h1>%(message)s</h1>
<span>%(explain)s</span>
</div>
</body>
</html>
"""
class HTTPRequestHandler(http.server.SimpleHTTPRequestHandler):
    """
    This request handler routes requests to a specialised handler.
    Handling a request is roughly done in two steps:
    1) Requests are first passed through matching registered filters
    2) Request is passed to the matching handler.
    Responsibility for selecting the handler is left to the server class.
    """
    error_message_format = DEFAULT_ERROR_MESSAGE
    server_version = 'EcaHTTP/2'
    default_request_version = 'HTTP/1.1'
    def send_header(self, key, value):
        """Buffer headers until they can be sent."""
        # response_sent is initialised by dispatch(); filters may emit headers
        # before any handler has sent the response status line
        if not self.response_sent:
            if not hasattr(self, '_cached_headers'):
                self._cached_headers = []
            self._cached_headers.append((key,value))
        else:
            super().send_header(key, value)
    def send_response(self, *args, **kwargs):
        """Sends the necessary response, and appends buffered headers."""
        super().send_response(*args, **kwargs)
        self.response_sent = True
        # flush the headers buffered before the status line went out
        if hasattr(self, '_cached_headers'):
            for h in self._cached_headers:
                self.send_header(*h)
            self._cached_headers = []
    def dispatch(self):
        """Dispatch incoming requests through filters to the chosen handler."""
        self.handler = None
        self.response_sent = False
        # the method we will be looking for
        # (uses HTTP method name to build Python method name)
        method_name = "handle_{}".format(self.command)
        # let server determine specialised handler factory, and call it
        handler_factory = self.server.get_handler(self.command, self.path)
        if not handler_factory:
            self.send_error(404)
            return
        # instantiate handler
        self.handler = handler_factory(self)
        # check for necessary HTTP method
        if not hasattr(self.handler, method_name):
            self.send_error(501, "Unsupported method ({})".format(self.command))
            return
        # apply filters to request
        # note: filters are applied in order of registration
        for filter_factory in self.server.get_filters(self.command, self.path):
            filter = filter_factory(self)
            if not hasattr(filter, method_name):
                self.send_error(501, "Unsupported method ({})".format(self.command))
                return
            filter_method = getattr(filter, method_name)
            filter_method()
        # select and invoke actual method
        method = getattr(self.handler, method_name)
        method()
    def translate_path(self, path):
        """
        Translate a /-separated PATH to the local filename syntax.
        This method is inelegantly 'borrowed' from SimpleHTTPServer.py and
        changed so that it maps URLs under self.url_path to files below
        self.local_path (both are set by the StaticContent handler).
        """
        # abandon query parameters
        path = path.split('?',1)[0]
        path = path.split('#',1)[0]
        # strip the URL prefix this handler is mounted under
        path = path[len(self.url_path):]
        # Don't forget explicit trailing slash when normalizing. Issue17324
        trailing_slash = path.rstrip().endswith('/')
        path = posixpath.normpath(urllib.parse.unquote(path))
        words = path.split('/')
        words = filter(None, words)
        # serve content from local_path, instead of os.getcwd()
        path = self.local_path
        for word in words:
            drive, word = os.path.splitdrive(word)
            head, word = os.path.split(word)
            # skip path elements that could escape the content root
            if word in (os.curdir, os.pardir): continue
            path = os.path.join(path, word)
        if trailing_slash:
            path += '/'
        return path
    # Standard HTTP verbs bound to dispatch method
    def do_GET(self): self.dispatch()
    def do_POST(self): self.dispatch()
    def do_PUT(self): self.dispatch()
    def do_DELETE(self): self.dispatch()
    def do_HEAD(self): self.dispatch()
    # Fallback handlers for static content
    # (These invoke the original SimpleHTTPRequestHandler behaviour)
    def handle_GET(self): super().do_GET()
    def handle_HEAD(self): super().do_HEAD()
    # handle logging
    def _log_data(self):
        """Collects request details for the logging 'extra' dictionary."""
        path = getattr(self, 'path','<unknown path>')
        command = getattr(self, 'command', '<unknown command>')
        return {
            'address': self.client_address,
            'location': path,
            'method': command
        }
    def _get_message_format(self, format, args):
        """Prefixes the log message with client address, method and location."""
        log_data = self._log_data()
        message_format = "[{}, {} {}] {}".format(self.client_address[0],
                                                 log_data['method'],
                                                 log_data['location'],
                                                 format%args)
        return message_format
    #overload logging methods
    def log_message(self, format, *args):
        logger.debug(self._get_message_format(format, args), extra=self._log_data())
    def log_error(self, format, *args):
        # NOTE(review): logger.warn is a deprecated alias of logger.warning
        logger.warn(self._get_message_format(format, args), extra=self._log_data())
# registration record shared by handlers and filters
HandlerRegistration = namedtuple('HandlerRegistration',['methods','path','handler'])


class HTTPServer(socketserver.ThreadingMixIn, http.server.HTTPServer):
    """
    HTTP Server with path/method registration functionality to allow simple
    configuration of served content.
    """
    def __init__(self, server_address, RequestHandlerClass=HTTPRequestHandler):
        """
        server_address      -- (host, port) pair to bind
        RequestHandlerClass -- request handler, defaults to the dispatching
                               HTTPRequestHandler
        """
        self.handlers = []
        self.filters = []
        super().__init__(server_address, RequestHandlerClass)

    def get_handler(self, method, path):
        """
        Selects the best matching handler.
        A handler matches when it is registered for the request method (or all
        methods) and its path is a prefix of the request path; the longest
        registered prefix wins. Returns None when nothing matches.
        """
        matches = [m
                   for m
                   in self.handlers
                   if (not m.methods or method in m.methods) and path.startswith(m.path)]
        if matches:
            best = max(matches, key=lambda e: len(e.path))
            return best.handler
        else:
            return None

    def get_filters(self, method, path):
        """Selects all applicable filters (prefix match, registration order)."""
        return [f.handler
                for f
                in self.filters
                if (not f.methods or method in f.methods) and path.startswith(f.path)]

    def _log_registration(self, kind, registration):
        """Logs a single handler/filter registration."""
        message_format = "Adding HTTP request {} {} for ({} {})"
        message = message_format.format(kind,
                                        registration.handler,
                                        registration.methods,
                                        registration.path)
        logger.info(message)

    def add_route(self, path, handler_factory, methods=["GET"]):
        """
        Adds a request handler to the server.
        The handler can be specialised in one or more request methods by
        providing a list of methods. Handlers are matched
        longest-matching-prefix with regards to paths.
        """
        reg = HandlerRegistration(methods, path, handler_factory)
        self._log_registration('handler', reg)
        self.handlers.append(reg)

    def add_content(self, path, local_path, methods=['GET','HEAD']):
        """
        Adds a StaticContent handler to the server.
        This method is shorthand for
        self.add_route(path, StaticContent(path, local_path), methods)
        """
        if not path.endswith('/'):
            # fix: logger.warn is a deprecated alias of logger.warning
            logger.warning("Static content configured without trailing '/'. "+
                           "This is different from traditional behaviour.")
        logger.info("Serving static content for {} under '{}' from '{}'".format(methods,path,local_path))
        self.add_route(path, StaticContent(path, local_path), methods)

    def add_filter(self, path, filter_factory, methods=[]):
        """
        Adds a filter to the server.
        Like handlers, filters can be specialised on one or more request
        methods by providing a list of methods. Filters are selected on
        prefix match with regards to paths.
        Filters are applied in order of registration.
        """
        reg = HandlerRegistration(methods, path, filter_factory)
        self._log_registration('filter', reg)
        self.filters.append(reg)

    def serve_forever(self):
        """Logs startup and enters the stdlib serve loop."""
        logger.info("Server is running...")
        super().serve_forever()
class Handler:
    """
    Handler base class; simply remembers the driving request object.
    """

    def __init__(self, request):
        # the HTTPRequestHandler this handler serves
        self.request = request
class Filter(Handler):
    """
    Filter base class that does nothing.
    Every verb entry point funnels into handle(), so subclasses override a
    single method regardless of request method.
    """

    def handle_GET(self):
        self.handle()

    def handle_POST(self):
        self.handle()

    def handle_HEAD(self):
        self.handle()

    def handle(self):
        """Default filter behaviour: no-op."""
        pass
# static content handler defined here, because of intrinsic coupling with
# the request handler's translate_path.
def StaticContent(url_path, local_path):
    """Factory producing a handler that serves files from local_path."""
    class StaticContent(Handler):
        """
        Explicit fallback handler delegating to the request's
        SimpleHTTPRequestHandler behaviour.
        """

        def set_paths(self):
            # tell the request's translate_path how to map URLs to files
            self.request.local_path = local_path
            self.request.url_path = url_path

        def handle_GET(self):
            self.set_paths()
            self.request.handle_GET()

        def handle_HEAD(self):
            self.set_paths()
            self.request.handle_HEAD()
    # return class so that it can be constructed
    return StaticContent

50
eca/pubsub.py Normal file
View File

@@ -0,0 +1,50 @@
import collections
import threading
class PubSubChannel:
    """
    Publish/Subscribe channel used for distribution of events.

    The operations on this channel are thread-safe, but subscribers
    are executed by the publishing thread. Use a queue to decouple the
    publishing thread from the consuming thread.
    """
    def __init__(self):
        # RLock so a subscriber callback may safely re-enter
        # subscribe/unsubscribe/publish from the publishing thread.
        self.lock = threading.RLock()
        self.subscriptions = collections.defaultdict(list)

    def subscribe(self, target, event='message'):
        """
        Subscribe to an event.

        The optional event name can be used to subscribe selectively.
        :param target: callable invoked as target(event, data).
        """
        with self.lock:
            self.subscriptions[event].append(target)

    def unsubscribe(self, target, event='message'):
        """
        Unsubscribe from an event.

        The optional event name can be used to unsubscribe from another event.
        Raises ValueError if target is not subscribed (list.remove semantics).
        """
        with self.lock:
            self.subscriptions[event].remove(target)

    def publish(self, event='message', data=None, delay=None):
        """
        Publishes an event.

        The event can be accompanied by optional data. A delay can be set to
        delay the publish action by the given amount of seconds.
        """
        if delay is None:
            with self.lock:
                # Iterate over a snapshot so callbacks can subscribe or
                # unsubscribe during delivery without corrupting iteration.
                for target in list(self.subscriptions[event]):
                    target(event, data)
        else:
            def task():
                self.publish(event, data)
            threading.Timer(delay, task).start()

128
eca/sessions.py Normal file
View File

@@ -0,0 +1,128 @@
from http.cookies import SimpleCookie
from collections import namedtuple
from collections.abc import Mapping
from itertools import product, chain
import time
import random
from . import httpd
from . import Context, context_activate
# Name generation for contexts and sessions
def name_parts():
"""
This generator will create an endless list of steadily increasing
name part lists.
"""
# name parts
letters = ['alpha', 'beta', 'gamma', 'delta', 'epsilon', 'zeta', 'eta',
'theta', 'iota', 'kappa', 'lambda', 'mu', 'nu', 'xi', 'omicron',
'pi', 'rho', 'sigma', 'tau', 'upsilon', 'phi', 'chi', 'psi',
'omega']
colours = ['red', 'orange', 'yellow', 'green', 'blue', 'violet']
# randomize order
random.shuffle(letters)
random.shuffle(colours)
# yield initial sequence (letter-colour)
parts = [letters, colours]
yield parts
# forever generate longer sequences by appending the letter list
# over and over. Note that this is the *same* letter list, so it will have
# the exact order.
while True:
random.shuffle(letters)
random.shuffle(colours)
parts.append(letters)
yield parts
# construct an iterator that will endlessly generate names:
# 1) for each parts list p in name_parts() we take the cartesian product
# 2) the product iterators are generated by the for...in generator
# 3) we chain these iterators so that when the first is exhausted, we can
# continue with the second, etc.
# 4) we map the function '-'.join over the list of parts from the chain
names = map('-'.join, chain.from_iterable((product(*p) for p in name_parts())))
class SessionCookie(httpd.Filter):
    """
    The actual HTTP filter that applies cookie handling logic to each
    request. Defers to the SessionManager for the cookie name to use and
    for the activation of sessions.
    """
    def bind(self, manager):
        """Post constructor configuration: attach the owning manager."""
        self.manager = manager

    def handle(self):
        """
        Ensure the session cookie is present (setting it if needed) and let
        the session manager handle activation.
        """
        cookie_name = self.manager.cookie
        jar = self.request.cookies
        morsel = jar.get(cookie_name)

        if not morsel:
            # No cookie yet: mint a fresh session name...
            value = self.manager.generate_name()

            # ...store it in the cookie jar...
            jar[cookie_name] = value
            jar[cookie_name]['path'] = '/'

            # ...and send it back to the client as a header.
            self.request.send_header('Set-Cookie', jar[cookie_name].OutputString())
        else:
            value = morsel.value

        self.manager.activate(value)
class Session:
    """
    Bookkeeping data for a single session.
    """
    def __init__(self, context, seen):
        self.context = context  # the Context driving this session
        self.seen = seen        # timestamp of last activity

    def activate(self):
        """Mark the session as just seen and make its context current."""
        self.seen = time.time()
        context_activate(self.context)
class SessionManager:
    """
    Manages sessions keyed by cookie value. Instances are callable so they
    can be used in place of a constructor in the configuration.
    """
    def __init__(self, cookie_name):
        self.sessions = {}
        self.cookie = cookie_name

    def __call__(self, *args, **kwargs):
        """Build a SessionCookie filter bound to this manager."""
        cookie_filter = SessionCookie(*args, **kwargs)
        cookie_filter.bind(self)
        return cookie_filter

    def generate_name(self):
        """Draw generated names until one without an existing session appears."""
        candidate = next(names)
        while candidate in self.sessions:
            candidate = next(names)
        return candidate

    def _new_session(self, name):
        """Create, start and return a fresh session context for *name*."""
        session = Session(Context(name=name, init_data={'name': name}), time.time())
        session.context.start()
        return session

    def activate(self, name):
        """Activate the session for *name*, creating it on first sight."""
        if name not in self.sessions:
            self.sessions[name] = self._new_session(name)
        self.sessions[name].activate()

67
eca/sse.py Normal file
View File

@@ -0,0 +1,67 @@
import queue
from collections import namedtuple
from . import httpd
PendingEvent = namedtuple('PendingEvent', ['data', 'name', 'id'])
class ServerSideEvents(httpd.Handler):
    """
    Base class for server side events. See the specification of the W3C
    at http://dev.w3.org/html5/eventsource/

    This class handles decoupling through the default Queue. Events can be
    posted for transmission by using send_event; posting None shuts the
    stream down.
    """
    def __init__(self, request):
        super().__init__(request)
        self.queue = queue.Queue()

    def send_event(self, data, name=None, id=None):
        """Queue an event for transmission to the client."""
        self.queue.put(PendingEvent(data, name, id))

    def go_subscribe(self):
        """Hook invoked before the event stream starts; override as needed."""
        pass

    def go_unsubscribe(self):
        """Hook invoked after the event stream ends; override as needed."""
        pass

    def handle_GET(self):
        """Serve the event stream until shutdown or client disconnect."""
        self.go_subscribe()

        # Send HTTP headers:
        self.request.send_response(200)
        self.request.send_header("Content-type", "text/event-stream")
        self.request.end_headers()

        done = False
        while not done:
            event = self.queue.get()
            # Identity check for the None shutdown sentinel (idiomatic, and
            # avoids namedtuple element-wise __eq__ semantics).
            if event is None:
                done = True
            else:
                done = not self._send_message(event)

        self.go_unsubscribe()

    def _send_message(self, event):
        """Write one event to the client; return False on disconnect."""
        try:
            if event.id is not None:
                id_line = "id: {}\n".format(event.id)
                self.request.wfile.write(id_line.encode('utf-8'))
            if event.name is not None:
                event_line = "event: {}\n".format(event.name)
                self.request.wfile.write(event_line.encode('utf-8'))
            data_line = "data: {}\n".format(event.data)
            self.request.wfile.write(data_line.encode('utf-8'))
            # Blank line terminates the event per the SSE wire format.
            self.request.wfile.write("\n".encode('utf-8'))
            self.request.wfile.flush()
            return True
        except IOError:
            return False

39
eca/util.py Normal file
View File

@@ -0,0 +1,39 @@
import os.path
class NamespaceError(KeyError):
    """Exception raised for errors in the NamespaceDict."""
    pass
class NamespaceDict(dict):
    """
    A dictionary whose entries can also be read and written as attributes.

    See http://docs.python.org/3.3/reference/datamodel.html#customizing-attribute-access
    """
    def __getattr__(self, name):
        # Guard clause: missing keys surface as NamespaceError (a KeyError).
        if name in self:
            return self[name]
        raise NamespaceError(name)

    def __setattr__(self, name, value):
        self[name] = value

    def __delattr__(self, name):
        del self[name]
def describe_function(fn):
    """
    Generates a human readable reference to the given function.

    This function is most useful when used on functions defined in actual
    files, since the location is read from the function's code object.
    """
    location = os.path.relpath(fn.__code__.co_filename)
    return "{} ({}:{})".format(fn.__name__, location, fn.__code__.co_firstlineno)