Welcome, guest | Sign In | My Account | Store | Cart

A basic model class representing Apache Solr. Abstracts the select, delete, update, and commit operations.

Select operation returns Python object parsed from a JSON-formatted response.

Python, 69 lines
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
"""Simple model of Apache Solr 1.4 and 3.x"""

import json
import urllib
import urllib2

import lxml.etree as etree


class Solr(object):
    """Thin abstraction layer around Apache Solr"""

    def __init__(self, url):
        self.url = url

    def select(self, params):
        """Search Solr, return URL and JSON response."""
        params['wt'] = 'json'
        url = self.url + '/select?' + urllib.urlencode(params)
        conn = urllib2.urlopen(url)
        return url, json.load(conn)

    def delete(self, query, commit=False):
        """Delete documents matching `query` from Solr, return (URL, status)"""
        params = {}
        if commit:
            params['commit'] = 'true'
        url = self.url + '/update?' + urllib.urlencode(params)
        request = urllib2.Request(url)
        request.add_header('Content-Type', 'text/xml; charset=utf-8')
        request.add_data('<delete><query>{0}</query></delete>'.format(query))
        response = urllib2.urlopen(request).read()
        status = etree.XML(response).findtext('lst/int')
        return url, status

    def update(self, docs, commitwithin=None):
        """Post list of docs to Solr, return URL and status.
        Opptionall tell Solr to "commitwithin" that many milliseconds."""
        url = self.url + '/update'
        add_xml = etree.Element('add')
        if commitwithin is not None:
            add_xml.set('commitWithin', str(commitwithin))
        for doc in docs:
            xdoc = etree.SubElement(add_xml, 'doc')
            for key, value in doc.iteritems():
                if value:
                    field = etree.Element('field', name=key)
                    field.text = (value if isinstance(value, unicode)
                                  else str(value))
                    xdoc.append(field)
        request = urllib2.Request(url)
        request.add_header('Content-Type', 'text/xml; charset=utf-8')
        request.add_data(etree.tostring(add_xml, pretty_print=True))
        response = urllib2.urlopen(request).read()
        status = etree.XML(response).findtext('lst/int')
        return url, status

    def commit(self, waitsearcher=False, waitflush=False):
        """Commit uncommitted changes to Solr immediately, without waiting."""
        commit_xml = etree.Element('commit')
        commit_xml.set('waitFlush', str(waitflush))
        commit_xml.set('waitSearcher', str(waitsearcher))
        url = self.url + '/update'
        request = urllib2.Request(url)
        request.add_header('Content-Type', 'text/xml; charset=utf-8')
        request.add_data(etree.tostring(commit_xml, pretty_print=True))
        response = urllib2.urlopen(request).read()
        status = etree.XML(response).findtext('lst/int')
        return url, status

There are several full-fledged Python libraries for interfacing to Apache Solr.

But sometimes all you need is a little code to build an appropriate HTTP request and parse the response. In that case, using this class could save you some time.