#!/usr/bin/env python
"""
Read patron information from Unicorn flat format files
and generate SQL for loading into Evergreen
"""

#    Copyright (C) 2009, Dan Scott
#    Thanks to the Robertson Library, UPEI, for funding the initial version
#    of these scripts. 

#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.

import fileinput
import pymarc
import random
import re

__DEBUG__ = False
#__DEBUG__ = True

barcodes = []

class User():
    """
    Represents an Evergreen user record in actor.usr table
    """
    
    def __init__(self):
        """
        Initializes the user
        """
        self.raw_card = None # instance of Card object
        self.raw_barcode = None # 
        self.raw_addresses = [] # instance of address object
        self.previous_ids = [] # previous barcodes, theoretically
        self.notes = {}
        self.group_id = None # Used only for ILL codes?
        self.raw_username = None
        self.raw_profile = None
        
        self.id = None # serial
        self.card = None # int reference to actor.card
        self.profile = None # int reference to profile
        self.usrname = None # text
        self.email = None # text
        self.passwd = None # text
        self.standing = None # int reference to config.standing
        self.ident_type = 1 # int reference to config.identification_type
        self.ident_value = None # text
        self.ident_type2 = None # int reference to config.identification_type
        self.ident_value2 = None # text
        self.net_access_value = None # text
        self.photo_url = None # text
        self.prefix = None # text
        self.first_given_name = None # text
        self.second_given_name = None # text
        self.family_name = None # text
        self.suffix = None # text
        self.day_phone = None # text
        self.evening_phone = None # text
        self.other_phone = None # text
        self.mailing_address = None # int reference to actor.usr_address
        self.billing_address = None # int reference to actor.usr_address
        self.home_ou = None # int reference to actor.org_unit
        self.dob = None # timestamp with timezone
        self.active = None # bool default true
        self.master_account = None # bool default false
        self.super_user = None # bool default false
        self.barred = None # bool default false
        self.deleted = None # bool default false
        self.usrgroup = None # serial
        self.claims_returned_count = None # int
        self.credit_forward_balance = None # numeric(6,2)
        self.last_xact_id = None # text
        self.alert_message = None # text
        self.create_date = None # timestamp with time zone
        self.expire_date = None # timestamp with time zone
    
    def to_sql(self):
        """
        Generate SQL to create or replace a user
        
        1. UPSERT actor.usr and get actor.usr.id
        2. UPSERT actor.card and get actor.card.id
        3. [For each unique address]:
           UPSERT actor.usr_address and get actor.usr_address.id
        
        """
        if not self.raw_barcode or not self.profile or not self.raw_username or not self.home_ou:
            return "-- %s\n" % (str(self.usrname))

        if self.raw_barcode in barcodes:
            # PANIC FIXME - duplicate barcodes not good - see 21520000266595 for an example
            return "-- DUPLICATE BARCODE: %s\n" % (self.raw_barcode)

        self.generate_names()

        sql = "INSERT INTO actor.usr (profile, usrname, email, passwd, standing, ident_type, ident_value, first_given_name, second_given_name, family_name, day_phone, evening_phone, other_phone, alert_message, home_ou, active, barred, deleted, create_date, expire_date)\n"
        sql += "    VALUES ("

        sql += str(sql_integer(self.profile)) + ', '
        sql += sql_text(self.raw_barcode) + ', '
        sql += sql_text(self.email) + ', '
        if self.passwd:
            sql += sql_text(self.passwd) + ', '
        else:
            sql += sql_text(str(random.randint(0,9)) + str(random.randint(0,9))+ str(random.randint(0,9))  + str(random.randint(0,9))) + ', '
        sql += str(sql_integer(self.standing, 1)) + ', '
        sql += '3, ' # force to custom identification type like 100? or modify predefined ones?
        sql += sql_text(self.raw_barcode) + ', ' # ident value = barcode
        sql += sql_text(self.first_given_name) + ', '
        sql += sql_text(self.second_given_name) + ', '
        sql += sql_text(self.family_name) + ', '
        sql += sql_text(self.day_phone) + ', '
        sql += sql_text(self.evening_phone) + ', '
        sql += sql_text(self.other_phone) + ', '
        sql += sql_text(self.alert_message) + ', '
        sql += str(sql_integer(self.home_ou)) + ', '
        sql += str(sql_boolean(self.active)) + ', '
        sql += str(sql_boolean(self.barred)) + ', '
        sql += str(sql_boolean(self.deleted)) + ', '
        sql += sql_date(self.create_date) + ', '
        sql += sql_date(self.expire_date)

        sql += ");\n"

        sql += "INSERT INTO actor.card (usr, barcode)\n"
        sql += "    VALUES (CURRVAL('actor.usr_id_seq'), %s);\n" % (sql_text(self.raw_barcode))
        barcodes.append(self.raw_barcode)
        sql += "UPDATE actor.usr SET card = CURRVAL('actor.card_id_seq') WHERE id = CURRVAL('actor.usr_id_seq');\n"

        for card in self.previous_ids:
            if card not in barcodes:
                sql += "INSERT INTO actor.card (usr, barcode, active)\n"
                sql += "    VALUES (CURRVAL('actor.usr_id_seq'), %s, 'f');\n" % (sql_text(card))
                barcodes.append(card)

        count = 0
        for address in self.raw_addresses:
            address_sql = address.to_sql()
            if not address_sql:
                continue
            sql += address_sql
            if (count == 0):
                sql += "UPDATE actor.usr SET mailing_address = CURRVAL('actor.usr_address_id_seq') WHERE id = CURRVAL('actor.usr_id_seq');\n"
            elif (sql and count == 1):
                sql += "UPDATE actor.usr SET billing_address = CURRVAL('actor.usr_address_id_seq') WHERE id = CURRVAL('actor.usr_id_seq');\n"
            count += 1

        for note in self.notes:
            sql += self.notes[note].to_sql()

        return sql

    def generate_names(self):
        """
        Return a first, last, and (if available) second name
        """
        if self.raw_username.count(','):
            self.family_name, rest = self.raw_username.split(',', 1)
            rest = rest.strip()
            if rest.count(' '):
                self.first_given_name = rest[0:rest.index(' ')]
                self.second_given_name = rest[rest.index(' '):]
            elif rest:
                self.first_given_name = rest
        elif self.raw_profile == 'DESM_ILL':
            if self.group_id:
                if self.raw_username.count('ILL'):
                    self.first_given_name = self.raw_username[0:self.raw_username.index('ILL')]
                else:
                   self.first_given_name = self.raw_username
                self.family_name = self.group_id
            elif self.raw_username.count('ILL'):
                self.first_given_name = self.raw_username[0:self.raw_username.index('ILL')]
                self.family_name = self.raw_username[self.raw_username.index('ILL'):]
        else:
            # No heroic measures, please
            self.first_given_name = self.raw_username
            self.family_name = self.raw_username

        if not self.first_given_name:
            # Give up and flag these for later cleanup
            self.first_given_name = 'UNKNOWN'
        if not self.family_name:
            # Give up and flag these for later cleanup
            self.first_given_name = 'UNKNOWN'


class Address():
    """
    Represents an Evergreen user's address in actor.usr_address table
    """
    
    def __init__(self):
        """
        Initialize the address
        """
        self.id = None # serial
        self.valid = None # bool
        self.within_city_limits = None # bool
        self.address_type = None # text, 'MAILING' or ''
        self.street1 = None # text
        self.street2 = None # text
        self.city = None # text
        self.county = None # text
        self.state = None # text
        self.country = None # text
        self.post_code = None # text
        
    def to_sql(self):
        """
        Generates SQL required to UPSERT actor.usr_address
        """

        if (not(self.street1 or self.city or self.state or self.country or self.post_code)):
            return None

        sql = "INSERT INTO actor.usr_address (usr, street1, street2, city, state, country, post_code)\n"
        sql += "    VALUES (CURRVAL('actor.usr_id_seq'), "
        sql += sql_text(self.street1, 'UNKNOWN') + ', '
        sql += sql_text(self.street2) + ', '
        sql += sql_text(self.city, 'UNKNOWN') + ', '
        sql += sql_text(self.state, 'UNKNOWN') + ', '
        sql += sql_text(self.country, 'UNKNOWN') + ', '
        sql += sql_text(self.post_code, 'UNKNOWN')
        sql += ");\n"

        return sql

class Note():
    """
    Represents notes about an Evergreen user in actor.usr_note table
    """

    def __init__(self, value):
        """
        Initialize the note
        """
        self.id = None
        self.usr = None
        self.creator = None
        self.title = 'LEGACY NOTE'
        self.value = value
        pass

    def to_sql(self):
        """
        Generates SQL required to INSERT the note
        """
        sql = "INSERT INTO actor.usr_note (usr, creator, title, value)\n"
        sql += "    VALUES (CURRVAL('actor.usr_id_seq'), "
        sql += str(sql_integer(1)) + ', '
        sql += sql_text(self.title) + ', '
        sql += sql_text(self.value)
        sql += ");\n"
        
        return sql
    
class Card():
    """
    Represents an Evergreen user's card in actor.card table
    """
    
    def __init__(self, barcode, active='t'):
        """
        Initialize the card
        """
        self.id = None
        self.barcode = barcode
        self.active = active
    
    def to_sql(self):
        """
        Generates SQL required to UPSERT actor.card
        """
        return self.id

class FlatFile():
    """
    Represents a legacy Unicorn patron import file
    """
    addr, xinfo = False, False
    
    mapping = {
        'USER_PROFILE': 'profile', # text to int conversion
        'USER_BIRTHYEAR': None, # throw this away as we're not interested
        'USER_BIRTH_DATE': None, # throw this away as we're not interested
        'BIRTHDATE': None # throw this away as we're not interested
    }
    
    def __init__(self, filename):
        """
        Initialize the file
        """
        self.legacy = fileinput.input(filename)
        self.usr_count = 0
        self.clear()

    def clear(self):
        self.user = User()
        self.active_note = False
    
    def parse(self):
        """
        Generate users, cards, and addresses
        """
        for line in self.legacy:
            key, value = None, None
            if self.active_note:
                if line.startswith('.'):
                    self.active_note = False
                elif self.active_note == 'NOTE':
                    # Currently treating NOTE fields as alerts in Evergreen
                    self.user.alert_message += (" " + line)
                    continue
                else:
                    self.user.notes[self.active_note].value += (" " + line)
                    continue
            if (not (line.startswith('.') or line.startswith('*** DOCUMENT') or line.startswith('FORM'))):
                # Evil, we're skipping stuff: FIXME XXX
                continue

            if line == '*** DOCUMENT BOUNDARY ***\n':
                if self.usr_count > 0:
                    self.to_sql()
                self.usr_count += 1
            elif line == 'FORM=LDUSER\n':
                self.user = User()
            elif line == '.USER_ADDR1_BEGIN.\n':
                self.addr = Address()
            elif line == '.USER_ADDR1_END.\n':
                self.user.raw_addresses.append(self.addr)
            elif line == '.USER_ADDR2_BEGIN.\n':
                self.addr = Address()
            elif line == '.USER_ADDR2_END.\n':
                self.user.raw_addresses.append(self.addr)
            elif line == '.USER_ADDR3_BEGIN.\n':
                self.addr = Address()
            elif line == '.USER_ADDR3_END.\n':
                self.user.raw_addresses.append(self.addr)
            elif line == '.USER_XINFO_BEGIN.\n':
                pass
            elif line == '.USER_XINFO_END.\n':
                pass
            elif line.startswith('.PREV_ID') or line.startswith('.PREV_ID2'):
                prev_id = line[line.index(' '):].strip()
                if (prev_id not in self.user.previous_ids) and (prev_id != self.user.raw_barcode):
                    self.user.previous_ids.append(prev_id)
            else:
                if __DEBUG__:
                    print "line %s" % (line)
                key, value = self.parseline(line)
                if key:
                    self.map_uni_to_eg(key, value)
            
            if __DEBUG__:
                print "%s = %s" % (key, value)
        
    def parseline(self, line):
        """
        Split key/value pairs from patron data
        """
        
        pat = re.compile(r'^\.(.+?)\.\s+\|a(.+)\s+$')
        match = pat.search(line)
        if (match):
            return match.group(1,2)
        return None, None
    
    def map_uni_to_eg(self, key, value):
        """
        Maps a Unicorn patron field to an Evergreen patron field
        """
        
        if key == 'USER_ID':
            if len(value) == 14:
                self.user.raw_barcode = value
            else:
                self.user.usrname = value
        elif key == 'USER_GROUP_ID':
            # Used only for ILL codes?
            self.user.group_id = value
        elif key == 'USER_ALT_ID':
            self.user.usrname = value
        elif key == 'USER_PIN':
            self.user.passwd = value
        elif key == 'USER_STATUS':
            if value == 'OK':
                self.user.active = True
            elif value == 'BARRED':
                self.user.active = True
                self.user.barred = True
            elif value == 'BLOCKED':
                self.user.active = True
                self.user.barred = True # this is an escalation
            elif value == 'DELINQUENT':
                self.user.active = True
        elif key == 'USER_NAME':
            self.user.raw_username = value
        elif key == 'USER_LIBRARY':
            self.user.home_ou = self.map_library(value)
        elif key == 'USER_PRIV_GRANTED':
            self.user.create_date = value
        elif key == 'USER_PRIV_EXPIRES':
            self.user.expire_date = value

        # Argh, different Unicorn libraries map addresses differently
        elif key in ('LINE_1', 'STREET'):
            self.addr.street1 = value
        elif key == 'LINE_2':
            self.addr.street2 = value
        elif key == 'CITY':
            self.addr.city = value
        elif key == 'PROVINCE':
            self.addr.state = value
        elif key == 'CITY/PROV':
            self.addr.city, self.addr.state = self.map_city_prov(value)
        elif key == 'POSTALCODE':
            self.addr.post_code = value
        elif key == 'COUNTRY':
            self.addr.country = value
        elif key in ('NOTE'):
            self.active_note = key
            self.user.alert_message = value
        elif key in ('COMMENT', 'WEBCATPREF'):
            self.active_note = key
            # Assumption here that there won't be more than one COMMENT or WEBCATPREF per user
            self.user.notes[key] = Note(value)
        elif key == 'EMAIL':
            self.user.email = value
        # Argh again, different Unicorn libraries map phone numbers differently
        elif key == 'HOMEPHONE':
            self.user.evening_phone = value
        elif key == 'WORKPHONE':
            # Seems more likely that there will be multiple work phones than home phones
            if self.user.day_phone:
                self.user.other_phone = value
            else:
                self.user.day_phone = value
        elif key == 'PHONE':
            ###
            # This is a complete hack as there is no good
            # way to distinguish between addresses and phone
            # numbers other than local convention (e.g. ADDR1 is always home)
            ###
            if not self.user.day_phone:
                self.user.day_phone = value
            elif not self.user.evening_phone:
                self.user.evening_phone = value
            else:
                self.user.other_phone = value
        elif key in ('PROFILE', 'USER_PROFILE'):
            self.user.raw_profile = value
            self.user.profile = self.map_profile(value)
    
    def map_city_prov(self, value):
        """
        Maps a combined city/province field into a separate city and province
        """

        if (value.count(',') == 0):
            return value, None
        match = re.compile(r'^(.+),\s*(.*)$').search(value)
        return match.group(1), match.group(2)

    def map_library(self, value):
        """
        Maps a name of a library to the respective integer in Evergreen's actor.org_unit table
        """

        # For example...

        if value == 'ALGOMA':
            return 124
        elif value == 'ARTGALL':
            return 112
        elif value == 'CFOF':
            return 113
        elif value == 'DESMARAIS':
            return 103
        elif value == 'EDUCATION':
            return 132
        elif value == 'FALCON':
            return 119
        elif value == 'HEARST':
            return 127
        elif value == 'HIRC':
            return 125
        elif value == 'HRSRH':
            return 115
        elif value == 'HUNTINGTON':
            return 104
        elif value == 'INCO':
            return 120
        elif value == 'MEDIACEN':
            return 108
        elif value == 'MNDM':
            return 118
        elif value == 'NEMHC':
            return 123
        elif value == 'SAH':
            return 116
        elif value == 'SUDBURY':
            return 107

    def map_profile(self, value):
        """
        Maps a name of a profile to the respective integer (FIXME)
        """
        faculty = ['FACULTY', 'ALGO_FAC', 'CAMB_FAC', 'DESM_FAC', 'HIRC_LKFAC', 'HIRC_MDFAC', 'HRST-PROF', 'HUNT_FAC', 'SUDB_FAC']
        graduate = ['GRADUATE', 'HIRC_LKGR', 'HUNT_GRAD', 'SUDB_UNDER']
        reader = ['ALGO_RDRS', 'HIRC_PUBL', 'HRST-PUBL', 'PUBLIC', 'PUBLRDRS', 'PUBPATIENT']
        undergrad = ['UNDERGRAD', 'ALGO_UNDER', 'HIRC_LKUG', 'HIRC_MDFAC', 'HRST-ETUD', 'LU_UNDER', 'SUDB_GRADS']
        system = ['REPAIR', 'RESERVE', 'SIRSI', 'ADMIN', 'LIBRARYUSE', 'WEBSERVER', 'LOST', 'MISSING', 'CIRC', 'TECH', 'BINDERY', 'PROCESS']
        if value in faculty:
            return 11
        elif value in graduate:
            return 12
        elif value in reader:
            return 14
        elif value in undergrad:
            return 13
        elif value in system:
            return None
        else:
            # Raw Patron, for lack of anything better
            return 2
   
    def to_sql(self):
        print self.user.to_sql()
        self.clear()

def sql_text(text, default=None):
    """
    Prepares input value for insertion into a TEXT column
    """
    if text:
        # Convert from yucky old MARC8 encoding to UTF8
        text = pymarc.marc8_to_unicode(text).encode('utf8')
        # Escape single quotes in the value
        text = re.sub(r"'", r"''", text)
        # Normally I would double the backslashes, but in this case I know there's only one bad occurrence
        text = re.sub(r'\\', r'', text)
        return "'%s'" % (text)
    if default:
        return "'%s'" % (default)
    return 'NULL'

def sql_integer(integer, default=None):
    """
    Prepares input value for insertion into an INTEGER column
    """
    if integer:
        return integer
    if default:
        return default
    return 'NULL'

def sql_boolean(value):
    """
    Prepares input value for insertion into a BOOLEAN column
    """
    if value is True:
        return "'t'"
    return "'f'"

def sql_date(value):
    """
    Prepares input value for insertion into a DATE column
    """
    if value == 'NEVER':
        return "'2010-05-01'"
    elif value:
        date = re.compile(r'^(\d{4})(\d{2})(\d{2})$').search(value)
        return "'%s-%s-%s'" % (date.group(1), date.group(2), date.group(3))
    else:
        return "'NULL'"

if __name__ == '__main__':
    test = FlatFile('userdata.dump')
    test.parse()

# vim:et:ts=4:sw=4:
