# TODO: Make a project website.
"""findit.py  -- Find ideas and files by tag.

Arguments are the tags you want to look up.

  -x  Exclude a tag:  If an idea or file has this tag, ignore it.
  -p  INCLUDE ideas and files tagged "private."
      By default, anything tagged "private" will NOT show up.

  -d  Documentation!  Output documentation on how to use this program.
"""


import re
import pprint
import sys
import os
import optparse


SOURCE_FILES = ["tags.txt",
                "c:/Documents and Settings/Lion/My Documents/cwrepository/individuals/lion/tagged.txt",
                "c:/tagged/private.txt"]
SOURCE_PATHS = ["C:/tagged"]  # Full paths to directories

FILENAME_TAG_SEPARATOR = "_"  # sp?


DOCUMENTATION = """
This is a "tagging" program.  You use it to search for tagged ideas,
and tagged files.

You are reading a tutorial that shows you how to store & retrieve
ideas and files with this program.

  IDEAS
  ----------------------------------------------------------------------
  
  1. Create a new text file, perhaps: "tagged.txt"
     Input text so that it resembles the following:
     
                    _____ space following the colon!
                   v
       ---------------------------------------- tagged.txt ------
       |1. foo bar: This idea has been tagged "foo" and "bar."  |
       |2. foo: This idea has ONLY been tagged "foo."           |
       |3. bar: This idea has ONLY been tagged "bar."           |
       |4. robot: This idea has been tagged "robot."            |
       ----------------------------------------------------------
        ^     ^   ^__ no space after last tag!
        |     |
        |     |______ spaces separating tags
        |
     Notice that there are no spaces before the 1.
     Don't have any spaces there.

  2. Now, edit this file.  Find where it says "SOURCE_FILES" at the
     top, and change it to:

       ---------------------------------------- findit.py -------
       $                                                        $
       |SOURCE_FILES = ["C:\your\path\to\tagged.txt"]           |
       |SOURCE_PATHS = []                                       |
       $                                                        $
       ----------------------------------------------------------

  3. Now, run findit.py!

       $  python findit.py foo

  It'll show you all the ideas that are tagged "foo".

  4. Try searching for TWO tags.

       $  python findit.py foo bar

     It'll show you the idea that is tagged with BOTH "foo" AND "bar".

  Next, lets see how to work with files.


  FILES
  ----------------------------------------------------------------------

  10. Create a new folder, perhaps: "C:\tagged\"

  11. Create a few empty text documents in it, and rename them like so:

      foo.txt
      foo_bar.txt
      bar.txt
      robot.txt

  12. Now, run findit.py!

      $  python findit.py foo

      It'll show you both ideas and files that are tagged "foo".

      This is just like looking for ideas.
      You might notice that ideas start with "I", and files start with "F".


  PRODUCTIONS & EQUIVALENCES
  ----------------------------------------------------------------------

  Sometimes, you have a tag that "implies" other tags.

  For example, you might want "opensourceprojectidea" to show up under
  searches for tags "opensource", "project", "idea", automatically, as
  well, without having to list them.

  So, there are special "production" lines.  They let you say what
  tags automatically imply some other tags.
  
  1. Add the following line to tagged.txt:
  
       ---------------------------------------- tagged.txt ------
       $                                                        $
       |opensourceprojectidea -> opensource project idea        |
       |5. opensourceprojectidea -> some software for tagging!  |
       ----------------------------------------------------------
  
  2. Now, do a search for "project."

     $  python findit.py project

     Whoah! It returned the opensourceprojectidea idea, even though it
     wasn't explicitly tagged!

     Yes! That's the power of productions!

  There is also "equivalence."

  It means that two tags are equivalent to each other.

  3.  Add the following line to tagged.txt:

       ---------------------------------------- tagged.txt ------
       $                                                        $
       |mobilephone = cellphone                                 |
       |6. cellphone opensourceprojectidea: cell <-> VoIP       |
       ----------------------------------------------------------

  4.  Search for "mobilephone."

     $  python findit.py mobilephone

     You get the idea tagged "cellphone" back.

     How is this different than a production?
     Because it works both ways.
     An "equivalence" is really **two** productions: one going A->B,
     the other going B->A.

     If you tag an idea "cellphone" and search for "mobilephone,"
     you'll get it.  If you tag an idea "mobilephone" and search for
     "cellphone," you'll get it.  It doesn't matter which way, with an
     equivalence.

     But a production only works one way.  "cat" may produce "animal,"
     but "animal" shouldn't necissarily produce "cat."


  That's about it!  There's more to the program, but I leave that to
  you to discover.  As it is, this should be enough to get you started
  tagging ideas.

  Have fun!,
    Lion {:)}=
"""


# Symbols

SYM_FAILED = "failed"


# Magic Numbers

STR_RFIND_NOT_FOUND = -1


# Functions

compose = lambda fn1, fn2: lambda x: fn1(fn2(x))


def partial(fn, before_args=[], after_args=[], added_args_dict={}):
    def go(*args, **args_dict):
        D = args_dict
        D.update(added_args_dict)
        L = before_args + list(args) + after_args
        fn(*L, **D)
    return go


def first_causing_response(fn, L):
    """Find the first thing that returns postive."""
    for obj in L:
        if fn(obj):
            return obj
    return None


ident = lambda x: x
to_integer = int
to_lower_case = lambda x: x.lower()
to_lower_case_list = lambda x: [y.lower() for y in x.split()]
is_flawless_obj = lambda x: x.error == None


# Interpretation Rules

TAG_REGEX = "[a-zA-Z0-9-]+"


class InterpretationRule:

    """A rule for interpreting text.

    __init__  -- Store values, compile regex.
    """

    def __init__(self, title, meaning, regex, names, conversions):
        """Store the values, and compile the regex."""
        self.title = title
        self.meaning = meaning
        self.regex = re.compile(regex)
        self.names = names
        self.conversions = conversions


IDEA_LINE = "idea line"
APPEND_TAGS_LINE = "append tags line"
TAG_PRODUCTIONS = "tag productions"
TAG_EQUIVALENCES = "tag equivalences"
BLANK_LINE = "blank line"


interpretation_rules = [
    InterpretationRule(
    title=IDEA_LINE,
    meaning="An idea that is tagged.",
    regex=("([0-9]+)[.] "  # a number
           "(" + TAG_REGEX + ""  # 1 tag
           "(?: " + TAG_REGEX + ")*)"  # tags
           "[:] (.+)"),  # the idea
    names=["number", "tags", "idea"],
    conversions=[to_integer, to_lower_case_list, ident]
    ),

    InterpretationRule(
    title=APPEND_TAGS_LINE,
    meaning="Tags that are to be automatically applied.",
    regex=("== (" + TAG_REGEX + ""  # 1 tag
           "(?: " + TAG_REGEX + ")*) =="),  # tags
    names=["tags"],
    conversions=[to_lower_case_list],
    ),

    InterpretationRule(
    title=TAG_PRODUCTIONS,
    meaning="One tag implies some other tags.",
    regex=("(" + TAG_REGEX + ") -> "  # 1 tag
           "(" + TAG_REGEX + ""  # produced tag
           "(?: " + TAG_REGEX + ")*)"),  # tags
    names=["src", "tags"],
    conversions=[to_lower_case, to_lower_case_list],
    ),

    InterpretationRule(
    title=TAG_EQUIVALENCES,
    meaning="Several tags are equal to each other.",
    regex=("(" + TAG_REGEX + ") = ("
           "" + TAG_REGEX + ""
           "(?: " + TAG_REGEX + ")*)"),
    names=["first", "tags"],
    conversions=[to_lower_case, to_lower_case_list],
    ),

    InterpretationRule(
    title=BLANK_LINE,
    meaning="A blank line.",
    regex=("\s*"),
    names=[],
    conversions=[]),
    ]


class LineObject:

    """A decoded line of text.

    Stores the filename, line number, line itself, and the type of
    InterpretationRule used to decode the line.

    Also stores the result of applying the rules' regex, conversions,
    and binding the data to appropriate keys.

    __init__  -- Store values, compute bindings.
    """

    def __init__(self, rule, line, line_number, filename):
        """Store all values, and interpret bindings from the line.

        The details of how to interpret the bindings come from the
        InterpretationRule.

        "bindings" refers to the regex matches that have been assigned
        to a given name, as per the rule's specification.
        """
        self.filename = filename
        self.line_number = line_number
        self.line = line
        self.rule = rule
        self.mo = rule.regex.match(line)
        if self.mo == None:
            self.error = SYM_FAILED
            self.bindings = None
        else:
            self.error = None
            results = map(lambda (fn, args): fn(*[args]),
                          zip(rule.conversions, self.mo.groups()))
            self.bindings = dict(zip(rule.names, results))


## LineObject Tests

## print LineObject(interpretation_rules[0],
##                  "1. tag0 tag1 tag2: robotic nation evidence",
##                  0, "eraseme.txt").bindings

## print LineObject(interpretation_rules[1],
##                  "== tag0 tag1 tag2 ==",
##                  0, "eraseme.txt").bindings

## print LineObject(interpretation_rules[2],
##                  "tag0 -> tag1 tag2 tag3",
##                  0, "eraseme.txt").bindings


def interpret_line(line, line_number, filename):
    """Interpret a line by interpretation_rules, return a LineObject."""
    global interpretation_rules
    results = [LineObject(rule, line, line_number, filename)
               for rule in interpretation_rules]
    return first_causing_response(is_flawless_obj,
                                  results)


def interpret_file(filename):
    """Interpret all lines of a file, return a list of LineObjects."""
    all_lines = open(filename, "r").read().splitlines()
    return [interpret_line(line, line_number, filename)
            for (line_number, line) in enumerate(all_lines)]


LINE_IDEA = "line idea"
FILENAME_IDEA = "filename idea"


class IdeaObject:

    """Adapt a lineobj or filename to a uniform idea interface.

    __init__  -- Store values.
    """

    def __init__(self, kind, source, tags):
        """Store the values.

        I've taken the position that the tags may differ from the
        source's tags list (however stored internally) because:

        A) It's easier to write this way, and
        B) Contextual information may significantly alter the tags list.

        Examples of (B) present today (2006-09-28):
        * productions, equivalences
        * automatic associations

        Productions, transitive or otherwise, aren't included in here,
        now.  But automatic associations still are.
        """
        self.kind = kind
        self.source = source
        self.tags = tags


def strip_extension(filename):
    """Return the filename before the extension, excluding the period.

    For example, if it's "readme.txt", this returns "readme".
    """
    index = filename.rfind(".")
    if index == STR_RFIND_NOT_FOUND:
        return filename  # no extension
    return filename[:index]


def get_extension(filename):
    """Return the extension text of a filename, including the period.

    For example, if it's "readme.txt", this returns ".txt".
    """
    index = filename.rfind(".")
    if index == STR_RFIND_NOT_FOUND:
        return None  # no extension
    return filename[index:]


class FilenameObject:

    """Interprets tags from a filename, and stores associated details.

    __init__  -- Constructor
    __repr__  -- Python representation
    """

    def __init__(self, folder_path, filename):
        """Construct a FilenameTags, interpreting tags."""
        self.folder_path = folder_path
        self.filename = filename
        self.tags = [x.lower() for x in
                     strip_extension(filename).split(FILENAME_TAG_SEPARATOR)]
        self.extension = get_extension(filename)

    def __repr__(self):
        """Render a representation that can recreate the object."""
        return "FilenameTags(%s, %s)" % (repr(self.fullpath),
                                        repr(self.filename))


class ProductionRules:

    """Maintain a graph of tag productions.

    The graph is stored through two dictionaries, that map tags
    (strings) to sets.  The sets contain other tags (strings.)

    The graph is built up incrementally, one production at a time.

    The dictionaries store "outgoing" links, and "incoming" links.
    That is, from a given tag, you can see what other tags it goes to,
    and what other tags lead into it.

    __init__  -- Create blank.
    _inout_connect  -- Internal function for linking tags.
    add_production  -- Add a production rule.
    add_productions_from_lines  -- Add several productions from lines.
    """

    def __init__(self):
        """Create an empty ProductionRules instance."""
        self.going_out = {}  # "tag" -> set(["tag", ...])
        self.going_in = {}  # "tag" -> set(["tag", ...])

    def _inout_connect(self, s, d):
        """Internal function to hook two nodes together in the graph.

        Does not respect transitives; For that, use "add_production,"
        which is also publicly accessible.
        """
        if s != d:
            self.going_out.setdefault(s, set()).add(d)
            self.going_in.setdefault(d, set()).add(s)

    def add_production(self, s, d):
        """Add a single production rule."""
        sdt0 = self.going_out.get(d, set())
        sst0 = self.going_in.get(s, set())
        for dt in sdt0:
            self._inout_connect(s, dt)
        for st in sst0:
            for dt in sdt0:
                self._inout_connect(st, dt)
            self._inout_connect(st, d)
        self._inout_connect(s, d)

    def add_productions_from_lines(self, all_lineobjs):
        """Update graph of tags from equivalence & production rules.

        An equivalence is modelled as a two-way production.
        """
        for line in all_lineobjs:
            if line.rule.title == TAG_EQUIVALENCES:
                equivalent_tags = [line.bindings["first"]] + line.bindings["tags"]
                for tag1 in equivalent_tags:
                    for tag2 in equivalent_tags:
                        if tag1 != tag2:
                            self.add_production(tag1, tag2)
            elif line.rule.title == TAG_PRODUCTIONS:
                tag1 = line.bindings["src"]
                for tag2 in line.bindings["tags"]:
                    if tag1 != tag2:
                        self.add_production(tag1, tag2)


def compile_lineobjs_into_ideas(all_lineobjs):

    """Compile several LineObjects into a list of IdeaObjects.

    Note that ProductionRules are not used here; Those are aquired
    separately, through ProductionRules.add_productions_from_lines.

    ProductionRules are applied at search time, not idea collection
    time.
    """

    all_ideas = []
    appenderobj = None

    for lineobj in all_lineobjs:
        if lineobj.rule.title == APPEND_TAGS_LINE:
            appenderobj = lineobj  # current appender object
        elif lineobj.rule.title == IDEA_LINE:
            lineobj_tags = set(lineobj.bindings["tags"])  # Copy tags
            if appenderobj is not None:
                lineobj_tags.update(set(appenderobj.bindings["tags"]))
            all_ideas.append(IdeaObject(LINE_IDEA, lineobj, lineobj_tags))
    return all_ideas


def compile_folder_into_ideas(folder_path):
    """Interpret filenames as tags, and return a list of LineObjects."""
    all_filenameobjs = [FilenameObject(folder_path, filename)
                        for filename in os.listdir(folder_path)]
    return [IdeaObject(FILENAME_IDEA, filenameobj, set(filenameobj.tags))
            for filenameobj in all_filenameobjs]


def build_possibilities_set(tag, rules):
    """Build a set of the tag, and it's anticedents."""
    s = set([tag])
    s.update(rules.going_in.get(tag, set()))
    return s


def find_all_tags_in_search_spec(have_tags, search_spec):
    """Make sure all possibility sets are satisfied by tags.

    The search spec is a list of possibility sets.  To pass this test,
    each possibility set (a collection of tags that are "good
    enough,") must find a match within the "have_tags."
    """
    for possibility_set in search_spec:
        if not possibility_set.intersection(have_tags):
            return False
    return True


def find_any_tag_in_search_spec(have_tags, search_spec):
    """See if ANY possibility sets are satisfied by tags.

    We do this when we're searching for forbidden tags.  A single
    forbidden tag, or any tags that can substitute for the forbidden
    tag, and you're a goner (return True.)
    """
    for possibility_set in search_spec:
        if possibility_set.intersection(have_tags):
            return True
    return False


def search(seek_tags, forbid_tags):
    all_ideaobjs = []
    production_rules = ProductionRules()

    # Get all rules from SOURCE_FILES, and all ideas,
    # and get all ideas from SOURCE_PATHS.

    for filename in SOURCE_FILES:
        all_lineobjs = interpret_file(filename)
        production_rules.add_productions_from_lines(all_lineobjs)
        all_ideaobjs.extend(compile_lineobjs_into_ideas(all_lineobjs))
    for filename in SOURCE_PATHS:
        all_ideaobjs.extend(compile_folder_into_ideas(filename))

    seek_spec = [build_possibilities_set(tag, production_rules)
             for tag in seek_tags]

    forbid_spec = [build_possibilities_set(tag, production_rules)
                   for tag in forbid_tags]

    matches = []

    for ideaobj in all_ideaobjs:
        matching = True
        if (find_all_tags_in_search_spec(ideaobj.tags, seek_spec)
            and not find_any_tag_in_search_spec(ideaobj.tags,
                                                forbid_spec)):
            matches.append(ideaobj)

    return matches


if __name__ == "__main__":
    parser = optparse.OptionParser(__doc__)
    parser.add_option("-x", "--exclude", dest="exclude",
                      type="string", action="append",
                      default=[], help="exclude tag")
    parser.add_option("-p", "--private", dest="private",
                      action="store_true", default=False,
                      help="allow private")
    parser.add_option("-d", "--documentation",
                      dest="show_documentation",
                      action="store_true", default=False,
                      help="show documentation")
    
    (options, args) = parser.parse_args()
    
    if options.show_documentation:
        print DOCUMENTATION
        sys.exit(0)
    
    if len(args) == 0:
        parser.error("need to search for at least one tag")
    
    seek_tags = set(map(to_lower_case, args))
    forbid_tags = set(map(to_lower_case, options.exclude))
    if options.private == False:
        if "private" not in args:
            forbid_tags.add("private")

    matches = search(seek_tags, forbid_tags)

    for ideaobj in matches:
        if ideaobj.kind == LINE_IDEA:
            lineobj = ideaobj.source
            print "I|%s|%d|#%d|%s|%s" % (lineobj.filename,
                                         lineobj.line_number,
                                         lineobj.bindings["number"],
                                         ", ".join(list(ideaobj.tags)),
                                         lineobj.bindings["idea"])
        elif ideaobj.kind == FILENAME_IDEA:
            fileobj = ideaobj.source
            print "F|%s|%s" % (fileobj.folder_path,
                               fileobj.filename)
        else:
            assert False


