Parsing by generation

By far the simplest method of parsing is to use the grammar to generate sentences, and then wait for the input sentence to show up…

Needless to say, you could be in for a long wait, so this is not a feasible method, but every top-down parsing method is just a refinement of this, so it is interesting theoretically.

As I have already written the algorithm to generate sentences from a grammar, the implementation of the parser is trivial. The only difficulty is that it’s hard to know how many sentences you need to generate to be sure of encountering the input sentence if it is valid, so I’ve added a max_gen parameter to specify the maximum number of sentences to generate.

#ifndef GENERATION_PARSER_HPP
#define GENERATION_PARSER_HPP

#include <vector>
#include <string>
#include <algorithm>

#include <grammar.hpp>
#include <generator.hpp>

namespace MB
{

class generation_parser
{
public:
    generation_parser(const MB::grammar &grammar)
        : grammar_(grammar)
    {
    }
    template <class InputIt>
    bool parse(InputIt begin, InputIt end, unsigned int max_gen = 100)
    {
        std::vector<std::string> sentence(begin, end);
        std::vector<std::vector<std::string> > sentences;
        generator(grammar_).generate(std::back_inserter(sentences), max_gen);
        return std::find(sentences.begin(), sentences.end(), sentence) != sentences.end();
    }
private:
    const grammar& grammar_;
};

} // namespace MB


#endif // GENERATION_PARSER_HPP

The algorithm to generate production trees from a grammar can be used in the same way. The only difference is that the sentence needs to be extracted from the trees for comparison.

#ifndef TREE_GENERATION_PARSER_HPP
#define TREE_GENERATION_PARSER_HPP

#include <string>
#include <vector>
#include <algorithm>

#include <grammar.hpp>
#include <tree_generator.hpp>

namespace MB
{

class tree_generation_parser
{
public:
    tree_generation_parser(const MB::grammar &grammar)
        : grammar_(grammar)
    {
    }
    template <class InputIt>
    bool parse(InputIt begin, InputIt end, unsigned int max_gen = 100)
    {
        std::vector<std::string> sentence(begin, end);
                std::vector<tree> trees;
        tree_generator(grammar_).generate(std::back_inserter(trees), max_gen);
        std::vector<std::vector<std::string> > sentences(trees.size());
        std::transform(trees.begin(), trees.end(), sentences.begin(),
                std::mem_fun_ref(&tree::get_leaves));
        return std::find(sentences.begin(), sentences.end(), sentence) != sentences.end();
    }
private:
    const grammar& grammar_;
};

} // namespace MB

#endif // TREE_GENERATION_PARSER_HPP