csa-to-kisen.cc
Go to the documentation of this file.
00001 #include "osl/container/moveVector.h"
00002 #include "osl/hash/hashKey.h"
00003 #include "osl/state/numEffectState.h"
00004 #include "osl/record/ki2.h"
00005 #include "osl/record/kisen.h"
00006 #include "osl/record/kakinoki.h"
00007 #include "osl/record/csaRecord.h"
00008 #include "osl/record/checkDuplicate.h"
00009 #include "osl/record/csaIOError.h"
00010 #include "osl/record/kanjiCode.h"
00011 #include "osl/misc/filePath.h"
00012 #include "osl/misc/iconvConvert.h"
00013 
00014 #include <boost/algorithm/string/predicate.hpp>
00015 #include <boost/algorithm/string/trim.hpp>
00016 #include <boost/date_time/gregorian/gregorian.hpp>
00017 #include <boost/scoped_ptr.hpp>
00018 #include <boost/program_options.hpp>
00019 #include <boost/filesystem/convenience.hpp>
00020 #include <boost/foreach.hpp>
00021 #include <boost/format.hpp>
00022 #include <boost/progress.hpp>
00023 #include <boost/regex.hpp>
00024 #include <deque>
00025 #include <exception>
00026 #include <iostream>
00027 #include <fstream>
00028 #include <tr1/unordered_map>
00029 
/// Tournament names used as a filter by accept_tournament().
/// Filled in main() from the file given by --tournament-file; when it is
/// empty no filtering takes place.
std::vector<std::string> good_tournaments;

/**
 * Decide whether a record belonging to tournament `name` passes the filter.
 *
 * A record is accepted when no filter is loaded, or when `name` is a prefix
 * of at least one entry of good_tournaments.  When `name` occurs inside an
 * entry but not at its beginning, a diagnostic line is written to std::cerr
 * and the search goes on with the remaining entries.
 *
 * An empty `name` is rejected while a filter is active: the empty string is
 * a prefix of every entry, so without this guard records carrying no
 * tournament name at all would slip through the filter.
 *
 * @param name tournament name taken from the record
 * @return true if the record should be converted
 */
bool accept_tournament(const std::string& name)
{
  if (good_tournaments.empty())
    return true;                // no filter configured: accept everything
  if (name.empty())
    return false;               // "" would otherwise match every entry

  typedef std::vector<std::string>::const_iterator const_iterator;
  for (const_iterator p = good_tournaments.begin();
       p != good_tournaments.end(); ++p) {
    const std::string::size_type pos = p->find(name);
    if (pos == 0)
      return true;
    if (pos != std::string::npos)
      std::cerr << *p << " != " << name << "\n";
  }
  return false;
}
00041 
00042 std::string heuristic_find_title(osl::Record& record, osl::Player player)
00043 {
00044   static const osl::CArray<const char*,25> titles = {{
00045       K_K1 K_DAN, K_K2 K_DAN, K_K3 K_DAN, 
00046       K_K4 K_DAN, K_K5 K_DAN, K_K6 K_DAN, K_K7 K_DAN, K_K8 K_DAN, K_K9 K_DAN, 
00047       K_MEIJIN, K_PROOK2 K_KING2, K_KING2 K_KURAI, K_KING2 K_SHOU, K_KING2 K_ZA,
00048       K_KI K_KING2, K_KI K_SEI, 
00049       K_K2 K_KANMURI, K_K3 K_KANMURI, K_K4 K_KANMURI, K_K5 K_KANMURI,
00050       K_K6 K_KANMURI, K_K7 K_KANMURI, K_K8 K_KANMURI, K_K9 K_KANMURI, 
00051       K_JORYUU,
00052     }};
00053   std::string name = record.getPlayer(player);
00054   std::string title_found = "";
00055   BOOST_FOREACH(const char *title, titles) {
00056     if (boost::algorithm::iends_with(name, title)) {
00057       title_found = title + title_found;
00058       name.resize(name.size() - strlen(title));
00059     }
00060   }
00061   record.setPlayer(player, name);
00062   return title_found;
00063 }
00064 
00065 void run(osl::record::Record& record,
00066          osl::record::OKisenStream& ks,
00067          boost::scoped_ptr<osl::record::KisenIpxWriter>& ipx_writer,
00068          osl::record::CheckDuplicate& check_duplicates,
00069          int default_rating, int min_year, int max_year)
00070 {
00071   boost::gregorian::date date = record.getDate();
00072   if (min_year > 0 && (date.is_special() || date.year() < min_year))
00073     return;
00074   if (max_year > 0 && (date.is_special() || date.year() > max_year))
00075     return;
00076   // 重複チェック 
00077   const osl::vector<osl::Move>& moves = record.getMoves();
00078   if (check_duplicates.regist(moves))
00079     return;
00080 
00081   std::string black_title = heuristic_find_title(record, osl::BLACK);
00082   std::string white_title = heuristic_find_title(record, osl::WHITE);
00083   ks.save(&record);
00084   if (ipx_writer)
00085   {
00086     ipx_writer->save(record, default_rating, default_rating,
00087                      black_title, white_title);
00088   }
00089 }
00090 static void convert(const std::string &kisen_filename,
00091                     const std::vector<std::string> &files,
00092                     bool output_ipx,
00093                     osl::record::CheckDuplicate& check_duplicates,
00094                     int default_rating, int min_year, int max_year)
00095 {
00096   std::ofstream ofs(kisen_filename.c_str());
00097   osl::record::OKisenStream ks(ofs);
00098 
00099   boost::scoped_ptr<osl::record::KisenIpxWriter> ipx_writer;
00100   boost::scoped_ptr<std::ofstream> ipx_ofs;
00101   if (output_ipx)
00102   {
00103     const boost::filesystem::path ipx_path =
00104       boost::filesystem::change_extension(boost::filesystem::path(kisen_filename), ".ipx");
00105     const std::string ipx = osl::misc::file_string(ipx_path);
00106     ipx_ofs.reset(new std::ofstream(ipx.c_str()));
00107     ipx_writer.reset(new osl::record::KisenIpxWriter(*ipx_ofs));
00108   }
00109 
00110   boost::progress_display progress(files.size());
00111   boost::regex date_time_regex("/(20[0-9][0-9]/[0-9][0-9]/[0-9][0-9])/");
00112   // boost::regex date_time_regex(".");
00113   for (size_t i = 0; i < files.size(); ++i, ++progress)
00114   {
00115     try
00116     {
00117       osl::record::Record record;
00118       const std::string& filename = files[i];
00119       if (boost::algorithm::iends_with(filename, ".kif")) 
00120       {
00121         try 
00122         {
00123           osl::KisenFile kisen(filename);
00124           osl::KisenIpxFile ipx(kisen.ipxFileName());
00125           for (size_t j=0; j<kisen.size(); ++j) {
00126             osl::record::Record record(kisen.getInitialState(), kisen.getMoves(j));
00127             record.setPlayer(osl::BLACK, ipx.getPlayer(j, osl::BLACK)
00128                              + ipx.getTitle(j, osl::BLACK));
00129             record.setPlayer(osl::WHITE, ipx.getPlayer(j, osl::WHITE)
00130                              + ipx.getTitle(j, osl::WHITE));
00131             record.setDate(ipx.getStartDate(j));
00132             run(record, ks, ipx_writer, check_duplicates,
00133                 default_rating, min_year, max_year);
00134           }
00135           if (kisen.size() > 0)
00136             continue;           // it was actually kisen file, going to next file
00137         }
00138         catch (...) 
00139         {
00140         }
00141         // fall through
00142       }
00143       if (boost::algorithm::iends_with(filename, ".csa"))
00144       {
00145         const osl::record::csa::CsaFile csa(filename);
00146         record = csa.getRecord();
00147       }
00148       else if (boost::algorithm::iends_with(filename, ".ki2"))
00149       {
00150         const osl::Ki2File ki2(filename);
00151         record = ki2.getRecord();
00152         // std::cerr << osl::IconvConvert::eucToLang(record.tounamentName()) << "\n";
00153         if (! accept_tournament(record.tounamentName()))
00154           continue;
00155       }
00156       else if (boost::algorithm::iends_with(filename, ".kif"))
00157       {
00158         const osl::KakinokiFile kif(filename);
00159         record = kif.getRecord();
00160       }
00161       else
00162       {
00163         std::cerr << "Unknown file type: " << filename << "\n";
00164         continue;
00165       }
00166       if (record.getDate().is_special()) {
00167         boost::smatch match;
00168         if (boost::regex_search(filename, match, date_time_regex)) {
00169           std::string s(match[1].first, match[1].second);
00170           std::cerr << "use date in path " << s << "\n";
00171           record.setDate(boost::gregorian::from_string(s));
00172         }
00173       }
00174       run(record, ks, ipx_writer, check_duplicates, default_rating, min_year, max_year);
00175     }
00176     catch(std::exception& e)
00177     {
00178       std::cerr << "ERROR: reading " <<  files[i] << "; " << 
00179         e.what() << std::endl;
00180       continue;
00181     }
00182   }
00183 }
00184 
00185 int main(int argc, char **argv)
00186 {
00187   bool output_ipx;
00188   std::string kisen_filename, tournament_filename;
00189   int default_rating, year, min_year, max_year;
00190   boost::program_options::options_description command_line_options;
00191   command_line_options.add_options()
00192     ("output-ipx",
00193      boost::program_options::value<bool>(&output_ipx)->default_value(true),
00194      "Whether output IPX file in addition to KIF file")
00195     ("tournament-file", boost::program_options::value<std::string>(&tournament_filename)
00196      ->default_value(""),
00197      "ignore records unless the name of their tournament is listed in the file in EUC-JP")
00198     ("year", boost::program_options::value<int>(&year)->default_value(0),
00199      "year to select (0 for all)")
00200     ("min-year", boost::program_options::value<int>(&min_year)->default_value(0),
00201      "min year to select (0 for all)")
00202     ("max-year", boost::program_options::value<int>(&max_year)->default_value(0),
00203      "max year to select (0 for all)")
00204     ("kisen-filename,o",
00205      boost::program_options::value<std::string>(&kisen_filename)->
00206      default_value("test.kif"),
00207      "Output filename of Kisen file")
00208     ("input-file", boost::program_options::value< std::vector<std::string> >(),
00209      "input files in kisen format")
00210     ("default-rating", boost::program_options::value<int>(&default_rating)->
00211      default_value(0),
00212      "default rating")
00213     ("help", "Show help message");
00214   boost::program_options::variables_map vm;
00215   boost::program_options::positional_options_description p;
00216   p.add("input-file", -1);
00217 
00218   try
00219   {
00220     boost::program_options::store(
00221       boost::program_options::command_line_parser(
00222         argc, argv).options(command_line_options).positional(p).run(), vm);
00223     boost::program_options::notify(vm);
00224     if (vm.count("help"))
00225     {
00226       std::cerr << "Usage: " << argv[0] << " [options] csa-files | ki2-files \n";
00227       std::cerr << "       " << argv[0] << " [options]\n";
00228       std::cout << command_line_options << std::endl;
00229       return 0;
00230     }
00231   }
00232   catch (std::exception &e)
00233   {
00234     std::cerr << "error in parsing options" << std::endl
00235               << e.what() << std::endl;
00236     std::cerr << "Usage: " << argv[0] << " [options] csa-files | ki2-files\n";
00237     std::cerr << "       " << argv[0] << " [options]\n";
00238     std::cerr << command_line_options << std::endl;
00239     return 1;
00240   }
00241 
00242   if (tournament_filename != "")
00243   {
00244     std::ifstream is(tournament_filename.c_str());
00245     std::string name;
00246     while(std::getline(is, name))
00247     {
00248       boost::algorithm::trim(name);
00249       good_tournaments.push_back(name);
00250     }
00251     if (good_tournaments.empty())
00252       throw std::runtime_error("read failed "+tournament_filename);
00253   }
00254   if (year)
00255     min_year = max_year = year;
00256 
00257   std::vector<std::string> files;
00258   if (vm.count("input-file"))
00259   {
00260     const std::vector<std::string> temp = vm["input-file"].as<std::vector<std::string> >();
00261     files.insert(files.end(), temp.begin(), temp.end());
00262   }
00263   else
00264   {
00265     std::string line;
00266     while(std::getline(std::cin , line))
00267     {
00268       boost::algorithm::trim(line);
00269       files.push_back(line);
00270     }
00271   }
00272 
00273   osl::record::CheckDuplicate check_duplicate;
00274   convert(kisen_filename, files, output_ipx, check_duplicate, default_rating,
00275           min_year, max_year);
00276 
00277   std::locale::global(std::locale(""));
00278   check_duplicate.print(std::cout);
00279 
00280   return 0;
00281 }
00282 // ;;; Local Variables:
00283 // ;;; mode:c++
00284 // ;;; c-basic-offset:2
00285 // ;;; coding:utf-8
00286 // ;;; End:
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines