[{"content":"","date":"7 July 2026","externalUrl":null,"permalink":"/publications/asymmetric-perturbation-bilinear-icml-2026/","section":"Publications","summary":"","title":"Asymmetric Perturbation in Solving Bilinear Saddle-Point Optimization","type":"publications"},{"content":"","date":"7 July 2026","externalUrl":null,"permalink":"/authors/atsushi-iwasaki/","section":"Authors","summary":"","title":"Atsushi Iwasaki","type":"authors"},{"content":"","date":"7 July 2026","externalUrl":null,"permalink":"/authors/","section":"Authors","summary":"","title":"Authors","type":"authors"},{"content":"","date":"7 July 2026","externalUrl":null,"permalink":"/authors/kaito-ariu/","section":"Authors","summary":"","title":"Kaito Ariu","type":"authors"},{"content":" Selected Publications # ICML 2026 Asymmetric Perturbation in Solving Bilinear Saddle-Point Optimization Kenshi Abe, Mitsuki Sakamoto, Kaito Ariu, Atsushi Iwasaki\nTheme:Learning in Games arXiv ICLR 2025 Boosting Perturbed Gradient Ascent for Last-Iterate Convergence in Games Kenshi Abe, Mitsuki Sakamoto, Kaito Ariu, Atsushi Iwasaki\nTheme:Learning in Games arXiv ICML 2024 Adaptively Perturbed Mirror Descent for Learning in Games Kenshi Abe, Kaito Ariu, Mitsuki Sakamoto, Atsushi Iwasaki\nTheme:Learning in Games arXiv AISTATS 2023 Last-Iterate Convergence with Full and Noisy Feedback in Two-Player Zero-Sum Games Kenshi Abe, Kaito Ariu, Mitsuki Sakamoto, Kentaro Toyoshima, Atsushi Iwasaki\nTheme:Learning in Games arXiv View all publications\n","date":"7 July 2026","externalUrl":null,"permalink":"/","section":"Kenshi Abe","summary":"","title":"Kenshi Abe","type":"page"},{"content":"","date":"7 July 2026","externalUrl":null,"permalink":"/authors/kenshi-abe/","section":"Authors","summary":"","title":"Kenshi Abe","type":"authors"},{"content":"","date":"7 July 2026","externalUrl":null,"permalink":"/authors/mitsuki-sakamoto/","section":"Authors","summary":"","title":"Mitsuki Sakamoto","type":"authors"},{"content":"","date":"7 July 2026","externalUrl":null,"permalink":"/publications/","section":"Publications","summary":"","title":"Publications","type":"publications"},{"content":"","date":"27 May 2026","externalUrl":null,"permalink":"/publications/time-varyingness-auction-aamas-2026/","section":"Publications","summary":"","title":"Time-Varyingness in Auction Breaks Revenue Equivalence","type":"publications"},{"content":"","date":"27 May 2026","externalUrl":null,"permalink":"/authors/yuma-fujimoto/","section":"Authors","summary":"","title":"Yuma Fujimoto","type":"authors"},{"content":"","date":"2 May 2026","externalUrl":null,"permalink":"/authors/alexandre-proutiere/","section":"Authors","summary":"","title":"Alexandre Proutiere","type":"authors"},{"content":"","date":"2 May 2026","externalUrl":null,"permalink":"/authors/po-an-wang/","section":"Authors","summary":"","title":"Po-an Wang","type":"authors"},{"content":"","date":"2 May 2026","externalUrl":null,"permalink":"/publications/policy-testing-mdp-aistats-2026/","section":"Publications","summary":"","title":"Policy Testing in Markov Decision Processes","type":"publications"},{"content":"","date":"6 December 2025","externalUrl":null,"permalink":"/publications/policy-testing-mdp-neurips-2025/","section":"Publications","summary":"","title":"Policy Testing in Markov Decision Processes","type":"publications"},{"content":"","date":"3 December 2025","externalUrl":null,"permalink":"/publications/monotone-mean-field-games-neurips-2025/","section":"Publications","summary":"","title":"Last Iterate Convergence in Monotone Mean Field Games","type":"publications"},{"content":"","date":"3 December 2025","externalUrl":null,"permalink":"/publications/learning-from-delayed-feedback-neurips-2025/","section":"Publications","summary":"","title":"Learning from Delayed Feedback in Games via Extra Prediction","type":"publications"},{"content":"","date":"3 December 2025","externalUrl":null,"permalink":"/authors/noboru-isobe/","section":"Authors","summary":"","title":"Noboru Isobe","type":"authors"},{"content":"","date":"2025年11月12日","externalUrl":null,"permalink":"/ja/publications/asymmetric-perturbation-bilinear-ibis-2025/","section":"論文","summary":"","title":"Asymmetric Perturbation in Solving Bilinear Saddle-Point Optimization","type":"publications"},{"content":"","date":"2025年11月12日","externalUrl":null,"permalink":"/ja/publications/policy-testing-mdp-ibis-2025/","section":"論文","summary":"","title":"Policy Testing in Markov Decision Processes","type":"publications"},{"content":"","date":"2025年11月12日","externalUrl":null,"permalink":"/ja/publications/unified-convergence-guarantees-efg-ibis-2025/","section":"論文","summary":"","title":"Unified Convergence Guarantees for Learning with General Payoff Perturbations in Extensive-Form Games","type":"publications"},{"content":"","date":"2025年11月12日","externalUrl":null,"permalink":"/ja/publications/time-varyingness-auction-ibis-2025/","section":"論文","summary":"","title":"オークション環境の時間変動による収入同値の破れ","type":"publications"},{"content":"","date":"2025年11月12日","externalUrl":null,"permalink":"/ja/publications/learning-from-delayed-feedback-ibis-2025/","section":"論文","summary":"","title":"ゲームにおける時間遅れフィードバックからの学習","type":"publications"},{"content":"","date":"2025年11月12日","externalUrl":null,"permalink":"/ja/publications/additive-reward-non-stationary-bandit-ibis-2025/","section":"論文","summary":"","title":"共通トレンドを考慮した加法報酬モデルに基づく非定常バンディットアルゴリズム","type":"publications"},{"content":"","date":"2025年11月12日","externalUrl":null,"permalink":"/ja/authors/%E5%9D%82%E6%9C%AC-%E5%85%85%E7%94%9F/","section":"Authors","summary":"","title":"坂本 充生","type":"authors"},{"content":"","date":"2025年11月12日","externalUrl":null,"permalink":"/ja/authors/%E5%B2%A9%E5%B4%8E-%E6%95%A6/","section":"Authors","summary":"","title":"岩崎 敦","type":"authors"},{"content":"","date":"2025年11月12日","externalUrl":null,"permalink":"/ja/authors/%E6%9A%AE%E7%9F%B3-%E8%88%AA%E5%A4%A7/","section":"Authors","summary":"","title":"暮石 航大","type":"authors"},{"content":"","date":"2025年11月12日","externalUrl":null,"permalink":"/ja/authors/%E7%9C%9E%E5%9D%82-%E8%88%AA%E5%AE%99/","section":"Authors","summary":"","title":"眞坂 航宙","type":"authors"},{"content":"","date":"2025年11月12日","externalUrl":null,"permalink":"/ja/authors/%E8%97%A4%E6%9C%AC-%E6%82%A0%E9%9B%85/","section":"Authors","summary":"","title":"藤本 悠雅","type":"authors"},{"content":"","date":"2025年11月12日","externalUrl":null,"permalink":"/ja/authors/%E8%9F%BB%E7%94%9F-%E9%96%8B%E4%BA%BA/","section":"Authors","summary":"","title":"蟻生 開人","type":"authors"},{"content":"","date":"2025年11月12日","externalUrl":null,"permalink":"/ja/authors/%E9%98%BF%E9%83%A8-%E6%8B%B3%E4%B9%8B/","section":"Authors","summary":"","title":"阿部 拳之","type":"authors"},{"content":"","date":"2025年9月3日","externalUrl":null,"permalink":"/ja/publications/perturbation-under-sampling-efg-fit-2025/","section":"論文","summary":"","title":"不完全情報展開型ゲームの求解における利得摂動に関する研究","type":"publications"},{"content":"","date":"8 June 2025","externalUrl":null,"permalink":"/authors/edgar-simo-serra/","section":"Authors","summary":"","title":"Edgar Simo-Serra","type":"authors"},{"content":"","date":"8 June 2025","externalUrl":null,"permalink":"/publications/return-aligned-decision-transformer-tmlr-2025/","section":"Publications","summary":"","title":"Return-Aligned Decision Transformer","type":"publications"},{"content":"","date":"8 June 2025","externalUrl":null,"permalink":"/authors/tetsuro-morimura/","section":"Authors","summary":"","title":"Tetsuro Morimura","type":"authors"},{"content":"","date":"8 June 2025","externalUrl":null,"permalink":"/authors/tsunehiko-tanaka/","section":"Authors","summary":"","title":"Tsunehiko Tanaka","type":"authors"},{"content":"","date":"2025年5月27日","externalUrl":null,"permalink":"/ja/publications/perturbation-under-sampling-efg-jsai-2025/","section":"論文","summary":"","title":"不完全情報展開型ゲームの求解における利得摂動に関する研究","type":"publications"},{"content":"","date":"2025年5月27日","externalUrl":null,"permalink":"/ja/publications/synchronization-periodic-zero-sum-jsai-2025/","section":"論文","summary":"","title":"周期的なゼロ和ゲームにおけるマルチエージェント学習","type":"publications"},{"content":"","date":"2025年5月27日","externalUrl":null,"permalink":"/ja/publications/alignment-evaluation-llm-jsai-2025/","section":"論文","summary":"","title":"日本語大規模言語モデルの自己学習によるアライメントの実験評価","type":"publications"},{"content":"","date":"2025年5月27日","externalUrl":null,"permalink":"/ja/authors/%E6%A3%AE%E6%9D%91-%E5%93%B2%E9%83%8E/","section":"Authors","summary":"","title":"森村 哲郎","type":"authors"},{"content":"","date":"2025年5月27日","externalUrl":null,"permalink":"/ja/authors/%E9%99%A3%E5%86%85-%E4%BD%91/","section":"Authors","summary":"","title":"陣内 佑","type":"authors"},{"content":"","date":"21 May 2025","externalUrl":null,"permalink":"/publications/global-behavior-zero-sum-games-aamas-2025/","section":"Publications","summary":"","title":"Global Behavior of Learning Dynamics in Zero-Sum Games with Memory Asymmetry","type":"publications"},{"content":"","date":"21 May 2025","externalUrl":null,"permalink":"/publications/three-player-matching-games-aamas-2025/","section":"Publications","summary":"","title":"Nash Equilibrium and Learning Dynamics in Three-Player Matching m-Action Games","type":"publications"},{"content":"","date":"30 April 2025","externalUrl":null,"permalink":"/publications/regularized-best-of-n-naacl-2025/","section":"Publications","summary":"","title":"Regularized Best-of-N Sampling with Minimum Bayes Risk Objective for Language Model Alignment","type":"publications"},{"content":"","date":"30 April 2025","externalUrl":null,"permalink":"/authors/yuu-jinnai/","section":"Authors","summary":"","title":"Yuu Jinnai","type":"authors"},{"content":"","date":"24 April 2025","externalUrl":null,"permalink":"/publications/boosting-perturbed-gradient-ascent-iclr-2025/","section":"Publications","summary":"","title":"Boosting Perturbed Gradient Ascent for Last-Iterate Convergence in Games","type":"publications"},{"content":"","date":"2025年3月13日","externalUrl":null,"permalink":"/ja/publications/perturbation-under-sampling-efg-ipsj-2025/","section":"論文","summary":"","title":"不完全情報展開型ゲームの求解における利得摂動に関する研究","type":"publications"},{"content":"","date":"11 March 2025","externalUrl":null,"permalink":"/authors/daiki-katsuragawa/","section":"Authors","summary":"","title":"Daiki Katsuragawa","type":"authors"},{"content":"","date":"11 March 2025","externalUrl":null,"permalink":"/publications/creative-selection-online-advertising-wsdm-2025/","section":"Publications","summary":"","title":"Efficient Creative Selection in Online Advertising using Top-Two Thompson Sampling","type":"publications"},{"content":"","date":"11 March 2025","externalUrl":null,"permalink":"/authors/yusuke-kaneko/","section":"Authors","summary":"","title":"Yusuke Kaneko","type":"authors"},{"content":"","date":"2025年3月10日","externalUrl":null,"permalink":"/ja/publications/alignment-evaluation-llm-nlp-2025/","section":"論文","summary":"","title":"大規模言語モデルのためのアライメントデータ合成手法の実験的評価","type":"publications"},{"content":"","date":"27 February 2025","externalUrl":null,"permalink":"/publications/state-abstraction-markov-games-aaai-2025/","section":"Publications","summary":"","title":"Approximate State Abstraction for Markov Games","type":"publications"},{"content":"","date":"27 February 2025","externalUrl":null,"permalink":"/authors/hiroki-ishibashi/","section":"Authors","summary":"","title":"Hiroki Ishibashi","type":"authors"},{"content":"","date":"27 February 2025","externalUrl":null,"permalink":"/publications/synchronization-periodic-zero-sum-aaai-2025/","section":"Publications","summary":"","title":"Synchronization behind Learning in Periodic Zero-Sum Games Triggers Divergence from Nash equilibrium","type":"publications"},{"content":"","date":"15 February 2025","externalUrl":null,"permalink":"/authors/eiji-uchibe/","section":"Authors","summary":"","title":"Eiji Uchibe","type":"authors"},{"content":"","date":"15 February 2025","externalUrl":null,"permalink":"/publications/evaluation-best-of-n-tmlr-2025/","section":"Publications","summary":"","title":"Evaluation of Best-of-N Sampling Strategies for Language Model Alignment","type":"publications"},{"content":"","date":"15 February 2025","externalUrl":null,"permalink":"/authors/yuki-ichihara/","section":"Authors","summary":"","title":"Yuki Ichihara","type":"authors"},{"content":"","date":"28 January 2025","externalUrl":null,"permalink":"/publications/perturbation-under-sampling-efg-arxiv-2025/","section":"Publications","summary":"","title":"The Power of Perturbation under Sampling in Solving Extensive-Form Games","type":"publications"},{"content":"","date":"28 January 2025","externalUrl":null,"permalink":"/authors/tuomas-sandholm/","section":"Authors","summary":"","title":"Tuomas Sandholm","type":"authors"},{"content":"","date":"28 January 2025","externalUrl":null,"permalink":"/authors/wataru-masaka/","section":"Authors","summary":"","title":"Wataru Masaka","type":"authors"},{"content":"","date":"12 November 2024","externalUrl":null,"permalink":"/publications/filtered-dpo-emnlp-2024/","section":"Publications","summary":"","title":"Filtered Direct Preference Optimization","type":"publications"},{"content":"","date":"2024年11月4日","externalUrl":null,"permalink":"/ja/publications/perturbation-under-sampling-efg-ibis-2024/","section":"論文","summary":"","title":"（不完全情報）展開型ゲームにおける零分散の利得摂動手法","type":"publications"},{"content":"","date":"2024年11月4日","externalUrl":null,"permalink":"/ja/publications/evaluation-best-of-n-ibis-2024/","section":"論文","summary":"","title":"Evaluation of Best-of-N Sampling Strategies for Language Model Alignment","type":"publications"},{"content":"","date":"2024年11月4日","externalUrl":null,"permalink":"/ja/publications/filtered-dpo-ibis-2024/","section":"論文","summary":"","title":"Filtered Direct Preference Optimization: 選好データセットの質に基づくフィルタリング手法の提案","type":"publications"},{"content":"","date":"2024年11月4日","externalUrl":null,"permalink":"/ja/publications/monotone-mean-field-games-ibis-2024/","section":"論文","summary":"","title":"Last Iterate Convergence in Monotone Mean Field Games","type":"publications"},{"content":"","date":"2024年11月4日","externalUrl":null,"permalink":"/ja/publications/synchronization-periodic-zero-sum-ibis-2024/","section":"論文","summary":"","title":"Synchronization behind Learning in Periodic Zero-Sum Games Triggers Divergence from Nash equilibrium","type":"publications"},{"content":"","date":"2024年11月4日","externalUrl":null,"permalink":"/ja/publications/bayes-risk-preference-optimization-ibis-2024/","section":"論文","summary":"","title":"ベイズリスク選好最適化：報酬モデル不要のオンライン選好最適化手法","type":"publications"},{"content":"","date":"2024年11月4日","externalUrl":null,"permalink":"/ja/publications/policy-testing-mdp-ibis-2024/","section":"論文","summary":"","title":"マルコフ決定過程における良方策検定手法の提案","type":"publications"},{"content":"","date":"2024年11月4日","externalUrl":null,"permalink":"/ja/authors/%E5%86%85%E9%83%A8-%E8%8B%B1%E6%B2%BB/","section":"Authors","summary":"","title":"内部 英治","type":"authors"},{"content":"","date":"2024年11月4日","externalUrl":null,"permalink":"/ja/authors/%E5%B8%82%E5%8E%9F-%E6%9C%89%E7%94%9F%E5%B8%8C/","section":"Authors","summary":"","title":"市原 有生希","type":"authors"},{"content":"","date":"2024年11月4日","externalUrl":null,"permalink":"/ja/authors/%E7%A3%AF%E9%83%A8-%E4%BC%B8/","section":"Authors","summary":"","title":"磯部 伸","type":"authors"},{"content":"","date":"2024年9月4日","externalUrl":null,"permalink":"/ja/publications/state-abstraction-markov-games-fit-2024/","section":"論文","summary":"","title":"二人零和マルコフゲームにおける状態抽象化に関する研究","type":"publications"},{"content":"","date":"2024年9月4日","externalUrl":null,"permalink":"/ja/authors/%E7%9F%B3%E6%A9%8B-%E5%AE%99%E5%B8%8C/","section":"Authors","summary":"","title":"石橋 宙希","type":"authors"},{"content":"","date":"10 August 2024","externalUrl":null,"permalink":"/authors/kazuhiro-ota/","section":"Authors","summary":"","title":"Kazuhiro Ota","type":"authors"},{"content":"","date":"10 August 2024","externalUrl":null,"permalink":"/authors/peinan-zhang/","section":"Authors","summary":"","title":"Peinan Zhang","type":"authors"},{"content":"","date":"10 August 2024","externalUrl":null,"permalink":"/publications/policy-gradient-mcts-rlc-2024/","section":"Publications","summary":"","title":"Policy Gradient Algorithms with Monte-Carlo Tree Search for Non-Markov Decision Processes","type":"publications"},{"content":"","date":"26 July 2024","externalUrl":null,"permalink":"/publications/filtered-dpo-icml-2024/","section":"Publications","summary":"","title":"Filtered Direct Preference Optimization","type":"publications"},{"content":"","date":"26 July 2024","externalUrl":null,"permalink":"/publications/regularized-best-of-n-icml-2024/","section":"Publications","summary":"","title":"Regularized Best-of-N Sampling to Mitigate Reward Hacking for Language Model Alignment","type":"publications"},{"content":"","date":"23 July 2024","externalUrl":null,"permalink":"/publications/adaptively-perturbed-mirror-descent-icml-2024/","section":"Publications","summary":"","title":"Adaptively Perturbed Mirror Descent for Learning in Games","type":"publications"},{"content":"","date":"23 July 2024","externalUrl":null,"permalink":"/publications/model-based-mbr-icml-2024/","section":"Publications","summary":"","title":"Model-Based Minimum Bayes Risk Decoding","type":"publications"},{"content":"","date":"23 July 2024","externalUrl":null,"permalink":"/authors/ukyo-honda/","section":"Authors","summary":"","title":"Ukyo Honda","type":"authors"},{"content":"","date":"2024年5月28日","externalUrl":null,"permalink":"/ja/publications/distribution-shift-evaluation-rlhf-jsai-2024/","section":"論文","summary":"","title":"RLHFにおける分布シフトの評価","type":"publications"},{"content":"","date":"2024年5月15日","externalUrl":null,"permalink":"/ja/publications/mutation-driven-ftrl-ipsj-j-2024/","section":"論文","summary":"","title":"二人零和ゲームにおける突然変異駆動型正則化先導者追従法の終極反復収束","type":"publications"},{"content":"","date":"2024年5月15日","externalUrl":null,"permalink":"/ja/authors/%E8%B1%8A%E5%B3%B6-%E5%81%A5%E5%A4%AA%E9%83%8E/","section":"Authors","summary":"","title":"豊島 健太郎","type":"authors"},{"content":"","date":"14 May 2024","externalUrl":null,"permalink":"/authors/riku-togashi/","section":"Authors","summary":"","title":"Riku Togashi","type":"authors"},{"content":"","date":"14 May 2024","externalUrl":null,"permalink":"/publications/scalable-fair-exposure-control-www-2024/","section":"Publications","summary":"","title":"Scalable and Provably Fair Exposure Control for Large-Scale Recommender Systems","type":"publications"},{"content":"","date":"14 May 2024","externalUrl":null,"permalink":"/authors/yuta-saito/","section":"Authors","summary":"","title":"Yuta Saito","type":"authors"},{"content":"","date":"2 May 2024","externalUrl":null,"permalink":"/authors/hakuei-yamada/","section":"Authors","summary":"","title":"Hakuei Yamada","type":"authors"},{"content":"","date":"2 May 2024","externalUrl":null,"permalink":"/authors/junpei-komiyama/","section":"Authors","summary":"","title":"Junpei Komiyama","type":"authors"},{"content":"","date":"2 May 2024","externalUrl":null,"permalink":"/publications/learning-fair-division-aistats-2024/","section":"Publications","summary":"","title":"Learning Fair Division from Bandit Feedback","type":"publications"},{"content":"","date":"2024年3月15日","externalUrl":null,"permalink":"/ja/publications/state-abstraction-markov-games-ipsj-2024/","section":"論文","summary":"","title":"二人零和マルコフゲームにおける状態抽象化法に関する研究","type":"publications"},{"content":"","date":"2024年3月15日","externalUrl":null,"permalink":"/ja/authors/%E5%B0%8F%E5%AE%AE%E5%B1%B1-%E7%B4%94%E5%B9%B3/","section":"Authors","summary":"","title":"小宮山 純平","type":"authors"},{"content":"","date":"2024年3月15日","externalUrl":null,"permalink":"/ja/authors/%E5%B3%B6%E9%87%8E-%E9%9B%84%E8%B2%B4/","section":"Authors","summary":"","title":"島野 雄貴","type":"authors"},{"content":"","date":"2024年3月15日","externalUrl":null,"permalink":"/ja/authors/%E6%9D%BF%E5%9E%A3-%E5%9C%AD%E7%9F%A5/","section":"Authors","summary":"","title":"板垣 圭知","type":"authors"},{"content":"","date":"2024年3月15日","externalUrl":null,"permalink":"/ja/publications/medical-residency-match-ipsj-2024/","section":"論文","summary":"","title":"研修医配属における地域間格差を調整する制約のモンテカルロ木探索","type":"publications"},{"content":"","date":"22 February 2024","externalUrl":null,"permalink":"/publications/memory-asymmetry-heteroclinic-orbits-aaai-2024/","section":"Publications","summary":"","title":"Memory Asymmetry Creates Heteroclinic Orbits to Nash Equilibrium in Learning in Zero-Sum Games","type":"publications"},{"content":"","date":"2023年10月29日","externalUrl":null,"permalink":"/ja/publications/adaptively-perturbed-mirror-descent-ibis-2023/","section":"論文","summary":"","title":"A Slingshot Approach to Learning in Monotone Games","type":"publications"},{"content":"","date":"2023年10月29日","externalUrl":null,"permalink":"/ja/publications/multi-memory-games-ibis-2023/","section":"論文","summary":"","title":"Learning in Multi-Memory Games Triggers Complex Dynamics Diverging from Nash Equilibrium","type":"publications"},{"content":"","date":"2023年10月29日","externalUrl":null,"permalink":"/ja/publications/perturbation-under-sampling-efg-ibis-2023/","section":"論文","summary":"","title":"Zero-Variance Perturbation Utility for Extensive-Form Games","type":"publications"},{"content":"","date":"2023年10月29日","externalUrl":null,"permalink":"/ja/publications/learning-fair-division-ibis-2023/","section":"論文","summary":"","title":"オンライン環境において公平な資源配分を実現するアルゴリズムに関する研究","type":"publications"},{"content":"","date":"2023年10月29日","externalUrl":null,"permalink":"/ja/authors/%E5%B1%B1%E7%94%B0-%E5%8D%9A%E7%91%9B/","section":"Authors","summary":"","title":"山田 博瑛","type":"authors"},{"content":"","date":"2023年9月6日","externalUrl":null,"permalink":"/ja/publications/learning-fair-division-fit-2023/","section":"論文","summary":"","title":"オンライン環境において公平な資源配分を実現するアルゴリズムに関する研究","type":"publications"},{"content":"","date":"2023年9月6日","externalUrl":null,"permalink":"/ja/publications/medical-residency-match-fit-2023/","section":"論文","summary":"","title":"研修医配属における地域間格差を調整するための制約のモンテカルロ木探索","type":"publications"},{"content":"","date":"22 August 2023","externalUrl":null,"permalink":"/publications/multi-memory-games-ijcai-2023/","section":"Publications","summary":"","title":"Learning in Multi-Memory Games Triggers Complex Dynamics Diverging from Nash Equilibrium","type":"publications"},{"content":"","date":"24 July 2023","externalUrl":null,"permalink":"/publications/safe-online-learning-to-rerank-sigir-2023/","section":"Publications","summary":"","title":"Exploration of Unranked Items in Safe Online Learning to Re-Rank","type":"publications"},{"content":"","date":"24 July 2023","externalUrl":null,"permalink":"/authors/hiroaki-shiino/","section":"Authors","summary":"","title":"Hiroaki Shiino","type":"authors"},{"content":"","date":"24 July 2023","externalUrl":null,"permalink":"/authors/togashi-riku/","section":"Authors","summary":"","title":"Togashi Riku","type":"authors"},{"content":"","date":"13 July 2023","externalUrl":null,"permalink":"/authors/asahi-hentona/","section":"Authors","summary":"","title":"Asahi Hentona","type":"authors"},{"content":"","date":"13 July 2023","externalUrl":null,"permalink":"/authors/hirotaka-ninomiya/","section":"Authors","summary":"","title":"Hirotaka Ninomiya","type":"authors"},{"content":"","date":"13 July 2023","externalUrl":null,"permalink":"/authors/masakazu-sugiyama/","section":"Authors","summary":"","title":"Masakazu Sugiyama","type":"authors"},{"content":"","date":"13 July 2023","externalUrl":null,"permalink":"/authors/sho-shimoyama/","section":"Authors","summary":"","title":"Sho Shimoyama","type":"authors"},{"content":"","date":"13 July 2023","externalUrl":null,"permalink":"/authors/toda-takamichi/","section":"Authors","summary":"","title":"Toda Takamichi","type":"authors"},{"content":"","date":"13 July 2023","externalUrl":null,"permalink":"/publications/guided-dialog-adversarial-arxiv-2023/","section":"Publications","summary":"","title":"Why Guided Dialog Policy Learning performs well? Understanding the role of adversarial learning and its alternative","type":"publications"},{"content":"","date":"13 July 2023","externalUrl":null,"permalink":"/authors/yuta-tomomatsu/","section":"Authors","summary":"","title":"Yuta Tomomatsu","type":"authors"},{"content":"","date":"13 July 2023","externalUrl":null,"permalink":"/authors/yuuki-azuma/","section":"Authors","summary":"","title":"Yuuki Azuma","type":"authors"},{"content":"","date":"2023年6月6日","externalUrl":null,"permalink":"/ja/publications/learning-fair-division-jsai-2023/","section":"論文","summary":"","title":"オンライン環境において公平な資源配分を実現するアルゴリズムに関する研究","type":"publications"},{"content":"","date":"2023年6月6日","externalUrl":null,"permalink":"/ja/publications/mutation-mwu-efg-jsai-2023/","section":"論文","summary":"","title":"二人零和展開型ゲームにおける突然変異付き乗算型重み更新に関する研究","type":"publications"},{"content":"","date":"25 April 2023","externalUrl":null,"permalink":"/authors/kentaro-toyoshima/","section":"Authors","summary":"","title":"Kentaro Toyoshima","type":"authors"},{"content":"","date":"25 April 2023","externalUrl":null,"permalink":"/publications/last-iterate-full-noisy-feedback-aistats-2023/","section":"Publications","summary":"","title":"Last-Iterate Convergence with Full and Noisy Feedback in Two-Player Zero-Sum Games","type":"publications"},{"content":"","date":"2023年3月13日","externalUrl":null,"permalink":"/ja/publications/decision-transformer-dialogue-nlp-2023/","section":"論文","summary":"","title":"タスク指向対話システムの方策学習への Decision Transformerの適用","type":"publications"},{"content":"","date":"2023年3月13日","externalUrl":null,"permalink":"/ja/publications/guided-dialog-adversarial-nlp-2023/","section":"論文","summary":"","title":"タスク指向対話における強化学習を用いた対話方策学習への敵対的学習の役割の解明","type":"publications"},{"content":"","date":"2023年3月13日","externalUrl":null,"permalink":"/ja/authors/%E4%B8%8B%E5%B1%B1-%E7%BF%94/","section":"Authors","summary":"","title":"下山 翔","type":"authors"},{"content":"","date":"2023年3月13日","externalUrl":null,"permalink":"/ja/authors/%E6%88%B8%E7%94%B0-%E9%9A%86%E9%81%93/","section":"Authors","summary":"","title":"戸田 隆道","type":"authors"},{"content":"","date":"2023年3月2日","externalUrl":null,"permalink":"/ja/publications/learning-fair-division-ipsj-2023/","section":"論文","summary":"","title":"オンライン環境において公平な資源配分を実現するアルゴリズムに関する研究","type":"publications"},{"content":"","date":"2023年3月2日","externalUrl":null,"permalink":"/ja/publications/medical-residency-match-ipsj-2023/","section":"論文","summary":"","title":"研修医配属における地域間格差を調整するための制約のモンテカルロ木探索","type":"publications"},{"content":"","date":"2022年11月20日","externalUrl":null,"permalink":"/ja/publications/last-iterate-full-noisy-feedback-ibis-2022/","section":"論文","summary":"","title":"Last-Iterate Convergence with Full- and Noisy-Information Feedback in Two-Player Zero-Sum Games","type":"publications"},{"content":"","date":"2022年11月20日","externalUrl":null,"permalink":"/ja/publications/thresholded-lasso-bandit-ibis-2022/","section":"論文","summary":"","title":"Thresholded Lasso Bandit","type":"publications"},{"content":"","date":"2022年11月20日","externalUrl":null,"permalink":"/ja/publications/policy-gradient-mcts-ibis-2022/","section":"論文","summary":"","title":"ビームサーチ推論のための強化学習","type":"publications"},{"content":"","date":"2022年11月20日","externalUrl":null,"permalink":"/ja/publications/scalable-fair-exposure-control-ibis-2022/","section":"論文","summary":"","title":"公平性を考慮した大規模推薦システム","type":"publications"},{"content":"","date":"2022年11月20日","externalUrl":null,"permalink":"/ja/authors/%E5%A4%A7%E7%94%B0-%E5%92%8C%E5%AF%9B/","section":"Authors","summary":"","title":"大田 和寛","type":"authors"},{"content":"","date":"2022年11月20日","externalUrl":null,"permalink":"/ja/authors/%E5%AF%8C%E6%A8%AB-%E9%99%B8/","section":"Authors","summary":"","title":"富樫 陸","type":"authors"},{"content":"","date":"2022年11月20日","externalUrl":null,"permalink":"/ja/authors/%E5%BC%B5-%E5%9F%B9%E6%A5%A0/","section":"Authors","summary":"","title":"張 培楠","type":"authors"},{"content":"","date":"23 September 2022","externalUrl":null,"permalink":"/publications/scalable-fair-exposure-control-recsys-2022/","section":"Publications","summary":"","title":"Fair Matrix Factorisation for Large-Scale Recommender Systems","type":"publications"},{"content":"","date":"2022年9月13日","externalUrl":null,"permalink":"/ja/publications/mutation-driven-ftrl-fit-2022/","section":"論文","summary":"","title":"二人零和ゲームにおける突然変異駆動型Follow-The-Regularized-Leaderの終極反復収束","type":"publications"},{"content":"","date":"2 August 2022","externalUrl":null,"permalink":"/publications/mutation-driven-ftrl-uai-2022/","section":"Publications","summary":"","title":"Mutation-Driven Follow the Regularized Leader for Last-Iterate Convergence in Zero-Sum Games","type":"publications"},{"content":"","date":"26 July 2022","externalUrl":null,"permalink":"/publications/medical-residency-match-ijcai-2022/","section":"Publications","summary":"","title":"Anytime Capacity Expansion in Medical Residency Match by Monte Carlo Tree Search","type":"publications"},{"content":"","date":"19 July 2022","externalUrl":null,"permalink":"/authors/alexandre-prouti%C3%A8re/","section":"Authors","summary":"","title":"Alexandre Proutière","type":"authors"},{"content":"","date":"19 July 2022","externalUrl":null,"permalink":"/publications/thresholded-lasso-bandit-icml-2022/","section":"Publications","summary":"","title":"Thresholded LASSO Bandit","type":"publications"},{"content":"","date":"2022年6月14日","externalUrl":null,"permalink":"/ja/publications/mutation-driven-ftrl-jsai-2022/","section":"論文","summary":"","title":"二人零和ゲームにおける突然変異付きレプリケータダイナミクスを用いた学習アルゴリズムに関する研究","type":"publications"},{"content":"","date":"2022年3月3日","externalUrl":null,"permalink":"/ja/publications/multi-agent-rl-cournot-competition-ipsj-2022/","section":"論文","summary":"","title":"クールノー競争におけるマルチエージェント強化学習に関する研究","type":"publications"},{"content":"","date":"2022年3月3日","externalUrl":null,"permalink":"/ja/publications/mutation-driven-ftrl-ipsj-2022/","section":"論文","summary":"","title":"二人零和ゲームにおける突然変異付きレプリケータダイナミクスを用いた学習アルゴリズムに関する研究","type":"publications"},{"content":"","date":"28 February 2022","externalUrl":null,"permalink":"/publications/american-football-cfr-aaai-2022/","section":"Publications","summary":"","title":"Computing Strategies of American Football via Counterfactual Regret Minimization","type":"publications"},{"content":"","date":"28 February 2022","externalUrl":null,"permalink":"/authors/kazunori-ohkawara/","section":"Authors","summary":"","title":"Kazunori Ohkawara","type":"authors"},{"content":"","date":"28 February 2022","externalUrl":null,"permalink":"/authors/yuki-shimano/","section":"Authors","summary":"","title":"Yuki Shimano","type":"authors"},{"content":"","date":"13 December 2021","externalUrl":null,"permalink":"/publications/direct-quadratic-utility-maximization-neurips-2021/","section":"Publications","summary":"","title":"Direct Expected Quadratic Utility Maximization for Mean-Variance Controlled Reinforcement Learning","type":"publications"},{"content":"","date":"13 December 2021","externalUrl":null,"permalink":"/authors/kei-nakagawa/","section":"Authors","summary":"","title":"Kei Nakagawa","type":"authors"},{"content":"","date":"13 December 2021","externalUrl":null,"permalink":"/authors/masahiro-kato/","section":"Authors","summary":"","title":"Masahiro Kato","type":"authors"},{"content":"","date":"2021年11月10日","externalUrl":null,"permalink":"/ja/publications/misperception-repeated-games-ibis-2021/","section":"論文","summary":"","title":"見間違えのある繰り返しゲームのためのActor-Critic型強化学習","type":"publications"},{"content":"","date":"2021年9月16日","externalUrl":null,"permalink":"/ja/publications/misperception-repeated-games-orsj-2021/","section":"論文","summary":"","title":"見間違えのある繰り返しゲームのためのActor-Critic型強化学習","type":"publications"},{"content":"","date":"2021年8月25日","externalUrl":null,"permalink":"/ja/publications/american-football-cfr-fit-2021/","section":"論文","summary":"","title":"反実仮想後悔最小化によるアメリカンフットボールにおけるオフェンス戦略の均衡推定","type":"publications"},{"content":"","date":"2021年8月25日","externalUrl":null,"permalink":"/ja/authors/%E5%A4%A7%E6%B2%B3%E5%8E%9F-%E4%B8%80%E6%86%B2/","section":"Authors","summary":"","title":"大河原 一憲","type":"authors"},{"content":"","date":"2021年8月25日","externalUrl":null,"permalink":"/ja/publications/misperception-repeated-games-fit-2021/","section":"論文","summary":"","title":"見間違えのある繰り返し囚人のジレンマにおける方策勾配法に関する研究","type":"publications"},{"content":"","date":"5 May 2021","externalUrl":null,"permalink":"/publications/off-policy-exploitability-evaluation-aamas-2021/","section":"Publications","summary":"","title":"Off-Policy Exploitability-Evaluation in Two-Player Zero-Sum Markov Games","type":"publications"},{"content":"","date":"2020年11月14日","externalUrl":null,"permalink":"/ja/publications/off-policy-q-learning-markov-games-gpw-2020/","section":"論文","summary":"","title":"二人零和マルコフゲームにおけるオフ方策評価のためのQ学習","type":"publications"},{"content":"","date":"2020年11月14日","externalUrl":null,"permalink":"/ja/authors/%E9%87%91%E5%AD%90-%E9%9B%84%E7%A5%90/","section":"Authors","summary":"","title":"金子 雄祐","type":"authors"},{"content":"","date":"23 October 2020","externalUrl":null,"permalink":"/publications/off-policy-evaluation-bandits-guide-arxiv-2020/","section":"Publications","summary":"","title":"A Practical Guide of Off-Policy Evaluation for Bandit Problems","type":"publications"},{"content":"","date":"23 October 2020","externalUrl":null,"permalink":"/authors/shota-yasui/","section":"Authors","summary":"","title":"Shota Yasui","type":"authors"},{"content":"","date":"8 February 2020","externalUrl":null,"permalink":"/authors/gota-morishita/","section":"Authors","summary":"","title":"Gota Morishita","type":"authors"},{"content":"","date":"8 February 2020","externalUrl":null,"permalink":"/authors/kazuhisa-ogawa/","section":"Authors","summary":"","title":"Kazuhisa Ogawa","type":"authors"},{"content":"","date":"8 February 2020","externalUrl":null,"permalink":"/publications/bidding-agent-first-price-auction-aaai-2020/","section":"Publications","summary":"","title":"Online Learning for Bidding Agent in First Price Auction","type":"publications"},{"content":"","date":"2019年11月20日","externalUrl":null,"permalink":"/ja/publications/nash-equilibrium-strategy-hanafuda-ibis-2019/","section":"論文","summary":"","title":"花札におけるナッシュ均衡戦略の計算","type":"publications"},{"content":"","date":"18 November 2019","externalUrl":null,"permalink":"/publications/bayesian-optimization-low-budget-arxiv-2019/","section":"Publications","summary":"","title":"A Simple Heuristic for Bayesian Optimization with A Low Budget","type":"publications"},{"content":"","date":"18 November 2019","externalUrl":null,"permalink":"/authors/masahiro-nomura/","section":"Authors","summary":"","title":"Masahiro Nomura","type":"authors"},{"content":"","date":"2019年6月4日","externalUrl":null,"permalink":"/ja/publications/bayesian-optimization-low-budget-jsai-2019/","section":"論文","summary":"","title":"Black-box最適化に対するBudgetを考慮した探索空間の初期化","type":"publications"},{"content":"","date":"2019年6月4日","externalUrl":null,"permalink":"/ja/authors/%E9%87%8E%E6%9D%91-%E5%B0%86%E5%AF%9B/","section":"Authors","summary":"","title":"野村 将寛","type":"authors"},{"content":"","date":"2018年11月4日","externalUrl":null,"permalink":"/ja/publications/non-stationary-bandit-hpo-ibis-2018/","section":"論文","summary":"","title":"非定常多腕バンディットアルゴリズムを用いたハイパーパラメータ最適化フレームワークの提案","type":"publications"},{"content":"","date":"2017年3月13日","externalUrl":null,"permalink":"/ja/authors/%E5%B0%8F%E9%87%8E-%E5%8A%9F/","section":"Authors","summary":"","title":"小野 功","type":"authors"},{"content":"","date":"2017年3月13日","externalUrl":null,"permalink":"/ja/publications/exemplar-policy-optimization-ee-jpnsec-2017/","section":"論文","summary":"","title":"活用と探索の釣り合いを考慮した事例ベース政策最適化","type":"publications"},{"content":"","date":"2016年12月6日","externalUrl":null,"permalink":"/ja/publications/exemplar-policy-optimization-multimodal-ssi-2016/","section":"論文","summary":"","title":"多峰性景観下での自然進化戦略による事例ベース政策最適化","type":"publications"},{"content":"","date":"2016年3月7日","externalUrl":null,"permalink":"/ja/publications/exemplar-policy-optimization-nes-sice-se-2016/","section":"論文","summary":"","title":"自然進化戦略を用いた事例ベース政策最適化","type":"publications"},{"content":"This theme studies algorithms that learn from feedback obtained through online decision making. I focus on achieving stable learning under partial feedback and in environments that change over time.\nKey questions # How can agents learn efficiently from partial feedback? How can stable learning be achieved in environments that change over time? ","externalUrl":null,"permalink":"/research/bandits-online-learning/","section":"Research Themes","summary":"How can agents learn efficiently while making decisions online?","title":"Bandits and Online Learning","type":"research"},{"content":"","externalUrl":null,"permalink":"/categories/","section":"Categories","summary":"","title":"Categories","type":"categories"},{"content":"This theme studies how systems allocate limited opportunities under fairness considerations. I focus on designing allocation mechanisms that remain fair, effective, and scalable in practical decision-making environments.\nKey questions # How can systems allocate limited opportunities fairly? How can fairness-aware allocation remain effective at scale? ","externalUrl":null,"permalink":"/research/fairness-recsys-allocation/","section":"Research Themes","summary":"How can we allocate limited resources and opportunities fairly?","title":"Fairness in Recommender Systems and Allocation","type":"research"},{"content":"This theme studies how to generate language model outputs that better align with human preferences. I focus on how generation strategies can be designed and evaluated for alignment and robustness against reward hacking.\nKey questions # How can generation strategies better reflect human preferences? What makes a generation strategy robust to reward hacking? ","externalUrl":null,"permalink":"/research/language-model-alignment/","section":"Research Themes","summary":"How can we generate language model outputs that align with human preferences?","title":"Language Model Alignment and Preference Optimization","type":"research"},{"content":"This theme studies learning algorithms for computing equilibria in games. I focus on algorithms whose strategies converge to Nash equilibrium in a last-iterate sense, including perturbation-based methods for stabilizing learning dynamics.\nKey questions # How can learning algorithms converge quickly to Nash equilibrium? How can perturbation-based methods stabilize learning dynamics? When do multi-agent learning dynamics fail to converge? ","externalUrl":null,"permalink":"/research/learning-dynamics-equilibrium-games/","section":"Research Themes","summary":"How can learning algorithms converge quickly to Nash equilibrium?","title":"Learning Dynamics and Equilibrium Computation in Games","type":"research"},{"content":"This theme studies how agents improve and evaluate policies in sequential decision-making problems.\nKey questions # How can agents improve policies to obtain higher or target returns? How can we evaluate whether a policy achieves a desired level of performance? ","externalUrl":null,"permalink":"/research/reinforcement-learning-sequential-decision/","section":"Research Themes","summary":"How can agents improve and evaluate policies over time?","title":"Reinforcement Learning and Sequential Decision Making","type":"research"},{"content":"","externalUrl":null,"permalink":"/research/","section":"Research Themes","summary":"","title":"Research Themes","type":"research"},{"content":"","externalUrl":null,"permalink":"/series/","section":"Series","summary":"","title":"Series","type":"series"},{"content":"","externalUrl":null,"permalink":"/tags/","section":"Tags","summary":"","title":"Tags","type":"tags"}]