Name: VALUE Benchmark
License: https://github.com/VALUE-Leaderboard/DataRelease/blob/main/DATA.md#license

Please cite our paper as shown below if you use the VALUE benchmark or the starter code.


@inproceedings{li2021value,
  title     = {{VALUE}: A Multi-Task Benchmark for Video-and-Language Understanding Evaluation},
  author    = {Li, Linjie and Lei, Jie and Gan, Zhe and Yu, Licheng and Chen, Yen-Chun and Pillai, Rohit
               and Cheng, Yu and Zhou, Luowei and Wang, Xin Eric and Wang, William Yang and others},
  booktitle = {35th Conference on Neural Information Processing Systems (NeurIPS 2021) Track on Datasets and Benchmarks},
  year      = {2021},
}

We sincerely thank all dataset contributors to the VALUE benchmark. Please also cite the following datasets if you use the VALUE benchmark.


@inproceedings{lei2018tvqa,
  title     = {{TVQA}: Localized, Compositional Video Question Answering},
  author    = {Lei, Jie and Yu, Licheng and Bansal, Mohit and Berg, Tamara L.},
  booktitle = {EMNLP},
  year      = {2018},
}


@inproceedings{lei2020tvr,
  title     = {{TVR}: A Large-Scale Dataset for Video-Subtitle Moment Retrieval},
  author    = {Lei, Jie and Yu, Licheng and Berg, Tamara L. and Bansal, Mohit},
  booktitle = {ECCV},
  year      = {2020},
}


(Duplicate of the lei2020tvr entry above. It is listed only once because BibTeX reports a second entry with the same citation key as a "repeated entry" error.)


@inproceedings{li2020hero,
  title     = {{HERO}: Hierarchical Encoder for Video+Language Omni-representation Pre-training},
  author    = {Li, Linjie and Chen, Yen-Chun and Cheng, Yu and Gan, Zhe and Yu, Licheng and Liu, Jingjing},
  booktitle = {EMNLP},
  year      = {2020},
}


(Duplicate of the li2020hero entry above. It is listed only once because BibTeX reports a second entry with the same citation key as a "repeated entry" error.)


@inproceedings{ZhXuCoAAAI18,
  title     = {Towards Automatic Learning of Procedures From Web Instructional Videos},
  author    = {Zhou, Luowei and Xu, Chenliang and Corso, Jason J.},
  booktitle = {AAAI},
  year      = {2018},
}
@inproceedings{ZhZhCoCVPR2018,
  title     = {End-to-End Dense Video Captioning with Masked Transformer},
  author    = {Zhou, Luowei and Zhou, Yingbo and Corso, Jason J. and Socher, Richard and Xiong, Caiming},
  booktitle = {CVPR},
  year      = {2018},
}


@inproceedings{Wang_2019_ICCV,
  title     = {{VaTeX}: A Large-Scale, High-Quality Multilingual Dataset for Video-and-Language Research},
  author    = {Wang, Xin and Wu, Jiawei and Chen, Junkun and Li, Lei and Wang, Yuan-Fang and Wang, William Yang},
  booktitle = {ICCV},
  year      = {2019},
}


@inproceedings{lei2020vlep,
  title     = {What is More Likely to Happen Next? {Video}-and-Language Future Event Prediction},
  author    = {Lei, Jie and Yu, Licheng and Berg, Tamara L. and Bansal, Mohit},
  booktitle = {EMNLP},
  year      = {2020},
}


@inproceedings{liu2020violin,
  title     = {{Violin}: A Large-Scale Dataset for Video-and-Language Inference},
  author    = {Liu, Jingzhou and Chen, Wenhu and Cheng, Yu and Gan, Zhe and Yu, Licheng and Yang, Yiming and Liu, Jingjing},
  booktitle = {CVPR},
  year      = {2020},
}

Meet VALUE!

Why VALUE?

Multi-channel Video

Diverse Video Domain

Various Datasets over Representative Tasks

Leaderboard!

What is VALUE?

Paper

Please cite our paper as shown below if you use the VALUE benchmark or the starter code.

We sincerely thank all dataset contributors to the VALUE benchmark. Please also cite the following datasets if you use the VALUE benchmark.

Contact

Have any questions or suggestions? Feel free to reach us at value-benchmark@googlegroups.com!