diff --git a/.ipynb_checkpoints/NeotomaTwitterBot-checkpoint.ipynb b/.ipynb_checkpoints/NeotomaTwitterBot-checkpoint.ipynb deleted file mode 100644 index 1828e5d..0000000 --- a/.ipynb_checkpoints/NeotomaTwitterBot-checkpoint.ipynb +++ /dev/null @@ -1,232 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 27, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Twitter authenticated \n", - "\n", - "Files opened\n", - "\n", - "Neotoma welcomes another dataset: Greenbrier Lake from A.J. Smith, D.F. Palmer http://apps.neotomadb.org/Explorer/?datasetid=15823\n" - ] - }, - { - "ename": "TweepError", - "evalue": "Twitter error response: status code = 403", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mTweepError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 119\u001b[0m \u001b[0mcheck_neotoma\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 120\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 121\u001b[1;33m \u001b[0mpost_tweet\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[1;32m\u001b[0m in \u001b[0;36mpost_tweet\u001b[1;34m()\u001b[0m\n\u001b[0;32m 101\u001b[0m \u001b[0mapi\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mupdate_status\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mstatus\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mline\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 102\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 103\u001b[1;33m \u001b[0mapi\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mupdate_status\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mstatus\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mline\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 104\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 105\u001b[0m \u001b[1;31m# Add the tweeted site to `old_files` and then delete it from the to_print.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;32mC:\\Python34\\lib\\site-packages\\tweepy\\api.py\u001b[0m in \u001b[0;36mupdate_status\u001b[1;34m(self, media_ids, *args, **kwargs)\u001b[0m\n\u001b[0;32m 191\u001b[0m \u001b[0mallowed_param\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'status'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'in_reply_to_status_id'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'lat'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'long'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'source'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'place_id'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'display_coordinates'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 192\u001b[0m \u001b[0mrequire_auth\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 193\u001b[1;33m )(post_data=post_data, *args, **kwargs)\n\u001b[0m\u001b[0;32m 194\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 195\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mmedia_upload\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfilename\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;32mC:\\Python34\\lib\\site-packages\\tweepy\\binder.py\u001b[0m in \u001b[0;36m_call\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 237\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mmethod\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 238\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 239\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mmethod\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 240\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 241\u001b[0m \u001b[1;31m# Set pagination mode\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;32mC:\\Python34\\lib\\site-packages\\tweepy\\binder.py\u001b[0m in \u001b[0;36mexecute\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 221\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mException\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 222\u001b[0m \u001b[0merror_msg\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m\"Twitter error response: status code = %s\"\u001b[0m \u001b[1;33m%\u001b[0m \u001b[0mresp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstatus_code\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 223\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mTweepError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0merror_msg\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mresp\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 224\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 225\u001b[0m \u001b[1;31m# Parse the response payload\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;31mTweepError\u001b[0m: Twitter error response: status code = 403" - ] - } - ], - "source": [ - "#!/usr/bin/env python\n", - "# -*- coding: utf-8 -*-\n", - "#!python3\n", - "\n", - "import tweepy, time, sys, json, requests, random\n", - " \n", - "def check_neotoma():\n", - " ## This function call to neotoma, reads a text file, compares the two\n", - " ## and then returns all the 'new' records to a different text file.\n", - "\n", - " # inputs:\n", - " # 1. text file: old_results.json\n", - " # 2. text file: to_print.json\n", - " # 3. 
json call: neotoma\n", - "\n", - " with open('old_results.json', 'r') as old_file:\n", - " old_calls = json.loads(old_file.read())\n", - " \n", - " with open('to_print.json', 'r') as print_file:\n", - " to_print = json.loads(print_file.read())\n", - " \n", - " neotoma = requests.get(\"http://ceiwin10.cei.psu.edu/NDB/RecentUploads?months=1\")\n", - " inp_json = json.loads(neotoma.text)['data']\n", - "\n", - " def get_datasets(x):\n", - " did = []\n", - " for y in x:\n", - " did.append(y[\"DatasetID\"])\n", - " return did\n", - "\n", - " neo_datasets = get_datasets(inp_json)\n", - " old_datasets = get_datasets(old_calls)\n", - " new_datasets = get_datasets(to_print)\n", - " \n", - " # So this works\n", - " # We now have the numeric dataset IDs for the most recent month of\n", - " # new files to neotoma (neo_datasets), all the ones we've already tweeted\n", - " # (old_datasets) and all the ones in our queue (new_datasets).\n", - " #\n", - " # The next thing we want to do is to remove all the neo_datasets that\n", - " # are in old_datasets and then remove all the new_datasets that are\n", - " # in neo_datasets, append neo_datasets to new_datasets (if new_datasets\n", - " # has a length > 0) and then dump new_datasets.\n", - " #\n", - " # Old datasets gets re-written when the tweets go out.\n", - "\n", - " # remove all the neo_datasets:\n", - " for i in range(len(neo_datasets)-1, 0, -1):\n", - " if neo_datasets[i] in old_datasets:\n", - " del inp_json[i]\n", - "\n", - " # This now gives us a pared down version of inp_json\n", - " # Now we need to make sure to add any of the to_print to neo_dataset.\n", - " # We do this by cycling through new_datasets. Any dataset number that\n", - " # is not in old_datasets or neo_datasets gets added to the beginning of\n", - " # the new list. This way it is always the first called up when twitter\n", - " # posts:\n", - " \n", - " for i in range(0, len(new_datasets)-1):\n", - " if new_datasets[i] not in old_datasets and new_datasets[i] not in neo_datasets:\n", - " inp_json.insert(0,to_print[i])\n", - "\n", - " # Now write out to file. 
Old file doesn't get changed until the\n", - " # twitter app is run.\n", - " with open('to_print.json', 'w') as print_file:\n", - " json.dump(inp_json, print_file)\n", - "\n", - "def post_tweet():\n", - " CONSUMER_KEY = 'jou6H9DZLPzw6f3aSIY7wzC6n'\n", - " CONSUMER_SECRET = 'eum3NCrtrVC1tFsGvEj0GuqsxwQCWFfN8nmgcbMyA5xdmQhSdU'\n", - " ACCESS_KEY = '3184480124-AHNgg72lXKYEuOjyzh5WKzBMkBBejpKIX9OxKpX'\n", - " ACCESS_SECRET = 'GAmE6PX3ulj61tluwXA6jUKcPJwoCNToCg5JrJS8BbA3U'\n", - " auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)\n", - " auth.set_access_token(ACCESS_KEY, ACCESS_SECRET)\n", - " api = tweepy.API(auth)\n", - "\n", - " print('Twitter authenticated \\n')\n", - " \n", - " # Read in the printable tweets:\n", - " with open('to_print.json', 'r') as print_file:\n", - " to_print = json.loads(print_file.read())\n", - " \n", - " with open('old_results.json', 'r') as print_file:\n", - " old_files = json.loads(print_file.read())\n", - " \n", - " print('Files opened\\n')\n", - " \n", - " # Now loop through the records:\n", - " while len(to_print) > 0:\n", - " weblink = 'http://apps.neotomadb.org/Explorer/?datasetid=' + str(to_print[0][\"DatasetID\"])\n", - " \n", - " line = 'Neotoma welcomes another ' + to_print[0][\"DatabaseName\"] + ' dataset: ' + to_print[0][\"SiteName\"] + \" from \" + to_print[0][\"Investigator\"] + \" \" + weblink\n", - " \n", - " if len(line) > 170:\n", - " line = 'Neotoma welcomes another dataset: ' + to_print[0][\"SiteName\"] + \" from \" + to_print[0][\"Investigator\"] + \" \" + weblink\n", - " \n", - " print('%s' % line)\n", - " \n", - " if random.randint(0,30) == 10:\n", - " line = 'This is a twitter bot for the Neotoma Paleoecological Database, letting you know what\\'s new. http://neotomadb.org managed by @sjgoring'\n", - " api.update_status(status=line)\n", - " else:\n", - " api.update_status(status=line)\n", - "\n", - " # Add the tweeted site to `old_files` and then delete it from the to_print.\n", - " old_files.append(to_print[0])\n", - "\n", - " del to_print[0]\n", - "\n", - " with open('to_print.json', 'w') as print_file:\n", - " json.dump(to_print, print_file)\n", - "\n", - " with open('old_results.json', 'w') as print_file:\n", - " json.dump(old_files, print_file)\n", - "\n", - " time.sleep(600) # Tweet every 10 minutes.\n", - " \n", - " if len(to_print) < 5:\n", - " check_neotoma()\n", - "\n", - "post_tweet()" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "import tweepy, time, sys, json, requests, random" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/plain": [ - "3" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.4.1" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git 
a/CITATION.cff b/CITATION.cff new file mode 100644 index 0000000..13dabe1 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,11 @@ +cff-version: 1.2.0 +message: "If you use this software, please cite it as below." +authors: +- family-names: "Goring" + given-names: "Simon" + orcid: "https://orcid.org/0000-0002-2700-4605" +title: "Neotoma Twitter Bot" +version: 2.0 +date-released: 2021-10-20 +doi: 10.5281/zenodo.5587213 +url: "https://github.com/NeotomaDB/NeotomaBot" \ No newline at end of file diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..0379038 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,25 @@ +# Contributor Code of Conduct + +As contributors and maintainers of this project, we pledge to respect all people who +contribute through reporting issues, posting feature requests, updating documentation, +submitting pull requests or patches, and other activities. + +We are committed to making participation in this project a harassment-free experience for +everyone, regardless of level of experience, gender, gender identity and expression, +sexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion. + +Examples of unacceptable behavior by participants include the use of sexual language or +imagery, derogatory comments or personal attacks, trolling, public or private harassment, +insults, or other unprofessional conduct. + +Project maintainers have the right and responsibility to remove, edit, or reject comments, +commits, code, wiki edits, issues, and other contributions that are not aligned to this +Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed +from the project team. + +Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by +opening an issue or contacting one or more of the project maintainers. + +This Code of Conduct is adapted from the Contributor Covenant +(http://contributor-covenant.org), version 1.0.0, available at +http://contributor-covenant.org/version/1/0/0/ diff --git a/Procfile b/Procfile index 115328b..6644e1f 100644 --- a/Procfile +++ b/Procfile @@ -1 +1 @@ -worker: python neotomabot.py \ No newline at end of file +worker: python3 neotomabot.py \ No newline at end of file diff --git a/README.md b/README.md index 7902b71..eeef0c3 100644 --- a/README.md +++ b/README.md @@ -1,30 +1,43 @@ -NeotomaBot -========== +# NeotomaBot -by: Simon Goring + -May 4, 2015 +[![Lifecycle: archived](https://img.shields.io/badge/Lifecycle-archived-orange.svg)](https://neotomadb.org) +[![DOI](https://zenodo.org/badge/417625973.svg)](https://zenodo.org/badge/latestdoi/417625973) +[![NSF-1948926](https://img.shields.io/badge/NSF-1948926-blue.svg)](https://nsf.gov/awardsearch/showAward?AWD_ID=1948926) + + -Description ----------------------- A twitter bot to search for new records in the [Neotoma Paleoecology Database](http://neotomadb.org) and then post them to the [@neotomadb](http://twitter.com/neotomadb) Twitter account. This program was an experiment to see how good my Python programming skills are. Apparently they're okay. The code could probably use some cleaning, but I'm generally happy with the way it turned out. The program runs on a free [Heroku](https://heroku.com) dyno and tweets semi-regularly. -Requirements ------------------------------ -The program uses `tweepy`, `time`, `sys`, `json`, `requests`, `random` and `imp` package for Python, as well as the Neotoma [API](http://api.neotomadb.org/doc/about). 
It was coded in Notepad++ because I wanted to try to do it quickly. +![Neotoma Twitter Banner](resources/neotomatwitter.png) +## Contributors + +This project is an open project, and contributions are welcome from any individual. All contributors to this project are bound by a [code of conduct](CODE_OF_CONDUCT.md). Please review and follow this code of conduct as part of your contribution. + +* [Simon Goring](http://goring.org) [![orcid](https://img.shields.io/badge/orcid-0000--0002--2700--4605-brightgreen.svg)](https://orcid.org/0000-0002-2700-4605) + +### Tips for Contributing + +Issues and bug reports are always welcome. Code clean-up, and feature additions can be done either through pull requests to project forks or branches. + +## Requirements + +This application runs using Python v3. All required packages are listed in the [requirements.txt](requirements.txt) file, generated using the python package `pipreqs`. + +Tweets are pulled either from the [resources/cannedtweets.txt](resources/cannedtweets.txt) or generated using the [Neotoma API](https://api.neotomadb.org) `/v1.5/data/recentuploads/n` endpoint. This endpoint returns a list of all dataset uploads within the last `n` months using an XML format. -The OAuth information is hidden on my computer and added to the `.gitignore`. If you want to run this yourself you'll need to go to the Twitter [apps](https://apps.twitter.com/) page and register a bot of your own. Once you get the `CONSUMER_KEY` and the other associated KEYs and SECRETs put them in a file called `apikeys.txt`. The code will work. +The application requires four environment variables, stored as configuration variables in the Heroku dynamo. These are the keys required to access the Twitter API. To obtain these keys for your own use you must add the [Developer status](https://developer.twitter.com) to your existing Twitter account and register an application to obtain the following keys: -Contributions and Bugs ----------------- -This is a work in progress. I'd like to add some more functionality in the near future, for example, following and reposting any posts using the hashtag [`#neotomadb`](https://twitter.com/search?f=realtime&q=%23neotomadb), and posting links to articles using the Neotoma Database. +* CONSUMER_KEY +* CONSUMER_SECRET +* ACCESS_TOKEN_KEY +* ACCESS_TOKEN_SECRET -If you have any issues (about the program!), would like to fork the repository, or would like to help improve or add functionality please feel free to contribute. +### Running Locally -License -------------------- -This project is distributed under an [MIT](http://opensource.org/licenses/MIT) license. +It is possible to run this code locally if you have the environment variables set. Just run `python3 neotomabot.py` \ No newline at end of file diff --git a/c b/c deleted file mode 100644 index 2d75265..0000000 Binary files a/c and /dev/null differ diff --git a/neotomabot.py b/neotomabot.py index 8381a62..11c21d9 100644 --- a/neotomabot.py +++ b/neotomabot.py @@ -1,212 +1,104 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- #!python3 +""" Neotoma Database Twitter Manager v2.0 + by: Simon Goring + This Twitter bot is intended to provide updated information to individuals about additions to the Neotoma + Paleoecology database. The script leverages the `schedule` package for Python, running continually in + the background, sending out tweets at a specified time and interval. 
+""" + +from TwitterAPI import TwitterAPI +import random +import requests +import json +import xmltodict +import urllib.request +import schedule +import time +import os + +twitstuff = {'consumer_key': os.environ['consumer_key'], + 'consumer_secret': os.environ['consumer_secret'], + 'access_token_key':os.environ['access_token_key'], + 'access_token_secret':os.environ['access_token_secret']} + +datasets = set() + +api = TwitterAPI(consumer_key=twitstuff['consumer_key'], + consumer_secret=twitstuff['consumer_secret'], + access_token_key=twitstuff['access_token_key'], + access_token_secret=twitstuff['access_token_secret']) + +def twitterup(api): + line = "Someone just restarted me by pushing to GitHub. This means I've been updated, yay!" + api.request('statuses/update', {'status':line}) + + +def randomtweet(api): + """ Tweet a random statement from a plain text document. Passing in the twitter API object. + The tweets are all present in the file `resources/cannedtweets.txt`. These can be edited + directly on GitHub if anyone chooses to. + """ + with open('resources/cannedtweets.txt', 'r') as f: + alltweets = f.read().splitlines() + line = random.choice(alltweets) + api.request('statuses/update', {'status':line}) + + +def recentsite(api): + """ Tweet one of the recent data uploads from Neotoma. Passing in the twitter API object. + This leverages the v1.5 API's XML response for recent uploads. It selects one of the new uploads + (except geochronology uploads) and tweets it out. It selects them randomly, and adds the selected + dataset to a set object so that values cannot be repeatedly tweeted out. + """ + with urllib.request.urlopen('https://api.neotomadb.org/v1.5/data/recentuploads/1') as response: + html = response.read() + output = xmltodict.parse(html)['results']['results'] + records = list(filter(lambda x: x['record']['datasettype'] != 'geochronology' or x['record']['datasetid'] not in datasets, output)) + if len(records) > 0: + tweet = random.choice(records)['record'] + tweet['geo'] = tweet['geo'].split('|')[0].strip() + while tweet['geo'] == 'Russia': + tweet = random.choice(records)['record'] + tweet['geo'] = tweet['geo'].split('|')[0].strip() + while tweet['datasetid'] in datasets: + tweet = random.choice(records)['record'] + string = "It's a new {datasettype} dataset from the {databasename} at {sitename} ({geo})! https://data.neotomadb.org/{datasetid}".format(**tweet) + if len(string) < 280: + api.request('statuses/update', {'status':string}) + datasets.add(tweet['datasetid']) + else: + string = "It's a new dataset from the {databasename} at {sitename} ({geo})! https://data.neotomadb.org/{datasetid}".format(**tweet) + if len(string) < 280: + api.request('statuses/update', {'status':string}) + datasets.add(tweet['datasetid']) + + +def ukrsite(api): + """ Tweet one of the recent data uploads from Neotoma. Passing in the twitter API object. + This leverages the v1.5 API's XML response for recent uploads. It selects one of the new uploads + (except geochronology uploads) and tweets it out. It selects them randomly, and adds the selected + dataset to a set object so that values cannot be repeatedly tweeted out. 
+ """ + with requests.get('https://api.neotomadb.org/v2.0/data/geopoliticalunits/5852/datasets?limit=9000') as response: + output = filter(lambda x: x["geopoliticalname"] == "Ukraine", json.loads(response.text)['data']) + records = list(map(lambda x: {'id': x['siteid'], 'name': x['sitename']}, list(output)[0]['sites'])) + if len(records) > 0: + tweet = random.choice(records) + string = "{name} is a site in Neotoma from Ukraine πŸ‡ΊπŸ‡¦ https://apps.neotomadb.org/explorer?siteids={id}".format(**tweet) + api.request('statuses/update', {'status':string}) -import os, tweepy, time, sys, json, requests, random, imp, datetime, schedule, time, random - -def twit_auth(): - # Authenticate the twitter session. - # Should only be needed once at the initiation of the code. - - CONSUMER_KEY = os.environ['CONSUMER_KEY'] - CONSUMER_SECRET = os.environ['CONSUMER_SECRET'] - ACCESS_KEY = os.environ['ACCESS_KEY'] - ACCESS_SECRET = os.environ['ACCESS_SECRET'] - - auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET) - auth.set_access_token(ACCESS_KEY, ACCESS_SECRET) - api = tweepy.API(auth) - print('Twitter authenticated \n') - return api - - -def check_neotoma(): - # This function call to neotoma, reads a text file, compares the two - # and then outputs all the 'new' records to a different text file. - # Function returns the number of new records returned. - - # inputs: - # 1. text file: old_results.json - # 2. text file: to_print.json - # 3. json call: neotoma - - with open('old_results.json', 'r') as old_file: - old_calls = json.loads(old_file.read()) - - with open('to_print.json', 'r') as print_file: - to_print = json.loads(print_file.read()) - - neotoma = requests.get("http://ceiwin10.cei.psu.edu/NDB/RecentUploads?months=1") - inp_json = json.loads(neotoma.text)['data'] - - def get_datasets(x): - did = [] - for y in x: - did.append(y["DatasetID"]) - return did - - neo_datasets = get_datasets(inp_json) - old_datasets = get_datasets(old_calls) - new_datasets = get_datasets(to_print) - - # So this works - # We now have the numeric dataset IDs for the most recent month of - # new files to neotoma (neo_datasets), all the ones we've already tweeted - # (old_datasets) and all the ones in our queue (new_datasets). - # - # The next thing we want to do is to remove all the neo_datasets that - # are in old_datasets and then remove all the new_datasets that are - # in neo_datasets, append neo_datasets to new_datasets (if new_datasets - # has a length > 0) and then dump new_datasets. - # - # Old datasets gets re-written when the tweets go out. - - # remove all the neo_datasets: - for i in range(len(neo_datasets)-1, 0, -1): - if neo_datasets[i] in old_datasets: - del inp_json[i] - - # This now gives us a pared down version of inp_json - # Now we need to make sure to add any of the to_print to neo_dataset. - # We do this by cycling through new_datasets. Any dataset number that - # is not in old_datasets or neo_datasets gets added to the beginning of - # the new list. This way it is always the first called up when twitter - # posts: - - for i in range(0, len(new_datasets)-1): - if new_datasets[i] not in old_datasets and new_datasets[i] not in neo_datasets: - inp_json.insert(0,to_print[i]) - - # Now write out to file. Old file doesn't get changed until the - # twitter app is run. 
- with open('to_print.json', 'w') as print_file: - json.dump(inp_json, print_file) - return len(inp_json) - len(to_print) - -def print_neotoma_update(api): - # Check for new records by using the neotoma "recent" API: - old_toprint = check_neotoma() - - # load files: - with open('to_print.json', 'r') as print_file: - to_print = json.loads(print_file.read()) - with open('old_results.json', 'r') as print_file: - old_files = json.loads(print_file.read()) - - print('Neotoma dataset updated.\n') - if (old_toprint) == 1: - # If only a single site has been added: - line = "I've got a backlog of " + str(len(to_print)) + " sites to tweet and " + str(old_toprint) + " site has been added since I last checked Neotoma. http://neotomadb.org" - elif (old_toprint) > 1: - line = "I've got a backlog of " + str(len(to_print)) + " sites to tweet and " + str(old_toprint) + " sites have been added since I last checked Neotoma. http://neotomadb.org" - else: - line = "I've got a backlog of " + str(len(to_print)) + " sites to tweet. Nothing new has been added since I last checked. http://neotomadb.org" - - print('%s' % line) - try: - print('%s' % line) - api.update_status(status=line) - except tweepy.error.TweepError: - print("Twitter error raised") - -def post_tweet(api): - # Read in the printable tweets: - with open('to_print.json', 'r') as print_file: - to_print = json.loads(print_file.read()) - - with open('old_results.json', 'r') as print_file: - old_files = json.loads(print_file.read()) - - print('Files opened\n') - - pr_tw = random.randint(0,len(to_print) - 1) - site = to_print[pr_tw] - - # Get ready to print the first [0] record in to_print: - weblink = 'http://apps.neotomadb.org/Explorer/?datasetid=' + str(site["DatasetID"]) - - # The datasets have long names. I want to match to simplify: - - line = 'Neotoma welcomes ' + site["SiteName"] + ', a ' + site["DatasetType"] + ' dataset by ' + site["Investigator"] + " " + weblink - - # There's a few reasons why the name might be very long, one is the site name, the other is the author name: - if len(line) > 170: - line = 'Neotoma welcomes ' + site["SiteName"] + " by " + site["Investigator"] + " " + weblink - - # If it's still too long then clip the author list: - if len(line) > 170 & site["Investigator"].find(','): - author = site["Investigator"][0:to_print[0]["Investigator"].find(',')] - line = 'Neotoma welcomes ' + site["SiteName"] + " by " + author + " et al. " + weblink - - try: - print('%s' % line) - api.update_status(status=line) - old_files.append(site) - del to_print[pr_tw] - with open('to_print.json', 'w') as print_file: - json.dump(to_print, print_file) - with open('old_results.json', 'w') as print_file: - json.dump(old_files, print_file) - except tweepy.error.TweepError: - print("Twitter error raised") - - -def self_identify(api): - - # Identify myself as the owner of the bot: - line = 'This twitter bot for the Neotoma Paleoecological Database is managed by @sjgoring. 
Letting you know what\'s new at http://neotomadb.org' - try: - print('%s' % line) - api.update_status(status=line) - except tweepy.error.TweepError: - print("Twitter error raised") def self_identify_hub(api): - # Identify the codebase for the bot: - line = 'This twitter bot for the Neotoma Paleoecological Database is programmed in #python and publicly available through an MIT License on GitHub: https://github.com/SimonGoring/neotomabot' - try: - print('%s' % line) - api.update_status(status=line) - except tweepy.error.TweepError: - print("Twitter error raised") - -def other_inf_hub(api): - # Identify the codebase for the bot: - line = ['The bot for the Neotoma Database is programmed in #python and publicly available through an MIT License on GitHub: https://github.com/SimonGoring/neotomabot', - 'Neotoma has teaching modules you can use in the class room, check it out: https://www.neotomadb.org/education/category/higher_ed/', - 'The governance for Neotoma includes representatives from our constituent databases. Find out more: https://www.neotomadb.org/about/category/governance', - 'We are invested in #cyberinfrastructure. Our response to emerging challenges is posted on @authorea: https://www.authorea.com/users/152134/articles/165940-cyberinfrastructure-in-the-paleosciences-mobilizing-long-tail-data-building-distributed-community-infrastructure-empowering-individual-geoscientists', - 'We keep a list of all publications that have used Neotoma for their research. Want to be added? Contact us! https://www.neotomadb.org/references', - 'These days everyone\'s got a Google Scholar page. So does Neotoma! https://scholar.google.ca/citations?user=idoixqkAAAAJ&hl=en', - 'If you use #rstats then you can access Neotoma data directly thanks to @rOpenSci! https://ropensci.org/tutorials/neotoma_tutorial.html', - 'Neotoma is more than just pollen & mammals; it contains 28 data types incl phytoliths & biochemistry data. Explore! https://www.neotomadb.org/data/category/explorer', - 'Think you\'ve got better tweets? Add them to my code & make a pull request! https://github.com/SimonGoring/neotomabot', - 'Behold, the very first Neotoma dataset, ID 1: https://apps.neotomadb.org/explorer/?datasetid=1', - 'We\'ve got some new R tutorials up online. Is there anything you\'d like to do with Neotoma? http://neotomadb.github.io', - 'Neotoma is a member of the @ICSU_WDS, working to share best practices for data stewardship.', - 'Are you presenting at an upcoming meeting? Will you be talking about Neotoma? Let us know and we can help get the word out! Contact @sjgoring', - 'You know you want to slide into these mentions. . . Let us know what cool #pollen, #paleoecology, #archaeology, #whatever you\'re doing with Neotoma data!', - 'Referencing Neotoma? Why not check out our Quaternary Research paper? https://doi.org/10.1017/qua.2017.105', - 'How is Neotoma leveraging text mining to improve its data holdings? Find out on the @earthcube blog: https://earthcube.wordpress.com/2018/03/06/geodeepdive-into-darkdata/', - "Building an application that could leverage Neotoma data? Our API (https://api-dev.neotomadb.org) is public and open: https://github.com/NeotomaDB/api_nodetest/", - "The landing pages for Neotoma were built using Vue.js, all code is published on Github at https://github.com/NeotomaDB/ndbLandingPage", - "Learn more about how Neotoma makes the most of teaching and cutting-edge research in a new publication in Elements of Paleontology: http://dx.doi.org/10.1017/9781108681582", - "Neotoma is on Slack. 
Come join the discussion and get involved! We're looking for folks to help with documentation, stewardship and coding. https://join.slack.com/t/neotomadb/shared_invite/zt-cvsv53ep-wjGeCTkq7IhP6eUNA9NxYQ" - ] - - try: - print('%s' % line) - api.update_status(status=line[random.randint(0,len(line))]) - except tweepy.error.TweepError: - print("Twitter error raised") - -api = twit_auth() + """ Identify the codebase for the bot through a tweet. """ + line = 'This twitter bot for the Neotoma Paleoecological Database is programmed in #python and publicly available through an MIT License on GitHub: https://github.com/NeotomaDB/neotomabot' + api.request('statuses/update', {'status':line}) -schedule.every(3).hours.do(post_tweet, api) -schedule.every().day.at("15:37").do(print_neotoma_update, api) -schedule.every().wednesday.at("14:30").do(self_identify, api) +schedule.every(6).hours.do(recentsite, api) +schedule.every(5).hours.do(randomtweet, api) +schedule.every(3).hours.do(ukrsite, api) schedule.every().monday.at("14:30").do(self_identify_hub, api) -schedule.every().day.at("10:30").do(other_inf_hub, api) while 1: schedule.run_pending() diff --git a/requirements.txt b/requirements.txt index 26b386c..a0f9945 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,5 @@ -tweepy==3.7.0 -requests==2.21.0 -schedule==0.5.0 +schedule==1.1.0 +requests==2.22.0 +xmltodict==0.12.0 +tweepy==4.1.0 +TwitterAPI==2.7.5 diff --git a/resources/cannedtweets.txt b/resources/cannedtweets.txt new file mode 100644 index 0000000..862b6b7 --- /dev/null +++ b/resources/cannedtweets.txt @@ -0,0 +1,25 @@ +The bot for the Neotoma Database is programmed in #python and publicly available through an MIT License on GitHub: https://github.com/NeotomaDB/neotomabot +We've been working with the @EuPolDB to update records and get new data in. Looking forward to lots more #openscience! http://www.europeanpollendatabase.net/index.php +Such amazing work from our partners at the Latin American Pollen Database! Glad to have more records from this important region https://www.latinamericapollendb.com/ +Neotoma has teaching modules you can use in the classroom, check it out: https://www.neotomadb.org/education/category/higher_ed/ +Governance for Neotoma includes representatives from our 34 constituent databases. Find out more: https://www.neotomadb.org/about/category/governance +Collaboration with @carletonserc led to the development of a number of #paleoecology instruction modules from high school to upper college courses: https://serc.carleton.edu/neotoma/activities.html +Honestly, there's so much to be done with Neotoma, let us know if you're looking for a project to work on. We'd be happy to help! +We are invested in #cyberinfrastructure. Our response to emerging challenges is posted on @authorea: https://www.authorea.com/users/152134/articles/165940-cyberinfrastructure-in-the-paleosciences-mobilizing-long-tail-data-building-distributed-community-infrastructure-empowering-individual-geoscientists +There's a big @zotero library of Neotoma publications that we've been working on. Check it out here: https://www.zotero.org/groups/2321378/neotomadb +Neotoma is more than just pollen & mammals; it contains 28 data types incl phytoliths & biochemistry data. Explore! https://apps.neotomadb.org/explorer +Think you've got better tweets? Add them to my code & make a pull request! 
https://github.com/NeotomaDB/neotomabot +Behold, the very first Neotoma dataset, ID 1: https://apps.neotomadb.org/explorer/?datasetid=1 +Our site at https://open.neotomadb.org hosts all our #openscience work, including a link to the database schema. Check it out! +Neotoma is a member of the @ICSU_WDS, working to share best practices for data stewardship. +Are you presenting at an upcoming meeting? Will you be talking about Neotoma? Let us know and we can help get the word out! Contact @sjgoring +You know you want to slide into these mentions. . . Let us know what cool #pollen, #paleoecology, #archaeology, #whatever you're doing with Neotoma data! +Referencing Neotoma? Why not check out our Quaternary Research paper? https://doi.org/10.1017/qua.2017.105 +How is Neotoma leveraging text mining to improve its data holdings? We've been working with @geodeepdive to discover articles that have yet to be submitted to the database. @earthcube +Building an application that could leverage Neotoma data? Our API (https://api.neotomadb.org) is public and open: https://github.com/NeotomaDB/api_nodetest/ #openscience +The landing pages for Neotoma were built using Vue.js, all code is published on Github at https://github.com/NeotomaDB/ndbLandingPage Check them out here: https://data.neotomadb.org +Learn more about how Neotoma makes the most of teaching and cutting-edge research in our Elements of Paleontology publication: http://dx.doi.org/10.1017/9781108681582 +Neotoma is on Slack. Come join the discussion and get involved! We're looking for folks to help with documentation, stewardship and coding. https://join.slack.com/t/neotomadb/shared_invite/zt-cvsv53ep-wjGeCTkq7IhP6eUNA9NxYQ +Neotoma is at the center of research, engagement and outreach. Find out more in our Elements of Paleontology article: https://doi.org/10.1017/9781108681582 +Do you like Diatoms? We're working with @AcadNatSci to get more diatom and water chemistry data into Neotoma. Look how pretty these things are! https://artsandculture.google.com/exhibit/diatoms-of-the-academy-of-natural-sciences-of-drexel-university-academy-of-natural-sciences-of-drexel-university/7QKi7EaVlShRLw?hl=en +The new Neotoma #rstats package is now in beta testing. We're looking for folks to help out with development, testing and documentation. Get involved! https://github.com/NeotomaDB/neotoma2 diff --git a/resources/neotomatwitter.png b/resources/neotomatwitter.png new file mode 100644 index 0000000..e6474e7 Binary files /dev/null and b/resources/neotomatwitter.png differ diff --git a/v1/neotomabot.py b/v1/neotomabot.py new file mode 100644 index 0000000..8381a62 --- /dev/null +++ b/v1/neotomabot.py @@ -0,0 +1,213 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +#!python3 + +import os, tweepy, time, sys, json, requests, random, imp, datetime, schedule, time, random + +def twit_auth(): + # Authenticate the twitter session. + # Should only be needed once at the initiation of the code. + + CONSUMER_KEY = os.environ['CONSUMER_KEY'] + CONSUMER_SECRET = os.environ['CONSUMER_SECRET'] + ACCESS_KEY = os.environ['ACCESS_KEY'] + ACCESS_SECRET = os.environ['ACCESS_SECRET'] + + auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET) + auth.set_access_token(ACCESS_KEY, ACCESS_SECRET) + api = tweepy.API(auth) + print('Twitter authenticated \n') + return api + + +def check_neotoma(): + # This function call to neotoma, reads a text file, compares the two + # and then outputs all the 'new' records to a different text file. 
+ # Function returns the number of new records returned. + + # inputs: + # 1. text file: old_results.json + # 2. text file: to_print.json + # 3. json call: neotoma + + with open('old_results.json', 'r') as old_file: + old_calls = json.loads(old_file.read()) + + with open('to_print.json', 'r') as print_file: + to_print = json.loads(print_file.read()) + + neotoma = requests.get("http://ceiwin10.cei.psu.edu/NDB/RecentUploads?months=1") + inp_json = json.loads(neotoma.text)['data'] + + def get_datasets(x): + did = [] + for y in x: + did.append(y["DatasetID"]) + return did + + neo_datasets = get_datasets(inp_json) + old_datasets = get_datasets(old_calls) + new_datasets = get_datasets(to_print) + + # So this works + # We now have the numeric dataset IDs for the most recent month of + # new files to neotoma (neo_datasets), all the ones we've already tweeted + # (old_datasets) and all the ones in our queue (new_datasets). + # + # The next thing we want to do is to remove all the neo_datasets that + # are in old_datasets and then remove all the new_datasets that are + # in neo_datasets, append neo_datasets to new_datasets (if new_datasets + # has a length > 0) and then dump new_datasets. + # + # Old datasets gets re-written when the tweets go out. + + # remove all the neo_datasets: + for i in range(len(neo_datasets)-1, 0, -1): + if neo_datasets[i] in old_datasets: + del inp_json[i] + + # This now gives us a pared down version of inp_json + # Now we need to make sure to add any of the to_print to neo_dataset. + # We do this by cycling through new_datasets. Any dataset number that + # is not in old_datasets or neo_datasets gets added to the beginning of + # the new list. This way it is always the first called up when twitter + # posts: + + for i in range(0, len(new_datasets)-1): + if new_datasets[i] not in old_datasets and new_datasets[i] not in neo_datasets: + inp_json.insert(0,to_print[i]) + + # Now write out to file. Old file doesn't get changed until the + # twitter app is run. + with open('to_print.json', 'w') as print_file: + json.dump(inp_json, print_file) + return len(inp_json) - len(to_print) + +def print_neotoma_update(api): + # Check for new records by using the neotoma "recent" API: + old_toprint = check_neotoma() + + # load files: + with open('to_print.json', 'r') as print_file: + to_print = json.loads(print_file.read()) + with open('old_results.json', 'r') as print_file: + old_files = json.loads(print_file.read()) + + print('Neotoma dataset updated.\n') + if (old_toprint) == 1: + # If only a single site has been added: + line = "I've got a backlog of " + str(len(to_print)) + " sites to tweet and " + str(old_toprint) + " site has been added since I last checked Neotoma. http://neotomadb.org" + elif (old_toprint) > 1: + line = "I've got a backlog of " + str(len(to_print)) + " sites to tweet and " + str(old_toprint) + " sites have been added since I last checked Neotoma. http://neotomadb.org" + else: + line = "I've got a backlog of " + str(len(to_print)) + " sites to tweet. Nothing new has been added since I last checked. 
http://neotomadb.org" + + print('%s' % line) + try: + print('%s' % line) + api.update_status(status=line) + except tweepy.error.TweepError: + print("Twitter error raised") + +def post_tweet(api): + # Read in the printable tweets: + with open('to_print.json', 'r') as print_file: + to_print = json.loads(print_file.read()) + + with open('old_results.json', 'r') as print_file: + old_files = json.loads(print_file.read()) + + print('Files opened\n') + + pr_tw = random.randint(0,len(to_print) - 1) + site = to_print[pr_tw] + + # Get ready to print the first [0] record in to_print: + weblink = 'http://apps.neotomadb.org/Explorer/?datasetid=' + str(site["DatasetID"]) + + # The datasets have long names. I want to match to simplify: + + line = 'Neotoma welcomes ' + site["SiteName"] + ', a ' + site["DatasetType"] + ' dataset by ' + site["Investigator"] + " " + weblink + + # There's a few reasons why the name might be very long, one is the site name, the other is the author name: + if len(line) > 170: + line = 'Neotoma welcomes ' + site["SiteName"] + " by " + site["Investigator"] + " " + weblink + + # If it's still too long then clip the author list: + if len(line) > 170 & site["Investigator"].find(','): + author = site["Investigator"][0:to_print[0]["Investigator"].find(',')] + line = 'Neotoma welcomes ' + site["SiteName"] + " by " + author + " et al. " + weblink + + try: + print('%s' % line) + api.update_status(status=line) + old_files.append(site) + del to_print[pr_tw] + with open('to_print.json', 'w') as print_file: + json.dump(to_print, print_file) + with open('old_results.json', 'w') as print_file: + json.dump(old_files, print_file) + except tweepy.error.TweepError: + print("Twitter error raised") + + +def self_identify(api): + + # Identify myself as the owner of the bot: + line = 'This twitter bot for the Neotoma Paleoecological Database is managed by @sjgoring. Letting you know what\'s new at http://neotomadb.org' + try: + print('%s' % line) + api.update_status(status=line) + except tweepy.error.TweepError: + print("Twitter error raised") + +def self_identify_hub(api): + # Identify the codebase for the bot: + line = 'This twitter bot for the Neotoma Paleoecological Database is programmed in #python and publicly available through an MIT License on GitHub: https://github.com/SimonGoring/neotomabot' + try: + print('%s' % line) + api.update_status(status=line) + except tweepy.error.TweepError: + print("Twitter error raised") + +def other_inf_hub(api): + # Identify the codebase for the bot: + line = ['The bot for the Neotoma Database is programmed in #python and publicly available through an MIT License on GitHub: https://github.com/SimonGoring/neotomabot', + 'Neotoma has teaching modules you can use in the class room, check it out: https://www.neotomadb.org/education/category/higher_ed/', + 'The governance for Neotoma includes representatives from our constituent databases. Find out more: https://www.neotomadb.org/about/category/governance', + 'We are invested in #cyberinfrastructure. Our response to emerging challenges is posted on @authorea: https://www.authorea.com/users/152134/articles/165940-cyberinfrastructure-in-the-paleosciences-mobilizing-long-tail-data-building-distributed-community-infrastructure-empowering-individual-geoscientists', + 'We keep a list of all publications that have used Neotoma for their research. Want to be added? Contact us! https://www.neotomadb.org/references', + 'These days everyone\'s got a Google Scholar page. So does Neotoma! 
https://scholar.google.ca/citations?user=idoixqkAAAAJ&hl=en', + 'If you use #rstats then you can access Neotoma data directly thanks to @rOpenSci! https://ropensci.org/tutorials/neotoma_tutorial.html', + 'Neotoma is more than just pollen & mammals; it contains 28 data types incl phytoliths & biochemistry data. Explore! https://www.neotomadb.org/data/category/explorer', + 'Think you\'ve got better tweets? Add them to my code & make a pull request! https://github.com/SimonGoring/neotomabot', + 'Behold, the very first Neotoma dataset, ID 1: https://apps.neotomadb.org/explorer/?datasetid=1', + 'We\'ve got some new R tutorials up online. Is there anything you\'d like to do with Neotoma? http://neotomadb.github.io', + 'Neotoma is a member of the @ICSU_WDS, working to share best practices for data stewardship.', + 'Are you presenting at an upcoming meeting? Will you be talking about Neotoma? Let us know and we can help get the word out! Contact @sjgoring', + 'You know you want to slide into these mentions. . . Let us know what cool #pollen, #paleoecology, #archaeology, #whatever you\'re doing with Neotoma data!', + 'Referencing Neotoma? Why not check out our Quaternary Research paper? https://doi.org/10.1017/qua.2017.105', + 'How is Neotoma leveraging text mining to improve its data holdings? Find out on the @earthcube blog: https://earthcube.wordpress.com/2018/03/06/geodeepdive-into-darkdata/', + "Building an application that could leverage Neotoma data? Our API (https://api-dev.neotomadb.org) is public and open: https://github.com/NeotomaDB/api_nodetest/", + "The landing pages for Neotoma were built using Vue.js, all code is published on Github at https://github.com/NeotomaDB/ndbLandingPage", + "Learn more about how Neotoma makes the most of teaching and cutting-edge research in a new publication in Elements of Paleontology: http://dx.doi.org/10.1017/9781108681582", + "Neotoma is on Slack. Come join the discussion and get involved! We're looking for folks to help with documentation, stewardship and coding. https://join.slack.com/t/neotomadb/shared_invite/zt-cvsv53ep-wjGeCTkq7IhP6eUNA9NxYQ" + ] + + try: + print('%s' % line) + api.update_status(status=line[random.randint(0,len(line))]) + except tweepy.error.TweepError: + print("Twitter error raised") + +api = twit_auth() + +schedule.every(3).hours.do(post_tweet, api) +schedule.every().day.at("15:37").do(print_neotoma_update, api) +schedule.every().wednesday.at("14:30").do(self_identify, api) +schedule.every().monday.at("14:30").do(self_identify_hub, api) +schedule.every().day.at("10:30").do(other_inf_hub, api) + +while 1: + schedule.run_pending() + time.sleep(61) diff --git a/old_results.json b/v1/old_results.json similarity index 100% rename from old_results.json rename to v1/old_results.json diff --git a/to_print.json b/v1/to_print.json similarity index 100% rename from to_print.json rename to v1/to_print.json diff --git a/tweets.json b/v1/tweets.json similarity index 100% rename from tweets.json rename to v1/tweets.json diff --git a/v2/neotomabot.py b/v2/neotomabot.py new file mode 100644 index 0000000..ccb8cb0 --- /dev/null +++ b/v2/neotomabot.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +#!python3 +""" Neotoma Database Twitter Manager + by: Simon Goring + This Twitter bot is intended to provide updated information to individuals about additions to the Neotoma + Paleoecology database. 
The script leverages the `schedule` package for Python, running continually in + the background, sending out tweets at a specified time and interval. +""" + +from TwitterAPI import TwitterAPI +import random +import xmltodict +import urllib.request +import schedule +import time +import os + +twitstuff = {'consumer_key':os.environ['consumer_key'], + 'consumer_secret': os.environ['consumer_secret'], + 'access_token_key':os.environ['access_token_key'], + 'access_token_secret':os.environ['access_token_secret']} + +datasets = set() + +api = TwitterAPI(consumer_key=twitstuff['consumer_key'], + consumer_secret=twitstuff['consumer_secret'], + access_token_key=twitstuff['access_token_key'], + access_token_secret=twitstuff['access_token_secret']) + +def randomtweet(api): + """ Tweet a random statement from a plain text document. Passing in the twitter API object. + The tweets are all present in the file `resources/cannedtweets.txt`. These can be edited + directly on GitHub if anyone chooses to. + """ + with open('../resources/cannedtweets.txt', 'r') as f: + alltweets = f.read().splitlines() + line = random.choice(alltweets) + api.request('statuses/update', {'status':line}) + +def recentsite(api): + """ Tweet one of the recent data uploads from Neotoma. Passing in the twitter API object. + This leverages the v1.5 API's XML response for recent uploads. It selects one of the new uploads + (except geochronology uploads) and tweets it out. It selects them randomly, and adds the selected + dataset to a set object so that values cannot be repeatedly tweeted out. + """ + with urllib.request.urlopen('https://api.neotomadb.org/v1.5/data/recentuploads/1') as response: + html = response.read() + output = xmltodict.parse(html)['results']['results'] + records = list(filter(lambda x: x['record']['datasettype'] != 'geochronology' or x['record']['datasetid'] not in datasets, output)) + if len(records) > 0: + tweet = random.choice(records)['record'] + while tweet['datasetid'] in datasets: + tweet = random.choice(records)['record'] + string = "It's a new {datasettype} dataset from the {databasename} at {sitename}! https://data.neotomadb.org/{datasetid}".format(**tweet) + if len(string) < 280: + api.request('statuses/update', {'status':string}) + datasets.add(tweet['datasetid']) + else: + string = "It's a new dataset from the {databasename} at {sitename}! https://data.neotomadb.org/{datasetid}".format(**tweet) + if len(string) < 280: + api.request('statuses/update', {'status':string}) + datasets.add(tweet['datasetid']) + + +def self_identify_hub(api): + """ Identify the codebase for the bot through a tweet. """ + line = 'This twitter bot for the Neotoma Paleoecological Database is programmed in #python and publicly available through an MIT License on GitHub: https://github.com/NeotomaDB/neotomabot' + api.request('statuses/update', {'status':line}) + + +schedule.every(6).hours.do(recentsite, api) +schedule.every(5).hours.do(randomtweet, api) +schedule.every().monday.at("14:30").do(self_identify_hub, api) +schedule.every().day.at("10:30").do(other_inf_hub, api) + +while 1: + schedule.run_pending() + time.sleep(61)
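A short note on the core flow this diff introduces: `recentsite()` in `neotomabot.py` fetches recent uploads from the Neotoma v1.5 API, parses the XML payload with `xmltodict`, skips geochronology records and anything already tweeted, then formats a sub-280-character status. The sketch below is a minimal dry-run version of that flow, with no Twitter credentials needed: the endpoint and the record fields (`datasettype`, `databasename`, `sitename`, `geo`, `datasetid`) are taken from the code in this diff, but the exact XML response shape is an assumption inferred from that parsing code, and the `statuses/update` call is replaced by a `print`.

```python
#!/usr/bin/env python3
"""Dry-run sketch of the recentsite() flow from neotomabot.py (no tweeting).

Assumes the v1.5 recentuploads endpoint still returns XML that xmltodict parses
as {'results': {'results': [{'record': {...}}, ...]}}, as implied by the parsing
in the diff; the record field names are likewise taken from that code.
"""
import random
import urllib.request

import xmltodict

already_tweeted = set()  # mirrors the module-level `datasets` set in the bot


def pick_recent_upload(months=1):
    """Return a formatted status line for one recent Neotoma upload, or None."""
    url = f'https://api.neotomadb.org/v1.5/data/recentuploads/{months}'
    with urllib.request.urlopen(url) as response:
        payload = xmltodict.parse(response.read())

    records = [r['record'] for r in payload['results']['results']]

    # Keep records that are not geochronology and have not been posted yet.
    candidates = [r for r in records
                  if r['datasettype'] != 'geochronology'
                  and r['datasetid'] not in already_tweeted]
    if not candidates:
        return None

    record = random.choice(candidates)
    # The API returns a pipe-delimited geopolitical hierarchy; keep the top level.
    record['geo'] = record['geo'].split('|')[0].strip()

    status = ("It's a new {datasettype} dataset from the {databasename} at "
              "{sitename} ({geo})! https://data.neotomadb.org/{datasetid}"
              ).format(**record)
    if len(status) >= 280:
        # Fall back to the shorter template used in the bot.
        status = ("It's a new dataset from the {databasename} at {sitename} "
                  "({geo})! https://data.neotomadb.org/{datasetid}").format(**record)

    already_tweeted.add(record['datasetid'])
    return status


if __name__ == '__main__':
    print(pick_recent_upload() or 'Nothing new to post.')
```

One design point worth noting: the sketch joins the two filter conditions with `and` (not geochronology *and* not already tweeted), whereas `recentsite()` uses `or`, which leaves untweeted geochronology records in the candidate pool; whether that is intended is left to the maintainers.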