I am running a django website on Heroku using uwsgi with 4 processes.
Often, whenever I click "Save and continue editing", I get multiple entries of the same instance, separated by 1-2 seconds in their creation times.
What could be the issue behind this? I have even tried to prevent multiple clicks by disabling the button using javascript.
Also, the model has its save method overridden, although I don't think this makes any difference.
Venue ModelAdmin - simple calculations and save here
def save_model(self, request, obj, form, change):
    """Stamp auditing fields and derive seating capacities, then persist.

    checked_by is always refreshed to the acting admin user; created_by is
    set only once. Seating bounds are aggregated from the related
    venuetospacetypemapper rows, treating missing values as 0.
    """
    obj.checked_by = request.user
    if not obj.created_by:
        obj.created_by = request.user.username
    rows = obj.venuetospacetypemapper_set.all().values(
        'min_seating_capacity', 'max_seating_capacity'
    )
    mins = [row.get('min_seating_capacity') or 0 for row in rows]
    maxes = [row.get('max_seating_capacity') or 0 for row in rows]
    # NOTE(review): a None minimum is coerced to 0, so one unset row drags
    # the overall minimum to 0 -- presumably intentional; confirm.
    obj.min_seating_capacity = min(mins) if mins else 0
    obj.max_seating_capacity = sum(maxes)
    obj.save()
Since there are a number of inlines in Venue admin, only
VenueMedia model has save overridden:
def save(self, *args, **kwargs):
"""Regenerate the JPEG thumbnail for this media row before saving.

Skips the (expensive) image work on updates whose file path is unchanged.
NOTE(review): Python 2 code (print statements, StringIO); the timing
prints look like leftover profiling and belong in logging, if anywhere.
"""
start=datetime.datetime.now()
print "starting save in admin.save", start
# Assume the image must be (re)processed unless proven unchanged below.
update_image = True
if self.id: # if updating record, then check if image path has changed
update_image = False
# Re-read the persisted row to compare the stored file path with the new one.
orig = VenueMedia.objects.get(id=self.id)
if self.url != orig.url:
update_image = True
print "in save in admin.save, seconds passed=", (datetime.datetime.now()-start).seconds
if update_image:
print "in update_image in admin.save, seconds passed=", (datetime.datetime.now()-start).seconds
# Load the uploaded file into PIL from an in-memory buffer.
image = Img.open(StringIO.StringIO(self.url.read()))
orignal_ratio = float(image.size[0])/image.size[1]
# NOTE(review): ratio is width/height, so ratio * 171 is a *width*, yet it
# is named new_height and passed in the width slot of the thumbnail box --
# verify the intended target geometry (thumbnail() preserves aspect anyway).
new_height = int(round(orignal_ratio * 171))
image.thumbnail((new_height,171), Img.ANTIALIAS)
# Re-encode as JPEG (quality 75) into an in-memory buffer for the FileField.
output = StringIO.StringIO()
image.save(output, format='JPEG', quality=75)
output.seek(0)
self.thumbnail_url = InMemoryUploadedFile(output, 'ImageField', "%s" % str(self.url.name), 'image/jpeg', output.len, None)
# The first thumbnail generated for the parent venue becomes its cover image.
if not self.venueid.thumb_image_path:
self.venueid.thumb_image_path = self.thumbnail_url
# NOTE(review): saving the related Venue here triggers its own save logic;
# worth checking as a contributor to the duplicate-save symptoms reported.
self.venueid.save()
print "before calling django.save in admin.save, seconds passed=", (datetime.datetime.now()-start).seconds
super(VenueMedia, self).save(*args, **kwargs)
Related
I have a Django form where the user can select a variable in a choice field, start date and end date in a select date widget. For different variables, the possible start and end dates varies. The form works, but only with one pre-defined date range for all variables. How can I change the possible start and end dates according to the variable?
from django import forms
from datetime import datetime
# (submitted value, user-facing German label) pairs for the variable selector;
# the empty value doubles as the "please choose" placeholder option.
Parameter_CHOICES = [
('', 'Wähle ein Umweltvariable'),
('niederschlag', 'Niederschlag [L/m²]'),
('bodentemperatur_5cm', 'Bodentemperatur in 5cm Tiefe [°C]')
]
class DataForm(forms.Form):
    """Pick an environmental variable plus a start/end date.

    Both date widgets currently offer the same fixed year range; the
    variable choices come from the module-level Parameter_CHOICES.
    """
    parameter = forms.ChoiceField(
        choices=Parameter_CHOICES,
        widget=forms.Select,
    )
    start_date = forms.DateField(
        widget=forms.SelectDateWidget(years=list(range(2007, 2023))),
        label='Beginn:',
    )
    end_date = forms.DateField(
        widget=forms.SelectDateWidget(years=list(range(2007, 2023))),
        initial=datetime.now,
        label='Ende:',
    )
What I need is to set the
years=list(range(2007,2023))
for niederschlag
and for bodentemperatur_5cm to
years=list(range(1991,2023))
Is it possible to do this in the forms.py or do I have to make this in javascript/html?
Update
I tried to solve it with jQuery, but I am stuck.
I tried to change the year-range choices according to the parameter with this function:
// Preselect a start year matching the chosen variable.
// FIX: the original snippet never closed the handler function -- the
// missing "});" made the whole script a syntax error.
// NOTE: .val(...) only changes the current selection; the <option> list
// rendered by SelectDateWidget is unchanged, so the selectable *range*
// still has to be rebuilt server-side (or the options replaced in JS).
$("#id_parameter").on('change', function () {
    var parameter = $("#id_parameter").val();
    if (parameter == 'niederschlag') {
        $("#id_start_date_year").val(2007);
    }
    if (parameter == 'bodentemperatur_5cm') {
        $("#id_start_date_year").val(1991);
    }
});
But this only changes the value the user sees first. How can I change the possible years of the range after the user selects the parameter with jQuery?
There are 2 solutions to this that I can think of:
With views
First create the forms, the first that lets you choose the type, the second initiates the form for the options you make in "get_first_and_second_year":
# forms
class DataForm(forms.Form):
    """Step 1 form: choose which environmental variable to query."""

    # (submitted value, user-facing German label) pairs; the empty value
    # doubles as the "please choose" placeholder.
    Parameter_CHOICES = [
        ('', 'Wähle ein Umweltvariable'),
        ('niederschlag', 'Niederschlag [L/m²]'),
        ('bodentemperatur_5cm', 'Bodentemperatur in 5cm Tiefe [°C]'),
    ]

    parameter = forms.ChoiceField(
        choices=Parameter_CHOICES,
        widget=forms.Select,
    )
class DatePickForm(forms.Form):
    """Step 2 form: pick a date range.

    The selectable years depend on the variable chosen in step 1 and are
    injected through the ``first_year`` / ``second_year`` keyword arguments
    (``range(first_year, second_year)``, i.e. second_year is exclusive).
    """

    # Fallback fields, used when no explicit year range is supplied.
    start_date = forms.DateField(label='Beginn:', widget=forms.SelectDateWidget())
    end_date = forms.DateField(label='Ende:', initial=datetime.now, widget=forms.SelectDateWidget())

    def __init__(self, *args, **kwargs):
        # Pop the custom kwargs before calling the base constructor,
        # which would otherwise reject unknown keyword arguments.
        first_year = kwargs.pop('first_year', None)
        second_year = kwargs.pop('second_year', None)
        super(DatePickForm, self).__init__(*args, **kwargs)
        # FIX: the original rebuilt the fields unconditionally, so omitting
        # the kwargs produced range(None, None) -> TypeError. Only rebuild
        # when a concrete range was supplied.
        if first_year is not None and second_year is not None:
            years = list(range(first_year, second_year))
            self.fields['start_date'] = forms.DateField(
                label='Beginn:',
                widget=forms.SelectDateWidget(years=years),
            )
            # FIX: the rebuilt end_date previously dropped the
            # initial=datetime.now set on the class-level field.
            self.fields['end_date'] = forms.DateField(
                label='Ende:',
                initial=datetime.now,
                widget=forms.SelectDateWidget(years=years),
            )
Make the call for the page using different views:
# call in views
#login_required(login_url="/login/")
def date_form_view(request):
    """Render the variable-selection form; on a valid POST, redirect to
    the date-picking view carrying the chosen variable.

    FIX: the original discarded the results of redirect() and render() --
    a Django view must *return* its HttpResponse -- and referenced an
    undefined ``form_choice`` name.
    """
    form = DataForm(request.POST or None)
    if request.method == 'POST' and form.is_valid():
        # The chosen variable drives the year range of the next step.
        form_choice = form.cleaned_data['parameter']
        return redirect('date_pick', form_choice=form_choice)
    # Initial GET, or invalid POST: re-render with context
    context = {'form': form}
    return render(request, "", context)
def get_first_and_second_year(form_choice):
# return first and second year depending on form choice
match form_choice:
case 'niederschlag':
return first_year, second_year
case 'bodentemperatur_5cm':
return first_year, second_year
case _:
return 0, 0
#login_required(login_url="/login/")
def date_form_view(request, form_choice):
    """Render the date-range form whose year choices depend on *form_choice*.

    Remember to map the URL to this view with the ``form_choice`` argument.
    FIX: the original never returned render()'s HttpResponse, so Django
    would raise "view didn't return an HttpResponse".
    """
    first_year, second_year = get_first_and_second_year(form_choice)
    form = DatePickForm(request.POST or None, first_year=first_year, second_year=second_year)
    if request.method == 'POST':
        start_date = request.POST.get('start_date')
        # ... process the submitted range here ...
    context = {'form': form}
    return render(request, "", context)
With ajax (smoother interface, harder to code):
Ajax post/get calls explanation https://www.pluralsight.com/guides/work-with-ajax-django
Pass a JSON string to the view that describes the different actions for each form_choice:
And prepend the html with the form for the form_choice.
I am sending a string through http get request to my views.py and then send back to my javascript but I am not getting my desired data, and instead, I am getting an empty dict. I am on Django 1.8 as well.
views.py:
def getData(request):
    """Look up SW_ID rows for the requested selection value, as JSON."""
    selection = request.GET.get('selectionvalue')
    cursor = connection.cursor()
    # Parameterised query: the driver escapes the value safely.
    cursor.execute("SELECT SW_ID FROM sw WHERE SWName = %s ", [selection])
    matches = cursor.fetchall()
    # fetchall() already returns a list of row tuples.
    return JsonResponse({"Item" : list(matches)})
    #return JsonResponse({"Hello" : "world"}) works.
I've been stuck on this problem for the past couple hours and would love any help. Thank you.
I tried this but still no luck:
def getData(request):
    """Variant: build the payload dict first, then hand it to JsonResponse."""
    selection = request.GET.get('selectionvalue')
    cursor = connection.cursor()
    cursor.execute("SELECT SW_ID FROM sw WHERE SWName = %s ", [selection])
    row = cursor.fetchall()
    # NOTE: row is a list of tuples, e.g. [(1,), (5,)].
    data = {"Item" : row}
    return JsonResponse(data)
UPDATE:
I have now converted the query dict that I originally had into a string. I am now trying to use that string variable within my query, but it is not working. If I run a different query without the variable, it works perfectly. I can't seem to figure this out.
Have you tried to print "row" variable? Is there any result?
Because maybe this is not a problem from the query, but the problem is that row data is not json serializable, and so JsonResponse can't render it.
i. e., I believe cursor.fetchall() will deliver your data in the following format: [(1,),(5,),(7,)], because it returns a list of tuples. And when you call list(row), response is the same (as a list of tuples is already a list).
Just to be sure, try this:
def getData(request):
    """Return matching SW_IDs flattened into a JSON-serialisable list."""
    selection = request.GET.get('selectionvalue')
    cursor = connection.cursor()
    cursor.execute("SELECT SW_ID FROM sw WHERE SWName = %s ", [selection])
    rows = cursor.fetchall()
    # Flatten [(id1,), (id2,), ...] into [id1, id2, ...] so JsonResponse
    # can serialise it.
    items = [record[0] for record in rows]
    return JsonResponse({"Item" : items})
Okay after playing around for a bit, I finally fixed it. My problem was that I needed to search for an exact string name in one of the tables and columns in my database but I was retrieving my sent data in views.py as a query dictionary. The thing is that this query dictionary had my single string that I passed in through my request as the key of the dictionary, not the value. So i needed to convert it into a single string variable then use my query. I also needed to not have the ' ' around my %s inside the query.
Views.py:
def getData(request):
    """Return SW_IDs for the value sent as the sole key of the GET query.

    The client sends the lookup value as the query-string *key* (e.g.
    ``?SomeName``), so it is recovered by iterating the QueryDict.
    FIX: removed a stray duplicated fragment line that made the function a
    syntax error, and passed the parameter as a one-element list --
    ``(SWVALUE)`` is just a parenthesised string, which the DB driver
    would treat as a sequence of characters.
    """
    some_data = request.GET
    SWVALUE = None
    for key in some_data:
        SWVALUE = key
    cursor = connection.cursor()
    cursor.execute("SELECT SW_ID FROM sw WHERE SWName = %s ", [SWVALUE])
    row = cursor.fetchall()
    json_data = json.dumps(list(row))
    return HttpResponse(json_data, content_type = "application/json")
This is the website I'm working on. On each page, there are 18 posts in a table. I want to access each post and scrape its content, and repeat this for the first 5 pages.
My approach is to make my spider to scrape all links in the 5 pages and iterate over them to get the content. Because the "next page" button and certain text in each post is written by JavaScript, I use Selenium and Scrapy. I ran my spider and could see that Firefox webdriver displays the first 5 pages, but then the spider stopped without scraping any content. Scrapy returns no error message either.
Now I suspect that the failure may be due to:
1) No link is stored into all_links.
2) Somehow parse_content did not run.
My diagnosis may be wrong and I need help with finding the problem. Thank you very much!
This is my spider:
import scrapy
from bjdaxing.items_bjdaxing import BjdaxingItem
from selenium import webdriver
from scrapy.http import TextResponse
import time
all_links = [] # a global variable to store post links
class Bjdaxing(scrapy.Spider):
"""Spider that paginates a JS-driven board via Selenium, collects post
links, and intends to scrape each post's fields.

NOTE(review): parse() yields nothing and nothing ever invokes
parse_content() as a callback, so the crawl ends after pagination --
matching the reported "spider stopped without scraping any content".
"""
name = "daxing"
allowed_domains = ["bjdx.gov.cn"] # DO NOT use www in allowed domains
start_urls = ["http://app.bjdx.gov.cn/cms/daxing/lookliuyan_bjdx.jsp"] # This has to start with http
def __init__(self):
# One shared Firefox instance for the whole crawl (never closed).
self.driver = webdriver.Firefox()
def parse(self, response):
self.driver.get(response.url) # request the start url in the browser
i = 1
while i <= 5: # The number of pages to be scraped in this session
response = TextResponse(url = response.url, body = self.driver.page_source, encoding='utf-8') # Assign page source to response. I can treat response as if it's a normal scrapy project.
global all_links
# NOTE(review): "//a/#href" is not valid XPath -- the attribute axis is
# "@", i.e. "//a/@href"; as written this extracts nothing, which would
# leave all_links empty (suspected failure cause #1 in the question).
all_links.extend(response.xpath("//a/#href").extract()[0:18])
next = self.driver.find_element_by_xpath(u'//a[text()="\u4e0b\u9875\xa0"]') # locate "next" button
next.click() # Click next page
time.sleep(2) # Wait a few seconds for next page to load.
i += 1
def parse_content(self, response):
# NOTE(review): never registered as a callback, so this never runs
# (suspected failure cause #2 in the question).
item = BjdaxingItem()
global all_links
for link in all_links:
# NOTE(review): "+ link" sits OUTSIDE the get() call -- the browser loads
# only the base URL and the concatenation result is discarded; it should
# read self.driver.get("http://app.bjdx.gov.cn/cms/daxing/" + link).
self.driver.get("http://app.bjdx.gov.cn/cms/daxing/") + link
response = TextResponse(url = response.url, body = self.driver.page_source, encoding = 'utf-8')
# NOTE(review): in the next five conditions the "> 0" sits inside len(),
# i.e. len(list > 0) -- it should be len(list) > 0 as in the later blocks.
if len(response.xpath("//table/tbody/tr[1]/td[2]/text()").extract() > 0):
item['title'] = response.xpath("//table/tbody/tr[1]/td[2]/text()").extract()
else:
item['title'] = ""
if len(response.xpath("//table/tbody/tr[3]/td[2]/text()").extract() > 0):
item['netizen'] = response.xpath("//table/tbody/tr[3]/td[2]/text()").extract()
else:
item['netizen'] = ""
if len(response.xpath("//table/tbody/tr[3]/td[4]/text()").extract() > 0):
item['sex'] = response.xpath("//table/tbody/tr[3]/td[4]/text()").extract()
else:
item['sex'] = ""
if len(response.xpath("//table/tbody/tr[5]/td[2]/text()").extract() > 0):
item['time1'] = response.xpath("//table/tbody/tr[5]/td[2]/text()").extract()
else:
item['time1'] = ""
if len(response.xpath("//table/tbody/tr[11]/td[2]/text()").extract() > 0):
item['time2'] = response.xpath("//table/tbody/tr[11]/td[2]/text()").extract()
else:
item['time2'] = ""
if len(response.xpath("//table/tbody/tr[7]/td[2]/text()").extract()) > 0:
question = "".join(response.xpath("//table/tbody/tr[7]/td[2]/text()").extract())
# NOTE(review): mapping unicode.strip over a unicode *string* strips each
# single character (a no-op after join); presumably meant to strip the
# extracted list of strings before joining -- verify.
item['question'] = "".join(map(unicode.strip, question))
else: item['question'] = ""
if len(response.xpath("//table/tbody/tr[9]/td[2]/text()").extract()) > 0:
reply = "".join(response.xpath("//table/tbody/tr[9]/td[2]/text()").extract())
item['reply'] = "".join(map(unicode.strip, reply))
else: item['reply'] = ""
if len(response.xpath("//table/tbody/tr[13]/td[2]/text()").extract()) > 0:
agency = "".join(response.xpath("//table/tbody/tr[13]/td[2]/text()").extract())
item['agency'] = "".join(map(unicode.strip, agency))
else: item['agency'] = ""
yield item
Multiple problems and possible improvements here:
you don't have any "link" between the parse() and the parse_content() methods
using global variables is usually a bad practice
you don't need selenium here at all. To follow the pagination you just need to make a POST request to the same url providing the currPage parameter
The idea is to use .start_requests() and create a list/queue of requests to handle the pagination. Follow the pagination and gather the links from the table. Once the queue of requests is empty, switch to following the previously gathered links. Implementation:
import json
from urlparse import urljoin
import scrapy
NUM_PAGES = 5
class Bjdaxing(scrapy.Spider):
"""Selenium-free rewrite: paginates by POSTing "currPage" to the listing
URL, gathers post links from each page, then follows them once the
pagination queue is drained."""
name = "daxing"
allowed_domains = ["bjdx.gov.cn"] # DO NOT use www in allowed domains
def __init__(self):
# Queue of pending pagination requests and the post links gathered so far.
self.pages = []
self.links = []
def start_requests(self):
# One POST per listing page; dont_filter is required because every request
# targets the same URL and would otherwise be deduplicated by scrapy.
self.pages = [scrapy.Request("http://app.bjdx.gov.cn/cms/daxing/lookliuyan_bjdx.jsp",
body=json.dumps({"currPage": str(page)}),
method="POST",
callback=self.parse_page,
dont_filter=True)
for page in range(1, NUM_PAGES + 1)]
# Kick off with one request; parse_page pops the remainder one at a time.
yield self.pages.pop()
def parse_page(self, response):
base_url = response.url
# Collect absolute post URLs from the listing table.
self.links += [urljoin(base_url, link) for link in response.css("table tr td a::attr(href)").extract()]
try:
yield self.pages.pop()
except IndexError: # no more pages to follow, going over the gathered links
for link in self.links:
yield scrapy.Request(link, callback=self.parse_content)
def parse_content(self, response):
# your parse_content method here
Consider the following scenario:
CSV file is generated by a reporting tool every friday. It contains records for all the employees in the organisation (almost 1 million employees and increasing).
This data is saved in mongo using mongoimport in "Employee" Collection.
However, the requirement is to send "Welcome Mail" to new employees and "Year Completion Mail" to existing employees.
To solve this, I am importing the new file to a temporary collection("EmployeeTemp").
For every record in the temporary collection (EmployeeTemp), I check the old collection ("Employee"), for existing employees and mark "SendYearCompletionFlag" as true. Further, if a new employee record is found, I mark "SendWelcomeFlag" as true. Also, the project of each employee needs to be updated.
This complete process is executed via a script submitted to mongo.
The issue is that script is taking almost 18 hrs to complete.
Please help me to reduce the execution time of script.
This the script:
// Sync the freshly imported employeeTemp collection into employee:
// new hires get a welcome-mail flag; existing employees whose joining
// date is at least a year old get a year-completion flag.
// PERF: create an index on employee.eid first -- the per-document find()
// below is otherwise a full collection scan per temp record, which is the
// dominant cost of the reported 18-hour runtime.
var list = db.employeeTemp.find().addOption(DBQuery.Option.noTimeout);
while (list.hasNext()) {
    var f = list.next();
    var itr = db.employee.find({"eid": f.eid});
    var obj = itr.hasNext() ? itr.next() : null;
    if (!obj) {
        f.joiningDate = new Date();
        f.sendWelcomeMail = true;
        print("Saving New record : " + f.eid);
        // FIX: db.save() is not a shell method; saves go through the
        // target collection.
        db.employee.save(f);
    } else {
        var joinDate = obj.joiningDate;
        // 31536000000 ms == 365 days
        if (new Date().getTime() - joinDate >= 31536000000) {
            print("Sending Year Completion Mail to " + obj.eid);
            obj.sendYearCompletionMail = true;
        }
        // FIX: "projecct" typo would have written a new misspelled field
        // instead of updating the intended "project" field.
        obj.project = f.project;
        print("Saving Existing record : " + obj.eid);
        db.employee.save(obj);
    }
}
I suggest you to create an index on employee.eid.
Another thing you can try is to change the batch size in the first find adding batchSize(500) after setting the no timeout option:
http://docs.mongodb.org/manual/reference/method/cursor.batchSize/
I want to use this snippet
# extra.py in yourproject/app/
from django.db.models import FileField
from django.forms import forms
from django.template.defaultfilters import filesizeformat
from django.utils.translation import ugettext_lazy as _
class ContentTypeRestrictedFileField(FileField):
"""
Same as FileField, but you can specify:
* content_types - list containing allowed content_types. Example: ['application/pdf', 'image/jpeg']
* max_upload_size - a number indicating the maximum file size allowed for upload.
2.5MB - 2621440
5MB - 5242880
10MB - 10485760
20MB - 20971520
50MB - 52428800
100MB - 104857600
250MB - 262144000
500MB - 524288000
(the 50/250/500MB byte counts in the original table were wrong)
"""
def __init__(self, *args, **kwargs):
# Pop both custom options before FileField sees the kwargs.
# NOTE(review): kwargs.pop() without a default raises KeyError when the
# field is reconstructed without these options (e.g. by migrations) --
# consider kwargs.pop("content_types", []) style defaults.
self.content_types = kwargs.pop("content_types")
self.max_upload_size = kwargs.pop("max_upload_size")
super(ContentTypeRestrictedFileField, self).__init__(*args, **kwargs)
def clean(self, *args, **kwargs):
# Validate content type and size on top of FileField's own cleaning;
# raises forms.ValidationError on a disallowed type or oversized file.
data = super(ContentTypeRestrictedFileField, self).clean(*args, **kwargs)
file = data.file
# NOTE(review): content_type and _size exist only on freshly uploaded
# files (UploadedFile); cleaning an already-stored file may raise
# AttributeError here -- confirm against the Django version in use.
# _size is also a private attribute.
content_type = file.content_type
if content_type in self.content_types:
if file._size > self.max_upload_size:
raise forms.ValidationError(_('Please keep filesize under %s. Current filesize %s') % (filesizeformat(self.max_upload_size), filesizeformat(file._size)))
else:
raise forms.ValidationError(_('Filetype not supported.'))
return data
with this snippet
// fileInput is a HTMLInputElement: <input type="file" multiple id="myfileinput">
var fileInput = document.getElementById("myfileinput");
// files is a FileList object (simliar to NodeList)
var files = fileInput.files;
for (var i = 0; i < files.length; i++)
{
alert(files[i].name + " has a size of " + files[i].size + " Bytes");
}
so I can check the size of a file using HTML5. How do I combine these two snippets into one? I also found a Java snippet which uploads the video and checks the size, but I can't find any documentation on how to implement it. I can't rely on JavaScript alone because I can't trust the client.
Create a Widget for this field based on django's file input widget. Add JS from second snippet to the output of that widget. And pass value of max_upload_size from field to the widget on creation on rendering.